Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig | 4
-rw-r--r--  drivers/infiniband/Makefile | 4
-rw-r--r--  drivers/infiniband/core/Makefile | 4
-rw-r--r--  drivers/infiniband/core/addr.c | 22
-rw-r--r--  drivers/infiniband/core/cache.c | 5
-rw-r--r--  drivers/infiniband/core/cm.c | 66
-rw-r--r--  drivers/infiniband/core/cma.c | 405
-rw-r--r--  drivers/infiniband/core/device.c | 6
-rw-r--r--  drivers/infiniband/core/iwcm.c | 1019
-rw-r--r--  drivers/infiniband/core/iwcm.h | 62
-rw-r--r--  drivers/infiniband/core/mad.c | 19
-rw-r--r--  drivers/infiniband/core/mad_priv.h | 1
-rw-r--r--  drivers/infiniband/core/mad_rmpp.c | 94
-rw-r--r--  drivers/infiniband/core/sa_query.c | 67
-rw-r--r--  drivers/infiniband/core/smi.c | 16
-rw-r--r--  drivers/infiniband/core/sysfs.c | 13
-rw-r--r--  drivers/infiniband/core/ucm.c | 9
-rw-r--r--  drivers/infiniband/core/user_mad.c | 7
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 64
-rw-r--r--  drivers/infiniband/core/verbs.c | 21
-rw-r--r--  drivers/infiniband/hw/amso1100/Kbuild | 8
-rw-r--r--  drivers/infiniband/hw/amso1100/Kconfig | 15
-rw-r--r--  drivers/infiniband/hw/amso1100/c2.c | 1255
-rw-r--r--  drivers/infiniband/hw/amso1100/c2.h | 551
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_ae.c | 321
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_ae.h | 108
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_alloc.c | 144
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_cm.c | 452
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_cq.c | 433
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_intr.c | 209
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_mm.c | 375
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_mq.c | 174
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_mq.h | 106
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_pd.c | 89
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_provider.c | 869
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_provider.h | 181
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_qp.c | 975
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_rnic.c | 663
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_status.h | 158
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_user.h | 82
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_vq.c | 260
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_vq.h | 63
-rw-r--r--  drivers/infiniband/hw/amso1100/c2_wr.h | 1520
-rw-r--r--  drivers/infiniband/hw/ehca/Kconfig | 16
-rw-r--r--  drivers/infiniband/hw/ehca/Makefile | 16
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_av.c | 271
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes.h | 346
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_classes_pSeries.h | 236
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_cq.c | 427
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_eq.c | 185
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_hca.c | 241
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c | 762
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.h | 77
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_iverbs.h | 182
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c | 818
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mcast.c | 131
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.c | 2261
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_mrmw.h | 140
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_pd.c | 114
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qes.h | 259
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_qp.c | 1507
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_reqs.c | 653
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_sqp.c | 111
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_tools.h | 172
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_uverbs.c | 392
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_if.c | 874
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_if.h | 261
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_phyp.c | 80
-rw-r--r--  drivers/infiniband/hw/ehca/hcp_phyp.h | 90
-rw-r--r--  drivers/infiniband/hw/ehca/hipz_fns.h | 68
-rw-r--r--  drivers/infiniband/hw/ehca/hipz_fns_core.h | 100
-rw-r--r--  drivers/infiniband/hw/ehca/hipz_hw.h | 388
-rw-r--r--  drivers/infiniband/hw/ehca/ipz_pt_fn.c | 149
-rw-r--r--  drivers/infiniband/hw/ehca/ipz_pt_fn.h | 247
-rw-r--r--  drivers/infiniband/hw/ipath/Kconfig | 21
-rw-r--r--  drivers/infiniband/hw/ipath/Makefile | 29
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_common.h | 19
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_cq.c | 183
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_debug.h | 2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_diag.c | 154
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_driver.c | 349
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_file_ops.c | 35
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_fs.c | 4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6110.c (renamed from drivers/infiniband/hw/ipath/ipath_ht400.c) | 53
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_iba6120.c (renamed from drivers/infiniband/hw/ipath/ipath_pe800.c) | 82
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_init_chip.c | 21
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_intr.c | 24
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_kernel.h | 57
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_keys.c | 3
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_layer.c | 1179
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_layer.h | 115
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mad.c | 339
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mmap.c | 122
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_mr.c | 12
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_qp.c | 242
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c | 9
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_registers.h | 7
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c | 160
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_srq.c | 244
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_stats.c | 27
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_sysfs.c | 41
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_uc.c | 5
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ud.c | 182
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.c | 687
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs.h | 252
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_verbs_mcast.c | 7
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_wc_ppc64.c | 52
-rw-r--r--  drivers/infiniband/hw/ipath/verbs_debug.h | 108
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_allocator.c | 15
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_catas.c | 62
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c | 10
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h | 12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 88
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 20
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_uar.c | 2
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 2
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 194
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 37
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 35
-rw-r--r--  drivers/infiniband/ulp/iser/Kconfig | 2
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c | 1
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h | 7
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 80
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 10
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 57
130 files changed, 25265 insertions, 2698 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 69a53d476b5b..9edfacee7d84 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD
14 ---help--- 14 ---help---
15 Userspace InfiniBand Management Datagram (MAD) support. This 15 Userspace InfiniBand Management Datagram (MAD) support. This
16 is the kernel side of the userspace MAD support, which allows 16 is the kernel side of the userspace MAD support, which allows
17 userspace processes to send and receive MADs. You will also 17 userspace processes to send and receive MADs. You will also
18 need libibumad from <http://www.openib.org>. 18 need libibumad from <http://www.openib.org>.
19 19
20config INFINIBAND_USER_ACCESS 20config INFINIBAND_USER_ACCESS
@@ -36,6 +36,8 @@ config INFINIBAND_ADDR_TRANS
36 36
37source "drivers/infiniband/hw/mthca/Kconfig" 37source "drivers/infiniband/hw/mthca/Kconfig"
38source "drivers/infiniband/hw/ipath/Kconfig" 38source "drivers/infiniband/hw/ipath/Kconfig"
39source "drivers/infiniband/hw/ehca/Kconfig"
40source "drivers/infiniband/hw/amso1100/Kconfig"
39 41
40source "drivers/infiniband/ulp/ipoib/Kconfig" 42source "drivers/infiniband/ulp/ipoib/Kconfig"
41 43
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index c7ff58c1d0e5..2b5d1098ef45 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,6 +1,8 @@
1obj-$(CONFIG_INFINIBAND) += core/ 1obj-$(CONFIG_INFINIBAND) += core/
2obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/ 2obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
3obj-$(CONFIG_IPATH_CORE) += hw/ipath/ 3obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
4obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
5obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
4obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ 6obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
5obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ 7obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
6obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ 8obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 68e73ec2d1f8..163d991eb8c9 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,7 +1,7 @@
1infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o 1infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
2 2
3obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ 3obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
4 ib_cm.o $(infiniband-y) 4 ib_cm.o iw_cm.o $(infiniband-y)
5obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o 5obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
6obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o 6obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o
7 7
@@ -14,6 +14,8 @@ ib_sa-y := sa_query.o
14 14
15ib_cm-y := cm.o 15ib_cm-y := cm.o
16 16
17iw_cm-y := iwcm.o
18
17rdma_cm-y := cma.o 19rdma_cm-y := cma.o
18 20
19ib_addr-y := addr.o 21ib_addr-y := addr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1205e8027829..9cbf09e2052f 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -61,12 +61,15 @@ static LIST_HEAD(req_list);
61static DECLARE_WORK(work, process_req, NULL); 61static DECLARE_WORK(work, process_req, NULL);
62static struct workqueue_struct *addr_wq; 62static struct workqueue_struct *addr_wq;
63 63
64static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 64int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
65 unsigned char *dst_dev_addr) 65 const unsigned char *dst_dev_addr)
66{ 66{
67 switch (dev->type) { 67 switch (dev->type) {
68 case ARPHRD_INFINIBAND: 68 case ARPHRD_INFINIBAND:
69 dev_addr->dev_type = IB_NODE_CA; 69 dev_addr->dev_type = RDMA_NODE_IB_CA;
70 break;
71 case ARPHRD_ETHER:
72 dev_addr->dev_type = RDMA_NODE_RNIC;
70 break; 73 break;
71 default: 74 default:
72 return -EADDRNOTAVAIL; 75 return -EADDRNOTAVAIL;
@@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
78 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 81 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
79 return 0; 82 return 0;
80} 83}
84EXPORT_SYMBOL(rdma_copy_addr);
81 85
82int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 86int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
83{ 87{
@@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
89 if (!dev) 93 if (!dev)
90 return -EADDRNOTAVAIL; 94 return -EADDRNOTAVAIL;
91 95
92 ret = copy_addr(dev_addr, dev, NULL); 96 ret = rdma_copy_addr(dev_addr, dev, NULL);
93 dev_put(dev); 97 dev_put(dev);
94 return ret; 98 return ret;
95} 99}
@@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
161 165
162 /* If the device does ARP internally, return 'done' */ 166 /* If the device does ARP internally, return 'done' */
163 if (rt->idev->dev->flags & IFF_NOARP) { 167 if (rt->idev->dev->flags & IFF_NOARP) {
164 copy_addr(addr, rt->idev->dev, NULL); 168 rdma_copy_addr(addr, rt->idev->dev, NULL);
165 goto put; 169 goto put;
166 } 170 }
167 171
@@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
181 src_in->sin_addr.s_addr = rt->rt_src; 185 src_in->sin_addr.s_addr = rt->rt_src;
182 } 186 }
183 187
184 ret = copy_addr(addr, neigh->dev, neigh->ha); 188 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
185release: 189release:
186 neigh_release(neigh); 190 neigh_release(neigh);
187put: 191put:
@@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
245 if (ZERONET(src_ip)) { 249 if (ZERONET(src_ip)) {
246 src_in->sin_family = dst_in->sin_family; 250 src_in->sin_family = dst_in->sin_family;
247 src_in->sin_addr.s_addr = dst_ip; 251 src_in->sin_addr.s_addr = dst_ip;
248 ret = copy_addr(addr, dev, dev->dev_addr); 252 ret = rdma_copy_addr(addr, dev, dev->dev_addr);
249 } else if (LOOPBACK(src_ip)) { 253 } else if (LOOPBACK(src_ip)) {
250 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); 254 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
251 if (!ret) 255 if (!ret)
@@ -327,10 +331,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
327} 331}
328EXPORT_SYMBOL(rdma_addr_cancel); 332EXPORT_SYMBOL(rdma_addr_cancel);
329 333
330static int netevent_callback(struct notifier_block *self, unsigned long event, 334static int netevent_callback(struct notifier_block *self, unsigned long event,
331 void *ctx) 335 void *ctx)
332{ 336{
333 if (event == NETEVENT_NEIGH_UPDATE) { 337 if (event == NETEVENT_NEIGH_UPDATE) {
334 struct neighbour *neigh = ctx; 338 struct neighbour *neigh = ctx;
335 339
336 if (neigh->dev->type == ARPHRD_INFINIBAND && 340 if (neigh->dev->type == ARPHRD_INFINIBAND &&
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 75313ade2e0d..20e9f64e67a6 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -62,12 +62,13 @@ struct ib_update_work {
62 62
63static inline int start_port(struct ib_device *device) 63static inline int start_port(struct ib_device *device)
64{ 64{
65 return device->node_type == IB_NODE_SWITCH ? 0 : 1; 65 return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
66} 66}
67 67
68static inline int end_port(struct ib_device *device) 68static inline int end_port(struct ib_device *device)
69{ 69{
70 return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt; 70 return (device->node_type == RDMA_NODE_IB_SWITCH) ?
71 0 : device->phys_port_cnt;
71} 72}
72 73
73int ib_get_cached_gid(struct ib_device *device, 74int ib_get_cached_gid(struct ib_device *device,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0de335b7bfc2..f35fcc4c0638 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 2 * Copyright (c) 2004-2006 Intel Corporation. All rights reserved.
3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -41,6 +41,7 @@
41#include <linux/idr.h> 41#include <linux/idr.h>
42#include <linux/interrupt.h> 42#include <linux/interrupt.h>
43#include <linux/pci.h> 43#include <linux/pci.h>
44#include <linux/random.h>
44#include <linux/rbtree.h> 45#include <linux/rbtree.h>
45#include <linux/spinlock.h> 46#include <linux/spinlock.h>
46#include <linux/workqueue.h> 47#include <linux/workqueue.h>
@@ -73,6 +74,7 @@ static struct ib_cm {
73 struct rb_root remote_id_table; 74 struct rb_root remote_id_table;
74 struct rb_root remote_sidr_table; 75 struct rb_root remote_sidr_table;
75 struct idr local_id_table; 76 struct idr local_id_table;
77 __be32 random_id_operand;
76 struct workqueue_struct *wq; 78 struct workqueue_struct *wq;
77} cm; 79} cm;
78 80
@@ -177,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
177 if (IS_ERR(ah)) 179 if (IS_ERR(ah))
178 return PTR_ERR(ah); 180 return PTR_ERR(ah);
179 181
180 m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 182 m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
181 cm_id_priv->av.pkey_index, 183 cm_id_priv->av.pkey_index,
182 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, 184 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
183 GFP_ATOMIC); 185 GFP_ATOMIC);
@@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
299static int cm_alloc_id(struct cm_id_private *cm_id_priv) 301static int cm_alloc_id(struct cm_id_private *cm_id_priv)
300{ 302{
301 unsigned long flags; 303 unsigned long flags;
302 int ret; 304 int ret, id;
303 static int next_id; 305 static int next_id;
304 306
305 do { 307 do {
306 spin_lock_irqsave(&cm.lock, flags); 308 spin_lock_irqsave(&cm.lock, flags);
307 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++, 309 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
308 (__force int *) &cm_id_priv->id.local_id); 310 next_id++, &id);
309 spin_unlock_irqrestore(&cm.lock, flags); 311 spin_unlock_irqrestore(&cm.lock, flags);
310 } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); 312 } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
313
314 cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
311 return ret; 315 return ret;
312} 316}
313 317
@@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id)
316 unsigned long flags; 320 unsigned long flags;
317 321
318 spin_lock_irqsave(&cm.lock, flags); 322 spin_lock_irqsave(&cm.lock, flags);
319 idr_remove(&cm.local_id_table, (__force int) local_id); 323 idr_remove(&cm.local_id_table,
324 (__force int) (local_id ^ cm.random_id_operand));
320 spin_unlock_irqrestore(&cm.lock, flags); 325 spin_unlock_irqrestore(&cm.lock, flags);
321} 326}
322 327
@@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
324{ 329{
325 struct cm_id_private *cm_id_priv; 330 struct cm_id_private *cm_id_priv;
326 331
327 cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id); 332 cm_id_priv = idr_find(&cm.local_id_table,
333 (__force int) (local_id ^ cm.random_id_operand));
328 if (cm_id_priv) { 334 if (cm_id_priv) {
329 if (cm_id_priv->id.remote_id == remote_id) 335 if (cm_id_priv->id.remote_id == remote_id)
330 atomic_inc(&cm_id_priv->refcount); 336 atomic_inc(&cm_id_priv->refcount);
@@ -679,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
679{ 685{
680 int wait_time; 686 int wait_time;
681 687
688 cm_cleanup_timewait(cm_id_priv->timewait_info);
689
682 /* 690 /*
683 * The cm_id could be destroyed by the user before we exit timewait. 691 * The cm_id could be destroyed by the user before we exit timewait.
684 * To protect against this, we search for the cm_id after exiting 692 * To protect against this, we search for the cm_id after exiting
@@ -1354,7 +1362,7 @@ static int cm_req_handler(struct cm_work *work)
1354 id.local_id); 1362 id.local_id);
1355 if (IS_ERR(cm_id_priv->timewait_info)) { 1363 if (IS_ERR(cm_id_priv->timewait_info)) {
1356 ret = PTR_ERR(cm_id_priv->timewait_info); 1364 ret = PTR_ERR(cm_id_priv->timewait_info);
1357 goto error1; 1365 goto destroy;
1358 } 1366 }
1359 cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; 1367 cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1360 cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; 1368 cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
@@ -1363,7 +1371,8 @@ static int cm_req_handler(struct cm_work *work)
1363 listen_cm_id_priv = cm_match_req(work, cm_id_priv); 1371 listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1364 if (!listen_cm_id_priv) { 1372 if (!listen_cm_id_priv) {
1365 ret = -EINVAL; 1373 ret = -EINVAL;
1366 goto error2; 1374 kfree(cm_id_priv->timewait_info);
1375 goto destroy;
1367 } 1376 }
1368 1377
1369 cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; 1378 cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1373,12 +1382,22 @@ static int cm_req_handler(struct cm_work *work)
1373 1382
1374 cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); 1383 cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1375 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); 1384 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1376 if (ret) 1385 if (ret) {
1377 goto error3; 1386 ib_get_cached_gid(work->port->cm_dev->device,
1387 work->port->port_num, 0, &work->path[0].sgid);
1388 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1389 &work->path[0].sgid, sizeof work->path[0].sgid,
1390 NULL, 0);
1391 goto rejected;
1392 }
1378 if (req_msg->alt_local_lid) { 1393 if (req_msg->alt_local_lid) {
1379 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); 1394 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
1380 if (ret) 1395 if (ret) {
1381 goto error3; 1396 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1397 &work->path[0].sgid,
1398 sizeof work->path[0].sgid, NULL, 0);
1399 goto rejected;
1400 }
1382 } 1401 }
1383 cm_id_priv->tid = req_msg->hdr.tid; 1402 cm_id_priv->tid = req_msg->hdr.tid;
1384 cm_id_priv->timeout_ms = cm_convert_to_ms( 1403 cm_id_priv->timeout_ms = cm_convert_to_ms(
@@ -1400,12 +1419,11 @@ static int cm_req_handler(struct cm_work *work)
1400 cm_deref_id(listen_cm_id_priv); 1419 cm_deref_id(listen_cm_id_priv);
1401 return 0; 1420 return 0;
1402 1421
1403error3: atomic_dec(&cm_id_priv->refcount); 1422rejected:
1423 atomic_dec(&cm_id_priv->refcount);
1404 cm_deref_id(listen_cm_id_priv); 1424 cm_deref_id(listen_cm_id_priv);
1405 cm_cleanup_timewait(cm_id_priv->timewait_info); 1425destroy:
1406error2: kfree(cm_id_priv->timewait_info); 1426 ib_destroy_cm_id(cm_id);
1407 cm_id_priv->timewait_info = NULL;
1408error1: ib_destroy_cm_id(&cm_id_priv->id);
1409 return ret; 1427 return ret;
1410} 1428}
1411 1429
@@ -2072,8 +2090,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2072 spin_unlock_irqrestore(&cm.lock, flags); 2090 spin_unlock_irqrestore(&cm.lock, flags);
2073 return NULL; 2091 return NULL;
2074 } 2092 }
2075 cm_id_priv = idr_find(&cm.local_id_table, 2093 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2076 (__force int) timewait_info->work.local_id); 2094 (timewait_info->work.local_id ^
2095 cm.random_id_operand));
2077 if (cm_id_priv) { 2096 if (cm_id_priv) {
2078 if (cm_id_priv->id.remote_id == remote_id) 2097 if (cm_id_priv->id.remote_id == remote_id)
2079 atomic_inc(&cm_id_priv->refcount); 2098 atomic_inc(&cm_id_priv->refcount);
@@ -3125,7 +3144,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3125 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | 3144 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
3126 IB_ACCESS_REMOTE_WRITE; 3145 IB_ACCESS_REMOTE_WRITE;
3127 if (cm_id_priv->responder_resources) 3146 if (cm_id_priv->responder_resources)
3128 qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ; 3147 qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
3148 IB_ACCESS_REMOTE_ATOMIC;
3129 qp_attr->pkey_index = cm_id_priv->av.pkey_index; 3149 qp_attr->pkey_index = cm_id_priv->av.pkey_index;
3130 qp_attr->port_num = cm_id_priv->av.port->port_num; 3150 qp_attr->port_num = cm_id_priv->av.port->port_num;
3131 ret = 0; 3151 ret = 0;
@@ -3262,6 +3282,9 @@ static void cm_add_one(struct ib_device *device)
3262 int ret; 3282 int ret;
3263 u8 i; 3283 u8 i;
3264 3284
3285 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
3286 return;
3287
3265 cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) * 3288 cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
3266 device->phys_port_cnt, GFP_KERNEL); 3289 device->phys_port_cnt, GFP_KERNEL);
3267 if (!cm_dev) 3290 if (!cm_dev)
@@ -3349,6 +3372,7 @@ static int __init ib_cm_init(void)
3349 cm.remote_qp_table = RB_ROOT; 3372 cm.remote_qp_table = RB_ROOT;
3350 cm.remote_sidr_table = RB_ROOT; 3373 cm.remote_sidr_table = RB_ROOT;
3351 idr_init(&cm.local_id_table); 3374 idr_init(&cm.local_id_table);
3375 get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
3352 idr_pre_get(&cm.local_id_table, GFP_KERNEL); 3376 idr_pre_get(&cm.local_id_table, GFP_KERNEL);
3353 3377
3354 cm.wq = create_workqueue("ib_cm"); 3378 cm.wq = create_workqueue("ib_cm");
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d6f99d5720fc..1178bd434d1b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -35,6 +35,7 @@
35#include <linux/mutex.h> 35#include <linux/mutex.h>
36#include <linux/random.h> 36#include <linux/random.h>
37#include <linux/idr.h> 37#include <linux/idr.h>
38#include <linux/inetdevice.h>
38 39
39#include <net/tcp.h> 40#include <net/tcp.h>
40 41
@@ -43,13 +44,14 @@
43#include <rdma/ib_cache.h> 44#include <rdma/ib_cache.h>
44#include <rdma/ib_cm.h> 45#include <rdma/ib_cm.h>
45#include <rdma/ib_sa.h> 46#include <rdma/ib_sa.h>
47#include <rdma/iw_cm.h>
46 48
47MODULE_AUTHOR("Sean Hefty"); 49MODULE_AUTHOR("Sean Hefty");
48MODULE_DESCRIPTION("Generic RDMA CM Agent"); 50MODULE_DESCRIPTION("Generic RDMA CM Agent");
49MODULE_LICENSE("Dual BSD/GPL"); 51MODULE_LICENSE("Dual BSD/GPL");
50 52
51#define CMA_CM_RESPONSE_TIMEOUT 20 53#define CMA_CM_RESPONSE_TIMEOUT 20
52#define CMA_MAX_CM_RETRIES 3 54#define CMA_MAX_CM_RETRIES 15
53 55
54static void cma_add_one(struct ib_device *device); 56static void cma_add_one(struct ib_device *device);
55static void cma_remove_one(struct ib_device *device); 57static void cma_remove_one(struct ib_device *device);
@@ -60,6 +62,7 @@ static struct ib_client cma_client = {
60 .remove = cma_remove_one 62 .remove = cma_remove_one
61}; 63};
62 64
65static struct ib_sa_client sa_client;
63static LIST_HEAD(dev_list); 66static LIST_HEAD(dev_list);
64static LIST_HEAD(listen_any_list); 67static LIST_HEAD(listen_any_list);
65static DEFINE_MUTEX(lock); 68static DEFINE_MUTEX(lock);
@@ -124,6 +127,7 @@ struct rdma_id_private {
124 int query_id; 127 int query_id;
125 union { 128 union {
126 struct ib_cm_id *ib; 129 struct ib_cm_id *ib;
130 struct iw_cm_id *iw;
127 } cm_id; 131 } cm_id;
128 132
129 u32 seq_num; 133 u32 seq_num;
@@ -259,15 +263,24 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
259 id_priv->cma_dev = NULL; 263 id_priv->cma_dev = NULL;
260} 264}
261 265
262static int cma_acquire_ib_dev(struct rdma_id_private *id_priv) 266static int cma_acquire_dev(struct rdma_id_private *id_priv)
263{ 267{
268 enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
264 struct cma_device *cma_dev; 269 struct cma_device *cma_dev;
265 union ib_gid gid; 270 union ib_gid gid;
266 int ret = -ENODEV; 271 int ret = -ENODEV;
267 272
268 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid), 273 switch (rdma_node_get_transport(dev_type)) {
274 case RDMA_TRANSPORT_IB:
275 ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
276 break;
277 case RDMA_TRANSPORT_IWARP:
278 iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
279 break;
280 default:
281 return -ENODEV;
282 }
269 283
270 mutex_lock(&lock);
271 list_for_each_entry(cma_dev, &dev_list, list) { 284 list_for_each_entry(cma_dev, &dev_list, list) {
272 ret = ib_find_cached_gid(cma_dev->device, &gid, 285 ret = ib_find_cached_gid(cma_dev->device, &gid,
273 &id_priv->id.port_num, NULL); 286 &id_priv->id.port_num, NULL);
@@ -276,20 +289,9 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
276 break; 289 break;
277 } 290 }
278 } 291 }
279 mutex_unlock(&lock);
280 return ret; 292 return ret;
281} 293}
282 294
283static int cma_acquire_dev(struct rdma_id_private *id_priv)
284{
285 switch (id_priv->id.route.addr.dev_addr.dev_type) {
286 case IB_NODE_CA:
287 return cma_acquire_ib_dev(id_priv);
288 default:
289 return -ENODEV;
290 }
291}
292
293static void cma_deref_id(struct rdma_id_private *id_priv) 295static void cma_deref_id(struct rdma_id_private *id_priv)
294{ 296{
295 if (atomic_dec_and_test(&id_priv->refcount)) 297 if (atomic_dec_and_test(&id_priv->refcount))
@@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
347 IB_QP_PKEY_INDEX | IB_QP_PORT); 349 IB_QP_PKEY_INDEX | IB_QP_PORT);
348} 350}
349 351
352static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
353{
354 struct ib_qp_attr qp_attr;
355
356 qp_attr.qp_state = IB_QPS_INIT;
357 qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
358
359 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
360}
361
350int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, 362int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
351 struct ib_qp_init_attr *qp_init_attr) 363 struct ib_qp_init_attr *qp_init_attr)
352{ 364{
@@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
362 if (IS_ERR(qp)) 374 if (IS_ERR(qp))
363 return PTR_ERR(qp); 375 return PTR_ERR(qp);
364 376
365 switch (id->device->node_type) { 377 switch (rdma_node_get_transport(id->device->node_type)) {
366 case IB_NODE_CA: 378 case RDMA_TRANSPORT_IB:
367 ret = cma_init_ib_qp(id_priv, qp); 379 ret = cma_init_ib_qp(id_priv, qp);
368 break; 380 break;
381 case RDMA_TRANSPORT_IWARP:
382 ret = cma_init_iw_qp(id_priv, qp);
383 break;
369 default: 384 default:
370 ret = -ENOSYS; 385 ret = -ENOSYS;
371 break; 386 break;
@@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
451 int ret; 466 int ret;
452 467
453 id_priv = container_of(id, struct rdma_id_private, id); 468 id_priv = container_of(id, struct rdma_id_private, id);
454 switch (id_priv->id.device->node_type) { 469 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
455 case IB_NODE_CA: 470 case RDMA_TRANSPORT_IB:
456 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 471 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
457 qp_attr_mask); 472 qp_attr_mask);
458 if (qp_attr->qp_state == IB_QPS_RTR) 473 if (qp_attr->qp_state == IB_QPS_RTR)
459 qp_attr->rq_psn = id_priv->seq_num; 474 qp_attr->rq_psn = id_priv->seq_num;
460 break; 475 break;
476 case RDMA_TRANSPORT_IWARP:
477 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
478 qp_attr_mask);
479 break;
461 default: 480 default:
462 ret = -ENOSYS; 481 ret = -ENOSYS;
463 break; 482 break;
@@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv,
590 609
591static void cma_cancel_route(struct rdma_id_private *id_priv) 610static void cma_cancel_route(struct rdma_id_private *id_priv)
592{ 611{
593 switch (id_priv->id.device->node_type) { 612 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
594 case IB_NODE_CA: 613 case RDMA_TRANSPORT_IB:
595 if (id_priv->query) 614 if (id_priv->query)
596 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 615 ib_sa_cancel_query(id_priv->query_id, id_priv->query);
597 break; 616 break;
@@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv)
611 cma_exch(id_priv, CMA_DESTROYING); 630 cma_exch(id_priv, CMA_DESTROYING);
612 631
613 if (id_priv->cma_dev) { 632 if (id_priv->cma_dev) {
614 switch (id_priv->id.device->node_type) { 633 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
615 case IB_NODE_CA: 634 case RDMA_TRANSPORT_IB:
616 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) 635 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
617 ib_destroy_cm_id(id_priv->cm_id.ib); 636 ib_destroy_cm_id(id_priv->cm_id.ib);
618 break; 637 break;
638 case RDMA_TRANSPORT_IWARP:
639 if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
640 iw_destroy_cm_id(id_priv->cm_id.iw);
641 break;
619 default: 642 default:
620 break; 643 break;
621 } 644 }
@@ -689,19 +712,25 @@ void rdma_destroy_id(struct rdma_cm_id *id)
689 state = cma_exch(id_priv, CMA_DESTROYING); 712 state = cma_exch(id_priv, CMA_DESTROYING);
690 cma_cancel_operation(id_priv, state); 713 cma_cancel_operation(id_priv, state);
691 714
715 mutex_lock(&lock);
692 if (id_priv->cma_dev) { 716 if (id_priv->cma_dev) {
693 switch (id->device->node_type) { 717 mutex_unlock(&lock);
694 case IB_NODE_CA: 718 switch (rdma_node_get_transport(id->device->node_type)) {
695 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) 719 case RDMA_TRANSPORT_IB:
720 if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
696 ib_destroy_cm_id(id_priv->cm_id.ib); 721 ib_destroy_cm_id(id_priv->cm_id.ib);
697 break; 722 break;
723 case RDMA_TRANSPORT_IWARP:
724 if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
725 iw_destroy_cm_id(id_priv->cm_id.iw);
726 break;
698 default: 727 default:
699 break; 728 break;
700 } 729 }
701 mutex_lock(&lock); 730 mutex_lock(&lock);
702 cma_detach_from_dev(id_priv); 731 cma_detach_from_dev(id_priv);
703 mutex_unlock(&lock);
704 } 732 }
733 mutex_unlock(&lock);
705 734
706 cma_release_port(id_priv); 735 cma_release_port(id_priv);
707 cma_deref_id(id_priv); 736 cma_deref_id(id_priv);
@@ -869,7 +898,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
869 ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 898 ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
870 ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 899 ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
871 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 900 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
872 rt->addr.dev_addr.dev_type = IB_NODE_CA; 901 rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
873 902
874 id_priv = container_of(id, struct rdma_id_private, id); 903 id_priv = container_of(id, struct rdma_id_private, id);
875 id_priv->state = CMA_CONNECT; 904 id_priv->state = CMA_CONNECT;
@@ -898,7 +927,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
898 } 927 }
899 928
900 atomic_inc(&conn_id->dev_remove); 929 atomic_inc(&conn_id->dev_remove);
901 ret = cma_acquire_ib_dev(conn_id); 930 mutex_lock(&lock);
931 ret = cma_acquire_dev(conn_id);
932 mutex_unlock(&lock);
902 if (ret) { 933 if (ret) {
903 ret = -ENODEV; 934 ret = -ENODEV;
904 cma_release_remove(conn_id); 935 cma_release_remove(conn_id);
@@ -982,6 +1013,130 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
982 } 1013 }
983} 1014}
984 1015
1016static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1017{
1018 struct rdma_id_private *id_priv = iw_id->context;
1019 enum rdma_cm_event_type event = 0;
1020 struct sockaddr_in *sin;
1021 int ret = 0;
1022
1023 atomic_inc(&id_priv->dev_remove);
1024
1025 switch (iw_event->event) {
1026 case IW_CM_EVENT_CLOSE:
1027 event = RDMA_CM_EVENT_DISCONNECTED;
1028 break;
1029 case IW_CM_EVENT_CONNECT_REPLY:
1030 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1031 *sin = iw_event->local_addr;
1032 sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
1033 *sin = iw_event->remote_addr;
1034 if (iw_event->status)
1035 event = RDMA_CM_EVENT_REJECTED;
1036 else
1037 event = RDMA_CM_EVENT_ESTABLISHED;
1038 break;
1039 case IW_CM_EVENT_ESTABLISHED:
1040 event = RDMA_CM_EVENT_ESTABLISHED;
1041 break;
1042 default:
1043 BUG_ON(1);
1044 }
1045
1046 ret = cma_notify_user(id_priv, event, iw_event->status,
1047 iw_event->private_data,
1048 iw_event->private_data_len);
1049 if (ret) {
1050 /* Destroy the CM ID by returning a non-zero value. */
1051 id_priv->cm_id.iw = NULL;
1052 cma_exch(id_priv, CMA_DESTROYING);
1053 cma_release_remove(id_priv);
1054 rdma_destroy_id(&id_priv->id);
1055 return ret;
1056 }
1057
1058 cma_release_remove(id_priv);
1059 return ret;
1060}
1061
1062static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1063 struct iw_cm_event *iw_event)
1064{
1065 struct rdma_cm_id *new_cm_id;
1066 struct rdma_id_private *listen_id, *conn_id;
1067 struct sockaddr_in *sin;
1068 struct net_device *dev = NULL;
1069 int ret;
1070
1071 listen_id = cm_id->context;
1072 atomic_inc(&listen_id->dev_remove);
1073 if (!cma_comp(listen_id, CMA_LISTEN)) {
1074 ret = -ECONNABORTED;
1075 goto out;
1076 }
1077
1078 /* Create a new RDMA id for the new IW CM ID */
1079 new_cm_id = rdma_create_id(listen_id->id.event_handler,
1080 listen_id->id.context,
1081 RDMA_PS_TCP);
1082 if (!new_cm_id) {
1083 ret = -ENOMEM;
1084 goto out;
1085 }
1086 conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1087 atomic_inc(&conn_id->dev_remove);
1088 conn_id->state = CMA_CONNECT;
1089
1090 dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
1091 if (!dev) {
1092 ret = -EADDRNOTAVAIL;
1093 cma_release_remove(conn_id);
1094 rdma_destroy_id(new_cm_id);
1095 goto out;
1096 }
1097 ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1098 if (ret) {
1099 cma_release_remove(conn_id);
1100 rdma_destroy_id(new_cm_id);
1101 goto out;
1102 }
1103
1104 mutex_lock(&lock);
1105 ret = cma_acquire_dev(conn_id);
1106 mutex_unlock(&lock);
1107 if (ret) {
1108 cma_release_remove(conn_id);
1109 rdma_destroy_id(new_cm_id);
1110 goto out;
1111 }
1112
1113 conn_id->cm_id.iw = cm_id;
1114 cm_id->context = conn_id;
1115 cm_id->cm_handler = cma_iw_handler;
1116
1117 sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1118 *sin = iw_event->local_addr;
1119 sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1120 *sin = iw_event->remote_addr;
1121
1122 ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
1123 iw_event->private_data,
1124 iw_event->private_data_len);
1125 if (ret) {
1126 /* User wants to destroy the CM ID */
1127 conn_id->cm_id.iw = NULL;
1128 cma_exch(conn_id, CMA_DESTROYING);
1129 cma_release_remove(conn_id);
1130 rdma_destroy_id(&conn_id->id);
1131 }
1132
1133out:
1134 if (dev)
1135 dev_put(dev);
1136 cma_release_remove(listen_id);
1137 return ret;
1138}
1139
985static int cma_ib_listen(struct rdma_id_private *id_priv) 1140static int cma_ib_listen(struct rdma_id_private *id_priv)
986{ 1141{
987 struct ib_cm_compare_data compare_data; 1142 struct ib_cm_compare_data compare_data;
@@ -1011,6 +1166,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
1011 return ret; 1166 return ret;
1012} 1167}
1013 1168
1169static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1170{
1171 int ret;
1172 struct sockaddr_in *sin;
1173
1174 id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
1175 iw_conn_req_handler,
1176 id_priv);
1177 if (IS_ERR(id_priv->cm_id.iw))
1178 return PTR_ERR(id_priv->cm_id.iw);
1179
1180 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1181 id_priv->cm_id.iw->local_addr = *sin;
1182
1183 ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1184
1185 if (ret) {
1186 iw_destroy_cm_id(id_priv->cm_id.iw);
1187 id_priv->cm_id.iw = NULL;
1188 }
1189
1190 return ret;
1191}
1192
1014static int cma_listen_handler(struct rdma_cm_id *id, 1193static int cma_listen_handler(struct rdma_cm_id *id,
1015 struct rdma_cm_event *event) 1194 struct rdma_cm_event *event)
1016{ 1195{
@@ -1087,12 +1266,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
1087 1266
1088 id_priv->backlog = backlog; 1267 id_priv->backlog = backlog;
1089 if (id->device) { 1268 if (id->device) {
1090 switch (id->device->node_type) { 1269 switch (rdma_node_get_transport(id->device->node_type)) {
1091 case IB_NODE_CA: 1270 case RDMA_TRANSPORT_IB:
1092 ret = cma_ib_listen(id_priv); 1271 ret = cma_ib_listen(id_priv);
1093 if (ret) 1272 if (ret)
1094 goto err; 1273 goto err;
1095 break; 1274 break;
1275 case RDMA_TRANSPORT_IWARP:
1276 ret = cma_iw_listen(id_priv, backlog);
1277 if (ret)
1278 goto err;
1279 break;
1096 default: 1280 default:
1097 ret = -ENOSYS; 1281 ret = -ENOSYS;
1098 goto err; 1282 goto err;
@@ -1140,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1140 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr)); 1324 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
1141 path_rec.numb_path = 1; 1325 path_rec.numb_path = 1;
1142 1326
1143 id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device, 1327 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1144 id_priv->id.port_num, &path_rec, 1328 id_priv->id.port_num, &path_rec,
1145 IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 1329 IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1146 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH, 1330 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
@@ -1231,6 +1415,23 @@ err:
1231} 1415}
1232EXPORT_SYMBOL(rdma_set_ib_paths); 1416EXPORT_SYMBOL(rdma_set_ib_paths);
1233 1417
1418static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1419{
1420 struct cma_work *work;
1421
1422 work = kzalloc(sizeof *work, GFP_KERNEL);
1423 if (!work)
1424 return -ENOMEM;
1425
1426 work->id = id_priv;
1427 INIT_WORK(&work->work, cma_work_handler, work);
1428 work->old_state = CMA_ROUTE_QUERY;
1429 work->new_state = CMA_ROUTE_RESOLVED;
1430 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1431 queue_work(cma_wq, &work->work);
1432 return 0;
1433}
1434
1234int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 1435int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1235{ 1436{
1236 struct rdma_id_private *id_priv; 1437 struct rdma_id_private *id_priv;
@@ -1241,10 +1442,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1241 return -EINVAL; 1442 return -EINVAL;
1242 1443
1243 atomic_inc(&id_priv->refcount); 1444 atomic_inc(&id_priv->refcount);
1244 switch (id->device->node_type) { 1445 switch (rdma_node_get_transport(id->device->node_type)) {
1245 case IB_NODE_CA: 1446 case RDMA_TRANSPORT_IB:
1246 ret = cma_resolve_ib_route(id_priv, timeout_ms); 1447 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1247 break; 1448 break;
1449 case RDMA_TRANSPORT_IWARP:
1450 ret = cma_resolve_iw_route(id_priv, timeout_ms);
1451 break;
1248 default: 1452 default:
1249 ret = -ENOSYS; 1453 ret = -ENOSYS;
1250 break; 1454 break;
@@ -1309,16 +1513,26 @@ static void addr_handler(int status, struct sockaddr *src_addr,
1309 enum rdma_cm_event_type event; 1513 enum rdma_cm_event_type event;
1310 1514
1311 atomic_inc(&id_priv->dev_remove); 1515 atomic_inc(&id_priv->dev_remove);
1312 if (!id_priv->cma_dev && !status) 1516
1517 /*
1518 * Grab mutex to block rdma_destroy_id() from removing the device while
1519 * we're trying to acquire it.
1520 */
1521 mutex_lock(&lock);
1522 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1523 mutex_unlock(&lock);
1524 goto out;
1525 }
1526
1527 if (!status && !id_priv->cma_dev)
1313 status = cma_acquire_dev(id_priv); 1528 status = cma_acquire_dev(id_priv);
1529 mutex_unlock(&lock);
1314 1530
1315 if (status) { 1531 if (status) {
1316 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND)) 1532 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1317 goto out; 1533 goto out;
1318 event = RDMA_CM_EVENT_ADDR_ERROR; 1534 event = RDMA_CM_EVENT_ADDR_ERROR;
1319 } else { 1535 } else {
1320 if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
1321 goto out;
1322 memcpy(&id_priv->id.route.addr.src_addr, src_addr, 1536 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1323 ip_addr_size(src_addr)); 1537 ip_addr_size(src_addr));
1324 event = RDMA_CM_EVENT_ADDR_RESOLVED; 1538 event = RDMA_CM_EVENT_ADDR_RESOLVED;
@@ -1492,7 +1706,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
1492 hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { 1706 hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
1493 if (cma_any_addr(&cur_id->id.route.addr.src_addr)) 1707 if (cma_any_addr(&cur_id->id.route.addr.src_addr))
1494 return -EADDRNOTAVAIL; 1708 return -EADDRNOTAVAIL;
1495 1709
1496 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; 1710 cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
1497 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) 1711 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
1498 return -EADDRINUSE; 1712 return -EADDRINUSE;
@@ -1542,8 +1756,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
1542 1756
1543 if (!cma_any_addr(addr)) { 1757 if (!cma_any_addr(addr)) {
1544 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); 1758 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
1545 if (!ret) 1759 if (!ret) {
1760 mutex_lock(&lock);
1546 ret = cma_acquire_dev(id_priv); 1761 ret = cma_acquire_dev(id_priv);
1762 mutex_unlock(&lock);
1763 }
1547 if (ret) 1764 if (ret)
1548 goto err; 1765 goto err;
1549 } 1766 }
@@ -1649,6 +1866,47 @@ out:
1649 return ret; 1866 return ret;
1650} 1867}
1651 1868
1869static int cma_connect_iw(struct rdma_id_private *id_priv,
1870 struct rdma_conn_param *conn_param)
1871{
1872 struct iw_cm_id *cm_id;
1873 struct sockaddr_in* sin;
1874 int ret;
1875 struct iw_cm_conn_param iw_param;
1876
1877 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
1878 if (IS_ERR(cm_id)) {
1879 ret = PTR_ERR(cm_id);
1880 goto out;
1881 }
1882
1883 id_priv->cm_id.iw = cm_id;
1884
1885 sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
1886 cm_id->local_addr = *sin;
1887
1888 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
1889 cm_id->remote_addr = *sin;
1890
1891 ret = cma_modify_qp_rtr(&id_priv->id);
1892 if (ret) {
1893 iw_destroy_cm_id(cm_id);
1894 return ret;
1895 }
1896
1897 iw_param.ord = conn_param->initiator_depth;
1898 iw_param.ird = conn_param->responder_resources;
1899 iw_param.private_data = conn_param->private_data;
1900 iw_param.private_data_len = conn_param->private_data_len;
1901 if (id_priv->id.qp)
1902 iw_param.qpn = id_priv->qp_num;
1903 else
1904 iw_param.qpn = conn_param->qp_num;
1905 ret = iw_cm_connect(cm_id, &iw_param);
1906out:
1907 return ret;
1908}
1909
1652int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 1910int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1653{ 1911{
1654 struct rdma_id_private *id_priv; 1912 struct rdma_id_private *id_priv;
@@ -1664,10 +1922,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1664 id_priv->srq = conn_param->srq; 1922 id_priv->srq = conn_param->srq;
1665 } 1923 }
1666 1924
1667 switch (id->device->node_type) { 1925 switch (rdma_node_get_transport(id->device->node_type)) {
1668 case IB_NODE_CA: 1926 case RDMA_TRANSPORT_IB:
1669 ret = cma_connect_ib(id_priv, conn_param); 1927 ret = cma_connect_ib(id_priv, conn_param);
1670 break; 1928 break;
1929 case RDMA_TRANSPORT_IWARP:
1930 ret = cma_connect_iw(id_priv, conn_param);
1931 break;
1671 default: 1932 default:
1672 ret = -ENOSYS; 1933 ret = -ENOSYS;
1673 break; 1934 break;
@@ -1708,6 +1969,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
1708 return ib_send_cm_rep(id_priv->cm_id.ib, &rep); 1969 return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
1709} 1970}
1710 1971
1972static int cma_accept_iw(struct rdma_id_private *id_priv,
1973 struct rdma_conn_param *conn_param)
1974{
1975 struct iw_cm_conn_param iw_param;
1976 int ret;
1977
1978 ret = cma_modify_qp_rtr(&id_priv->id);
1979 if (ret)
1980 return ret;
1981
1982 iw_param.ord = conn_param->initiator_depth;
1983 iw_param.ird = conn_param->responder_resources;
1984 iw_param.private_data = conn_param->private_data;
1985 iw_param.private_data_len = conn_param->private_data_len;
1986 if (id_priv->id.qp) {
1987 iw_param.qpn = id_priv->qp_num;
1988 } else
1989 iw_param.qpn = conn_param->qp_num;
1990
1991 return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
1992}
1993
1711int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 1994int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1712{ 1995{
1713 struct rdma_id_private *id_priv; 1996 struct rdma_id_private *id_priv;
@@ -1723,13 +2006,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
1723 id_priv->srq = conn_param->srq; 2006 id_priv->srq = conn_param->srq;
1724 } 2007 }
1725 2008
1726 switch (id->device->node_type) { 2009 switch (rdma_node_get_transport(id->device->node_type)) {
1727 case IB_NODE_CA: 2010 case RDMA_TRANSPORT_IB:
1728 if (conn_param) 2011 if (conn_param)
1729 ret = cma_accept_ib(id_priv, conn_param); 2012 ret = cma_accept_ib(id_priv, conn_param);
1730 else 2013 else
1731 ret = cma_rep_recv(id_priv); 2014 ret = cma_rep_recv(id_priv);
1732 break; 2015 break;
2016 case RDMA_TRANSPORT_IWARP:
2017 ret = cma_accept_iw(id_priv, conn_param);
2018 break;
1733 default: 2019 default:
1734 ret = -ENOSYS; 2020 ret = -ENOSYS;
1735 break; 2021 break;
@@ -1756,12 +2042,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
1756 if (!cma_comp(id_priv, CMA_CONNECT)) 2042 if (!cma_comp(id_priv, CMA_CONNECT))
1757 return -EINVAL; 2043 return -EINVAL;
1758 2044
1759 switch (id->device->node_type) { 2045 switch (rdma_node_get_transport(id->device->node_type)) {
1760 case IB_NODE_CA: 2046 case RDMA_TRANSPORT_IB:
1761 ret = ib_send_cm_rej(id_priv->cm_id.ib, 2047 ret = ib_send_cm_rej(id_priv->cm_id.ib,
1762 IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, 2048 IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1763 private_data, private_data_len); 2049 private_data, private_data_len);
1764 break; 2050 break;
2051 case RDMA_TRANSPORT_IWARP:
2052 ret = iw_cm_reject(id_priv->cm_id.iw,
2053 private_data, private_data_len);
2054 break;
1765 default: 2055 default:
1766 ret = -ENOSYS; 2056 ret = -ENOSYS;
1767 break; 2057 break;
@@ -1780,17 +2070,20 @@ int rdma_disconnect(struct rdma_cm_id *id)
1780 !cma_comp(id_priv, CMA_DISCONNECT)) 2070 !cma_comp(id_priv, CMA_DISCONNECT))
1781 return -EINVAL; 2071 return -EINVAL;
1782 2072
1783 ret = cma_modify_qp_err(id); 2073 switch (rdma_node_get_transport(id->device->node_type)) {
1784 if (ret) 2074 case RDMA_TRANSPORT_IB:
1785 goto out; 2075 ret = cma_modify_qp_err(id);
1786 2076 if (ret)
1787 switch (id->device->node_type) { 2077 goto out;
1788 case IB_NODE_CA:
1789 /* Initiate or respond to a disconnect. */ 2078 /* Initiate or respond to a disconnect. */
1790 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 2079 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
1791 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 2080 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
1792 break; 2081 break;
2082 case RDMA_TRANSPORT_IWARP:
2083 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2084 break;
1793 default: 2085 default:
2086 ret = -EINVAL;
1794 break; 2087 break;
1795 } 2088 }
1796out: 2089out:
@@ -1907,12 +2200,15 @@ static int cma_init(void)
1907 if (!cma_wq) 2200 if (!cma_wq)
1908 return -ENOMEM; 2201 return -ENOMEM;
1909 2202
2203 ib_sa_register_client(&sa_client);
2204
1910 ret = ib_register_client(&cma_client); 2205 ret = ib_register_client(&cma_client);
1911 if (ret) 2206 if (ret)
1912 goto err; 2207 goto err;
1913 return 0; 2208 return 0;
1914 2209
1915err: 2210err:
2211 ib_sa_unregister_client(&sa_client);
1916 destroy_workqueue(cma_wq); 2212 destroy_workqueue(cma_wq);
1917 return ret; 2213 return ret;
1918} 2214}
@@ -1920,6 +2216,7 @@ err:
1920static void cma_cleanup(void) 2216static void cma_cleanup(void)
1921{ 2217{
1922 ib_unregister_client(&cma_client); 2218 ib_unregister_client(&cma_client);
2219 ib_sa_unregister_client(&sa_client);
1923 destroy_workqueue(cma_wq); 2220 destroy_workqueue(cma_wq);
1924 idr_destroy(&sdp_ps); 2221 idr_destroy(&sdp_ps);
1925 idr_destroy(&tcp_ps); 2222 idr_destroy(&tcp_ps);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b2f3cb91d9bc..63d2a39fb82c 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
385EXPORT_SYMBOL(ib_get_client_data); 385EXPORT_SYMBOL(ib_get_client_data);
386 386
387/** 387/**
388 * ib_set_client_data - Get IB client context 388 * ib_set_client_data - Set IB client context
389 * @device:Device to set context for 389 * @device:Device to set context for
390 * @client:Client to set context for 390 * @client:Client to set context for
391 * @data:Context to set 391 * @data:Context to set
@@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device,
505 u8 port_num, 505 u8 port_num,
506 struct ib_port_attr *port_attr) 506 struct ib_port_attr *port_attr)
507{ 507{
508 if (device->node_type == IB_NODE_SWITCH) { 508 if (device->node_type == RDMA_NODE_IB_SWITCH) {
509 if (port_num) 509 if (port_num)
510 return -EINVAL; 510 return -EINVAL;
511 } else if (port_num < 1 || port_num > device->phys_port_cnt) 511 } else if (port_num < 1 || port_num > device->phys_port_cnt)
@@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device,
580 u8 port_num, int port_modify_mask, 580 u8 port_num, int port_modify_mask,
581 struct ib_port_modify *port_modify) 581 struct ib_port_modify *port_modify)
582{ 582{
583 if (device->node_type == IB_NODE_SWITCH) { 583 if (device->node_type == RDMA_NODE_IB_SWITCH) {
584 if (port_num) 584 if (port_num)
585 return -EINVAL; 585 return -EINVAL;
586 } else if (port_num < 1 || port_num > device->phys_port_cnt) 586 } else if (port_num < 1 || port_num > device->phys_port_cnt)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
new file mode 100644
index 000000000000..c3fb304a4e86
--- /dev/null
+++ b/drivers/infiniband/core/iwcm.c
@@ -0,0 +1,1019 @@
1/*
2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
3 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
7 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
8 *
9 * This software is available to you under a choice of one of two
10 * licenses. You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
14 *
15 * Redistribution and use in source and binary forms, with or
16 * without modification, are permitted provided that the following
17 * conditions are met:
18 *
19 * - Redistributions of source code must retain the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer.
22 *
23 * - Redistributions in binary form must reproduce the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer in the documentation and/or other materials
26 * provided with the distribution.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 * SOFTWARE.
36 *
37 */
38#include <linux/dma-mapping.h>
39#include <linux/err.h>
40#include <linux/idr.h>
41#include <linux/interrupt.h>
42#include <linux/pci.h>
43#include <linux/rbtree.h>
44#include <linux/spinlock.h>
45#include <linux/workqueue.h>
46#include <linux/completion.h>
47
48#include <rdma/iw_cm.h>
49#include <rdma/ib_addr.h>
50
51#include "iwcm.h"
52
53MODULE_AUTHOR("Tom Tucker");
54MODULE_DESCRIPTION("iWARP CM");
55MODULE_LICENSE("Dual BSD/GPL");
56
57static struct workqueue_struct *iwcm_wq;
58struct iwcm_work {
59 struct work_struct work;
60 struct iwcm_id_private *cm_id;
61 struct list_head list;
62 struct iw_cm_event event;
63 struct list_head free_list;
64};
65
66/*
67 * The following services provide a mechanism for pre-allocating iwcm_work
68 * elements. The design pre-allocates them based on the cm_id type:
69 * LISTENING IDS: Get enough elements preallocated to handle the
70 * listen backlog.
71 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
72 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE
73 *
74 * Allocating them in connect and listen avoids having to deal
75 * with allocation failures on the event upcall from the provider (which
76 * is called in the interrupt context).
77 *
78 * One exception is when creating the cm_id for incoming connection requests.
79 * There are two cases:
80 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If
81 * the backlog is exceeded, then no more connection request events will
82 * be processed. cm_event_handler() returns -ENOMEM in this case. It's up
83 * to the provider to reject the connection request.
84 * 2) in the connection request workqueue handler, cm_conn_req_handler().
85 * If work elements cannot be allocated for the new connect request cm_id,
86 * then IWCM will call the provider reject method. This is ok since
87 * cm_conn_req_handler() runs in the workqueue thread context.
88 */
89
90static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
91{
92 struct iwcm_work *work;
93
94 if (list_empty(&cm_id_priv->work_free_list))
95 return NULL;
96 work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
97 free_list);
98 list_del_init(&work->free_list);
99 return work;
100}
101
102static void put_work(struct iwcm_work *work)
103{
104 list_add(&work->free_list, &work->cm_id->work_free_list);
105}
106
107static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
108{
109 struct list_head *e, *tmp;
110
111 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
112 kfree(list_entry(e, struct iwcm_work, free_list));
113}
114
115static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
116{
117 struct iwcm_work *work;
118
119 BUG_ON(!list_empty(&cm_id_priv->work_free_list));
120 while (count--) {
121 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
122 if (!work) {
123 dealloc_work_entries(cm_id_priv);
124 return -ENOMEM;
125 }
126 work->cm_id = cm_id_priv;
127 INIT_LIST_HEAD(&work->list);
128 put_work(work);
129 }
130 return 0;
131}
132
133/*
134 * Save private data from incoming connection requests in the
135 * cm_id_priv so the low level driver doesn't have to. Adjust
136 * the event ptr to point to the local copy.
137 */
138static int copy_private_data(struct iwcm_id_private *cm_id_priv,
139 struct iw_cm_event *event)
140{
141 void *p;
142
143 p = kmalloc(event->private_data_len, GFP_ATOMIC);
144 if (!p)
145 return -ENOMEM;
146 memcpy(p, event->private_data, event->private_data_len);
147 event->private_data = p;
148 return 0;
149}
150
151/*
152 * Release a reference on cm_id. If the last reference is being removed
153 * and iw_destroy_cm_id is waiting, wake up the waiting thread.
154 */
155static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
156{
157 int ret = 0;
158
159 BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
160 if (atomic_dec_and_test(&cm_id_priv->refcount)) {
161 BUG_ON(!list_empty(&cm_id_priv->work_list));
162 if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
163 BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
164 BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
165 &cm_id_priv->flags));
166 ret = 1;
167 }
168 complete(&cm_id_priv->destroy_comp);
169 }
170
171 return ret;
172}
173
174static void add_ref(struct iw_cm_id *cm_id)
175{
176 struct iwcm_id_private *cm_id_priv;
177 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
178 atomic_inc(&cm_id_priv->refcount);
179}
180
181static void rem_ref(struct iw_cm_id *cm_id)
182{
183 struct iwcm_id_private *cm_id_priv;
184 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
185 iwcm_deref_id(cm_id_priv);
186}
187
188static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
189
190struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
191 iw_cm_handler cm_handler,
192 void *context)
193{
194 struct iwcm_id_private *cm_id_priv;
195
196 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
197 if (!cm_id_priv)
198 return ERR_PTR(-ENOMEM);
199
200 cm_id_priv->state = IW_CM_STATE_IDLE;
201 cm_id_priv->id.device = device;
202 cm_id_priv->id.cm_handler = cm_handler;
203 cm_id_priv->id.context = context;
204 cm_id_priv->id.event_handler = cm_event_handler;
205 cm_id_priv->id.add_ref = add_ref;
206 cm_id_priv->id.rem_ref = rem_ref;
207 spin_lock_init(&cm_id_priv->lock);
208 atomic_set(&cm_id_priv->refcount, 1);
209 init_waitqueue_head(&cm_id_priv->connect_wait);
210 init_completion(&cm_id_priv->destroy_comp);
211 INIT_LIST_HEAD(&cm_id_priv->work_list);
212 INIT_LIST_HEAD(&cm_id_priv->work_free_list);
213
214 return &cm_id_priv->id;
215}
216EXPORT_SYMBOL(iw_create_cm_id);
217
218
219static int iwcm_modify_qp_err(struct ib_qp *qp)
220{
221 struct ib_qp_attr qp_attr;
222
223 if (!qp)
224 return -EINVAL;
225
226 qp_attr.qp_state = IB_QPS_ERR;
227 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
228}
229
230/*
231 * This is really the RDMAC CLOSING state. It is most similar to the
232 * IB SQD QP state.
233 */
234static int iwcm_modify_qp_sqd(struct ib_qp *qp)
235{
236 struct ib_qp_attr qp_attr;
237
238 BUG_ON(qp == NULL);
239 qp_attr.qp_state = IB_QPS_SQD;
240 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
241}
242
243/*
244 * CM_ID <-- CLOSING
245 *
246 * Block if a passive or active connection is currently being processed. Then
247 * process the event as follows:
248 * - If we are ESTABLISHED, move to CLOSING and modify the QP state
249 * based on the abrupt flag
250 * - If the connection is already in the CLOSING or IDLE state, the peer is
251 * disconnecting concurrently with us and we've already seen the
252 * DISCONNECT event -- ignore the request and return 0
253 * - Disconnect on a listening endpoint returns -EINVAL
254 */
255int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
256{
257 struct iwcm_id_private *cm_id_priv;
258 unsigned long flags;
259 int ret = 0;
260 struct ib_qp *qp = NULL;
261
262 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
263 /* Wait if we're currently in a connect or accept downcall */
264 wait_event(cm_id_priv->connect_wait,
265 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
266
267 spin_lock_irqsave(&cm_id_priv->lock, flags);
268 switch (cm_id_priv->state) {
269 case IW_CM_STATE_ESTABLISHED:
270 cm_id_priv->state = IW_CM_STATE_CLOSING;
271
272		/* QP could be NULL for a user-mode client */
273 if (cm_id_priv->qp)
274 qp = cm_id_priv->qp;
275 else
276 ret = -EINVAL;
277 break;
278 case IW_CM_STATE_LISTEN:
279 ret = -EINVAL;
280 break;
281 case IW_CM_STATE_CLOSING:
282 /* remote peer closed first */
283 case IW_CM_STATE_IDLE:
284 /* accept or connect returned !0 */
285 break;
286 case IW_CM_STATE_CONN_RECV:
287 /*
288 * App called disconnect before/without calling accept after
289 * connect_request event delivered.
290 */
291 break;
292 case IW_CM_STATE_CONN_SENT:
293 /* Can only get here if wait above fails */
294 default:
295 BUG();
296 }
297 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
298
299 if (qp) {
300 if (abrupt)
301 ret = iwcm_modify_qp_err(qp);
302 else
303 ret = iwcm_modify_qp_sqd(qp);
304
305 /*
306 * If both sides are disconnecting the QP could
307 * already be in ERR or SQD states
308 */
309 ret = 0;
310 }
311
312 return ret;
313}
314EXPORT_SYMBOL(iw_cm_disconnect);
315
316/*
317 * CM_ID <-- DESTROYING
318 *
319 * Clean up all resources associated with the connection and release
320 * the initial reference taken by iw_create_cm_id.
321 */
322static void destroy_cm_id(struct iw_cm_id *cm_id)
323{
324 struct iwcm_id_private *cm_id_priv;
325 unsigned long flags;
326 int ret;
327
328 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
329 /*
330 * Wait if we're currently in a connect or accept downcall. A
331 * listening endpoint should never block here.
332 */
333 wait_event(cm_id_priv->connect_wait,
334 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
335
336 spin_lock_irqsave(&cm_id_priv->lock, flags);
337 switch (cm_id_priv->state) {
338 case IW_CM_STATE_LISTEN:
339 cm_id_priv->state = IW_CM_STATE_DESTROYING;
340 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
341 /* destroy the listening endpoint */
342 ret = cm_id->device->iwcm->destroy_listen(cm_id);
343 spin_lock_irqsave(&cm_id_priv->lock, flags);
344 break;
345 case IW_CM_STATE_ESTABLISHED:
346 cm_id_priv->state = IW_CM_STATE_DESTROYING;
347 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
348 /* Abrupt close of the connection */
349 (void)iwcm_modify_qp_err(cm_id_priv->qp);
350 spin_lock_irqsave(&cm_id_priv->lock, flags);
351 break;
352 case IW_CM_STATE_IDLE:
353 case IW_CM_STATE_CLOSING:
354 cm_id_priv->state = IW_CM_STATE_DESTROYING;
355 break;
356 case IW_CM_STATE_CONN_RECV:
357 /*
358 * App called destroy before/without calling accept after
359 * receiving connection request event notification.
360 */
361 cm_id_priv->state = IW_CM_STATE_DESTROYING;
362 break;
363 case IW_CM_STATE_CONN_SENT:
364 case IW_CM_STATE_DESTROYING:
365 default:
366 BUG();
367 break;
368 }
369 if (cm_id_priv->qp) {
370 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
371 cm_id_priv->qp = NULL;
372 }
373 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
374
375 (void)iwcm_deref_id(cm_id_priv);
376}
377
378/*
379 * This function is only called by the application thread and cannot
380 * be called by the event thread. The function will wait for all
381 * references to be released on the cm_id and then kfree the cm_id
382 * object.
383 */
384void iw_destroy_cm_id(struct iw_cm_id *cm_id)
385{
386 struct iwcm_id_private *cm_id_priv;
387
388 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
389 BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
390
391 destroy_cm_id(cm_id);
392
393 wait_for_completion(&cm_id_priv->destroy_comp);
394
395 dealloc_work_entries(cm_id_priv);
396
397 kfree(cm_id_priv);
398}
399EXPORT_SYMBOL(iw_destroy_cm_id);
400
401/*
402 * CM_ID <-- LISTEN
403 *
404 * Start listening for connect requests. Generates one CONNECT_REQUEST
405 * event for each inbound connect request.
406 */
407int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
408{
409 struct iwcm_id_private *cm_id_priv;
410 unsigned long flags;
411 int ret = 0;
412
413 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
414
415 ret = alloc_work_entries(cm_id_priv, backlog);
416 if (ret)
417 return ret;
418
419 spin_lock_irqsave(&cm_id_priv->lock, flags);
420 switch (cm_id_priv->state) {
421 case IW_CM_STATE_IDLE:
422 cm_id_priv->state = IW_CM_STATE_LISTEN;
423 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
424 ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
425 if (ret)
426 cm_id_priv->state = IW_CM_STATE_IDLE;
427 spin_lock_irqsave(&cm_id_priv->lock, flags);
428 break;
429 default:
430 ret = -EINVAL;
431 }
432 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
433
434 return ret;
435}
436EXPORT_SYMBOL(iw_cm_listen);
437
438/*
439 * CM_ID <-- IDLE
440 *
441 * Rejects an inbound connection request. No events are generated.
442 */
443int iw_cm_reject(struct iw_cm_id *cm_id,
444 const void *private_data,
445 u8 private_data_len)
446{
447 struct iwcm_id_private *cm_id_priv;
448 unsigned long flags;
449 int ret;
450
451 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
452 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
453
454 spin_lock_irqsave(&cm_id_priv->lock, flags);
455 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
456 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
457 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
458 wake_up_all(&cm_id_priv->connect_wait);
459 return -EINVAL;
460 }
461 cm_id_priv->state = IW_CM_STATE_IDLE;
462 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
463
464 ret = cm_id->device->iwcm->reject(cm_id, private_data,
465 private_data_len);
466
467 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
468 wake_up_all(&cm_id_priv->connect_wait);
469
470 return ret;
471}
472EXPORT_SYMBOL(iw_cm_reject);
473
474/*
475 * CM_ID <-- ESTABLISHED
476 *
477 * Accepts an inbound connection request and generates an ESTABLISHED
478 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
479 * until the ESTABLISHED event is received from the provider.
480 */
481int iw_cm_accept(struct iw_cm_id *cm_id,
482 struct iw_cm_conn_param *iw_param)
483{
484 struct iwcm_id_private *cm_id_priv;
485 struct ib_qp *qp;
486 unsigned long flags;
487 int ret;
488
489 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
490 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
491
492 spin_lock_irqsave(&cm_id_priv->lock, flags);
493 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
494 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
495 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
496 wake_up_all(&cm_id_priv->connect_wait);
497 return -EINVAL;
498 }
499 /* Get the ib_qp given the QPN */
500 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
501 if (!qp) {
502 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
503 return -EINVAL;
504 }
505 cm_id->device->iwcm->add_ref(qp);
506 cm_id_priv->qp = qp;
507 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
508
509 ret = cm_id->device->iwcm->accept(cm_id, iw_param);
510 if (ret) {
511 /* An error on accept precludes provider events */
512 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
513 cm_id_priv->state = IW_CM_STATE_IDLE;
514 spin_lock_irqsave(&cm_id_priv->lock, flags);
515 if (cm_id_priv->qp) {
516 cm_id->device->iwcm->rem_ref(qp);
517 cm_id_priv->qp = NULL;
518 }
519 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
520 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
521 wake_up_all(&cm_id_priv->connect_wait);
522 }
523
524 return ret;
525}
526EXPORT_SYMBOL(iw_cm_accept);
527
528/*
529 * Active Side: CM_ID <-- CONN_SENT
530 *
531 * If successful, results in the generation of a CONNECT_REPLY
532 * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
533 * CONNECT_REPLY event is received from the provider.
534 */
535int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
536{
537 struct iwcm_id_private *cm_id_priv;
538 int ret = 0;
539 unsigned long flags;
540 struct ib_qp *qp;
541
542 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
543
544 ret = alloc_work_entries(cm_id_priv, 4);
545 if (ret)
546 return ret;
547
548 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
549 spin_lock_irqsave(&cm_id_priv->lock, flags);
550
551 if (cm_id_priv->state != IW_CM_STATE_IDLE) {
552 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
553 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
554 wake_up_all(&cm_id_priv->connect_wait);
555 return -EINVAL;
556 }
557
558 /* Get the ib_qp given the QPN */
559 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
560 if (!qp) {
561 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
562 return -EINVAL;
563 }
564 cm_id->device->iwcm->add_ref(qp);
565 cm_id_priv->qp = qp;
566 cm_id_priv->state = IW_CM_STATE_CONN_SENT;
567 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
568
569 ret = cm_id->device->iwcm->connect(cm_id, iw_param);
570 if (ret) {
571 spin_lock_irqsave(&cm_id_priv->lock, flags);
572 if (cm_id_priv->qp) {
573 cm_id->device->iwcm->rem_ref(qp);
574 cm_id_priv->qp = NULL;
575 }
576 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
577 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
578 cm_id_priv->state = IW_CM_STATE_IDLE;
579 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
580 wake_up_all(&cm_id_priv->connect_wait);
581 }
582
583 return ret;
584}
585EXPORT_SYMBOL(iw_cm_connect);
586
587/*
588 * Passive Side: new CM_ID <-- CONN_RECV
589 *
590 * Handles an inbound connect request. The function creates a new
591 * iw_cm_id to represent the new connection and inherits the client
592 * callback function and other attributes from the listening parent.
593 *
594 * The work item contains a pointer to the listen_cm_id and the event. The
595 * listen_cm_id contains the client cm_handler, context and
596 * listen_cm_id contains the client cm_handler, context and
597 * device. These are copied when the cm_id is cloned. The event
598 * contains the new four-tuple.
598 *
599 * An error on the child should not affect the parent, so this
600 * function does not return a value.
601 */
602static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
603 struct iw_cm_event *iw_event)
604{
605 unsigned long flags;
606 struct iw_cm_id *cm_id;
607 struct iwcm_id_private *cm_id_priv;
608 int ret;
609
610 /*
611 * The provider should never generate a connection request
612 * event with a bad status.
613 */
614 BUG_ON(iw_event->status);
615
616 /*
617 * We could be destroying the listening id. If so, ignore this
618 * upcall.
619 */
620 spin_lock_irqsave(&listen_id_priv->lock, flags);
621 if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
622 spin_unlock_irqrestore(&listen_id_priv->lock, flags);
623 return;
624 }
625 spin_unlock_irqrestore(&listen_id_priv->lock, flags);
626
627 cm_id = iw_create_cm_id(listen_id_priv->id.device,
628 listen_id_priv->id.cm_handler,
629 listen_id_priv->id.context);
630 /* If the cm_id could not be created, ignore the request */
631 if (IS_ERR(cm_id))
632 return;
633
634 cm_id->provider_data = iw_event->provider_data;
635 cm_id->local_addr = iw_event->local_addr;
636 cm_id->remote_addr = iw_event->remote_addr;
637
638 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
639 cm_id_priv->state = IW_CM_STATE_CONN_RECV;
640
641 ret = alloc_work_entries(cm_id_priv, 3);
642 if (ret) {
643 iw_cm_reject(cm_id, NULL, 0);
644 iw_destroy_cm_id(cm_id);
645 return;
646 }
647
648 /* Call the client CM handler */
649 ret = cm_id->cm_handler(cm_id, iw_event);
650 if (ret) {
651 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
652 destroy_cm_id(cm_id);
653 if (atomic_read(&cm_id_priv->refcount)==0)
654			kfree(cm_id_priv);
655 }
656
657 if (iw_event->private_data_len)
658 kfree(iw_event->private_data);
659}
660
661/*
662 * Passive Side: CM_ID <-- ESTABLISHED
663 *
664 * The provider generated an ESTABLISHED event which means that
665 * the MPA negotiation has completed successfully and we are now in MPA
666 * FPDU mode.
667 *
668 * This event can only be received in the CONN_RECV state. If the
669 * remote peer closed, the ESTABLISHED event would be received followed
670 * by the CLOSE event. If the app closes, it will block until we wake
671 * it up after processing this event.
672 */
673static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
674 struct iw_cm_event *iw_event)
675{
676 unsigned long flags;
677 int ret = 0;
678
679 spin_lock_irqsave(&cm_id_priv->lock, flags);
680
681 /*
682 * We clear the CONNECT_WAIT bit here to allow the callback
683 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
684 * from a callback handler is not allowed.
685 */
686 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
687 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
688 cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
689 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
690 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
691 wake_up_all(&cm_id_priv->connect_wait);
692
693 return ret;
694}
695
696/*
697 * Active Side: CM_ID <-- ESTABLISHED
698 *
699 * The app has called connect and is waiting for the ESTABLISHED event before
700 * posting its requests to the server. This event will wake up anyone
701 * blocked in iw_cm_disconnect or iw_destroy_cm_id.
702 */
703static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
704 struct iw_cm_event *iw_event)
705{
706 unsigned long flags;
707 int ret = 0;
708
709 spin_lock_irqsave(&cm_id_priv->lock, flags);
710 /*
711 * Clear the connect wait bit so a callback function calling
712 * iw_cm_disconnect will not wait and deadlock this thread
713 */
714 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
715 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
716 if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
717 cm_id_priv->id.local_addr = iw_event->local_addr;
718 cm_id_priv->id.remote_addr = iw_event->remote_addr;
719 cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
720 } else {
721 /* REJECTED or RESET */
722 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
723 cm_id_priv->qp = NULL;
724 cm_id_priv->state = IW_CM_STATE_IDLE;
725 }
726 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
727 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
728
729 if (iw_event->private_data_len)
730 kfree(iw_event->private_data);
731
732 /* Wake up waiters on connect complete */
733 wake_up_all(&cm_id_priv->connect_wait);
734
735 return ret;
736}
737
738/*
739 * CM_ID <-- CLOSING
740 *
741 * If in the ESTABLISHED state, move to CLOSING.
742 */
743static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
744 struct iw_cm_event *iw_event)
745{
746 unsigned long flags;
747
748 spin_lock_irqsave(&cm_id_priv->lock, flags);
749 if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
750 cm_id_priv->state = IW_CM_STATE_CLOSING;
751 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
752}
753
754/*
755 * CM_ID <-- IDLE
756 *
757 * If in the ESTABLISHED or CLOSING states, the QP will have been
758 * moved by the provider to the ERR state. Disassociate the CM_ID from
759 * the QP, move to IDLE, and remove the 'connected' reference.
760 *
761 * If in some other state, the cm_id was destroyed asynchronously.
762 * This is the last reference that will result in waking up
763 * the app thread blocked in iw_destroy_cm_id.
764 */
765static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
766 struct iw_cm_event *iw_event)
767{
768 unsigned long flags;
769 int ret = 0;
770 spin_lock_irqsave(&cm_id_priv->lock, flags);
771
772 if (cm_id_priv->qp) {
773 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
774 cm_id_priv->qp = NULL;
775 }
776 switch (cm_id_priv->state) {
777 case IW_CM_STATE_ESTABLISHED:
778 case IW_CM_STATE_CLOSING:
779 cm_id_priv->state = IW_CM_STATE_IDLE;
780 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
781 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
782 spin_lock_irqsave(&cm_id_priv->lock, flags);
783 break;
784 case IW_CM_STATE_DESTROYING:
785 break;
786 default:
787 BUG();
788 }
789 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
790
791 return ret;
792}
793
794static int process_event(struct iwcm_id_private *cm_id_priv,
795 struct iw_cm_event *iw_event)
796{
797 int ret = 0;
798
799 switch (iw_event->event) {
800 case IW_CM_EVENT_CONNECT_REQUEST:
801 cm_conn_req_handler(cm_id_priv, iw_event);
802 break;
803 case IW_CM_EVENT_CONNECT_REPLY:
804 ret = cm_conn_rep_handler(cm_id_priv, iw_event);
805 break;
806 case IW_CM_EVENT_ESTABLISHED:
807 ret = cm_conn_est_handler(cm_id_priv, iw_event);
808 break;
809 case IW_CM_EVENT_DISCONNECT:
810 cm_disconnect_handler(cm_id_priv, iw_event);
811 break;
812 case IW_CM_EVENT_CLOSE:
813 ret = cm_close_handler(cm_id_priv, iw_event);
814 break;
815 default:
816 BUG();
817 }
818
819 return ret;
820}
821
822/*
823 * Process events on the work_list for the cm_id. If the callback
824 * function requests that the cm_id be deleted, a flag is set in the
825 * cm_id flags to indicate that when the last reference is
826 * removed, the cm_id is to be destroyed. This is necessary to
827 * distinguish between an object that will be destroyed by the app
828 * thread blocked on the destroy_comp completion vs. an object destroyed
829 * here synchronously when the last reference is removed.
830 */
831static void cm_work_handler(void *arg)
832{
833 struct iwcm_work *work = arg, lwork;
834 struct iwcm_id_private *cm_id_priv = work->cm_id;
835 unsigned long flags;
836 int empty;
837 int ret = 0;
838
839 spin_lock_irqsave(&cm_id_priv->lock, flags);
840 empty = list_empty(&cm_id_priv->work_list);
841 while (!empty) {
842 work = list_entry(cm_id_priv->work_list.next,
843 struct iwcm_work, list);
844 list_del_init(&work->list);
845 empty = list_empty(&cm_id_priv->work_list);
846 lwork = *work;
847 put_work(work);
848 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
849
850		ret = process_event(cm_id_priv, &lwork.event);
851 if (ret) {
852 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
853 destroy_cm_id(&cm_id_priv->id);
854 }
855 BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
856 if (iwcm_deref_id(cm_id_priv))
857 return;
858
859 if (atomic_read(&cm_id_priv->refcount)==0 &&
860 test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
861 dealloc_work_entries(cm_id_priv);
862 kfree(cm_id_priv);
863 return;
864 }
865 spin_lock_irqsave(&cm_id_priv->lock, flags);
866 }
867 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
868}
869
870/*
871 * This function is called in interrupt context. Schedule events on
872 * the iwcm_wq thread to allow callback functions to downcall into
873 * the CM and/or block. Events are queued to a per-CM_ID
874 * work_list. If this is the first event on the work_list, the work
875 * element is also queued on the iwcm_wq thread.
876 *
877 * Each event holds a reference on the cm_id. Until the last posted
878 * event has been delivered and processed, the cm_id cannot be
879 * deleted.
880 *
881 * Returns:
882 * 0 - the event was handled.
883 * -ENOMEM - the event was not handled due to lack of resources.
884 */
885static int cm_event_handler(struct iw_cm_id *cm_id,
886 struct iw_cm_event *iw_event)
887{
888 struct iwcm_work *work;
889 struct iwcm_id_private *cm_id_priv;
890 unsigned long flags;
891 int ret = 0;
892
893 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
894
895 spin_lock_irqsave(&cm_id_priv->lock, flags);
896 work = get_work(cm_id_priv);
897 if (!work) {
898 ret = -ENOMEM;
899 goto out;
900 }
901
902 INIT_WORK(&work->work, cm_work_handler, work);
903 work->cm_id = cm_id_priv;
904 work->event = *iw_event;
905
906 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
907 work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
908 work->event.private_data_len) {
909 ret = copy_private_data(cm_id_priv, &work->event);
910 if (ret) {
911 put_work(work);
912 goto out;
913 }
914 }
915
916 atomic_inc(&cm_id_priv->refcount);
917 if (list_empty(&cm_id_priv->work_list)) {
918 list_add_tail(&work->list, &cm_id_priv->work_list);
919 queue_work(iwcm_wq, &work->work);
920 } else
921 list_add_tail(&work->list, &cm_id_priv->work_list);
922out:
923 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
924 return ret;
925}
926
927static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
928 struct ib_qp_attr *qp_attr,
929 int *qp_attr_mask)
930{
931 unsigned long flags;
932 int ret;
933
934 spin_lock_irqsave(&cm_id_priv->lock, flags);
935 switch (cm_id_priv->state) {
936 case IW_CM_STATE_IDLE:
937 case IW_CM_STATE_CONN_SENT:
938 case IW_CM_STATE_CONN_RECV:
939 case IW_CM_STATE_ESTABLISHED:
940 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
941 qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
942 IB_ACCESS_REMOTE_WRITE|
943 IB_ACCESS_REMOTE_READ;
944 ret = 0;
945 break;
946 default:
947 ret = -EINVAL;
948 break;
949 }
950 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
951 return ret;
952}
953
954static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
955 struct ib_qp_attr *qp_attr,
956 int *qp_attr_mask)
957{
958 unsigned long flags;
959 int ret;
960
961 spin_lock_irqsave(&cm_id_priv->lock, flags);
962 switch (cm_id_priv->state) {
963 case IW_CM_STATE_IDLE:
964 case IW_CM_STATE_CONN_SENT:
965 case IW_CM_STATE_CONN_RECV:
966 case IW_CM_STATE_ESTABLISHED:
967 *qp_attr_mask = 0;
968 ret = 0;
969 break;
970 default:
971 ret = -EINVAL;
972 break;
973 }
974 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
975 return ret;
976}
977
978int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
979 struct ib_qp_attr *qp_attr,
980 int *qp_attr_mask)
981{
982 struct iwcm_id_private *cm_id_priv;
983 int ret;
984
985 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
986 switch (qp_attr->qp_state) {
987 case IB_QPS_INIT:
988 case IB_QPS_RTR:
989 ret = iwcm_init_qp_init_attr(cm_id_priv,
990 qp_attr, qp_attr_mask);
991 break;
992 case IB_QPS_RTS:
993 ret = iwcm_init_qp_rts_attr(cm_id_priv,
994 qp_attr, qp_attr_mask);
995 break;
996 default:
997 ret = -EINVAL;
998 break;
999 }
1000 return ret;
1001}
1002EXPORT_SYMBOL(iw_cm_init_qp_attr);
1003
1004static int __init iw_cm_init(void)
1005{
1006 iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
1007 if (!iwcm_wq)
1008 return -ENOMEM;
1009
1010 return 0;
1011}
1012
1013static void __exit iw_cm_cleanup(void)
1014{
1015 destroy_workqueue(iwcm_wq);
1016}
1017
1018module_init(iw_cm_init);
1019module_exit(iw_cm_cleanup);
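
[Editorial note — usage sketch, not part of this patch] The file above defines the kernel-facing iWARP CM API (iw_create_cm_id, iw_cm_listen, iw_cm_accept/iw_cm_reject, iw_cm_connect, iw_cm_disconnect, iw_destroy_cm_id). A minimal sketch of the listen-side calling pattern follows; the handler name, the backlog value, and the comment about filling in cm_id->local_addr are illustrative assumptions, not code from this commit.

    #include <linux/err.h>
    #include <rdma/iw_cm.h>

    /* Illustrative consumer event handler (my_iw_cm_handler is an assumed name). */
    static int my_iw_cm_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event)
    {
    	switch (event->event) {
    	case IW_CM_EVENT_CONNECT_REQUEST:
    		/* accept with iw_cm_accept() or decline with iw_cm_reject() */
    		break;
    	case IW_CM_EVENT_ESTABLISHED:
    	case IW_CM_EVENT_DISCONNECT:
    	case IW_CM_EVENT_CLOSE:
    		break;
    	default:
    		break;
    	}
    	return 0;	/* a non-zero return asks the IWCM to destroy this cm_id */
    }

    static int my_start_listen(struct ib_device *device, void *context)
    {
    	struct iw_cm_id *cm_id;
    	int ret;

    	cm_id = iw_create_cm_id(device, my_iw_cm_handler, context);
    	if (IS_ERR(cm_id))
    		return PTR_ERR(cm_id);

    	/* the caller would fill in cm_id->local_addr here */

    	ret = iw_cm_listen(cm_id, 8);	/* backlog of 8 is arbitrary */
    	if (ret)
    		iw_destroy_cm_id(cm_id);
    	return ret;
    }

Note that iw_cm_listen() pre-allocates one work element per backlog slot, so the backlog bounds how many connection request events can be outstanding before cm_event_handler() returns -ENOMEM to the provider.
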
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
new file mode 100644
index 000000000000..3f6cc82564c8
--- /dev/null
+++ b/drivers/infiniband/core/iwcm.h
@@ -0,0 +1,62 @@
1/*
2 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef IWCM_H
34#define IWCM_H
35
36enum iw_cm_state {
37 IW_CM_STATE_IDLE, /* unbound, inactive */
38 IW_CM_STATE_LISTEN, /* listen waiting for connect */
39 IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */
40 IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */
41 IW_CM_STATE_ESTABLISHED, /* established */
42 IW_CM_STATE_CLOSING, /* disconnect */
43 IW_CM_STATE_DESTROYING /* object being deleted */
44};
45
46struct iwcm_id_private {
47 struct iw_cm_id id;
48 enum iw_cm_state state;
49 unsigned long flags;
50 struct ib_qp *qp;
51 struct completion destroy_comp;
52 wait_queue_head_t connect_wait;
53 struct list_head work_list;
54 spinlock_t lock;
55 atomic_t refcount;
56 struct list_head work_free_list;
57};
58
59#define IWCM_F_CALLBACK_DESTROY 1
60#define IWCM_F_CONNECT_WAIT 2
61
62#endif /* IWCM_H */
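
[Editorial note — reference-counting sketch, not part of this patch] iw_create_cm_id() installs add_ref/rem_ref on every iw_cm_id, and dropping the last reference is what releases a thread blocked in iw_destroy_cm_id(). From the provider side the expected pattern looks roughly like the sketch below; struct my_provider_ep and the helper names are assumptions for illustration.

    #include <rdma/iw_cm.h>

    /* Illustrative provider-side pattern: pin the cm_id while a driver
     * connection object holds a pointer to it. */
    struct my_provider_ep {
    	struct iw_cm_id	*cm_id;
    	/* ... hardware connection state ... */
    };

    static void my_ep_bind_cm_id(struct my_provider_ep *ep, struct iw_cm_id *cm_id)
    {
    	cm_id->add_ref(cm_id);		/* hold a reference while ep uses cm_id */
    	ep->cm_id = cm_id;
    }

    static void my_ep_unbind_cm_id(struct my_provider_ep *ep)
    {
    	struct iw_cm_id *cm_id = ep->cm_id;

    	ep->cm_id = NULL;
    	cm_id->rem_ref(cm_id);		/* the last reference wakes iw_destroy_cm_id() */
    }
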
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 1c3cfbbe6a97..082f03c158f0 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1246 int i; 1246 int i;
1247 1247
1248 for (i = 0; i < MAX_MGMT_OUI; i++) 1248 for (i = 0; i < MAX_MGMT_OUI; i++)
1249 /* Is there matching OUI for this vendor class ? */ 1249 /* Is there matching OUI for this vendor class ? */
1250 if (!memcmp(vendor_class->oui[i], oui, 3)) 1250 if (!memcmp(vendor_class->oui[i], oui, 3))
1251 return i; 1251 return i;
1252 1252
1253 return -1; 1253 return -1;
@@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2237 list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, 2237 list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2238 &mad_agent_priv->send_list, agent_list) { 2238 &mad_agent_priv->send_list, agent_list) {
2239 if (mad_send_wr->status == IB_WC_SUCCESS) { 2239 if (mad_send_wr->status == IB_WC_SUCCESS) {
2240 mad_send_wr->status = IB_WC_WR_FLUSH_ERR; 2240 mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2241 mad_send_wr->refcount -= (mad_send_wr->timeout > 0); 2241 mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2242 } 2242 }
2243 } 2243 }
@@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2528 } 2528 }
2529 } 2529 }
2530 sg_list.addr = dma_map_single(qp_info->port_priv-> 2530 sg_list.addr = dma_map_single(qp_info->port_priv->
2531 device->dma_device, 2531 device->dma_device,
2532 &mad_priv->grh, 2532 &mad_priv->grh,
2533 sizeof *mad_priv - 2533 sizeof *mad_priv -
2534 sizeof mad_priv->header, 2534 sizeof mad_priv->header,
2535 DMA_FROM_DEVICE); 2535 DMA_FROM_DEVICE);
2536 pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr); 2536 pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
2537 recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; 2537 recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
@@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2606 struct ib_qp *qp; 2606 struct ib_qp *qp;
2607 2607
2608 attr = kmalloc(sizeof *attr, GFP_KERNEL); 2608 attr = kmalloc(sizeof *attr, GFP_KERNEL);
2609 if (!attr) { 2609 if (!attr) {
2610 printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); 2610 printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
2611 return -ENOMEM; 2611 return -ENOMEM;
2612 } 2612 }
@@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device)
2876{ 2876{
2877 int start, end, i; 2877 int start, end, i;
2878 2878
2879 if (device->node_type == IB_NODE_SWITCH) { 2879 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
2880 return;
2881
2882 if (device->node_type == RDMA_NODE_IB_SWITCH) {
2880 start = 0; 2883 start = 0;
2881 end = 0; 2884 end = 0;
2882 } else { 2885 } else {
@@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device)
2923{ 2926{
2924 int i, num_ports, cur_port; 2927 int i, num_ports, cur_port;
2925 2928
2926 if (device->node_type == IB_NODE_SWITCH) { 2929 if (device->node_type == RDMA_NODE_IB_SWITCH) {
2927 num_ports = 1; 2930 num_ports = 1;
2928 cur_port = 0; 2931 cur_port = 0;
2929 } else { 2932 } else {
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index d147f3bad2ce..1da9adbccaec 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -39,7 +39,6 @@
39 39
40#include <linux/completion.h> 40#include <linux/completion.h>
41#include <linux/pci.h> 41#include <linux/pci.h>
42#include <linux/kthread.h>
43#include <linux/workqueue.h> 42#include <linux/workqueue.h>
44#include <rdma/ib_mad.h> 43#include <rdma/ib_mad.h>
45#include <rdma/ib_smi.h> 44#include <rdma/ib_smi.h>
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index ebcd5b181770..1ef79d015a1e 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -33,8 +33,6 @@
33 * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $ 33 * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
34 */ 34 */
35 35
36#include <linux/dma-mapping.h>
37
38#include "mad_priv.h" 36#include "mad_priv.h"
39#include "mad_rmpp.h" 37#include "mad_rmpp.h"
40 38
@@ -60,6 +58,7 @@ struct mad_rmpp_recv {
60 int last_ack; 58 int last_ack;
61 int seg_num; 59 int seg_num;
62 int newwin; 60 int newwin;
61 int repwin;
63 62
64 __be64 tid; 63 __be64 tid;
65 u32 src_qp; 64 u32 src_qp;
@@ -170,6 +169,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
170 return msg; 169 return msg;
171} 170}
172 171
172static void ack_ds_ack(struct ib_mad_agent_private *agent,
173 struct ib_mad_recv_wc *recv_wc)
174{
175 struct ib_mad_send_buf *msg;
176 struct ib_rmpp_mad *rmpp_mad;
177 int ret;
178
179 msg = alloc_response_msg(&agent->agent, recv_wc);
180 if (IS_ERR(msg))
181 return;
182
183 rmpp_mad = msg->mad;
184 memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
185
186 rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
187 ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
188 rmpp_mad->rmpp_hdr.seg_num = 0;
189 rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
190
191 ret = ib_post_send_mad(msg, NULL);
192 if (ret) {
193 ib_destroy_ah(msg->ah);
194 ib_free_send_mad(msg);
195 }
196}
197
173void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) 198void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
174{ 199{
175 struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad; 200 struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
@@ -271,6 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
271 rmpp_recv->newwin = 1; 296 rmpp_recv->newwin = 1;
272 rmpp_recv->seg_num = 1; 297 rmpp_recv->seg_num = 1;
273 rmpp_recv->last_ack = 0; 298 rmpp_recv->last_ack = 0;
299 rmpp_recv->repwin = 1;
274 300
275 mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; 301 mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
276 rmpp_recv->tid = mad_hdr->tid; 302 rmpp_recv->tid = mad_hdr->tid;
@@ -365,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent)
365static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, 391static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
366 int seg_num) 392 int seg_num)
367{ 393{
368 struct ib_mad_recv_buf *seg_buf; 394 struct ib_mad_recv_buf *seg_buf;
369 int cur_seg_num; 395 int cur_seg_num;
370 396
371 list_for_each_entry_reverse(seg_buf, rmpp_list, list) { 397 list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
@@ -591,6 +617,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
591 break; 617 break;
592} 618}
593 619
620static void process_ds_ack(struct ib_mad_agent_private *agent,
621 struct ib_mad_recv_wc *mad_recv_wc, int newwin)
622{
623 struct mad_rmpp_recv *rmpp_recv;
624
625 rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
626 if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
627 rmpp_recv->repwin = newwin;
628}
629
594static void process_rmpp_ack(struct ib_mad_agent_private *agent, 630static void process_rmpp_ack(struct ib_mad_agent_private *agent,
595 struct ib_mad_recv_wc *mad_recv_wc) 631 struct ib_mad_recv_wc *mad_recv_wc)
596{ 632{
@@ -616,8 +652,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
616 652
617 spin_lock_irqsave(&agent->lock, flags); 653 spin_lock_irqsave(&agent->lock, flags);
618 mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); 654 mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
619 if (!mad_send_wr) 655 if (!mad_send_wr) {
620 goto out; /* Unmatched ACK */ 656 if (!seg_num)
657 process_ds_ack(agent, mad_recv_wc, newwin);
658 goto out; /* Unmatched or DS RMPP ACK */
659 }
660
661 if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
662 (mad_send_wr->timeout)) {
663 spin_unlock_irqrestore(&agent->lock, flags);
664 ack_ds_ack(agent, mad_recv_wc);
665 return; /* Repeated ACK for DS RMPP transaction */
666 }
621 667
622 if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || 668 if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
623 (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) 669 (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
@@ -656,6 +702,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
656 if (mad_send_wr->refcount == 1) 702 if (mad_send_wr->refcount == 1)
657 ib_reset_mad_timeout(mad_send_wr, 703 ib_reset_mad_timeout(mad_send_wr,
658 mad_send_wr->send_buf.timeout_ms); 704 mad_send_wr->send_buf.timeout_ms);
705 spin_unlock_irqrestore(&agent->lock, flags);
706 ack_ds_ack(agent, mad_recv_wc);
707 return;
659 } else if (mad_send_wr->refcount == 1 && 708 } else if (mad_send_wr->refcount == 1 &&
660 mad_send_wr->seg_num < mad_send_wr->newwin && 709 mad_send_wr->seg_num < mad_send_wr->newwin &&
661 mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { 710 mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
@@ -772,6 +821,39 @@ out:
772 return NULL; 821 return NULL;
773} 822}
774 823
824static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
825{
826 struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
827 struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
828 struct mad_rmpp_recv *rmpp_recv;
829 struct ib_ah_attr ah_attr;
830 unsigned long flags;
831 int newwin = 1;
832
833 if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
834 goto out;
835
836 spin_lock_irqsave(&agent->lock, flags);
837 list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
838 if (rmpp_recv->tid != mad_hdr->tid ||
839 rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
840 rmpp_recv->class_version != mad_hdr->class_version ||
841 (rmpp_recv->method & IB_MGMT_METHOD_RESP))
842 continue;
843
844 if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
845 continue;
846
847 if (rmpp_recv->slid == ah_attr.dlid) {
848 newwin = rmpp_recv->repwin;
849 break;
850 }
851 }
852 spin_unlock_irqrestore(&agent->lock, flags);
853out:
854 return newwin;
855}
856
775int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) 857int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
776{ 858{
777 struct ib_rmpp_mad *rmpp_mad; 859 struct ib_rmpp_mad *rmpp_mad;
@@ -787,7 +869,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
787 return IB_RMPP_RESULT_INTERNAL; 869 return IB_RMPP_RESULT_INTERNAL;
788 } 870 }
789 871
790 mad_send_wr->newwin = 1; 872 mad_send_wr->newwin = init_newwin(mad_send_wr);
791 873
792 /* We need to wait for the final ACK even if there isn't a response */ 874 /* We need to wait for the final ACK even if there isn't a response */
793 mad_send_wr->refcount += (mad_send_wr->timeout == 0); 875 mad_send_wr->refcount += (mad_send_wr->timeout == 0);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index d6b84226bba7..1706d3c7e95e 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 * Copyright (c) 2006 Intel Corporation. All rights reserved.
4 * 5 *
5 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 7 * licenses. You may choose to be licensed under the terms of the GNU
@@ -75,6 +76,7 @@ struct ib_sa_device {
75struct ib_sa_query { 76struct ib_sa_query {
76 void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); 77 void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
77 void (*release)(struct ib_sa_query *); 78 void (*release)(struct ib_sa_query *);
79 struct ib_sa_client *client;
78 struct ib_sa_port *port; 80 struct ib_sa_port *port;
79 struct ib_mad_send_buf *mad_buf; 81 struct ib_mad_send_buf *mad_buf;
80 struct ib_sa_sm_ah *sm_ah; 82 struct ib_sa_sm_ah *sm_ah;
@@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
415 } 417 }
416} 418}
417 419
420void ib_sa_register_client(struct ib_sa_client *client)
421{
422 atomic_set(&client->users, 1);
423 init_completion(&client->comp);
424}
425EXPORT_SYMBOL(ib_sa_register_client);
426
427static inline void ib_sa_client_get(struct ib_sa_client *client)
428{
429 atomic_inc(&client->users);
430}
431
432static inline void ib_sa_client_put(struct ib_sa_client *client)
433{
434 if (atomic_dec_and_test(&client->users))
435 complete(&client->comp);
436}
437
438void ib_sa_unregister_client(struct ib_sa_client *client)
439{
440 ib_sa_client_put(client);
441 wait_for_completion(&client->comp);
442}
443EXPORT_SYMBOL(ib_sa_unregister_client);
444
418/** 445/**
419 * ib_sa_cancel_query - try to cancel an SA query 446 * ib_sa_cancel_query - try to cancel an SA query
420 * @id:ID of query to cancel 447 * @id:ID of query to cancel
@@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
557 584
558/** 585/**
559 * ib_sa_path_rec_get - Start a Path get query 586 * ib_sa_path_rec_get - Start a Path get query
587 * @client:SA client
560 * @device:device to send query on 588 * @device:device to send query on
561 * @port_num: port number to send query on 589 * @port_num: port number to send query on
562 * @rec:Path Record to send in query 590 * @rec:Path Record to send in query
@@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
579 * error code. Otherwise it is a query ID that can be used to cancel 607 * error code. Otherwise it is a query ID that can be used to cancel
580 * the query. 608 * the query.
581 */ 609 */
582int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, 610int ib_sa_path_rec_get(struct ib_sa_client *client,
611 struct ib_device *device, u8 port_num,
583 struct ib_sa_path_rec *rec, 612 struct ib_sa_path_rec *rec,
584 ib_sa_comp_mask comp_mask, 613 ib_sa_comp_mask comp_mask,
585 int timeout_ms, gfp_t gfp_mask, 614 int timeout_ms, gfp_t gfp_mask,
@@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
614 goto err1; 643 goto err1;
615 } 644 }
616 645
617 query->callback = callback; 646 ib_sa_client_get(client);
618 query->context = context; 647 query->sa_query.client = client;
648 query->callback = callback;
649 query->context = context;
619 650
620 mad = query->sa_query.mad_buf->mad; 651 mad = query->sa_query.mad_buf->mad;
621 init_mad(mad, agent); 652 init_mad(mad, agent);
@@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
639 670
640err2: 671err2:
641 *sa_query = NULL; 672 *sa_query = NULL;
673 ib_sa_client_put(query->sa_query.client);
642 ib_free_send_mad(query->sa_query.mad_buf); 674 ib_free_send_mad(query->sa_query.mad_buf);
643 675
644err1: 676err1:
@@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
671 703
672/** 704/**
673 * ib_sa_service_rec_query - Start Service Record operation 705 * ib_sa_service_rec_query - Start Service Record operation
706 * @client:SA client
674 * @device:device to send request on 707 * @device:device to send request on
675 * @port_num: port number to send request on 708 * @port_num: port number to send request on
676 * @method:SA method - should be get, set, or delete 709 * @method:SA method - should be get, set, or delete
@@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
695 * error code. Otherwise it is a request ID that can be used to cancel 728 * error code. Otherwise it is a request ID that can be used to cancel
696 * the query. 729 * the query.
697 */ 730 */
698int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method, 731int ib_sa_service_rec_query(struct ib_sa_client *client,
732 struct ib_device *device, u8 port_num, u8 method,
699 struct ib_sa_service_rec *rec, 733 struct ib_sa_service_rec *rec,
700 ib_sa_comp_mask comp_mask, 734 ib_sa_comp_mask comp_mask,
701 int timeout_ms, gfp_t gfp_mask, 735 int timeout_ms, gfp_t gfp_mask,
@@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
735 goto err1; 769 goto err1;
736 } 770 }
737 771
738 query->callback = callback; 772 ib_sa_client_get(client);
739 query->context = context; 773 query->sa_query.client = client;
774 query->callback = callback;
775 query->context = context;
740 776
741 mad = query->sa_query.mad_buf->mad; 777 mad = query->sa_query.mad_buf->mad;
742 init_mad(mad, agent); 778 init_mad(mad, agent);
@@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
761 797
762err2: 798err2:
763 *sa_query = NULL; 799 *sa_query = NULL;
800 ib_sa_client_put(query->sa_query.client);
764 ib_free_send_mad(query->sa_query.mad_buf); 801 ib_free_send_mad(query->sa_query.mad_buf);
765 802
766err1: 803err1:
@@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
791 kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); 828 kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
792} 829}
793 830
794int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num, 831int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
832 struct ib_device *device, u8 port_num,
795 u8 method, 833 u8 method,
796 struct ib_sa_mcmember_rec *rec, 834 struct ib_sa_mcmember_rec *rec,
797 ib_sa_comp_mask comp_mask, 835 ib_sa_comp_mask comp_mask,
@@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
827 goto err1; 865 goto err1;
828 } 866 }
829 867
830 query->callback = callback; 868 ib_sa_client_get(client);
831 query->context = context; 869 query->sa_query.client = client;
870 query->callback = callback;
871 query->context = context;
832 872
833 mad = query->sa_query.mad_buf->mad; 873 mad = query->sa_query.mad_buf->mad;
834 init_mad(mad, agent); 874 init_mad(mad, agent);
@@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
853 893
854err2: 894err2:
855 *sa_query = NULL; 895 *sa_query = NULL;
896 ib_sa_client_put(query->sa_query.client);
856 ib_free_send_mad(query->sa_query.mad_buf); 897 ib_free_send_mad(query->sa_query.mad_buf);
857 898
858err1: 899err1:
@@ -887,8 +928,9 @@ static void send_handler(struct ib_mad_agent *agent,
887 idr_remove(&query_idr, query->id); 928 idr_remove(&query_idr, query->id);
888 spin_unlock_irqrestore(&idr_lock, flags); 929 spin_unlock_irqrestore(&idr_lock, flags);
889 930
890 ib_free_send_mad(mad_send_wc->send_buf); 931 ib_free_send_mad(mad_send_wc->send_buf);
891 kref_put(&query->sm_ah->ref, free_sm_ah); 932 kref_put(&query->sm_ah->ref, free_sm_ah);
933 ib_sa_client_put(query->client);
892 query->release(query); 934 query->release(query);
893} 935}
894 936
@@ -919,7 +961,10 @@ static void ib_sa_add_one(struct ib_device *device)
919 struct ib_sa_device *sa_dev; 961 struct ib_sa_device *sa_dev;
920 int s, e, i; 962 int s, e, i;
921 963
922 if (device->node_type == IB_NODE_SWITCH) 964 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
965 return;
966
967 if (device->node_type == RDMA_NODE_IB_SWITCH)
923 s = e = 0; 968 s = e = 0;
924 else { 969 else {
925 s = 1; 970 s = 1;
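
[Editorial note — usage sketch, not part of this patch] The sa_query.c changes above add an ib_sa_client that every SA query now takes as its first argument; ib_sa_unregister_client() blocks until all queries issued through that client have completed (send_handler drops the last client reference). A rough sketch of the expected bracketing, with my_sa_client and the function names being illustrative; the full ib_sa_path_rec_get() argument list is elided because it is not shown in this hunk.

    #include <rdma/ib_sa.h>

    static struct ib_sa_client my_sa_client;	/* illustrative name */

    static void my_sa_user_start(void)
    {
    	ib_sa_register_client(&my_sa_client);
    	/*
    	 * SA queries are now issued with the client as the first argument,
    	 * e.g. ib_sa_path_rec_get(&my_sa_client, device, port_num, ...).
    	 */
    }

    static void my_sa_user_stop(void)
    {
    	/* Waits for all outstanding queries issued with this client. */
    	ib_sa_unregister_client(&my_sa_client);
    }
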
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index 35852e794e26..54b81e17ad50 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
64 64
65 /* C14-9:2 */ 65 /* C14-9:2 */
66 if (hop_ptr && hop_ptr < hop_cnt) { 66 if (hop_ptr && hop_ptr < hop_cnt) {
67 if (node_type != IB_NODE_SWITCH) 67 if (node_type != RDMA_NODE_IB_SWITCH)
68 return 0; 68 return 0;
69 69
70 /* smp->return_path set when received */ 70 /* smp->return_path set when received */
@@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
77 if (hop_ptr == hop_cnt) { 77 if (hop_ptr == hop_cnt) {
78 /* smp->return_path set when received */ 78 /* smp->return_path set when received */
79 smp->hop_ptr++; 79 smp->hop_ptr++;
80 return (node_type == IB_NODE_SWITCH || 80 return (node_type == RDMA_NODE_IB_SWITCH ||
81 smp->dr_dlid == IB_LID_PERMISSIVE); 81 smp->dr_dlid == IB_LID_PERMISSIVE);
82 } 82 }
83 83
@@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
95 95
96 /* C14-13:2 */ 96 /* C14-13:2 */
97 if (2 <= hop_ptr && hop_ptr <= hop_cnt) { 97 if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
98 if (node_type != IB_NODE_SWITCH) 98 if (node_type != RDMA_NODE_IB_SWITCH)
99 return 0; 99 return 0;
100 100
101 smp->hop_ptr--; 101 smp->hop_ptr--;
@@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
107 if (hop_ptr == 1) { 107 if (hop_ptr == 1) {
108 smp->hop_ptr--; 108 smp->hop_ptr--;
109 /* C14-13:3 -- SMPs destined for SM shouldn't be here */ 109 /* C14-13:3 -- SMPs destined for SM shouldn't be here */
110 return (node_type == IB_NODE_SWITCH || 110 return (node_type == RDMA_NODE_IB_SWITCH ||
111 smp->dr_slid == IB_LID_PERMISSIVE); 111 smp->dr_slid == IB_LID_PERMISSIVE);
112 } 112 }
113 113
@@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
142 142
143 /* C14-9:2 -- intermediate hop */ 143 /* C14-9:2 -- intermediate hop */
144 if (hop_ptr && hop_ptr < hop_cnt) { 144 if (hop_ptr && hop_ptr < hop_cnt) {
145 if (node_type != IB_NODE_SWITCH) 145 if (node_type != RDMA_NODE_IB_SWITCH)
146 return 0; 146 return 0;
147 147
148 smp->return_path[hop_ptr] = port_num; 148 smp->return_path[hop_ptr] = port_num;
@@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
156 smp->return_path[hop_ptr] = port_num; 156 smp->return_path[hop_ptr] = port_num;
157 /* smp->hop_ptr updated when sending */ 157 /* smp->hop_ptr updated when sending */
158 158
159 return (node_type == IB_NODE_SWITCH || 159 return (node_type == RDMA_NODE_IB_SWITCH ||
160 smp->dr_dlid == IB_LID_PERMISSIVE); 160 smp->dr_dlid == IB_LID_PERMISSIVE);
161 } 161 }
162 162
@@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
175 175
176 /* C14-13:2 */ 176 /* C14-13:2 */
177 if (2 <= hop_ptr && hop_ptr <= hop_cnt) { 177 if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
178 if (node_type != IB_NODE_SWITCH) 178 if (node_type != RDMA_NODE_IB_SWITCH)
179 return 0; 179 return 0;
180 180
181 /* smp->hop_ptr updated when sending */ 181 /* smp->hop_ptr updated when sending */
@@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
190 return 1; 190 return 1;
191 } 191 }
192 /* smp->hop_ptr updated when sending */ 192 /* smp->hop_ptr updated when sending */
193 return (node_type == IB_NODE_SWITCH); 193 return (node_type == RDMA_NODE_IB_SWITCH);
194 } 194 }
195 195
196 /* C14-13:4 -- hop_ptr = 0 -> give to SM */ 196 /* C14-13:4 -- hop_ptr = 0 -> give to SM */
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 21f9282c1b25..709323c14c5d 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -68,7 +68,7 @@ struct port_table_attribute {
68 int index; 68 int index;
69}; 69};
70 70
71static inline int ibdev_is_alive(const struct ib_device *dev) 71static inline int ibdev_is_alive(const struct ib_device *dev)
72{ 72{
73 return dev->reg_state == IB_DEV_REGISTERED; 73 return dev->reg_state == IB_DEV_REGISTERED;
74} 74}
@@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
589 return -ENODEV; 589 return -ENODEV;
590 590
591 switch (dev->node_type) { 591 switch (dev->node_type) {
592 case IB_NODE_CA: return sprintf(buf, "%d: CA\n", dev->node_type); 592 case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
593 case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); 593 case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
594 case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); 594 case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
595 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); 595 case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
596 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
596 } 597 }
597} 598}
598 599
@@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device)
708 if (ret) 709 if (ret)
709 goto err_put; 710 goto err_put;
710 711
711 if (device->node_type == IB_NODE_SWITCH) { 712 if (device->node_type == RDMA_NODE_IB_SWITCH) {
712 ret = add_port(device, 0); 713 ret = add_port(device, 0);
713 if (ret) 714 if (ret)
714 goto err_put; 715 goto err_put;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index c1c6fda9452c..ad4f4d5c2924 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
309 info = evt->param.apr_rcvd.apr_info; 309 info = evt->param.apr_rcvd.apr_info;
310 break; 310 break;
311 case IB_CM_SIDR_REQ_RECEIVED: 311 case IB_CM_SIDR_REQ_RECEIVED:
312 uvt->resp.u.sidr_req_resp.pkey = 312 uvt->resp.u.sidr_req_resp.pkey =
313 evt->param.sidr_req_rcvd.pkey; 313 evt->param.sidr_req_rcvd.pkey;
314 uvt->resp.u.sidr_req_resp.port = 314 uvt->resp.u.sidr_req_resp.port =
315 evt->param.sidr_req_rcvd.port; 315 evt->param.sidr_req_rcvd.port;
316 uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; 316 uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
317 break; 317 break;
@@ -1237,7 +1237,7 @@ static struct class ucm_class = {
1237static ssize_t show_ibdev(struct class_device *class_dev, char *buf) 1237static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
1238{ 1238{
1239 struct ib_ucm_device *dev; 1239 struct ib_ucm_device *dev;
1240 1240
1241 dev = container_of(class_dev, struct ib_ucm_device, class_dev); 1241 dev = container_of(class_dev, struct ib_ucm_device, class_dev);
1242 return sprintf(buf, "%s\n", dev->ib_dev->name); 1242 return sprintf(buf, "%s\n", dev->ib_dev->name);
1243} 1243}
@@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device)
1247{ 1247{
1248 struct ib_ucm_device *ucm_dev; 1248 struct ib_ucm_device *ucm_dev;
1249 1249
1250 if (!device->alloc_ucontext) 1250 if (!device->alloc_ucontext ||
1251 rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
1251 return; 1252 return;
1252 1253
1253 ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); 1254 ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1273f8807e84..807fbd6b8414 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved. 2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device)
1032 struct ib_umad_device *umad_dev; 1032 struct ib_umad_device *umad_dev;
1033 int s, e, i; 1033 int s, e, i;
1034 1034
1035 if (device->node_type == IB_NODE_SWITCH) 1035 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
1036 return;
1037
1038 if (device->node_type == RDMA_NODE_IB_SWITCH)
1036 s = e = 0; 1039 s = e = 0;
1037 else { 1040 else {
1038 s = 1; 1041 s = 1;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 30923eb68ec7..b72c7f69ca90 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
155} 155}
156 156
157static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, 157static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
158 struct ib_ucontext *context) 158 struct ib_ucontext *context, int nested)
159{ 159{
160 struct ib_uobject *uobj; 160 struct ib_uobject *uobj;
161 161
@@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
163 if (!uobj) 163 if (!uobj)
164 return NULL; 164 return NULL;
165 165
166 down_read(&uobj->mutex); 166 if (nested)
167 down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
168 else
169 down_read(&uobj->mutex);
167 if (!uobj->live) { 170 if (!uobj->live) {
168 put_uobj_read(uobj); 171 put_uobj_read(uobj);
169 return NULL; 172 return NULL;
@@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
190 return uobj; 193 return uobj;
191} 194}
192 195
193static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context) 196static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
197 int nested)
194{ 198{
195 struct ib_uobject *uobj; 199 struct ib_uobject *uobj;
196 200
197 uobj = idr_read_uobj(idr, id, context); 201 uobj = idr_read_uobj(idr, id, context, nested);
198 return uobj ? uobj->object : NULL; 202 return uobj ? uobj->object : NULL;
199} 203}
200 204
201static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) 205static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
202{ 206{
203 return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context); 207 return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
204} 208}
205 209
206static void put_pd_read(struct ib_pd *pd) 210static void put_pd_read(struct ib_pd *pd)
@@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd)
208 put_uobj_read(pd->uobject); 212 put_uobj_read(pd->uobject);
209} 213}
210 214
211static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context) 215static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
212{ 216{
213 return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context); 217 return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
214} 218}
215 219
216static void put_cq_read(struct ib_cq *cq) 220static void put_cq_read(struct ib_cq *cq)
@@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq)
220 224
221static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) 225static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
222{ 226{
223 return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context); 227 return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
224} 228}
225 229
226static void put_ah_read(struct ib_ah *ah) 230static void put_ah_read(struct ib_ah *ah)
@@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah)
230 234
231static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) 235static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
232{ 236{
233 return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context); 237 return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
234} 238}
235 239
236static void put_qp_read(struct ib_qp *qp) 240static void put_qp_read(struct ib_qp *qp)
@@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp)
240 244
241static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) 245static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
242{ 246{
243 return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context); 247 return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
244} 248}
245 249
246static void put_srq_read(struct ib_srq *srq) 250static void put_srq_read(struct ib_srq *srq)
@@ -837,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
837err_copy: 841err_copy:
838 idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); 842 idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
839 843
840
841err_free: 844err_free:
842 ib_destroy_cq(cq); 845 ib_destroy_cq(cq);
843 846
@@ -867,7 +870,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
867 (unsigned long) cmd.response + sizeof resp, 870 (unsigned long) cmd.response + sizeof resp,
868 in_len - sizeof cmd, out_len - sizeof resp); 871 in_len - sizeof cmd, out_len - sizeof resp);
869 872
870 cq = idr_read_cq(cmd.cq_handle, file->ucontext); 873 cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
871 if (!cq) 874 if (!cq)
872 return -EINVAL; 875 return -EINVAL;
873 876
@@ -875,11 +878,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
875 if (ret) 878 if (ret)
876 goto out; 879 goto out;
877 880
878 memset(&resp, 0, sizeof resp);
879 resp.cqe = cq->cqe; 881 resp.cqe = cq->cqe;
880 882
881 if (copy_to_user((void __user *) (unsigned long) cmd.response, 883 if (copy_to_user((void __user *) (unsigned long) cmd.response,
882 &resp, sizeof resp)) 884 &resp, sizeof resp.cqe))
883 ret = -EFAULT; 885 ret = -EFAULT;
884 886
885out: 887out:
@@ -894,7 +896,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
894{ 896{
895 struct ib_uverbs_poll_cq cmd; 897 struct ib_uverbs_poll_cq cmd;
896 struct ib_uverbs_poll_cq_resp *resp; 898 struct ib_uverbs_poll_cq_resp *resp;
897 struct ib_uobject *uobj;
898 struct ib_cq *cq; 899 struct ib_cq *cq;
899 struct ib_wc *wc; 900 struct ib_wc *wc;
900 int ret = 0; 901 int ret = 0;
@@ -915,16 +916,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
915 goto out_wc; 916 goto out_wc;
916 } 917 }
917 918
918 uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); 919 cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
919 if (!uobj) { 920 if (!cq) {
920 ret = -EINVAL; 921 ret = -EINVAL;
921 goto out; 922 goto out;
922 } 923 }
923 cq = uobj->object;
924 924
925 resp->count = ib_poll_cq(cq, cmd.ne, wc); 925 resp->count = ib_poll_cq(cq, cmd.ne, wc);
926 926
927 put_uobj_read(uobj); 927 put_cq_read(cq);
928 928
929 for (i = 0; i < resp->count; i++) { 929 for (i = 0; i < resp->count; i++) {
930 resp->wc[i].wr_id = wc[i].wr_id; 930 resp->wc[i].wr_id = wc[i].wr_id;
@@ -959,21 +959,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
959 int out_len) 959 int out_len)
960{ 960{
961 struct ib_uverbs_req_notify_cq cmd; 961 struct ib_uverbs_req_notify_cq cmd;
962 struct ib_uobject *uobj;
963 struct ib_cq *cq; 962 struct ib_cq *cq;
964 963
965 if (copy_from_user(&cmd, buf, sizeof cmd)) 964 if (copy_from_user(&cmd, buf, sizeof cmd))
966 return -EFAULT; 965 return -EFAULT;
967 966
968 uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); 967 cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
969 if (!uobj) 968 if (!cq)
970 return -EINVAL; 969 return -EINVAL;
971 cq = uobj->object;
972 970
973 ib_req_notify_cq(cq, cmd.solicited_only ? 971 ib_req_notify_cq(cq, cmd.solicited_only ?
974 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); 972 IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
975 973
976 put_uobj_read(uobj); 974 put_cq_read(cq);
977 975
978 return in_len; 976 return in_len;
979} 977}
@@ -1064,9 +1062,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1064 1062
1065 srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; 1063 srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
1066 pd = idr_read_pd(cmd.pd_handle, file->ucontext); 1064 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1067 scq = idr_read_cq(cmd.send_cq_handle, file->ucontext); 1065 scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
1068 rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? 1066 rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
1069 scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext); 1067 scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
1070 1068
1071 if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { 1069 if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
1072 ret = -EINVAL; 1070 ret = -EINVAL;
@@ -1274,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
1274 int out_len) 1272 int out_len)
1275{ 1273{
1276 struct ib_uverbs_modify_qp cmd; 1274 struct ib_uverbs_modify_qp cmd;
1275 struct ib_udata udata;
1277 struct ib_qp *qp; 1276 struct ib_qp *qp;
1278 struct ib_qp_attr *attr; 1277 struct ib_qp_attr *attr;
1279 int ret; 1278 int ret;
@@ -1281,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
1281 if (copy_from_user(&cmd, buf, sizeof cmd)) 1280 if (copy_from_user(&cmd, buf, sizeof cmd))
1282 return -EFAULT; 1281 return -EFAULT;
1283 1282
1283 INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
1284 out_len);
1285
1284 attr = kmalloc(sizeof *attr, GFP_KERNEL); 1286 attr = kmalloc(sizeof *attr, GFP_KERNEL);
1285 if (!attr) 1287 if (!attr)
1286 return -ENOMEM; 1288 return -ENOMEM;
@@ -1337,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
1337 attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; 1339 attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
1338 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; 1340 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
1339 1341
1340 ret = ib_modify_qp(qp, attr, cmd.attr_mask); 1342 ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
1341 1343
1342 put_qp_read(qp); 1344 put_qp_read(qp);
1343 1345
@@ -1674,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
1674 break; 1676 break;
1675 } 1677 }
1676 1678
1677
1678 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1679 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1679 &resp, sizeof resp)) 1680 &resp, sizeof resp))
1680 ret = -EFAULT; 1681 ret = -EFAULT;
@@ -1724,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
1724 break; 1725 break;
1725 } 1726 }
1726 1727
1727
1728 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1728 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1729 &resp, sizeof resp)) 1729 &resp, sizeof resp))
1730 ret = -EFAULT; 1730 ret = -EFAULT;
@@ -2055,6 +2055,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
2055 int out_len) 2055 int out_len)
2056{ 2056{
2057 struct ib_uverbs_modify_srq cmd; 2057 struct ib_uverbs_modify_srq cmd;
2058 struct ib_udata udata;
2058 struct ib_srq *srq; 2059 struct ib_srq *srq;
2059 struct ib_srq_attr attr; 2060 struct ib_srq_attr attr;
2060 int ret; 2061 int ret;
@@ -2062,6 +2063,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
2062 if (copy_from_user(&cmd, buf, sizeof cmd)) 2063 if (copy_from_user(&cmd, buf, sizeof cmd))
2063 return -EFAULT; 2064 return -EFAULT;
2064 2065
2066 INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
2067 out_len);
2068
2065 srq = idr_read_srq(cmd.srq_handle, file->ucontext); 2069 srq = idr_read_srq(cmd.srq_handle, file->ucontext);
2066 if (!srq) 2070 if (!srq)
2067 return -EINVAL; 2071 return -EINVAL;
@@ -2069,7 +2073,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
2069 attr.max_wr = cmd.max_wr; 2073 attr.max_wr = cmd.max_wr;
2070 attr.srq_limit = cmd.srq_limit; 2074 attr.srq_limit = cmd.srq_limit;
2071 2075
2072 ret = ib_modify_srq(srq, &attr, cmd.attr_mask); 2076 ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
2073 2077
2074 put_srq_read(srq); 2078 put_srq_read(srq);
2075 2079
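The new "nested" flag above exists so that ib_uverbs_create_qp() can read-lock two CQ uobjects (send and recv) that belong to the same lock class without triggering a false lockdep report. A minimal sketch of the underlying annotation pattern, with illustrative names that are not part of the patch:

#include <linux/rwsem.h>
#include <linux/lockdep.h>

static void lock_two_cq_uobjs(struct rw_semaphore *scq_sem,
			      struct rw_semaphore *rcq_sem)
{
	down_read(scq_sem);
	if (rcq_sem != scq_sem)
		/* second lock of the same class: mark it as nested */
		down_read_nested(rcq_sem, SINGLE_DEPTH_NESTING);
}

When the recv CQ handle equals the send CQ handle, the second lock is skipped entirely, which is why idr_read_cq() is only called with nested=1 for a distinct recv CQ.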
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 468999c38803..8b5dd3649bbf 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult)
79} 79}
80EXPORT_SYMBOL(mult_to_ib_rate); 80EXPORT_SYMBOL(mult_to_ib_rate);
81 81
82enum rdma_transport_type
83rdma_node_get_transport(enum rdma_node_type node_type)
84{
85 switch (node_type) {
86 case RDMA_NODE_IB_CA:
87 case RDMA_NODE_IB_SWITCH:
88 case RDMA_NODE_IB_ROUTER:
89 return RDMA_TRANSPORT_IB;
90 case RDMA_NODE_RNIC:
91 return RDMA_TRANSPORT_IWARP;
92 default:
93 BUG();
94 return 0;
95 }
96}
97EXPORT_SYMBOL(rdma_node_get_transport);
98
82/* Protection domains */ 99/* Protection domains */
83 100
84struct ib_pd *ib_alloc_pd(struct ib_device *device) 101struct ib_pd *ib_alloc_pd(struct ib_device *device)
@@ -231,7 +248,7 @@ int ib_modify_srq(struct ib_srq *srq,
231 struct ib_srq_attr *srq_attr, 248 struct ib_srq_attr *srq_attr,
232 enum ib_srq_attr_mask srq_attr_mask) 249 enum ib_srq_attr_mask srq_attr_mask)
233{ 250{
234 return srq->device->modify_srq(srq, srq_attr, srq_attr_mask); 251 return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
235} 252}
236EXPORT_SYMBOL(ib_modify_srq); 253EXPORT_SYMBOL(ib_modify_srq);
237 254
@@ -547,7 +564,7 @@ int ib_modify_qp(struct ib_qp *qp,
547 struct ib_qp_attr *qp_attr, 564 struct ib_qp_attr *qp_attr,
548 int qp_attr_mask) 565 int qp_attr_mask)
549{ 566{
550 return qp->device->modify_qp(qp, qp_attr, qp_attr_mask); 567 return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
551} 568}
552EXPORT_SYMBOL(ib_modify_qp); 569EXPORT_SYMBOL(ib_modify_qp);
553 570
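rdma_node_get_transport() added above is what lets IB-only consumers skip iWARP devices, as the ucm and umad changes in this patch do. A sketch of the same gating in a hypothetical consumer's add-one callback ("my_add_one" is illustrative):

#include <rdma/ib_verbs.h>

static void my_add_one(struct ib_device *device)
{
	/* RNICs report RDMA_TRANSPORT_IWARP; bail out unless this is IB */
	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	/* ... IB-specific setup would go here ... */
}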
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
new file mode 100644
index 000000000000..06964c4af849
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kbuild
@@ -0,0 +1,8 @@
1ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
2EXTRA_CFLAGS += -DDEBUG
3endif
4
5obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
6
7iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
8 c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
new file mode 100644
index 000000000000..809cb14ac6de
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/Kconfig
@@ -0,0 +1,15 @@
1config INFINIBAND_AMSO1100
2 tristate "Ammasso 1100 HCA support"
3 depends on PCI && INET && INFINIBAND
4 ---help---
5 This is a low-level driver for the Ammasso 1100 host
6 channel adapter (HCA).
7
8config INFINIBAND_AMSO1100_DEBUG
9 bool "Verbose debugging output"
10 depends on INFINIBAND_AMSO1100
11 default n
12 ---help---
 13 This option causes the amso1100 driver to produce verbose
 14 debug messages. Select this if you are developing the driver
15 or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
new file mode 100644
index 000000000000..9e9120f36019
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -0,0 +1,1255 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <linux/module.h>
34#include <linux/moduleparam.h>
35#include <linux/pci.h>
36#include <linux/netdevice.h>
37#include <linux/etherdevice.h>
38#include <linux/inetdevice.h>
39#include <linux/delay.h>
40#include <linux/ethtool.h>
41#include <linux/mii.h>
42#include <linux/if_vlan.h>
43#include <linux/crc32.h>
44#include <linux/in.h>
45#include <linux/ip.h>
46#include <linux/tcp.h>
47#include <linux/init.h>
48#include <linux/dma-mapping.h>
49
50#include <asm/io.h>
51#include <asm/irq.h>
52#include <asm/byteorder.h>
53
54#include <rdma/ib_smi.h>
55#include "c2.h"
56#include "c2_provider.h"
57
58MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
59MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
60MODULE_LICENSE("Dual BSD/GPL");
61MODULE_VERSION(DRV_VERSION);
62
63static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
64 | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
65
66static int debug = -1; /* defaults above */
67module_param(debug, int, 0);
68MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
69
70static int c2_up(struct net_device *netdev);
71static int c2_down(struct net_device *netdev);
72static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
73static void c2_tx_interrupt(struct net_device *netdev);
74static void c2_rx_interrupt(struct net_device *netdev);
75static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs);
76static void c2_tx_timeout(struct net_device *netdev);
77static int c2_change_mtu(struct net_device *netdev, int new_mtu);
78static void c2_reset(struct c2_port *c2_port);
79static struct net_device_stats *c2_get_stats(struct net_device *netdev);
80
81static struct pci_device_id c2_pci_table[] = {
82 { PCI_DEVICE(0x18b8, 0xb001) },
83 { 0 }
84};
85
86MODULE_DEVICE_TABLE(pci, c2_pci_table);
87
88static void c2_print_macaddr(struct net_device *netdev)
89{
90 pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
91 "IRQ %u\n", netdev->name,
92 netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
93 netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
94 netdev->irq);
95}
96
97static void c2_set_rxbufsize(struct c2_port *c2_port)
98{
99 struct net_device *netdev = c2_port->netdev;
100
101 if (netdev->mtu > RX_BUF_SIZE)
102 c2_port->rx_buf_size =
103 netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
104 NET_IP_ALIGN;
105 else
106 c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
107}
108
109/*
110 * Allocate TX ring elements and chain them together.
111 * One-to-one association of adapter descriptors with ring elements.
112 */
113static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
114 dma_addr_t base, void __iomem * mmio_txp_ring)
115{
116 struct c2_tx_desc *tx_desc;
117 struct c2_txp_desc __iomem *txp_desc;
118 struct c2_element *elem;
119 int i;
120
121 tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL);
122 if (!tx_ring->start)
123 return -ENOMEM;
124
125 elem = tx_ring->start;
126 tx_desc = vaddr;
127 txp_desc = mmio_txp_ring;
128 for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
129 tx_desc->len = 0;
130 tx_desc->status = 0;
131
132 /* Set TXP_HTXD_UNINIT */
133 __raw_writeq(cpu_to_be64(0x1122334455667788ULL),
134 (void __iomem *) txp_desc + C2_TXP_ADDR);
135 __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
136 __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
137 (void __iomem *) txp_desc + C2_TXP_FLAGS);
138
139 elem->skb = NULL;
140 elem->ht_desc = tx_desc;
141 elem->hw_desc = txp_desc;
142
143 if (i == tx_ring->count - 1) {
144 elem->next = tx_ring->start;
145 tx_desc->next_offset = base;
146 } else {
147 elem->next = elem + 1;
148 tx_desc->next_offset =
149 base + (i + 1) * sizeof(*tx_desc);
150 }
151 }
152
153 tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
154
155 return 0;
156}
157
158/*
159 * Allocate RX ring elements and chain them together.
160 * One-to-one association of adapter descriptors with ring elements.
161 */
162static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
163 dma_addr_t base, void __iomem * mmio_rxp_ring)
164{
165 struct c2_rx_desc *rx_desc;
166 struct c2_rxp_desc __iomem *rxp_desc;
167 struct c2_element *elem;
168 int i;
169
170 rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL);
171 if (!rx_ring->start)
172 return -ENOMEM;
173
174 elem = rx_ring->start;
175 rx_desc = vaddr;
176 rxp_desc = mmio_rxp_ring;
177 for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
178 rx_desc->len = 0;
179 rx_desc->status = 0;
180
181 /* Set RXP_HRXD_UNINIT */
182 __raw_writew(cpu_to_be16(RXP_HRXD_OK),
183 (void __iomem *) rxp_desc + C2_RXP_STATUS);
184 __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
185 __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
186 __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
187 (void __iomem *) rxp_desc + C2_RXP_ADDR);
188 __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
189 (void __iomem *) rxp_desc + C2_RXP_FLAGS);
190
191 elem->skb = NULL;
192 elem->ht_desc = rx_desc;
193 elem->hw_desc = rxp_desc;
194
195 if (i == rx_ring->count - 1) {
196 elem->next = rx_ring->start;
197 rx_desc->next_offset = base;
198 } else {
199 elem->next = elem + 1;
200 rx_desc->next_offset =
201 base + (i + 1) * sizeof(*rx_desc);
202 }
203 }
204
205 rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
206
207 return 0;
208}
209
210/* Setup buffer for receiving */
211static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
212{
213 struct c2_dev *c2dev = c2_port->c2dev;
214 struct c2_rx_desc *rx_desc = elem->ht_desc;
215 struct sk_buff *skb;
216 dma_addr_t mapaddr;
217 u32 maplen;
218 struct c2_rxp_hdr *rxp_hdr;
219
220 skb = dev_alloc_skb(c2_port->rx_buf_size);
221 if (unlikely(!skb)) {
222 pr_debug("%s: out of memory for receive\n",
223 c2_port->netdev->name);
224 return -ENOMEM;
225 }
226
227 /* Zero out the rxp hdr in the sk_buff */
228 memset(skb->data, 0, sizeof(*rxp_hdr));
229
230 skb->dev = c2_port->netdev;
231
232 maplen = c2_port->rx_buf_size;
233 mapaddr =
234 pci_map_single(c2dev->pcidev, skb->data, maplen,
235 PCI_DMA_FROMDEVICE);
236
237 /* Set the sk_buff RXP_header to RXP_HRXD_READY */
238 rxp_hdr = (struct c2_rxp_hdr *) skb->data;
239 rxp_hdr->flags = RXP_HRXD_READY;
240
241 __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
242 __raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
243 elem->hw_desc + C2_RXP_LEN);
244 __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
245 __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
246
247 elem->skb = skb;
248 elem->mapaddr = mapaddr;
249 elem->maplen = maplen;
250 rx_desc->len = maplen;
251
252 return 0;
253}
254
255/*
256 * Allocate buffers for the Rx ring
257 * For receive: rx_ring.to_clean is next received frame
258 */
259static int c2_rx_fill(struct c2_port *c2_port)
260{
261 struct c2_ring *rx_ring = &c2_port->rx_ring;
262 struct c2_element *elem;
263 int ret = 0;
264
265 elem = rx_ring->start;
266 do {
267 if (c2_rx_alloc(c2_port, elem)) {
268 ret = 1;
269 break;
270 }
271 } while ((elem = elem->next) != rx_ring->start);
272
273 rx_ring->to_clean = rx_ring->start;
274 return ret;
275}
276
277/* Free all buffers in RX ring, assumes receiver stopped */
278static void c2_rx_clean(struct c2_port *c2_port)
279{
280 struct c2_dev *c2dev = c2_port->c2dev;
281 struct c2_ring *rx_ring = &c2_port->rx_ring;
282 struct c2_element *elem;
283 struct c2_rx_desc *rx_desc;
284
285 elem = rx_ring->start;
286 do {
287 rx_desc = elem->ht_desc;
288 rx_desc->len = 0;
289
290 __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
291 __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
292 __raw_writew(0, elem->hw_desc + C2_RXP_LEN);
293 __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
294 elem->hw_desc + C2_RXP_ADDR);
295 __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
296 elem->hw_desc + C2_RXP_FLAGS);
297
298 if (elem->skb) {
299 pci_unmap_single(c2dev->pcidev, elem->mapaddr,
300 elem->maplen, PCI_DMA_FROMDEVICE);
301 dev_kfree_skb(elem->skb);
302 elem->skb = NULL;
303 }
304 } while ((elem = elem->next) != rx_ring->start);
305}
306
307static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
308{
309 struct c2_tx_desc *tx_desc = elem->ht_desc;
310
311 tx_desc->len = 0;
312
313 pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
314 PCI_DMA_TODEVICE);
315
316 if (elem->skb) {
317 dev_kfree_skb_any(elem->skb);
318 elem->skb = NULL;
319 }
320
321 return 0;
322}
323
324/* Free all buffers in TX ring, assumes transmitter stopped */
325static void c2_tx_clean(struct c2_port *c2_port)
326{
327 struct c2_ring *tx_ring = &c2_port->tx_ring;
328 struct c2_element *elem;
329 struct c2_txp_desc txp_htxd;
330 int retry;
331 unsigned long flags;
332
333 spin_lock_irqsave(&c2_port->tx_lock, flags);
334
335 elem = tx_ring->start;
336
337 do {
338 retry = 0;
339 do {
340 txp_htxd.flags =
341 readw(elem->hw_desc + C2_TXP_FLAGS);
342
343 if (txp_htxd.flags == TXP_HTXD_READY) {
344 retry = 1;
345 __raw_writew(0,
346 elem->hw_desc + C2_TXP_LEN);
347 __raw_writeq(0,
348 elem->hw_desc + C2_TXP_ADDR);
349 __raw_writew(cpu_to_be16(TXP_HTXD_DONE),
350 elem->hw_desc + C2_TXP_FLAGS);
351 c2_port->netstats.tx_dropped++;
352 break;
353 } else {
354 __raw_writew(0,
355 elem->hw_desc + C2_TXP_LEN);
356 __raw_writeq(cpu_to_be64(0x1122334455667788ULL),
357 elem->hw_desc + C2_TXP_ADDR);
358 __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
359 elem->hw_desc + C2_TXP_FLAGS);
360 }
361
362 c2_tx_free(c2_port->c2dev, elem);
363
364 } while ((elem = elem->next) != tx_ring->start);
365 } while (retry);
366
367 c2_port->tx_avail = c2_port->tx_ring.count - 1;
368 c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
369
370 if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
371 netif_wake_queue(c2_port->netdev);
372
373 spin_unlock_irqrestore(&c2_port->tx_lock, flags);
374}
375
376/*
377 * Process transmit descriptors marked 'DONE' by the firmware,
378 * freeing up their unneeded sk_buffs.
379 */
380static void c2_tx_interrupt(struct net_device *netdev)
381{
382 struct c2_port *c2_port = netdev_priv(netdev);
383 struct c2_dev *c2dev = c2_port->c2dev;
384 struct c2_ring *tx_ring = &c2_port->tx_ring;
385 struct c2_element *elem;
386 struct c2_txp_desc txp_htxd;
387
388 spin_lock(&c2_port->tx_lock);
389
390 for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
391 elem = elem->next) {
392 txp_htxd.flags =
393 be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
394
395 if (txp_htxd.flags != TXP_HTXD_DONE)
396 break;
397
398 if (netif_msg_tx_done(c2_port)) {
399 /* PCI reads are expensive in fast path */
400 txp_htxd.len =
401 be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN));
402 pr_debug("%s: tx done slot %3Zu status 0x%x len "
403 "%5u bytes\n",
404 netdev->name, elem - tx_ring->start,
405 txp_htxd.flags, txp_htxd.len);
406 }
407
408 c2_tx_free(c2dev, elem);
409 ++(c2_port->tx_avail);
410 }
411
412 tx_ring->to_clean = elem;
413
414 if (netif_queue_stopped(netdev)
415 && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
416 netif_wake_queue(netdev);
417
418 spin_unlock(&c2_port->tx_lock);
419}
420
421static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
422{
423 struct c2_rx_desc *rx_desc = elem->ht_desc;
424 struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
425
426 if (rxp_hdr->status != RXP_HRXD_OK ||
427 rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
428 pr_debug("BAD RXP_HRXD\n");
429 pr_debug(" rx_desc : %p\n", rx_desc);
430 pr_debug(" index : %Zu\n",
431 elem - c2_port->rx_ring.start);
432 pr_debug(" len : %u\n", rx_desc->len);
433 pr_debug(" rxp_hdr : %p [PA %p]\n", rxp_hdr,
434 (void *) __pa((unsigned long) rxp_hdr));
435 pr_debug(" flags : 0x%x\n", rxp_hdr->flags);
436 pr_debug(" status: 0x%x\n", rxp_hdr->status);
437 pr_debug(" len : %u\n", rxp_hdr->len);
438 pr_debug(" rsvd : 0x%x\n", rxp_hdr->rsvd);
439 }
440
441 /* Setup the skb for reuse since we're dropping this pkt */
442 elem->skb->tail = elem->skb->data = elem->skb->head;
443
444 /* Zero out the rxp hdr in the sk_buff */
445 memset(elem->skb->data, 0, sizeof(*rxp_hdr));
446
447 /* Write the descriptor to the adapter's rx ring */
448 __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
449 __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
450 __raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
451 elem->hw_desc + C2_RXP_LEN);
452 __raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR);
453 __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
454
455 pr_debug("packet dropped\n");
456 c2_port->netstats.rx_dropped++;
457}
458
459static void c2_rx_interrupt(struct net_device *netdev)
460{
461 struct c2_port *c2_port = netdev_priv(netdev);
462 struct c2_dev *c2dev = c2_port->c2dev;
463 struct c2_ring *rx_ring = &c2_port->rx_ring;
464 struct c2_element *elem;
465 struct c2_rx_desc *rx_desc;
466 struct c2_rxp_hdr *rxp_hdr;
467 struct sk_buff *skb;
468 dma_addr_t mapaddr;
469 u32 maplen, buflen;
470 unsigned long flags;
471
472 spin_lock_irqsave(&c2dev->lock, flags);
473
474 /* Begin where we left off */
475 rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
476
477 for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
478 elem = elem->next) {
479 rx_desc = elem->ht_desc;
480 mapaddr = elem->mapaddr;
481 maplen = elem->maplen;
482 skb = elem->skb;
483 rxp_hdr = (struct c2_rxp_hdr *) skb->data;
484
485 if (rxp_hdr->flags != RXP_HRXD_DONE)
486 break;
487 buflen = rxp_hdr->len;
488
489 /* Sanity check the RXP header */
490 if (rxp_hdr->status != RXP_HRXD_OK ||
491 buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
492 c2_rx_error(c2_port, elem);
493 continue;
494 }
495
496 /*
497 * Allocate and map a new skb for replenishing the host
498 * RX desc
499 */
500 if (c2_rx_alloc(c2_port, elem)) {
501 c2_rx_error(c2_port, elem);
502 continue;
503 }
504
505 /* Unmap the old skb */
506 pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
507 PCI_DMA_FROMDEVICE);
508
509 prefetch(skb->data);
510
511 /*
 512 * Skip past the leading 8 bytes comprising the
513 * "struct c2_rxp_hdr", prepended by the adapter
514 * to the usual Ethernet header ("struct ethhdr"),
515 * to the start of the raw Ethernet packet.
516 *
517 * Fix up the various fields in the sk_buff before
518 * passing it up to netif_rx(). The transfer size
519 * (in bytes) specified by the adapter len field of
520 * the "struct rxp_hdr_t" does NOT include the
521 * "sizeof(struct c2_rxp_hdr)".
522 */
523 skb->data += sizeof(*rxp_hdr);
524 skb->tail = skb->data + buflen;
525 skb->len = buflen;
526 skb->dev = netdev;
527 skb->protocol = eth_type_trans(skb, netdev);
528
529 netif_rx(skb);
530
531 netdev->last_rx = jiffies;
532 c2_port->netstats.rx_packets++;
533 c2_port->netstats.rx_bytes += buflen;
534 }
535
536 /* Save where we left off */
537 rx_ring->to_clean = elem;
538 c2dev->cur_rx = elem - rx_ring->start;
539 C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
540
541 spin_unlock_irqrestore(&c2dev->lock, flags);
542}
543
544/*
545 * Handle netisr0 TX & RX interrupts.
546 */
547static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs)
548{
549 unsigned int netisr0, dmaisr;
550 int handled = 0;
551 struct c2_dev *c2dev = (struct c2_dev *) dev_id;
552
553 /* Process CCILNET interrupts */
554 netisr0 = readl(c2dev->regs + C2_NISR0);
555 if (netisr0) {
556
557 /*
558 * There is an issue with the firmware that always
559 * provides the status of RX for both TX & RX
560 * interrupts. So process both queues here.
561 */
562 c2_rx_interrupt(c2dev->netdev);
563 c2_tx_interrupt(c2dev->netdev);
564
565 /* Clear the interrupt */
566 writel(netisr0, c2dev->regs + C2_NISR0);
567 handled++;
568 }
569
570 /* Process RNIC interrupts */
571 dmaisr = readl(c2dev->regs + C2_DISR);
572 if (dmaisr) {
573 writel(dmaisr, c2dev->regs + C2_DISR);
574 c2_rnic_interrupt(c2dev);
575 handled++;
576 }
577
578 if (handled) {
579 return IRQ_HANDLED;
580 } else {
581 return IRQ_NONE;
582 }
583}
584
585static int c2_up(struct net_device *netdev)
586{
587 struct c2_port *c2_port = netdev_priv(netdev);
588 struct c2_dev *c2dev = c2_port->c2dev;
589 struct c2_element *elem;
590 struct c2_rxp_hdr *rxp_hdr;
591 struct in_device *in_dev;
592 size_t rx_size, tx_size;
593 int ret, i;
594 unsigned int netimr0;
595
596 if (netif_msg_ifup(c2_port))
597 pr_debug("%s: enabling interface\n", netdev->name);
598
599 /* Set the Rx buffer size based on MTU */
600 c2_set_rxbufsize(c2_port);
601
602 /* Allocate DMA'able memory for Tx/Rx host descriptor rings */
603 rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
604 tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
605
606 c2_port->mem_size = tx_size + rx_size;
607 c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
608 &c2_port->dma);
609 if (c2_port->mem == NULL) {
610 pr_debug("Unable to allocate memory for "
611 "host descriptor rings\n");
612 return -ENOMEM;
613 }
614
615 memset(c2_port->mem, 0, c2_port->mem_size);
616
617 /* Create the Rx host descriptor ring */
618 if ((ret =
619 c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
620 c2dev->mmio_rxp_ring))) {
621 pr_debug("Unable to create RX ring\n");
622 goto bail0;
623 }
624
625 /* Allocate Rx buffers for the host descriptor ring */
626 if (c2_rx_fill(c2_port)) {
627 pr_debug("Unable to fill RX ring\n");
628 goto bail1;
629 }
630
631 /* Create the Tx host descriptor ring */
632 if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
633 c2_port->dma + rx_size,
634 c2dev->mmio_txp_ring))) {
635 pr_debug("Unable to create TX ring\n");
636 goto bail1;
637 }
638
639 /* Set the TX pointer to where we left off */
640 c2_port->tx_avail = c2_port->tx_ring.count - 1;
641 c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
642 c2_port->tx_ring.start + c2dev->cur_tx;
643
644 /* missing: Initialize MAC */
645
646 BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
647
648 /* Reset the adapter, ensures the driver is in sync with the RXP */
649 c2_reset(c2_port);
650
651 /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
652 for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
653 i++, elem++) {
654 rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
655 rxp_hdr->flags = 0;
656 __raw_writew(cpu_to_be16(RXP_HRXD_READY),
657 elem->hw_desc + C2_RXP_FLAGS);
658 }
659
660 /* Enable network packets */
661 netif_start_queue(netdev);
662
663 /* Enable IRQ */
664 writel(0, c2dev->regs + C2_IDIS);
665 netimr0 = readl(c2dev->regs + C2_NIMR0);
666 netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
667 writel(netimr0, c2dev->regs + C2_NIMR0);
668
669 /* Tell the stack to ignore arp requests for ipaddrs bound to
670 * other interfaces. This is needed to prevent the host stack
671 * from responding to arp requests to the ipaddr bound on the
672 * rdma interface.
673 */
674 in_dev = in_dev_get(netdev);
675 in_dev->cnf.arp_ignore = 1;
676 in_dev_put(in_dev);
677
678 return 0;
679
680 bail1:
681 c2_rx_clean(c2_port);
682 kfree(c2_port->rx_ring.start);
683
684 bail0:
685 pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
686 c2_port->dma);
687
688 return ret;
689}
690
691static int c2_down(struct net_device *netdev)
692{
693 struct c2_port *c2_port = netdev_priv(netdev);
694 struct c2_dev *c2dev = c2_port->c2dev;
695
696 if (netif_msg_ifdown(c2_port))
697 pr_debug("%s: disabling interface\n",
698 netdev->name);
699
700 /* Wait for all the queued packets to get sent */
701 c2_tx_interrupt(netdev);
702
703 /* Disable network packets */
704 netif_stop_queue(netdev);
705
706 /* Disable IRQs by clearing the interrupt mask */
707 writel(1, c2dev->regs + C2_IDIS);
708 writel(0, c2dev->regs + C2_NIMR0);
709
710 /* missing: Stop transmitter */
711
712 /* missing: Stop receiver */
713
714 /* Reset the adapter, ensures the driver is in sync with the RXP */
715 c2_reset(c2_port);
716
717 /* missing: Turn off LEDs here */
718
719 /* Free all buffers in the host descriptor rings */
720 c2_tx_clean(c2_port);
721 c2_rx_clean(c2_port);
722
723 /* Free the host descriptor rings */
724 kfree(c2_port->rx_ring.start);
725 kfree(c2_port->tx_ring.start);
726 pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
727 c2_port->dma);
728
729 return 0;
730}
731
732static void c2_reset(struct c2_port *c2_port)
733{
734 struct c2_dev *c2dev = c2_port->c2dev;
735 unsigned int cur_rx = c2dev->cur_rx;
736
737 /* Tell the hardware to quiesce */
738 C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
739
740 /*
741 * The hardware will reset the C2_PCI_HRX_QUI bit once
742 * the RXP is quiesced. Wait 2 seconds for this.
743 */
744 ssleep(2);
745
746 cur_rx = C2_GET_CUR_RX(c2dev);
747
748 if (cur_rx & C2_PCI_HRX_QUI)
749 pr_debug("c2_reset: failed to quiesce the hardware!\n");
750
751 cur_rx &= ~C2_PCI_HRX_QUI;
752
753 c2dev->cur_rx = cur_rx;
754
755 pr_debug("Current RX: %u\n", c2dev->cur_rx);
756}
757
758static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
759{
760 struct c2_port *c2_port = netdev_priv(netdev);
761 struct c2_dev *c2dev = c2_port->c2dev;
762 struct c2_ring *tx_ring = &c2_port->tx_ring;
763 struct c2_element *elem;
764 dma_addr_t mapaddr;
765 u32 maplen;
766 unsigned long flags;
767 unsigned int i;
768
769 spin_lock_irqsave(&c2_port->tx_lock, flags);
770
771 if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
772 netif_stop_queue(netdev);
773 spin_unlock_irqrestore(&c2_port->tx_lock, flags);
774
775 pr_debug("%s: Tx ring full when queue awake!\n",
776 netdev->name);
777 return NETDEV_TX_BUSY;
778 }
779
780 maplen = skb_headlen(skb);
781 mapaddr =
782 pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
783
784 elem = tx_ring->to_use;
785 elem->skb = skb;
786 elem->mapaddr = mapaddr;
787 elem->maplen = maplen;
788
789 /* Tell HW to xmit */
790 __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR);
791 __raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN);
792 __raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS);
793
794 c2_port->netstats.tx_packets++;
795 c2_port->netstats.tx_bytes += maplen;
796
797 /* Loop thru additional data fragments and queue them */
798 if (skb_shinfo(skb)->nr_frags) {
799 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
800 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
801 maplen = frag->size;
802 mapaddr =
803 pci_map_page(c2dev->pcidev, frag->page,
804 frag->page_offset, maplen,
805 PCI_DMA_TODEVICE);
806
807 elem = elem->next;
808 elem->skb = NULL;
809 elem->mapaddr = mapaddr;
810 elem->maplen = maplen;
811
812 /* Tell HW to xmit */
813 __raw_writeq(cpu_to_be64(mapaddr),
814 elem->hw_desc + C2_TXP_ADDR);
815 __raw_writew(cpu_to_be16(maplen),
816 elem->hw_desc + C2_TXP_LEN);
817 __raw_writew(cpu_to_be16(TXP_HTXD_READY),
818 elem->hw_desc + C2_TXP_FLAGS);
819
820 c2_port->netstats.tx_packets++;
821 c2_port->netstats.tx_bytes += maplen;
822 }
823 }
824
825 tx_ring->to_use = elem->next;
826 c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
827
828 if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
829 netif_stop_queue(netdev);
830 if (netif_msg_tx_queued(c2_port))
831 pr_debug("%s: transmit queue full\n",
832 netdev->name);
833 }
834
835 spin_unlock_irqrestore(&c2_port->tx_lock, flags);
836
837 netdev->trans_start = jiffies;
838
839 return NETDEV_TX_OK;
840}
841
842static struct net_device_stats *c2_get_stats(struct net_device *netdev)
843{
844 struct c2_port *c2_port = netdev_priv(netdev);
845
846 return &c2_port->netstats;
847}
848
849static void c2_tx_timeout(struct net_device *netdev)
850{
851 struct c2_port *c2_port = netdev_priv(netdev);
852
853 if (netif_msg_timer(c2_port))
854 pr_debug("%s: tx timeout\n", netdev->name);
855
856 c2_tx_clean(c2_port);
857}
858
859static int c2_change_mtu(struct net_device *netdev, int new_mtu)
860{
861 int ret = 0;
862
863 if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
864 return -EINVAL;
865
866 netdev->mtu = new_mtu;
867
868 if (netif_running(netdev)) {
869 c2_down(netdev);
870
871 c2_up(netdev);
872 }
873
874 return ret;
875}
876
877/* Initialize network device */
878static struct net_device *c2_devinit(struct c2_dev *c2dev,
879 void __iomem * mmio_addr)
880{
881 struct c2_port *c2_port = NULL;
882 struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
883
884 if (!netdev) {
885 pr_debug("c2_port etherdev alloc failed");
886 return NULL;
887 }
888
889 SET_MODULE_OWNER(netdev);
890 SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
891
892 netdev->open = c2_up;
893 netdev->stop = c2_down;
894 netdev->hard_start_xmit = c2_xmit_frame;
895 netdev->get_stats = c2_get_stats;
896 netdev->tx_timeout = c2_tx_timeout;
897 netdev->change_mtu = c2_change_mtu;
898 netdev->watchdog_timeo = C2_TX_TIMEOUT;
899 netdev->irq = c2dev->pcidev->irq;
900
901 c2_port = netdev_priv(netdev);
902 c2_port->netdev = netdev;
903 c2_port->c2dev = c2dev;
904 c2_port->msg_enable = netif_msg_init(debug, default_msg);
905 c2_port->tx_ring.count = C2_NUM_TX_DESC;
906 c2_port->rx_ring.count = C2_NUM_RX_DESC;
907
908 spin_lock_init(&c2_port->tx_lock);
909
910 /* Copy our 48-bit ethernet hardware address */
911 memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
912
913 /* Validate the MAC address */
914 if (!is_valid_ether_addr(netdev->dev_addr)) {
915 pr_debug("Invalid MAC Address\n");
916 c2_print_macaddr(netdev);
917 free_netdev(netdev);
918 return NULL;
919 }
920
921 c2dev->netdev = netdev;
922
923 return netdev;
924}
925
926static int __devinit c2_probe(struct pci_dev *pcidev,
927 const struct pci_device_id *ent)
928{
929 int ret = 0, i;
930 unsigned long reg0_start, reg0_flags, reg0_len;
931 unsigned long reg2_start, reg2_flags, reg2_len;
932 unsigned long reg4_start, reg4_flags, reg4_len;
933 unsigned kva_map_size;
934 struct net_device *netdev = NULL;
935 struct c2_dev *c2dev = NULL;
936 void __iomem *mmio_regs = NULL;
937
938 printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
939 DRV_VERSION);
940
941 /* Enable PCI device */
942 ret = pci_enable_device(pcidev);
943 if (ret) {
944 printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
945 pci_name(pcidev));
946 goto bail0;
947 }
948
949 reg0_start = pci_resource_start(pcidev, BAR_0);
950 reg0_len = pci_resource_len(pcidev, BAR_0);
951 reg0_flags = pci_resource_flags(pcidev, BAR_0);
952
953 reg2_start = pci_resource_start(pcidev, BAR_2);
954 reg2_len = pci_resource_len(pcidev, BAR_2);
955 reg2_flags = pci_resource_flags(pcidev, BAR_2);
956
957 reg4_start = pci_resource_start(pcidev, BAR_4);
958 reg4_len = pci_resource_len(pcidev, BAR_4);
959 reg4_flags = pci_resource_flags(pcidev, BAR_4);
960
961 pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
962 pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
963 pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
964
965 /* Make sure PCI base addr are MMIO */
966 if (!(reg0_flags & IORESOURCE_MEM) ||
967 !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
968 printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
969 ret = -ENODEV;
970 goto bail1;
971 }
972
973 /* Check for weird/broken PCI region reporting */
974 if ((reg0_len < C2_REG0_SIZE) ||
975 (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
976 printk(KERN_ERR PFX "Invalid PCI region sizes\n");
977 ret = -ENODEV;
978 goto bail1;
979 }
980
981 /* Reserve PCI I/O and memory resources */
982 ret = pci_request_regions(pcidev, DRV_NAME);
983 if (ret) {
984 printk(KERN_ERR PFX "%s: Unable to request regions\n",
985 pci_name(pcidev));
986 goto bail1;
987 }
988
989 if ((sizeof(dma_addr_t) > 4)) {
990 ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
991 if (ret < 0) {
992 printk(KERN_ERR PFX "64b DMA configuration failed\n");
993 goto bail2;
994 }
995 } else {
996 ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
997 if (ret < 0) {
998 printk(KERN_ERR PFX "32b DMA configuration failed\n");
999 goto bail2;
1000 }
1001 }
1002
1003 /* Enables bus-mastering on the device */
1004 pci_set_master(pcidev);
1005
1006 /* Remap the adapter PCI registers in BAR4 */
1007 mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
1008 sizeof(struct c2_adapter_pci_regs));
1009 if (mmio_regs == 0UL) {
1010 printk(KERN_ERR PFX
1011 "Unable to remap adapter PCI registers in BAR4\n");
1012 ret = -EIO;
1013 goto bail2;
1014 }
1015
1016 /* Validate PCI regs magic */
1017 for (i = 0; i < sizeof(c2_magic); i++) {
1018 if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
1019 printk(KERN_ERR PFX "Downlevel Firmware boot loader "
1020 "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
1021 "utility to update your boot loader\n",
1022 i + 1, sizeof(c2_magic),
1023 readb(mmio_regs + C2_REGS_MAGIC + i),
1024 c2_magic[i]);
1025 printk(KERN_ERR PFX "Adapter not claimed\n");
1026 iounmap(mmio_regs);
1027 ret = -EIO;
1028 goto bail2;
1029 }
1030 }
1031
1032 /* Validate the adapter version */
1033 if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
1034 printk(KERN_ERR PFX "Version mismatch "
1035 "[fw=%u, c2=%u], Adapter not claimed\n",
1036 be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)),
1037 C2_VERSION);
1038 ret = -EINVAL;
1039 iounmap(mmio_regs);
1040 goto bail2;
1041 }
1042
1043 /* Validate the adapter IVN */
1044 if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
 1045 printk(KERN_ERR PFX "Downlevel firmware level. You should be using "
1046 "the OpenIB device support kit. "
1047 "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
1048 be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)),
1049 C2_IVN);
1050 ret = -EINVAL;
1051 iounmap(mmio_regs);
1052 goto bail2;
1053 }
1054
1055 /* Allocate hardware structure */
1056 c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
1057 if (!c2dev) {
1058 printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
1059 pci_name(pcidev));
1060 ret = -ENOMEM;
1061 iounmap(mmio_regs);
1062 goto bail2;
1063 }
1064
1065 memset(c2dev, 0, sizeof(*c2dev));
1066 spin_lock_init(&c2dev->lock);
1067 c2dev->pcidev = pcidev;
1068 c2dev->cur_tx = 0;
1069
1070 /* Get the last RX index */
1071 c2dev->cur_rx =
1072 (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) -
1073 0xffffc000) / sizeof(struct c2_rxp_desc);
1074
1075 /* Request an interrupt line for the driver */
1076 ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev);
1077 if (ret) {
1078 printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
1079 pci_name(pcidev), pcidev->irq);
1080 iounmap(mmio_regs);
1081 goto bail3;
1082 }
1083
1084 /* Set driver specific data */
1085 pci_set_drvdata(pcidev, c2dev);
1086
1087 /* Initialize network device */
1088 if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
1089 iounmap(mmio_regs);
1090 goto bail4;
1091 }
1092
1093 /* Save off the actual size prior to unmapping mmio_regs */
1094 kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE));
1095
1096 /* Unmap the adapter PCI registers in BAR4 */
1097 iounmap(mmio_regs);
1098
1099 /* Register network device */
1100 ret = register_netdev(netdev);
1101 if (ret) {
1102 printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
1103 ret);
1104 goto bail5;
1105 }
1106
1107 /* Disable network packets */
1108 netif_stop_queue(netdev);
1109
1110 /* Remap the adapter HRXDQ PA space to kernel VA space */
1111 c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
1112 C2_RXP_HRXDQ_SIZE);
1113 if (c2dev->mmio_rxp_ring == 0UL) {
1114 printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
1115 ret = -EIO;
1116 goto bail6;
1117 }
1118
1119 /* Remap the adapter HTXDQ PA space to kernel VA space */
1120 c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
1121 C2_TXP_HTXDQ_SIZE);
1122 if (c2dev->mmio_txp_ring == 0UL) {
1123 printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
1124 ret = -EIO;
1125 goto bail7;
1126 }
1127
1128 /* Save off the current RX index in the last 4 bytes of the TXP Ring */
1129 C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
1130
1131 /* Remap the PCI registers in adapter BAR0 to kernel VA space */
1132 c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
1133 if (c2dev->regs == 0UL) {
1134 printk(KERN_ERR PFX "Unable to remap BAR0\n");
1135 ret = -EIO;
1136 goto bail8;
1137 }
1138
1139 /* Remap the PCI registers in adapter BAR4 to kernel VA space */
1140 c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
1141 c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
1142 kva_map_size);
1143 if (c2dev->kva == 0UL) {
1144 printk(KERN_ERR PFX "Unable to remap BAR4\n");
1145 ret = -EIO;
1146 goto bail9;
1147 }
1148
1149 /* Print out the MAC address */
1150 c2_print_macaddr(netdev);
1151
1152 ret = c2_rnic_init(c2dev);
1153 if (ret) {
1154 printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
1155 goto bail10;
1156 }
1157
1158 c2_register_device(c2dev);
1159
1160 return 0;
1161
1162 bail10:
1163 iounmap(c2dev->kva);
1164
1165 bail9:
1166 iounmap(c2dev->regs);
1167
1168 bail8:
1169 iounmap(c2dev->mmio_txp_ring);
1170
1171 bail7:
1172 iounmap(c2dev->mmio_rxp_ring);
1173
1174 bail6:
1175 unregister_netdev(netdev);
1176
1177 bail5:
1178 free_netdev(netdev);
1179
1180 bail4:
1181 free_irq(pcidev->irq, c2dev);
1182
1183 bail3:
1184 ib_dealloc_device(&c2dev->ibdev);
1185
1186 bail2:
1187 pci_release_regions(pcidev);
1188
1189 bail1:
1190 pci_disable_device(pcidev);
1191
1192 bail0:
1193 return ret;
1194}
1195
1196static void __devexit c2_remove(struct pci_dev *pcidev)
1197{
1198 struct c2_dev *c2dev = pci_get_drvdata(pcidev);
1199 struct net_device *netdev = c2dev->netdev;
1200
1201 /* Unregister with OpenIB */
1202 c2_unregister_device(c2dev);
1203
1204 /* Clean up the RNIC resources */
1205 c2_rnic_term(c2dev);
1206
1207 /* Remove network device from the kernel */
1208 unregister_netdev(netdev);
1209
1210 /* Free network device */
1211 free_netdev(netdev);
1212
1213 /* Free the interrupt line */
1214 free_irq(pcidev->irq, c2dev);
1215
1216 /* missing: Turn LEDs off here */
1217
1218 /* Unmap adapter PA space */
1219 iounmap(c2dev->kva);
1220 iounmap(c2dev->regs);
1221 iounmap(c2dev->mmio_txp_ring);
1222 iounmap(c2dev->mmio_rxp_ring);
1223
1224 /* Free the hardware structure */
1225 ib_dealloc_device(&c2dev->ibdev);
1226
1227 /* Release reserved PCI I/O and memory resources */
1228 pci_release_regions(pcidev);
1229
1230 /* Disable PCI device */
1231 pci_disable_device(pcidev);
1232
1233 /* Clear driver specific data */
1234 pci_set_drvdata(pcidev, NULL);
1235}
1236
1237static struct pci_driver c2_pci_driver = {
1238 .name = DRV_NAME,
1239 .id_table = c2_pci_table,
1240 .probe = c2_probe,
1241 .remove = __devexit_p(c2_remove),
1242};
1243
1244static int __init c2_init_module(void)
1245{
1246 return pci_module_init(&c2_pci_driver);
1247}
1248
1249static void __exit c2_exit_module(void)
1250{
1251 pci_unregister_driver(&c2_pci_driver);
1252}
1253
1254module_init(c2_init_module);
1255module_exit(c2_exit_module);
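The Tx and Rx host descriptor rings created by c2_tx_ring_alloc() and c2_rx_ring_alloc() above are circular: the last element's next pointer is wired back to the start, and c2_rx_fill(), c2_rx_clean() and c2_tx_clean() all walk them with a do/while loop until they arrive back at the starting element. A minimal sketch of that traversal idiom (the helper name is illustrative; the types are the ones declared in c2.h below):

static void c2_ring_for_each(struct c2_ring *ring,
			     void (*fn)(struct c2_element *elem))
{
	struct c2_element *elem = ring->start;

	/* visit every element exactly once, wrapping back to the start */
	do {
		fn(elem);
	} while ((elem = elem->next) != ring->start);
}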
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
new file mode 100644
index 000000000000..1b17dcdd0505
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -0,0 +1,551 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef __C2_H
35#define __C2_H
36
37#include <linux/netdevice.h>
38#include <linux/spinlock.h>
39#include <linux/kernel.h>
40#include <linux/pci.h>
41#include <linux/dma-mapping.h>
42#include <linux/idr.h>
43#include <asm/semaphore.h>
44
45#include "c2_provider.h"
46#include "c2_mq.h"
47#include "c2_status.h"
48
49#define DRV_NAME "c2"
50#define DRV_VERSION "1.1"
51#define PFX DRV_NAME ": "
52
53#define BAR_0 0
54#define BAR_2 2
55#define BAR_4 4
56
57#define RX_BUF_SIZE (1536 + 8)
58#define ETH_JUMBO_MTU 9000
59#define C2_MAGIC "CEPHEUS"
60#define C2_VERSION 4
61#define C2_IVN (18 & 0x7fffffff)
62
63#define C2_REG0_SIZE (16 * 1024)
64#define C2_REG2_SIZE (2 * 1024 * 1024)
65#define C2_REG4_SIZE (256 * 1024 * 1024)
66#define C2_NUM_TX_DESC 341
67#define C2_NUM_RX_DESC 256
68#define C2_PCI_REGS_OFFSET (0x10000)
69#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
70#define C2_RXP_HRXDQ_SIZE (4096)
71#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
72#define C2_TXP_HTXDQ_SIZE (4096)
73#define C2_TX_TIMEOUT (6*HZ)
74
75/* CEPHEUS */
76static const u8 c2_magic[] = {
77 0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
78};
79
80enum adapter_pci_regs {
81 C2_REGS_MAGIC = 0x0000,
82 C2_REGS_VERS = 0x0008,
83 C2_REGS_IVN = 0x000C,
84 C2_REGS_PCI_WINSIZE = 0x0010,
85 C2_REGS_Q0_QSIZE = 0x0014,
86 C2_REGS_Q0_MSGSIZE = 0x0018,
87 C2_REGS_Q0_POOLSTART = 0x001C,
88 C2_REGS_Q0_SHARED = 0x0020,
89 C2_REGS_Q1_QSIZE = 0x0024,
90 C2_REGS_Q1_MSGSIZE = 0x0028,
91 C2_REGS_Q1_SHARED = 0x0030,
92 C2_REGS_Q2_QSIZE = 0x0034,
93 C2_REGS_Q2_MSGSIZE = 0x0038,
94 C2_REGS_Q2_SHARED = 0x0040,
95 C2_REGS_ENADDR = 0x004C,
96 C2_REGS_RDMA_ENADDR = 0x0054,
97 C2_REGS_HRX_CUR = 0x006C,
98};
99
100struct c2_adapter_pci_regs {
101 char reg_magic[8];
102 u32 version;
103 u32 ivn;
104 u32 pci_window_size;
105 u32 q0_q_size;
106 u32 q0_msg_size;
107 u32 q0_pool_start;
108 u32 q0_shared;
109 u32 q1_q_size;
110 u32 q1_msg_size;
111 u32 q1_pool_start;
112 u32 q1_shared;
113 u32 q2_q_size;
114 u32 q2_msg_size;
115 u32 q2_pool_start;
116 u32 q2_shared;
117 u32 log_start;
118 u32 log_size;
119 u8 host_enaddr[8];
120 u8 rdma_enaddr[8];
121 u32 crash_entry;
122 u32 crash_ready[2];
123 u32 fw_txd_cur;
124 u32 fw_hrxd_cur;
125 u32 fw_rxd_cur;
126};
127
128enum pci_regs {
129 C2_HISR = 0x0000,
130 C2_DISR = 0x0004,
131 C2_HIMR = 0x0008,
132 C2_DIMR = 0x000C,
133 C2_NISR0 = 0x0010,
134 C2_NISR1 = 0x0014,
135 C2_NIMR0 = 0x0018,
136 C2_NIMR1 = 0x001C,
137 C2_IDIS = 0x0020,
138};
139
140enum {
141 C2_PCI_HRX_INT = 1 << 8,
142 C2_PCI_HTX_INT = 1 << 17,
143 C2_PCI_HRX_QUI = 1 << 31,
144};
145
146/*
147 * Cepheus registers in BAR0.
148 */
149struct c2_pci_regs {
150 u32 hostisr;
151 u32 dmaisr;
152 u32 hostimr;
153 u32 dmaimr;
154 u32 netisr0;
155 u32 netisr1;
156 u32 netimr0;
157 u32 netimr1;
158 u32 int_disable;
159};
160
161/* TXP flags */
162enum c2_txp_flags {
163 TXP_HTXD_DONE = 0,
164 TXP_HTXD_READY = 1 << 0,
165 TXP_HTXD_UNINIT = 1 << 1,
166};
167
168/* RXP flags */
169enum c2_rxp_flags {
170 RXP_HRXD_UNINIT = 0,
171 RXP_HRXD_READY = 1 << 0,
172 RXP_HRXD_DONE = 1 << 1,
173};
174
175/* RXP status */
176enum c2_rxp_status {
177 RXP_HRXD_ZERO = 0,
178 RXP_HRXD_OK = 1 << 0,
179 RXP_HRXD_BUF_OV = 1 << 1,
180};
181
182/* TXP descriptor fields */
183enum txp_desc {
184 C2_TXP_FLAGS = 0x0000,
185 C2_TXP_LEN = 0x0002,
186 C2_TXP_ADDR = 0x0004,
187};
188
189/* RXP descriptor fields */
190enum rxp_desc {
191 C2_RXP_FLAGS = 0x0000,
192 C2_RXP_STATUS = 0x0002,
193 C2_RXP_COUNT = 0x0004,
194 C2_RXP_LEN = 0x0006,
195 C2_RXP_ADDR = 0x0008,
196};
197
198struct c2_txp_desc {
199 u16 flags;
200 u16 len;
201 u64 addr;
202} __attribute__ ((packed));
203
204struct c2_rxp_desc {
205 u16 flags;
206 u16 status;
207 u16 count;
208 u16 len;
209 u64 addr;
210} __attribute__ ((packed));
211
212struct c2_rxp_hdr {
213 u16 flags;
214 u16 status;
215 u16 len;
216 u16 rsvd;
217} __attribute__ ((packed));
218
219struct c2_tx_desc {
220 u32 len;
221 u32 status;
222 dma_addr_t next_offset;
223};
224
225struct c2_rx_desc {
226 u32 len;
227 u32 status;
228 dma_addr_t next_offset;
229};
230
231struct c2_alloc {
232 u32 last;
233 u32 max;
234 spinlock_t lock;
235 unsigned long *table;
236};
237
238struct c2_array {
239 struct {
240 void **page;
241 int used;
242 } *page_list;
243};
244
245/*
246 * The MQ shared pointer pool is organized as a linked list of
247 * chunks. Each chunk contains a linked list of free shared pointers
248 * that can be allocated to a given user mode client.
249 *
250 */
251struct sp_chunk {
252 struct sp_chunk *next;
253 dma_addr_t dma_addr;
254 DECLARE_PCI_UNMAP_ADDR(mapping);
255 u16 head;
256 u16 shared_ptr[0];
257};
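/*
 * Illustrative state of a freshly built chunk (hypothetical values):
 * head == 0 and each slot of shared_ptr[] holds the index of the next
 * free slot, terminated by 0xFFFF:
 *
 *	head = 0
 *	shared_ptr[] = { 1, 2, 3, ..., 0xFFFF }
 *
 * c2_alloc_mqsp() pops shared_ptr[head]; c2_free_mqsp() pushes the
 * freed slot's index back onto the head of the list.
 */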
258
259struct c2_pd_table {
260 u32 last;
261 u32 max;
262 spinlock_t lock;
263 unsigned long *table;
264};
265
266struct c2_qp_table {
267 struct idr idr;
268 spinlock_t lock;
269 int last;
270};
271
272struct c2_element {
273 struct c2_element *next;
274 void *ht_desc; /* host descriptor */
275 void __iomem *hw_desc; /* hardware descriptor */
276 struct sk_buff *skb;
277 dma_addr_t mapaddr;
278 u32 maplen;
279};
280
281struct c2_ring {
282 struct c2_element *to_clean;
283 struct c2_element *to_use;
284 struct c2_element *start;
285 unsigned long count;
286};
287
288struct c2_dev {
289 struct ib_device ibdev;
290 void __iomem *regs;
291 void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
292 void __iomem *mmio_rxp_ring;
293 spinlock_t lock;
294 struct pci_dev *pcidev;
295 struct net_device *netdev;
296 struct net_device *pseudo_netdev;
297 unsigned int cur_tx;
298 unsigned int cur_rx;
299 u32 adapter_handle;
300 int device_cap_flags;
301 void __iomem *kva; /* KVA device memory */
302 unsigned long pa; /* PA device memory */
303 void **qptr_array;
304
305 kmem_cache_t *host_msg_cache;
306
307 struct list_head cca_link; /* adapter list */
308 struct list_head eh_wakeup_list; /* event wakeup list */
309 wait_queue_head_t req_vq_wo;
310
311 /* Cached RNIC properties */
312 struct ib_device_attr props;
313
314 struct c2_pd_table pd_table;
315 struct c2_qp_table qp_table;
316 int ports; /* num of GigE ports */
317 int devnum;
318 spinlock_t vqlock; /* sync vbs req MQ */
319
320 /* Verbs Queues */
321 struct c2_mq req_vq; /* Verbs Request MQ */
322 struct c2_mq rep_vq; /* Verbs Reply MQ */
323 struct c2_mq aeq; /* Async Events MQ */
324
325 /* Kernel client MQs */
326 struct sp_chunk *kern_mqsp_pool;
327
328 /* Device updates these values when posting messages to a host
329 * target queue */
330 u16 req_vq_shared;
331 u16 rep_vq_shared;
332 u16 aeq_shared;
333 u16 irq_claimed;
334
335 /*
336 * Shared host target pages for user-accessible MQs.
337 */
338 int hthead; /* index of first free entry */
339 void *htpages; /* kernel vaddr */
340 int htlen; /* length of htpages memory */
341 void *htuva; /* user mapped vaddr */
342 spinlock_t htlock; /* serialize allocation */
343
344 u64 adapter_hint_uva; /* access to the activity FIFO */
345
 346	/* spinlock_t aeq_lock; */
 347	/* spinlock_t rnic_lock; */
348
349 u16 *hint_count;
350 dma_addr_t hint_count_dma;
351 u16 hints_read;
352
353 int init; /* TRUE if it's ready */
354 char ae_cache_name[16];
355 char vq_cache_name[16];
356};
357
358struct c2_port {
359 u32 msg_enable;
360 struct c2_dev *c2dev;
361 struct net_device *netdev;
362
363 spinlock_t tx_lock;
364 u32 tx_avail;
365 struct c2_ring tx_ring;
366 struct c2_ring rx_ring;
367
368 void *mem; /* PCI memory for host rings */
369 dma_addr_t dma;
370 unsigned long mem_size;
371
372 u32 rx_buf_size;
373
374 struct net_device_stats netstats;
375};
376
377/*
378 * Activity FIFO registers in BAR0.
379 */
380#define PCI_BAR0_HOST_HINT 0x100
381#define PCI_BAR0_ADAPTER_HINT 0x2000
382
383/*
 384 * CQ arm flags, written to the CQ's shared->armed byte (see c2_cq.c).
385 */
386#define CQ_ARMED 0x01
387#define CQ_WAIT_FOR_DMA 0x80
388
389/*
390 * The format of a hint is as follows:
391 * Lower 16 bits are the count of hints for the queue.
392 * Next 15 bits are the qp_index
393 * Upper most bit depends on who reads it:
394 * If read by producer, then it means Full (1) or Not-Full (0)
395 * If read by consumer, then it means Empty (1) or Not-Empty (0)
396 */
 397#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
398#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
399#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
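/*
 * Illustrative use of the encoding above (hypothetical values):
 *
 *	u32 hint = C2_HINT_MAKE(5, 3);	   hint == 0x00050003
 *	C2_HINT_GET_INDEX(hint)		== 5
 *	C2_HINT_GET_COUNT(hint)		== 3
 */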
400
401
402/*
403 * The following defines the offset in SDRAM for the c2_adapter_pci_regs_t
404 * struct.
405 */
406#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
407
408#ifndef readq
409static inline u64 readq(const void __iomem * addr)
410{
411 u64 ret = readl(addr + 4);
412 ret <<= 32;
413 ret |= readl(addr);
414
415 return ret;
416}
417#endif
418
419#ifndef writeq
420static inline void __raw_writeq(u64 val, void __iomem * addr)
421{
422 __raw_writel((u32) (val), addr);
423 __raw_writel((u32) (val >> 32), (addr + 4));
424}
425#endif
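/*
 * Illustrative use of the 64-bit MMIO helpers above, assuming a
 * hypothetical 64-bit register at offset REG_OFF in BAR0:
 *
 *	u64 val = readq(c2dev->regs + REG_OFF);
 *	__raw_writeq(cpu_to_be64(val), c2dev->regs + REG_OFF);
 *
 * Note that __raw_writeq issues the low word first and does not itself
 * order the two 32-bit stores with respect to other accesses.
 */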
426
427#define C2_SET_CUR_RX(c2dev, cur_rx) \
428 __raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
429
430#define C2_GET_CUR_RX(c2dev) \
431 be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092))
432
433static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
434{
435 return container_of(ibdev, struct c2_dev, ibdev);
436}
437
438static inline int c2_errno(void *reply)
439{
440 switch (c2_wr_get_result(reply)) {
441 case C2_OK:
442 return 0;
443 case CCERR_NO_BUFS:
444 case CCERR_INSUFFICIENT_RESOURCES:
445 case CCERR_ZERO_RDMA_READ_RESOURCES:
446 return -ENOMEM;
447 case CCERR_MR_IN_USE:
448 case CCERR_QP_IN_USE:
449 return -EBUSY;
450 case CCERR_ADDR_IN_USE:
451 return -EADDRINUSE;
452 case CCERR_ADDR_NOT_AVAIL:
453 return -EADDRNOTAVAIL;
454 case CCERR_CONN_RESET:
455 return -ECONNRESET;
456 case CCERR_NOT_IMPLEMENTED:
457 case CCERR_INVALID_WQE:
458 return -ENOSYS;
459 case CCERR_QP_NOT_PRIVILEGED:
460 return -EPERM;
461 case CCERR_STACK_ERROR:
462 return -EPROTO;
463 case CCERR_ACCESS_VIOLATION:
464 case CCERR_BASE_AND_BOUNDS_VIOLATION:
465 return -EFAULT;
466 case CCERR_STAG_STATE_NOT_INVALID:
467 case CCERR_INVALID_ADDRESS:
468 case CCERR_INVALID_CQ:
469 case CCERR_INVALID_EP:
470 case CCERR_INVALID_MODIFIER:
471 case CCERR_INVALID_MTU:
472 case CCERR_INVALID_PD_ID:
473 case CCERR_INVALID_QP:
474 case CCERR_INVALID_RNIC:
475 case CCERR_INVALID_STAG:
476 return -EINVAL;
477 default:
478 return -EAGAIN;
479 }
480}
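/*
 * Typical reply-handling idiom built on c2_errno() (sketch; see the
 * verbs-request callers in c2_cm.c and c2_cq.c):
 *
 *	reply = (struct c2wr_ep_listen_create_rep *) (unsigned long)
 *		vq_req->reply_msg;
 *	if (!reply) {
 *		err = -ENOMEM;
 *		goto bail;
 *	}
 *	if ((err = c2_errno(reply)) != 0)
 *		goto bail;
 */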
481
482/* Device */
483extern int c2_register_device(struct c2_dev *c2dev);
484extern void c2_unregister_device(struct c2_dev *c2dev);
485extern int c2_rnic_init(struct c2_dev *c2dev);
486extern void c2_rnic_term(struct c2_dev *c2dev);
487extern void c2_rnic_interrupt(struct c2_dev *c2dev);
488extern int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
489extern int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask);
490
491/* QPs */
492extern int c2_alloc_qp(struct c2_dev *c2dev, struct c2_pd *pd,
493 struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp);
494extern void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp);
495extern struct ib_qp *c2_get_qp(struct ib_device *device, int qpn);
496extern int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
497 struct ib_qp_attr *attr, int attr_mask);
498extern int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
499 int ord, int ird);
500extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
501 struct ib_send_wr **bad_wr);
502extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
503 struct ib_recv_wr **bad_wr);
504extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
505extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
506extern void c2_set_qp_state(struct c2_qp *, int);
507extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
508
509/* PDs */
510extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
511extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
512extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
513extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
514
515/* CQs */
516extern int c2_init_cq(struct c2_dev *c2dev, int entries,
517 struct c2_ucontext *ctx, struct c2_cq *cq);
518extern void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq);
519extern void c2_cq_event(struct c2_dev *c2dev, u32 mq_index);
520extern void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index);
521extern int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
522extern int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify);
523
524/* CM */
525extern int c2_llp_connect(struct iw_cm_id *cm_id,
526 struct iw_cm_conn_param *iw_param);
527extern int c2_llp_accept(struct iw_cm_id *cm_id,
528 struct iw_cm_conn_param *iw_param);
529extern int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata,
530 u8 pdata_len);
531extern int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog);
532extern int c2_llp_service_destroy(struct iw_cm_id *cm_id);
533
534/* MM */
535extern int c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
536 int page_size, int pbl_depth, u32 length,
537 u32 off, u64 *va, enum c2_acf acf,
538 struct c2_mr *mr);
539extern int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index);
540
541/* AE */
542extern void c2_ae_event(struct c2_dev *c2dev, u32 mq_index);
543
544/* MQSP Allocator */
545extern int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
546 struct sp_chunk **root);
547extern void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root);
548extern u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
549 dma_addr_t *dma_addr, gfp_t gfp_mask);
550extern void c2_free_mqsp(u16 * mqsp);
551#endif
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
new file mode 100644
index 000000000000..08f46c83a3a4
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -0,0 +1,321 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include "c2.h"
34#include <rdma/iw_cm.h>
35#include "c2_status.h"
36#include "c2_ae.h"
37
38static int c2_convert_cm_status(u32 c2_status)
39{
40 switch (c2_status) {
41 case C2_CONN_STATUS_SUCCESS:
42 return 0;
43 case C2_CONN_STATUS_REJECTED:
44 return -ENETRESET;
45 case C2_CONN_STATUS_REFUSED:
46 return -ECONNREFUSED;
47 case C2_CONN_STATUS_TIMEDOUT:
48 return -ETIMEDOUT;
49 case C2_CONN_STATUS_NETUNREACH:
50 return -ENETUNREACH;
51 case C2_CONN_STATUS_HOSTUNREACH:
52 return -EHOSTUNREACH;
53 case C2_CONN_STATUS_INVALID_RNIC:
54 return -EINVAL;
55 case C2_CONN_STATUS_INVALID_QP:
56 return -EINVAL;
57 case C2_CONN_STATUS_INVALID_QP_STATE:
58 return -EINVAL;
59 case C2_CONN_STATUS_ADDR_NOT_AVAIL:
60 return -EADDRNOTAVAIL;
61 default:
62 printk(KERN_ERR PFX
63 "%s - Unable to convert CM status: %d\n",
64 __FUNCTION__, c2_status);
65 return -EIO;
66 }
67}
68
69#ifdef DEBUG
70static const char* to_event_str(int event)
71{
72 static const char* event_str[] = {
73 "CCAE_REMOTE_SHUTDOWN",
74 "CCAE_ACTIVE_CONNECT_RESULTS",
75 "CCAE_CONNECTION_REQUEST",
76 "CCAE_LLP_CLOSE_COMPLETE",
77 "CCAE_TERMINATE_MESSAGE_RECEIVED",
78 "CCAE_LLP_CONNECTION_RESET",
79 "CCAE_LLP_CONNECTION_LOST",
80 "CCAE_LLP_SEGMENT_SIZE_INVALID",
81 "CCAE_LLP_INVALID_CRC",
82 "CCAE_LLP_BAD_FPDU",
83 "CCAE_INVALID_DDP_VERSION",
84 "CCAE_INVALID_RDMA_VERSION",
85 "CCAE_UNEXPECTED_OPCODE",
86 "CCAE_INVALID_DDP_QUEUE_NUMBER",
87 "CCAE_RDMA_READ_NOT_ENABLED",
88 "CCAE_RDMA_WRITE_NOT_ENABLED",
89 "CCAE_RDMA_READ_TOO_SMALL",
90 "CCAE_NO_L_BIT",
91 "CCAE_TAGGED_INVALID_STAG",
92 "CCAE_TAGGED_BASE_BOUNDS_VIOLATION",
93 "CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION",
94 "CCAE_TAGGED_INVALID_PD",
95 "CCAE_WRAP_ERROR",
96 "CCAE_BAD_CLOSE",
97 "CCAE_BAD_LLP_CLOSE",
98 "CCAE_INVALID_MSN_RANGE",
99 "CCAE_INVALID_MSN_GAP",
100 "CCAE_IRRQ_OVERFLOW",
101 "CCAE_IRRQ_MSN_GAP",
102 "CCAE_IRRQ_MSN_RANGE",
103 "CCAE_IRRQ_INVALID_STAG",
104 "CCAE_IRRQ_BASE_BOUNDS_VIOLATION",
105 "CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION",
106 "CCAE_IRRQ_INVALID_PD",
107 "CCAE_IRRQ_WRAP_ERROR",
108 "CCAE_CQ_SQ_COMPLETION_OVERFLOW",
109 "CCAE_CQ_RQ_COMPLETION_ERROR",
110 "CCAE_QP_SRQ_WQE_ERROR",
111 "CCAE_QP_LOCAL_CATASTROPHIC_ERROR",
112 "CCAE_CQ_OVERFLOW",
113 "CCAE_CQ_OPERATION_ERROR",
114 "CCAE_SRQ_LIMIT_REACHED",
115 "CCAE_QP_RQ_LIMIT_REACHED",
116 "CCAE_SRQ_CATASTROPHIC_ERROR",
117 "CCAE_RNIC_CATASTROPHIC_ERROR"
118 };
119
120 if (event < CCAE_REMOTE_SHUTDOWN ||
121 event > CCAE_RNIC_CATASTROPHIC_ERROR)
122 return "<invalid event>";
123
124 event -= CCAE_REMOTE_SHUTDOWN;
125 return event_str[event];
126}
127
128static const char *to_qp_state_str(int state)
129{
130 switch (state) {
131 case C2_QP_STATE_IDLE:
132 return "C2_QP_STATE_IDLE";
133 case C2_QP_STATE_CONNECTING:
134 return "C2_QP_STATE_CONNECTING";
135 case C2_QP_STATE_RTS:
136 return "C2_QP_STATE_RTS";
137 case C2_QP_STATE_CLOSING:
138 return "C2_QP_STATE_CLOSING";
139 case C2_QP_STATE_TERMINATE:
140 return "C2_QP_STATE_TERMINATE";
141 case C2_QP_STATE_ERROR:
142 return "C2_QP_STATE_ERROR";
143 default:
144 return "<invalid QP state>";
145 };
146}
147#endif
148
149void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
150{
151 struct c2_mq *mq = c2dev->qptr_array[mq_index];
152 union c2wr *wr;
153 void *resource_user_context;
154 struct iw_cm_event cm_event;
155 struct ib_event ib_event;
156 enum c2_resource_indicator resource_indicator;
157 enum c2_event_id event_id;
158 unsigned long flags;
159 int status;
160
161 /*
 162	 * retrieve the message
163 */
164 wr = c2_mq_consume(mq);
165 if (!wr)
166 return;
167
168 memset(&ib_event, 0, sizeof(ib_event));
169 memset(&cm_event, 0, sizeof(cm_event));
170
171 event_id = c2_wr_get_id(wr);
172 resource_indicator = be32_to_cpu(wr->ae.ae_generic.resource_type);
173 resource_user_context =
174 (void *) (unsigned long) wr->ae.ae_generic.user_context;
175
176 status = cm_event.status = c2_convert_cm_status(c2_wr_get_result(wr));
177
178 pr_debug("event received c2_dev=%p, event_id=%d, "
179 "resource_indicator=%d, user_context=%p, status = %d\n",
180 c2dev, event_id, resource_indicator, resource_user_context,
181 status);
182
183 switch (resource_indicator) {
184 case C2_RES_IND_QP:{
185
186 struct c2_qp *qp = (struct c2_qp *)resource_user_context;
187 struct iw_cm_id *cm_id = qp->cm_id;
188 struct c2wr_ae_active_connect_results *res;
189
190 if (!cm_id) {
191 pr_debug("event received, but cm_id is <nul>, qp=%p!\n",
192 qp);
193 goto ignore_it;
194 }
195 pr_debug("%s: event = %s, user_context=%llx, "
196 "resource_type=%x, "
197 "resource=%x, qp_state=%s\n",
198 __FUNCTION__,
199 to_event_str(event_id),
200 be64_to_cpu(wr->ae.ae_generic.user_context),
201 be32_to_cpu(wr->ae.ae_generic.resource_type),
202 be32_to_cpu(wr->ae.ae_generic.resource),
203 to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
204
205 c2_set_qp_state(qp, be32_to_cpu(wr->ae.ae_generic.qp_state));
206
207 switch (event_id) {
208 case CCAE_ACTIVE_CONNECT_RESULTS:
209 res = &wr->ae.ae_active_connect_results;
210 cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
211 cm_event.local_addr.sin_addr.s_addr = res->laddr;
212 cm_event.remote_addr.sin_addr.s_addr = res->raddr;
213 cm_event.local_addr.sin_port = res->lport;
214 cm_event.remote_addr.sin_port = res->rport;
215 if (status == 0) {
216 cm_event.private_data_len =
217 be32_to_cpu(res->private_data_length);
218 cm_event.private_data = res->private_data;
219 } else {
220 spin_lock_irqsave(&qp->lock, flags);
221 if (qp->cm_id) {
222 qp->cm_id->rem_ref(qp->cm_id);
223 qp->cm_id = NULL;
224 }
225 spin_unlock_irqrestore(&qp->lock, flags);
226 cm_event.private_data_len = 0;
227 cm_event.private_data = NULL;
228 }
229 if (cm_id->event_handler)
230 cm_id->event_handler(cm_id, &cm_event);
231 break;
232 case CCAE_TERMINATE_MESSAGE_RECEIVED:
233 case CCAE_CQ_SQ_COMPLETION_OVERFLOW:
234 ib_event.device = &c2dev->ibdev;
235 ib_event.element.qp = &qp->ibqp;
236 ib_event.event = IB_EVENT_QP_REQ_ERR;
237
238 if (qp->ibqp.event_handler)
 239				qp->ibqp.event_handler(&ib_event,
 240						       qp->ibqp.qp_context);
242 break;
243 case CCAE_BAD_CLOSE:
244 case CCAE_LLP_CLOSE_COMPLETE:
245 case CCAE_LLP_CONNECTION_RESET:
246 case CCAE_LLP_CONNECTION_LOST:
 247			BUG_ON(cm_id->event_handler == (void *) 0x6b6b6b6b);	/* slab poison: catch a freed cm_id */
248
249 spin_lock_irqsave(&qp->lock, flags);
250 if (qp->cm_id) {
251 qp->cm_id->rem_ref(qp->cm_id);
252 qp->cm_id = NULL;
253 }
254 spin_unlock_irqrestore(&qp->lock, flags);
255 cm_event.event = IW_CM_EVENT_CLOSE;
256 cm_event.status = 0;
257 if (cm_id->event_handler)
258 cm_id->event_handler(cm_id, &cm_event);
259 break;
260 default:
 261			pr_debug("%s:%d Unexpected event_id=%d on QP=%p, "
 262				"CM_ID=%p\n",
 263				__FUNCTION__, __LINE__,
 264				event_id, qp, cm_id);
 265			BUG_ON(1);
266 break;
267 }
268 break;
269 }
270
271 case C2_RES_IND_EP:{
272
273 struct c2wr_ae_connection_request *req =
274 &wr->ae.ae_connection_request;
275 struct iw_cm_id *cm_id =
276 (struct iw_cm_id *)resource_user_context;
277
278 pr_debug("C2_RES_IND_EP event_id=%d\n", event_id);
279 if (event_id != CCAE_CONNECTION_REQUEST) {
280 pr_debug("%s: Invalid event_id: %d\n",
281 __FUNCTION__, event_id);
282 break;
283 }
284 cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
285 cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
286 cm_event.local_addr.sin_addr.s_addr = req->laddr;
287 cm_event.remote_addr.sin_addr.s_addr = req->raddr;
288 cm_event.local_addr.sin_port = req->lport;
289 cm_event.remote_addr.sin_port = req->rport;
290 cm_event.private_data_len =
291 be32_to_cpu(req->private_data_length);
292 cm_event.private_data = req->private_data;
293
294 if (cm_id->event_handler)
295 cm_id->event_handler(cm_id, &cm_event);
296 break;
297 }
298
299 case C2_RES_IND_CQ:{
300 struct c2_cq *cq =
301 (struct c2_cq *) resource_user_context;
302
303 pr_debug("IB_EVENT_CQ_ERR\n");
304 ib_event.device = &c2dev->ibdev;
305 ib_event.element.cq = &cq->ibcq;
306 ib_event.event = IB_EVENT_CQ_ERR;
307
308 if (cq->ibcq.event_handler)
309 cq->ibcq.event_handler(&ib_event,
310 cq->ibcq.cq_context);
 311			break;
 312		}
313 default:
 314		printk(KERN_ERR PFX "Bad resource indicator = %d\n",
 315		       resource_indicator);
316 break;
317 }
318
319 ignore_it:
320 c2_mq_free(mq);
321}
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.h b/drivers/infiniband/hw/amso1100/c2_ae.h
new file mode 100644
index 000000000000..3a065c33b83b
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_ae.h
@@ -0,0 +1,108 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef _C2_AE_H_
34#define _C2_AE_H_
35
36/*
37 * WARNING: If you change this file, also bump C2_IVN_BASE
38 * in common/include/clustercore/c2_ivn.h.
39 */
40
41/*
42 * Asynchronous Event Identifiers
43 *
44 * These start at 0x80 only so it's obvious from inspection that
45 * they are not work-request statuses. This isn't critical.
46 *
47 * NOTE: these event id's must fit in eight bits.
48 */
49enum c2_event_id {
50 CCAE_REMOTE_SHUTDOWN = 0x80,
51 CCAE_ACTIVE_CONNECT_RESULTS,
52 CCAE_CONNECTION_REQUEST,
53 CCAE_LLP_CLOSE_COMPLETE,
54 CCAE_TERMINATE_MESSAGE_RECEIVED,
55 CCAE_LLP_CONNECTION_RESET,
56 CCAE_LLP_CONNECTION_LOST,
57 CCAE_LLP_SEGMENT_SIZE_INVALID,
58 CCAE_LLP_INVALID_CRC,
59 CCAE_LLP_BAD_FPDU,
60 CCAE_INVALID_DDP_VERSION,
61 CCAE_INVALID_RDMA_VERSION,
62 CCAE_UNEXPECTED_OPCODE,
63 CCAE_INVALID_DDP_QUEUE_NUMBER,
64 CCAE_RDMA_READ_NOT_ENABLED,
65 CCAE_RDMA_WRITE_NOT_ENABLED,
66 CCAE_RDMA_READ_TOO_SMALL,
67 CCAE_NO_L_BIT,
68 CCAE_TAGGED_INVALID_STAG,
69 CCAE_TAGGED_BASE_BOUNDS_VIOLATION,
70 CCAE_TAGGED_ACCESS_RIGHTS_VIOLATION,
71 CCAE_TAGGED_INVALID_PD,
72 CCAE_WRAP_ERROR,
73 CCAE_BAD_CLOSE,
74 CCAE_BAD_LLP_CLOSE,
75 CCAE_INVALID_MSN_RANGE,
76 CCAE_INVALID_MSN_GAP,
77 CCAE_IRRQ_OVERFLOW,
78 CCAE_IRRQ_MSN_GAP,
79 CCAE_IRRQ_MSN_RANGE,
80 CCAE_IRRQ_INVALID_STAG,
81 CCAE_IRRQ_BASE_BOUNDS_VIOLATION,
82 CCAE_IRRQ_ACCESS_RIGHTS_VIOLATION,
83 CCAE_IRRQ_INVALID_PD,
84 CCAE_IRRQ_WRAP_ERROR,
85 CCAE_CQ_SQ_COMPLETION_OVERFLOW,
86 CCAE_CQ_RQ_COMPLETION_ERROR,
87 CCAE_QP_SRQ_WQE_ERROR,
88 CCAE_QP_LOCAL_CATASTROPHIC_ERROR,
89 CCAE_CQ_OVERFLOW,
90 CCAE_CQ_OPERATION_ERROR,
91 CCAE_SRQ_LIMIT_REACHED,
92 CCAE_QP_RQ_LIMIT_REACHED,
93 CCAE_SRQ_CATASTROPHIC_ERROR,
94 CCAE_RNIC_CATASTROPHIC_ERROR
 95/* WARNING: If you add more IDs, make sure their values fit in eight bits. */
96};
97
98/*
99 * Resource Indicators and Identifiers
100 */
101enum c2_resource_indicator {
102 C2_RES_IND_QP = 1,
103 C2_RES_IND_EP,
104 C2_RES_IND_CQ,
105 C2_RES_IND_SRQ,
106};
107
108#endif /* _C2_AE_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
new file mode 100644
index 000000000000..1d2529992c0c
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -0,0 +1,144 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/errno.h>
35#include <linux/slab.h>
36#include <linux/bitmap.h>
37
38#include "c2.h"
39
40static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
41 struct sp_chunk **head)
42{
43 int i;
44 struct sp_chunk *new_head;
45
46 new_head = (struct sp_chunk *) __get_free_page(gfp_mask);
47 if (new_head == NULL)
48 return -ENOMEM;
49
50 new_head->dma_addr = dma_map_single(c2dev->ibdev.dma_device, new_head,
51 PAGE_SIZE, DMA_FROM_DEVICE);
52 pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);
53
54 new_head->next = NULL;
55 new_head->head = 0;
56
57 /* build list where each index is the next free slot */
58 for (i = 0;
59 i < (PAGE_SIZE - sizeof(struct sp_chunk) -
60 sizeof(u16)) / sizeof(u16) - 1;
61 i++) {
62 new_head->shared_ptr[i] = i + 1;
63 }
64 /* terminate list */
65 new_head->shared_ptr[i] = 0xFFFF;
66
67 *head = new_head;
68 return 0;
69}
70
71int c2_init_mqsp_pool(struct c2_dev *c2dev, gfp_t gfp_mask,
72 struct sp_chunk **root)
73{
74 return c2_alloc_mqsp_chunk(c2dev, gfp_mask, root);
75}
76
77void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
78{
79 struct sp_chunk *next;
80
81 while (root) {
82 next = root->next;
83 dma_unmap_single(c2dev->ibdev.dma_device,
84 pci_unmap_addr(root, mapping), PAGE_SIZE,
85 DMA_FROM_DEVICE);
86 __free_page((struct page *) root);
87 root = next;
88 }
89}
90
91u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head,
92 dma_addr_t *dma_addr, gfp_t gfp_mask)
93{
94 u16 mqsp;
95
96 while (head) {
97 mqsp = head->head;
98 if (mqsp != 0xFFFF) {
99 head->head = head->shared_ptr[mqsp];
100 break;
101 } else if (head->next == NULL) {
102 if (c2_alloc_mqsp_chunk(c2dev, gfp_mask, &head->next) ==
103 0) {
104 head = head->next;
105 mqsp = head->head;
106 head->head = head->shared_ptr[mqsp];
107 break;
108 } else
109 return NULL;
110 } else
111 head = head->next;
112 }
113 if (head) {
114 *dma_addr = head->dma_addr +
115 ((unsigned long) &(head->shared_ptr[mqsp]) -
116 (unsigned long) head);
117 pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
118 &(head->shared_ptr[mqsp]), (u64)*dma_addr);
119 return &(head->shared_ptr[mqsp]);
120 }
121 return NULL;
122}
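/*
 * Typical caller (see c2_init_cq() in c2_cq.c):
 *
 *	cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
 *				      &cq->mq.shared_dma, GFP_KERNEL);
 *	if (!cq->mq.shared)
 *		return -ENOMEM;
 */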
123
124void c2_free_mqsp(u16 * mqsp)
125{
126 struct sp_chunk *head;
127 u16 idx;
128
129 /* The chunk containing this ptr begins at the page boundary */
130 head = (struct sp_chunk *) ((unsigned long) mqsp & PAGE_MASK);
131
132 /* Link head to new mqsp */
133 *mqsp = head->head;
134
135 /* Compute the shared_ptr index */
136 idx = ((unsigned long) mqsp & ~PAGE_MASK) >> 1;
137 idx -= (unsigned long) &(((struct sp_chunk *) 0)->shared_ptr[0]) >> 1;
138
139 /* Point this index at the head */
140 head->shared_ptr[idx] = head->head;
141
142 /* Point head at this index */
143 head->head = idx;
144}
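/*
 * Worked example of the index math above (hypothetical layout): if
 * shared_ptr[] happens to start at byte offset 24 within the chunk,
 * a pointer at page offset 28 yields
 *
 *	idx = (28 >> 1) - (24 >> 1) = 14 - 12 = 2
 *
 * i.e. the third u16 slot, which is then pushed back onto the free list.
 */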
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
new file mode 100644
index 000000000000..485254efdd1e
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -0,0 +1,452 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34#include "c2.h"
35#include "c2_wr.h"
36#include "c2_vq.h"
37#include <rdma/iw_cm.h>
38
39int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
40{
41 struct c2_dev *c2dev = to_c2dev(cm_id->device);
42 struct ib_qp *ibqp;
43 struct c2_qp *qp;
44 struct c2wr_qp_connect_req *wr; /* variable size needs a malloc. */
45 struct c2_vq_req *vq_req;
46 int err;
47
48 ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
49 if (!ibqp)
50 return -EINVAL;
51 qp = to_c2qp(ibqp);
52
53 /* Associate QP <--> CM_ID */
54 cm_id->provider_data = qp;
55 cm_id->add_ref(cm_id);
56 qp->cm_id = cm_id;
57
58 /*
59 * only support the max private_data length
60 */
61 if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
62 err = -EINVAL;
63 goto bail0;
64 }
65 /*
66 * Set the rdma read limits
67 */
68 err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
69 if (err)
70 goto bail0;
71
72 /*
73 * Create and send a WR_QP_CONNECT...
74 */
75 wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
76 if (!wr) {
77 err = -ENOMEM;
78 goto bail0;
79 }
80
81 vq_req = vq_req_alloc(c2dev);
82 if (!vq_req) {
83 err = -ENOMEM;
84 goto bail1;
85 }
86
87 c2_wr_set_id(wr, CCWR_QP_CONNECT);
88 wr->hdr.context = 0;
89 wr->rnic_handle = c2dev->adapter_handle;
90 wr->qp_handle = qp->adapter_handle;
91
92 wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
93 wr->remote_port = cm_id->remote_addr.sin_port;
94
95 /*
 96	 * Move any private data from the caller's buf into
97 * the WR.
98 */
99 if (iw_param->private_data) {
100 wr->private_data_length =
101 cpu_to_be32(iw_param->private_data_len);
102 memcpy(&wr->private_data[0], iw_param->private_data,
103 iw_param->private_data_len);
104 } else
105 wr->private_data_length = 0;
106
107 /*
108 * Send WR to adapter. NOTE: There is no synch reply from
109 * the adapter.
110 */
111 err = vq_send_wr(c2dev, (union c2wr *) wr);
112 vq_req_free(c2dev, vq_req);
113
114 bail1:
115 kfree(wr);
116 bail0:
117 if (err) {
118 /*
119 * If we fail, release reference on QP and
120 * disassociate QP from CM_ID
121 */
122 cm_id->provider_data = NULL;
123 qp->cm_id = NULL;
124 cm_id->rem_ref(cm_id);
125 }
126 return err;
127}
128
129int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
130{
131 struct c2_dev *c2dev;
132 struct c2wr_ep_listen_create_req wr;
133 struct c2wr_ep_listen_create_rep *reply;
134 struct c2_vq_req *vq_req;
135 int err;
136
137 c2dev = to_c2dev(cm_id->device);
138 if (c2dev == NULL)
139 return -EINVAL;
140
141 /*
142 * Allocate verbs request.
143 */
144 vq_req = vq_req_alloc(c2dev);
145 if (!vq_req)
146 return -ENOMEM;
147
148 /*
149 * Build the WR
150 */
151 c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
152 wr.hdr.context = (u64) (unsigned long) vq_req;
153 wr.rnic_handle = c2dev->adapter_handle;
154 wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
155 wr.local_port = cm_id->local_addr.sin_port;
156 wr.backlog = cpu_to_be32(backlog);
157 wr.user_context = (u64) (unsigned long) cm_id;
158
159 /*
160 * Reference the request struct. Dereferenced in the int handler.
161 */
162 vq_req_get(c2dev, vq_req);
163
164 /*
165 * Send WR to adapter
166 */
167 err = vq_send_wr(c2dev, (union c2wr *) & wr);
168 if (err) {
169 vq_req_put(c2dev, vq_req);
170 goto bail0;
171 }
172
173 /*
174 * Wait for reply from adapter
175 */
176 err = vq_wait_for_reply(c2dev, vq_req);
177 if (err)
178 goto bail0;
179
180 /*
181 * Process reply
182 */
183 reply =
184 (struct c2wr_ep_listen_create_rep *) (unsigned long) vq_req->reply_msg;
185 if (!reply) {
186 err = -ENOMEM;
187 goto bail1;
188 }
189
190 if ((err = c2_errno(reply)) != 0)
191 goto bail1;
192
193 /*
194 * Keep the adapter handle. Used in subsequent destroy
195 */
196 cm_id->provider_data = (void*)(unsigned long) reply->ep_handle;
197
198 /*
199 * free vq stuff
200 */
201 vq_repbuf_free(c2dev, reply);
202 vq_req_free(c2dev, vq_req);
203
204 return 0;
205
206 bail1:
207 vq_repbuf_free(c2dev, reply);
208 bail0:
209 vq_req_free(c2dev, vq_req);
210 return err;
211}
212
213
214int c2_llp_service_destroy(struct iw_cm_id *cm_id)
215{
216
217 struct c2_dev *c2dev;
218 struct c2wr_ep_listen_destroy_req wr;
219 struct c2wr_ep_listen_destroy_rep *reply;
220 struct c2_vq_req *vq_req;
221 int err;
222
223 c2dev = to_c2dev(cm_id->device);
224 if (c2dev == NULL)
225 return -EINVAL;
226
227 /*
228 * Allocate verbs request.
229 */
230 vq_req = vq_req_alloc(c2dev);
231 if (!vq_req)
232 return -ENOMEM;
233
234 /*
235 * Build the WR
236 */
237 c2_wr_set_id(&wr, CCWR_EP_LISTEN_DESTROY);
238 wr.hdr.context = (unsigned long) vq_req;
239 wr.rnic_handle = c2dev->adapter_handle;
240 wr.ep_handle = (u32)(unsigned long)cm_id->provider_data;
241
242 /*
243 * reference the request struct. dereferenced in the int handler.
244 */
245 vq_req_get(c2dev, vq_req);
246
247 /*
248 * Send WR to adapter
249 */
250 err = vq_send_wr(c2dev, (union c2wr *) & wr);
251 if (err) {
252 vq_req_put(c2dev, vq_req);
253 goto bail0;
254 }
255
256 /*
257 * Wait for reply from adapter
258 */
259 err = vq_wait_for_reply(c2dev, vq_req);
260 if (err)
261 goto bail0;
262
263 /*
264 * Process reply
265 */
266 reply=(struct c2wr_ep_listen_destroy_rep *)(unsigned long)vq_req->reply_msg;
267 if (!reply) {
268 err = -ENOMEM;
269 goto bail0;
270 }
271 if ((err = c2_errno(reply)) != 0)
272 goto bail1;
273
274 bail1:
275 vq_repbuf_free(c2dev, reply);
276 bail0:
277 vq_req_free(c2dev, vq_req);
278 return err;
279}
280
281int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
282{
283 struct c2_dev *c2dev = to_c2dev(cm_id->device);
284 struct c2_qp *qp;
285 struct ib_qp *ibqp;
286 struct c2wr_cr_accept_req *wr; /* variable length WR */
287 struct c2_vq_req *vq_req;
288 struct c2wr_cr_accept_rep *reply; /* VQ Reply msg ptr. */
289 int err;
290
291 ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
292 if (!ibqp)
293 return -EINVAL;
294 qp = to_c2qp(ibqp);
295
296 /* Set the RDMA read limits */
297 err = c2_qp_set_read_limits(c2dev, qp, iw_param->ord, iw_param->ird);
298 if (err)
299 goto bail0;
300
301 /* Allocate verbs request. */
302 vq_req = vq_req_alloc(c2dev);
303 if (!vq_req) {
304 err = -ENOMEM;
305 goto bail1;
306 }
307 vq_req->qp = qp;
308 vq_req->cm_id = cm_id;
309 vq_req->event = IW_CM_EVENT_ESTABLISHED;
310
311 wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
312 if (!wr) {
313 err = -ENOMEM;
314 goto bail2;
315 }
316
317 /* Build the WR */
318 c2_wr_set_id(wr, CCWR_CR_ACCEPT);
319 wr->hdr.context = (unsigned long) vq_req;
320 wr->rnic_handle = c2dev->adapter_handle;
321 wr->ep_handle = (u32) (unsigned long) cm_id->provider_data;
322 wr->qp_handle = qp->adapter_handle;
323
324 /* Replace the cr_handle with the QP after accept */
 325	cm_id->provider_data = qp;
 326	cm_id->add_ref(cm_id);
 327	qp->cm_id = cm_id;
330
331 /* Validate private_data length */
332 if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
333 err = -EINVAL;
334 goto bail2;
335 }
336
337 if (iw_param->private_data) {
338 wr->private_data_length = cpu_to_be32(iw_param->private_data_len);
339 memcpy(&wr->private_data[0],
340 iw_param->private_data, iw_param->private_data_len);
341 } else
342 wr->private_data_length = 0;
343
344 /* Reference the request struct. Dereferenced in the int handler. */
345 vq_req_get(c2dev, vq_req);
346
347 /* Send WR to adapter */
348 err = vq_send_wr(c2dev, (union c2wr *) wr);
349 if (err) {
350 vq_req_put(c2dev, vq_req);
351 goto bail2;
352 }
353
354 /* Wait for reply from adapter */
355 err = vq_wait_for_reply(c2dev, vq_req);
356 if (err)
357 goto bail2;
358
359 /* Check that reply is present */
360 reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
361 if (!reply) {
362 err = -ENOMEM;
363 goto bail2;
364 }
365
366 err = c2_errno(reply);
367 vq_repbuf_free(c2dev, reply);
368
369 if (!err)
370 c2_set_qp_state(qp, C2_QP_STATE_RTS);
371 bail2:
372 kfree(wr);
373 bail1:
374 vq_req_free(c2dev, vq_req);
375 bail0:
376 if (err) {
377 /*
378 * If we fail, release reference on QP and
379 * disassociate QP from CM_ID
380 */
381 cm_id->provider_data = NULL;
382 qp->cm_id = NULL;
383 cm_id->rem_ref(cm_id);
384 }
385 return err;
386}
387
388int c2_llp_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
389{
390 struct c2_dev *c2dev;
391 struct c2wr_cr_reject_req wr;
392 struct c2_vq_req *vq_req;
393 struct c2wr_cr_reject_rep *reply;
394 int err;
395
396 c2dev = to_c2dev(cm_id->device);
397
398 /*
399 * Allocate verbs request.
400 */
401 vq_req = vq_req_alloc(c2dev);
402 if (!vq_req)
403 return -ENOMEM;
404
405 /*
406 * Build the WR
407 */
408 c2_wr_set_id(&wr, CCWR_CR_REJECT);
409 wr.hdr.context = (unsigned long) vq_req;
410 wr.rnic_handle = c2dev->adapter_handle;
411 wr.ep_handle = (u32) (unsigned long) cm_id->provider_data;
412
413 /*
414 * reference the request struct. dereferenced in the int handler.
415 */
416 vq_req_get(c2dev, vq_req);
417
418 /*
419 * Send WR to adapter
420 */
421 err = vq_send_wr(c2dev, (union c2wr *) & wr);
422 if (err) {
423 vq_req_put(c2dev, vq_req);
424 goto bail0;
425 }
426
427 /*
428 * Wait for reply from adapter
429 */
430 err = vq_wait_for_reply(c2dev, vq_req);
431 if (err)
432 goto bail0;
433
434 /*
435 * Process reply
436 */
437 reply = (struct c2wr_cr_reject_rep *) (unsigned long)
438 vq_req->reply_msg;
439 if (!reply) {
440 err = -ENOMEM;
441 goto bail0;
442 }
443 err = c2_errno(reply);
444 /*
445 * free vq stuff
446 */
447 vq_repbuf_free(c2dev, reply);
448
449 bail0:
450 vq_req_free(c2dev, vq_req);
451 return err;
452}
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
new file mode 100644
index 000000000000..9d7bcc5ade93
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -0,0 +1,433 @@
1/*
2 * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved.
5 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
8 *
9 * This software is available to you under a choice of one of two
10 * licenses. You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
14 *
15 * Redistribution and use in source and binary forms, with or
16 * without modification, are permitted provided that the following
17 * conditions are met:
18 *
19 * - Redistributions of source code must retain the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer.
22 *
23 * - Redistributions in binary form must reproduce the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer in the documentation and/or other materials
26 * provided with the distribution.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35 * SOFTWARE.
36 *
37 */
38#include "c2.h"
39#include "c2_vq.h"
40#include "c2_status.h"
41
42#define C2_CQ_MSG_SIZE ((sizeof(struct c2wr_ce) + 32-1) & ~(32-1))
43
44static struct c2_cq *c2_cq_get(struct c2_dev *c2dev, int cqn)
45{
46 struct c2_cq *cq;
47 unsigned long flags;
48
49 spin_lock_irqsave(&c2dev->lock, flags);
50 cq = c2dev->qptr_array[cqn];
51 if (!cq) {
52 spin_unlock_irqrestore(&c2dev->lock, flags);
53 return NULL;
54 }
55 atomic_inc(&cq->refcount);
56 spin_unlock_irqrestore(&c2dev->lock, flags);
57 return cq;
58}
59
60static void c2_cq_put(struct c2_cq *cq)
61{
62 if (atomic_dec_and_test(&cq->refcount))
63 wake_up(&cq->wait);
64}
65
66void c2_cq_event(struct c2_dev *c2dev, u32 mq_index)
67{
68 struct c2_cq *cq;
69
70 cq = c2_cq_get(c2dev, mq_index);
71 if (!cq) {
 72		printk(KERN_INFO PFX "discarding events on destroyed CQN=%d\n", mq_index);
73 return;
74 }
75
76 (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
77 c2_cq_put(cq);
78}
79
80void c2_cq_clean(struct c2_dev *c2dev, struct c2_qp *qp, u32 mq_index)
81{
82 struct c2_cq *cq;
83 struct c2_mq *q;
84
85 cq = c2_cq_get(c2dev, mq_index);
86 if (!cq)
87 return;
88
89 spin_lock_irq(&cq->lock);
90 q = &cq->mq;
91 if (q && !c2_mq_empty(q)) {
92 u16 priv = q->priv;
93 struct c2wr_ce *msg;
94
95 while (priv != be16_to_cpu(*q->shared)) {
96 msg = (struct c2wr_ce *)
97 (q->msg_pool.host + priv * q->msg_size);
98 if (msg->qp_user_context == (u64) (unsigned long) qp) {
99 msg->qp_user_context = (u64) 0;
100 }
101 priv = (priv + 1) % q->q_size;
102 }
103 }
104 spin_unlock_irq(&cq->lock);
105 c2_cq_put(cq);
106}
107
108static inline enum ib_wc_status c2_cqe_status_to_openib(u8 status)
109{
110 switch (status) {
111 case C2_OK:
112 return IB_WC_SUCCESS;
113 case CCERR_FLUSHED:
114 return IB_WC_WR_FLUSH_ERR;
115 case CCERR_BASE_AND_BOUNDS_VIOLATION:
116 return IB_WC_LOC_PROT_ERR;
117 case CCERR_ACCESS_VIOLATION:
118 return IB_WC_LOC_ACCESS_ERR;
119 case CCERR_TOTAL_LENGTH_TOO_BIG:
120 return IB_WC_LOC_LEN_ERR;
121 case CCERR_INVALID_WINDOW:
122 return IB_WC_MW_BIND_ERR;
123 default:
124 return IB_WC_GENERAL_ERR;
125 }
126}
127
128
129static inline int c2_poll_one(struct c2_dev *c2dev,
130 struct c2_cq *cq, struct ib_wc *entry)
131{
132 struct c2wr_ce *ce;
133 struct c2_qp *qp;
134 int is_recv = 0;
135
136 ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
137 if (!ce) {
138 return -EAGAIN;
139 }
140
141 /*
142 * if the qp returned is null then this qp has already
 143	 * been freed and we are unable to process the completion.
144 * try pulling the next message
145 */
146 while ((qp =
147 (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
148 c2_mq_free(&cq->mq);
149 ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
150 if (!ce)
151 return -EAGAIN;
152 }
153
154 entry->status = c2_cqe_status_to_openib(c2_wr_get_result(ce));
155 entry->wr_id = ce->hdr.context;
156 entry->qp_num = ce->handle;
157 entry->wc_flags = 0;
158 entry->slid = 0;
159 entry->sl = 0;
160 entry->src_qp = 0;
161 entry->dlid_path_bits = 0;
162 entry->pkey_index = 0;
163
164 switch (c2_wr_get_id(ce)) {
165 case C2_WR_TYPE_SEND:
166 entry->opcode = IB_WC_SEND;
167 break;
168 case C2_WR_TYPE_RDMA_WRITE:
169 entry->opcode = IB_WC_RDMA_WRITE;
170 break;
171 case C2_WR_TYPE_RDMA_READ:
172 entry->opcode = IB_WC_RDMA_READ;
173 break;
174 case C2_WR_TYPE_BIND_MW:
175 entry->opcode = IB_WC_BIND_MW;
176 break;
177 case C2_WR_TYPE_RECV:
178 entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
179 entry->opcode = IB_WC_RECV;
180 is_recv = 1;
181 break;
182 default:
183 break;
184 }
185
186 /* consume the WQEs */
187 if (is_recv)
188 c2_mq_lconsume(&qp->rq_mq, 1);
189 else
190 c2_mq_lconsume(&qp->sq_mq,
191 be32_to_cpu(c2_wr_get_wqe_count(ce)) + 1);
192
193 /* free the message */
194 c2_mq_free(&cq->mq);
195
196 return 0;
197}
198
199int c2_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
200{
201 struct c2_dev *c2dev = to_c2dev(ibcq->device);
202 struct c2_cq *cq = to_c2cq(ibcq);
203 unsigned long flags;
204 int npolled, err;
205
206 spin_lock_irqsave(&cq->lock, flags);
207
208 for (npolled = 0; npolled < num_entries; ++npolled) {
209
210 err = c2_poll_one(c2dev, cq, entry + npolled);
211 if (err)
212 break;
213 }
214
215 spin_unlock_irqrestore(&cq->lock, flags);
216
217 return npolled;
218}
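/*
 * A kernel ULP reaches this routine through the verbs layer rather
 * than calling it directly; a minimal polling sketch, assuming a valid
 * struct ib_cq *ibcq obtained from ib_create_cq():
 *
 *	struct ib_wc wc;
 *
 *	while (ib_poll_cq(ibcq, 1, &wc) > 0) {
 *		if (wc.status != IB_WC_SUCCESS)
 *			handle_error(&wc);		// hypothetical helper
 *		else
 *			handle_completion(&wc);		// hypothetical helper
 *	}
 */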
219
220int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
221{
222 struct c2_mq_shared __iomem *shared;
223 struct c2_cq *cq;
224
225 cq = to_c2cq(ibcq);
226 shared = cq->mq.peer;
227
228 if (notify == IB_CQ_NEXT_COMP)
229 writeb(C2_CQ_NOTIFICATION_TYPE_NEXT, &shared->notification_type);
230 else if (notify == IB_CQ_SOLICITED)
231 writeb(C2_CQ_NOTIFICATION_TYPE_NEXT_SE, &shared->notification_type);
232 else
233 return -EINVAL;
234
235 writeb(CQ_WAIT_FOR_DMA | CQ_ARMED, &shared->armed);
236
237 /*
238 * Now read back shared->armed to make the PCI
239 * write synchronous. This is necessary for
240 * correct cq notification semantics.
241 */
242 readb(&shared->armed);
243
244 return 0;
245}
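/*
 * Consumers arm the CQ through the verbs layer and typically re-poll
 * after arming so that a completion racing with the re-arm is not
 * missed (sketch, assuming the ibcq/wc variables from the example
 * after c2_poll_cq() above):
 *
 *	ib_req_notify_cq(ibcq, IB_CQ_NEXT_COMP);
 *	while (ib_poll_cq(ibcq, 1, &wc) > 0)
 *		handle_completion(&wc);			// hypothetical helper
 */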
246
247static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
248{
249
250 dma_unmap_single(c2dev->ibdev.dma_device, pci_unmap_addr(mq, mapping),
251 mq->q_size * mq->msg_size, DMA_FROM_DEVICE);
252 free_pages((unsigned long) mq->msg_pool.host,
253 get_order(mq->q_size * mq->msg_size));
254}
255
256static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
257 int msg_size)
258{
259 unsigned long pool_start;
260
261 pool_start = __get_free_pages(GFP_KERNEL,
262 get_order(q_size * msg_size));
263 if (!pool_start)
264 return -ENOMEM;
265
266 c2_mq_rep_init(mq,
267 0, /* index (currently unknown) */
268 q_size,
269 msg_size,
270 (u8 *) pool_start,
271 NULL, /* peer (currently unknown) */
272 C2_MQ_HOST_TARGET);
273
274 mq->host_dma = dma_map_single(c2dev->ibdev.dma_device,
275 (void *)pool_start,
276 q_size * msg_size, DMA_FROM_DEVICE);
277 pci_unmap_addr_set(mq, mapping, mq->host_dma);
278
279 return 0;
280}
281
282int c2_init_cq(struct c2_dev *c2dev, int entries,
283 struct c2_ucontext *ctx, struct c2_cq *cq)
284{
285 struct c2wr_cq_create_req wr;
286 struct c2wr_cq_create_rep *reply;
287 unsigned long peer_pa;
288 struct c2_vq_req *vq_req;
289 int err;
290
291 might_sleep();
292
293 cq->ibcq.cqe = entries - 1;
294 cq->is_kernel = !ctx;
295
296 /* Allocate a shared pointer */
297 cq->mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
298 &cq->mq.shared_dma, GFP_KERNEL);
299 if (!cq->mq.shared)
300 return -ENOMEM;
301
302 /* Allocate pages for the message pool */
303 err = c2_alloc_cq_buf(c2dev, &cq->mq, entries + 1, C2_CQ_MSG_SIZE);
304 if (err)
305 goto bail0;
306
307 vq_req = vq_req_alloc(c2dev);
308 if (!vq_req) {
309 err = -ENOMEM;
310 goto bail1;
311 }
312
313 memset(&wr, 0, sizeof(wr));
314 c2_wr_set_id(&wr, CCWR_CQ_CREATE);
315 wr.hdr.context = (unsigned long) vq_req;
316 wr.rnic_handle = c2dev->adapter_handle;
317 wr.msg_size = cpu_to_be32(cq->mq.msg_size);
318 wr.depth = cpu_to_be32(cq->mq.q_size);
319 wr.shared_ht = cpu_to_be64(cq->mq.shared_dma);
320 wr.msg_pool = cpu_to_be64(cq->mq.host_dma);
321 wr.user_context = (u64) (unsigned long) (cq);
322
323 vq_req_get(c2dev, vq_req);
324
325 err = vq_send_wr(c2dev, (union c2wr *) & wr);
326 if (err) {
327 vq_req_put(c2dev, vq_req);
328 goto bail2;
329 }
330
331 err = vq_wait_for_reply(c2dev, vq_req);
332 if (err)
333 goto bail2;
334
335 reply = (struct c2wr_cq_create_rep *) (unsigned long) (vq_req->reply_msg);
336 if (!reply) {
337 err = -ENOMEM;
338 goto bail2;
339 }
340
341 if ((err = c2_errno(reply)) != 0)
342 goto bail3;
343
344 cq->adapter_handle = reply->cq_handle;
345 cq->mq.index = be32_to_cpu(reply->mq_index);
346
347 peer_pa = c2dev->pa + be32_to_cpu(reply->adapter_shared);
348 cq->mq.peer = ioremap_nocache(peer_pa, PAGE_SIZE);
349 if (!cq->mq.peer) {
350 err = -ENOMEM;
351 goto bail3;
352 }
353
354 vq_repbuf_free(c2dev, reply);
355 vq_req_free(c2dev, vq_req);
356
357 spin_lock_init(&cq->lock);
358 atomic_set(&cq->refcount, 1);
359 init_waitqueue_head(&cq->wait);
360
361 /*
362 * Use the MQ index allocated by the adapter to
363 * store the CQ in the qptr_array
364 */
365 cq->cqn = cq->mq.index;
366 c2dev->qptr_array[cq->cqn] = cq;
367
368 return 0;
369
370 bail3:
371 vq_repbuf_free(c2dev, reply);
372 bail2:
373 vq_req_free(c2dev, vq_req);
374 bail1:
375 c2_free_cq_buf(c2dev, &cq->mq);
376 bail0:
377 c2_free_mqsp(cq->mq.shared);
378
379 return err;
380}
381
382void c2_free_cq(struct c2_dev *c2dev, struct c2_cq *cq)
383{
384 int err;
385 struct c2_vq_req *vq_req;
386 struct c2wr_cq_destroy_req wr;
387 struct c2wr_cq_destroy_rep *reply;
388
389 might_sleep();
390
391 /* Clear CQ from the qptr array */
392 spin_lock_irq(&c2dev->lock);
393 c2dev->qptr_array[cq->mq.index] = NULL;
394 atomic_dec(&cq->refcount);
395 spin_unlock_irq(&c2dev->lock);
396
397 wait_event(cq->wait, !atomic_read(&cq->refcount));
398
399 vq_req = vq_req_alloc(c2dev);
400 if (!vq_req) {
401 goto bail0;
402 }
403
404 memset(&wr, 0, sizeof(wr));
405 c2_wr_set_id(&wr, CCWR_CQ_DESTROY);
406 wr.hdr.context = (unsigned long) vq_req;
407 wr.rnic_handle = c2dev->adapter_handle;
408 wr.cq_handle = cq->adapter_handle;
409
410 vq_req_get(c2dev, vq_req);
411
412 err = vq_send_wr(c2dev, (union c2wr *) & wr);
413 if (err) {
414 vq_req_put(c2dev, vq_req);
415 goto bail1;
416 }
417
418 err = vq_wait_for_reply(c2dev, vq_req);
419 if (err)
420 goto bail1;
421
422 reply = (struct c2wr_cq_destroy_rep *) (unsigned long) (vq_req->reply_msg);
423
424 vq_repbuf_free(c2dev, reply);
425 bail1:
426 vq_req_free(c2dev, vq_req);
427 bail0:
428 if (cq->is_kernel) {
429 c2_free_cq_buf(c2dev, &cq->mq);
430 }
431
432 return;
433}
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
new file mode 100644
index 000000000000..0d0bc33ca30a
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -0,0 +1,209 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include "c2.h"
34#include <rdma/iw_cm.h>
35#include "c2_vq.h"
36
37static void handle_mq(struct c2_dev *c2dev, u32 index);
38static void handle_vq(struct c2_dev *c2dev, u32 mq_index);
39
40/*
41 * Handle RNIC interrupts
42 */
43void c2_rnic_interrupt(struct c2_dev *c2dev)
44{
45 unsigned int mq_index;
46
47 while (c2dev->hints_read != be16_to_cpu(*c2dev->hint_count)) {
48 mq_index = readl(c2dev->regs + PCI_BAR0_HOST_HINT);
49 if (mq_index & 0x80000000) {
50 break;
51 }
52
53 c2dev->hints_read++;
54 handle_mq(c2dev, mq_index);
55 }
56
57}
58
59/*
60 * Top level MQ handler
61 */
62static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
63{
64 if (c2dev->qptr_array[mq_index] == NULL) {
 65		pr_debug("handle_mq: stray activity for mq_index=%d\n",
 66			 mq_index);
67 return;
68 }
69
70 switch (mq_index) {
71 case (0):
72 /*
73 * An index of 0 in the activity queue
74 * indicates the req vq now has messages
75 * available...
76 *
77 * Wake up any waiters waiting on req VQ
78 * message availability.
79 */
80 wake_up(&c2dev->req_vq_wo);
81 break;
82 case (1):
83 handle_vq(c2dev, mq_index);
84 break;
85 case (2):
86 /* We have to purge the VQ in case there are pending
87 * accept reply requests that would result in the
88 * generation of an ESTABLISHED event. If we don't
89 * generate these first, a CLOSE event could end up
90 * being delivered before the ESTABLISHED event.
91 */
92 handle_vq(c2dev, 1);
93
94 c2_ae_event(c2dev, mq_index);
95 break;
96 default:
97 /* There is no event synchronization between CQ events
98 * and AE or CM events. In fact, CQE could be
99 * delivered for all of the I/O up to and including the
 100		 * FLUSH for a peer disconnect prior to the ESTABLISHED
 101		 * event being delivered to the app. The reason for this
 102		 * is that CM events are delivered on a thread, while AE
 103		 * and CQ events are delivered in interrupt context.
104 */
105 c2_cq_event(c2dev, mq_index);
106 break;
107 }
108
109 return;
110}
111
112/*
113 * Handles verbs WR replies.
114 */
115static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
116{
117 void *adapter_msg, *reply_msg;
118 struct c2wr_hdr *host_msg;
119 struct c2wr_hdr tmp;
120 struct c2_mq *reply_vq;
121 struct c2_vq_req *req;
122 struct iw_cm_event cm_event;
123 int err;
124
125 reply_vq = (struct c2_mq *) c2dev->qptr_array[mq_index];
126
127 /*
128 * get next msg from mq_index into adapter_msg.
129 * don't free it yet.
130 */
131 adapter_msg = c2_mq_consume(reply_vq);
132 if (adapter_msg == NULL) {
133 return;
134 }
135
136 host_msg = vq_repbuf_alloc(c2dev);
137
138 /*
139 * If we can't get a host buffer, then we'll still
 140	 * wake up the waiter; we just won't give it the msg.
141 * It is assumed the waiter will deal with this...
142 */
143 if (!host_msg) {
144 pr_debug("handle_vq: no repbufs!\n");
145
146 /*
147 * just copy the WR header into a local variable.
148 * this allows us to still demux on the context
149 */
150 host_msg = &tmp;
151 memcpy(host_msg, adapter_msg, sizeof(tmp));
152 reply_msg = NULL;
153 } else {
154 memcpy(host_msg, adapter_msg, reply_vq->msg_size);
155 reply_msg = host_msg;
156 }
157
158 /*
159 * consume the msg from the MQ
160 */
161 c2_mq_free(reply_vq);
162
163 /*
164 * wakeup the waiter.
165 */
166 req = (struct c2_vq_req *) (unsigned long) host_msg->context;
167 if (req == NULL) {
168 /*
169 * We should never get here, as the adapter should
170 * never send us a reply that we're not expecting.
171 */
172 vq_repbuf_free(c2dev, host_msg);
173 pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
174 return;
175 }
176
177 err = c2_errno(reply_msg);
178 if (!err) switch (req->event) {
179 case IW_CM_EVENT_ESTABLISHED:
180 c2_set_qp_state(req->qp,
181 C2_QP_STATE_RTS);
182 case IW_CM_EVENT_CLOSE:
183
184		/*
185		 * ESTABLISHED falls through: the QP was already moved to
186		 * RTS above; both events are reported to the CM handler.
187		 */
188 cm_event.event = req->event;
189 cm_event.status = 0;
190 cm_event.local_addr = req->cm_id->local_addr;
191 cm_event.remote_addr = req->cm_id->remote_addr;
192 cm_event.private_data = NULL;
193 cm_event.private_data_len = 0;
194 req->cm_id->event_handler(req->cm_id, &cm_event);
195 break;
196 default:
197 break;
198 }
199
200 req->reply_msg = (u64) (unsigned long) (reply_msg);
201 atomic_set(&req->reply_ready, 1);
202 wake_up(&req->wait_object);
203
204 /*
205 * If the request was cancelled, then this put will
206 * free the vq_req memory...and reply_msg!!!
207 */
208 vq_req_put(c2dev, req);
209}
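
The wake_up() above unblocks whichever kernel thread is parked in vq_wait_for_reply() for that request. As a hedged sketch (not part of this patch; the real helpers live in c2_vq.c), every synchronous verbs call in this driver follows the same request/reply pattern, and c2_stag_dealloc() in c2_mm.c below is a complete instance of it:

	/*
	 * Illustrative only.  The helper names (vq_req_alloc, vq_req_get,
	 * vq_send_wr, vq_wait_for_reply, vq_repbuf_free, vq_req_free) are
	 * the ones this driver already uses; the function itself is a
	 * made-up example.
	 */
	static int c2_example_verbs_call(struct c2_dev *c2dev, union c2wr *wr)
	{
		struct c2_vq_req *vq_req;
		struct c2wr_hdr *reply;
		int err;

		vq_req = vq_req_alloc(c2dev);		/* per-call wait object */
		if (!vq_req)
			return -ENOMEM;
		wr->hdr.context = (unsigned long) vq_req; /* demuxed in handle_vq() */

		vq_req_get(c2dev, vq_req);		/* ref dropped in the int handler */
		err = vq_send_wr(c2dev, wr);
		if (err) {
			vq_req_put(c2dev, vq_req);
			goto out;
		}

		err = vq_wait_for_reply(c2dev, vq_req);	/* sleeps until wake_up() */
		if (err)
			goto out;

		reply = (struct c2wr_hdr *) (unsigned long) vq_req->reply_msg;
		err = reply ? c2_errno(reply) : -ENOMEM;
		if (reply)
			vq_repbuf_free(c2dev, reply);
	out:
		vq_req_free(c2dev, vq_req);
		return err;
	}
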
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
new file mode 100644
index 000000000000..1e4f46493fcb
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -0,0 +1,375 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include "c2.h"
34#include "c2_vq.h"
35
36#define PBL_VIRT 1
37#define PBL_PHYS 2
38
39/*
40 * Send all the PBL messages to convey the remainder of the PBL.
41 * Wait for the adapter's reply on the last one.
42 * This is indicated by setting the MEM_PBL_COMPLETE bit in the flags.
43 *
44 * NOTE: vq_req is _not_ freed by this function. The VQ Host
45 * Reply buffer _is_ freed by this function.
46 */
47static int
48send_pbl_messages(struct c2_dev *c2dev, u32 stag_index,
49 unsigned long va, u32 pbl_depth,
50 struct c2_vq_req *vq_req, int pbl_type)
51{
52 u32 pbe_count; /* amt that fits in a PBL msg */
53 u32 count; /* amt in this PBL MSG. */
54 struct c2wr_nsmr_pbl_req *wr; /* PBL WR ptr */
55 struct c2wr_nsmr_pbl_rep *reply; /* reply ptr */
56 int err, pbl_virt, pbl_index, i;
57
58 switch (pbl_type) {
59 case PBL_VIRT:
60 pbl_virt = 1;
61 break;
62 case PBL_PHYS:
63 pbl_virt = 0;
64 break;
65 default:
66 return -EINVAL;
67 break;
68 }
69
70 pbe_count = (c2dev->req_vq.msg_size -
71 sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);
72 wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
73 if (!wr) {
74 return -ENOMEM;
75 }
76 c2_wr_set_id(wr, CCWR_NSMR_PBL);
77
78 /*
79 * Only the last PBL message will generate a reply from the verbs,
80 * so we set the context to 0 indicating there is no kernel verbs
81 * handler blocked awaiting this reply.
82 */
83 wr->hdr.context = 0;
84 wr->rnic_handle = c2dev->adapter_handle;
85 wr->stag_index = stag_index; /* already swapped */
86 wr->flags = 0;
87 pbl_index = 0;
88 while (pbl_depth) {
89 count = min(pbe_count, pbl_depth);
90 wr->addrs_length = cpu_to_be32(count);
91
92 /*
93 * If this is the last message, then reference the
94		 * vq request struct because we will wait for a reply.
95		 * Also mark this PBL msg as the last one.
96 */
97 if (count == pbl_depth) {
98 /*
99 * reference the request struct. dereferenced in the
100 * int handler.
101 */
102 vq_req_get(c2dev, vq_req);
103 wr->flags = cpu_to_be32(MEM_PBL_COMPLETE);
104
105 /*
106 * This is the last PBL message.
107 * Set the context to our VQ Request Object so we can
108 * wait for the reply.
109 */
110 wr->hdr.context = (unsigned long) vq_req;
111 }
112
113 /*
114 * If pbl_virt is set then va is a virtual address
115 * that describes a virtually contiguous memory
116 * allocation. The wr needs the start of each virtual page
117 * to be converted to the corresponding physical address
118 * of the page. If pbl_virt is not set then va is an array
119 * of physical addresses and there is no conversion to do.
120 * Just fill in the wr with what is in the array.
121 */
122 for (i = 0; i < count; i++) {
123 if (pbl_virt) {
124 va += PAGE_SIZE;
125 } else {
126 wr->paddrs[i] =
127 cpu_to_be64(((u64 *)va)[pbl_index + i]);
128 }
129 }
130
131 /*
132 * Send WR to adapter
133 */
134 err = vq_send_wr(c2dev, (union c2wr *) wr);
135 if (err) {
136 if (count <= pbe_count) {
137 vq_req_put(c2dev, vq_req);
138 }
139 goto bail0;
140 }
141 pbl_depth -= count;
142 pbl_index += count;
143 }
144
145 /*
146 * Now wait for the reply...
147 */
148 err = vq_wait_for_reply(c2dev, vq_req);
149 if (err) {
150 goto bail0;
151 }
152
153 /*
154 * Process reply
155 */
156 reply = (struct c2wr_nsmr_pbl_rep *) (unsigned long) vq_req->reply_msg;
157 if (!reply) {
158 err = -ENOMEM;
159 goto bail0;
160 }
161
162 err = c2_errno(reply);
163
164 vq_repbuf_free(c2dev, reply);
165 bail0:
166 kfree(wr);
167 return err;
168}
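
For intuition, the loop above emits ceil(pbl_depth / pbe_count) CCWR_NSMR_PBL messages, only the last of which carries MEM_PBL_COMPLETE and a reply context. A minimal sketch of that arithmetic (the concrete sizes in the comment are hypothetical):

	/* Illustrative only: how many PBL messages a given depth requires. */
	static u32 c2_example_pbl_msg_count(struct c2_dev *c2dev, u32 pbl_depth)
	{
		u32 pbe_count = (c2dev->req_vq.msg_size -
				 sizeof(struct c2wr_nsmr_pbl_req)) / sizeof(u64);

		/*
		 * E.g. with a (hypothetical) msg_size of 1024 and a 40-byte
		 * request header, pbe_count is 123, so a 1000-entry PBL is
		 * sent as (1000 + 122) / 123 = 9 messages.
		 */
		return (pbl_depth + pbe_count - 1) / pbe_count;
	}
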
169
170#define C2_PBL_MAX_DEPTH 131072
171int
172c2_nsmr_register_phys_kern(struct c2_dev *c2dev, u64 *addr_list,
173 int page_size, int pbl_depth, u32 length,
174 u32 offset, u64 *va, enum c2_acf acf,
175 struct c2_mr *mr)
176{
177 struct c2_vq_req *vq_req;
178 struct c2wr_nsmr_register_req *wr;
179 struct c2wr_nsmr_register_rep *reply;
180 u16 flags;
181 int i, pbe_count, count;
182 int err;
183
184 if (!va || !length || !addr_list || !pbl_depth)
185		return -EINVAL;
186
187 /*
188 * Verify PBL depth is within rnic max
189 */
190 if (pbl_depth > C2_PBL_MAX_DEPTH) {
191		return -EINVAL;
192 }
193
194 /*
195 * allocate verbs request object
196 */
197 vq_req = vq_req_alloc(c2dev);
198 if (!vq_req)
199 return -ENOMEM;
200
201 wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
202 if (!wr) {
203 err = -ENOMEM;
204 goto bail0;
205 }
206
207 /*
208 * build the WR
209 */
210 c2_wr_set_id(wr, CCWR_NSMR_REGISTER);
211 wr->hdr.context = (unsigned long) vq_req;
212 wr->rnic_handle = c2dev->adapter_handle;
213
214 flags = (acf | MEM_VA_BASED | MEM_REMOTE);
215
216 /*
217 * compute how many pbes can fit in the message
218 */
219 pbe_count = (c2dev->req_vq.msg_size -
220 sizeof(struct c2wr_nsmr_register_req)) / sizeof(u64);
221
222 if (pbl_depth <= pbe_count) {
223 flags |= MEM_PBL_COMPLETE;
224 }
225 wr->flags = cpu_to_be16(flags);
226	wr->stag_key = 0;	/* stag_key */
227 wr->va = cpu_to_be64(*va);
228 wr->pd_id = mr->pd->pd_id;
229 wr->pbe_size = cpu_to_be32(page_size);
230 wr->length = cpu_to_be32(length);
231 wr->pbl_depth = cpu_to_be32(pbl_depth);
232 wr->fbo = cpu_to_be32(offset);
233 count = min(pbl_depth, pbe_count);
234 wr->addrs_length = cpu_to_be32(count);
235
236 /*
237 * fill out the PBL for this message
238 */
239 for (i = 0; i < count; i++) {
240 wr->paddrs[i] = cpu_to_be64(addr_list[i]);
241 }
242
243 /*
244	 * reference the request struct
245 */
246 vq_req_get(c2dev, vq_req);
247
248 /*
249 * send the WR to the adapter
250 */
251 err = vq_send_wr(c2dev, (union c2wr *) wr);
252 if (err) {
253 vq_req_put(c2dev, vq_req);
254 goto bail1;
255 }
256
257 /*
258 * wait for reply from adapter
259 */
260 err = vq_wait_for_reply(c2dev, vq_req);
261 if (err) {
262 goto bail1;
263 }
264
265 /*
266 * process reply
267 */
268 reply =
269 (struct c2wr_nsmr_register_rep *) (unsigned long) (vq_req->reply_msg);
270 if (!reply) {
271 err = -ENOMEM;
272 goto bail1;
273 }
274 if ((err = c2_errno(reply))) {
275 goto bail2;
276 }
277	/* *p_pb_entries = be32_to_cpu(reply->pbl_depth); */
278 mr->ibmr.lkey = mr->ibmr.rkey = be32_to_cpu(reply->stag_index);
279 vq_repbuf_free(c2dev, reply);
280
281 /*
282 * if there are still more PBEs we need to send them to
283 * the adapter and wait for a reply on the final one.
284 * reuse vq_req for this purpose.
285 */
286 pbl_depth -= count;
287 if (pbl_depth) {
288
289 vq_req->reply_msg = (unsigned long) NULL;
290 atomic_set(&vq_req->reply_ready, 0);
291 err = send_pbl_messages(c2dev,
292 cpu_to_be32(mr->ibmr.lkey),
293 (unsigned long) &addr_list[i],
294 pbl_depth, vq_req, PBL_PHYS);
295 if (err) {
296 goto bail1;
297 }
298 }
299
300 vq_req_free(c2dev, vq_req);
301 kfree(wr);
302
303 return err;
304
305 bail2:
306 vq_repbuf_free(c2dev, reply);
307 bail1:
308 kfree(wr);
309 bail0:
310 vq_req_free(c2dev, vq_req);
311 return err;
312}
313
314int c2_stag_dealloc(struct c2_dev *c2dev, u32 stag_index)
315{
316 struct c2_vq_req *vq_req; /* verbs request object */
317 struct c2wr_stag_dealloc_req wr; /* work request */
318 struct c2wr_stag_dealloc_rep *reply; /* WR reply */
319 int err;
320
321
322 /*
323 * allocate verbs request object
324 */
325 vq_req = vq_req_alloc(c2dev);
326 if (!vq_req) {
327 return -ENOMEM;
328 }
329
330 /*
331 * Build the WR
332 */
333 c2_wr_set_id(&wr, CCWR_STAG_DEALLOC);
334 wr.hdr.context = (u64) (unsigned long) vq_req;
335 wr.rnic_handle = c2dev->adapter_handle;
336 wr.stag_index = cpu_to_be32(stag_index);
337
338 /*
339 * reference the request struct. dereferenced in the int handler.
340 */
341 vq_req_get(c2dev, vq_req);
342
343 /*
344 * Send WR to adapter
345 */
346 err = vq_send_wr(c2dev, (union c2wr *) & wr);
347 if (err) {
348 vq_req_put(c2dev, vq_req);
349 goto bail0;
350 }
351
352 /*
353 * Wait for reply from adapter
354 */
355 err = vq_wait_for_reply(c2dev, vq_req);
356 if (err) {
357 goto bail0;
358 }
359
360 /*
361 * Process reply
362 */
363 reply = (struct c2wr_stag_dealloc_rep *) (unsigned long) vq_req->reply_msg;
364 if (!reply) {
365 err = -ENOMEM;
366 goto bail0;
367 }
368
369 err = c2_errno(reply);
370
371 vq_repbuf_free(c2dev, reply);
372 bail0:
373 vq_req_free(c2dev, vq_req);
374 return err;
375}
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.c b/drivers/infiniband/hw/amso1100/c2_mq.c
new file mode 100644
index 000000000000..b88a75592102
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.c
@@ -0,0 +1,174 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include "c2.h"
34#include "c2_mq.h"
35
36void *c2_mq_alloc(struct c2_mq *q)
37{
38 BUG_ON(q->magic != C2_MQ_MAGIC);
39 BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
40
41 if (c2_mq_full(q)) {
42 return NULL;
43 } else {
44#ifdef DEBUG
45 struct c2wr_hdr *m =
46 (struct c2wr_hdr *) (q->msg_pool.host + q->priv * q->msg_size);
47#ifdef CCMSGMAGIC
48 BUG_ON(m->magic != be32_to_cpu(~CCWR_MAGIC));
49 m->magic = cpu_to_be32(CCWR_MAGIC);
50#endif
51 return m;
52#else
53 return q->msg_pool.host + q->priv * q->msg_size;
54#endif
55 }
56}
57
58void c2_mq_produce(struct c2_mq *q)
59{
60 BUG_ON(q->magic != C2_MQ_MAGIC);
61 BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
62
63 if (!c2_mq_full(q)) {
64 q->priv = (q->priv + 1) % q->q_size;
65 q->hint_count++;
66 /* Update peer's offset. */
67 __raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
68 }
69}
70
71void *c2_mq_consume(struct c2_mq *q)
72{
73 BUG_ON(q->magic != C2_MQ_MAGIC);
74 BUG_ON(q->type != C2_MQ_HOST_TARGET);
75
76 if (c2_mq_empty(q)) {
77 return NULL;
78 } else {
79#ifdef DEBUG
80 struct c2wr_hdr *m = (struct c2wr_hdr *)
81 (q->msg_pool.host + q->priv * q->msg_size);
82#ifdef CCMSGMAGIC
83 BUG_ON(m->magic != be32_to_cpu(CCWR_MAGIC));
84#endif
85 return m;
86#else
87 return q->msg_pool.host + q->priv * q->msg_size;
88#endif
89 }
90}
91
92void c2_mq_free(struct c2_mq *q)
93{
94 BUG_ON(q->magic != C2_MQ_MAGIC);
95 BUG_ON(q->type != C2_MQ_HOST_TARGET);
96
97 if (!c2_mq_empty(q)) {
98
99#ifdef CCMSGMAGIC
100 {
101 struct c2wr_hdr __iomem *m = (struct c2wr_hdr __iomem *)
102 (q->msg_pool.adapter + q->priv * q->msg_size);
103 __raw_writel(cpu_to_be32(~CCWR_MAGIC), &m->magic);
104 }
105#endif
106 q->priv = (q->priv + 1) % q->q_size;
107 /* Update peer's offset. */
108 __raw_writew(cpu_to_be16(q->priv), &q->peer->shared);
109 }
110}
111
112
113void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count)
114{
115 BUG_ON(q->magic != C2_MQ_MAGIC);
116 BUG_ON(q->type != C2_MQ_ADAPTER_TARGET);
117
118 while (wqe_count--) {
119 BUG_ON(c2_mq_empty(q));
120 *q->shared = cpu_to_be16((be16_to_cpu(*q->shared)+1) % q->q_size);
121 }
122}
123
124#if 0
125u32 c2_mq_count(struct c2_mq *q)
126{
127 s32 count;
128
129 if (q->type == C2_MQ_HOST_TARGET)
130 count = be16_to_cpu(*q->shared) - q->priv;
131 else
132 count = q->priv - be16_to_cpu(*q->shared);
133
134 if (count < 0)
135 count += q->q_size;
136
137 return (u32) count;
138}
139#endif /* 0 */
140
141void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
142 u8 __iomem *pool_start, u16 __iomem *peer, u32 type)
143{
144 BUG_ON(!q->shared);
145
146 /* This code assumes the byte swapping has already been done! */
147 q->index = index;
148 q->q_size = q_size;
149 q->msg_size = msg_size;
150 q->msg_pool.adapter = pool_start;
151 q->peer = (struct c2_mq_shared __iomem *) peer;
152 q->magic = C2_MQ_MAGIC;
153 q->type = type;
154 q->priv = 0;
155 q->hint_count = 0;
156 return;
157}
158void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
159 u8 *pool_start, u16 __iomem *peer, u32 type)
160{
161 BUG_ON(!q->shared);
162
163 /* This code assumes the byte swapping has already been done! */
164 q->index = index;
165 q->q_size = q_size;
166 q->msg_size = msg_size;
167 q->msg_pool.host = pool_start;
168 q->peer = (struct c2_mq_shared __iomem *) peer;
169 q->magic = C2_MQ_MAGIC;
170 q->type = type;
171 q->priv = 0;
172 q->hint_count = 0;
173 return;
174}
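
c2_mq_consume() and c2_mq_free() are always used as a pair on host-target queues: consume returns a pointer into the ring, and free advances the read index once the message has been handled (handle_vq() earlier in this patch is one caller). A minimal drain-loop sketch, using only the helpers defined above:

	/* Illustrative only: drain every pending message from a host-target MQ. */
	static void c2_example_drain_mq(struct c2_mq *q)
	{
		struct c2wr_hdr *msg;

		while ((msg = c2_mq_consume(q)) != NULL) {
			/* ... handle msg while the slot is still owned by us ... */
			c2_mq_free(q);	/* advance the read index and notify the peer */
		}
	}
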
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
new file mode 100644
index 000000000000..9185bbb21658
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -0,0 +1,106 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef _C2_MQ_H_
35#define _C2_MQ_H_
36#include <linux/kernel.h>
37#include <linux/dma-mapping.h>
38#include "c2_wr.h"
39
40enum c2_shared_regs {
41
42 C2_SHARED_ARMED = 0x10,
43 C2_SHARED_NOTIFY = 0x18,
44 C2_SHARED_SHARED = 0x40,
45};
46
47struct c2_mq_shared {
48 u16 unused1;
49 u8 armed;
50 u8 notification_type;
51 u32 unused2;
52 u16 shared;
53 /* Pad to 64 bytes. */
54 u8 pad[64 - sizeof(u16) - 2 * sizeof(u8) - sizeof(u32) - sizeof(u16)];
55};
56
57enum c2_mq_type {
58 C2_MQ_HOST_TARGET = 1,
59 C2_MQ_ADAPTER_TARGET = 2,
60};
61
62/*
63 * c2_mq_t is for kernel-mode MQs like the VQs and the AEQ.
64 * c2_user_mq_t (which is the same format) is for user-mode MQs...
65 */
66#define C2_MQ_MAGIC 0x4d512020 /* 'MQ ' */
67struct c2_mq {
68 u32 magic;
69 union {
70 u8 *host;
71 u8 __iomem *adapter;
72 } msg_pool;
73 dma_addr_t host_dma;
74 DECLARE_PCI_UNMAP_ADDR(mapping);
75 u16 hint_count;
76 u16 priv;
77 struct c2_mq_shared __iomem *peer;
78 u16 *shared;
79 dma_addr_t shared_dma;
80 u32 q_size;
81 u32 msg_size;
82 u32 index;
83 enum c2_mq_type type;
84};
85
86static __inline__ int c2_mq_empty(struct c2_mq *q)
87{
88 return q->priv == be16_to_cpu(*q->shared);
89}
90
91static __inline__ int c2_mq_full(struct c2_mq *q)
92{
93 return q->priv == (be16_to_cpu(*q->shared) + q->q_size - 1) % q->q_size;
94}
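
A worked example of the index arithmetic above, with a hypothetical q_size of 8:

	/*
	 * If *q->shared (the peer's index) is 2, c2_mq_empty() is true when
	 * q->priv == 2, and c2_mq_full() is true when
	 * q->priv == (2 + 8 - 1) % 8 == 1: one slot is always left unused so
	 * that the full and empty conditions stay distinguishable.
	 */
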
95
96extern void c2_mq_lconsume(struct c2_mq *q, u32 wqe_count);
97extern void *c2_mq_alloc(struct c2_mq *q);
98extern void c2_mq_produce(struct c2_mq *q);
99extern void *c2_mq_consume(struct c2_mq *q);
100extern void c2_mq_free(struct c2_mq *q);
101extern void c2_mq_req_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
102 u8 __iomem *pool_start, u16 __iomem *peer, u32 type);
103extern void c2_mq_rep_init(struct c2_mq *q, u32 index, u32 q_size, u32 msg_size,
104 u8 *pool_start, u16 __iomem *peer, u32 type);
105
106#endif /* _C2_MQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
new file mode 100644
index 000000000000..00c709926c8d
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -0,0 +1,89 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/init.h>
37#include <linux/errno.h>
38
39#include "c2.h"
40#include "c2_provider.h"
41
42int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd)
43{
44 u32 obj;
45 int ret = 0;
46
47 spin_lock(&c2dev->pd_table.lock);
48 obj = find_next_zero_bit(c2dev->pd_table.table, c2dev->pd_table.max,
49 c2dev->pd_table.last);
50 if (obj >= c2dev->pd_table.max)
51 obj = find_first_zero_bit(c2dev->pd_table.table,
52 c2dev->pd_table.max);
53 if (obj < c2dev->pd_table.max) {
54 pd->pd_id = obj;
55 __set_bit(obj, c2dev->pd_table.table);
56 c2dev->pd_table.last = obj+1;
57 if (c2dev->pd_table.last >= c2dev->pd_table.max)
58 c2dev->pd_table.last = 0;
59 } else
60 ret = -ENOMEM;
61 spin_unlock(&c2dev->pd_table.lock);
62 return ret;
63}
64
65void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
66{
67 spin_lock(&c2dev->pd_table.lock);
68 __clear_bit(pd->pd_id, c2dev->pd_table.table);
69 spin_unlock(&c2dev->pd_table.lock);
70}
71
72int __devinit c2_init_pd_table(struct c2_dev *c2dev)
73{
74
75 c2dev->pd_table.last = 0;
76 c2dev->pd_table.max = c2dev->props.max_pd;
77 spin_lock_init(&c2dev->pd_table.lock);
78 c2dev->pd_table.table = kmalloc(BITS_TO_LONGS(c2dev->props.max_pd) *
79 sizeof(long), GFP_KERNEL);
80 if (!c2dev->pd_table.table)
81 return -ENOMEM;
82 bitmap_zero(c2dev->pd_table.table, c2dev->props.max_pd);
83 return 0;
84}
85
86void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
87{
88 kfree(c2dev->pd_table.table);
89}
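
The PD table is a plain bitmap allocator with a rotating start hint (pd_table.last), so successive allocations cycle through the ID space rather than reusing the lowest free ID. A hedged usage sketch tying the four functions above together; the wrapper function itself is invented for illustration:

	/* Illustrative only: lifecycle of the PD bitmap allocator. */
	static int c2_example_pd_usage(struct c2_dev *c2dev)
	{
		struct c2_pd pd;
		int err;

		err = c2_init_pd_table(c2dev);		/* bitmap sized from props.max_pd */
		if (err)
			return err;

		err = c2_pd_alloc(c2dev, 1, &pd);	/* picks the next free pd_id */
		if (!err)
			c2_pd_free(c2dev, &pd);		/* clears that bit again */

		c2_cleanup_pd_table(c2dev);
		return err;
	}
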
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
new file mode 100644
index 000000000000..8fddc8cccdf3
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -0,0 +1,869 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#include <linux/module.h>
36#include <linux/moduleparam.h>
37#include <linux/pci.h>
38#include <linux/netdevice.h>
39#include <linux/etherdevice.h>
40#include <linux/inetdevice.h>
41#include <linux/delay.h>
42#include <linux/ethtool.h>
43#include <linux/mii.h>
44#include <linux/if_vlan.h>
45#include <linux/crc32.h>
46#include <linux/in.h>
47#include <linux/ip.h>
48#include <linux/tcp.h>
49#include <linux/init.h>
50#include <linux/dma-mapping.h>
51#include <linux/if_arp.h>
52
53#include <asm/io.h>
54#include <asm/irq.h>
55#include <asm/byteorder.h>
56
57#include <rdma/ib_smi.h>
58#include <rdma/ib_user_verbs.h>
59#include "c2.h"
60#include "c2_provider.h"
61#include "c2_user.h"
62
63static int c2_query_device(struct ib_device *ibdev,
64 struct ib_device_attr *props)
65{
66 struct c2_dev *c2dev = to_c2dev(ibdev);
67
68 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
69
70 *props = c2dev->props;
71 return 0;
72}
73
74static int c2_query_port(struct ib_device *ibdev,
75 u8 port, struct ib_port_attr *props)
76{
77 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
78
79 props->max_mtu = IB_MTU_4096;
80 props->lid = 0;
81 props->lmc = 0;
82 props->sm_lid = 0;
83 props->sm_sl = 0;
84 props->state = IB_PORT_ACTIVE;
85 props->phys_state = 0;
86 props->port_cap_flags =
87 IB_PORT_CM_SUP |
88 IB_PORT_REINIT_SUP |
89 IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
90 props->gid_tbl_len = 1;
91 props->pkey_tbl_len = 1;
92 props->qkey_viol_cntr = 0;
93 props->active_width = 1;
94 props->active_speed = 1;
95
96 return 0;
97}
98
99static int c2_modify_port(struct ib_device *ibdev,
100 u8 port, int port_modify_mask,
101 struct ib_port_modify *props)
102{
103 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
104 return 0;
105}
106
107static int c2_query_pkey(struct ib_device *ibdev,
108 u8 port, u16 index, u16 * pkey)
109{
110 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
111 *pkey = 0;
112 return 0;
113}
114
115static int c2_query_gid(struct ib_device *ibdev, u8 port,
116 int index, union ib_gid *gid)
117{
118 struct c2_dev *c2dev = to_c2dev(ibdev);
119
120 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
121 memset(&(gid->raw[0]), 0, sizeof(gid->raw));
122 memcpy(&(gid->raw[0]), c2dev->pseudo_netdev->dev_addr, 6);
123
124 return 0;
125}
126
127/* Allocate the user context data structure. This keeps track
128 * of all objects associated with a particular user-mode client.
129 */
130static struct ib_ucontext *c2_alloc_ucontext(struct ib_device *ibdev,
131 struct ib_udata *udata)
132{
133 struct c2_ucontext *context;
134
135 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
136 context = kmalloc(sizeof(*context), GFP_KERNEL);
137 if (!context)
138 return ERR_PTR(-ENOMEM);
139
140 return &context->ibucontext;
141}
142
143static int c2_dealloc_ucontext(struct ib_ucontext *context)
144{
145 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
146 kfree(context);
147 return 0;
148}
149
150static int c2_mmap_uar(struct ib_ucontext *context, struct vm_area_struct *vma)
151{
152 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
153 return -ENOSYS;
154}
155
156static struct ib_pd *c2_alloc_pd(struct ib_device *ibdev,
157 struct ib_ucontext *context,
158 struct ib_udata *udata)
159{
160 struct c2_pd *pd;
161 int err;
162
163 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
164
165 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
166 if (!pd)
167 return ERR_PTR(-ENOMEM);
168
169 err = c2_pd_alloc(to_c2dev(ibdev), !context, pd);
170 if (err) {
171 kfree(pd);
172 return ERR_PTR(err);
173 }
174
175 if (context) {
176 if (ib_copy_to_udata(udata, &pd->pd_id, sizeof(__u32))) {
177 c2_pd_free(to_c2dev(ibdev), pd);
178 kfree(pd);
179 return ERR_PTR(-EFAULT);
180 }
181 }
182
183 return &pd->ibpd;
184}
185
186static int c2_dealloc_pd(struct ib_pd *pd)
187{
188 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
189 c2_pd_free(to_c2dev(pd->device), to_c2pd(pd));
190 kfree(pd);
191
192 return 0;
193}
194
195static struct ib_ah *c2_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
196{
197 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
198 return ERR_PTR(-ENOSYS);
199}
200
201static int c2_ah_destroy(struct ib_ah *ah)
202{
203 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
204 return -ENOSYS;
205}
206
207static void c2_add_ref(struct ib_qp *ibqp)
208{
209 struct c2_qp *qp;
210 BUG_ON(!ibqp);
211 qp = to_c2qp(ibqp);
212 atomic_inc(&qp->refcount);
213}
214
215static void c2_rem_ref(struct ib_qp *ibqp)
216{
217 struct c2_qp *qp;
218 BUG_ON(!ibqp);
219 qp = to_c2qp(ibqp);
220 if (atomic_dec_and_test(&qp->refcount))
221 wake_up(&qp->wait);
222}
223
224struct ib_qp *c2_get_qp(struct ib_device *device, int qpn)
225{
226 struct c2_dev* c2dev = to_c2dev(device);
227 struct c2_qp *qp;
228
229 qp = c2_find_qpn(c2dev, qpn);
230 pr_debug("%s Returning QP=%p for QPN=%d, device=%p, refcount=%d\n",
231 __FUNCTION__, qp, qpn, device,
232 (qp?atomic_read(&qp->refcount):0));
233
234 return (qp?&qp->ibqp:NULL);
235}
236
237static struct ib_qp *c2_create_qp(struct ib_pd *pd,
238 struct ib_qp_init_attr *init_attr,
239 struct ib_udata *udata)
240{
241 struct c2_qp *qp;
242 int err;
243
244 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
245
246 switch (init_attr->qp_type) {
247 case IB_QPT_RC:
248 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
249 if (!qp) {
250 pr_debug("%s: Unable to allocate QP\n", __FUNCTION__);
251 return ERR_PTR(-ENOMEM);
252 }
253 spin_lock_init(&qp->lock);
254 if (pd->uobject) {
255 /* userspace specific */
256 }
257
258 err = c2_alloc_qp(to_c2dev(pd->device),
259 to_c2pd(pd), init_attr, qp);
260
261 if (err && pd->uobject) {
262 /* userspace specific */
263 }
264
265 break;
266 default:
267 pr_debug("%s: Invalid QP type: %d\n", __FUNCTION__,
268 init_attr->qp_type);
269 return ERR_PTR(-EINVAL);
270 break;
271 }
272
273 if (err) {
274 kfree(qp);
275 return ERR_PTR(err);
276 }
277
278 return &qp->ibqp;
279}
280
281static int c2_destroy_qp(struct ib_qp *ib_qp)
282{
283 struct c2_qp *qp = to_c2qp(ib_qp);
284
285 pr_debug("%s:%u qp=%p,qp->state=%d\n",
286 __FUNCTION__, __LINE__,ib_qp,qp->state);
287 c2_free_qp(to_c2dev(ib_qp->device), qp);
288 kfree(qp);
289 return 0;
290}
291
292static struct ib_cq *c2_create_cq(struct ib_device *ibdev, int entries,
293 struct ib_ucontext *context,
294 struct ib_udata *udata)
295{
296 struct c2_cq *cq;
297 int err;
298
299 cq = kmalloc(sizeof(*cq), GFP_KERNEL);
300 if (!cq) {
301 pr_debug("%s: Unable to allocate CQ\n", __FUNCTION__);
302 return ERR_PTR(-ENOMEM);
303 }
304
305 err = c2_init_cq(to_c2dev(ibdev), entries, NULL, cq);
306 if (err) {
307 pr_debug("%s: error initializing CQ\n", __FUNCTION__);
308 kfree(cq);
309 return ERR_PTR(err);
310 }
311
312 return &cq->ibcq;
313}
314
315static int c2_destroy_cq(struct ib_cq *ib_cq)
316{
317 struct c2_cq *cq = to_c2cq(ib_cq);
318
319 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
320
321 c2_free_cq(to_c2dev(ib_cq->device), cq);
322 kfree(cq);
323
324 return 0;
325}
326
327static inline u32 c2_convert_access(int acc)
328{
329 return (acc & IB_ACCESS_REMOTE_WRITE ? C2_ACF_REMOTE_WRITE : 0) |
330 (acc & IB_ACCESS_REMOTE_READ ? C2_ACF_REMOTE_READ : 0) |
331 (acc & IB_ACCESS_LOCAL_WRITE ? C2_ACF_LOCAL_WRITE : 0) |
332 C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
333}
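
For example, by the table above:

	/*
	 * c2_convert_access(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE)
	 *   == C2_ACF_LOCAL_WRITE | C2_ACF_REMOTE_WRITE |
	 *      C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND
	 * Local read and window-bind rights are always granted.
	 */
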
334
335static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
336 struct ib_phys_buf *buffer_list,
337 int num_phys_buf, int acc, u64 * iova_start)
338{
339 struct c2_mr *mr;
340 u64 *page_list;
341 u32 total_len;
342 int err, i, j, k, page_shift, pbl_depth;
343
344 pbl_depth = 0;
345 total_len = 0;
346
347 page_shift = PAGE_SHIFT;
348 /*
349 * If there is only 1 buffer we assume this could
350	 * be a map of all physical memory, so use a 32KB page size.
351 */
352 if (num_phys_buf == 1)
353 page_shift += 3;
354
355 for (i = 0; i < num_phys_buf; i++) {
356
357 if (buffer_list[i].addr & ~PAGE_MASK) {
358 pr_debug("Unaligned Memory Buffer: 0x%x\n",
359 (unsigned int) buffer_list[i].addr);
360 return ERR_PTR(-EINVAL);
361 }
362
363 if (!buffer_list[i].size) {
364 pr_debug("Invalid Buffer Size\n");
365 return ERR_PTR(-EINVAL);
366 }
367
368 total_len += buffer_list[i].size;
369 pbl_depth += ALIGN(buffer_list[i].size,
370 (1 << page_shift)) >> page_shift;
371 }
372
373 page_list = vmalloc(sizeof(u64) * pbl_depth);
374 if (!page_list) {
375 pr_debug("couldn't vmalloc page_list of size %zd\n",
376 (sizeof(u64) * pbl_depth));
377 return ERR_PTR(-ENOMEM);
378 }
379
380 for (i = 0, j = 0; i < num_phys_buf; i++) {
381
382 int naddrs;
383
384 naddrs = ALIGN(buffer_list[i].size,
385 (1 << page_shift)) >> page_shift;
386 for (k = 0; k < naddrs; k++)
387 page_list[j++] = (buffer_list[i].addr +
388 (k << page_shift));
389 }
390
391 mr = kmalloc(sizeof(*mr), GFP_KERNEL);
392 if (!mr)
393 return ERR_PTR(-ENOMEM);
394
395 mr->pd = to_c2pd(ib_pd);
396 pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
397 "*iova_start %llx, first pa %llx, last pa %llx\n",
398 __FUNCTION__, page_shift, pbl_depth, total_len,
399 *iova_start, page_list[0], page_list[pbl_depth-1]);
400 err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
401 (1 << page_shift), pbl_depth,
402 total_len, 0, iova_start,
403 c2_convert_access(acc), mr);
404 vfree(page_list);
405 if (err) {
406 kfree(mr);
407 return ERR_PTR(err);
408 }
409
410 return &mr->ibmr;
411}
412
413static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
414{
415 struct ib_phys_buf bl;
416 u64 kva = 0;
417
418 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
419
420 /* AMSO1100 limit */
421 bl.size = 0xffffffff;
422 bl.addr = 0;
423 return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
424}
425
426static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
427 int acc, struct ib_udata *udata)
428{
429 u64 *pages;
430 u64 kva = 0;
431 int shift, n, len;
432 int i, j, k;
433 int err = 0;
434 struct ib_umem_chunk *chunk;
435 struct c2_pd *c2pd = to_c2pd(pd);
436 struct c2_mr *c2mr;
437
438 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
439 shift = ffs(region->page_size) - 1;
440
441 c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
442 if (!c2mr)
443 return ERR_PTR(-ENOMEM);
444 c2mr->pd = c2pd;
445
446 n = 0;
447 list_for_each_entry(chunk, &region->chunk_list, list)
448 n += chunk->nents;
449
450 pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
451 if (!pages) {
452 err = -ENOMEM;
453 goto err;
454 }
455
456 i = 0;
457 list_for_each_entry(chunk, &region->chunk_list, list) {
458 for (j = 0; j < chunk->nmap; ++j) {
459 len = sg_dma_len(&chunk->page_list[j]) >> shift;
460 for (k = 0; k < len; ++k) {
461 pages[i++] =
462 sg_dma_address(&chunk->page_list[j]) +
463 (region->page_size * k);
464 }
465 }
466 }
467
468 kva = (u64)region->virt_base;
469 err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
470 pages,
471 region->page_size,
472 i,
473 region->length,
474 region->offset,
475 &kva,
476 c2_convert_access(acc),
477 c2mr);
478 kfree(pages);
479 if (err) {
480 kfree(c2mr);
481 return ERR_PTR(err);
482 }
483 return &c2mr->ibmr;
484
485err:
486 kfree(c2mr);
487 return ERR_PTR(err);
488}
489
490static int c2_dereg_mr(struct ib_mr *ib_mr)
491{
492 struct c2_mr *mr = to_c2mr(ib_mr);
493 int err;
494
495 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
496
497 err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
498 if (err)
499 pr_debug("c2_stag_dealloc failed: %d\n", err);
500 else
501 kfree(mr);
502
503 return err;
504}
505
506static ssize_t show_rev(struct class_device *cdev, char *buf)
507{
508 struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
509 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
510 return sprintf(buf, "%x\n", dev->props.hw_ver);
511}
512
513static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
514{
515 struct c2_dev *dev = container_of(cdev, struct c2_dev, ibdev.class_dev);
516 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
517 return sprintf(buf, "%x.%x.%x\n",
518 (int) (dev->props.fw_ver >> 32),
519 (int) (dev->props.fw_ver >> 16) & 0xffff,
520 (int) (dev->props.fw_ver & 0xffff));
521}
522
523static ssize_t show_hca(struct class_device *cdev, char *buf)
524{
525 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
526 return sprintf(buf, "AMSO1100\n");
527}
528
529static ssize_t show_board(struct class_device *cdev, char *buf)
530{
531 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
532 return sprintf(buf, "%.*s\n", 32, "AMSO1100 Board ID");
533}
534
535static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
536static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
537static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
538static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
539
540static struct class_device_attribute *c2_class_attributes[] = {
541 &class_device_attr_hw_rev,
542 &class_device_attr_fw_ver,
543 &class_device_attr_hca_type,
544 &class_device_attr_board_id
545};
546
547static int c2_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
548 int attr_mask, struct ib_udata *udata)
549{
550 int err;
551
552 err =
553 c2_qp_modify(to_c2dev(ibqp->device), to_c2qp(ibqp), attr,
554 attr_mask);
555
556 return err;
557}
558
559static int c2_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
560{
561 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
562 return -ENOSYS;
563}
564
565static int c2_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
566{
567 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
568 return -ENOSYS;
569}
570
571static int c2_process_mad(struct ib_device *ibdev,
572 int mad_flags,
573 u8 port_num,
574 struct ib_wc *in_wc,
575 struct ib_grh *in_grh,
576 struct ib_mad *in_mad, struct ib_mad *out_mad)
577{
578 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
579 return -ENOSYS;
580}
581
582static int c2_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
583{
584 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
585
586 /* Request a connection */
587 return c2_llp_connect(cm_id, iw_param);
588}
589
590static int c2_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
591{
592 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
593
594 /* Accept the new connection */
595 return c2_llp_accept(cm_id, iw_param);
596}
597
598static int c2_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
599{
600 int err;
601
602 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
603
604 err = c2_llp_reject(cm_id, pdata, pdata_len);
605 return err;
606}
607
608static int c2_service_create(struct iw_cm_id *cm_id, int backlog)
609{
610 int err;
611
612 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
613 err = c2_llp_service_create(cm_id, backlog);
614 pr_debug("%s:%u err=%d\n",
615 __FUNCTION__, __LINE__,
616 err);
617 return err;
618}
619
620static int c2_service_destroy(struct iw_cm_id *cm_id)
621{
622 int err;
623 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
624
625 err = c2_llp_service_destroy(cm_id);
626
627 return err;
628}
629
630static int c2_pseudo_up(struct net_device *netdev)
631{
632 struct in_device *ind;
633 struct c2_dev *c2dev = netdev->priv;
634
635 ind = in_dev_get(netdev);
636 if (!ind)
637 return 0;
638
639 pr_debug("adding...\n");
640 for_ifa(ind) {
641#ifdef DEBUG
642 u8 *ip = (u8 *) & ifa->ifa_address;
643
644 pr_debug("%s: %d.%d.%d.%d\n",
645 ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
646#endif
647 c2_add_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
648 }
649 endfor_ifa(ind);
650 in_dev_put(ind);
651
652 return 0;
653}
654
655static int c2_pseudo_down(struct net_device *netdev)
656{
657 struct in_device *ind;
658 struct c2_dev *c2dev = netdev->priv;
659
660 ind = in_dev_get(netdev);
661 if (!ind)
662 return 0;
663
664 pr_debug("deleting...\n");
665 for_ifa(ind) {
666#ifdef DEBUG
667 u8 *ip = (u8 *) & ifa->ifa_address;
668
669 pr_debug("%s: %d.%d.%d.%d\n",
670 ifa->ifa_label, ip[0], ip[1], ip[2], ip[3]);
671#endif
672 c2_del_addr(c2dev, ifa->ifa_address, ifa->ifa_mask);
673 }
674 endfor_ifa(ind);
675 in_dev_put(ind);
676
677 return 0;
678}
679
680static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
681{
682 kfree_skb(skb);
683 return NETDEV_TX_OK;
684}
685
686static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
687{
688 int ret = 0;
689
690 if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
691 return -EINVAL;
692
693 netdev->mtu = new_mtu;
694
695	/* TODO: Tell the RNIC about the new RDMA interface MTU */
696 return ret;
697}
698
699static void setup(struct net_device *netdev)
700{
701 SET_MODULE_OWNER(netdev);
702 netdev->open = c2_pseudo_up;
703 netdev->stop = c2_pseudo_down;
704 netdev->hard_start_xmit = c2_pseudo_xmit_frame;
705 netdev->get_stats = NULL;
706 netdev->tx_timeout = NULL;
707 netdev->set_mac_address = NULL;
708 netdev->change_mtu = c2_pseudo_change_mtu;
709 netdev->watchdog_timeo = 0;
710 netdev->type = ARPHRD_ETHER;
711 netdev->mtu = 1500;
712 netdev->hard_header_len = ETH_HLEN;
713 netdev->addr_len = ETH_ALEN;
714 netdev->tx_queue_len = 0;
715 netdev->flags |= IFF_NOARP;
716 return;
717}
718
719static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
720{
721 char name[IFNAMSIZ];
722 struct net_device *netdev;
723
724 /* change ethxxx to iwxxx */
725 strcpy(name, "iw");
726 strcat(name, &c2dev->netdev->name[3]);
727 netdev = alloc_netdev(sizeof(*netdev), name, setup);
728 if (!netdev) {
729		printk(KERN_ERR PFX "%s - etherdev alloc failed\n",
730 __FUNCTION__);
731 return NULL;
732 }
733
734 netdev->priv = c2dev;
735
736 SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
737
738 memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
739
740 /* Print out the MAC address */
741 pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
742 netdev->name,
743 netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
744 netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
745
746#if 0
747 /* Disable network packets */
748 netif_stop_queue(netdev);
749#endif
750 return netdev;
751}
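
The name mangling above simply replaces the "eth" prefix of the underlying netdev with "iw":

	/* Example: if c2dev->netdev->name is "eth2", the pseudo netdev is named "iw2". */
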
752
753int c2_register_device(struct c2_dev *dev)
754{
755 int ret;
756 int i;
757
758 /* Register pseudo network device */
759 dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
760	if (!dev->pseudo_netdev)
761		return -ENOMEM;
762	ret = register_netdev(dev->pseudo_netdev);
763	if (ret) {
764		printk(KERN_ERR PFX
765		       "Unable to register netdev, ret = %d\n", ret);
766		free_netdev(dev->pseudo_netdev);
767		return ret;
768	}
769
770 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
771 strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
772 dev->ibdev.owner = THIS_MODULE;
773 dev->ibdev.uverbs_cmd_mask =
774 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
775 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
776 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
777 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
778 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
779 (1ull << IB_USER_VERBS_CMD_REG_MR) |
780 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
781 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
782 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
783 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
784 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
785 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
786 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
787 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
788 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
789 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
790 (1ull << IB_USER_VERBS_CMD_POST_RECV);
791
792 dev->ibdev.node_type = RDMA_NODE_RNIC;
793 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
794 memcpy(&dev->ibdev.node_guid, dev->pseudo_netdev->dev_addr, 6);
795 dev->ibdev.phys_port_cnt = 1;
796 dev->ibdev.dma_device = &dev->pcidev->dev;
797 dev->ibdev.class_dev.dev = &dev->pcidev->dev;
798 dev->ibdev.query_device = c2_query_device;
799 dev->ibdev.query_port = c2_query_port;
800 dev->ibdev.modify_port = c2_modify_port;
801 dev->ibdev.query_pkey = c2_query_pkey;
802 dev->ibdev.query_gid = c2_query_gid;
803 dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
804 dev->ibdev.dealloc_ucontext = c2_dealloc_ucontext;
805 dev->ibdev.mmap = c2_mmap_uar;
806 dev->ibdev.alloc_pd = c2_alloc_pd;
807 dev->ibdev.dealloc_pd = c2_dealloc_pd;
808 dev->ibdev.create_ah = c2_ah_create;
809 dev->ibdev.destroy_ah = c2_ah_destroy;
810 dev->ibdev.create_qp = c2_create_qp;
811 dev->ibdev.modify_qp = c2_modify_qp;
812 dev->ibdev.destroy_qp = c2_destroy_qp;
813 dev->ibdev.create_cq = c2_create_cq;
814 dev->ibdev.destroy_cq = c2_destroy_cq;
815 dev->ibdev.poll_cq = c2_poll_cq;
816 dev->ibdev.get_dma_mr = c2_get_dma_mr;
817 dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
818 dev->ibdev.reg_user_mr = c2_reg_user_mr;
819 dev->ibdev.dereg_mr = c2_dereg_mr;
820
821 dev->ibdev.alloc_fmr = NULL;
822 dev->ibdev.unmap_fmr = NULL;
823 dev->ibdev.dealloc_fmr = NULL;
824 dev->ibdev.map_phys_fmr = NULL;
825
826 dev->ibdev.attach_mcast = c2_multicast_attach;
827 dev->ibdev.detach_mcast = c2_multicast_detach;
828 dev->ibdev.process_mad = c2_process_mad;
829
830 dev->ibdev.req_notify_cq = c2_arm_cq;
831 dev->ibdev.post_send = c2_post_send;
832 dev->ibdev.post_recv = c2_post_receive;
833
834 dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
835 dev->ibdev.iwcm->add_ref = c2_add_ref;
836 dev->ibdev.iwcm->rem_ref = c2_rem_ref;
837 dev->ibdev.iwcm->get_qp = c2_get_qp;
838 dev->ibdev.iwcm->connect = c2_connect;
839 dev->ibdev.iwcm->accept = c2_accept;
840 dev->ibdev.iwcm->reject = c2_reject;
841 dev->ibdev.iwcm->create_listen = c2_service_create;
842 dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
843
844 ret = ib_register_device(&dev->ibdev);
845 if (ret)
846 return ret;
847
848 for (i = 0; i < ARRAY_SIZE(c2_class_attributes); ++i) {
849 ret = class_device_create_file(&dev->ibdev.class_dev,
850 c2_class_attributes[i]);
851 if (ret) {
852 unregister_netdev(dev->pseudo_netdev);
853 free_netdev(dev->pseudo_netdev);
854 ib_unregister_device(&dev->ibdev);
855 return ret;
856 }
857 }
858
859 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
860 return 0;
861}
862
863void c2_unregister_device(struct c2_dev *dev)
864{
865 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
866 unregister_netdev(dev->pseudo_netdev);
867 free_netdev(dev->pseudo_netdev);
868 ib_unregister_device(&dev->ibdev);
869}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
new file mode 100644
index 000000000000..fc906223220f
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -0,0 +1,181 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#ifndef C2_PROVIDER_H
36#define C2_PROVIDER_H
37#include <linux/inetdevice.h>
38
39#include <rdma/ib_verbs.h>
40#include <rdma/ib_pack.h>
41
42#include "c2_mq.h"
43#include <rdma/iw_cm.h>
44
45#define C2_MPT_FLAG_ATOMIC (1 << 14)
46#define C2_MPT_FLAG_REMOTE_WRITE (1 << 13)
47#define C2_MPT_FLAG_REMOTE_READ (1 << 12)
48#define C2_MPT_FLAG_LOCAL_WRITE (1 << 11)
49#define C2_MPT_FLAG_LOCAL_READ (1 << 10)
50
51struct c2_buf_list {
52 void *buf;
53 DECLARE_PCI_UNMAP_ADDR(mapping)
54};
55
56
57/* The user context keeps track of objects allocated for a
58 * particular user-mode client. */
59struct c2_ucontext {
60 struct ib_ucontext ibucontext;
61};
62
63struct c2_mtt;
64
65/* All objects associated with a PD are kept in the
66 * associated user context if present.
67 */
68struct c2_pd {
69 struct ib_pd ibpd;
70 u32 pd_id;
71};
72
73struct c2_mr {
74 struct ib_mr ibmr;
75 struct c2_pd *pd;
76};
77
78struct c2_av;
79
80enum c2_ah_type {
81 C2_AH_ON_HCA,
82 C2_AH_PCI_POOL,
83 C2_AH_KMALLOC
84};
85
86struct c2_ah {
87 struct ib_ah ibah;
88};
89
90struct c2_cq {
91 struct ib_cq ibcq;
92 spinlock_t lock;
93 atomic_t refcount;
94 int cqn;
95 int is_kernel;
96 wait_queue_head_t wait;
97
98 u32 adapter_handle;
99 struct c2_mq mq;
100};
101
102struct c2_wq {
103 spinlock_t lock;
104};
105struct iw_cm_id;
106struct c2_qp {
107 struct ib_qp ibqp;
108 struct iw_cm_id *cm_id;
109 spinlock_t lock;
110 atomic_t refcount;
111 wait_queue_head_t wait;
112 int qpn;
113
114 u32 adapter_handle;
115 u32 send_sgl_depth;
116 u32 recv_sgl_depth;
117 u32 rdma_write_sgl_depth;
118 u8 state;
119
120 struct c2_mq sq_mq;
121 struct c2_mq rq_mq;
122};
123
124struct c2_cr_query_attrs {
125 u32 local_addr;
126 u32 remote_addr;
127 u16 local_port;
128 u16 remote_port;
129};
130
131static inline struct c2_pd *to_c2pd(struct ib_pd *ibpd)
132{
133 return container_of(ibpd, struct c2_pd, ibpd);
134}
135
136static inline struct c2_ucontext *to_c2ucontext(struct ib_ucontext *ibucontext)
137{
138 return container_of(ibucontext, struct c2_ucontext, ibucontext);
139}
140
141static inline struct c2_mr *to_c2mr(struct ib_mr *ibmr)
142{
143 return container_of(ibmr, struct c2_mr, ibmr);
144}
145
146
147static inline struct c2_ah *to_c2ah(struct ib_ah *ibah)
148{
149 return container_of(ibah, struct c2_ah, ibah);
150}
151
152static inline struct c2_cq *to_c2cq(struct ib_cq *ibcq)
153{
154 return container_of(ibcq, struct c2_cq, ibcq);
155}
156
157static inline struct c2_qp *to_c2qp(struct ib_qp *ibqp)
158{
159 return container_of(ibqp, struct c2_qp, ibqp);
160}
161
162static inline int is_rnic_addr(struct net_device *netdev, u32 addr)
163{
164 struct in_device *ind;
165 int ret = 0;
166
167 ind = in_dev_get(netdev);
168 if (!ind)
169 return 0;
170
171 for_ifa(ind) {
172 if (ifa->ifa_address == addr) {
173 ret = 1;
174 break;
175 }
176 }
177 endfor_ifa(ind);
178 in_dev_put(ind);
179 return ret;
180}
181#endif /* C2_PROVIDER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
new file mode 100644
index 000000000000..12261132b077
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -0,0 +1,975 @@
1/*
2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 */
37
38#include "c2.h"
39#include "c2_vq.h"
40#include "c2_status.h"
41
42#define C2_MAX_ORD_PER_QP 128
43#define C2_MAX_IRD_PER_QP 128
44
45#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | hint_count)
46#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
47#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
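
A quick worked example of the hint packing:

	/*
	 * C2_HINT_MAKE(3, 0x12) == 0x00030012;
	 * C2_HINT_GET_INDEX(0x00030012) == 3, C2_HINT_GET_COUNT(0x00030012) == 0x12.
	 * (GET_INDEX masks with 0x7FFF0000, so the top bit of the word is ignored.)
	 */
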
48
49#define NO_SUPPORT -1
50static const u8 c2_opcode[] = {
51 [IB_WR_SEND] = C2_WR_TYPE_SEND,
52 [IB_WR_SEND_WITH_IMM] = NO_SUPPORT,
53 [IB_WR_RDMA_WRITE] = C2_WR_TYPE_RDMA_WRITE,
54 [IB_WR_RDMA_WRITE_WITH_IMM] = NO_SUPPORT,
55 [IB_WR_RDMA_READ] = C2_WR_TYPE_RDMA_READ,
56 [IB_WR_ATOMIC_CMP_AND_SWP] = NO_SUPPORT,
57 [IB_WR_ATOMIC_FETCH_AND_ADD] = NO_SUPPORT,
58};
59
60static int to_c2_state(enum ib_qp_state ib_state)
61{
62 switch (ib_state) {
63 case IB_QPS_RESET:
64 return C2_QP_STATE_IDLE;
65 case IB_QPS_RTS:
66 return C2_QP_STATE_RTS;
67 case IB_QPS_SQD:
68 return C2_QP_STATE_CLOSING;
69 case IB_QPS_SQE:
70 return C2_QP_STATE_CLOSING;
71 case IB_QPS_ERR:
72 return C2_QP_STATE_ERROR;
73 default:
74 return -1;
75 }
76}
77
78static int to_ib_state(enum c2_qp_state c2_state)
79{
80 switch (c2_state) {
81 case C2_QP_STATE_IDLE:
82 return IB_QPS_RESET;
83 case C2_QP_STATE_CONNECTING:
84 return IB_QPS_RTR;
85 case C2_QP_STATE_RTS:
86 return IB_QPS_RTS;
87 case C2_QP_STATE_CLOSING:
88 return IB_QPS_SQD;
89 case C2_QP_STATE_ERROR:
90 return IB_QPS_ERR;
91 case C2_QP_STATE_TERMINATE:
92 return IB_QPS_SQE;
93 default:
94 return -1;
95 }
96}
97
98static const char *to_ib_state_str(int ib_state)
99{
100 static const char *state_str[] = {
101 "IB_QPS_RESET",
102 "IB_QPS_INIT",
103 "IB_QPS_RTR",
104 "IB_QPS_RTS",
105 "IB_QPS_SQD",
106 "IB_QPS_SQE",
107 "IB_QPS_ERR"
108 };
109 if (ib_state < IB_QPS_RESET ||
110 ib_state > IB_QPS_ERR)
111 return "<invalid IB QP state>";
112
113 ib_state -= IB_QPS_RESET;
114 return state_str[ib_state];
115}
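
Taken together, the three state helpers above give mappings such as:

	/*
	 * to_c2_state(IB_QPS_RTS)          == C2_QP_STATE_RTS
	 * to_ib_state(C2_QP_STATE_CLOSING) == IB_QPS_SQD
	 * to_ib_state_str(IB_QPS_SQD)      returns "IB_QPS_SQD"
	 */
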
116
117void c2_set_qp_state(struct c2_qp *qp, int c2_state)
118{
119 int new_state = to_ib_state(c2_state);
120
121 pr_debug("%s: qp[%p] state modify %s --> %s\n",
122 __FUNCTION__,
123 qp,
124 to_ib_state_str(qp->state),
125 to_ib_state_str(new_state));
126 qp->state = new_state;
127}
128
129#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
130
131int c2_qp_modify(struct c2_dev *c2dev, struct c2_qp *qp,
132 struct ib_qp_attr *attr, int attr_mask)
133{
134 struct c2wr_qp_modify_req wr;
135 struct c2wr_qp_modify_rep *reply;
136 struct c2_vq_req *vq_req;
137 unsigned long flags;
138 u8 next_state;
139 int err;
140
141 pr_debug("%s:%d qp=%p, %s --> %s\n",
142 __FUNCTION__, __LINE__,
143 qp,
144 to_ib_state_str(qp->state),
145 to_ib_state_str(attr->qp_state));
146
147 vq_req = vq_req_alloc(c2dev);
148 if (!vq_req)
149 return -ENOMEM;
150
151 c2_wr_set_id(&wr, CCWR_QP_MODIFY);
152 wr.hdr.context = (unsigned long) vq_req;
153 wr.rnic_handle = c2dev->adapter_handle;
154 wr.qp_handle = qp->adapter_handle;
155 wr.ord = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
156 wr.ird = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
157 wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
158 wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
159
160 if (attr_mask & IB_QP_STATE) {
161 /* Ensure the state is valid */
162 if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
163 return -EINVAL;
164
165 wr.next_qp_state = cpu_to_be32(to_c2_state(attr->qp_state));
166
167 if (attr->qp_state == IB_QPS_ERR) {
168 spin_lock_irqsave(&qp->lock, flags);
169 if (qp->cm_id && qp->state == IB_QPS_RTS) {
170 pr_debug("Generating CLOSE event for QP-->ERR, "
171 "qp=%p, cm_id=%p\n",qp,qp->cm_id);
				/* Generate a CLOSE event */
173 vq_req->cm_id = qp->cm_id;
174 vq_req->event = IW_CM_EVENT_CLOSE;
175 }
176 spin_unlock_irqrestore(&qp->lock, flags);
177 }
178 next_state = attr->qp_state;
179
180 } else if (attr_mask & IB_QP_CUR_STATE) {
181
		if (attr->cur_qp_state != IB_QPS_RTR &&
		    attr->cur_qp_state != IB_QPS_RTS &&
		    attr->cur_qp_state != IB_QPS_SQD &&
		    attr->cur_qp_state != IB_QPS_SQE) {
			err = -EINVAL;
			goto bail0;
		}
		wr.next_qp_state =
			cpu_to_be32(to_c2_state(attr->cur_qp_state));
190
191 next_state = attr->cur_qp_state;
192
193 } else {
194 err = 0;
195 goto bail0;
196 }
197
198 /* reference the request struct */
199 vq_req_get(c2dev, vq_req);
200
201 err = vq_send_wr(c2dev, (union c2wr *) & wr);
202 if (err) {
203 vq_req_put(c2dev, vq_req);
204 goto bail0;
205 }
206
207 err = vq_wait_for_reply(c2dev, vq_req);
208 if (err)
209 goto bail0;
210
211 reply = (struct c2wr_qp_modify_rep *) (unsigned long) vq_req->reply_msg;
212 if (!reply) {
213 err = -ENOMEM;
214 goto bail0;
215 }
216
217 err = c2_errno(reply);
218 if (!err)
219 qp->state = next_state;
220#ifdef DEBUG
221 else
222 pr_debug("%s: c2_errno=%d\n", __FUNCTION__, err);
223#endif
224 /*
	 * If we're transitioning to error and generating the close event
	 * here, we need to drop the cm_id reference now because the
	 * adapter will not generate a close event of its own.
228 */
229 spin_lock_irqsave(&qp->lock, flags);
230 if (vq_req->event==IW_CM_EVENT_CLOSE && qp->cm_id) {
231 qp->cm_id->rem_ref(qp->cm_id);
232 qp->cm_id = NULL;
233 }
234 spin_unlock_irqrestore(&qp->lock, flags);
235
236 vq_repbuf_free(c2dev, reply);
237 bail0:
238 vq_req_free(c2dev, vq_req);
239
240 pr_debug("%s:%d qp=%p, cur_state=%s\n",
241 __FUNCTION__, __LINE__,
242 qp,
243 to_ib_state_str(qp->state));
244 return err;
245}
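/*
 * Editor's sketch of a typical caller (hypothetical, for illustration
 * only; in the kernel this path is normally reached via the provider's
 * modify-QP verb):
 *
 *	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 *	int err;
 *
 *	err = c2_qp_modify(c2dev, qp, &attr, IB_QP_STATE);
 *	if (err)
 *		pr_debug("move to ERR failed: %d\n", err);
 */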
246
247int c2_qp_set_read_limits(struct c2_dev *c2dev, struct c2_qp *qp,
248 int ord, int ird)
249{
250 struct c2wr_qp_modify_req wr;
251 struct c2wr_qp_modify_rep *reply;
252 struct c2_vq_req *vq_req;
253 int err;
254
255 vq_req = vq_req_alloc(c2dev);
256 if (!vq_req)
257 return -ENOMEM;
258
259 c2_wr_set_id(&wr, CCWR_QP_MODIFY);
260 wr.hdr.context = (unsigned long) vq_req;
261 wr.rnic_handle = c2dev->adapter_handle;
262 wr.qp_handle = qp->adapter_handle;
263 wr.ord = cpu_to_be32(ord);
264 wr.ird = cpu_to_be32(ird);
265 wr.sq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
266 wr.rq_depth = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
267 wr.next_qp_state = cpu_to_be32(C2_QP_NO_ATTR_CHANGE);
268
269 /* reference the request struct */
270 vq_req_get(c2dev, vq_req);
271
272 err = vq_send_wr(c2dev, (union c2wr *) & wr);
273 if (err) {
274 vq_req_put(c2dev, vq_req);
275 goto bail0;
276 }
277
278 err = vq_wait_for_reply(c2dev, vq_req);
279 if (err)
280 goto bail0;
281
282 reply = (struct c2wr_qp_modify_rep *) (unsigned long)
283 vq_req->reply_msg;
284 if (!reply) {
285 err = -ENOMEM;
286 goto bail0;
287 }
288
289 err = c2_errno(reply);
290 vq_repbuf_free(c2dev, reply);
291 bail0:
292 vq_req_free(c2dev, vq_req);
293 return err;
294}
295
296static int destroy_qp(struct c2_dev *c2dev, struct c2_qp *qp)
297{
298 struct c2_vq_req *vq_req;
299 struct c2wr_qp_destroy_req wr;
300 struct c2wr_qp_destroy_rep *reply;
301 unsigned long flags;
302 int err;
303
304 /*
305 * Allocate a verb request message
306 */
307 vq_req = vq_req_alloc(c2dev);
308 if (!vq_req) {
309 return -ENOMEM;
310 }
311
312 /*
313 * Initialize the WR
314 */
315 c2_wr_set_id(&wr, CCWR_QP_DESTROY);
316 wr.hdr.context = (unsigned long) vq_req;
317 wr.rnic_handle = c2dev->adapter_handle;
318 wr.qp_handle = qp->adapter_handle;
319
320 /*
321 * reference the request struct. dereferenced in the int handler.
322 */
323 vq_req_get(c2dev, vq_req);
324
325 spin_lock_irqsave(&qp->lock, flags);
326 if (qp->cm_id && qp->state == IB_QPS_RTS) {
327 pr_debug("destroy_qp: generating CLOSE event for QP-->ERR, "
328 "qp=%p, cm_id=%p\n",qp,qp->cm_id);
		/* Generate a CLOSE event */
330 vq_req->qp = qp;
331 vq_req->cm_id = qp->cm_id;
332 vq_req->event = IW_CM_EVENT_CLOSE;
333 }
334 spin_unlock_irqrestore(&qp->lock, flags);
335
336 /*
337 * Send WR to adapter
338 */
339 err = vq_send_wr(c2dev, (union c2wr *) & wr);
340 if (err) {
341 vq_req_put(c2dev, vq_req);
342 goto bail0;
343 }
344
345 /*
346 * Wait for reply from adapter
347 */
348 err = vq_wait_for_reply(c2dev, vq_req);
349 if (err) {
350 goto bail0;
351 }
352
353 /*
354 * Process reply
355 */
356 reply = (struct c2wr_qp_destroy_rep *) (unsigned long) (vq_req->reply_msg);
357 if (!reply) {
358 err = -ENOMEM;
359 goto bail0;
360 }
361
362 spin_lock_irqsave(&qp->lock, flags);
363 if (qp->cm_id) {
364 qp->cm_id->rem_ref(qp->cm_id);
365 qp->cm_id = NULL;
366 }
367 spin_unlock_irqrestore(&qp->lock, flags);
368
369 vq_repbuf_free(c2dev, reply);
370 bail0:
371 vq_req_free(c2dev, vq_req);
372 return err;
373}
374
375static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
376{
377 int ret;
378
379 do {
380 spin_lock_irq(&c2dev->qp_table.lock);
381 ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
382 c2dev->qp_table.last++, &qp->qpn);
383 spin_unlock_irq(&c2dev->qp_table.lock);
384 } while ((ret == -EAGAIN) &&
385 idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
386 return ret;
387}
388
389static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
390{
391 spin_lock_irq(&c2dev->qp_table.lock);
392 idr_remove(&c2dev->qp_table.idr, qpn);
393 spin_unlock_irq(&c2dev->qp_table.lock);
394}
395
396struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn)
397{
398 unsigned long flags;
399 struct c2_qp *qp;
400
401 spin_lock_irqsave(&c2dev->qp_table.lock, flags);
402 qp = idr_find(&c2dev->qp_table.idr, qpn);
403 spin_unlock_irqrestore(&c2dev->qp_table.lock, flags);
404 return qp;
405}
406
407int c2_alloc_qp(struct c2_dev *c2dev,
408 struct c2_pd *pd,
409 struct ib_qp_init_attr *qp_attrs, struct c2_qp *qp)
410{
411 struct c2wr_qp_create_req wr;
412 struct c2wr_qp_create_rep *reply;
413 struct c2_vq_req *vq_req;
414 struct c2_cq *send_cq = to_c2cq(qp_attrs->send_cq);
415 struct c2_cq *recv_cq = to_c2cq(qp_attrs->recv_cq);
416 unsigned long peer_pa;
417 u32 q_size, msg_size, mmap_size;
418 void __iomem *mmap;
419 int err;
420
421 err = c2_alloc_qpn(c2dev, qp);
422 if (err)
423 return err;
424 qp->ibqp.qp_num = qp->qpn;
425 qp->ibqp.qp_type = IB_QPT_RC;
426
427 /* Allocate the SQ and RQ shared pointers */
428 qp->sq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
429 &qp->sq_mq.shared_dma, GFP_KERNEL);
430 if (!qp->sq_mq.shared) {
431 err = -ENOMEM;
432 goto bail0;
433 }
434
435 qp->rq_mq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
436 &qp->rq_mq.shared_dma, GFP_KERNEL);
437 if (!qp->rq_mq.shared) {
438 err = -ENOMEM;
439 goto bail1;
440 }
441
442 /* Allocate the verbs request */
443 vq_req = vq_req_alloc(c2dev);
444 if (vq_req == NULL) {
445 err = -ENOMEM;
446 goto bail2;
447 }
448
449 /* Initialize the work request */
450 memset(&wr, 0, sizeof(wr));
451 c2_wr_set_id(&wr, CCWR_QP_CREATE);
452 wr.hdr.context = (unsigned long) vq_req;
453 wr.rnic_handle = c2dev->adapter_handle;
454 wr.sq_cq_handle = send_cq->adapter_handle;
455 wr.rq_cq_handle = recv_cq->adapter_handle;
456 wr.sq_depth = cpu_to_be32(qp_attrs->cap.max_send_wr + 1);
457 wr.rq_depth = cpu_to_be32(qp_attrs->cap.max_recv_wr + 1);
458 wr.srq_handle = 0;
459 wr.flags = cpu_to_be32(QP_RDMA_READ | QP_RDMA_WRITE | QP_MW_BIND |
460 QP_ZERO_STAG | QP_RDMA_READ_RESPONSE);
461 wr.send_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
462 wr.recv_sgl_depth = cpu_to_be32(qp_attrs->cap.max_recv_sge);
463 wr.rdma_write_sgl_depth = cpu_to_be32(qp_attrs->cap.max_send_sge);
464 wr.shared_sq_ht = cpu_to_be64(qp->sq_mq.shared_dma);
465 wr.shared_rq_ht = cpu_to_be64(qp->rq_mq.shared_dma);
466 wr.ord = cpu_to_be32(C2_MAX_ORD_PER_QP);
467 wr.ird = cpu_to_be32(C2_MAX_IRD_PER_QP);
468 wr.pd_id = pd->pd_id;
469 wr.user_context = (unsigned long) qp;
470
471 vq_req_get(c2dev, vq_req);
472
473 /* Send the WR to the adapter */
474 err = vq_send_wr(c2dev, (union c2wr *) & wr);
475 if (err) {
476 vq_req_put(c2dev, vq_req);
477 goto bail3;
478 }
479
480 /* Wait for the verb reply */
481 err = vq_wait_for_reply(c2dev, vq_req);
482 if (err) {
483 goto bail3;
484 }
485
486 /* Process the reply */
487 reply = (struct c2wr_qp_create_rep *) (unsigned long) (vq_req->reply_msg);
488 if (!reply) {
489 err = -ENOMEM;
490 goto bail3;
491 }
492
493 if ((err = c2_wr_get_result(reply)) != 0) {
494 goto bail4;
495 }
496
497 /* Fill in the kernel QP struct */
498 atomic_set(&qp->refcount, 1);
499 qp->adapter_handle = reply->qp_handle;
500 qp->state = IB_QPS_RESET;
501 qp->send_sgl_depth = qp_attrs->cap.max_send_sge;
502 qp->rdma_write_sgl_depth = qp_attrs->cap.max_send_sge;
503 qp->recv_sgl_depth = qp_attrs->cap.max_recv_sge;
504
505 /* Initialize the SQ MQ */
506 q_size = be32_to_cpu(reply->sq_depth);
507 msg_size = be32_to_cpu(reply->sq_msg_size);
508 peer_pa = c2dev->pa + be32_to_cpu(reply->sq_mq_start);
509 mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
510 mmap = ioremap_nocache(peer_pa, mmap_size);
511 if (!mmap) {
512 err = -ENOMEM;
513 goto bail5;
514 }
515
516 c2_mq_req_init(&qp->sq_mq,
517 be32_to_cpu(reply->sq_mq_index),
518 q_size,
519 msg_size,
520 mmap + sizeof(struct c2_mq_shared), /* pool start */
521 mmap, /* peer */
522 C2_MQ_ADAPTER_TARGET);
523
524 /* Initialize the RQ mq */
525 q_size = be32_to_cpu(reply->rq_depth);
526 msg_size = be32_to_cpu(reply->rq_msg_size);
527 peer_pa = c2dev->pa + be32_to_cpu(reply->rq_mq_start);
528 mmap_size = PAGE_ALIGN(sizeof(struct c2_mq_shared) + msg_size * q_size);
529 mmap = ioremap_nocache(peer_pa, mmap_size);
530 if (!mmap) {
531 err = -ENOMEM;
532 goto bail6;
533 }
534
535 c2_mq_req_init(&qp->rq_mq,
536 be32_to_cpu(reply->rq_mq_index),
537 q_size,
538 msg_size,
539 mmap + sizeof(struct c2_mq_shared), /* pool start */
540 mmap, /* peer */
541 C2_MQ_ADAPTER_TARGET);
542
543 vq_repbuf_free(c2dev, reply);
544 vq_req_free(c2dev, vq_req);
545
546 return 0;
547
548 bail6:
549 iounmap(qp->sq_mq.peer);
550 bail5:
551 destroy_qp(c2dev, qp);
552 bail4:
553 vq_repbuf_free(c2dev, reply);
554 bail3:
555 vq_req_free(c2dev, vq_req);
556 bail2:
557 c2_free_mqsp(qp->rq_mq.shared);
558 bail1:
559 c2_free_mqsp(qp->sq_mq.shared);
560 bail0:
561 c2_free_qpn(c2dev, qp->qpn);
562 return err;
563}
564
565void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
566{
567 struct c2_cq *send_cq;
568 struct c2_cq *recv_cq;
569
570 send_cq = to_c2cq(qp->ibqp.send_cq);
571 recv_cq = to_c2cq(qp->ibqp.recv_cq);
572
573 /*
574 * Lock CQs here, so that CQ polling code can do QP lookup
575 * without taking a lock.
576 */
577 spin_lock_irq(&send_cq->lock);
578 if (send_cq != recv_cq)
579 spin_lock(&recv_cq->lock);
580
581 c2_free_qpn(c2dev, qp->qpn);
582
583 if (send_cq != recv_cq)
584 spin_unlock(&recv_cq->lock);
585 spin_unlock_irq(&send_cq->lock);
586
587 /*
	 * Destroy the QP in the RNIC...
589 */
590 destroy_qp(c2dev, qp);
591
592 /*
593 * Mark any unreaped CQEs as null and void.
594 */
595 c2_cq_clean(c2dev, qp, send_cq->cqn);
596 if (send_cq != recv_cq)
597 c2_cq_clean(c2dev, qp, recv_cq->cqn);
598 /*
599 * Unmap the MQs and return the shared pointers
600 * to the message pool.
601 */
602 iounmap(qp->sq_mq.peer);
603 iounmap(qp->rq_mq.peer);
604 c2_free_mqsp(qp->sq_mq.shared);
605 c2_free_mqsp(qp->rq_mq.shared);
606
607 atomic_dec(&qp->refcount);
608 wait_event(qp->wait, !atomic_read(&qp->refcount));
609}
610
611/*
612 * Function: move_sgl
613 *
614 * Description:
615 * Move an SGL from the user's work request struct into a CCIL Work Request
 * message, swapping to WR byte order and ensuring that the total length doesn't
617 * overflow.
618 *
619 * IN:
620 * dst - ptr to CCIL Work Request message SGL memory.
 *	src -	ptr to the consumer's SGL memory.
622 *
623 * OUT: none
624 *
625 * Return:
626 * CCIL status codes.
627 */
628static int
629move_sgl(struct c2_data_addr * dst, struct ib_sge *src, int count, u32 * p_len,
630 u8 * actual_count)
631{
632 u32 tot = 0; /* running total */
633 u8 acount = 0; /* running total non-0 len sge's */
634
635 while (count > 0) {
636 /*
637 * If the addition of this SGE causes the
638 * total SGL length to exceed 2^32-1, then
639 * fail-n-bail.
640 *
641 * If the current total plus the next element length
642 * wraps, then it will go negative and be less than the
643 * current total...
644 */
645 if ((tot + src->length) < tot) {
646 return -EINVAL;
647 }
648 /*
649 * Bug: 1456 (as well as 1498 & 1643)
650 * Skip over any sge's supplied with len=0
651 */
652 if (src->length) {
653 tot += src->length;
654 dst->stag = cpu_to_be32(src->lkey);
655 dst->to = cpu_to_be64(src->addr);
656 dst->length = cpu_to_be32(src->length);
657 dst++;
658 acount++;
659 }
660 src++;
661 count--;
662 }
663
664 if (acount == 0) {
665 /*
666 * Bug: 1476 (as well as 1498, 1456 and 1643)
667 * Setup the SGL in the WR to make it easier for the RNIC.
668 * This way, the FW doesn't have to deal with special cases.
669 * Setting length=0 should be sufficient.
670 */
671 dst->stag = 0;
672 dst->to = 0;
673 dst->length = 0;
674 }
675
676 *p_len = tot;
677 *actual_count = acount;
678 return 0;
679}
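/*
 * Editor's note (worked example, not from the original source): the wrap
 * test above relies on unsigned 32-bit arithmetic.  If tot is 0xFFFFFF00
 * and the next src->length is 0x200, then
 *
 *	tot + src->length == 0x00000100   (mod 2^32)
 *
 * which is less than tot, so move_sgl() returns -EINVAL rather than
 * silently truncating the total SGL length.
 */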
680
681/*
682 * Function: c2_activity (private function)
683 *
684 * Description:
685 * Post an mq index to the host->adapter activity fifo.
686 *
687 * IN:
688 * c2dev - ptr to c2dev structure
689 * mq_index - mq index to post
690 * shared - value most recently written to shared
691 *
692 * OUT:
693 *
694 * Return:
695 * none
696 */
697static inline void c2_activity(struct c2_dev *c2dev, u32 mq_index, u16 shared)
698{
699 /*
700 * First read the register to see if the FIFO is full, and if so,
701 * spin until it's not. This isn't perfect -- there is no
702 * synchronization among the clients of the register, but in
 * practice it prevents multiple CPUs from hammering the bus
704 * with PCI RETRY. Note that when this does happen, the card
705 * cannot get on the bus and the card and system hang in a
706 * deadlock -- thus the need for this code. [TOT]
707 */
708 while (readl(c2dev->regs + PCI_BAR0_ADAPTER_HINT) & 0x80000000) {
709 set_current_state(TASK_UNINTERRUPTIBLE);
710 schedule_timeout(0);
711 }
712
713 __raw_writel(C2_HINT_MAKE(mq_index, shared),
714 c2dev->regs + PCI_BAR0_ADAPTER_HINT);
715}
716
717/*
718 * Function: qp_wr_post
719 *
720 * Description:
 * This inline function allocates an MQ msg, then moves the host copy of
 * the WR into the msg and posts the message.
723 *
724 * IN:
725 * q - ptr to user MQ.
726 * wr - ptr to host-copy of the WR.
727 * qp - ptr to user qp
728 * size - Number of bytes to post. Assumed to be divisible by 4.
729 *
730 * OUT: none
731 *
732 * Return:
733 * CCIL status codes.
734 */
735static int qp_wr_post(struct c2_mq *q, union c2wr * wr, struct c2_qp *qp, u32 size)
736{
737 union c2wr *msg;
738
739 msg = c2_mq_alloc(q);
740 if (msg == NULL) {
741 return -EINVAL;
742 }
743#ifdef CCMSGMAGIC
744 ((c2wr_hdr_t *) wr)->magic = cpu_to_be32(CCWR_MAGIC);
745#endif
746
747 /*
748 * Since all header fields in the WR are the same as the
749 * CQE, set the following so the adapter need not.
750 */
751 c2_wr_set_result(wr, CCERR_PENDING);
752
753 /*
754 * Copy the wr down to the adapter
755 */
756 memcpy((void *) msg, (void *) wr, size);
757
758 c2_mq_produce(q);
759 return 0;
760}
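/*
 * Editor's sketch (illustrative only): qp_wr_post() merely copies the WR
 * into the MQ; the adapter is not notified until the caller rings the
 * activity FIFO, so the two calls are always paired:
 *
 *	err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
 *	if (!err)
 *		c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
 *
 * as c2_post_send() and c2_post_receive() below do.
 */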
761
762
763int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
764 struct ib_send_wr **bad_wr)
765{
766 struct c2_dev *c2dev = to_c2dev(ibqp->device);
767 struct c2_qp *qp = to_c2qp(ibqp);
768 union c2wr wr;
769 int err = 0;
770
771 u32 flags;
772 u32 tot_len;
773 u8 actual_sge_count;
774 u32 msg_size;
775
776 if (qp->state > IB_QPS_RTS)
777 return -EINVAL;
778
779 while (ib_wr) {
780
781 flags = 0;
782 wr.sqwr.sq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
783 if (ib_wr->send_flags & IB_SEND_SIGNALED) {
784 flags |= SQ_SIGNALED;
785 }
786
787 switch (ib_wr->opcode) {
788 case IB_WR_SEND:
789 if (ib_wr->send_flags & IB_SEND_SOLICITED) {
790 c2_wr_set_id(&wr, C2_WR_TYPE_SEND_SE);
791 msg_size = sizeof(struct c2wr_send_req);
792 } else {
793 c2_wr_set_id(&wr, C2_WR_TYPE_SEND);
794 msg_size = sizeof(struct c2wr_send_req);
795 }
796
797 wr.sqwr.send.remote_stag = 0;
798 msg_size += sizeof(struct c2_data_addr) * ib_wr->num_sge;
799 if (ib_wr->num_sge > qp->send_sgl_depth) {
800 err = -EINVAL;
801 break;
802 }
803 if (ib_wr->send_flags & IB_SEND_FENCE) {
804 flags |= SQ_READ_FENCE;
805 }
806 err = move_sgl((struct c2_data_addr *) & (wr.sqwr.send.data),
807 ib_wr->sg_list,
808 ib_wr->num_sge,
809 &tot_len, &actual_sge_count);
810 wr.sqwr.send.sge_len = cpu_to_be32(tot_len);
811 c2_wr_set_sge_count(&wr, actual_sge_count);
812 break;
813 case IB_WR_RDMA_WRITE:
814 c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_WRITE);
815 msg_size = sizeof(struct c2wr_rdma_write_req) +
816 (sizeof(struct c2_data_addr) * ib_wr->num_sge);
817 if (ib_wr->num_sge > qp->rdma_write_sgl_depth) {
818 err = -EINVAL;
819 break;
820 }
821 if (ib_wr->send_flags & IB_SEND_FENCE) {
822 flags |= SQ_READ_FENCE;
823 }
824 wr.sqwr.rdma_write.remote_stag =
825 cpu_to_be32(ib_wr->wr.rdma.rkey);
826 wr.sqwr.rdma_write.remote_to =
827 cpu_to_be64(ib_wr->wr.rdma.remote_addr);
828 err = move_sgl((struct c2_data_addr *)
829 & (wr.sqwr.rdma_write.data),
830 ib_wr->sg_list,
831 ib_wr->num_sge,
832 &tot_len, &actual_sge_count);
833 wr.sqwr.rdma_write.sge_len = cpu_to_be32(tot_len);
834 c2_wr_set_sge_count(&wr, actual_sge_count);
835 break;
836 case IB_WR_RDMA_READ:
837 c2_wr_set_id(&wr, C2_WR_TYPE_RDMA_READ);
838 msg_size = sizeof(struct c2wr_rdma_read_req);
839
			/* iWARP only supports 1 SGE for RDMA reads */
841 if (ib_wr->num_sge > 1) {
842 err = -EINVAL;
843 break;
844 }
845
846 /*
847 * Move the local and remote stag/to/len into the WR.
848 */
849 wr.sqwr.rdma_read.local_stag =
850 cpu_to_be32(ib_wr->sg_list->lkey);
851 wr.sqwr.rdma_read.local_to =
852 cpu_to_be64(ib_wr->sg_list->addr);
853 wr.sqwr.rdma_read.remote_stag =
854 cpu_to_be32(ib_wr->wr.rdma.rkey);
855 wr.sqwr.rdma_read.remote_to =
856 cpu_to_be64(ib_wr->wr.rdma.remote_addr);
857 wr.sqwr.rdma_read.length =
858 cpu_to_be32(ib_wr->sg_list->length);
859 break;
860 default:
861 /* error */
862 msg_size = 0;
863 err = -EINVAL;
864 break;
865 }
866
867 /*
868 * If we had an error on the last wr build, then
869 * break out. Possible errors include bogus WR
870 * type, and a bogus SGL length...
871 */
872 if (err) {
873 break;
874 }
875
876 /*
877 * Store flags
878 */
879 c2_wr_set_flags(&wr, flags);
880
881 /*
882 * Post the puppy!
883 */
884 err = qp_wr_post(&qp->sq_mq, &wr, qp, msg_size);
885 if (err) {
886 break;
887 }
888
889 /*
890 * Enqueue mq index to activity FIFO.
891 */
892 c2_activity(c2dev, qp->sq_mq.index, qp->sq_mq.hint_count);
893
894 ib_wr = ib_wr->next;
895 }
896
897 if (err)
898 *bad_wr = ib_wr;
899 return err;
900}
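/*
 * Editor's sketch of a consumer posting a single signaled SEND with one
 * SGE through this routine (hypothetical ULP code; dma_addr, len and
 * mr_lkey are placeholders):
 *
 *	struct ib_sge sge = {
 *		.addr   = dma_addr,
 *		.length = len,
 *		.lkey   = mr_lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.wr_id      = 1,
 *		.opcode     = IB_WR_SEND,
 *		.send_flags = IB_SEND_SIGNALED,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *	};
 *	struct ib_send_wr *bad_wr;
 *
 *	err = ib_post_send(ibqp, &wr, &bad_wr);
 */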
901
902int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
903 struct ib_recv_wr **bad_wr)
904{
905 struct c2_dev *c2dev = to_c2dev(ibqp->device);
906 struct c2_qp *qp = to_c2qp(ibqp);
907 union c2wr wr;
908 int err = 0;
909
910 if (qp->state > IB_QPS_RTS)
911 return -EINVAL;
912
913 /*
914 * Try and post each work request
915 */
916 while (ib_wr) {
917 u32 tot_len;
918 u8 actual_sge_count;
919
920 if (ib_wr->num_sge > qp->recv_sgl_depth) {
921 err = -EINVAL;
922 break;
923 }
924
925 /*
926 * Create local host-copy of the WR
927 */
928 wr.rqwr.rq_hdr.user_hdr.hdr.context = ib_wr->wr_id;
929 c2_wr_set_id(&wr, CCWR_RECV);
930 c2_wr_set_flags(&wr, 0);
931
932 /* sge_count is limited to eight bits. */
933 BUG_ON(ib_wr->num_sge >= 256);
934 err = move_sgl((struct c2_data_addr *) & (wr.rqwr.data),
935 ib_wr->sg_list,
936 ib_wr->num_sge, &tot_len, &actual_sge_count);
937 c2_wr_set_sge_count(&wr, actual_sge_count);
938
939 /*
940 * If we had an error on the last wr build, then
941 * break out. Possible errors include bogus WR
942 * type, and a bogus SGL length...
943 */
944 if (err) {
945 break;
946 }
947
948 err = qp_wr_post(&qp->rq_mq, &wr, qp, qp->rq_mq.msg_size);
949 if (err) {
950 break;
951 }
952
953 /*
954 * Enqueue mq index to activity FIFO
955 */
956 c2_activity(c2dev, qp->rq_mq.index, qp->rq_mq.hint_count);
957
958 ib_wr = ib_wr->next;
959 }
960
961 if (err)
962 *bad_wr = ib_wr;
963 return err;
964}
965
966void __devinit c2_init_qp_table(struct c2_dev *c2dev)
967{
968 spin_lock_init(&c2dev->qp_table.lock);
969 idr_init(&c2dev->qp_table.idr);
970}
971
972void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
973{
974 idr_destroy(&c2dev->qp_table.idr);
975}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
new file mode 100644
index 000000000000..1c3c9d65ecea
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -0,0 +1,663 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35
36#include <linux/module.h>
37#include <linux/moduleparam.h>
38#include <linux/pci.h>
39#include <linux/netdevice.h>
40#include <linux/etherdevice.h>
41#include <linux/delay.h>
42#include <linux/ethtool.h>
43#include <linux/mii.h>
44#include <linux/if_vlan.h>
45#include <linux/crc32.h>
46#include <linux/in.h>
47#include <linux/ip.h>
48#include <linux/tcp.h>
49#include <linux/init.h>
50#include <linux/dma-mapping.h>
51#include <linux/mm.h>
52#include <linux/inet.h>
53
54#include <linux/route.h>
55
56#include <asm/io.h>
57#include <asm/irq.h>
58#include <asm/byteorder.h>
59#include <rdma/ib_smi.h>
60#include "c2.h"
61#include "c2_vq.h"
62
63/* Device capabilities */
64#define C2_MIN_PAGESIZE 1024
65
66#define C2_MAX_MRS 32768
67#define C2_MAX_QPS 16000
68#define C2_MAX_WQE_SZ 256
69#define C2_MAX_QP_WR ((128*1024)/C2_MAX_WQE_SZ)
70#define C2_MAX_SGES 4
71#define C2_MAX_SGE_RD 1
72#define C2_MAX_CQS 32768
73#define C2_MAX_CQES 4096
74#define C2_MAX_PDS 16384
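/*
 * Editor's note: with C2_MAX_WQE_SZ == 256, C2_MAX_QP_WR evaluates to
 * (128 * 1024) / 256 == 512 work requests per queue pair.
 */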
75
76/*
77 * Send the adapter INIT message to the amso1100
78 */
79static int c2_adapter_init(struct c2_dev *c2dev)
80{
81 struct c2wr_init_req wr;
82 int err;
83
84 memset(&wr, 0, sizeof(wr));
85 c2_wr_set_id(&wr, CCWR_INIT);
86 wr.hdr.context = 0;
87 wr.hint_count = cpu_to_be64(c2dev->hint_count_dma);
88 wr.q0_host_shared = cpu_to_be64(c2dev->req_vq.shared_dma);
89 wr.q1_host_shared = cpu_to_be64(c2dev->rep_vq.shared_dma);
90 wr.q1_host_msg_pool = cpu_to_be64(c2dev->rep_vq.host_dma);
91 wr.q2_host_shared = cpu_to_be64(c2dev->aeq.shared_dma);
92 wr.q2_host_msg_pool = cpu_to_be64(c2dev->aeq.host_dma);
93
94 /* Post the init message */
95 err = vq_send_wr(c2dev, (union c2wr *) & wr);
96
97 return err;
98}
99
100/*
101 * Send the adapter TERM message to the amso1100
102 */
103static void c2_adapter_term(struct c2_dev *c2dev)
104{
105 struct c2wr_init_req wr;
106
107 memset(&wr, 0, sizeof(wr));
108 c2_wr_set_id(&wr, CCWR_TERM);
109 wr.hdr.context = 0;
110
	/* Post the TERM message */
112 vq_send_wr(c2dev, (union c2wr *) & wr);
113 c2dev->init = 0;
114
115 return;
116}
117
118/*
119 * Query the adapter
120 */
121static int c2_rnic_query(struct c2_dev *c2dev, struct ib_device_attr *props)
122{
123 struct c2_vq_req *vq_req;
124 struct c2wr_rnic_query_req wr;
125 struct c2wr_rnic_query_rep *reply;
126 int err;
127
128 vq_req = vq_req_alloc(c2dev);
129 if (!vq_req)
130 return -ENOMEM;
131
132 c2_wr_set_id(&wr, CCWR_RNIC_QUERY);
133 wr.hdr.context = (unsigned long) vq_req;
134 wr.rnic_handle = c2dev->adapter_handle;
135
136 vq_req_get(c2dev, vq_req);
137
138 err = vq_send_wr(c2dev, (union c2wr *) &wr);
139 if (err) {
140 vq_req_put(c2dev, vq_req);
141 goto bail1;
142 }
143
144 err = vq_wait_for_reply(c2dev, vq_req);
145 if (err)
146 goto bail1;
147
148 reply =
149 (struct c2wr_rnic_query_rep *) (unsigned long) (vq_req->reply_msg);
	if (!reply) {
		err = -ENOMEM;
		goto bail1;
	}

153 err = c2_errno(reply);
154 if (err)
155 goto bail2;
156
157 props->fw_ver =
158 ((u64)be32_to_cpu(reply->fw_ver_major) << 32) |
		((be32_to_cpu(reply->fw_ver_minor) & 0xFFFF) << 16) |
		(be32_to_cpu(reply->fw_ver_patch) & 0xFFFF);
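	/*
	 * Editor's note (illustrative): with the packing above, a firmware
	 * version of 3.1.2 would be reported as fw_ver 0x0000000300010002.
	 */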
161 memcpy(&props->sys_image_guid, c2dev->netdev->dev_addr, 6);
162 props->max_mr_size = 0xFFFFFFFF;
163 props->page_size_cap = ~(C2_MIN_PAGESIZE-1);
164 props->vendor_id = be32_to_cpu(reply->vendor_id);
165 props->vendor_part_id = be32_to_cpu(reply->part_number);
166 props->hw_ver = be32_to_cpu(reply->hw_version);
167 props->max_qp = be32_to_cpu(reply->max_qps);
168 props->max_qp_wr = be32_to_cpu(reply->max_qp_depth);
169 props->device_cap_flags = c2dev->device_cap_flags;
170 props->max_sge = C2_MAX_SGES;
171 props->max_sge_rd = C2_MAX_SGE_RD;
172 props->max_cq = be32_to_cpu(reply->max_cqs);
173 props->max_cqe = be32_to_cpu(reply->max_cq_depth);
174 props->max_mr = be32_to_cpu(reply->max_mrs);
175 props->max_pd = be32_to_cpu(reply->max_pds);
176 props->max_qp_rd_atom = be32_to_cpu(reply->max_qp_ird);
177 props->max_ee_rd_atom = 0;
178 props->max_res_rd_atom = be32_to_cpu(reply->max_global_ird);
179 props->max_qp_init_rd_atom = be32_to_cpu(reply->max_qp_ord);
180 props->max_ee_init_rd_atom = 0;
181 props->atomic_cap = IB_ATOMIC_NONE;
182 props->max_ee = 0;
183 props->max_rdd = 0;
184 props->max_mw = be32_to_cpu(reply->max_mws);
185 props->max_raw_ipv6_qp = 0;
186 props->max_raw_ethy_qp = 0;
187 props->max_mcast_grp = 0;
188 props->max_mcast_qp_attach = 0;
189 props->max_total_mcast_qp_attach = 0;
190 props->max_ah = 0;
191 props->max_fmr = 0;
192 props->max_map_per_fmr = 0;
193 props->max_srq = 0;
194 props->max_srq_wr = 0;
195 props->max_srq_sge = 0;
196 props->max_pkeys = 0;
197 props->local_ca_ack_delay = 0;
198
199 bail2:
200 vq_repbuf_free(c2dev, reply);
201
202 bail1:
203 vq_req_free(c2dev, vq_req);
204 return err;
205}
206
207/*
208 * Add an IP address to the RNIC interface
209 */
210int c2_add_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
211{
212 struct c2_vq_req *vq_req;
213 struct c2wr_rnic_setconfig_req *wr;
214 struct c2wr_rnic_setconfig_rep *reply;
215 struct c2_netaddr netaddr;
216 int err, len;
217
218 vq_req = vq_req_alloc(c2dev);
219 if (!vq_req)
220 return -ENOMEM;
221
222 len = sizeof(struct c2_netaddr);
223 wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
224 if (!wr) {
225 err = -ENOMEM;
226 goto bail0;
227 }
228
229 c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
230 wr->hdr.context = (unsigned long) vq_req;
231 wr->rnic_handle = c2dev->adapter_handle;
232 wr->option = cpu_to_be32(C2_CFG_ADD_ADDR);
233
234 netaddr.ip_addr = inaddr;
235 netaddr.netmask = inmask;
236 netaddr.mtu = 0;
237
238 memcpy(wr->data, &netaddr, len);
239
240 vq_req_get(c2dev, vq_req);
241
242 err = vq_send_wr(c2dev, (union c2wr *) wr);
243 if (err) {
244 vq_req_put(c2dev, vq_req);
245 goto bail1;
246 }
247
248 err = vq_wait_for_reply(c2dev, vq_req);
249 if (err)
250 goto bail1;
251
252 reply =
253 (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
254 if (!reply) {
255 err = -ENOMEM;
256 goto bail1;
257 }
258
259 err = c2_errno(reply);
260 vq_repbuf_free(c2dev, reply);
261
262 bail1:
263 kfree(wr);
264 bail0:
265 vq_req_free(c2dev, vq_req);
266 return err;
267}
268
269/*
270 * Delete an IP address from the RNIC interface
271 */
272int c2_del_addr(struct c2_dev *c2dev, u32 inaddr, u32 inmask)
273{
274 struct c2_vq_req *vq_req;
275 struct c2wr_rnic_setconfig_req *wr;
276 struct c2wr_rnic_setconfig_rep *reply;
277 struct c2_netaddr netaddr;
278 int err, len;
279
280 vq_req = vq_req_alloc(c2dev);
281 if (!vq_req)
282 return -ENOMEM;
283
284 len = sizeof(struct c2_netaddr);
285 wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
286 if (!wr) {
287 err = -ENOMEM;
288 goto bail0;
289 }
290
291 c2_wr_set_id(wr, CCWR_RNIC_SETCONFIG);
292 wr->hdr.context = (unsigned long) vq_req;
293 wr->rnic_handle = c2dev->adapter_handle;
294 wr->option = cpu_to_be32(C2_CFG_DEL_ADDR);
295
296 netaddr.ip_addr = inaddr;
297 netaddr.netmask = inmask;
298 netaddr.mtu = 0;
299
300 memcpy(wr->data, &netaddr, len);
301
302 vq_req_get(c2dev, vq_req);
303
304 err = vq_send_wr(c2dev, (union c2wr *) wr);
305 if (err) {
306 vq_req_put(c2dev, vq_req);
307 goto bail1;
308 }
309
310 err = vq_wait_for_reply(c2dev, vq_req);
311 if (err)
312 goto bail1;
313
314 reply =
315 (struct c2wr_rnic_setconfig_rep *) (unsigned long) (vq_req->reply_msg);
316 if (!reply) {
317 err = -ENOMEM;
318 goto bail1;
319 }
320
321 err = c2_errno(reply);
322 vq_repbuf_free(c2dev, reply);
323
324 bail1:
325 kfree(wr);
326 bail0:
327 vq_req_free(c2dev, vq_req);
328 return err;
329}
330
331/*
332 * Open a single RNIC instance to use with all
333 * low level openib calls
334 */
335static int c2_rnic_open(struct c2_dev *c2dev)
336{
337 struct c2_vq_req *vq_req;
338 union c2wr wr;
339 struct c2wr_rnic_open_rep *reply;
340 int err;
341
342 vq_req = vq_req_alloc(c2dev);
343 if (vq_req == NULL) {
344 return -ENOMEM;
345 }
346
347 memset(&wr, 0, sizeof(wr));
348 c2_wr_set_id(&wr, CCWR_RNIC_OPEN);
349 wr.rnic_open.req.hdr.context = (unsigned long) (vq_req);
350 wr.rnic_open.req.flags = cpu_to_be16(RNIC_PRIV_MODE);
351 wr.rnic_open.req.port_num = cpu_to_be16(0);
352 wr.rnic_open.req.user_context = (unsigned long) c2dev;
353
354 vq_req_get(c2dev, vq_req);
355
356 err = vq_send_wr(c2dev, &wr);
357 if (err) {
358 vq_req_put(c2dev, vq_req);
359 goto bail0;
360 }
361
362 err = vq_wait_for_reply(c2dev, vq_req);
363 if (err) {
364 goto bail0;
365 }
366
367 reply = (struct c2wr_rnic_open_rep *) (unsigned long) (vq_req->reply_msg);
368 if (!reply) {
369 err = -ENOMEM;
370 goto bail0;
371 }
372
373 if ((err = c2_errno(reply)) != 0) {
374 goto bail1;
375 }
376
377 c2dev->adapter_handle = reply->rnic_handle;
378
379 bail1:
380 vq_repbuf_free(c2dev, reply);
381 bail0:
382 vq_req_free(c2dev, vq_req);
383 return err;
384}
385
386/*
387 * Close the RNIC instance
388 */
389static int c2_rnic_close(struct c2_dev *c2dev)
390{
391 struct c2_vq_req *vq_req;
392 union c2wr wr;
393 struct c2wr_rnic_close_rep *reply;
394 int err;
395
396 vq_req = vq_req_alloc(c2dev);
397 if (vq_req == NULL) {
398 return -ENOMEM;
399 }
400
401 memset(&wr, 0, sizeof(wr));
402 c2_wr_set_id(&wr, CCWR_RNIC_CLOSE);
403 wr.rnic_close.req.hdr.context = (unsigned long) vq_req;
404 wr.rnic_close.req.rnic_handle = c2dev->adapter_handle;
405
406 vq_req_get(c2dev, vq_req);
407
408 err = vq_send_wr(c2dev, &wr);
409 if (err) {
410 vq_req_put(c2dev, vq_req);
411 goto bail0;
412 }
413
414 err = vq_wait_for_reply(c2dev, vq_req);
415 if (err) {
416 goto bail0;
417 }
418
419 reply = (struct c2wr_rnic_close_rep *) (unsigned long) (vq_req->reply_msg);
420 if (!reply) {
421 err = -ENOMEM;
422 goto bail0;
423 }
424
425 if ((err = c2_errno(reply)) != 0) {
426 goto bail1;
427 }
428
429 c2dev->adapter_handle = 0;
430
431 bail1:
432 vq_repbuf_free(c2dev, reply);
433 bail0:
434 vq_req_free(c2dev, vq_req);
435 return err;
436}
437
438/*
439 * Called by c2_probe to initialize the RNIC. This principally
 * involves initializing the various limits and resource pools that
441 * comprise the RNIC instance.
442 */
443int c2_rnic_init(struct c2_dev *c2dev)
444{
445 int err;
446 u32 qsize, msgsize;
447 void *q1_pages;
448 void *q2_pages;
449 void __iomem *mmio_regs;
450
451 /* Device capabilities */
452 c2dev->device_cap_flags =
453 (IB_DEVICE_RESIZE_MAX_WR |
454 IB_DEVICE_CURR_QP_STATE_MOD |
455 IB_DEVICE_SYS_IMAGE_GUID |
456 IB_DEVICE_ZERO_STAG |
457 IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
458
459 /* Allocate the qptr_array */
460 c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
461 if (!c2dev->qptr_array) {
462 return -ENOMEM;
463 }
464
	/* Initialize the qptr_array */
466 memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
467 c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
468 c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
469 c2dev->qptr_array[2] = (void *) &c2dev->aeq;
470
471 /* Initialize data structures */
472 init_waitqueue_head(&c2dev->req_vq_wo);
473 spin_lock_init(&c2dev->vqlock);
474 spin_lock_init(&c2dev->lock);
475
476 /* Allocate MQ shared pointer pool for kernel clients. User
477 * mode client pools are hung off the user context
478 */
479 err = c2_init_mqsp_pool(c2dev, GFP_KERNEL, &c2dev->kern_mqsp_pool);
480 if (err) {
481 goto bail0;
482 }
483
484 /* Allocate shared pointers for Q0, Q1, and Q2 from
485 * the shared pointer pool.
486 */
487
488 c2dev->hint_count = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
489 &c2dev->hint_count_dma,
490 GFP_KERNEL);
491 c2dev->req_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
492 &c2dev->req_vq.shared_dma,
493 GFP_KERNEL);
494 c2dev->rep_vq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
495 &c2dev->rep_vq.shared_dma,
496 GFP_KERNEL);
497 c2dev->aeq.shared = c2_alloc_mqsp(c2dev, c2dev->kern_mqsp_pool,
498 &c2dev->aeq.shared_dma, GFP_KERNEL);
499 if (!c2dev->hint_count || !c2dev->req_vq.shared ||
500 !c2dev->rep_vq.shared || !c2dev->aeq.shared) {
501 err = -ENOMEM;
502 goto bail1;
503 }
504
505 mmio_regs = c2dev->kva;
506 /* Initialize the Verbs Request Queue */
507 c2_mq_req_init(&c2dev->req_vq, 0,
508 be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_QSIZE)),
509 be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_MSGSIZE)),
510 mmio_regs +
511 be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_POOLSTART)),
512 mmio_regs +
513 be32_to_cpu(readl(mmio_regs + C2_REGS_Q0_SHARED)),
514 C2_MQ_ADAPTER_TARGET);
515
516 /* Initialize the Verbs Reply Queue */
517 qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_QSIZE));
518 msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_MSGSIZE));
519 q1_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
520 if (!q1_pages) {
521 err = -ENOMEM;
522 goto bail1;
523 }
524 c2dev->rep_vq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
525 (void *)q1_pages, qsize * msgsize,
526 DMA_FROM_DEVICE);
527 pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
528 pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
529 (u64)c2dev->rep_vq.host_dma);
530 c2_mq_rep_init(&c2dev->rep_vq,
531 1,
532 qsize,
533 msgsize,
534 q1_pages,
535 mmio_regs +
536 be32_to_cpu(readl(mmio_regs + C2_REGS_Q1_SHARED)),
537 C2_MQ_HOST_TARGET);
538
	/* Initialize the Asynchronous Event Queue */
540 qsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_QSIZE));
541 msgsize = be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_MSGSIZE));
542 q2_pages = kmalloc(qsize * msgsize, GFP_KERNEL);
543 if (!q2_pages) {
544 err = -ENOMEM;
545 goto bail2;
546 }
547 c2dev->aeq.host_dma = dma_map_single(c2dev->ibdev.dma_device,
548 (void *)q2_pages, qsize * msgsize,
549 DMA_FROM_DEVICE);
550 pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
	pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q2_pages,
		 (u64)c2dev->aeq.host_dma);
553 c2_mq_rep_init(&c2dev->aeq,
554 2,
555 qsize,
556 msgsize,
557 q2_pages,
558 mmio_regs +
559 be32_to_cpu(readl(mmio_regs + C2_REGS_Q2_SHARED)),
560 C2_MQ_HOST_TARGET);
561
562 /* Initialize the verbs request allocator */
563 err = vq_init(c2dev);
564 if (err)
565 goto bail3;
566
567 /* Enable interrupts on the adapter */
568 writel(0, c2dev->regs + C2_IDIS);
569
570 /* create the WR init message */
571 err = c2_adapter_init(c2dev);
572 if (err)
573 goto bail4;
574 c2dev->init++;
575
576 /* open an adapter instance */
577 err = c2_rnic_open(c2dev);
578 if (err)
579 goto bail4;
580
	/* Initialize the cached adapter limits */
	err = c2_rnic_query(c2dev, &c2dev->props);
	if (err)
		goto bail5;
584
585 /* Initialize the PD pool */
586 err = c2_init_pd_table(c2dev);
587 if (err)
588 goto bail5;
589
590 /* Initialize the QP pool */
591 c2_init_qp_table(c2dev);
592 return 0;
593
594 bail5:
595 c2_rnic_close(c2dev);
596 bail4:
597 vq_term(c2dev);
598 bail3:
599 dma_unmap_single(c2dev->ibdev.dma_device,
600 pci_unmap_addr(&c2dev->aeq, mapping),
601 c2dev->aeq.q_size * c2dev->aeq.msg_size,
602 DMA_FROM_DEVICE);
603 kfree(q2_pages);
604 bail2:
605 dma_unmap_single(c2dev->ibdev.dma_device,
606 pci_unmap_addr(&c2dev->rep_vq, mapping),
607 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
608 DMA_FROM_DEVICE);
609 kfree(q1_pages);
610 bail1:
611 c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
612 bail0:
613 vfree(c2dev->qptr_array);
614
615 return err;
616}
617
618/*
 * Called by c2_remove to clean up the RNIC resources.
620 */
621void c2_rnic_term(struct c2_dev *c2dev)
622{
623
624 /* Close the open adapter instance */
625 c2_rnic_close(c2dev);
626
627 /* Send the TERM message to the adapter */
628 c2_adapter_term(c2dev);
629
630 /* Disable interrupts on the adapter */
631 writel(1, c2dev->regs + C2_IDIS);
632
633 /* Free the QP pool */
634 c2_cleanup_qp_table(c2dev);
635
636 /* Free the PD pool */
637 c2_cleanup_pd_table(c2dev);
638
639 /* Free the verbs request allocator */
640 vq_term(c2dev);
641
	/* Unmap and free the asynchronous event queue */
643 dma_unmap_single(c2dev->ibdev.dma_device,
644 pci_unmap_addr(&c2dev->aeq, mapping),
645 c2dev->aeq.q_size * c2dev->aeq.msg_size,
646 DMA_FROM_DEVICE);
647 kfree(c2dev->aeq.msg_pool.host);
648
649 /* Unmap and free the verbs reply queue */
650 dma_unmap_single(c2dev->ibdev.dma_device,
651 pci_unmap_addr(&c2dev->rep_vq, mapping),
652 c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
653 DMA_FROM_DEVICE);
654 kfree(c2dev->rep_vq.msg_pool.host);
655
656 /* Free the MQ shared pointer pool */
657 c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
658
659 /* Free the qptr_array */
660 vfree(c2dev->qptr_array);
661
662 return;
663}
diff --git a/drivers/infiniband/hw/amso1100/c2_status.h b/drivers/infiniband/hw/amso1100/c2_status.h
new file mode 100644
index 000000000000..6ee4aa92d875
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_status.h
@@ -0,0 +1,158 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef _C2_STATUS_H_
34#define _C2_STATUS_H_
35
36/*
37 * Verbs Status Codes
38 */
39enum c2_status {
40 C2_OK = 0, /* This must be zero */
41 CCERR_INSUFFICIENT_RESOURCES = 1,
42 CCERR_INVALID_MODIFIER = 2,
43 CCERR_INVALID_MODE = 3,
44 CCERR_IN_USE = 4,
45 CCERR_INVALID_RNIC = 5,
46 CCERR_INTERRUPTED_OPERATION = 6,
47 CCERR_INVALID_EH = 7,
48 CCERR_INVALID_CQ = 8,
49 CCERR_CQ_EMPTY = 9,
50 CCERR_NOT_IMPLEMENTED = 10,
51 CCERR_CQ_DEPTH_TOO_SMALL = 11,
52 CCERR_PD_IN_USE = 12,
53 CCERR_INVALID_PD = 13,
54 CCERR_INVALID_SRQ = 14,
55 CCERR_INVALID_ADDRESS = 15,
56 CCERR_INVALID_NETMASK = 16,
57 CCERR_INVALID_QP = 17,
58 CCERR_INVALID_QP_STATE = 18,
59 CCERR_TOO_MANY_WRS_POSTED = 19,
60 CCERR_INVALID_WR_TYPE = 20,
61 CCERR_INVALID_SGL_LENGTH = 21,
62 CCERR_INVALID_SQ_DEPTH = 22,
63 CCERR_INVALID_RQ_DEPTH = 23,
64 CCERR_INVALID_ORD = 24,
65 CCERR_INVALID_IRD = 25,
66 CCERR_QP_ATTR_CANNOT_CHANGE = 26,
67 CCERR_INVALID_STAG = 27,
68 CCERR_QP_IN_USE = 28,
69 CCERR_OUTSTANDING_WRS = 29,
70 CCERR_STAG_IN_USE = 30,
71 CCERR_INVALID_STAG_INDEX = 31,
72 CCERR_INVALID_SGL_FORMAT = 32,
73 CCERR_ADAPTER_TIMEOUT = 33,
74 CCERR_INVALID_CQ_DEPTH = 34,
75 CCERR_INVALID_PRIVATE_DATA_LENGTH = 35,
76 CCERR_INVALID_EP = 36,
77 CCERR_MR_IN_USE = CCERR_STAG_IN_USE,
78 CCERR_FLUSHED = 38,
79 CCERR_INVALID_WQE = 39,
80 CCERR_LOCAL_QP_CATASTROPHIC_ERROR = 40,
81 CCERR_REMOTE_TERMINATION_ERROR = 41,
82 CCERR_BASE_AND_BOUNDS_VIOLATION = 42,
83 CCERR_ACCESS_VIOLATION = 43,
84 CCERR_INVALID_PD_ID = 44,
85 CCERR_WRAP_ERROR = 45,
86 CCERR_INV_STAG_ACCESS_ERROR = 46,
87 CCERR_ZERO_RDMA_READ_RESOURCES = 47,
88 CCERR_QP_NOT_PRIVILEGED = 48,
89 CCERR_STAG_STATE_NOT_INVALID = 49,
90 CCERR_INVALID_PAGE_SIZE = 50,
91 CCERR_INVALID_BUFFER_SIZE = 51,
92 CCERR_INVALID_PBE = 52,
93 CCERR_INVALID_FBO = 53,
94 CCERR_INVALID_LENGTH = 54,
95 CCERR_INVALID_ACCESS_RIGHTS = 55,
96 CCERR_PBL_TOO_BIG = 56,
97 CCERR_INVALID_VA = 57,
98 CCERR_INVALID_REGION = 58,
99 CCERR_INVALID_WINDOW = 59,
100 CCERR_TOTAL_LENGTH_TOO_BIG = 60,
101 CCERR_INVALID_QP_ID = 61,
102 CCERR_ADDR_IN_USE = 62,
103 CCERR_ADDR_NOT_AVAIL = 63,
104 CCERR_NET_DOWN = 64,
105 CCERR_NET_UNREACHABLE = 65,
106 CCERR_CONN_ABORTED = 66,
107 CCERR_CONN_RESET = 67,
108 CCERR_NO_BUFS = 68,
109 CCERR_CONN_TIMEDOUT = 69,
110 CCERR_CONN_REFUSED = 70,
111 CCERR_HOST_UNREACHABLE = 71,
112 CCERR_INVALID_SEND_SGL_DEPTH = 72,
113 CCERR_INVALID_RECV_SGL_DEPTH = 73,
114 CCERR_INVALID_RDMA_WRITE_SGL_DEPTH = 74,
115 CCERR_INSUFFICIENT_PRIVILEGES = 75,
116 CCERR_STACK_ERROR = 76,
117 CCERR_INVALID_VERSION = 77,
118 CCERR_INVALID_MTU = 78,
119 CCERR_INVALID_IMAGE = 79,
	CCERR_PENDING = 98,	/* not an error; used internally by adapter */
121 CCERR_DEFER = 99, /* not an error; used internally by adapter */
122 CCERR_FAILED_WRITE = 100,
123 CCERR_FAILED_ERASE = 101,
124 CCERR_FAILED_VERIFICATION = 102,
125 CCERR_NOT_FOUND = 103,
126
127};
128
129/*
130 * CCAE_ACTIVE_CONNECT_RESULTS status result codes.
131 */
132enum c2_connect_status {
133 C2_CONN_STATUS_SUCCESS = C2_OK,
134 C2_CONN_STATUS_NO_MEM = CCERR_INSUFFICIENT_RESOURCES,
135 C2_CONN_STATUS_TIMEDOUT = CCERR_CONN_TIMEDOUT,
136 C2_CONN_STATUS_REFUSED = CCERR_CONN_REFUSED,
137 C2_CONN_STATUS_NETUNREACH = CCERR_NET_UNREACHABLE,
138 C2_CONN_STATUS_HOSTUNREACH = CCERR_HOST_UNREACHABLE,
139 C2_CONN_STATUS_INVALID_RNIC = CCERR_INVALID_RNIC,
140 C2_CONN_STATUS_INVALID_QP = CCERR_INVALID_QP,
141 C2_CONN_STATUS_INVALID_QP_STATE = CCERR_INVALID_QP_STATE,
142 C2_CONN_STATUS_REJECTED = CCERR_CONN_RESET,
143 C2_CONN_STATUS_ADDR_NOT_AVAIL = CCERR_ADDR_NOT_AVAIL,
144};
145
146/*
147 * Flash programming status codes.
148 */
149enum c2_flash_status {
150 C2_FLASH_STATUS_SUCCESS = 0x0000,
151 C2_FLASH_STATUS_VERIFY_ERR = 0x0002,
152 C2_FLASH_STATUS_IMAGE_ERR = 0x0004,
153 C2_FLASH_STATUS_ECLBS = 0x0400,
154 C2_FLASH_STATUS_PSLBS = 0x0800,
155 C2_FLASH_STATUS_VPENS = 0x1000,
156};
157
158#endif /* _C2_STATUS_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_user.h b/drivers/infiniband/hw/amso1100/c2_user.h
new file mode 100644
index 000000000000..7e9e7ad65467
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_user.h
@@ -0,0 +1,82 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 *
34 */
35
36#ifndef C2_USER_H
37#define C2_USER_H
38
39#include <linux/types.h>
40
41/*
42 * Make sure that all structs defined in this file remain laid out so
43 * that they pack the same way on 32-bit and 64-bit architectures (to
44 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
45 * In particular do not use pointer types -- pass pointers in __u64
46 * instead.
47 */
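/*
 * Editor's sketch (illustrative only): a consumer passes a pointer through
 * a __u64 field by widening it explicitly, e.g.
 *
 *	cmd.some_db_page = (__u64) (unsigned long) db_page_ptr;
 *
 * where "some_db_page" and "db_page_ptr" are placeholder names; the point
 * is that the struct layout is identical for 32-bit and 64-bit consumers.
 */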
48
49struct c2_alloc_ucontext_resp {
50 __u32 qp_tab_size;
51 __u32 uarc_size;
52};
53
54struct c2_alloc_pd_resp {
55 __u32 pdn;
56 __u32 reserved;
57};
58
59struct c2_create_cq {
60 __u32 lkey;
61 __u32 pdn;
62 __u64 arm_db_page;
63 __u64 set_db_page;
64 __u32 arm_db_index;
65 __u32 set_db_index;
66};
67
68struct c2_create_cq_resp {
69 __u32 cqn;
70 __u32 reserved;
71};
72
73struct c2_create_qp {
74 __u32 lkey;
75 __u32 reserved;
76 __u64 sq_db_page;
77 __u64 rq_db_page;
78 __u32 sq_db_index;
79 __u32 rq_db_index;
80};
81
82#endif /* C2_USER_H */
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
new file mode 100644
index 000000000000..40caeb5f41b4
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -0,0 +1,260 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <linux/slab.h>
34#include <linux/spinlock.h>
35
36#include "c2_vq.h"
37#include "c2_provider.h"
38
39/*
40 * Verbs Request Objects:
41 *
42 * VQ Request Objects are allocated by the kernel verbs handlers.
43 * They contain a wait object, a refcnt, an atomic bool indicating that the
44 * adapter has replied, and a copy of the verb reply work request.
45 * A pointer to the VQ Request Object is passed down in the context
46 * field of the work request message, and reflected back by the adapter
47 * in the verbs reply message. The function handle_vq() in the interrupt
48 * path will use this pointer to:
49 * 1) append a copy of the verbs reply message
50 * 2) mark that the reply is ready
51 * 3) wake up the kernel verbs handler blocked awaiting the reply.
52 *
53 *
54 * The kernel verbs handlers do a "get" to put a 2nd reference on the
55 * VQ Request object. If the kernel verbs handler exits before the adapter
56 * can respond, this extra reference will keep the VQ Request object around
57 * until the adapter's reply can be processed. The reason we need this is
58 * because a pointer to this object is stuffed into the context field of
59 * the verbs work request message, and reflected back in the reply message.
60 * It is used in the interrupt handler (handle_vq()) to wake up the appropriate
61 * kernel verb handler that is blocked awaiting the verb reply.
62 * So handle_vq() will do a "put" on the object when it's done accessing it.
63 * NOTE: If we guarantee that the kernel verb handler will never bail before
64 * getting the reply, then we don't need these refcnts.
65 *
66 *
67 * VQ Request objects are freed by the kernel verbs handlers only
68 * after the verb has been processed, or when the adapter fails and
69 * does not reply.
70 *
71 *
72 * Verbs Reply Buffers:
73 *
 * VQ Reply bufs are local host memory copies of an
 * outstanding Verb Request reply
 * message. They are always allocated by the kernel verbs handlers, and _may_ be
77 * freed by either the kernel verbs handler -or- the interrupt handler. The
78 * kernel verbs handler _must_ free the repbuf, then free the vq request object
79 * in that order.
80 */
81
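/*
 * Editor's sketch of the request/reply pattern described above
 * (illustrative only; see c2_qp_modify() and friends for real users):
 *
 *	vq_req = vq_req_alloc(c2dev);
 *	c2_wr_set_id(&wr, CCWR_QP_MODIFY);
 *	wr.hdr.context = (unsigned long) vq_req;
 *	vq_req_get(c2dev, vq_req);		   (2nd ref, for handle_vq)
 *
 *	err = vq_send_wr(c2dev, (union c2wr *) &wr);
 *	if (err)
 *		vq_req_put(c2dev, vq_req);	   (adapter will never reply)
 *	else if (!vq_wait_for_reply(c2dev, vq_req)) {
 *		reply = (void *) (unsigned long) vq_req->reply_msg;
 *		err = c2_errno(reply);
 *		vq_repbuf_free(c2dev, reply);
 *	}
 *	vq_req_free(c2dev, vq_req);
 */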
82int vq_init(struct c2_dev *c2dev)
83{
84 sprintf(c2dev->vq_cache_name, "c2-vq:dev%c",
85 (char) ('0' + c2dev->devnum));
86 c2dev->host_msg_cache =
87 kmem_cache_create(c2dev->vq_cache_name, c2dev->rep_vq.msg_size, 0,
88 SLAB_HWCACHE_ALIGN, NULL, NULL);
89 if (c2dev->host_msg_cache == NULL) {
90 return -ENOMEM;
91 }
92 return 0;
93}
94
95void vq_term(struct c2_dev *c2dev)
96{
97 kmem_cache_destroy(c2dev->host_msg_cache);
98}
99
100/* vq_req_alloc - allocate a VQ Request Object and initialize it.
101 * The refcnt is set to 1.
102 */
103struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
104{
105 struct c2_vq_req *r;
106
107 r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
108 if (r) {
109 init_waitqueue_head(&r->wait_object);
110 r->reply_msg = (u64) NULL;
111 r->event = 0;
112 r->cm_id = NULL;
113 r->qp = NULL;
114 atomic_set(&r->refcnt, 1);
115 atomic_set(&r->reply_ready, 0);
116 }
117 return r;
118}
119
120
121/* vq_req_free - free the VQ Request Object. It is assumed the verbs handler
 * has already freed the VQ Reply Buffer if one existed.
123 */
124void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
125{
126 r->reply_msg = (u64) NULL;
127 if (atomic_dec_and_test(&r->refcnt)) {
128 kfree(r);
129 }
130}
131
132/* vq_req_get - reference a VQ Request Object. Done
133 * only in the kernel verbs handlers.
134 */
135void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
136{
137 atomic_inc(&r->refcnt);
138}
139
140
141/* vq_req_put - dereference and potentially free a VQ Request Object.
142 *
143 * This is only called by handle_vq() on the
144 * interrupt when it is done processing
145 * a verb reply message. If the associated
146 * kernel verbs handler has already bailed,
147 * then this put will actually free the VQ
148 * Request object _and_ the VQ Reply Buffer
149 * if it exists.
150 */
151void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
152{
153 if (atomic_dec_and_test(&r->refcnt)) {
154 if (r->reply_msg != (u64) NULL)
155 vq_repbuf_free(c2dev,
156 (void *) (unsigned long) r->reply_msg);
157 kfree(r);
158 }
159}
160
161
162/*
163 * vq_repbuf_alloc - allocate a VQ Reply Buffer.
164 */
165void *vq_repbuf_alloc(struct c2_dev *c2dev)
166{
167 return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC);
168}
169
170/*
171 * vq_send_wr - post a verbs request message to the Verbs Request Queue.
172 * If a message is not available in the MQ, then block until one is available.
 * NOTE: handle_mq() in the interrupt path will wake up threads blocked here.
 * When the adapter drains the Verbs Request Queue,
 * it inserts MQ index 0 into the
 * adapter->host activity fifo and interrupts the host.
177 */
178int vq_send_wr(struct c2_dev *c2dev, union c2wr *wr)
179{
180 void *msg;
181 wait_queue_t __wait;
182
183 /*
184 * grab adapter vq lock
185 */
186 spin_lock(&c2dev->vqlock);
187
188 /*
189 * allocate msg
190 */
191 msg = c2_mq_alloc(&c2dev->req_vq);
192
193 /*
	 * If we cannot get a msg, then we'll wait.
	 * When messages become available, the interrupt handler will wake_up()
196 * any waiters.
197 */
198 while (msg == NULL) {
199 pr_debug("%s:%d no available msg in VQ, waiting...\n",
200 __FUNCTION__, __LINE__);
201 init_waitqueue_entry(&__wait, current);
202 add_wait_queue(&c2dev->req_vq_wo, &__wait);
203 spin_unlock(&c2dev->vqlock);
204 for (;;) {
205 set_current_state(TASK_INTERRUPTIBLE);
206 if (!c2_mq_full(&c2dev->req_vq)) {
207 break;
208 }
209 if (!signal_pending(current)) {
210 schedule_timeout(1 * HZ); /* 1 second... */
211 continue;
212 }
213 set_current_state(TASK_RUNNING);
214 remove_wait_queue(&c2dev->req_vq_wo, &__wait);
215 return -EINTR;
216 }
217 set_current_state(TASK_RUNNING);
218 remove_wait_queue(&c2dev->req_vq_wo, &__wait);
219 spin_lock(&c2dev->vqlock);
220 msg = c2_mq_alloc(&c2dev->req_vq);
221 }
222
223 /*
224 * copy wr into adapter msg
225 */
226 memcpy(msg, wr, c2dev->req_vq.msg_size);
227
228 /*
229 * post msg
230 */
231 c2_mq_produce(&c2dev->req_vq);
232
233 /*
234 * release adapter vq lock
235 */
236 spin_unlock(&c2dev->vqlock);
237 return 0;
238}
239
240
241/*
242 * vq_wait_for_reply - block until the adapter posts a Verb Reply Message.
243 */
244int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req)
245{
246 if (!wait_event_timeout(req->wait_object,
247 atomic_read(&req->reply_ready),
248 60*HZ))
249 return -ETIMEDOUT;
250
251 return 0;
252}
253
254/*
255 * vq_repbuf_free - Free a Verbs Reply Buffer.
256 */
257void vq_repbuf_free(struct c2_dev *c2dev, void *reply)
258{
259 kmem_cache_free(c2dev->host_msg_cache, reply);
260}
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.h b/drivers/infiniband/hw/amso1100/c2_vq.h
new file mode 100644
index 000000000000..33805627a607
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_vq.h
@@ -0,0 +1,63 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef _C2_VQ_H_
34#define _C2_VQ_H_
35#include <linux/sched.h>
36#include "c2.h"
37#include "c2_wr.h"
38#include "c2_provider.h"
39
40struct c2_vq_req {
41 u64 reply_msg; /* ptr to reply msg */
42 wait_queue_head_t wait_object; /* wait object for vq reqs */
43 atomic_t reply_ready; /* set when reply is ready */
44 atomic_t refcnt; /* used to cancel WRs... */
45 int event;
46 struct iw_cm_id *cm_id;
47 struct c2_qp *qp;
48};
49
50extern int vq_init(struct c2_dev *c2dev);
51extern void vq_term(struct c2_dev *c2dev);
52
53extern struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev);
54extern void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *req);
55extern void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *req);
56extern void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *req);
57extern int vq_send_wr(struct c2_dev *c2dev, union c2wr * wr);
58
59extern void *vq_repbuf_alloc(struct c2_dev *c2dev);
60extern void vq_repbuf_free(struct c2_dev *c2dev, void *reply);
61
62extern int vq_wait_for_reply(struct c2_dev *c2dev, struct c2_vq_req *req);
63#endif /* _C2_VQ_H_ */
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
new file mode 100644
index 000000000000..3ec6c43bb0ef
--- /dev/null
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -0,0 +1,1520 @@
1/*
2 * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef _C2_WR_H_
34#define _C2_WR_H_
35
36#ifdef CCDEBUG
37#define CCWR_MAGIC 0xb07700b0
38#endif
39
40#define C2_QP_NO_ATTR_CHANGE 0xFFFFFFFF
41
42/* Maximum allowed size in bytes of private_data exchange
43 * on connect.
44 */
45#define C2_MAX_PRIVATE_DATA_SIZE 200
46
47/*
48 * These types are shared among the adapter, host, and CCIL consumer.
49 */
50enum c2_cq_notification_type {
51 C2_CQ_NOTIFICATION_TYPE_NONE = 1,
52 C2_CQ_NOTIFICATION_TYPE_NEXT,
53 C2_CQ_NOTIFICATION_TYPE_NEXT_SE
54};
55
56enum c2_setconfig_cmd {
57 C2_CFG_ADD_ADDR = 1,
58 C2_CFG_DEL_ADDR = 2,
59 C2_CFG_ADD_ROUTE = 3,
60 C2_CFG_DEL_ROUTE = 4
61};
62
63enum c2_getconfig_cmd {
64 C2_GETCONFIG_ROUTES = 1,
65 C2_GETCONFIG_ADDRS
66};
67
68/*
69 * CCIL Work Request Identifiers
70 */
71enum c2wr_ids {
72 CCWR_RNIC_OPEN = 1,
73 CCWR_RNIC_QUERY,
74 CCWR_RNIC_SETCONFIG,
75 CCWR_RNIC_GETCONFIG,
76 CCWR_RNIC_CLOSE,
77 CCWR_CQ_CREATE,
78 CCWR_CQ_QUERY,
79 CCWR_CQ_MODIFY,
80 CCWR_CQ_DESTROY,
81 CCWR_QP_CONNECT,
82 CCWR_PD_ALLOC,
83 CCWR_PD_DEALLOC,
84 CCWR_SRQ_CREATE,
85 CCWR_SRQ_QUERY,
86 CCWR_SRQ_MODIFY,
87 CCWR_SRQ_DESTROY,
88 CCWR_QP_CREATE,
89 CCWR_QP_QUERY,
90 CCWR_QP_MODIFY,
91 CCWR_QP_DESTROY,
92 CCWR_NSMR_STAG_ALLOC,
93 CCWR_NSMR_REGISTER,
94 CCWR_NSMR_PBL,
95 CCWR_STAG_DEALLOC,
96 CCWR_NSMR_REREGISTER,
97 CCWR_SMR_REGISTER,
98 CCWR_MR_QUERY,
99 CCWR_MW_ALLOC,
100 CCWR_MW_QUERY,
101 CCWR_EP_CREATE,
102 CCWR_EP_GETOPT,
103 CCWR_EP_SETOPT,
104 CCWR_EP_DESTROY,
105 CCWR_EP_BIND,
106 CCWR_EP_CONNECT,
107 CCWR_EP_LISTEN,
108 CCWR_EP_SHUTDOWN,
109 CCWR_EP_LISTEN_CREATE,
110 CCWR_EP_LISTEN_DESTROY,
111 CCWR_EP_QUERY,
112 CCWR_CR_ACCEPT,
113 CCWR_CR_REJECT,
114 CCWR_CONSOLE,
115 CCWR_TERM,
116 CCWR_FLASH_INIT,
117 CCWR_FLASH,
118 CCWR_BUF_ALLOC,
119 CCWR_BUF_FREE,
120 CCWR_FLASH_WRITE,
121 CCWR_INIT, /* WARNING: Don't move this ever again! */
122
123
124
125 /* Add new IDs here */
126
127
128
129 /*
130 * WARNING: CCWR_LAST must always be the last verbs id defined!
131 * All the preceding IDs are fixed, and must not change.
132 * You can add new IDs, but must not remove or reorder
133 * any IDs. If you do, YOU will ruin any hope of
134	 * compatibility between versions.
135 */
136 CCWR_LAST,
137
138 /*
139 * Start over at 1 so that arrays indexed by user wr id's
140 * begin at 1. This is OK since the verbs and user wr id's
141 * are always used on disjoint sets of queues.
142 */
143 /*
144 * The order of the CCWR_SEND_XX verbs must
145 * match the order of the RDMA_OPs
146 */
147 CCWR_SEND = 1,
148 CCWR_SEND_INV,
149 CCWR_SEND_SE,
150 CCWR_SEND_SE_INV,
151 CCWR_RDMA_WRITE,
152 CCWR_RDMA_READ,
153 CCWR_RDMA_READ_INV,
154 CCWR_MW_BIND,
155 CCWR_NSMR_FASTREG,
156 CCWR_STAG_INVALIDATE,
157 CCWR_RECV,
158 CCWR_NOP,
159 CCWR_UNIMPL,
160/* WARNING: This must always be the last user wr id defined! */
161};
162#define RDMA_SEND_OPCODE_FROM_WR_ID(x) (x+2)
163
164/*
165 * SQ/RQ Work Request Types
166 */
167enum c2_wr_type {
168 C2_WR_TYPE_SEND = CCWR_SEND,
169 C2_WR_TYPE_SEND_SE = CCWR_SEND_SE,
170 C2_WR_TYPE_SEND_INV = CCWR_SEND_INV,
171 C2_WR_TYPE_SEND_SE_INV = CCWR_SEND_SE_INV,
172 C2_WR_TYPE_RDMA_WRITE = CCWR_RDMA_WRITE,
173 C2_WR_TYPE_RDMA_READ = CCWR_RDMA_READ,
174 C2_WR_TYPE_RDMA_READ_INV_STAG = CCWR_RDMA_READ_INV,
175 C2_WR_TYPE_BIND_MW = CCWR_MW_BIND,
176 C2_WR_TYPE_FASTREG_NSMR = CCWR_NSMR_FASTREG,
177 C2_WR_TYPE_INV_STAG = CCWR_STAG_INVALIDATE,
178 C2_WR_TYPE_RECV = CCWR_RECV,
179 C2_WR_TYPE_NOP = CCWR_NOP,
180};
181
182struct c2_netaddr {
183 u32 ip_addr;
184 u32 netmask;
185 u32 mtu;
186};
187
188struct c2_route {
189 u32 ip_addr; /* 0 indicates the default route */
190 u32 netmask; /* netmask associated with dst */
191 u32 flags;
192 union {
193 u32 ipaddr; /* address of the nexthop interface */
194 u8 enaddr[6];
195 } nexthop;
196};
197
198/*
199 * A Scatter Gather Entry.
200 */
201struct c2_data_addr {
202 u32 stag;
203 u32 length;
204 u64 to;
205};
206
207/*
208 * MR and MW flags used by the consumer, RI, and RNIC.
209 */
210enum c2_mm_flags {
211 MEM_REMOTE = 0x0001, /* allow mw binds with remote access. */
212 MEM_VA_BASED = 0x0002, /* Not Zero-based */
213 MEM_PBL_COMPLETE = 0x0004, /* PBL array is complete in this msg */
214 MEM_LOCAL_READ = 0x0008, /* allow local reads */
215 MEM_LOCAL_WRITE = 0x0010, /* allow local writes */
216 MEM_REMOTE_READ = 0x0020, /* allow remote reads */
217 MEM_REMOTE_WRITE = 0x0040, /* allow remote writes */
218 MEM_WINDOW_BIND = 0x0080, /* binds allowed */
219 MEM_SHARED = 0x0100, /* set if MR is shared */
220 MEM_STAG_VALID = 0x0200 /* set if STAG is in valid state */
221};
222
223/*
224 * CCIL API ACF flags defined in terms of the low level mem flags.
225 * This minimizes translation needed in the user API
226 */
227enum c2_acf {
228 C2_ACF_LOCAL_READ = MEM_LOCAL_READ,
229 C2_ACF_LOCAL_WRITE = MEM_LOCAL_WRITE,
230 C2_ACF_REMOTE_READ = MEM_REMOTE_READ,
231 C2_ACF_REMOTE_WRITE = MEM_REMOTE_WRITE,
232 C2_ACF_WINDOW_BIND = MEM_WINDOW_BIND
233};
234
235/*
236 * Image types of objects written to flash
237 */
238#define C2_FLASH_IMG_BITFILE 1
239#define C2_FLASH_IMG_OPTION_ROM 2
240#define C2_FLASH_IMG_VPD 3
241
242/*
243 * To fix bug 1815 we define the maximum allowable size of the
244 * terminate message (per the IETF spec; refer to the IETF protocol
245 * specification, section 12.1.6, page 64).
246 * The message is prefixed by 20 bytes of DDP info.
247 *
248 * The message then has 6 bytes for the terminate control
249 * and DDP segment length info plus a DDP header (either
250 * 14 or 18 bytes) plus 28 bytes for the RDMA header.
251 * Thus the max size is:
252 * 20 + (6 + 18 + 28) = 72
253 */
254#define C2_MAX_TERMINATE_MESSAGE_SIZE (72)
255
256/*
257 * Build String Length. It must be the same as C2_BUILD_STR_LEN in ccil_api.h
258 */
259#define WR_BUILD_STR_LEN 64
260
261/*
262 * WARNING: All of these structs need to align any 64-bit types on
263 * 64-bit boundaries!  The only 64-bit type used here is u64.
264 */
265
266/*
267 * Clustercore Work Request Header. Be sensitive to field layout
268 * and alignment.
269 */
270struct c2wr_hdr {
271 /* wqe_count is part of the cqe. It is put here so the
272 * adapter can write to it while the wr is pending without
273 * clobbering part of the wr. This word need not be dma'd
274 * from the host to adapter by libccil, but we copy it anyway
275 * to make the memcpy to the adapter better aligned.
276 */
277 u32 wqe_count;
278
279 /* Put these fields next so that later 32- and 64-bit
280 * quantities are naturally aligned.
281 */
282 u8 id;
283 u8 result; /* adapter -> host */
284 u8 sge_count; /* host -> adapter */
285 u8 flags; /* host -> adapter */
286
287 u64 context;
288#ifdef CCMSGMAGIC
289 u32 magic;
290 u32 pad;
291#endif
292} __attribute__((packed));
293
294/*
295 *------------------------ RNIC ------------------------
296 */
297
298/*
299 * WR_RNIC_OPEN
300 */
301
302/*
303 * Flags for the RNIC WRs
304 */
305enum c2_rnic_flags {
306 RNIC_IRD_STATIC = 0x0001,
307 RNIC_ORD_STATIC = 0x0002,
308 RNIC_QP_STATIC = 0x0004,
309 RNIC_SRQ_SUPPORTED = 0x0008,
310 RNIC_PBL_BLOCK_MODE = 0x0010,
311 RNIC_SRQ_MODEL_ARRIVAL = 0x0020,
312 RNIC_CQ_OVF_DETECTED = 0x0040,
313 RNIC_PRIV_MODE = 0x0080
314};
315
316struct c2wr_rnic_open_req {
317 struct c2wr_hdr hdr;
318 u64 user_context;
319 u16 flags; /* See enum c2_rnic_flags */
320 u16 port_num;
321} __attribute__((packed));
322
323struct c2wr_rnic_open_rep {
324 struct c2wr_hdr hdr;
325 u32 rnic_handle;
326} __attribute__((packed));
327
328union c2wr_rnic_open {
329 struct c2wr_rnic_open_req req;
330 struct c2wr_rnic_open_rep rep;
331} __attribute__((packed));
332
333struct c2wr_rnic_query_req {
334 struct c2wr_hdr hdr;
335 u32 rnic_handle;
336} __attribute__((packed));
337
338/*
339 * WR_RNIC_QUERY
340 */
341struct c2wr_rnic_query_rep {
342 struct c2wr_hdr hdr;
343 u64 user_context;
344 u32 vendor_id;
345 u32 part_number;
346 u32 hw_version;
347 u32 fw_ver_major;
348 u32 fw_ver_minor;
349 u32 fw_ver_patch;
350 char fw_ver_build_str[WR_BUILD_STR_LEN];
351 u32 max_qps;
352 u32 max_qp_depth;
353 u32 max_srq_depth;
354 u32 max_send_sgl_depth;
355 u32 max_rdma_sgl_depth;
356 u32 max_cqs;
357 u32 max_cq_depth;
358 u32 max_cq_event_handlers;
359 u32 max_mrs;
360 u32 max_pbl_depth;
361 u32 max_pds;
362 u32 max_global_ird;
363 u32 max_global_ord;
364 u32 max_qp_ird;
365 u32 max_qp_ord;
366 u32 flags;
367 u32 max_mws;
368 u32 pbe_range_low;
369 u32 pbe_range_high;
370 u32 max_srqs;
371 u32 page_size;
372} __attribute__((packed));
373
374union c2wr_rnic_query {
375 struct c2wr_rnic_query_req req;
376 struct c2wr_rnic_query_rep rep;
377} __attribute__((packed));
378
379/*
380 * WR_RNIC_GETCONFIG
381 */
382
383struct c2wr_rnic_getconfig_req {
384 struct c2wr_hdr hdr;
385 u32 rnic_handle;
386 u32 option; /* see c2_getconfig_cmd_t */
387 u64 reply_buf;
388 u32 reply_buf_len;
389} __attribute__((packed)) ;
390
391struct c2wr_rnic_getconfig_rep {
392 struct c2wr_hdr hdr;
393 u32 option; /* see c2_getconfig_cmd_t */
394 u32 count_len; /* length of the number of addresses configured */
395} __attribute__((packed)) ;
396
397union c2wr_rnic_getconfig {
398 struct c2wr_rnic_getconfig_req req;
399 struct c2wr_rnic_getconfig_rep rep;
400} __attribute__((packed)) ;
401
402/*
403 * WR_RNIC_SETCONFIG
404 */
405struct c2wr_rnic_setconfig_req {
406 struct c2wr_hdr hdr;
407 u32 rnic_handle;
408 u32 option; /* See c2_setconfig_cmd_t */
409 /* variable data and pad. See c2_netaddr and c2_route */
410 u8 data[0];
411} __attribute__((packed)) ;
412
413struct c2wr_rnic_setconfig_rep {
414 struct c2wr_hdr hdr;
415} __attribute__((packed)) ;
416
417union c2wr_rnic_setconfig {
418 struct c2wr_rnic_setconfig_req req;
419 struct c2wr_rnic_setconfig_rep rep;
420} __attribute__((packed)) ;
421
422/*
423 * WR_RNIC_CLOSE
424 */
425struct c2wr_rnic_close_req {
426 struct c2wr_hdr hdr;
427 u32 rnic_handle;
428} __attribute__((packed)) ;
429
430struct c2wr_rnic_close_rep {
431 struct c2wr_hdr hdr;
432} __attribute__((packed)) ;
433
434union c2wr_rnic_close {
435 struct c2wr_rnic_close_req req;
436 struct c2wr_rnic_close_rep rep;
437} __attribute__((packed)) ;
438
439/*
440 *------------------------ CQ ------------------------
441 */
442struct c2wr_cq_create_req {
443 struct c2wr_hdr hdr;
444 u64 shared_ht;
445 u64 user_context;
446 u64 msg_pool;
447 u32 rnic_handle;
448 u32 msg_size;
449 u32 depth;
450} __attribute__((packed)) ;
451
452struct c2wr_cq_create_rep {
453 struct c2wr_hdr hdr;
454 u32 mq_index;
455 u32 adapter_shared;
456 u32 cq_handle;
457} __attribute__((packed)) ;
458
459union c2wr_cq_create {
460 struct c2wr_cq_create_req req;
461 struct c2wr_cq_create_rep rep;
462} __attribute__((packed)) ;
463
464struct c2wr_cq_modify_req {
465 struct c2wr_hdr hdr;
466 u32 rnic_handle;
467 u32 cq_handle;
468 u32 new_depth;
469 u64 new_msg_pool;
470} __attribute__((packed)) ;
471
472struct c2wr_cq_modify_rep {
473 struct c2wr_hdr hdr;
474} __attribute__((packed)) ;
475
476union c2wr_cq_modify {
477 struct c2wr_cq_modify_req req;
478 struct c2wr_cq_modify_rep rep;
479} __attribute__((packed)) ;
480
481struct c2wr_cq_destroy_req {
482 struct c2wr_hdr hdr;
483 u32 rnic_handle;
484 u32 cq_handle;
485} __attribute__((packed)) ;
486
487struct c2wr_cq_destroy_rep {
488 struct c2wr_hdr hdr;
489} __attribute__((packed)) ;
490
491union c2wr_cq_destroy {
492 struct c2wr_cq_destroy_req req;
493 struct c2wr_cq_destroy_rep rep;
494} __attribute__((packed)) ;
495
496/*
497 *------------------------ PD ------------------------
498 */
499struct c2wr_pd_alloc_req {
500 struct c2wr_hdr hdr;
501 u32 rnic_handle;
502 u32 pd_id;
503} __attribute__((packed)) ;
504
505struct c2wr_pd_alloc_rep {
506 struct c2wr_hdr hdr;
507} __attribute__((packed)) ;
508
509union c2wr_pd_alloc {
510 struct c2wr_pd_alloc_req req;
511 struct c2wr_pd_alloc_rep rep;
512} __attribute__((packed)) ;
513
514struct c2wr_pd_dealloc_req {
515 struct c2wr_hdr hdr;
516 u32 rnic_handle;
517 u32 pd_id;
518} __attribute__((packed)) ;
519
520struct c2wr_pd_dealloc_rep {
521 struct c2wr_hdr hdr;
522} __attribute__((packed)) ;
523
524union c2wr_pd_dealloc {
525 struct c2wr_pd_dealloc_req req;
526 struct c2wr_pd_dealloc_rep rep;
527} __attribute__((packed)) ;
528
529/*
530 *------------------------ SRQ ------------------------
531 */
532struct c2wr_srq_create_req {
533 struct c2wr_hdr hdr;
534 u64 shared_ht;
535 u64 user_context;
536 u32 rnic_handle;
537 u32 srq_depth;
538 u32 srq_limit;
539 u32 sgl_depth;
540 u32 pd_id;
541} __attribute__((packed)) ;
542
543struct c2wr_srq_create_rep {
544 struct c2wr_hdr hdr;
545 u32 srq_depth;
546 u32 sgl_depth;
547 u32 msg_size;
548 u32 mq_index;
549 u32 mq_start;
550 u32 srq_handle;
551} __attribute__((packed)) ;
552
553union c2wr_srq_create {
554 struct c2wr_srq_create_req req;
555 struct c2wr_srq_create_rep rep;
556} __attribute__((packed)) ;
557
558struct c2wr_srq_destroy_req {
559 struct c2wr_hdr hdr;
560 u32 rnic_handle;
561 u32 srq_handle;
562} __attribute__((packed)) ;
563
564struct c2wr_srq_destroy_rep {
565 struct c2wr_hdr hdr;
566} __attribute__((packed)) ;
567
568union c2wr_srq_destroy {
569 struct c2wr_srq_destroy_req req;
570 struct c2wr_srq_destroy_rep rep;
571} __attribute__((packed)) ;
572
573/*
574 *------------------------ QP ------------------------
575 */
576enum c2wr_qp_flags {
577 QP_RDMA_READ = 0x00000001, /* RDMA read enabled? */
578 QP_RDMA_WRITE = 0x00000002, /* RDMA write enabled? */
579 QP_MW_BIND = 0x00000004, /* MWs enabled */
580 QP_ZERO_STAG = 0x00000008, /* enabled? */
581 QP_REMOTE_TERMINATION = 0x00000010, /* remote end terminated */
582 QP_RDMA_READ_RESPONSE = 0x00000020 /* Remote RDMA read */
583 /* enabled? */
584};
585
586struct c2wr_qp_create_req {
587 struct c2wr_hdr hdr;
588 u64 shared_sq_ht;
589 u64 shared_rq_ht;
590 u64 user_context;
591 u32 rnic_handle;
592 u32 sq_cq_handle;
593 u32 rq_cq_handle;
594 u32 sq_depth;
595 u32 rq_depth;
596 u32 srq_handle;
597 u32 srq_limit;
598 u32 flags; /* see enum c2wr_qp_flags */
599 u32 send_sgl_depth;
600 u32 recv_sgl_depth;
601 u32 rdma_write_sgl_depth;
602 u32 ord;
603 u32 ird;
604 u32 pd_id;
605} __attribute__((packed)) ;
606
607struct c2wr_qp_create_rep {
608 struct c2wr_hdr hdr;
609 u32 sq_depth;
610 u32 rq_depth;
611 u32 send_sgl_depth;
612 u32 recv_sgl_depth;
613 u32 rdma_write_sgl_depth;
614 u32 ord;
615 u32 ird;
616 u32 sq_msg_size;
617 u32 sq_mq_index;
618 u32 sq_mq_start;
619 u32 rq_msg_size;
620 u32 rq_mq_index;
621 u32 rq_mq_start;
622 u32 qp_handle;
623} __attribute__((packed)) ;
624
625union c2wr_qp_create {
626 struct c2wr_qp_create_req req;
627 struct c2wr_qp_create_rep rep;
628} __attribute__((packed)) ;
629
630struct c2wr_qp_query_req {
631 struct c2wr_hdr hdr;
632 u32 rnic_handle;
633 u32 qp_handle;
634} __attribute__((packed)) ;
635
636struct c2wr_qp_query_rep {
637 struct c2wr_hdr hdr;
638 u64 user_context;
639 u32 rnic_handle;
640 u32 sq_depth;
641 u32 rq_depth;
642 u32 send_sgl_depth;
643 u32 rdma_write_sgl_depth;
644 u32 recv_sgl_depth;
645 u32 ord;
646 u32 ird;
647 u16 qp_state;
648 u16 flags; /* see c2wr_qp_flags_t */
649 u32 qp_id;
650 u32 local_addr;
651 u32 remote_addr;
652 u16 local_port;
653 u16 remote_port;
654 u32 terminate_msg_length; /* 0 if not present */
655 u8 data[0];
656 /* Terminate Message in-line here. */
657} __attribute__((packed)) ;
658
659union c2wr_qp_query {
660 struct c2wr_qp_query_req req;
661 struct c2wr_qp_query_rep rep;
662} __attribute__((packed)) ;
663
664struct c2wr_qp_modify_req {
665 struct c2wr_hdr hdr;
666 u64 stream_msg;
667 u32 stream_msg_length;
668 u32 rnic_handle;
669 u32 qp_handle;
670 u32 next_qp_state;
671 u32 ord;
672 u32 ird;
673 u32 sq_depth;
674 u32 rq_depth;
675 u32 llp_ep_handle;
676} __attribute__((packed)) ;
677
678struct c2wr_qp_modify_rep {
679 struct c2wr_hdr hdr;
680 u32 ord;
681 u32 ird;
682 u32 sq_depth;
683 u32 rq_depth;
684 u32 sq_msg_size;
685 u32 sq_mq_index;
686 u32 sq_mq_start;
687 u32 rq_msg_size;
688 u32 rq_mq_index;
689 u32 rq_mq_start;
690} __attribute__((packed)) ;
691
692union c2wr_qp_modify {
693 struct c2wr_qp_modify_req req;
694 struct c2wr_qp_modify_rep rep;
695} __attribute__((packed)) ;
696
697struct c2wr_qp_destroy_req {
698 struct c2wr_hdr hdr;
699 u32 rnic_handle;
700 u32 qp_handle;
701} __attribute__((packed)) ;
702
703struct c2wr_qp_destroy_rep {
704 struct c2wr_hdr hdr;
705} __attribute__((packed)) ;
706
707union c2wr_qp_destroy {
708 struct c2wr_qp_destroy_req req;
709 struct c2wr_qp_destroy_rep rep;
710} __attribute__((packed)) ;
711
712/*
713 * The CCWR_QP_CONNECT msg is posted on the verbs request queue. It can
714 * only be posted when a QP is in IDLE state. After the connect request is
715 * submitted to the LLP, the adapter moves the QP to CONNECT_PENDING state.
716 * No synchronous reply from adapter to this WR. The results of
717 * connection are passed back in an async event CCAE_ACTIVE_CONNECT_RESULTS
718 * See struct c2wr_ae_active_connect_results.
719 */
720struct c2wr_qp_connect_req {
721 struct c2wr_hdr hdr;
722 u32 rnic_handle;
723 u32 qp_handle;
724 u32 remote_addr;
725 u16 remote_port;
726 u16 pad;
727 u32 private_data_length;
728 u8 private_data[0]; /* Private data in-line. */
729} __attribute__((packed)) ;
730
731struct c2wr_qp_connect {
732 struct c2wr_qp_connect_req req;
733 /* no synchronous reply. */
734} __attribute__((packed)) ;
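
/*
 * A sketch of the asynchronous contract described above: the host fills in
 * a CCWR_QP_CONNECT request and posts it on the verbs request queue, but
 * does not wait for a reply; the outcome arrives later as a
 * CCAE_ACTIVE_CONNECT_RESULTS AE.  The helper name and the byte-order
 * comments are assumptions for illustration.
 */
static __inline__ void example_fill_qp_connect(struct c2wr_qp_connect_req *wr,
					       u32 rnic_handle, u32 qp_handle,
					       u32 remote_addr, u16 remote_port)
{
	wr->hdr.id = CCWR_QP_CONNECT;
	wr->rnic_handle = rnic_handle;
	wr->qp_handle = qp_handle;
	wr->remote_addr = remote_addr;		/* assumed network byte order */
	wr->remote_port = remote_port;		/* assumed network byte order */
	wr->private_data_length = 0;		/* optional MPA private data
						 * would follow in private_data[] */
}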
735
736
737/*
738 *------------------------ MM ------------------------
739 */
740
741struct c2wr_nsmr_stag_alloc_req {
742 struct c2wr_hdr hdr;
743 u32 rnic_handle;
744 u32 pbl_depth;
745 u32 pd_id;
746 u32 flags;
747} __attribute__((packed)) ;
748
749struct c2wr_nsmr_stag_alloc_rep {
750 struct c2wr_hdr hdr;
751 u32 pbl_depth;
752 u32 stag_index;
753} __attribute__((packed)) ;
754
755union c2wr_nsmr_stag_alloc {
756 struct c2wr_nsmr_stag_alloc_req req;
757 struct c2wr_nsmr_stag_alloc_rep rep;
758} __attribute__((packed)) ;
759
760struct c2wr_nsmr_register_req {
761 struct c2wr_hdr hdr;
762 u64 va;
763 u32 rnic_handle;
764 u16 flags;
765 u8 stag_key;
766 u8 pad;
767 u32 pd_id;
768 u32 pbl_depth;
769 u32 pbe_size;
770 u32 fbo;
771 u32 length;
772 u32 addrs_length;
773 /* array of paddrs (must be aligned on a 64bit boundary) */
774 u64 paddrs[0];
775} __attribute__((packed)) ;
776
777struct c2wr_nsmr_register_rep {
778 struct c2wr_hdr hdr;
779 u32 pbl_depth;
780 u32 stag_index;
781} __attribute__((packed)) ;
782
783union c2wr_nsmr_register {
784 struct c2wr_nsmr_register_req req;
785 struct c2wr_nsmr_register_rep rep;
786} __attribute__((packed)) ;
787
788struct c2wr_nsmr_pbl_req {
789 struct c2wr_hdr hdr;
790 u32 rnic_handle;
791 u32 flags;
792 u32 stag_index;
793 u32 addrs_length;
794 /* array of paddrs (must be aligned on a 64bit boundary) */
795 u64 paddrs[0];
796} __attribute__((packed)) ;
797
798struct c2wr_nsmr_pbl_rep {
799 struct c2wr_hdr hdr;
800} __attribute__((packed)) ;
801
802union c2wr_nsmr_pbl {
803 struct c2wr_nsmr_pbl_req req;
804 struct c2wr_nsmr_pbl_rep rep;
805} __attribute__((packed)) ;
806
807struct c2wr_mr_query_req {
808 struct c2wr_hdr hdr;
809 u32 rnic_handle;
810 u32 stag_index;
811} __attribute__((packed)) ;
812
813struct c2wr_mr_query_rep {
814 struct c2wr_hdr hdr;
815 u8 stag_key;
816 u8 pad[3];
817 u32 pd_id;
818 u32 flags;
819 u32 pbl_depth;
820} __attribute__((packed)) ;
821
822union c2wr_mr_query {
823 struct c2wr_mr_query_req req;
824 struct c2wr_mr_query_rep rep;
825} __attribute__((packed)) ;
826
827struct c2wr_mw_query_req {
828 struct c2wr_hdr hdr;
829 u32 rnic_handle;
830 u32 stag_index;
831} __attribute__((packed)) ;
832
833struct c2wr_mw_query_rep {
834 struct c2wr_hdr hdr;
835 u8 stag_key;
836 u8 pad[3];
837 u32 pd_id;
838 u32 flags;
839} __attribute__((packed)) ;
840
841union c2wr_mw_query {
842 struct c2wr_mw_query_req req;
843 struct c2wr_mw_query_rep rep;
844} __attribute__((packed)) ;
845
846
847struct c2wr_stag_dealloc_req {
848 struct c2wr_hdr hdr;
849 u32 rnic_handle;
850 u32 stag_index;
851} __attribute__((packed)) ;
852
853struct c2wr_stag_dealloc_rep {
854 struct c2wr_hdr hdr;
855} __attribute__((packed)) ;
856
857union c2wr_stag_dealloc {
858 struct c2wr_stag_dealloc_req req;
859 struct c2wr_stag_dealloc_rep rep;
860} __attribute__((packed)) ;
861
862struct c2wr_nsmr_reregister_req {
863 struct c2wr_hdr hdr;
864 u64 va;
865 u32 rnic_handle;
866 u16 flags;
867 u8 stag_key;
868 u8 pad;
869 u32 stag_index;
870 u32 pd_id;
871 u32 pbl_depth;
872 u32 pbe_size;
873 u32 fbo;
874 u32 length;
875 u32 addrs_length;
876 u32 pad1;
877 /* array of paddrs (must be aligned on a 64bit boundary) */
878 u64 paddrs[0];
879} __attribute__((packed)) ;
880
881struct c2wr_nsmr_reregister_rep {
882 struct c2wr_hdr hdr;
883 u32 pbl_depth;
884 u32 stag_index;
885} __attribute__((packed)) ;
886
887union c2wr_nsmr_reregister {
888 struct c2wr_nsmr_reregister_req req;
889 struct c2wr_nsmr_reregister_rep rep;
890} __attribute__((packed)) ;
891
892struct c2wr_smr_register_req {
893 struct c2wr_hdr hdr;
894 u64 va;
895 u32 rnic_handle;
896 u16 flags;
897 u8 stag_key;
898 u8 pad;
899 u32 stag_index;
900 u32 pd_id;
901} __attribute__((packed)) ;
902
903struct c2wr_smr_register_rep {
904 struct c2wr_hdr hdr;
905 u32 stag_index;
906} __attribute__((packed)) ;
907
908union c2wr_smr_register {
909 struct c2wr_smr_register_req req;
910 struct c2wr_smr_register_rep rep;
911} __attribute__((packed)) ;
912
913struct c2wr_mw_alloc_req {
914 struct c2wr_hdr hdr;
915 u32 rnic_handle;
916 u32 pd_id;
917} __attribute__((packed)) ;
918
919struct c2wr_mw_alloc_rep {
920 struct c2wr_hdr hdr;
921 u32 stag_index;
922} __attribute__((packed)) ;
923
924union c2wr_mw_alloc {
925 struct c2wr_mw_alloc_req req;
926 struct c2wr_mw_alloc_rep rep;
927} __attribute__((packed)) ;
928
929/*
930 *------------------------ WRs -----------------------
931 */
932
933struct c2wr_user_hdr {
934 struct c2wr_hdr hdr; /* Has status and WR Type */
935} __attribute__((packed)) ;
936
937enum c2_qp_state {
938 C2_QP_STATE_IDLE = 0x01,
939 C2_QP_STATE_CONNECTING = 0x02,
940 C2_QP_STATE_RTS = 0x04,
941 C2_QP_STATE_CLOSING = 0x08,
942 C2_QP_STATE_TERMINATE = 0x10,
943 C2_QP_STATE_ERROR = 0x20,
944};
945
946/* Completion queue entry. */
947struct c2wr_ce {
948 struct c2wr_hdr hdr; /* Has status and WR Type */
949 u64 qp_user_context; /* c2_user_qp_t * */
950 u32 qp_state; /* Current QP State */
951 u32 handle; /* QPID or EP Handle */
952 u32 bytes_rcvd; /* valid for RECV WCs */
953 u32 stag;
954} __attribute__((packed)) ;
955
956
957/*
958 * Flags used for all post-sq WRs. These must fit in the flags
959 * field of the struct c2wr_hdr (eight bits).
960 */
961enum {
962 SQ_SIGNALED = 0x01,
963 SQ_READ_FENCE = 0x02,
964 SQ_FENCE = 0x04,
965};
966
967/*
968 * Common fields for all post-sq WRs. Namely the standard header and a
969 * secondary header with fields common to all post-sq WRs.
970 */
971struct c2_sq_hdr {
972 struct c2wr_user_hdr user_hdr;
973} __attribute__((packed));
974
975/*
976 * Same as above but for post-rq WRs.
977 */
978struct c2_rq_hdr {
979 struct c2wr_user_hdr user_hdr;
980} __attribute__((packed));
981
982/*
983 * use the same struct for all sends.
984 */
985struct c2wr_send_req {
986 struct c2_sq_hdr sq_hdr;
987 u32 sge_len;
988 u32 remote_stag;
989 u8 data[0]; /* SGE array */
990} __attribute__((packed));
991
992union c2wr_send {
993 struct c2wr_send_req req;
994 struct c2wr_ce rep;
995} __attribute__((packed));
996
997struct c2wr_rdma_write_req {
998 struct c2_sq_hdr sq_hdr;
999 u64 remote_to;
1000 u32 remote_stag;
1001 u32 sge_len;
1002 u8 data[0]; /* SGE array */
1003} __attribute__((packed));
1004
1005union c2wr_rdma_write {
1006 struct c2wr_rdma_write_req req;
1007 struct c2wr_ce rep;
1008} __attribute__((packed));
1009
1010struct c2wr_rdma_read_req {
1011 struct c2_sq_hdr sq_hdr;
1012 u64 local_to;
1013 u64 remote_to;
1014 u32 local_stag;
1015 u32 remote_stag;
1016 u32 length;
1017} __attribute__((packed));
1018
1019union c2wr_rdma_read {
1020 struct c2wr_rdma_read_req req;
1021 struct c2wr_ce rep;
1022} __attribute__((packed));
1023
1024struct c2wr_mw_bind_req {
1025 struct c2_sq_hdr sq_hdr;
1026 u64 va;
1027 u8 stag_key;
1028 u8 pad[3];
1029 u32 mw_stag_index;
1030 u32 mr_stag_index;
1031 u32 length;
1032 u32 flags;
1033} __attribute__((packed));
1034
1035union c2wr_mw_bind {
1036 struct c2wr_mw_bind_req req;
1037 struct c2wr_ce rep;
1038} __attribute__((packed));
1039
1040struct c2wr_nsmr_fastreg_req {
1041 struct c2_sq_hdr sq_hdr;
1042 u64 va;
1043 u8 stag_key;
1044 u8 pad[3];
1045 u32 stag_index;
1046 u32 pbe_size;
1047 u32 fbo;
1048 u32 length;
1049 u32 addrs_length;
1050 /* array of paddrs (must be aligned on a 64bit boundary) */
1051 u64 paddrs[0];
1052} __attribute__((packed));
1053
1054union c2wr_nsmr_fastreg {
1055 struct c2wr_nsmr_fastreg_req req;
1056 struct c2wr_ce rep;
1057} __attribute__((packed));
1058
1059struct c2wr_stag_invalidate_req {
1060 struct c2_sq_hdr sq_hdr;
1061 u8 stag_key;
1062 u8 pad[3];
1063 u32 stag_index;
1064} __attribute__((packed));
1065
1066union c2wr_stag_invalidate {
1067 struct c2wr_stag_invalidate_req req;
1068 struct c2wr_ce rep;
1069} __attribute__((packed));
1070
1071union c2wr_sqwr {
1072 struct c2_sq_hdr sq_hdr;
1073 struct c2wr_send_req send;
1074 struct c2wr_send_req send_se;
1075 struct c2wr_send_req send_inv;
1076 struct c2wr_send_req send_se_inv;
1077 struct c2wr_rdma_write_req rdma_write;
1078 struct c2wr_rdma_read_req rdma_read;
1079 struct c2wr_mw_bind_req mw_bind;
1080 struct c2wr_nsmr_fastreg_req nsmr_fastreg;
1081 struct c2wr_stag_invalidate_req stag_inv;
1082} __attribute__((packed));
1083
1084
1085/*
1086 * RQ WRs
1087 */
1088struct c2wr_rqwr {
1089 struct c2_rq_hdr rq_hdr;
1090 u8 data[0]; /* array of SGEs */
1091} __attribute__((packed));
1092
1093union c2wr_recv {
1094 struct c2wr_rqwr req;
1095 struct c2wr_ce rep;
1096} __attribute__((packed));
1097
1098/*
1099 * All AEs start with this header. Most AEs only need to convey the
1100 * information in the header. Some, like LLP connection events, need
1101 * more info. The union c2wr_ae has all the possible AEs.
1102 *
1103 * hdr.context is the user_context from the rnic_open WR. NULL if this
1104 * is not affiliated with an RNIC.
1105 *
1106 * hdr.id is the AE identifier (e.g. CCAE_REMOTE_SHUTDOWN,
1107 * CCAE_LLP_CLOSE_COMPLETE)
1108 *
1109 * resource_type is one of: C2_RES_IND_QP, C2_RES_IND_CQ, C2_RES_IND_SRQ
1110 *
1111 * user_context is the context passed down when the host created the resource.
1112 */
1113struct c2wr_ae_hdr {
1114 struct c2wr_hdr hdr;
1115 u64 user_context; /* user context for this res. */
1116 u32 resource_type; /* see enum c2_resource_indicator */
1117 u32 resource; /* handle for resource */
1118 u32 qp_state; /* current QP State */
1119} __attribute__((packed));
1120
1121/*
1122 * After submitting the CCAE_ACTIVE_CONNECT_RESULTS message on the AEQ,
1123 * the adapter moves the QP into RTS state
1124 */
1125struct c2wr_ae_active_connect_results {
1126 struct c2wr_ae_hdr ae_hdr;
1127 u32 laddr;
1128 u32 raddr;
1129 u16 lport;
1130 u16 rport;
1131 u32 private_data_length;
1132 u8 private_data[0]; /* data is in-line in the msg. */
1133} __attribute__((packed));
1134
1135/*
1136 * When connections are established by the stack (and the private data
1137 * MPA frame is received), the adapter will generate an event to the host.
1138 * The details of the connection, any private data, and the new connection
1139 * request handle is passed up via the CCAE_CONNECTION_REQUEST msg on the
1140 * AE queue:
1141 */
1142struct c2wr_ae_connection_request {
1143 struct c2wr_ae_hdr ae_hdr;
1144 u32 cr_handle; /* connreq handle (sock ptr) */
1145 u32 laddr;
1146 u32 raddr;
1147 u16 lport;
1148 u16 rport;
1149 u32 private_data_length;
1150 u8 private_data[0]; /* data is in-line in the msg. */
1151} __attribute__((packed));
1152
1153union c2wr_ae {
1154 struct c2wr_ae_hdr ae_generic;
1155 struct c2wr_ae_active_connect_results ae_active_connect_results;
1156 struct c2wr_ae_connection_request ae_connection_request;
1157} __attribute__((packed));
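
/*
 * A sketch of how a host-side consumer might dispatch these AEs by keying
 * off the common header's id field.  The CCAE_* identifiers are defined
 * elsewhere in the driver and are referenced here only for illustration;
 * a real handler would also consult resource_type and user_context.
 */
static __inline__ void example_dispatch_ae(union c2wr_ae *ae)
{
	switch (ae->ae_generic.hdr.id) {
	case CCAE_ACTIVE_CONNECT_RESULTS:
		/* active connect finished; details in ae->ae_active_connect_results */
		break;
	case CCAE_CONNECTION_REQUEST:
		/* passive side: new LLP connection; details in ae->ae_connection_request */
		break;
	default:
		/* most AEs carry only the generic header */
		break;
	}
}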
1158
1159struct c2wr_init_req {
1160 struct c2wr_hdr hdr;
1161 u64 hint_count;
1162 u64 q0_host_shared;
1163 u64 q1_host_shared;
1164 u64 q1_host_msg_pool;
1165 u64 q2_host_shared;
1166 u64 q2_host_msg_pool;
1167} __attribute__((packed));
1168
1169struct c2wr_init_rep {
1170 struct c2wr_hdr hdr;
1171} __attribute__((packed));
1172
1173union c2wr_init {
1174 struct c2wr_init_req req;
1175 struct c2wr_init_rep rep;
1176} __attribute__((packed));
1177
1178/*
1179 * For upgrading flash.
1180 */
1181
1182struct c2wr_flash_init_req {
1183 struct c2wr_hdr hdr;
1184 u32 rnic_handle;
1185} __attribute__((packed));
1186
1187struct c2wr_flash_init_rep {
1188 struct c2wr_hdr hdr;
1189 u32 adapter_flash_buf_offset;
1190 u32 adapter_flash_len;
1191} __attribute__((packed));
1192
1193union c2wr_flash_init {
1194 struct c2wr_flash_init_req req;
1195 struct c2wr_flash_init_rep rep;
1196} __attribute__((packed));
1197
1198struct c2wr_flash_req {
1199 struct c2wr_hdr hdr;
1200 u32 rnic_handle;
1201 u32 len;
1202} __attribute__((packed));
1203
1204struct c2wr_flash_rep {
1205 struct c2wr_hdr hdr;
1206 u32 status;
1207} __attribute__((packed));
1208
1209union c2wr_flash {
1210 struct c2wr_flash_req req;
1211 struct c2wr_flash_rep rep;
1212} __attribute__((packed));
1213
1214struct c2wr_buf_alloc_req {
1215 struct c2wr_hdr hdr;
1216 u32 rnic_handle;
1217 u32 size;
1218} __attribute__((packed));
1219
1220struct c2wr_buf_alloc_rep {
1221 struct c2wr_hdr hdr;
1222 u32 offset; /* 0 if mem not available */
1223 u32 size; /* 0 if mem not available */
1224} __attribute__((packed));
1225
1226union c2wr_buf_alloc {
1227 struct c2wr_buf_alloc_req req;
1228 struct c2wr_buf_alloc_rep rep;
1229} __attribute__((packed));
1230
1231struct c2wr_buf_free_req {
1232 struct c2wr_hdr hdr;
1233 u32 rnic_handle;
1234 u32 offset; /* Must match value from alloc */
1235 u32 size; /* Must match value from alloc */
1236} __attribute__((packed));
1237
1238struct c2wr_buf_free_rep {
1239 struct c2wr_hdr hdr;
1240} __attribute__((packed));
1241
1242union c2wr_buf_free {
1243 struct c2wr_buf_free_req req;
1244 struct c2wr_ce rep;
1245} __attribute__((packed));
1246
1247struct c2wr_flash_write_req {
1248 struct c2wr_hdr hdr;
1249 u32 rnic_handle;
1250 u32 offset;
1251 u32 size;
1252 u32 type;
1253 u32 flags;
1254} __attribute__((packed));
1255
1256struct c2wr_flash_write_rep {
1257 struct c2wr_hdr hdr;
1258 u32 status;
1259} __attribute__((packed));
1260
1261union c2wr_flash_write {
1262 struct c2wr_flash_write_req req;
1263 struct c2wr_flash_write_rep rep;
1264} __attribute__((packed));
1265
1266/*
1267 * Messages for LLP connection setup.
1268 */
1269
1270/*
1271 * Listen Request. This allocates a listening endpoint to allow passive
1272 * connection setup. Newly established LLP connections are passed up
1273 * via an AE. See struct c2wr_ae_connection_request.
1274 */
1275struct c2wr_ep_listen_create_req {
1276 struct c2wr_hdr hdr;
1277 u64 user_context; /* returned in AEs. */
1278 u32 rnic_handle;
1279 u32 local_addr; /* local addr, or 0 */
1280 u16 local_port; /* 0 means "pick one" */
1281 u16 pad;
1282	u32 backlog;		/* traditional TCP listen backlog */
1283} __attribute__((packed));
1284
1285struct c2wr_ep_listen_create_rep {
1286 struct c2wr_hdr hdr;
1287 u32 ep_handle; /* handle to new listening ep */
1288 u16 local_port; /* resulting port... */
1289 u16 pad;
1290} __attribute__((packed));
1291
1292union c2wr_ep_listen_create {
1293 struct c2wr_ep_listen_create_req req;
1294 struct c2wr_ep_listen_create_rep rep;
1295} __attribute__((packed));
1296
1297struct c2wr_ep_listen_destroy_req {
1298 struct c2wr_hdr hdr;
1299 u32 rnic_handle;
1300 u32 ep_handle;
1301} __attribute__((packed));
1302
1303struct c2wr_ep_listen_destroy_rep {
1304 struct c2wr_hdr hdr;
1305} __attribute__((packed));
1306
1307union c2wr_ep_listen_destroy {
1308 struct c2wr_ep_listen_destroy_req req;
1309 struct c2wr_ep_listen_destroy_rep rep;
1310} __attribute__((packed));
1311
1312struct c2wr_ep_query_req {
1313 struct c2wr_hdr hdr;
1314 u32 rnic_handle;
1315 u32 ep_handle;
1316} __attribute__((packed));
1317
1318struct c2wr_ep_query_rep {
1319 struct c2wr_hdr hdr;
1320 u32 rnic_handle;
1321 u32 local_addr;
1322 u32 remote_addr;
1323 u16 local_port;
1324 u16 remote_port;
1325} __attribute__((packed));
1326
1327union c2wr_ep_query {
1328 struct c2wr_ep_query_req req;
1329 struct c2wr_ep_query_rep rep;
1330} __attribute__((packed));
1331
1332
1333/*
1334 * The host passes this down to indicate acceptance of a pending iWARP
1335 * connection. The cr_handle was obtained from the CONNECTION_REQUEST
1336 * AE passed up by the adapter. See struct c2wr_ae_connection_request.
1337 */
1338struct c2wr_cr_accept_req {
1339 struct c2wr_hdr hdr;
1340 u32 rnic_handle;
1341 u32 qp_handle; /* QP to bind to this LLP conn */
1342 u32 ep_handle; /* LLP handle to accept */
1343 u32 private_data_length;
1344 u8 private_data[0]; /* data in-line in msg. */
1345} __attribute__((packed));
1346
1347/*
1348 * The adapter sends a reply when the private data has been successfully
1349 * submitted to the LLP.
1350 */
1351struct c2wr_cr_accept_rep {
1352 struct c2wr_hdr hdr;
1353} __attribute__((packed));
1354
1355union c2wr_cr_accept {
1356 struct c2wr_cr_accept_req req;
1357 struct c2wr_cr_accept_rep rep;
1358} __attribute__((packed));
1359
1360/*
1361 * The host sends this down if a given iWARP connection request was
1362 * rejected by the consumer. The cr_handle was obtained from a
1363 * previous struct c2wr_ae_connection_request AE sent by the adapter.
1364 */
1365struct c2wr_cr_reject_req {
1366 struct c2wr_hdr hdr;
1367 u32 rnic_handle;
1368 u32 ep_handle; /* LLP handle to reject */
1369} __attribute__((packed));
1370
1371/*
1372 * It is unclear whether this is needed, but we'll add it for now. The adapter will
1373 * send the reject_reply after the LLP endpoint has been destroyed.
1374 */
1375struct c2wr_cr_reject_rep {
1376 struct c2wr_hdr hdr;
1377} __attribute__((packed));
1378
1379union c2wr_cr_reject {
1380 struct c2wr_cr_reject_req req;
1381 struct c2wr_cr_reject_rep rep;
1382} __attribute__((packed));
1383
1384/*
1385 * console command. Used to implement a debug console over the verbs
1386 * request and reply queues.
1387 */
1388
1389/*
1390 * Console request message. It contains:
1391 * - message hdr with id = CCWR_CONSOLE
1392 * - the physaddr/len of host memory to be used for the reply.
1393 * - the command string, e.g. "netstat -s" or "zoneinfo"
1394 */
1395struct c2wr_console_req {
1396 struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */
1397 u64 reply_buf; /* pinned host buf for reply */
1398 u32 reply_buf_len; /* length of reply buffer */
1399 u8 command[0]; /* NUL terminated ascii string */
1400 /* containing the command req */
1401} __attribute__((packed));
1402
1403/*
1404 * flags used in the console reply.
1405 */
1406enum c2_console_flags {
1407 CONS_REPLY_TRUNCATED = 0x00000001 /* reply was truncated */
1408} __attribute__((packed));
1409
1410/*
1411 * Console reply message.
1412 * hdr.result contains the c2_status_t error if the reply was _not_ generated,
1413 * or C2_OK if the reply was generated.
1414 */
1415struct c2wr_console_rep {
1416 struct c2wr_hdr hdr; /* id = CCWR_CONSOLE */
1417 u32 flags;
1418} __attribute__((packed));
1419
1420union c2wr_console {
1421 struct c2wr_console_req req;
1422 struct c2wr_console_rep rep;
1423} __attribute__((packed));
1424
1425
1426/*
1427 * Giant union with all WRs. Makes life easier...
1428 */
1429union c2wr {
1430 struct c2wr_hdr hdr;
1431 struct c2wr_user_hdr user_hdr;
1432 union c2wr_rnic_open rnic_open;
1433 union c2wr_rnic_query rnic_query;
1434 union c2wr_rnic_getconfig rnic_getconfig;
1435 union c2wr_rnic_setconfig rnic_setconfig;
1436 union c2wr_rnic_close rnic_close;
1437 union c2wr_cq_create cq_create;
1438 union c2wr_cq_modify cq_modify;
1439 union c2wr_cq_destroy cq_destroy;
1440 union c2wr_pd_alloc pd_alloc;
1441 union c2wr_pd_dealloc pd_dealloc;
1442 union c2wr_srq_create srq_create;
1443 union c2wr_srq_destroy srq_destroy;
1444 union c2wr_qp_create qp_create;
1445 union c2wr_qp_query qp_query;
1446 union c2wr_qp_modify qp_modify;
1447 union c2wr_qp_destroy qp_destroy;
1448 struct c2wr_qp_connect qp_connect;
1449 union c2wr_nsmr_stag_alloc nsmr_stag_alloc;
1450 union c2wr_nsmr_register nsmr_register;
1451 union c2wr_nsmr_pbl nsmr_pbl;
1452 union c2wr_mr_query mr_query;
1453 union c2wr_mw_query mw_query;
1454 union c2wr_stag_dealloc stag_dealloc;
1455 union c2wr_sqwr sqwr;
1456 struct c2wr_rqwr rqwr;
1457 struct c2wr_ce ce;
1458 union c2wr_ae ae;
1459 union c2wr_init init;
1460 union c2wr_ep_listen_create ep_listen_create;
1461 union c2wr_ep_listen_destroy ep_listen_destroy;
1462 union c2wr_cr_accept cr_accept;
1463 union c2wr_cr_reject cr_reject;
1464 union c2wr_console console;
1465 union c2wr_flash_init flash_init;
1466 union c2wr_flash flash;
1467 union c2wr_buf_alloc buf_alloc;
1468 union c2wr_buf_free buf_free;
1469 union c2wr_flash_write flash_write;
1470} __attribute__((packed));
1471
1472
1473/*
1474 * Accessors for the wr fields that are packed together tightly to
1475 * reduce the wr message size. The wr arguments are void* so that
1476 * either a union c2wr *, a struct c2wr_hdr *, or a pointer to any of the types
1477 * in the union c2wr can be passed in.
1478 */
1479static __inline__ u8 c2_wr_get_id(void *wr)
1480{
1481 return ((struct c2wr_hdr *) wr)->id;
1482}
1483static __inline__ void c2_wr_set_id(void *wr, u8 id)
1484{
1485 ((struct c2wr_hdr *) wr)->id = id;
1486}
1487static __inline__ u8 c2_wr_get_result(void *wr)
1488{
1489 return ((struct c2wr_hdr *) wr)->result;
1490}
1491static __inline__ void c2_wr_set_result(void *wr, u8 result)
1492{
1493 ((struct c2wr_hdr *) wr)->result = result;
1494}
1495static __inline__ u8 c2_wr_get_flags(void *wr)
1496{
1497 return ((struct c2wr_hdr *) wr)->flags;
1498}
1499static __inline__ void c2_wr_set_flags(void *wr, u8 flags)
1500{
1501 ((struct c2wr_hdr *) wr)->flags = flags;
1502}
1503static __inline__ u8 c2_wr_get_sge_count(void *wr)
1504{
1505 return ((struct c2wr_hdr *) wr)->sge_count;
1506}
1507static __inline__ void c2_wr_set_sge_count(void *wr, u8 sge_count)
1508{
1509 ((struct c2wr_hdr *) wr)->sge_count = sge_count;
1510}
1511static __inline__ u32 c2_wr_get_wqe_count(void *wr)
1512{
1513 return ((struct c2wr_hdr *) wr)->wqe_count;
1514}
1515static __inline__ void c2_wr_set_wqe_count(void *wr, u32 wqe_count)
1516{
1517 ((struct c2wr_hdr *) wr)->wqe_count = wqe_count;
1518}
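
/*
 * A small sketch showing the accessors above in use: initializing the
 * packed header of a signaled SEND work request before its SGEs are
 * appended in data[].  The helper name and the single-SGE assumption are
 * illustrative only.
 */
static __inline__ void example_init_send_hdr(struct c2wr_send_req *wr,
					     u64 user_context)
{
	c2_wr_set_id(wr, CCWR_SEND);
	c2_wr_set_result(wr, 0);
	c2_wr_set_flags(wr, SQ_SIGNALED);	/* ask for a completion */
	c2_wr_set_sge_count(wr, 1);		/* one SGE will follow in data[] */
	wr->sq_hdr.user_hdr.hdr.context = user_context;
}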
1519
1520#endif /* _C2_WR_H_ */
diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig
new file mode 100644
index 000000000000..922389b64394
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Kconfig
@@ -0,0 +1,16 @@
1config INFINIBAND_EHCA
2 tristate "eHCA support"
3 depends on IBMEBUS && INFINIBAND
4 ---help---
5 This driver supports the IBM pSeries eHCA InfiniBand adapter.
6
7 To compile the driver as a module, choose M here. The module
8 will be called ib_ehca.
9
10config INFINIBAND_EHCA_SCALING
11 bool "Scaling support (EXPERIMENTAL)"
12 depends on IBMEBUS && INFINIBAND_EHCA && HOTPLUG_CPU && EXPERIMENTAL
13 ---help---
14 eHCA scaling support schedules the CQ callbacks to different CPUs.
15
16 To enable this feature choose Y here.
diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile
new file mode 100644
index 000000000000..74d284e46a40
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/Makefile
@@ -0,0 +1,16 @@
1# Authors: Heiko J Schick <schickhj@de.ibm.com>
2# Christoph Raisch <raisch@de.ibm.com>
3# Joachim Fenkes <fenkes@de.ibm.com>
4#
5# Copyright (c) 2005 IBM Corporation
6#
7# All rights reserved.
8#
9# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD.
10
11obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o
12
13ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \
14 ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \
15 ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o
16
diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c
new file mode 100644
index 000000000000..3bac197f9014
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_av.c
@@ -0,0 +1,271 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * address vector functions
5 *
6 * Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
7 * Khadija Souissi <souissik@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com>
9 * Christoph Raisch <raisch@de.ibm.com>
10 *
11 * Copyright (c) 2005 IBM Corporation
12 *
13 * All rights reserved.
14 *
15 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
16 * BSD.
17 *
18 * OpenIB BSD License
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions are met:
22 *
23 * Redistributions of source code must retain the above copyright notice, this
24 * list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright notice,
27 * this list of conditions and the following disclaimer in the documentation
28 * and/or other materials
29 * provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
39 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGE.
42 */
43
44
45#include <asm/current.h>
46
47#include "ehca_tools.h"
48#include "ehca_iverbs.h"
49#include "hcp_if.h"
50
51static struct kmem_cache *av_cache;
52
53struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
54{
55 int ret;
56 struct ehca_av *av;
57 struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
58 ib_device);
59
60 av = kmem_cache_alloc(av_cache, SLAB_KERNEL);
61 if (!av) {
62 ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p",
63 pd, ah_attr);
64 return ERR_PTR(-ENOMEM);
65 }
66
67 av->av.sl = ah_attr->sl;
68 av->av.dlid = ah_attr->dlid;
69 av->av.slid_path_bits = ah_attr->src_path_bits;
70
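	/*
	 * Derive the inter-packet delay (IPD) from the static rate: if the
	 * requested rate is at least the port rate, send at full speed
	 * (ipd = 0); otherwise the delay scales with the ratio of the two
	 * rate multipliers.  A non-negative ehca_static_rate module
	 * parameter overrides this calculation.
	 */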
71 if (ehca_static_rate < 0) {
72 int ah_mult = ib_rate_to_mult(ah_attr->static_rate);
73 int ehca_mult =
74			ib_rate_to_mult(shca->sport[ah_attr->port_num].rate);
75
76 if (ah_mult >= ehca_mult)
77 av->av.ipd = 0;
78 else
79 av->av.ipd = (ah_mult > 0) ?
80 ((ehca_mult - 1) / ah_mult) : 0;
81 } else
82 av->av.ipd = ehca_static_rate;
83
84 av->av.lnh = ah_attr->ah_flags;
85 av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6);
86 av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK,
87 ah_attr->grh.traffic_class);
88 av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
89 ah_attr->grh.flow_label);
90 av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
91 ah_attr->grh.hop_limit);
92 av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B);
93 /* set sgid in grh.word_1 */
94 if (ah_attr->ah_flags & IB_AH_GRH) {
95 int rc;
96 struct ib_port_attr port_attr;
97 union ib_gid gid;
98 memset(&port_attr, 0, sizeof(port_attr));
99 rc = ehca_query_port(pd->device, ah_attr->port_num,
100 &port_attr);
101 if (rc) { /* invalid port number */
102 ret = -EINVAL;
103 ehca_err(pd->device, "Invalid port number "
104 "ehca_query_port() returned %x "
105 "pd=%p ah_attr=%p", rc, pd, ah_attr);
106 goto create_ah_exit1;
107 }
108 memset(&gid, 0, sizeof(gid));
109 rc = ehca_query_gid(pd->device,
110 ah_attr->port_num,
111 ah_attr->grh.sgid_index, &gid);
112 if (rc) {
113 ret = -EINVAL;
114 ehca_err(pd->device, "Failed to retrieve sgid "
115 "ehca_query_gid() returned %x "
116 "pd=%p ah_attr=%p", rc, pd, ah_attr);
117 goto create_ah_exit1;
118 }
119 memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
120 }
121	/* for the time being we use a hard-coded PMTU of 2048 bytes */
122 av->av.pmtu = 4;
123
124 /* dgid comes in grh.word_3 */
125 memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
126 sizeof(ah_attr->grh.dgid));
127
128 return &av->ib_ah;
129
130create_ah_exit1:
131 kmem_cache_free(av_cache, av);
132
133 return ERR_PTR(ret);
134}
135
136int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
137{
138 struct ehca_av *av;
139 struct ehca_ud_av new_ehca_av;
140 struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
141 u32 cur_pid = current->tgid;
142
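	/* a userspace PD may only be used by the process that created it */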
143 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
144 my_pd->ownpid != cur_pid) {
145 ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
146 cur_pid, my_pd->ownpid);
147 return -EINVAL;
148 }
149
150 memset(&new_ehca_av, 0, sizeof(new_ehca_av));
151 new_ehca_av.sl = ah_attr->sl;
152 new_ehca_av.dlid = ah_attr->dlid;
153 new_ehca_av.slid_path_bits = ah_attr->src_path_bits;
154 new_ehca_av.ipd = ah_attr->static_rate;
155 new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK,
156 (ah_attr->ah_flags & IB_AH_GRH) > 0);
157 new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK,
158 ah_attr->grh.traffic_class);
159 new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK,
160 ah_attr->grh.flow_label);
161 new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK,
162 ah_attr->grh.hop_limit);
163 new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b);
164
165 /* set sgid in grh.word_1 */
166 if (ah_attr->ah_flags & IB_AH_GRH) {
167 int rc;
168 struct ib_port_attr port_attr;
169 union ib_gid gid;
170 memset(&port_attr, 0, sizeof(port_attr));
171 rc = ehca_query_port(ah->device, ah_attr->port_num,
172 &port_attr);
173 if (rc) { /* invalid port number */
174 ehca_err(ah->device, "Invalid port number "
175 "ehca_query_port() returned %x "
176 "ah=%p ah_attr=%p port_num=%x",
177 rc, ah, ah_attr, ah_attr->port_num);
178 return -EINVAL;
179 }
180 memset(&gid, 0, sizeof(gid));
181 rc = ehca_query_gid(ah->device,
182 ah_attr->port_num,
183 ah_attr->grh.sgid_index, &gid);
184 if (rc) {
185 ehca_err(ah->device, "Failed to retrieve sgid "
186 "ehca_query_gid() returned %x "
187 "ah=%p ah_attr=%p port_num=%x "
188 "sgid_index=%x",
189 rc, ah, ah_attr, ah_attr->port_num,
190 ah_attr->grh.sgid_index);
191 return -EINVAL;
192 }
193 memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
194 }
195
196 new_ehca_av.pmtu = 4; /* see also comment in create_ah() */
197
198 memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
199 sizeof(ah_attr->grh.dgid));
200
201 av = container_of(ah, struct ehca_av, ib_ah);
202 av->av = new_ehca_av;
203
204 return 0;
205}
206
207int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
208{
209 struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah);
210 struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
211 u32 cur_pid = current->tgid;
212
213 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
214 my_pd->ownpid != cur_pid) {
215 ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
216 cur_pid, my_pd->ownpid);
217 return -EINVAL;
218 }
219
220 memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3,
221 sizeof(ah_attr->grh.dgid));
222 ah_attr->sl = av->av.sl;
223
224 ah_attr->dlid = av->av.dlid;
225
226 ah_attr->src_path_bits = av->av.slid_path_bits;
227 ah_attr->static_rate = av->av.ipd;
228 ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh);
229 ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK,
230 av->av.grh.word_0);
231 ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK,
232 av->av.grh.word_0);
233 ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK,
234 av->av.grh.word_0);
235
236 return 0;
237}
238
239int ehca_destroy_ah(struct ib_ah *ah)
240{
241 struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
242 u32 cur_pid = current->tgid;
243
244 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
245 my_pd->ownpid != cur_pid) {
246 ehca_err(ah->device, "Invalid caller pid=%x ownpid=%x",
247 cur_pid, my_pd->ownpid);
248 return -EINVAL;
249 }
250
251 kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah));
252
253 return 0;
254}
255
256int ehca_init_av_cache(void)
257{
258 av_cache = kmem_cache_create("ehca_cache_av",
259 sizeof(struct ehca_av), 0,
260 SLAB_HWCACHE_ALIGN,
261 NULL, NULL);
262 if (!av_cache)
263 return -ENOMEM;
264 return 0;
265}
266
267void ehca_cleanup_av_cache(void)
268{
269 if (av_cache)
270 kmem_cache_destroy(av_cache);
271}
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
new file mode 100644
index 000000000000..1c722032319c
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -0,0 +1,346 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Struct definition for eHCA internal structures
5 *
6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Christoph Raisch <raisch@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#ifndef __EHCA_CLASSES_H__
43#define __EHCA_CLASSES_H__
44
45#include "ehca_classes.h"
46#include "ipz_pt_fn.h"
47
48struct ehca_module;
49struct ehca_qp;
50struct ehca_cq;
51struct ehca_eq;
52struct ehca_mr;
53struct ehca_mw;
54struct ehca_pd;
55struct ehca_av;
56
57#ifdef CONFIG_PPC64
58#include "ehca_classes_pSeries.h"
59#endif
60
61#include <rdma/ib_verbs.h>
62#include <rdma/ib_user_verbs.h>
63
64#include "ehca_irq.h"
65
66struct ehca_eq {
67 u32 length;
68 struct ipz_queue ipz_queue;
69 struct ipz_eq_handle ipz_eq_handle;
70 struct work_struct work;
71 struct h_galpas galpas;
72 int is_initialized;
73 struct ehca_pfeq pf;
74 spinlock_t spinlock;
75 struct tasklet_struct interrupt_task;
76 u32 ist;
77};
78
79struct ehca_sport {
80 struct ib_cq *ibcq_aqp1;
81 struct ib_qp *ibqp_aqp1;
82 enum ib_rate rate;
83 enum ib_port_state port_state;
84};
85
86struct ehca_shca {
87 struct ib_device ib_device;
88 struct ibmebus_dev *ibmebus_dev;
89 u8 num_ports;
90 int hw_level;
91 struct list_head shca_list;
92 struct ipz_adapter_handle ipz_hca_handle;
93 struct ehca_sport sport[2];
94 struct ehca_eq eq;
95 struct ehca_eq neq;
96 struct ehca_mr *maxmr;
97 struct ehca_pd *pd;
98 struct h_galpas galpas;
99};
100
101struct ehca_pd {
102 struct ib_pd ib_pd;
103 struct ipz_pd fw_pd;
104 u32 ownpid;
105};
106
107struct ehca_qp {
108 struct ib_qp ib_qp;
109 u32 qp_type;
110 struct ipz_queue ipz_squeue;
111 struct ipz_queue ipz_rqueue;
112 struct h_galpas galpas;
113 u32 qkey;
114 u32 real_qp_num;
115 u32 token;
116 spinlock_t spinlock_s;
117 spinlock_t spinlock_r;
118 u32 sq_max_inline_data_size;
119 struct ipz_qp_handle ipz_qp_handle;
120 struct ehca_pfqp pf;
121 struct ib_qp_init_attr init_attr;
122 u64 uspace_squeue;
123 u64 uspace_rqueue;
124 u64 uspace_fwh;
125 struct ehca_cq *send_cq;
126 struct ehca_cq *recv_cq;
127 unsigned int sqerr_purgeflag;
128 struct hlist_node list_entries;
129};
130
131/* must be power of 2 */
132#define QP_HASHTAB_LEN 8
133
134struct ehca_cq {
135 struct ib_cq ib_cq;
136 struct ipz_queue ipz_queue;
137 struct h_galpas galpas;
138 spinlock_t spinlock;
139 u32 cq_number;
140 u32 token;
141 u32 nr_of_entries;
142 struct ipz_cq_handle ipz_cq_handle;
143 struct ehca_pfcq pf;
144 spinlock_t cb_lock;
145 u64 uspace_queue;
146 u64 uspace_fwh;
147 struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
148 struct list_head entry;
149 u32 nr_callbacks;
150 spinlock_t task_lock;
151 u32 ownpid;
152};
153
154enum ehca_mr_flag {
155 EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */
156 EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */
157};
158
159struct ehca_mr {
160 union {
161 struct ib_mr ib_mr; /* must always be first in ehca_mr */
162 struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
163 } ib;
164 spinlock_t mrlock;
165
166 enum ehca_mr_flag flags;
167 u32 num_pages; /* number of MR pages */
168 u32 num_4k; /* number of 4k "page" portions to form MR */
169 int acl; /* ACL (stored here for usage in reregister) */
170 u64 *start; /* virtual start address (stored here for */
171 /* usage in reregister) */
172 u64 size; /* size (stored here for usage in reregister) */
173 u32 fmr_page_size; /* page size for FMR */
174 u32 fmr_max_pages; /* max pages for FMR */
175 u32 fmr_max_maps; /* max outstanding maps for FMR */
176 u32 fmr_map_cnt; /* map counter for FMR */
177 /* fw specific data */
178 struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */
179 struct h_galpas galpas;
180 /* data for userspace bridge */
181 u32 nr_of_pages;
182 void *pagearray;
183};
184
185struct ehca_mw {
186 struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */
187 spinlock_t mwlock;
188
189 u8 never_bound; /* indication MW was never bound */
190 struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */
191 struct h_galpas galpas;
192};
193
194enum ehca_mr_pgi_type {
195 EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr,
196 * ehca_rereg_phys_mr,
197 * ehca_reg_internal_maxmr */
198 EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */
199 EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */
200};
201
202struct ehca_mr_pginfo {
203 enum ehca_mr_pgi_type type;
204 u64 num_pages;
205 u64 page_cnt;
206 u64 num_4k; /* number of 4k "page" portions */
207 u64 page_4k_cnt; /* counter for 4k "page" portions */
208 u64 next_4k; /* next 4k "page" portion in buffer/chunk/listelem */
209
210 /* type EHCA_MR_PGI_PHYS section */
211 int num_phys_buf;
212 struct ib_phys_buf *phys_buf_array;
213 u64 next_buf;
214
215 /* type EHCA_MR_PGI_USER section */
216 struct ib_umem *region;
217 struct ib_umem_chunk *next_chunk;
218 u64 next_nmap;
219
220 /* type EHCA_MR_PGI_FMR section */
221 u64 *page_list;
222 u64 next_listelem;
223 /* next_4k also used within EHCA_MR_PGI_FMR */
224};
225
226/* output parameters for MR/FMR hipz calls */
227struct ehca_mr_hipzout_parms {
228 struct ipz_mrmw_handle handle;
229 u32 lkey;
230 u32 rkey;
231 u64 len;
232 u64 vaddr;
233 u32 acl;
234};
235
236/* output parameters for MW hipz calls */
237struct ehca_mw_hipzout_parms {
238 struct ipz_mrmw_handle handle;
239 u32 rkey;
240};
241
242struct ehca_av {
243 struct ib_ah ib_ah;
244 struct ehca_ud_av av;
245};
246
247struct ehca_ucontext {
248 struct ib_ucontext ib_ucontext;
249};
250
251struct ehca_module *ehca_module_new(void);
252
253int ehca_module_delete(struct ehca_module *me);
254
255int ehca_eq_ctor(struct ehca_eq *eq);
256
257int ehca_eq_dtor(struct ehca_eq *eq);
258
259struct ehca_shca *ehca_shca_new(void);
260
261int ehca_shca_delete(struct ehca_shca *me);
262
263struct ehca_sport *ehca_sport_new(struct ehca_shca *anchor);
264
265int ehca_init_pd_cache(void);
266void ehca_cleanup_pd_cache(void);
267int ehca_init_cq_cache(void);
268void ehca_cleanup_cq_cache(void);
269int ehca_init_qp_cache(void);
270void ehca_cleanup_qp_cache(void);
271int ehca_init_av_cache(void);
272void ehca_cleanup_av_cache(void);
273int ehca_init_mrmw_cache(void);
274void ehca_cleanup_mrmw_cache(void);
275
276extern spinlock_t ehca_qp_idr_lock;
277extern spinlock_t ehca_cq_idr_lock;
278extern struct idr ehca_qp_idr;
279extern struct idr ehca_cq_idr;
280
281extern int ehca_static_rate;
282extern int ehca_port_act_time;
283extern int ehca_use_hp_mr;
284
285struct ipzu_queue_resp {
286 u64 queue; /* points to first queue entry */
287 u32 qe_size; /* queue entry size */
288 u32 act_nr_of_sg;
289 u32 queue_length; /* queue length allocated in bytes */
290 u32 pagesize;
291 u32 toggle_state;
292 u32 dummy; /* padding for 8 byte alignment */
293};
294
295struct ehca_create_cq_resp {
296 u32 cq_number;
297 u32 token;
298 struct ipzu_queue_resp ipz_queue;
299 struct h_galpas galpas;
300};
301
302struct ehca_create_qp_resp {
303 u32 qp_num;
304 u32 token;
305 u32 qp_type;
306 u32 qkey;
307	/* qp_num assigned by ehca: sqp0/1 may have been assigned different numbers */
308 u32 real_qp_num;
309 u32 dummy; /* padding for 8 byte alignment */
310 struct ipzu_queue_resp ipz_squeue;
311 struct ipzu_queue_resp ipz_rqueue;
312 struct h_galpas galpas;
313};
314
315struct ehca_alloc_cq_parms {
316 u32 nr_cqe;
317 u32 act_nr_of_entries;
318 u32 act_pages;
319 struct ipz_eq_handle eq_handle;
320};
321
322struct ehca_alloc_qp_parms {
323 int servicetype;
324 int sigtype;
325 int daqp_ctrl;
326 int max_send_sge;
327 int max_recv_sge;
328 int ud_av_l_key_ctl;
329
330 u16 act_nr_send_wqes;
331 u16 act_nr_recv_wqes;
332 u8 act_nr_recv_sges;
333 u8 act_nr_send_sges;
334
335 u32 nr_rq_pages;
336 u32 nr_sq_pages;
337
338 struct ipz_eq_handle ipz_eq_handle;
339 struct ipz_pd pd;
340};
341
342int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);
343int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num);
344struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int qp_num);
345
346#endif
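
Note on QP_HASHTAB_LEN: it must be a power of two because ehca_cq_assign_qp() in ehca_cq.c below derives the bucket index with a bit-mask rather than a modulo. A minimal stand-alone sketch of that keying (plain userspace C, illustrative values, not driver code):

#include <stdio.h>

#define QP_HASHTAB_LEN 8	/* must be a power of 2, as noted above */

/* Same keying as ehca_cq_assign_qp(): works only for power-of-2 table sizes. */
static unsigned int qp_hash_key(unsigned int real_qp_num)
{
	return real_qp_num & (QP_HASHTAB_LEN - 1);
}

int main(void)
{
	printf("qp 0x101 -> bucket %u\n", qp_hash_key(0x101));	/* 1 */
	printf("qp 0x108 -> bucket %u\n", qp_hash_key(0x108));	/* 0 */
	return 0;
}
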
diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
new file mode 100644
index 000000000000..5665f213b81a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h
@@ -0,0 +1,236 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * pSeries interface definitions
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Christoph Raisch <raisch@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#ifndef __EHCA_CLASSES_PSERIES_H__
43#define __EHCA_CLASSES_PSERIES_H__
44
45#include "hcp_phyp.h"
46#include "ipz_pt_fn.h"
47
48
49struct ehca_pfqp {
50 struct ipz_qpt sqpt;
51 struct ipz_qpt rqpt;
52};
53
54struct ehca_pfcq {
55 struct ipz_qpt qpt;
56 u32 cqnr;
57};
58
59struct ehca_pfeq {
60 struct ipz_qpt qpt;
61 struct h_galpa galpa;
62 u32 eqnr;
63};
64
65struct ipz_adapter_handle {
66 u64 handle;
67};
68
69struct ipz_cq_handle {
70 u64 handle;
71};
72
73struct ipz_eq_handle {
74 u64 handle;
75};
76
77struct ipz_qp_handle {
78 u64 handle;
79};
80struct ipz_mrmw_handle {
81 u64 handle;
82};
83
84struct ipz_pd {
85 u32 value;
86};
87
88struct hcp_modify_qp_control_block {
89 u32 qkey; /* 00 */
90 u32 rdd; /* reliable datagram domain */
91 u32 send_psn; /* 02 */
92 u32 receive_psn; /* 03 */
93 u32 prim_phys_port; /* 04 */
94 u32 alt_phys_port; /* 05 */
95 u32 prim_p_key_idx; /* 06 */
96 u32 alt_p_key_idx; /* 07 */
97 u32 rdma_atomic_ctrl; /* 08 */
98 u32 qp_state; /* 09 */
99 u32 reserved_10; /* 10 */
100 u32 rdma_nr_atomic_resp_res; /* 11 */
101 u32 path_migration_state; /* 12 */
102 u32 rdma_atomic_outst_dest_qp; /* 13 */
103 u32 dest_qp_nr; /* 14 */
104 u32 min_rnr_nak_timer_field; /* 15 */
105 u32 service_level; /* 16 */
106 u32 send_grh_flag; /* 17 */
107 u32 retry_count; /* 18 */
108 u32 timeout; /* 19 */
109 u32 path_mtu; /* 20 */
110 u32 max_static_rate; /* 21 */
111 u32 dlid; /* 22 */
112 u32 rnr_retry_count; /* 23 */
113 u32 source_path_bits; /* 24 */
114 u32 traffic_class; /* 25 */
115 u32 hop_limit; /* 26 */
116 u32 source_gid_idx; /* 27 */
117 u32 flow_label; /* 28 */
118 u32 reserved_29; /* 29 */
119 union { /* 30 */
120 u64 dw[2];
121 u8 byte[16];
122 } dest_gid;
123 u32 service_level_al; /* 34 */
124 u32 send_grh_flag_al; /* 35 */
125 u32 retry_count_al; /* 36 */
126 u32 timeout_al; /* 37 */
127 u32 max_static_rate_al; /* 38 */
128 u32 dlid_al; /* 39 */
129 u32 rnr_retry_count_al; /* 40 */
130 u32 source_path_bits_al; /* 41 */
131 u32 traffic_class_al; /* 42 */
132 u32 hop_limit_al; /* 43 */
133 u32 source_gid_idx_al; /* 44 */
134 u32 flow_label_al; /* 45 */
135 u32 reserved_46; /* 46 */
136 u32 reserved_47; /* 47 */
137 union { /* 48 */
138 u64 dw[2];
139 u8 byte[16];
140 } dest_gid_al;
141 u32 max_nr_outst_send_wr; /* 52 */
142 u32 max_nr_outst_recv_wr; /* 53 */
143 u32 disable_ete_credit_check; /* 54 */
144 u32 qp_number; /* 55 */
145 u64 send_queue_handle; /* 56 */
146 u64 recv_queue_handle; /* 58 */
147 u32 actual_nr_sges_in_sq_wqe; /* 60 */
148 u32 actual_nr_sges_in_rq_wqe; /* 61 */
149 u32 qp_enable; /* 62 */
150 u32 curr_srq_limit; /* 63 */
151 u64 qp_aff_asyn_ev_log_reg; /* 64 */
152 u64 shared_rq_hndl; /* 66 */
153 u64 trigg_doorbell_qp_hndl; /* 68 */
154 u32 reserved_70_127[58]; /* 70 */
155};
156
157#define MQPCB_MASK_QKEY EHCA_BMASK_IBM(0,0)
158#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM(2,2)
159#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM(3,3)
160#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM(4,4)
161#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24,31)
162#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM(5,5)
163#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM(6,6)
164#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24,31)
165#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM(7,7)
166#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM(8,8)
167#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM(9,9)
168#define MQPCB_QP_STATE EHCA_BMASK_IBM(24,31)
169#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11,11)
170#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12,12)
171#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13,13)
172#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14,14)
173#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15,15)
174#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16,16)
175#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17,17)
176#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18,18)
177#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19,19)
178#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20,20)
179#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24,31)
180#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21,21)
181#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24,31)
182#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22,22)
183#define MQPCB_DLID EHCA_BMASK_IBM(16,31)
184#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23,23)
185#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29,31)
186#define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24,24)
187#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25,31)
188#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25,25)
189#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24,31)
190#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26,26)
191#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24,31)
192#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27,27)
193#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24,31)
194#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28,28)
195#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12,31)
196#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30,30)
197#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31,31)
198#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28,31)
199#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32,32)
200#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31,31)
201#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33,33)
202#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31)
203#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34,34)
204#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27,31)
205#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35,35)
206#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24,31)
207#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36,36)
208#define MQPCB_DLID_AL EHCA_BMASK_IBM(16,31)
209#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37,37)
210#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29,31)
211#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38,38)
212#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25,31)
213#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39,39)
214#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24,31)
215#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40,40)
216#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24,31)
217#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41,41)
218#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24,31)
219#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42,42)
220#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12,31)
221#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44,44)
222#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45,45)
223#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16,31)
224#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46,46)
225#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16,31)
226#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47,47)
227#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31,31)
228#define MQPCB_QP_NUMBER EHCA_BMASK_IBM(8,31)
229#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48,48)
230#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31,31)
231#define MQPCB_MASK_CURR_SQR_LIMIT EHCA_BMASK_IBM(49,49)
232#define MQPCB_CURR_SQR_LIMIT EHCA_BMASK_IBM(15,31)
233#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50,50)
234#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51,51)
235
236#endif /* __EHCA_CLASSES_PSERIES_H__ */
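
Note on the MQPCB defines: the MQPCB_MASK_* constants name single bits of the h-call update mask (which control-block fields are valid), while the bare MQPCB_* constants describe where a value sits inside its word. Both are built with EHCA_BMASK_IBM(), an IBM-style big-endian from/to bit-range helper defined in ehca_tools.h (not part of this hunk) and read back with EHCA_BMASK_GET(), as used in ehca_irq.c below. A stand-alone sketch of the extraction idea, using a simplified helper rather than the driver's macro:

#include <stdio.h>
#include <stdint.h>

/*
 * Simplified stand-in for EHCA_BMASK_GET(): extract big-endian bits from..to
 * (bit 0 = MSB, as the firmware documents them) of a 64-bit word.
 */
static uint64_t get_be_bits(uint64_t word, int from, int to)
{
	int width = to - from + 1;
	uint64_t mask = (width == 64) ? ~0ULL : (1ULL << width) - 1;

	return (word >> (63 - to)) & mask;
}

int main(void)
{
	uint64_t eqe = 0x8000000000001234ULL;

	/* EQE_CQ_TOKEN is declared as bits 32..63 in ehca_irq.c below */
	printf("cq token = 0x%llx\n",
	       (unsigned long long)get_be_bits(eqe, 32, 63));
	return 0;
}
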
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
new file mode 100644
index 000000000000..458fe19648a1
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -0,0 +1,427 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Completion queue handling
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Khadija Souissi <souissi@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com>
9 * Heiko J Schick <schickhj@de.ibm.com>
10 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
11 *
12 *
13 * Copyright (c) 2005 IBM Corporation
14 *
15 * All rights reserved.
16 *
17 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
18 * BSD.
19 *
20 * OpenIB BSD License
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions are met:
24 *
25 * Redistributions of source code must retain the above copyright notice, this
26 * list of conditions and the following disclaimer.
27 *
28 * Redistributions in binary form must reproduce the above copyright notice,
29 * this list of conditions and the following disclaimer in the documentation
30 * and/or other materials
31 * provided with the distribution.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
40 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
41 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 * POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include <asm/current.h>
47
48#include "ehca_iverbs.h"
49#include "ehca_classes.h"
50#include "ehca_irq.h"
51#include "hcp_if.h"
52
53static struct kmem_cache *cq_cache;
54
55int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
56{
57 unsigned int qp_num = qp->real_qp_num;
58 unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
59 unsigned long spl_flags;
60
61 spin_lock_irqsave(&cq->spinlock, spl_flags);
62 hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
63 spin_unlock_irqrestore(&cq->spinlock, spl_flags);
64
65 ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
66 cq->cq_number, qp_num);
67
68 return 0;
69}
70
71int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
72{
73 int ret = -EINVAL;
74 unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
75 struct hlist_node *iter;
76 struct ehca_qp *qp;
77 unsigned long spl_flags;
78
79 spin_lock_irqsave(&cq->spinlock, spl_flags);
80 hlist_for_each(iter, &cq->qp_hashtab[key]) {
81 qp = hlist_entry(iter, struct ehca_qp, list_entries);
82 if (qp->real_qp_num == real_qp_num) {
83 hlist_del(iter);
84 ehca_dbg(cq->ib_cq.device,
85 "removed qp from cq .cq_num=%x real_qp_num=%x",
86 cq->cq_number, real_qp_num);
87 ret = 0;
88 break;
89 }
90 }
91 spin_unlock_irqrestore(&cq->spinlock, spl_flags);
92 if (ret)
93 ehca_err(cq->ib_cq.device,
94 "qp not found cq_num=%x real_qp_num=%x",
95 cq->cq_number, real_qp_num);
96
97 return ret;
98}
99
100struct ehca_qp* ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num)
101{
102 struct ehca_qp *ret = NULL;
103 unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
104 struct hlist_node *iter;
105 struct ehca_qp *qp;
106 hlist_for_each(iter, &cq->qp_hashtab[key]) {
107 qp = hlist_entry(iter, struct ehca_qp, list_entries);
108 if (qp->real_qp_num == real_qp_num) {
109 ret = qp;
110 break;
111 }
112 }
113 return ret;
114}
115
116struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
117 struct ib_ucontext *context,
118 struct ib_udata *udata)
119{
120 static const u32 additional_cqe = 20;
121 struct ib_cq *cq;
122 struct ehca_cq *my_cq;
123 struct ehca_shca *shca =
124 container_of(device, struct ehca_shca, ib_device);
125 struct ipz_adapter_handle adapter_handle;
126 struct ehca_alloc_cq_parms param; /* h_call's out parameters */
127 struct h_galpa gal;
128 void *vpage;
129 u32 counter;
130 u64 rpage, cqx_fec, h_ret;
131 int ipz_rc, ret, i;
132 unsigned long flags;
133
134 if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
135 return ERR_PTR(-EINVAL);
136
137 my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL);
138 if (!my_cq) {
139 ehca_err(device, "Out of memory for ehca_cq struct device=%p",
140 device);
141 return ERR_PTR(-ENOMEM);
142 }
143
144 memset(my_cq, 0, sizeof(struct ehca_cq));
145 memset(&param, 0, sizeof(struct ehca_alloc_cq_parms));
146
147 spin_lock_init(&my_cq->spinlock);
148 spin_lock_init(&my_cq->cb_lock);
149 spin_lock_init(&my_cq->task_lock);
150 my_cq->ownpid = current->tgid;
151
152 cq = &my_cq->ib_cq;
153
154 adapter_handle = shca->ipz_hca_handle;
155 param.eq_handle = shca->eq.ipz_eq_handle;
156
157 do {
158 if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) {
159 cq = ERR_PTR(-ENOMEM);
160 ehca_err(device, "Can't reserve idr nr. device=%p",
161 device);
162 goto create_cq_exit1;
163 }
164
165 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
166 ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
167 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
168
169 } while (ret == -EAGAIN);
170
171 if (ret) {
172 cq = ERR_PTR(-ENOMEM);
173 ehca_err(device, "Can't allocate new idr entry. device=%p",
174 device);
175 goto create_cq_exit1;
176 }
177
178 /*
179	 * A CQ's maximum depth is 4GB-64, but we need an additional 20 entries
180	 * as a buffer for receiving error CQEs.
181 */
182 param.nr_cqe = cqe + additional_cqe;
183 h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param);
184
185 if (h_ret != H_SUCCESS) {
186 ehca_err(device, "hipz_h_alloc_resource_cq() failed "
187 "h_ret=%lx device=%p", h_ret, device);
188 cq = ERR_PTR(ehca2ib_return_code(h_ret));
189 goto create_cq_exit2;
190 }
191
192 ipz_rc = ipz_queue_ctor(&my_cq->ipz_queue, param.act_pages,
193 EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0);
194 if (!ipz_rc) {
195 ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
196 ipz_rc, device);
197 cq = ERR_PTR(-EINVAL);
198 goto create_cq_exit3;
199 }
200
201 for (counter = 0; counter < param.act_pages; counter++) {
202 vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
203 if (!vpage) {
204 ehca_err(device, "ipz_qpageit_get_inc() "
205 "returns NULL device=%p", device);
206 cq = ERR_PTR(-EAGAIN);
207 goto create_cq_exit4;
208 }
209 rpage = virt_to_abs(vpage);
210
211 h_ret = hipz_h_register_rpage_cq(adapter_handle,
212 my_cq->ipz_cq_handle,
213 &my_cq->pf,
214 0,
215 0,
216 rpage,
217 1,
218 my_cq->galpas.
219 kernel);
220
221 if (h_ret < H_SUCCESS) {
222 ehca_err(device, "hipz_h_register_rpage_cq() failed "
223 "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
224 "act_pages=%i", my_cq, my_cq->cq_number,
225 h_ret, counter, param.act_pages);
226 cq = ERR_PTR(-EINVAL);
227 goto create_cq_exit4;
228 }
229
230 if (counter == (param.act_pages - 1)) {
231 vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue);
232 if ((h_ret != H_SUCCESS) || vpage) {
233 ehca_err(device, "Registration of pages not "
234 "complete ehca_cq=%p cq_num=%x "
235 "h_ret=%lx", my_cq, my_cq->cq_number,
236 h_ret);
237 cq = ERR_PTR(-EAGAIN);
238 goto create_cq_exit4;
239 }
240 } else {
241 if (h_ret != H_PAGE_REGISTERED) {
242 ehca_err(device, "Registration of page failed "
243 "ehca_cq=%p cq_num=%x h_ret=%lx"
244 "counter=%i act_pages=%i",
245 my_cq, my_cq->cq_number,
246 h_ret, counter, param.act_pages);
247 cq = ERR_PTR(-ENOMEM);
248 goto create_cq_exit4;
249 }
250 }
251 }
252
253 ipz_qeit_reset(&my_cq->ipz_queue);
254
255 gal = my_cq->galpas.kernel;
256 cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec));
257 ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx",
258 my_cq, my_cq->cq_number, cqx_fec);
259
260 my_cq->ib_cq.cqe = my_cq->nr_of_entries =
261 param.act_nr_of_entries - additional_cqe;
262 my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff;
263
264 for (i = 0; i < QP_HASHTAB_LEN; i++)
265 INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]);
266
267 if (context) {
268 struct ipz_queue *ipz_queue = &my_cq->ipz_queue;
269 struct ehca_create_cq_resp resp;
270 struct vm_area_struct *vma;
271 memset(&resp, 0, sizeof(resp));
272 resp.cq_number = my_cq->cq_number;
273 resp.token = my_cq->token;
274 resp.ipz_queue.qe_size = ipz_queue->qe_size;
275 resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg;
276 resp.ipz_queue.queue_length = ipz_queue->queue_length;
277 resp.ipz_queue.pagesize = ipz_queue->pagesize;
278 resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
279 ret = ehca_mmap_nopage(((u64)(my_cq->token) << 32) | 0x12000000,
280 ipz_queue->queue_length,
281 (void**)&resp.ipz_queue.queue,
282 &vma);
283 if (ret) {
284 ehca_err(device, "Could not mmap queue pages");
285 cq = ERR_PTR(ret);
286 goto create_cq_exit4;
287 }
288 my_cq->uspace_queue = resp.ipz_queue.queue;
289 resp.galpas = my_cq->galpas;
290 ret = ehca_mmap_register(my_cq->galpas.user.fw_handle,
291 (void**)&resp.galpas.kernel.fw_handle,
292 &vma);
293 if (ret) {
294 ehca_err(device, "Could not mmap fw_handle");
295 cq = ERR_PTR(ret);
296 goto create_cq_exit5;
297 }
298 my_cq->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
299 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
300 ehca_err(device, "Copy to udata failed.");
301 goto create_cq_exit6;
302 }
303 }
304
305 return cq;
306
307create_cq_exit6:
308 ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
309
310create_cq_exit5:
311 ehca_munmap(my_cq->uspace_queue, my_cq->ipz_queue.queue_length);
312
313create_cq_exit4:
314 ipz_queue_dtor(&my_cq->ipz_queue);
315
316create_cq_exit3:
317 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
318 if (h_ret != H_SUCCESS)
319 ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
320 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
321
322create_cq_exit2:
323 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
324 idr_remove(&ehca_cq_idr, my_cq->token);
325 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
326
327create_cq_exit1:
328 kmem_cache_free(cq_cache, my_cq);
329
330 return cq;
331}
332
333int ehca_destroy_cq(struct ib_cq *cq)
334{
335 u64 h_ret;
336 int ret;
337 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
338 int cq_num = my_cq->cq_number;
339 struct ib_device *device = cq->device;
340 struct ehca_shca *shca = container_of(device, struct ehca_shca,
341 ib_device);
342 struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
343 u32 cur_pid = current->tgid;
344 unsigned long flags;
345
346 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
347 while (my_cq->nr_callbacks)
348 yield();
349
350 idr_remove(&ehca_cq_idr, my_cq->token);
351 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
352
353 if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
354 ehca_err(device, "Invalid caller pid=%x ownpid=%x",
355 cur_pid, my_cq->ownpid);
356 return -EINVAL;
357 }
358
359 /* un-mmap if vma alloc */
360	if (my_cq->uspace_queue) {
361 ret = ehca_munmap(my_cq->uspace_queue,
362 my_cq->ipz_queue.queue_length);
363 if (ret)
364 ehca_err(device, "Could not munmap queue ehca_cq=%p "
365 "cq_num=%x", my_cq, cq_num);
366 ret = ehca_munmap(my_cq->uspace_fwh, EHCA_PAGESIZE);
367 if (ret)
368 ehca_err(device, "Could not munmap fwh ehca_cq=%p "
369 "cq_num=%x", my_cq, cq_num);
370 }
371
372 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
373 if (h_ret == H_R_STATE) {
374 /* cq in err: read err data and destroy it forcibly */
375		ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%lx in err "
376 "state. Try to delete it forcibly.",
377 my_cq, cq_num, my_cq->ipz_cq_handle.handle);
378 ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle);
379 h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
380 if (h_ret == H_SUCCESS)
381 ehca_dbg(device, "cq_num=%x deleted successfully.",
382 cq_num);
383 }
384 if (h_ret != H_SUCCESS) {
385 ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
386 "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
387 return ehca2ib_return_code(h_ret);
388 }
389 ipz_queue_dtor(&my_cq->ipz_queue);
390 kmem_cache_free(cq_cache, my_cq);
391
392 return 0;
393}
394
395int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
396{
397 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
398 u32 cur_pid = current->tgid;
399
400 if (my_cq->uspace_queue && my_cq->ownpid != cur_pid) {
401 ehca_err(cq->device, "Invalid caller pid=%x ownpid=%x",
402 cur_pid, my_cq->ownpid);
403 return -EINVAL;
404 }
405
406 /* TODO: proper resize needs to be done */
407 ehca_err(cq->device, "not implemented yet");
408
409 return -EFAULT;
410}
411
412int ehca_init_cq_cache(void)
413{
414 cq_cache = kmem_cache_create("ehca_cache_cq",
415 sizeof(struct ehca_cq), 0,
416 SLAB_HWCACHE_ALIGN,
417 NULL, NULL);
418 if (!cq_cache)
419 return -ENOMEM;
420 return 0;
421}
422
423void ehca_cleanup_cq_cache(void)
424{
425 if (cq_cache)
426 kmem_cache_destroy(cq_cache);
427}
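
Note on the token loop in ehca_create_cq() above: it is the two-step idr idiom of this kernel generation; idr_pre_get() preloads free nodes without the lock held (it may sleep), idr_get_new() inserts under the spinlock, and -EAGAIN restarts the loop because another CPU may have consumed the preloaded node. A condensed sketch of the same pattern (hypothetical my_idr/my_lock/my_get_token names, not driver code):

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static DEFINE_IDR(my_idr);		/* hypothetical, for illustration */
static DEFINE_SPINLOCK(my_lock);

static int my_get_token(void *obj, int *token)
{
	unsigned long flags;
	int ret;

	do {
		/* may sleep, so preload outside the spinlock */
		if (!idr_pre_get(&my_idr, GFP_KERNEL))
			return -ENOMEM;

		spin_lock_irqsave(&my_lock, flags);
		ret = idr_get_new(&my_idr, obj, token);
		spin_unlock_irqrestore(&my_lock, flags);
	} while (ret == -EAGAIN);	/* preloaded node was stolen; retry */

	return ret;
}
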
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
new file mode 100644
index 000000000000..5281dec66f12
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -0,0 +1,185 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Event queue handling
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Khadija Souissi <souissi@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com>
9 * Heiko J Schick <schickhj@de.ibm.com>
10 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
11 *
12 *
13 * Copyright (c) 2005 IBM Corporation
14 *
15 * All rights reserved.
16 *
17 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
18 * BSD.
19 *
20 * OpenIB BSD License
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions are met:
24 *
25 * Redistributions of source code must retain the above copyright notice, this
26 * list of conditions and the following disclaimer.
27 *
28 * Redistributions in binary form must reproduce the above copyright notice,
29 * this list of conditions and the following disclaimer in the documentation
30 * and/or other materials
31 * provided with the distribution.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
40 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
41 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 * POSSIBILITY OF SUCH DAMAGE.
44 */
45
46#include "ehca_classes.h"
47#include "ehca_irq.h"
48#include "ehca_iverbs.h"
49#include "ehca_qes.h"
50#include "hcp_if.h"
51#include "ipz_pt_fn.h"
52
53int ehca_create_eq(struct ehca_shca *shca,
54 struct ehca_eq *eq,
55 const enum ehca_eq_type type, const u32 length)
56{
57 u64 ret;
58 u32 nr_pages;
59 u32 i;
60 void *vpage;
61 struct ib_device *ib_dev = &shca->ib_device;
62
63 spin_lock_init(&eq->spinlock);
64 eq->is_initialized = 0;
65
66 if (type != EHCA_EQ && type != EHCA_NEQ) {
67 ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq);
68 return -EINVAL;
69 }
70 if (!length) {
71 ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq);
72 return -EINVAL;
73 }
74
75 ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
76 &eq->pf,
77 type,
78 length,
79 &eq->ipz_eq_handle,
80 &eq->length,
81 &nr_pages, &eq->ist);
82
83 if (ret != H_SUCCESS) {
84 ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
85 return -EINVAL;
86 }
87
88 ret = ipz_queue_ctor(&eq->ipz_queue, nr_pages,
89 EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0);
90 if (!ret) {
91 ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq);
92 goto create_eq_exit1;
93 }
94
95 for (i = 0; i < nr_pages; i++) {
96 u64 rpage;
97
98 if (!(vpage = ipz_qpageit_get_inc(&eq->ipz_queue))) {
99 ret = H_RESOURCE;
100 goto create_eq_exit2;
101 }
102
103 rpage = virt_to_abs(vpage);
104 ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
105 eq->ipz_eq_handle,
106 &eq->pf,
107 0, 0, rpage, 1);
108
109 if (i == (nr_pages - 1)) {
110 /* last page */
111 vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
112 if (ret != H_SUCCESS || vpage)
113 goto create_eq_exit2;
114 } else {
115 if (ret != H_PAGE_REGISTERED || !vpage)
116 goto create_eq_exit2;
117 }
118 }
119
120 ipz_qeit_reset(&eq->ipz_queue);
121
122 /* register interrupt handlers and initialize work queues */
123 if (type == EHCA_EQ) {
124 ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
125 SA_INTERRUPT, "ehca_eq",
126 (void *)shca);
127 if (ret < 0)
128 ehca_err(ib_dev, "Can't map interrupt handler.");
129
130 tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
131 } else if (type == EHCA_NEQ) {
132 ret = ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_neq,
133 SA_INTERRUPT, "ehca_neq",
134 (void *)shca);
135 if (ret < 0)
136 ehca_err(ib_dev, "Can't map interrupt handler.");
137
138 tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
139 }
140
141 eq->is_initialized = 1;
142
143 return 0;
144
145create_eq_exit2:
146 ipz_queue_dtor(&eq->ipz_queue);
147
148create_eq_exit1:
149 hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
150
151 return -EINVAL;
152}
153
154void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq)
155{
156 unsigned long flags;
157 void *eqe;
158
159 spin_lock_irqsave(&eq->spinlock, flags);
160 eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue);
161 spin_unlock_irqrestore(&eq->spinlock, flags);
162
163 return eqe;
164}
165
166int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
167{
168 unsigned long flags;
169 u64 h_ret;
170
171 spin_lock_irqsave(&eq->spinlock, flags);
172 ibmebus_free_irq(NULL, eq->ist, (void *)shca);
173
174 h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
175
176 spin_unlock_irqrestore(&eq->spinlock, flags);
177
178 if (h_ret != H_SUCCESS) {
179 ehca_err(&shca->ib_device, "Can't free EQ resources.");
180 return -EINVAL;
181 }
182 ipz_queue_dtor(&eq->ipz_queue);
183
184 return 0;
185}
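
Note on page registration: ehca_create_cq() above and ehca_create_eq() here follow the same firmware contract when feeding queue pages to the hypervisor; every page except the last must be acknowledged with H_PAGE_REGISTERED, and the final page must return H_SUCCESS with the page iterator exhausted. A condensed sketch of that contract, with a hypothetical register_page() callback standing in for the hipz_h_register_rpage_* h-calls:

/* Hypothetical callback type standing in for hipz_h_register_rpage_cq/_eq. */
typedef u64 (*register_page_fn)(u64 rpage);

static int register_queue_pages(struct ipz_queue *queue, u32 nr_pages,
				register_page_fn register_page)
{
	u32 i;

	for (i = 0; i < nr_pages; i++) {
		void *vpage = ipz_qpageit_get_inc(queue);
		u64 h_ret;

		if (!vpage)
			return -EINVAL;
		h_ret = register_page(virt_to_abs(vpage));

		if (i == nr_pages - 1) {
			/* last page: firmware must report completion ... */
			if (h_ret != H_SUCCESS)
				return -EINVAL;
			/* ... and no further queue page may remain */
			if (ipz_qpageit_get_inc(queue))
				return -EINVAL;
		} else if (h_ret != H_PAGE_REGISTERED) {
			return -EINVAL;
		}
	}
	return 0;
}
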
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
new file mode 100644
index 000000000000..5eae6ac48425
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -0,0 +1,241 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * HCA query functions
5 *
6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Christoph Raisch <raisch@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "ehca_tools.h"
43#include "hcp_if.h"
44
45int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
46{
47 int ret = 0;
48 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
49 ib_device);
50 struct hipz_query_hca *rblock;
51
52 rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
53 if (!rblock) {
54 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
55 return -ENOMEM;
56 }
57
58 if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
59 ehca_err(&shca->ib_device, "Can't query device properties");
60 ret = -EINVAL;
61 goto query_device1;
62 }
63
64 memset(props, 0, sizeof(struct ib_device_attr));
65 props->fw_ver = rblock->hw_ver;
66 props->max_mr_size = rblock->max_mr_size;
67 props->vendor_id = rblock->vendor_id >> 8;
68 props->vendor_part_id = rblock->vendor_part_id >> 16;
69 props->hw_ver = rblock->hw_ver;
70 props->max_qp = min_t(int, rblock->max_qp, INT_MAX);
71 props->max_qp_wr = min_t(int, rblock->max_wqes_wq, INT_MAX);
72 props->max_sge = min_t(int, rblock->max_sge, INT_MAX);
73 props->max_sge_rd = min_t(int, rblock->max_sge_rd, INT_MAX);
74 props->max_cq = min_t(int, rblock->max_cq, INT_MAX);
75 props->max_cqe = min_t(int, rblock->max_cqe, INT_MAX);
76 props->max_mr = min_t(int, rblock->max_mr, INT_MAX);
77 props->max_mw = min_t(int, rblock->max_mw, INT_MAX);
78 props->max_pd = min_t(int, rblock->max_pd, INT_MAX);
79 props->max_ah = min_t(int, rblock->max_ah, INT_MAX);
80 props->max_fmr = min_t(int, rblock->max_mr, INT_MAX);
81 props->max_srq = 0;
82 props->max_srq_wr = 0;
83 props->max_srq_sge = 0;
84 props->max_pkeys = 16;
85 props->local_ca_ack_delay
86 = rblock->local_ca_ack_delay;
87 props->max_raw_ipv6_qp
88 = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX);
89 props->max_raw_ethy_qp
90 = min_t(int, rblock->max_raw_ethy_qp, INT_MAX);
91 props->max_mcast_grp
92 = min_t(int, rblock->max_mcast_grp, INT_MAX);
93 props->max_mcast_qp_attach
94 = min_t(int, rblock->max_mcast_qp_attach, INT_MAX);
95 props->max_total_mcast_qp_attach
96 = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
97
98query_device1:
99 kfree(rblock);
100
101 return ret;
102}
103
104int ehca_query_port(struct ib_device *ibdev,
105 u8 port, struct ib_port_attr *props)
106{
107 int ret = 0;
108 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
109 ib_device);
110 struct hipz_query_port *rblock;
111
112 rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
113 if (!rblock) {
114 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
115 return -ENOMEM;
116 }
117
118 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
119 ehca_err(&shca->ib_device, "Can't query port properties");
120 ret = -EINVAL;
121 goto query_port1;
122 }
123
124 memset(props, 0, sizeof(struct ib_port_attr));
125 props->state = rblock->state;
126
127 switch (rblock->max_mtu) {
128 case 0x1:
129 props->active_mtu = props->max_mtu = IB_MTU_256;
130 break;
131 case 0x2:
132 props->active_mtu = props->max_mtu = IB_MTU_512;
133 break;
134 case 0x3:
135 props->active_mtu = props->max_mtu = IB_MTU_1024;
136 break;
137 case 0x4:
138 props->active_mtu = props->max_mtu = IB_MTU_2048;
139 break;
140 case 0x5:
141 props->active_mtu = props->max_mtu = IB_MTU_4096;
142 break;
143 default:
144 ehca_err(&shca->ib_device, "Unknown MTU size: %x.",
145 rblock->max_mtu);
146 break;
147 }
148
149 props->gid_tbl_len = rblock->gid_tbl_len;
150 props->max_msg_sz = rblock->max_msg_sz;
151 props->bad_pkey_cntr = rblock->bad_pkey_cntr;
152 props->qkey_viol_cntr = rblock->qkey_viol_cntr;
153 props->pkey_tbl_len = rblock->pkey_tbl_len;
154 props->lid = rblock->lid;
155 props->sm_lid = rblock->sm_lid;
156 props->lmc = rblock->lmc;
157 props->sm_sl = rblock->sm_sl;
158 props->subnet_timeout = rblock->subnet_timeout;
159 props->init_type_reply = rblock->init_type_reply;
160
161 props->active_width = IB_WIDTH_12X;
162 props->active_speed = 0x1;
163
164query_port1:
165 kfree(rblock);
166
167 return ret;
168}
169
170int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
171{
172 int ret = 0;
173 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device);
174 struct hipz_query_port *rblock;
175
176 if (index > 16) {
177 ehca_err(&shca->ib_device, "Invalid index: %x.", index);
178 return -EINVAL;
179 }
180
181 rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
182 if (!rblock) {
183 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
184 return -ENOMEM;
185 }
186
187 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
188 ehca_err(&shca->ib_device, "Can't query port properties");
189 ret = -EINVAL;
190 goto query_pkey1;
191 }
192
193 memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16));
194
195query_pkey1:
196 kfree(rblock);
197
198 return ret;
199}
200
201int ehca_query_gid(struct ib_device *ibdev, u8 port,
202 int index, union ib_gid *gid)
203{
204 int ret = 0;
205 struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
206 ib_device);
207 struct hipz_query_port *rblock;
208
209 if (index > 255) {
210 ehca_err(&shca->ib_device, "Invalid index: %x.", index);
211 return -EINVAL;
212 }
213
214 rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
215 if (!rblock) {
216 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
217 return -ENOMEM;
218 }
219
220 if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
221 ehca_err(&shca->ib_device, "Can't query port properties");
222 ret = -EINVAL;
223 goto query_gid1;
224 }
225
226 memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64));
227 memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64));
228
229query_gid1:
230 kfree(rblock);
231
232 return ret;
233}
234
235int ehca_modify_port(struct ib_device *ibdev,
236 u8 port, int port_modify_mask,
237 struct ib_port_modify *props)
238{
239 /* Not implemented yet */
240 return -EFAULT;
241}
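
Note on the container_of() pattern: every query function above recovers the driver-private struct ehca_shca from the struct ib_device handed in by the verbs core, and ehca_cq/ib_cq and ehca_qp/ib_qp in ehca_classes.h are embedded the same way. A stand-alone userspace illustration of the macro (the *_demo struct names are hypothetical):

#include <stdio.h>
#include <stddef.h>

/* Userspace stand-in for the kernel's container_of(). */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_device_demo { int node_type; };
struct ehca_shca_demo {
	struct ib_device_demo ib_device;	/* embedded, as in ehca_classes.h */
	int hw_level;
};

int main(void)
{
	struct ehca_shca_demo shca = { .hw_level = 2 };
	struct ib_device_demo *ibdev = &shca.ib_device;	/* what the core passes in */
	struct ehca_shca_demo *back =
		container_of(ibdev, struct ehca_shca_demo, ib_device);

	printf("hw_level=%d\n", back->hw_level);	/* prints 2 */
	return 0;
}
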
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
new file mode 100644
index 000000000000..2a65b5be1979
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -0,0 +1,762 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Functions for EQs, NEQs and interrupts
5 *
6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Khadija Souissi <souissi@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "ehca_classes.h"
43#include "ehca_irq.h"
44#include "ehca_iverbs.h"
45#include "ehca_tools.h"
46#include "hcp_if.h"
47#include "hipz_fns.h"
48
49#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1)
50#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM(8,31)
51#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM(2,7)
52#define EQE_CQ_NUMBER EHCA_BMASK_IBM(8,31)
53#define EQE_QP_NUMBER EHCA_BMASK_IBM(8,31)
54#define EQE_QP_TOKEN EHCA_BMASK_IBM(32,63)
55#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32,63)
56
57#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM(1,1)
58#define NEQE_EVENT_CODE EHCA_BMASK_IBM(2,7)
59#define NEQE_PORT_NUMBER EHCA_BMASK_IBM(8,15)
60#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
61
62#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52,63)
63#define ERROR_DATA_TYPE EHCA_BMASK_IBM(0,7)
64
65#ifdef CONFIG_INFINIBAND_EHCA_SCALING
66
67static void queue_comp_task(struct ehca_cq *__cq);
68
69static struct ehca_comp_pool* pool;
70static struct notifier_block comp_pool_callback_nb;
71
72#endif
73
74static inline void comp_event_callback(struct ehca_cq *cq)
75{
76 if (!cq->ib_cq.comp_handler)
77 return;
78
79 spin_lock(&cq->cb_lock);
80 cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
81 spin_unlock(&cq->cb_lock);
82
83 return;
84}
85
86static void print_error_data(struct ehca_shca * shca, void* data,
87 u64* rblock, int length)
88{
89 u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
90 u64 resource = rblock[1];
91
92 switch (type) {
93 case 0x1: /* Queue Pair */
94 {
95 struct ehca_qp *qp = (struct ehca_qp*)data;
96
97 /* only print error data if AER is set */
98 if (rblock[6] == 0)
99 return;
100
101 ehca_err(&shca->ib_device,
102 "QP 0x%x (resource=%lx) has errors.",
103 qp->ib_qp.qp_num, resource);
104 break;
105 }
106 case 0x4: /* Completion Queue */
107 {
108 struct ehca_cq *cq = (struct ehca_cq*)data;
109
110 ehca_err(&shca->ib_device,
111 "CQ 0x%x (resource=%lx) has errors.",
112 cq->cq_number, resource);
113 break;
114 }
115 default:
116 ehca_err(&shca->ib_device,
117 "Unknown errror type: %lx on %s.",
118 type, shca->ib_device.name);
119 break;
120 }
121
122 ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
123 ehca_err(&shca->ib_device, "EHCA ----- error data begin "
124 "---------------------------------------------------");
125 ehca_dmp(rblock, length, "resource=%lx", resource);
126 ehca_err(&shca->ib_device, "EHCA ----- error data end "
127 "----------------------------------------------------");
128
129 return;
130}
131
132int ehca_error_data(struct ehca_shca *shca, void *data,
133 u64 resource)
134{
135
136 unsigned long ret;
137 u64 *rblock;
138 unsigned long block_count;
139
140 rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
141 if (!rblock) {
142 ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
143 ret = -ENOMEM;
144 goto error_data1;
145 }
146
147 ret = hipz_h_error_data(shca->ipz_hca_handle,
148 resource,
149 rblock,
150 &block_count);
151
152 if (ret == H_R_STATE) {
153 ehca_err(&shca->ib_device,
154 "No error data is available: %lx.", resource);
155 }
156 else if (ret == H_SUCCESS) {
157 int length;
158
159 length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);
160
161 if (length > PAGE_SIZE)
162 length = PAGE_SIZE;
163
164 print_error_data(shca, data, rblock, length);
165 }
166 else {
167 ehca_err(&shca->ib_device,
168 "Error data could not be fetched: %lx", resource);
169 }
170
171 kfree(rblock);
172
173error_data1:
174 return ret;
175
176}
177
178static void qp_event_callback(struct ehca_shca *shca,
179 u64 eqe,
180 enum ib_event_type event_type)
181{
182 struct ib_event event;
183 struct ehca_qp *qp;
184 unsigned long flags;
185 u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
186
187 spin_lock_irqsave(&ehca_qp_idr_lock, flags);
188 qp = idr_find(&ehca_qp_idr, token);
189 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
190
191
192 if (!qp)
193 return;
194
195 ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);
196
197 if (!qp->ib_qp.event_handler)
198 return;
199
200 event.device = &shca->ib_device;
201 event.event = event_type;
202 event.element.qp = &qp->ib_qp;
203
204 qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
205
206 return;
207}
208
209static void cq_event_callback(struct ehca_shca *shca,
210 u64 eqe)
211{
212 struct ehca_cq *cq;
213 unsigned long flags;
214 u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
215
216 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
217 cq = idr_find(&ehca_cq_idr, token);
218 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
219
220 if (!cq)
221 return;
222
223 ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
224
225 return;
226}
227
228static void parse_identifier(struct ehca_shca *shca, u64 eqe)
229{
230 u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);
231
232 switch (identifier) {
233 case 0x02: /* path migrated */
234 qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
235 break;
236 case 0x03: /* communication established */
237 qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
238 break;
239 case 0x04: /* send queue drained */
240 qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
241 break;
242 case 0x05: /* QP error */
243 case 0x06: /* QP error */
244 qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
245 break;
246 case 0x07: /* CQ error */
247 case 0x08: /* CQ error */
248 cq_event_callback(shca, eqe);
249 break;
250 case 0x09: /* MRMWPTE error */
251 ehca_err(&shca->ib_device, "MRMWPTE error.");
252 break;
253 case 0x0A: /* port event */
254 ehca_err(&shca->ib_device, "Port event.");
255 break;
256 case 0x0B: /* MR access error */
257 ehca_err(&shca->ib_device, "MR access error.");
258 break;
259 case 0x0C: /* EQ error */
260 ehca_err(&shca->ib_device, "EQ error.");
261 break;
262 case 0x0D: /* P/Q_Key mismatch */
263 ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
264 break;
265 case 0x10: /* sampling complete */
266 ehca_err(&shca->ib_device, "Sampling complete.");
267 break;
268 case 0x11: /* unaffiliated access error */
269 ehca_err(&shca->ib_device, "Unaffiliated access error.");
270 break;
271 case 0x12: /* path migrating error */
272 ehca_err(&shca->ib_device, "Path migration error.");
273 break;
274 case 0x13: /* interface trace stopped */
275 ehca_err(&shca->ib_device, "Interface trace stopped.");
276 break;
277 case 0x14: /* first error capture info available */
278 default:
279 ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
280 identifier, shca->ib_device.name);
281 break;
282 }
283
284 return;
285}
286
287static void parse_ec(struct ehca_shca *shca, u64 eqe)
288{
289 struct ib_event event;
290 u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
291 u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
292
293 switch (ec) {
294 case 0x30: /* port availability change */
295 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
296 ehca_info(&shca->ib_device,
297 "port %x is active.", port);
298 event.device = &shca->ib_device;
299 event.event = IB_EVENT_PORT_ACTIVE;
300 event.element.port_num = port;
301 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
302 ib_dispatch_event(&event);
303 } else {
304 ehca_info(&shca->ib_device,
305 "port %x is inactive.", port);
306 event.device = &shca->ib_device;
307 event.event = IB_EVENT_PORT_ERR;
308 event.element.port_num = port;
309 shca->sport[port - 1].port_state = IB_PORT_DOWN;
310 ib_dispatch_event(&event);
311 }
312 break;
313 case 0x31:
314 /* port configuration change
315 * disruptive change is caused by
316 * LID, PKEY or SM change
317 */
318 ehca_warn(&shca->ib_device,
319 "disruptive port %x configuration change", port);
320
321 ehca_info(&shca->ib_device,
322 "port %x is inactive.", port);
323 event.device = &shca->ib_device;
324 event.event = IB_EVENT_PORT_ERR;
325 event.element.port_num = port;
326 shca->sport[port - 1].port_state = IB_PORT_DOWN;
327 ib_dispatch_event(&event);
328
329 ehca_info(&shca->ib_device,
330 "port %x is active.", port);
331 event.device = &shca->ib_device;
332 event.event = IB_EVENT_PORT_ACTIVE;
333 event.element.port_num = port;
334 shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
335 ib_dispatch_event(&event);
336 break;
337 case 0x32: /* adapter malfunction */
338 ehca_err(&shca->ib_device, "Adapter malfunction.");
339 break;
340 case 0x33: /* trace stopped */
341 ehca_err(&shca->ib_device, "Traced stopped.");
342 break;
343 default:
344 ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
345 ec, shca->ib_device.name);
346 break;
347 }
348
349 return;
350}
351
352static inline void reset_eq_pending(struct ehca_cq *cq)
353{
354 u64 CQx_EP;
355 struct h_galpa gal = cq->galpas.kernel;
356
357 hipz_galpa_store_cq(gal, cqx_ep, 0x0);
358 CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
359
360 return;
361}
362
363irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs)
364{
365 struct ehca_shca *shca = (struct ehca_shca*)dev_id;
366
367 tasklet_hi_schedule(&shca->neq.interrupt_task);
368
369 return IRQ_HANDLED;
370}
371
372void ehca_tasklet_neq(unsigned long data)
373{
374 struct ehca_shca *shca = (struct ehca_shca*)data;
375 struct ehca_eqe *eqe;
376 u64 ret;
377
378 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
379
380 while (eqe) {
381 if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
382 parse_ec(shca, eqe->entry);
383
384 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
385 }
386
387 ret = hipz_h_reset_event(shca->ipz_hca_handle,
388 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);
389
390 if (ret != H_SUCCESS)
391 ehca_err(&shca->ib_device, "Can't clear notification events.");
392
393 return;
394}
395
396irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs)
397{
398 struct ehca_shca *shca = (struct ehca_shca*)dev_id;
399
400 tasklet_hi_schedule(&shca->eq.interrupt_task);
401
402 return IRQ_HANDLED;
403}
404
405void ehca_tasklet_eq(unsigned long data)
406{
407 struct ehca_shca *shca = (struct ehca_shca*)data;
408 struct ehca_eqe *eqe;
409 int int_state;
410 int query_cnt = 0;
411
412 do {
413 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
414
415 if ((shca->hw_level >= 2) && eqe)
416 int_state = 1;
417 else
418 int_state = 0;
419
420 while ((int_state == 1) || eqe) {
421 while (eqe) {
422 u64 eqe_value = eqe->entry;
423
424 ehca_dbg(&shca->ib_device,
425 "eqe_value=%lx", eqe_value);
426
427 /* TODO: better structure */
428 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
429 eqe_value)) {
430 unsigned long flags;
431 u32 token;
432 struct ehca_cq *cq;
433
434 ehca_dbg(&shca->ib_device,
435 "... completion event");
436 token =
437 EHCA_BMASK_GET(EQE_CQ_TOKEN,
438 eqe_value);
439 spin_lock_irqsave(&ehca_cq_idr_lock,
440 flags);
441 cq = idr_find(&ehca_cq_idr, token);
442
443 if (cq == NULL) {
444						spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
445 break;
446 }
447
448 reset_eq_pending(cq);
449#ifdef CONFIG_INFINIBAND_EHCA_SCALING
450 queue_comp_task(cq);
451 spin_unlock_irqrestore(&ehca_cq_idr_lock,
452 flags);
453#else
454 spin_unlock_irqrestore(&ehca_cq_idr_lock,
455 flags);
456 comp_event_callback(cq);
457#endif
458 } else {
459 ehca_dbg(&shca->ib_device,
460 "... non completion event");
461 parse_identifier(shca, eqe_value);
462 }
463 eqe =
464 (struct ehca_eqe *)ehca_poll_eq(shca,
465 &shca->eq);
466 }
467
468 if (shca->hw_level >= 2) {
469 int_state =
470 hipz_h_query_int_state(shca->ipz_hca_handle,
471 shca->eq.ist);
472 query_cnt++;
473 iosync();
474 if (query_cnt >= 100) {
475 query_cnt = 0;
476 int_state = 0;
477 }
478 }
479 eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
480
481 }
482 } while (int_state != 0);
483
484 return;
485}
486
487#ifdef CONFIG_INFINIBAND_EHCA_SCALING
488
489static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
490{
491 unsigned long flags_last_cpu;
492
493 if (ehca_debug_level)
494 ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
495
496 spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
497 pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
498 if (pool->last_cpu == NR_CPUS)
499 pool->last_cpu = first_cpu(cpu_online_map);
500 spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
501
502 return pool->last_cpu;
503}
504
505static void __queue_comp_task(struct ehca_cq *__cq,
506 struct ehca_cpu_comp_task *cct)
507{
508 unsigned long flags_cct;
509 unsigned long flags_cq;
510
511 spin_lock_irqsave(&cct->task_lock, flags_cct);
512 spin_lock_irqsave(&__cq->task_lock, flags_cq);
513
514 if (__cq->nr_callbacks == 0) {
515 __cq->nr_callbacks++;
516 list_add_tail(&__cq->entry, &cct->cq_list);
517 cct->cq_jobs++;
518 wake_up(&cct->wait_queue);
519 }
520 else
521 __cq->nr_callbacks++;
522
523 spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
524 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
525}
526
527static void queue_comp_task(struct ehca_cq *__cq)
528{
529 int cpu;
530 int cpu_id;
531 struct ehca_cpu_comp_task *cct;
532
533 cpu = get_cpu();
534 cpu_id = find_next_online_cpu(pool);
535
536 BUG_ON(!cpu_online(cpu_id));
537
538 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
539
540 if (cct->cq_jobs > 0) {
541 cpu_id = find_next_online_cpu(pool);
542 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
543 }
544
545 __queue_comp_task(__cq, cct);
546
547 put_cpu();
548
549 return;
550}
551
552static void run_comp_task(struct ehca_cpu_comp_task* cct)
553{
554 struct ehca_cq *cq;
555 unsigned long flags_cct;
556 unsigned long flags_cq;
557
558 spin_lock_irqsave(&cct->task_lock, flags_cct);
559
560 while (!list_empty(&cct->cq_list)) {
561 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
562 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
563 comp_event_callback(cq);
564 spin_lock_irqsave(&cct->task_lock, flags_cct);
565
566 spin_lock_irqsave(&cq->task_lock, flags_cq);
567 cq->nr_callbacks--;
568 if (cq->nr_callbacks == 0) {
569 list_del_init(cct->cq_list.next);
570 cct->cq_jobs--;
571 }
572 spin_unlock_irqrestore(&cq->task_lock, flags_cq);
573
574 }
575
576 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
577
578 return;
579}
580
581static int comp_task(void *__cct)
582{
583 struct ehca_cpu_comp_task* cct = __cct;
584 DECLARE_WAITQUEUE(wait, current);
585
586 set_current_state(TASK_INTERRUPTIBLE);
587	while (!kthread_should_stop()) {
588 add_wait_queue(&cct->wait_queue, &wait);
589
590 if (list_empty(&cct->cq_list))
591 schedule();
592 else
593 __set_current_state(TASK_RUNNING);
594
595 remove_wait_queue(&cct->wait_queue, &wait);
596
597 if (!list_empty(&cct->cq_list))
598 run_comp_task(__cct);
599
600 set_current_state(TASK_INTERRUPTIBLE);
601 }
602 __set_current_state(TASK_RUNNING);
603
604 return 0;
605}
606
607static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
608 int cpu)
609{
610 struct ehca_cpu_comp_task *cct;
611
612 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
613 spin_lock_init(&cct->task_lock);
614 INIT_LIST_HEAD(&cct->cq_list);
615 init_waitqueue_head(&cct->wait_queue);
616 cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
617
618 return cct->task;
619}
620
621static void destroy_comp_task(struct ehca_comp_pool *pool,
622 int cpu)
623{
624 struct ehca_cpu_comp_task *cct;
625 struct task_struct *task;
626 unsigned long flags_cct;
627
628 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
629
630 spin_lock_irqsave(&cct->task_lock, flags_cct);
631
632 task = cct->task;
633 cct->task = NULL;
634 cct->cq_jobs = 0;
635
636 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
637
638 if (task)
639 kthread_stop(task);
640
641 return;
642}
643
644static void take_over_work(struct ehca_comp_pool *pool,
645 int cpu)
646{
647 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
648 LIST_HEAD(list);
649 struct ehca_cq *cq;
650 unsigned long flags_cct;
651
652 spin_lock_irqsave(&cct->task_lock, flags_cct);
653
654 list_splice_init(&cct->cq_list, &list);
655
656	while (!list_empty(&list)) {
657		cq = list_entry(list.next, struct ehca_cq, entry);
658
659 list_del(&cq->entry);
660 __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
661 smp_processor_id()));
662 }
663
664 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
665
666}
667
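/*
 * CPU hotplug notifier: a comp_task thread is created on CPU_UP_PREPARE,
 * bound and woken on CPU_ONLINE, and torn down on CPU_UP_CANCELED or
 * CPU_DEAD; in the CPU_DEAD case take_over_work() requeues the dead
 * CPU's pending CQs on the CPU running the notifier.
 */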
668static int comp_pool_callback(struct notifier_block *nfb,
669 unsigned long action,
670 void *hcpu)
671{
672 unsigned int cpu = (unsigned long)hcpu;
673 struct ehca_cpu_comp_task *cct;
674
675 switch (action) {
676 case CPU_UP_PREPARE:
677		ehca_gen_dbg("CPU: %x (CPU_UP_PREPARE)", cpu);
678		if (!create_comp_task(pool, cpu)) {
679 ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
680 return NOTIFY_BAD;
681 }
682 break;
683 case CPU_UP_CANCELED:
684		ehca_gen_dbg("CPU: %x (CPU_UP_CANCELED)", cpu);
685 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
686 kthread_bind(cct->task, any_online_cpu(cpu_online_map));
687 destroy_comp_task(pool, cpu);
688 break;
689 case CPU_ONLINE:
690 ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
691 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
692 kthread_bind(cct->task, cpu);
693 wake_up_process(cct->task);
694 break;
695 case CPU_DOWN_PREPARE:
696 ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
697 break;
698 case CPU_DOWN_FAILED:
699 ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
700 break;
701 case CPU_DEAD:
702 ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
703 destroy_comp_task(pool, cpu);
704 take_over_work(pool, cpu);
705 break;
706 }
707
708 return NOTIFY_OK;
709}
710
711#endif
712
713int ehca_create_comp_pool(void)
714{
715#ifdef CONFIG_INFINIBAND_EHCA_SCALING
716 int cpu;
717 struct task_struct *task;
718
719 pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
720 if (pool == NULL)
721 return -ENOMEM;
722
723 spin_lock_init(&pool->last_cpu_lock);
724 pool->last_cpu = any_online_cpu(cpu_online_map);
725
726 pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
727 if (pool->cpu_comp_tasks == NULL) {
728 kfree(pool);
729 return -EINVAL;
730 }
731
732 for_each_online_cpu(cpu) {
733 task = create_comp_task(pool, cpu);
734 if (task) {
735 kthread_bind(task, cpu);
736 wake_up_process(task);
737 }
738 }
739
740 comp_pool_callback_nb.notifier_call = comp_pool_callback;
741	comp_pool_callback_nb.priority = 0;
742 register_cpu_notifier(&comp_pool_callback_nb);
743#endif
744
745 return 0;
746}
747
748void ehca_destroy_comp_pool(void)
749{
750#ifdef CONFIG_INFINIBAND_EHCA_SCALING
751 int i;
752
753 unregister_cpu_notifier(&comp_pool_callback_nb);
754
755 for (i = 0; i < NR_CPUS; i++) {
756 if (cpu_online(i))
757 destroy_comp_task(pool, i);
758 }
759#endif
760
761 return;
762}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
new file mode 100644
index 000000000000..85bf1fe16fe4
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -0,0 +1,77 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Function definitions and structs for EQs, NEQs and interrupts
5 *
6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Khadija Souissi <souissi@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#ifndef __EHCA_IRQ_H
43#define __EHCA_IRQ_H
44
45
46struct ehca_shca;
47
48#include <linux/interrupt.h>
49#include <linux/types.h>
50#include <asm/atomic.h>
51
52int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
53
54irqreturn_t ehca_interrupt_neq(int irq, void *dev_id, struct pt_regs *regs);
55void ehca_tasklet_neq(unsigned long data);
56
57irqreturn_t ehca_interrupt_eq(int irq, void *dev_id, struct pt_regs *regs);
58void ehca_tasklet_eq(unsigned long data);
59
60struct ehca_cpu_comp_task {
61 wait_queue_head_t wait_queue;
62 struct list_head cq_list;
63 struct task_struct *task;
64 spinlock_t task_lock;
65 int cq_jobs;
66};
67
68struct ehca_comp_pool {
69 struct ehca_cpu_comp_task *cpu_comp_tasks;
70 int last_cpu;
71 spinlock_t last_cpu_lock;
72};
73
74int ehca_create_comp_pool(void);
75void ehca_destroy_comp_pool(void);
76
77#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
new file mode 100644
index 000000000000..319c39d47f3a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -0,0 +1,182 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Function definitions for internal functions
5 *
6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Dietmar Decker <ddecker@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#ifndef __EHCA_IVERBS_H__
43#define __EHCA_IVERBS_H__
44
45#include "ehca_classes.h"
46
47int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
48
49int ehca_query_port(struct ib_device *ibdev, u8 port,
50 struct ib_port_attr *props);
51
52int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
53
54int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
55 union ib_gid *gid);
56
57int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
58 struct ib_port_modify *props);
59
60struct ib_pd *ehca_alloc_pd(struct ib_device *device,
61 struct ib_ucontext *context,
62 struct ib_udata *udata);
63
64int ehca_dealloc_pd(struct ib_pd *pd);
65
66struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
67
68int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
69
70int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
71
72int ehca_destroy_ah(struct ib_ah *ah);
73
74struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
75
76struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
77 struct ib_phys_buf *phys_buf_array,
78 int num_phys_buf,
79 int mr_access_flags, u64 *iova_start);
80
81struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
82 struct ib_umem *region,
83 int mr_access_flags, struct ib_udata *udata);
84
85int ehca_rereg_phys_mr(struct ib_mr *mr,
86 int mr_rereg_mask,
87 struct ib_pd *pd,
88 struct ib_phys_buf *phys_buf_array,
89 int num_phys_buf, int mr_access_flags, u64 *iova_start);
90
91int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
92
93int ehca_dereg_mr(struct ib_mr *mr);
94
95struct ib_mw *ehca_alloc_mw(struct ib_pd *pd);
96
97int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
98 struct ib_mw_bind *mw_bind);
99
100int ehca_dealloc_mw(struct ib_mw *mw);
101
102struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
103 int mr_access_flags,
104 struct ib_fmr_attr *fmr_attr);
105
106int ehca_map_phys_fmr(struct ib_fmr *fmr,
107 u64 *page_list, int list_len, u64 iova);
108
109int ehca_unmap_fmr(struct list_head *fmr_list);
110
111int ehca_dealloc_fmr(struct ib_fmr *fmr);
112
113enum ehca_eq_type {
114 EHCA_EQ = 0, /* Event Queue */
115 EHCA_NEQ /* Notification Event Queue */
116};
117
118int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq,
119 enum ehca_eq_type type, const u32 length);
120
121int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq);
122
123void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq);
124
125
126struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe,
127 struct ib_ucontext *context,
128 struct ib_udata *udata);
129
130int ehca_destroy_cq(struct ib_cq *cq);
131
132int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
133
134int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
135
136int ehca_peek_cq(struct ib_cq *cq, int wc_cnt);
137
138int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify);
139
140struct ib_qp *ehca_create_qp(struct ib_pd *pd,
141 struct ib_qp_init_attr *init_attr,
142 struct ib_udata *udata);
143
144int ehca_destroy_qp(struct ib_qp *qp);
145
146int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
147 struct ib_udata *udata);
148
149int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
150 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
151
152int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
153 struct ib_send_wr **bad_send_wr);
154
155int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
156 struct ib_recv_wr **bad_recv_wr);
157
158u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
159 struct ib_qp_init_attr *qp_init_attr);
160
161int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
162
163int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
164
165struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
166 struct ib_udata *udata);
167
168int ehca_dealloc_ucontext(struct ib_ucontext *context);
169
170int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
171
172void ehca_poll_eqs(unsigned long data);
173
174int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
175 struct vm_area_struct **vma);
176
177int ehca_mmap_register(u64 physical, void **mapped,
178 struct vm_area_struct **vma);
179
180int ehca_munmap(unsigned long addr, size_t len);
181
182#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
new file mode 100644
index 000000000000..2380994418a5
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -0,0 +1,818 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * module start stop, hca detection
5 *
6 * Authors: Heiko J Schick <schickhj@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Joachim Fenkes <fenkes@de.ibm.com>
9 *
10 * Copyright (c) 2005 IBM Corporation
11 *
12 * All rights reserved.
13 *
14 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
15 * BSD.
16 *
17 * OpenIB BSD License
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are met:
21 *
22 * Redistributions of source code must retain the above copyright notice, this
23 * list of conditions and the following disclaimer.
24 *
25 * Redistributions in binary form must reproduce the above copyright notice,
26 * this list of conditions and the following disclaimer in the documentation
27 * and/or other materials
28 * provided with the distribution.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43#include "ehca_classes.h"
44#include "ehca_iverbs.h"
45#include "ehca_mrmw.h"
46#include "ehca_tools.h"
47#include "hcp_if.h"
48
49MODULE_LICENSE("Dual BSD/GPL");
50MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
51MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
52MODULE_VERSION("SVNEHCA_0016");
53
54int ehca_open_aqp1 = 0;
55int ehca_debug_level = 0;
56int ehca_hw_level = 0;
57int ehca_nr_ports = 2;
58int ehca_use_hp_mr = 0;
59int ehca_port_act_time = 30;
60int ehca_poll_all_eqs = 1;
61int ehca_static_rate = -1;
62
63module_param_named(open_aqp1, ehca_open_aqp1, int, 0);
64module_param_named(debug_level, ehca_debug_level, int, 0);
65module_param_named(hw_level, ehca_hw_level, int, 0);
66module_param_named(nr_ports, ehca_nr_ports, int, 0);
67module_param_named(use_hp_mr, ehca_use_hp_mr, int, 0);
68module_param_named(port_act_time, ehca_port_act_time, int, 0);
69module_param_named(poll_all_eqs, ehca_poll_all_eqs, int, 0);
70module_param_named(static_rate, ehca_static_rate, int, 0);
71
72MODULE_PARM_DESC(open_aqp1,
73 "AQP1 on startup (0: no (default), 1: yes)");
74MODULE_PARM_DESC(debug_level,
75 "debug level"
76 " (0: no debug traces (default), 1: with debug traces)");
77MODULE_PARM_DESC(hw_level,
78 "hardware level"
79 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
80MODULE_PARM_DESC(nr_ports,
81 "number of connected ports (default: 2)");
82MODULE_PARM_DESC(use_hp_mr,
83 "high performance MRs (0: no (default), 1: yes)");
84MODULE_PARM_DESC(port_act_time,
85 "time to wait for port activation (default: 30 sec)");
86MODULE_PARM_DESC(poll_all_eqs,
87 "polls all event queues periodically"
88 " (0: no, 1: yes (default))");
89MODULE_PARM_DESC(static_rate,
90 "set permanent static rate (default: disabled)");
91
92spinlock_t ehca_qp_idr_lock;
93spinlock_t ehca_cq_idr_lock;
94DEFINE_IDR(ehca_qp_idr);
95DEFINE_IDR(ehca_cq_idr);
96
97static struct list_head shca_list; /* list of all registered ehcas */
98static spinlock_t shca_list_lock;
99
100static struct timer_list poll_eqs_timer;
101
102static int ehca_create_slab_caches(void)
103{
104 int ret;
105
106 ret = ehca_init_pd_cache();
107 if (ret) {
108 ehca_gen_err("Cannot create PD SLAB cache.");
109 return ret;
110 }
111
112 ret = ehca_init_cq_cache();
113 if (ret) {
114 ehca_gen_err("Cannot create CQ SLAB cache.");
115 goto create_slab_caches2;
116 }
117
118 ret = ehca_init_qp_cache();
119 if (ret) {
120 ehca_gen_err("Cannot create QP SLAB cache.");
121 goto create_slab_caches3;
122 }
123
124 ret = ehca_init_av_cache();
125 if (ret) {
126 ehca_gen_err("Cannot create AV SLAB cache.");
127 goto create_slab_caches4;
128 }
129
130 ret = ehca_init_mrmw_cache();
131 if (ret) {
132 ehca_gen_err("Cannot create MR&MW SLAB cache.");
133 goto create_slab_caches5;
134 }
135
136 return 0;
137
138create_slab_caches5:
139 ehca_cleanup_av_cache();
140
141create_slab_caches4:
142 ehca_cleanup_qp_cache();
143
144create_slab_caches3:
145 ehca_cleanup_cq_cache();
146
147create_slab_caches2:
148 ehca_cleanup_pd_cache();
149
150 return ret;
151}
152
153static void ehca_destroy_slab_caches(void)
154{
155 ehca_cleanup_mrmw_cache();
156 ehca_cleanup_av_cache();
157 ehca_cleanup_qp_cache();
158 ehca_cleanup_cq_cache();
159 ehca_cleanup_pd_cache();
160}
161
162#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39)
163#define EHCA_REVID   EHCA_BMASK_IBM(40, 63)
164
165int ehca_sense_attributes(struct ehca_shca *shca)
166{
167 int ret = 0;
168 u64 h_ret;
169 struct hipz_query_hca *rblock;
170
171 rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
172 if (!rblock) {
173 ehca_gen_err("Cannot allocate rblock memory.");
174 return -ENOMEM;
175 }
176
177 h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
178 if (h_ret != H_SUCCESS) {
179 ehca_gen_err("Cannot query device properties. h_ret=%lx",
180 h_ret);
181 ret = -EPERM;
182 goto num_ports1;
183 }
184
185 if (ehca_nr_ports == 1)
186 shca->num_ports = 1;
187 else
188 shca->num_ports = (u8)rblock->num_ports;
189
190 ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
191
192 if (ehca_hw_level == 0) {
193 u32 hcaaver;
194 u32 revid;
195
196 hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
197 revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
198
199 ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
200
201 if ((hcaaver == 1) && (revid == 0))
202 shca->hw_level = 0;
203 else if ((hcaaver == 1) && (revid == 1))
204 shca->hw_level = 1;
205 else if ((hcaaver == 1) && (revid == 2))
206 shca->hw_level = 2;
207 }
208 ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
209
210 shca->sport[0].rate = IB_RATE_30_GBPS;
211 shca->sport[1].rate = IB_RATE_30_GBPS;
212
213num_ports1:
214 kfree(rblock);
215 return ret;
216}
217
218static int init_node_guid(struct ehca_shca *shca)
219{
220 int ret = 0;
221 struct hipz_query_hca *rblock;
222
223 rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
224 if (!rblock) {
225 ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
226 return -ENOMEM;
227 }
228
229 if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
230 ehca_err(&shca->ib_device, "Can't query device properties");
231 ret = -EINVAL;
232 goto init_node_guid1;
233 }
234
235 memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
236
237init_node_guid1:
238 kfree(rblock);
239 return ret;
240}
241
242int ehca_register_device(struct ehca_shca *shca)
243{
244 int ret;
245
246 ret = init_node_guid(shca);
247 if (ret)
248 return ret;
249
250 strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
251 shca->ib_device.owner = THIS_MODULE;
252
253 shca->ib_device.uverbs_abi_ver = 5;
254 shca->ib_device.uverbs_cmd_mask =
255 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
256 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
257 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
258 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
259 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
260 (1ull << IB_USER_VERBS_CMD_REG_MR) |
261 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
262 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
263 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
264 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
265 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
266 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
267 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
268 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
269 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
270 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
271
272 shca->ib_device.node_type = RDMA_NODE_IB_CA;
273 shca->ib_device.phys_port_cnt = shca->num_ports;
274 shca->ib_device.dma_device = &shca->ibmebus_dev->ofdev.dev;
275 shca->ib_device.query_device = ehca_query_device;
276 shca->ib_device.query_port = ehca_query_port;
277 shca->ib_device.query_gid = ehca_query_gid;
278 shca->ib_device.query_pkey = ehca_query_pkey;
279	/* shca->ib_device.modify_device = ehca_modify_device */
280 shca->ib_device.modify_port = ehca_modify_port;
281 shca->ib_device.alloc_ucontext = ehca_alloc_ucontext;
282 shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext;
283 shca->ib_device.alloc_pd = ehca_alloc_pd;
284 shca->ib_device.dealloc_pd = ehca_dealloc_pd;
285 shca->ib_device.create_ah = ehca_create_ah;
286 /* shca->ib_device.modify_ah = ehca_modify_ah; */
287 shca->ib_device.query_ah = ehca_query_ah;
288 shca->ib_device.destroy_ah = ehca_destroy_ah;
289 shca->ib_device.create_qp = ehca_create_qp;
290 shca->ib_device.modify_qp = ehca_modify_qp;
291 shca->ib_device.query_qp = ehca_query_qp;
292 shca->ib_device.destroy_qp = ehca_destroy_qp;
293 shca->ib_device.post_send = ehca_post_send;
294 shca->ib_device.post_recv = ehca_post_recv;
295 shca->ib_device.create_cq = ehca_create_cq;
296 shca->ib_device.destroy_cq = ehca_destroy_cq;
297 shca->ib_device.resize_cq = ehca_resize_cq;
298 shca->ib_device.poll_cq = ehca_poll_cq;
299 /* shca->ib_device.peek_cq = ehca_peek_cq; */
300 shca->ib_device.req_notify_cq = ehca_req_notify_cq;
301 /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */
302 shca->ib_device.get_dma_mr = ehca_get_dma_mr;
303 shca->ib_device.reg_phys_mr = ehca_reg_phys_mr;
304 shca->ib_device.reg_user_mr = ehca_reg_user_mr;
305 shca->ib_device.query_mr = ehca_query_mr;
306 shca->ib_device.dereg_mr = ehca_dereg_mr;
307 shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr;
308 shca->ib_device.alloc_mw = ehca_alloc_mw;
309 shca->ib_device.bind_mw = ehca_bind_mw;
310 shca->ib_device.dealloc_mw = ehca_dealloc_mw;
311 shca->ib_device.alloc_fmr = ehca_alloc_fmr;
312 shca->ib_device.map_phys_fmr = ehca_map_phys_fmr;
313 shca->ib_device.unmap_fmr = ehca_unmap_fmr;
314 shca->ib_device.dealloc_fmr = ehca_dealloc_fmr;
315 shca->ib_device.attach_mcast = ehca_attach_mcast;
316 shca->ib_device.detach_mcast = ehca_detach_mcast;
317 /* shca->ib_device.process_mad = ehca_process_mad; */
318 shca->ib_device.mmap = ehca_mmap;
319
320 ret = ib_register_device(&shca->ib_device);
321 if (ret)
322 ehca_err(&shca->ib_device,
323 "ib_register_device() failed ret=%x", ret);
324
325 return ret;
326}
327
328static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
329{
330 struct ehca_sport *sport = &shca->sport[port - 1];
331 struct ib_cq *ibcq;
332 struct ib_qp *ibqp;
333 struct ib_qp_init_attr qp_init_attr;
334 int ret;
335
336 if (sport->ibcq_aqp1) {
337 ehca_err(&shca->ib_device, "AQP1 CQ is already created.");
338 return -EPERM;
339 }
340
341 ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void*)(-1), 10);
342 if (IS_ERR(ibcq)) {
343 ehca_err(&shca->ib_device, "Cannot create AQP1 CQ.");
344 return PTR_ERR(ibcq);
345 }
346 sport->ibcq_aqp1 = ibcq;
347
348 if (sport->ibqp_aqp1) {
349 ehca_err(&shca->ib_device, "AQP1 QP is already created.");
350 ret = -EPERM;
351 goto create_aqp1;
352 }
353
354 memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr));
355 qp_init_attr.send_cq = ibcq;
356 qp_init_attr.recv_cq = ibcq;
357 qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
358 qp_init_attr.cap.max_send_wr = 100;
359 qp_init_attr.cap.max_recv_wr = 100;
360 qp_init_attr.cap.max_send_sge = 2;
361 qp_init_attr.cap.max_recv_sge = 1;
362 qp_init_attr.qp_type = IB_QPT_GSI;
363 qp_init_attr.port_num = port;
364 qp_init_attr.qp_context = NULL;
365 qp_init_attr.event_handler = NULL;
366 qp_init_attr.srq = NULL;
367
368 ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr);
369 if (IS_ERR(ibqp)) {
370 ehca_err(&shca->ib_device, "Cannot create AQP1 QP.");
371 ret = PTR_ERR(ibqp);
372 goto create_aqp1;
373 }
374 sport->ibqp_aqp1 = ibqp;
375
376 return 0;
377
378create_aqp1:
379 ib_destroy_cq(sport->ibcq_aqp1);
380 return ret;
381}
382
383static int ehca_destroy_aqp1(struct ehca_sport *sport)
384{
385 int ret;
386
387 ret = ib_destroy_qp(sport->ibqp_aqp1);
388 if (ret) {
389 ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret);
390 return ret;
391 }
392
393 ret = ib_destroy_cq(sport->ibcq_aqp1);
394 if (ret)
395 ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret);
396
397 return ret;
398}
399
400static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf)
401{
402 return snprintf(buf, PAGE_SIZE, "%d\n",
403 ehca_debug_level);
404}
405
406static ssize_t ehca_store_debug_level(struct device_driver *ddp,
407 const char *buf, size_t count)
408{
409 int value = (*buf) - '0';
410 if (value >= 0 && value <= 9)
411 ehca_debug_level = value;
412 return 1;
413}
414
415DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR,
416 ehca_show_debug_level, ehca_store_debug_level);
417
418void ehca_create_driver_sysfs(struct ibmebus_driver *drv)
419{
420 driver_create_file(&drv->driver, &driver_attr_debug_level);
421}
422
423void ehca_remove_driver_sysfs(struct ibmebus_driver *drv)
424{
425 driver_remove_file(&drv->driver, &driver_attr_debug_level);
426}
427
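/*
 * EHCA_RESOURCE_ATTR(name) expands into a sysfs show routine plus a
 * read-only device attribute.  Each read queries the HCA with
 * hipz_h_query_hca() and prints the named field of the query block;
 * num_ports is reported as 1 when the nr_ports module parameter
 * restricts the adapter to a single port.
 */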
428#define EHCA_RESOURCE_ATTR(name) \
429static ssize_t ehca_show_##name(struct device *dev, \
430 struct device_attribute *attr, \
431 char *buf) \
432{ \
433 struct ehca_shca *shca; \
434 struct hipz_query_hca *rblock; \
435 int data; \
436 \
437 shca = dev->driver_data; \
438 \
439 rblock = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL); \
440 if (!rblock) { \
441 dev_err(dev, "Can't allocate rblock memory."); \
442 return 0; \
443 } \
444 \
445 if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \
446 dev_err(dev, "Can't query device properties"); \
447 kfree(rblock); \
448 return 0; \
449 } \
450 \
451 data = rblock->name; \
452 kfree(rblock); \
453 \
454 if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \
455 return snprintf(buf, 256, "1\n"); \
456 else \
457 return snprintf(buf, 256, "%d\n", data); \
458 \
459} \
460static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL);
461
462EHCA_RESOURCE_ATTR(num_ports);
463EHCA_RESOURCE_ATTR(hw_ver);
464EHCA_RESOURCE_ATTR(max_eq);
465EHCA_RESOURCE_ATTR(cur_eq);
466EHCA_RESOURCE_ATTR(max_cq);
467EHCA_RESOURCE_ATTR(cur_cq);
468EHCA_RESOURCE_ATTR(max_qp);
469EHCA_RESOURCE_ATTR(cur_qp);
470EHCA_RESOURCE_ATTR(max_mr);
471EHCA_RESOURCE_ATTR(cur_mr);
472EHCA_RESOURCE_ATTR(max_mw);
473EHCA_RESOURCE_ATTR(cur_mw);
474EHCA_RESOURCE_ATTR(max_pd);
475EHCA_RESOURCE_ATTR(max_ah);
476
477static ssize_t ehca_show_adapter_handle(struct device *dev,
478 struct device_attribute *attr,
479 char *buf)
480{
481 struct ehca_shca *shca = dev->driver_data;
482
483 return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle);
484
485}
486static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL);
487
488
489void ehca_create_device_sysfs(struct ibmebus_dev *dev)
490{
491 device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
492 device_create_file(&dev->ofdev.dev, &dev_attr_num_ports);
493 device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver);
494 device_create_file(&dev->ofdev.dev, &dev_attr_max_eq);
495 device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq);
496 device_create_file(&dev->ofdev.dev, &dev_attr_max_cq);
497 device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq);
498 device_create_file(&dev->ofdev.dev, &dev_attr_max_qp);
499 device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp);
500 device_create_file(&dev->ofdev.dev, &dev_attr_max_mr);
501 device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr);
502 device_create_file(&dev->ofdev.dev, &dev_attr_max_mw);
503 device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw);
504 device_create_file(&dev->ofdev.dev, &dev_attr_max_pd);
505 device_create_file(&dev->ofdev.dev, &dev_attr_max_ah);
506}
507
508void ehca_remove_device_sysfs(struct ibmebus_dev *dev)
509{
510 device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle);
511 device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports);
512 device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver);
513 device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq);
514 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq);
515 device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq);
516 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq);
517 device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp);
518 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp);
519 device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr);
520 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr);
521 device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw);
522 device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw);
523 device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd);
524 device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah);
525}
526
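/*
 * Adapter probe: read the ibm,hca-handle property, sense the HCA
 * attributes, register the IB device, create the EQ and NEQ, set up an
 * internal PD and max-MR, optionally open AQP1 on each port, create the
 * per-device sysfs files and add the adapter to shca_list.  The error
 * path unwinds these steps in reverse order.
 */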
527static int __devinit ehca_probe(struct ibmebus_dev *dev,
528 const struct of_device_id *id)
529{
530 struct ehca_shca *shca;
531 u64 *handle;
532 struct ib_pd *ibpd;
533 int ret;
534
535 handle = (u64 *)get_property(dev->ofdev.node, "ibm,hca-handle", NULL);
536 if (!handle) {
537 ehca_gen_err("Cannot get eHCA handle for adapter: %s.",
538 dev->ofdev.node->full_name);
539 return -ENODEV;
540 }
541
542 if (!(*handle)) {
543 ehca_gen_err("Wrong eHCA handle for adapter: %s.",
544 dev->ofdev.node->full_name);
545 return -ENODEV;
546 }
547
548 shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca));
549 if (!shca) {
550 ehca_gen_err("Cannot allocate shca memory.");
551 return -ENOMEM;
552 }
553
554 shca->ibmebus_dev = dev;
555 shca->ipz_hca_handle.handle = *handle;
556 dev->ofdev.dev.driver_data = shca;
557
558 ret = ehca_sense_attributes(shca);
559 if (ret < 0) {
560 ehca_gen_err("Cannot sense eHCA attributes.");
561 goto probe1;
562 }
563
564 ret = ehca_register_device(shca);
565 if (ret) {
566 ehca_gen_err("Cannot register Infiniband device");
567 goto probe1;
568 }
569
570 /* create event queues */
571 ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
572 if (ret) {
573 ehca_err(&shca->ib_device, "Cannot create EQ.");
574 goto probe2;
575 }
576
577 ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513);
578 if (ret) {
579 ehca_err(&shca->ib_device, "Cannot create NEQ.");
580 goto probe3;
581 }
582
583 /* create internal protection domain */
584 ibpd = ehca_alloc_pd(&shca->ib_device, (void*)(-1), NULL);
585 if (IS_ERR(ibpd)) {
586 ehca_err(&shca->ib_device, "Cannot create internal PD.");
587 ret = PTR_ERR(ibpd);
588 goto probe4;
589 }
590
591 shca->pd = container_of(ibpd, struct ehca_pd, ib_pd);
592 shca->pd->ib_pd.device = &shca->ib_device;
593
594 /* create internal max MR */
595 ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
596
597 if (ret) {
598 ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
599 ret);
600 goto probe5;
601 }
602
603 /* create AQP1 for port 1 */
604 if (ehca_open_aqp1 == 1) {
605 shca->sport[0].port_state = IB_PORT_DOWN;
606 ret = ehca_create_aqp1(shca, 1);
607 if (ret) {
608 ehca_err(&shca->ib_device,
609 "Cannot create AQP1 for port 1.");
610 goto probe6;
611 }
612 }
613
614 /* create AQP1 for port 2 */
615 if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) {
616 shca->sport[1].port_state = IB_PORT_DOWN;
617 ret = ehca_create_aqp1(shca, 2);
618 if (ret) {
619 ehca_err(&shca->ib_device,
620 "Cannot create AQP1 for port 2.");
621 goto probe7;
622 }
623 }
624
625 ehca_create_device_sysfs(dev);
626
627 spin_lock(&shca_list_lock);
628 list_add(&shca->shca_list, &shca_list);
629 spin_unlock(&shca_list_lock);
630
631 return 0;
632
633probe7:
634 ret = ehca_destroy_aqp1(&shca->sport[0]);
635 if (ret)
636 ehca_err(&shca->ib_device,
637 "Cannot destroy AQP1 for port 1. ret=%x", ret);
638
639probe6:
640 ret = ehca_dereg_internal_maxmr(shca);
641 if (ret)
642 ehca_err(&shca->ib_device,
643 "Cannot destroy internal MR. ret=%x", ret);
644
645probe5:
646 ret = ehca_dealloc_pd(&shca->pd->ib_pd);
647 if (ret)
648 ehca_err(&shca->ib_device,
649 "Cannot destroy internal PD. ret=%x", ret);
650
651probe4:
652 ret = ehca_destroy_eq(shca, &shca->neq);
653 if (ret)
654 ehca_err(&shca->ib_device,
655 "Cannot destroy NEQ. ret=%x", ret);
656
657probe3:
658 ret = ehca_destroy_eq(shca, &shca->eq);
659 if (ret)
660 ehca_err(&shca->ib_device,
661 "Cannot destroy EQ. ret=%x", ret);
662
663probe2:
664 ib_unregister_device(&shca->ib_device);
665
666probe1:
667 ib_dealloc_device(&shca->ib_device);
668
669 return -EINVAL;
670}
671
672static int __devexit ehca_remove(struct ibmebus_dev *dev)
673{
674 struct ehca_shca *shca = dev->ofdev.dev.driver_data;
675 int ret;
676
677 ehca_remove_device_sysfs(dev);
678
679 if (ehca_open_aqp1 == 1) {
680 int i;
681 for (i = 0; i < shca->num_ports; i++) {
682 ret = ehca_destroy_aqp1(&shca->sport[i]);
683 if (ret)
684 ehca_err(&shca->ib_device,
685 "Cannot destroy AQP1 for port %x "
686					 "ret=%x", i, ret);
687 }
688 }
689
690 ib_unregister_device(&shca->ib_device);
691
692 ret = ehca_dereg_internal_maxmr(shca);
693 if (ret)
694 ehca_err(&shca->ib_device,
695 "Cannot destroy internal MR. ret=%x", ret);
696
697 ret = ehca_dealloc_pd(&shca->pd->ib_pd);
698 if (ret)
699 ehca_err(&shca->ib_device,
700 "Cannot destroy internal PD. ret=%x", ret);
701
702 ret = ehca_destroy_eq(shca, &shca->eq);
703 if (ret)
704 ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
705
706 ret = ehca_destroy_eq(shca, &shca->neq);
707 if (ret)
708		ehca_err(&shca->ib_device, "Cannot destroy NEQ. ret=%x", ret);
709
710 ib_dealloc_device(&shca->ib_device);
711
712 spin_lock(&shca_list_lock);
713 list_del(&shca->shca_list);
714 spin_unlock(&shca_list_lock);
715
716 return ret;
717}
718
719static struct of_device_id ehca_device_table[] =
720{
721 {
722 .name = "lhca",
723 .compatible = "IBM,lhca",
724 },
725 {},
726};
727
728static struct ibmebus_driver ehca_driver = {
729 .name = "ehca",
730 .id_table = ehca_device_table,
731 .probe = ehca_probe,
732 .remove = ehca_remove,
733};
734
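/*
 * ehca_poll_eqs() is driven by poll_eqs_timer once per second and runs
 * the EQ tasklet for every registered adapter.  It acts as a safety net
 * against lost EQ interrupts and is armed only when the poll_all_eqs
 * module parameter is set (the default).
 */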
735void ehca_poll_eqs(unsigned long data)
736{
737 struct ehca_shca *shca;
738
739 spin_lock(&shca_list_lock);
740 list_for_each_entry(shca, &shca_list, shca_list) {
741 if (shca->eq.is_initialized)
742 ehca_tasklet_eq((unsigned long)(void*)shca);
743 }
744 mod_timer(&poll_eqs_timer, jiffies + HZ);
745 spin_unlock(&shca_list_lock);
746}
747
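/*
 * Module init order: IDR tables and locks, completion pool, slab
 * caches, ibmebus driver registration, driver sysfs attribute, and
 * finally the EQ polling timer; ehca_module_exit() tears the same
 * objects down in reverse order.
 */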
748int __init ehca_module_init(void)
749{
750 int ret;
751
752 printk(KERN_INFO "eHCA Infiniband Device Driver "
753 "(Rel.: SVNEHCA_0016)\n");
754 idr_init(&ehca_qp_idr);
755 idr_init(&ehca_cq_idr);
756 spin_lock_init(&ehca_qp_idr_lock);
757 spin_lock_init(&ehca_cq_idr_lock);
758
759 INIT_LIST_HEAD(&shca_list);
760 spin_lock_init(&shca_list_lock);
761
762 if ((ret = ehca_create_comp_pool())) {
763 ehca_gen_err("Cannot create comp pool.");
764 return ret;
765 }
766
767 if ((ret = ehca_create_slab_caches())) {
768 ehca_gen_err("Cannot create SLAB caches");
769 ret = -ENOMEM;
770 goto module_init1;
771 }
772
773 if ((ret = ibmebus_register_driver(&ehca_driver))) {
774 ehca_gen_err("Cannot register eHCA device driver");
775 ret = -EINVAL;
776 goto module_init2;
777 }
778
779 ehca_create_driver_sysfs(&ehca_driver);
780
781 if (ehca_poll_all_eqs != 1) {
782 ehca_gen_err("WARNING!!!");
783 ehca_gen_err("It is possible to lose interrupts.");
784 } else {
785 init_timer(&poll_eqs_timer);
786 poll_eqs_timer.function = ehca_poll_eqs;
787 poll_eqs_timer.expires = jiffies + HZ;
788 add_timer(&poll_eqs_timer);
789 }
790
791 return 0;
792
793module_init2:
794 ehca_destroy_slab_caches();
795
796module_init1:
797 ehca_destroy_comp_pool();
798 return ret;
799}
800
801void __exit ehca_module_exit(void)
802{
803 if (ehca_poll_all_eqs == 1)
804 del_timer_sync(&poll_eqs_timer);
805
806 ehca_remove_driver_sysfs(&ehca_driver);
807 ibmebus_unregister_driver(&ehca_driver);
808
809 ehca_destroy_slab_caches();
810
811 ehca_destroy_comp_pool();
812
813 idr_destroy(&ehca_cq_idr);
814 idr_destroy(&ehca_qp_idr);
815}
816
817module_init(ehca_module_init);
818module_exit(ehca_module_exit);
diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c
new file mode 100644
index 000000000000..32a870660bfe
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mcast.c
@@ -0,0 +1,131 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * mcast functions
5 *
6 * Authors: Khadija Souissi <souissik@de.ibm.com>
7 * Waleri Fomin <fomin@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com>
9 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
10 * Heiko J Schick <schickhj@de.ibm.com>
11 *
12 * Copyright (c) 2005 IBM Corporation
13 *
14 * All rights reserved.
15 *
16 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
17 * BSD.
18 *
19 * OpenIB BSD License
20 *
21 * Redistribution and use in source and binary forms, with or without
22 * modification, are permitted provided that the following conditions are met:
23 *
24 * Redistributions of source code must retain the above copyright notice, this
25 * list of conditions and the following disclaimer.
26 *
27 * Redistributions in binary form must reproduce the above copyright notice,
28 * this list of conditions and the following disclaimer in the documentation
29 * and/or other materials
30 * provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
39 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
40 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 * POSSIBILITY OF SUCH DAMAGE.
43 */
44
45#include <linux/module.h>
46#include <linux/err.h>
47#include "ehca_classes.h"
48#include "ehca_tools.h"
49#include "ehca_qes.h"
50#include "ehca_iverbs.h"
51#include "hcp_if.h"
52
53#define MAX_MC_LID 0xFFFE
54#define MIN_MC_LID 0xC000 /* Multicast limits */
55#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF)
56#define EHCA_VALID_MULTICAST_LID(lid) \
57 (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID))
58
59int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
60{
61 struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
62 struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
63 ib_device);
64 union ib_gid my_gid;
65 u64 subnet_prefix, interface_id, h_ret;
66
67 if (ibqp->qp_type != IB_QPT_UD) {
68 ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type);
69 return -EINVAL;
70 }
71
72 if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
73		ehca_err(ibqp->device, "invalid multicast gid");
74 return -EINVAL;
75 } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
76		ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
77 return -EINVAL;
78 }
79
80 memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
81
82 subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
83 interface_id = be64_to_cpu(my_gid.global.interface_id);
84 h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle,
85 my_qp->ipz_qp_handle,
86 my_qp->galpas.kernel,
87 lid, subnet_prefix, interface_id);
88 if (h_ret != H_SUCCESS)
89 ehca_err(ibqp->device,
90 "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
91 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
92
93 return ehca2ib_return_code(h_ret);
94}
95
96int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
97{
98 struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
99 struct ehca_shca *shca = container_of(ibqp->pd->device,
100 struct ehca_shca, ib_device);
101 union ib_gid my_gid;
102 u64 subnet_prefix, interface_id, h_ret;
103
104 if (ibqp->qp_type != IB_QPT_UD) {
105 ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type);
106 return -EINVAL;
107 }
108
109 if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) {
110		ehca_err(ibqp->device, "invalid multicast gid");
111 return -EINVAL;
112 } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) {
113		ehca_err(ibqp->device, "invalid multicast lid=%x", lid);
114 return -EINVAL;
115 }
116
117 memcpy(&my_gid.raw, gid->raw, sizeof(union ib_gid));
118
119 subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix);
120 interface_id = be64_to_cpu(my_gid.global.interface_id);
121 h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle,
122 my_qp->ipz_qp_handle,
123 my_qp->galpas.kernel,
124 lid, subnet_prefix, interface_id);
125 if (h_ret != H_SUCCESS)
126 ehca_err(ibqp->device,
127 "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
128 "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
129
130 return ehca2ib_return_code(h_ret);
131}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
new file mode 100644
index 000000000000..5ca65441e1da
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -0,0 +1,2261 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * MR/MW functions
5 *
6 * Authors: Dietmar Decker <ddecker@de.ibm.com>
7 * Christoph Raisch <raisch@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include <asm/current.h>
43
44#include "ehca_iverbs.h"
45#include "ehca_mrmw.h"
46#include "hcp_if.h"
47#include "hipz_hw.h"
48
49static struct kmem_cache *mr_cache;
50static struct kmem_cache *mw_cache;
51
52static struct ehca_mr *ehca_mr_new(void)
53{
54 struct ehca_mr *me;
55
56 me = kmem_cache_alloc(mr_cache, SLAB_KERNEL);
57 if (me) {
58 memset(me, 0, sizeof(struct ehca_mr));
59 spin_lock_init(&me->mrlock);
60 } else
61 ehca_gen_err("alloc failed");
62
63 return me;
64}
65
66static void ehca_mr_delete(struct ehca_mr *me)
67{
68 kmem_cache_free(mr_cache, me);
69}
70
71static struct ehca_mw *ehca_mw_new(void)
72{
73 struct ehca_mw *me;
74
75 me = kmem_cache_alloc(mw_cache, SLAB_KERNEL);
76 if (me) {
77 memset(me, 0, sizeof(struct ehca_mw));
78 spin_lock_init(&me->mwlock);
79 } else
80 ehca_gen_err("alloc failed");
81
82 return me;
83}
84
85static void ehca_mw_delete(struct ehca_mw *me)
86{
87 kmem_cache_free(mw_cache, me);
88}
89
90/*----------------------------------------------------------------------*/
91
92struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
93{
94 struct ib_mr *ib_mr;
95 int ret;
96 struct ehca_mr *e_maxmr;
97 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
98 struct ehca_shca *shca =
99 container_of(pd->device, struct ehca_shca, ib_device);
100
101 if (shca->maxmr) {
102 e_maxmr = ehca_mr_new();
103 if (!e_maxmr) {
104 ehca_err(&shca->ib_device, "out of memory");
105 ib_mr = ERR_PTR(-ENOMEM);
106 goto get_dma_mr_exit0;
107 }
108
109 ret = ehca_reg_maxmr(shca, e_maxmr, (u64*)KERNELBASE,
110 mr_access_flags, e_pd,
111 &e_maxmr->ib.ib_mr.lkey,
112 &e_maxmr->ib.ib_mr.rkey);
113 if (ret) {
114 ib_mr = ERR_PTR(ret);
115 goto get_dma_mr_exit0;
116 }
117 ib_mr = &e_maxmr->ib.ib_mr;
118 } else {
119 ehca_err(&shca->ib_device, "no internal max-MR exist!");
120 ib_mr = ERR_PTR(-EINVAL);
121 goto get_dma_mr_exit0;
122 }
123
124get_dma_mr_exit0:
125 if (IS_ERR(ib_mr))
126 ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ",
127 PTR_ERR(ib_mr), pd, mr_access_flags);
128 return ib_mr;
129} /* end ehca_get_dma_mr() */
130
131/*----------------------------------------------------------------------*/
132
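/*
 * MR registration tracks two page counts: num_pages_mr in kernel
 * PAGE_SIZE units and num_pages_4k in EHCA_PAGESIZE (4k) units, the
 * granularity used when the MR is registered with the HCA.  Both are
 * derived from the offset of iova_start within its page plus the
 * region length.
 */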
133struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
134 struct ib_phys_buf *phys_buf_array,
135 int num_phys_buf,
136 int mr_access_flags,
137 u64 *iova_start)
138{
139 struct ib_mr *ib_mr;
140 int ret;
141 struct ehca_mr *e_mr;
142 struct ehca_shca *shca =
143 container_of(pd->device, struct ehca_shca, ib_device);
144 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
145
146 u64 size;
147 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
148 u32 num_pages_mr;
149 u32 num_pages_4k; /* 4k portion "pages" */
150
151 if ((num_phys_buf <= 0) || !phys_buf_array) {
152 ehca_err(pd->device, "bad input values: num_phys_buf=%x "
153 "phys_buf_array=%p", num_phys_buf, phys_buf_array);
154 ib_mr = ERR_PTR(-EINVAL);
155 goto reg_phys_mr_exit0;
156 }
157 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
158 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
159 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
160 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
161 /*
162 * Remote Write Access requires Local Write Access
163 * Remote Atomic Access requires Local Write Access
164 */
165 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
166 mr_access_flags);
167 ib_mr = ERR_PTR(-EINVAL);
168 goto reg_phys_mr_exit0;
169 }
170
171 /* check physical buffer list and calculate size */
172 ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
173 iova_start, &size);
174 if (ret) {
175 ib_mr = ERR_PTR(ret);
176 goto reg_phys_mr_exit0;
177 }
178 if ((size == 0) ||
179 (((u64)iova_start + size) < (u64)iova_start)) {
180 ehca_err(pd->device, "bad input values: size=%lx iova_start=%p",
181 size, iova_start);
182 ib_mr = ERR_PTR(-EINVAL);
183 goto reg_phys_mr_exit0;
184 }
185
186 e_mr = ehca_mr_new();
187 if (!e_mr) {
188 ehca_err(pd->device, "out of memory");
189 ib_mr = ERR_PTR(-ENOMEM);
190 goto reg_phys_mr_exit0;
191 }
192
193 /* determine number of MR pages */
194 num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size +
195 PAGE_SIZE - 1) / PAGE_SIZE);
196 num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size +
197 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
198
199 /* register MR on HCA */
200 if (ehca_mr_is_maxmr(size, iova_start)) {
201 e_mr->flags |= EHCA_MR_FLAG_MAXMR;
202 ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
203 e_pd, &e_mr->ib.ib_mr.lkey,
204 &e_mr->ib.ib_mr.rkey);
205 if (ret) {
206 ib_mr = ERR_PTR(ret);
207 goto reg_phys_mr_exit1;
208 }
209 } else {
210 pginfo.type = EHCA_MR_PGI_PHYS;
211 pginfo.num_pages = num_pages_mr;
212 pginfo.num_4k = num_pages_4k;
213 pginfo.num_phys_buf = num_phys_buf;
214 pginfo.phys_buf_array = phys_buf_array;
215 pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) /
216 EHCA_PAGESIZE);
217
218 ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
219 e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
220 &e_mr->ib.ib_mr.rkey);
221 if (ret) {
222 ib_mr = ERR_PTR(ret);
223 goto reg_phys_mr_exit1;
224 }
225 }
226
227 /* successful registration of all pages */
228 return &e_mr->ib.ib_mr;
229
230reg_phys_mr_exit1:
231 ehca_mr_delete(e_mr);
232reg_phys_mr_exit0:
233 if (IS_ERR(ib_mr))
234 ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p "
235 "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
236 PTR_ERR(ib_mr), pd, phys_buf_array,
237 num_phys_buf, mr_access_flags, iova_start);
238 return ib_mr;
239} /* end ehca_reg_phys_mr() */
240
241/*----------------------------------------------------------------------*/
242
243struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
244 struct ib_umem *region,
245 int mr_access_flags,
246 struct ib_udata *udata)
247{
248 struct ib_mr *ib_mr;
249 struct ehca_mr *e_mr;
250 struct ehca_shca *shca =
251 container_of(pd->device, struct ehca_shca, ib_device);
252 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
253 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
254 int ret;
255 u32 num_pages_mr;
256 u32 num_pages_4k; /* 4k portion "pages" */
257
258 if (!pd) {
259 ehca_gen_err("bad pd=%p", pd);
260 return ERR_PTR(-EFAULT);
261 }
262 if (!region) {
263 ehca_err(pd->device, "bad input values: region=%p", region);
264 ib_mr = ERR_PTR(-EINVAL);
265 goto reg_user_mr_exit0;
266 }
267 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
268 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
269 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
270 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
271 /*
272 * Remote Write Access requires Local Write Access
273 * Remote Atomic Access requires Local Write Access
274 */
275 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
276 mr_access_flags);
277 ib_mr = ERR_PTR(-EINVAL);
278 goto reg_user_mr_exit0;
279 }
280 if (region->page_size != PAGE_SIZE) {
281 ehca_err(pd->device, "page size not supported, "
282 "region->page_size=%x", region->page_size);
283 ib_mr = ERR_PTR(-EINVAL);
284 goto reg_user_mr_exit0;
285 }
286
287 if ((region->length == 0) ||
288 ((region->virt_base + region->length) < region->virt_base)) {
289 ehca_err(pd->device, "bad input values: length=%lx "
290 "virt_base=%lx", region->length, region->virt_base);
291 ib_mr = ERR_PTR(-EINVAL);
292 goto reg_user_mr_exit0;
293 }
294
295 e_mr = ehca_mr_new();
296 if (!e_mr) {
297 ehca_err(pd->device, "out of memory");
298 ib_mr = ERR_PTR(-ENOMEM);
299 goto reg_user_mr_exit0;
300 }
301
302 /* determine number of MR pages */
303 num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length +
304 PAGE_SIZE - 1) / PAGE_SIZE);
305 num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length +
306 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
307
308 /* register MR on HCA */
309 pginfo.type = EHCA_MR_PGI_USER;
310 pginfo.num_pages = num_pages_mr;
311 pginfo.num_4k = num_pages_4k;
312 pginfo.region = region;
313 pginfo.next_4k = region->offset / EHCA_PAGESIZE;
314 pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
315 (&region->chunk_list),
316 list);
317
318 ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base,
319 region->length, mr_access_flags, e_pd, &pginfo,
320 &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
321 if (ret) {
322 ib_mr = ERR_PTR(ret);
323 goto reg_user_mr_exit1;
324 }
325
326 /* successful registration of all pages */
327 return &e_mr->ib.ib_mr;
328
329reg_user_mr_exit1:
330 ehca_mr_delete(e_mr);
331reg_user_mr_exit0:
332 if (IS_ERR(ib_mr))
333 ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x"
334 " udata=%p",
335 PTR_ERR(ib_mr), pd, region, mr_access_flags, udata);
336 return ib_mr;
337} /* end ehca_reg_user_mr() */
338
339/*----------------------------------------------------------------------*/
340
341int ehca_rereg_phys_mr(struct ib_mr *mr,
342 int mr_rereg_mask,
343 struct ib_pd *pd,
344 struct ib_phys_buf *phys_buf_array,
345 int num_phys_buf,
346 int mr_access_flags,
347 u64 *iova_start)
348{
349 int ret;
350
351 struct ehca_shca *shca =
352 container_of(mr->device, struct ehca_shca, ib_device);
353 struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
354 struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
355 u64 new_size;
356 u64 *new_start;
357 u32 new_acl;
358 struct ehca_pd *new_pd;
359 u32 tmp_lkey, tmp_rkey;
360 unsigned long sl_flags;
361 u32 num_pages_mr = 0;
362 u32 num_pages_4k = 0; /* 4k portion "pages" */
363 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
364 u32 cur_pid = current->tgid;
365
366 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
367 (my_pd->ownpid != cur_pid)) {
368 ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
369 cur_pid, my_pd->ownpid);
370 ret = -EINVAL;
371 goto rereg_phys_mr_exit0;
372 }
373
374 if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
375 /* TODO not supported, because PHYP rereg hCall needs pages */
376 ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
377 "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
378 ret = -EINVAL;
379 goto rereg_phys_mr_exit0;
380 }
381
382 if (mr_rereg_mask & IB_MR_REREG_PD) {
383 if (!pd) {
384 ehca_err(mr->device, "rereg with bad pd, pd=%p "
385 "mr_rereg_mask=%x", pd, mr_rereg_mask);
386 ret = -EINVAL;
387 goto rereg_phys_mr_exit0;
388 }
389 }
390
391 if ((mr_rereg_mask &
392 ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
393 (mr_rereg_mask == 0)) {
394 ret = -EINVAL;
395 goto rereg_phys_mr_exit0;
396 }
397
398 /* check other parameters */
399 if (e_mr == shca->maxmr) {
400 /* should be impossible, however reject to be sure */
401 ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
402 "shca->maxmr=%p mr->lkey=%x",
403 mr, shca->maxmr, mr->lkey);
404 ret = -EINVAL;
405 goto rereg_phys_mr_exit0;
406 }
407 if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
408 if (e_mr->flags & EHCA_MR_FLAG_FMR) {
409 ehca_err(mr->device, "not supported for FMR, mr=%p "
410 "flags=%x", mr, e_mr->flags);
411 ret = -EINVAL;
412 goto rereg_phys_mr_exit0;
413 }
414 if (!phys_buf_array || num_phys_buf <= 0) {
415 ehca_err(mr->device, "bad input values: mr_rereg_mask=%x"
416 " phys_buf_array=%p num_phys_buf=%x",
417 mr_rereg_mask, phys_buf_array, num_phys_buf);
418 ret = -EINVAL;
419 goto rereg_phys_mr_exit0;
420 }
421 }
422 if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */
423 (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
424 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
425 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
426 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
427 /*
428 * Remote Write Access requires Local Write Access
429 * Remote Atomic Access requires Local Write Access
430 */
431 ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
432 "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
433 ret = -EINVAL;
434 goto rereg_phys_mr_exit0;
435 }
436
437 /* set requested values dependent on rereg request */
438 spin_lock_irqsave(&e_mr->mrlock, sl_flags);
439 new_start = e_mr->start; /* new == old address */
440 new_size = e_mr->size; /* new == old length */
441 new_acl = e_mr->acl; /* new == old access control */
442	new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); /* new == old PD */
443
444 if (mr_rereg_mask & IB_MR_REREG_TRANS) {
445 new_start = iova_start; /* change address */
446 /* check physical buffer list and calculate size */
447 ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
448 num_phys_buf, iova_start,
449 &new_size);
450 if (ret)
451 goto rereg_phys_mr_exit1;
452 if ((new_size == 0) ||
453 (((u64)iova_start + new_size) < (u64)iova_start)) {
454 ehca_err(mr->device, "bad input values: new_size=%lx "
455 "iova_start=%p", new_size, iova_start);
456 ret = -EINVAL;
457 goto rereg_phys_mr_exit1;
458 }
459 num_pages_mr = ((((u64)new_start % PAGE_SIZE) + new_size +
460 PAGE_SIZE - 1) / PAGE_SIZE);
461 num_pages_4k = ((((u64)new_start % EHCA_PAGESIZE) + new_size +
462 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
463 pginfo.type = EHCA_MR_PGI_PHYS;
464 pginfo.num_pages = num_pages_mr;
465 pginfo.num_4k = num_pages_4k;
466 pginfo.num_phys_buf = num_phys_buf;
467 pginfo.phys_buf_array = phys_buf_array;
468 pginfo.next_4k = (((u64)iova_start & ~PAGE_MASK) /
469 EHCA_PAGESIZE);
470 }
471 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
472 new_acl = mr_access_flags;
473 if (mr_rereg_mask & IB_MR_REREG_PD)
474 new_pd = container_of(pd, struct ehca_pd, ib_pd);
475
476 ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
477 new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
478 if (ret)
479 goto rereg_phys_mr_exit1;
480
481 /* successful reregistration */
482 if (mr_rereg_mask & IB_MR_REREG_PD)
483 mr->pd = pd;
484 mr->lkey = tmp_lkey;
485 mr->rkey = tmp_rkey;
486
487rereg_phys_mr_exit1:
488 spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
489rereg_phys_mr_exit0:
490 if (ret)
491 ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p "
492 "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
493 "iova_start=%p",
494 ret, mr, mr_rereg_mask, pd, phys_buf_array,
495 num_phys_buf, mr_access_flags, iova_start);
496 return ret;
497} /* end ehca_rereg_phys_mr() */
498
499/*----------------------------------------------------------------------*/
500
501int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
502{
503 int ret = 0;
504 u64 h_ret;
505 struct ehca_shca *shca =
506 container_of(mr->device, struct ehca_shca, ib_device);
507 struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
508 struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
509 u32 cur_pid = current->tgid;
510 unsigned long sl_flags;
511 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
512
513 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
514 (my_pd->ownpid != cur_pid)) {
515 ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
516 cur_pid, my_pd->ownpid);
517 ret = -EINVAL;
518 goto query_mr_exit0;
519 }
520
521 if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
522 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
523 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
524 ret = -EINVAL;
525 goto query_mr_exit0;
526 }
527
528 memset(mr_attr, 0, sizeof(struct ib_mr_attr));
529 spin_lock_irqsave(&e_mr->mrlock, sl_flags);
530
531 h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
532 if (h_ret != H_SUCCESS) {
533 ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p "
534 "hca_hndl=%lx mr_hndl=%lx lkey=%x",
535 h_ret, mr, shca->ipz_hca_handle.handle,
536 e_mr->ipz_mr_handle.handle, mr->lkey);
537 ret = ehca_mrmw_map_hrc_query_mr(h_ret);
538 goto query_mr_exit1;
539 }
540 mr_attr->pd = mr->pd;
541 mr_attr->device_virt_addr = hipzout.vaddr;
542 mr_attr->size = hipzout.len;
543 mr_attr->lkey = hipzout.lkey;
544 mr_attr->rkey = hipzout.rkey;
545 ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
546
547query_mr_exit1:
548 spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
549query_mr_exit0:
550 if (ret)
551 ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p",
552 ret, mr, mr_attr);
553 return ret;
554} /* end ehca_query_mr() */
555
556/*----------------------------------------------------------------------*/
557
558int ehca_dereg_mr(struct ib_mr *mr)
559{
560 int ret = 0;
561 u64 h_ret;
562 struct ehca_shca *shca =
563 container_of(mr->device, struct ehca_shca, ib_device);
564 struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
565 struct ehca_pd *my_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
566 u32 cur_pid = current->tgid;
567
568 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
569 (my_pd->ownpid != cur_pid)) {
570 ehca_err(mr->device, "Invalid caller pid=%x ownpid=%x",
571 cur_pid, my_pd->ownpid);
572 ret = -EINVAL;
573 goto dereg_mr_exit0;
574 }
575
576 if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
577 ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
578 "e_mr->flags=%x", mr, e_mr, e_mr->flags);
579 ret = -EINVAL;
580 goto dereg_mr_exit0;
581 } else if (e_mr == shca->maxmr) {
582		/* should be impossible, but reject it to be sure */
583 ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p "
584 "shca->maxmr=%p mr->lkey=%x",
585 mr, shca->maxmr, mr->lkey);
586 ret = -EINVAL;
587 goto dereg_mr_exit0;
588 }
589
590 /* TODO: BUSY: MR still has bound window(s) */
591 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
592 if (h_ret != H_SUCCESS) {
593 ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p "
594 "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
595 h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
596 e_mr->ipz_mr_handle.handle, mr->lkey);
597 ret = ehca_mrmw_map_hrc_free_mr(h_ret);
598 goto dereg_mr_exit0;
599 }
600
601 /* successful deregistration */
602 ehca_mr_delete(e_mr);
603
604dereg_mr_exit0:
605 if (ret)
606 ehca_err(mr->device, "ret=%x mr=%p", ret, mr);
607 return ret;
608} /* end ehca_dereg_mr() */
609
610/*----------------------------------------------------------------------*/
611
612struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
613{
614 struct ib_mw *ib_mw;
615 u64 h_ret;
616 struct ehca_mw *e_mw;
617 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
618 struct ehca_shca *shca =
619 container_of(pd->device, struct ehca_shca, ib_device);
620 struct ehca_mw_hipzout_parms hipzout = {{0},0};
621
622 e_mw = ehca_mw_new();
623 if (!e_mw) {
624 ib_mw = ERR_PTR(-ENOMEM);
625 goto alloc_mw_exit0;
626 }
627
628 h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
629 e_pd->fw_pd, &hipzout);
630 if (h_ret != H_SUCCESS) {
631 ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
632 "shca=%p hca_hndl=%lx mw=%p",
633 h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
634 ib_mw = ERR_PTR(ehca_mrmw_map_hrc_alloc(h_ret));
635 goto alloc_mw_exit1;
636 }
637 /* successful MW allocation */
638 e_mw->ipz_mw_handle = hipzout.handle;
639 e_mw->ib_mw.rkey = hipzout.rkey;
640 return &e_mw->ib_mw;
641
642alloc_mw_exit1:
643 ehca_mw_delete(e_mw);
644alloc_mw_exit0:
645 if (IS_ERR(ib_mw))
646 ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd);
647 return ib_mw;
648} /* end ehca_alloc_mw() */
649
650/*----------------------------------------------------------------------*/
651
652int ehca_bind_mw(struct ib_qp *qp,
653 struct ib_mw *mw,
654 struct ib_mw_bind *mw_bind)
655{
656 /* TODO: not supported up to now */
657 ehca_gen_err("bind MW currently not supported by HCAD");
658
659 return -EPERM;
660} /* end ehca_bind_mw() */
661
662/*----------------------------------------------------------------------*/
663
664int ehca_dealloc_mw(struct ib_mw *mw)
665{
666 u64 h_ret;
667 struct ehca_shca *shca =
668 container_of(mw->device, struct ehca_shca, ib_device);
669 struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw);
670
671 h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
672 if (h_ret != H_SUCCESS) {
673 ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p "
674 "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
675 h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
676 e_mw->ipz_mw_handle.handle);
677 return ehca_mrmw_map_hrc_free_mw(h_ret);
678 }
679 /* successful deallocation */
680 ehca_mw_delete(e_mw);
681 return 0;
682} /* end ehca_dealloc_mw() */
683
684/*----------------------------------------------------------------------*/
685
686struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
687 int mr_access_flags,
688 struct ib_fmr_attr *fmr_attr)
689{
690 struct ib_fmr *ib_fmr;
691 struct ehca_shca *shca =
692 container_of(pd->device, struct ehca_shca, ib_device);
693 struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
694 struct ehca_mr *e_fmr;
695 int ret;
696 u32 tmp_lkey, tmp_rkey;
697 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
698
699 /* check other parameters */
700 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
701 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
702 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
703 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
704 /*
705 * Remote Write Access requires Local Write Access
706 * Remote Atomic Access requires Local Write Access
707 */
708 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
709 mr_access_flags);
710 ib_fmr = ERR_PTR(-EINVAL);
711 goto alloc_fmr_exit0;
712 }
713 if (mr_access_flags & IB_ACCESS_MW_BIND) {
714 ehca_err(pd->device, "bad input values: mr_access_flags=%x",
715 mr_access_flags);
716 ib_fmr = ERR_PTR(-EINVAL);
717 goto alloc_fmr_exit0;
718 }
719 if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) {
720 ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x "
721 "fmr_attr->max_maps=%x fmr_attr->page_shift=%x",
722 fmr_attr->max_pages, fmr_attr->max_maps,
723 fmr_attr->page_shift);
724 ib_fmr = ERR_PTR(-EINVAL);
725 goto alloc_fmr_exit0;
726 }
727 if (((1 << fmr_attr->page_shift) != EHCA_PAGESIZE) &&
728 ((1 << fmr_attr->page_shift) != PAGE_SIZE)) {
729 ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x",
730 fmr_attr->page_shift);
731 ib_fmr = ERR_PTR(-EINVAL);
732 goto alloc_fmr_exit0;
733 }
734
735 e_fmr = ehca_mr_new();
736 if (!e_fmr) {
737 ib_fmr = ERR_PTR(-ENOMEM);
738 goto alloc_fmr_exit0;
739 }
740 e_fmr->flags |= EHCA_MR_FLAG_FMR;
741
742 /* register MR on HCA */
743 ret = ehca_reg_mr(shca, e_fmr, NULL,
744 fmr_attr->max_pages * (1 << fmr_attr->page_shift),
745 mr_access_flags, e_pd, &pginfo,
746 &tmp_lkey, &tmp_rkey);
747 if (ret) {
748 ib_fmr = ERR_PTR(ret);
749 goto alloc_fmr_exit1;
750 }
751
752 /* successful */
753 e_fmr->fmr_page_size = 1 << fmr_attr->page_shift;
754 e_fmr->fmr_max_pages = fmr_attr->max_pages;
755 e_fmr->fmr_max_maps = fmr_attr->max_maps;
756 e_fmr->fmr_map_cnt = 0;
757 return &e_fmr->ib.ib_fmr;
758
759alloc_fmr_exit1:
760 ehca_mr_delete(e_fmr);
761alloc_fmr_exit0:
762 if (IS_ERR(ib_fmr))
763 ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
764 "fmr_attr=%p", PTR_ERR(ib_fmr), pd,
765 mr_access_flags, fmr_attr);
766 return ib_fmr;
767} /* end ehca_alloc_fmr() */
768
769/*----------------------------------------------------------------------*/
770
771int ehca_map_phys_fmr(struct ib_fmr *fmr,
772 u64 *page_list,
773 int list_len,
774 u64 iova)
775{
776 int ret;
777 struct ehca_shca *shca =
778 container_of(fmr->device, struct ehca_shca, ib_device);
779 struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
780 struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd);
781 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
782 u32 tmp_lkey, tmp_rkey;
783
784 if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
785 ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
786 e_fmr, e_fmr->flags);
787 ret = -EINVAL;
788 goto map_phys_fmr_exit0;
789 }
790 ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len);
791 if (ret)
792 goto map_phys_fmr_exit0;
793 if (iova % e_fmr->fmr_page_size) {
794		/* iova must be aligned to the FMR page size */
795 ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x",
796 iova, e_fmr->fmr_page_size);
797 ret = -EINVAL;
798 goto map_phys_fmr_exit0;
799 }
800 if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) {
801		/* HCAD does not limit the number of maps, but trace it anyway */
802 ehca_info(fmr->device, "map limit exceeded, fmr=%p "
803 "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x",
804 fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps);
805 }
806
807 pginfo.type = EHCA_MR_PGI_FMR;
808 pginfo.num_pages = list_len;
809 pginfo.num_4k = list_len * (e_fmr->fmr_page_size / EHCA_PAGESIZE);
810 pginfo.page_list = page_list;
811 pginfo.next_4k = ((iova & (e_fmr->fmr_page_size-1)) /
812 EHCA_PAGESIZE);
813
814 ret = ehca_rereg_mr(shca, e_fmr, (u64*)iova,
815 list_len * e_fmr->fmr_page_size,
816 e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey);
817 if (ret)
818 goto map_phys_fmr_exit0;
819
820 /* successful reregistration */
821 e_fmr->fmr_map_cnt++;
822 e_fmr->ib.ib_fmr.lkey = tmp_lkey;
823 e_fmr->ib.ib_fmr.rkey = tmp_rkey;
824 return 0;
825
826map_phys_fmr_exit0:
827 if (ret)
828 ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
829 "iova=%lx",
830 ret, fmr, page_list, list_len, iova);
831 return ret;
832} /* end ehca_map_phys_fmr() */
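/*
 * Worked example (illustrative numbers only, not taken from the driver):
 * with e_fmr->fmr_page_size == 64 KB (i.e. PAGE_SIZE on a 64 KB-page
 * kernel), EHCA_PAGESIZE == 4 KB and list_len == 8, the pginfo setup in
 * ehca_map_phys_fmr() yields num_4k = 8 * (64 KB / 4 KB) = 128 4 KB
 * "pages"; next_4k is always 0 because iova is required above to be
 * aligned to fmr_page_size.
 */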
833
834/*----------------------------------------------------------------------*/
835
836int ehca_unmap_fmr(struct list_head *fmr_list)
837{
838 int ret = 0;
839 struct ib_fmr *ib_fmr;
840 struct ehca_shca *shca = NULL;
841 struct ehca_shca *prev_shca;
842 struct ehca_mr *e_fmr;
843 u32 num_fmr = 0;
844 u32 unmap_fmr_cnt = 0;
845
846	/* check that all FMRs belong to the same SHCA and carry the FMR flag */
847 list_for_each_entry(ib_fmr, fmr_list, list) {
848 prev_shca = shca;
849 if (!ib_fmr) {
850 ehca_gen_err("bad fmr=%p in list", ib_fmr);
851 ret = -EINVAL;
852 goto unmap_fmr_exit0;
853 }
854 shca = container_of(ib_fmr->device, struct ehca_shca,
855 ib_device);
856 e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
857 if ((shca != prev_shca) && prev_shca) {
858 ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p "
859 "prev_shca=%p e_fmr=%p",
860 shca, prev_shca, e_fmr);
861 ret = -EINVAL;
862 goto unmap_fmr_exit0;
863 }
864 if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
865 ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p "
866 "e_fmr->flags=%x", e_fmr, e_fmr->flags);
867 ret = -EINVAL;
868 goto unmap_fmr_exit0;
869 }
870 num_fmr++;
871 }
872
873 /* loop over all FMRs to unmap */
874 list_for_each_entry(ib_fmr, fmr_list, list) {
875 unmap_fmr_cnt++;
876 e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr);
877 shca = container_of(ib_fmr->device, struct ehca_shca,
878 ib_device);
879 ret = ehca_unmap_one_fmr(shca, e_fmr);
880 if (ret) {
881 /* unmap failed, stop unmapping of rest of FMRs */
882 ehca_err(&shca->ib_device, "unmap of one FMR failed, "
883 "stop rest, e_fmr=%p num_fmr=%x "
884 "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr,
885 unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey);
886 goto unmap_fmr_exit0;
887 }
888 }
889
890unmap_fmr_exit0:
891 if (ret)
892 ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
893 ret, fmr_list, num_fmr, unmap_fmr_cnt);
894 return ret;
895} /* end ehca_unmap_fmr() */
896
897/*----------------------------------------------------------------------*/
898
899int ehca_dealloc_fmr(struct ib_fmr *fmr)
900{
901 int ret;
902 u64 h_ret;
903 struct ehca_shca *shca =
904 container_of(fmr->device, struct ehca_shca, ib_device);
905 struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr);
906
907 if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) {
908 ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x",
909 e_fmr, e_fmr->flags);
910 ret = -EINVAL;
911 goto free_fmr_exit0;
912 }
913
914 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
915 if (h_ret != H_SUCCESS) {
916 ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p "
917 "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
918 h_ret, e_fmr, shca->ipz_hca_handle.handle,
919 e_fmr->ipz_mr_handle.handle, fmr->lkey);
920 ret = ehca_mrmw_map_hrc_free_mr(h_ret);
921 goto free_fmr_exit0;
922 }
923 /* successful deregistration */
924 ehca_mr_delete(e_fmr);
925 return 0;
926
927free_fmr_exit0:
928 if (ret)
929 ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr);
930 return ret;
931} /* end ehca_dealloc_fmr() */
932
933/*----------------------------------------------------------------------*/
934
935int ehca_reg_mr(struct ehca_shca *shca,
936 struct ehca_mr *e_mr,
937 u64 *iova_start,
938 u64 size,
939 int acl,
940 struct ehca_pd *e_pd,
941 struct ehca_mr_pginfo *pginfo,
942 u32 *lkey, /*OUT*/
943 u32 *rkey) /*OUT*/
944{
945 int ret;
946 u64 h_ret;
947 u32 hipz_acl;
948 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
949
950 ehca_mrmw_map_acl(acl, &hipz_acl);
951 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
952 if (ehca_use_hp_mr == 1)
953 hipz_acl |= 0x00000001;
954
955 h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr,
956 (u64)iova_start, size, hipz_acl,
957 e_pd->fw_pd, &hipzout);
958 if (h_ret != H_SUCCESS) {
959 ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
960 "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
961 ret = ehca_mrmw_map_hrc_alloc(h_ret);
962 goto ehca_reg_mr_exit0;
963 }
964
965 e_mr->ipz_mr_handle = hipzout.handle;
966
967 ret = ehca_reg_mr_rpages(shca, e_mr, pginfo);
968 if (ret)
969 goto ehca_reg_mr_exit1;
970
971 /* successful registration */
972 e_mr->num_pages = pginfo->num_pages;
973 e_mr->num_4k = pginfo->num_4k;
974 e_mr->start = iova_start;
975 e_mr->size = size;
976 e_mr->acl = acl;
977 *lkey = hipzout.lkey;
978 *rkey = hipzout.rkey;
979 return 0;
980
981ehca_reg_mr_exit1:
982 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
983 if (h_ret != H_SUCCESS) {
984 ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
985 "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
986 "pginfo=%p num_pages=%lx num_4k=%lx ret=%x",
987 h_ret, shca, e_mr, iova_start, size, acl, e_pd,
988 hipzout.lkey, pginfo, pginfo->num_pages,
989 pginfo->num_4k, ret);
990 ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, "
991 "not recoverable");
992 }
993ehca_reg_mr_exit0:
994 if (ret)
995 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
996 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
997 "num_pages=%lx num_4k=%lx",
998 ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
999 pginfo->num_pages, pginfo->num_4k);
1000 return ret;
1001} /* end ehca_reg_mr() */
1002
1003/*----------------------------------------------------------------------*/
1004
1005int ehca_reg_mr_rpages(struct ehca_shca *shca,
1006 struct ehca_mr *e_mr,
1007 struct ehca_mr_pginfo *pginfo)
1008{
1009 int ret = 0;
1010 u64 h_ret;
1011 u32 rnum;
1012 u64 rpage;
1013 u32 i;
1014 u64 *kpage;
1015
1016 kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
1017 if (!kpage) {
1018 ehca_err(&shca->ib_device, "kpage alloc failed");
1019 ret = -ENOMEM;
1020 goto ehca_reg_mr_rpages_exit0;
1021 }
1022
1023 /* max 512 pages per shot */
1024 for (i = 0; i < ((pginfo->num_4k + 512 - 1) / 512); i++) {
1025
1026 if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
1027 rnum = pginfo->num_4k % 512; /* last shot */
1028 if (rnum == 0)
1029 rnum = 512; /* last shot is full */
1030 } else
1031 rnum = 512;
1032
1033 if (rnum > 1) {
1034 ret = ehca_set_pagebuf(e_mr, pginfo, rnum, kpage);
1035 if (ret) {
1036 ehca_err(&shca->ib_device, "ehca_set_pagebuf "
1037 "bad rc, ret=%x rnum=%x kpage=%p",
1038 ret, rnum, kpage);
1039 ret = -EFAULT;
1040 goto ehca_reg_mr_rpages_exit1;
1041 }
1042 rpage = virt_to_abs(kpage);
1043 if (!rpage) {
1044 ehca_err(&shca->ib_device, "kpage=%p i=%x",
1045 kpage, i);
1046 ret = -EFAULT;
1047 goto ehca_reg_mr_rpages_exit1;
1048 }
1049 } else { /* rnum==1 */
1050 ret = ehca_set_pagebuf_1(e_mr, pginfo, &rpage);
1051 if (ret) {
1052 ehca_err(&shca->ib_device, "ehca_set_pagebuf_1 "
1053 "bad rc, ret=%x i=%x", ret, i);
1054 ret = -EFAULT;
1055 goto ehca_reg_mr_rpages_exit1;
1056 }
1057 }
1058
1059 h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, e_mr,
1060 0, /* pagesize 4k */
1061 0, rpage, rnum);
1062
1063 if (i == ((pginfo->num_4k + 512 - 1) / 512) - 1) {
1064 /*
1065 * check for 'registration complete'==H_SUCCESS
1066 * and for 'page registered'==H_PAGE_REGISTERED
1067 */
1068 if (h_ret != H_SUCCESS) {
1069 ehca_err(&shca->ib_device, "last "
1070 "hipz_reg_rpage_mr failed, h_ret=%lx "
1071 "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
1072 " lkey=%x", h_ret, e_mr, i,
1073 shca->ipz_hca_handle.handle,
1074 e_mr->ipz_mr_handle.handle,
1075 e_mr->ib.ib_mr.lkey);
1076 ret = ehca_mrmw_map_hrc_rrpg_last(h_ret);
1077 break;
1078 } else
1079 ret = 0;
1080 } else if (h_ret != H_PAGE_REGISTERED) {
1081 ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
1082 "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx "
1083 "mr_hndl=%lx", h_ret, e_mr, i,
1084 e_mr->ib.ib_mr.lkey,
1085 shca->ipz_hca_handle.handle,
1086 e_mr->ipz_mr_handle.handle);
1087 ret = ehca_mrmw_map_hrc_rrpg_notlast(h_ret);
1088 break;
1089 } else
1090 ret = 0;
1091 } /* end for(i) */
1092
1093
1094ehca_reg_mr_rpages_exit1:
1095 kfree(kpage);
1096ehca_reg_mr_rpages_exit0:
1097 if (ret)
1098 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
1099 "num_pages=%lx num_4k=%lx", ret, shca, e_mr, pginfo,
1100 pginfo->num_pages, pginfo->num_4k);
1101 return ret;
1102} /* end ehca_reg_mr_rpages() */
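/*
 * Worked example (illustrative only): for pginfo->num_4k == 1300 the loop
 * above issues ceil(1300 / 512) == 3 hipz_h_register_rpage_mr hCalls with
 * rnum == 512, 512 and 276; the first two must return H_PAGE_REGISTERED,
 * the last one H_SUCCESS.
 */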
1103
1104/*----------------------------------------------------------------------*/
1105
1106inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1107 struct ehca_mr *e_mr,
1108 u64 *iova_start,
1109 u64 size,
1110 u32 acl,
1111 struct ehca_pd *e_pd,
1112 struct ehca_mr_pginfo *pginfo,
1113 u32 *lkey, /*OUT*/
1114 u32 *rkey) /*OUT*/
1115{
1116 int ret;
1117 u64 h_ret;
1118 u32 hipz_acl;
1119 u64 *kpage;
1120 u64 rpage;
1121 struct ehca_mr_pginfo pginfo_save;
1122 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
1123
1124 ehca_mrmw_map_acl(acl, &hipz_acl);
1125 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
1126
1127 kpage = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
1128 if (!kpage) {
1129 ehca_err(&shca->ib_device, "kpage alloc failed");
1130 ret = -ENOMEM;
1131 goto ehca_rereg_mr_rereg1_exit0;
1132 }
1133
1134 pginfo_save = *pginfo;
1135 ret = ehca_set_pagebuf(e_mr, pginfo, pginfo->num_4k, kpage);
1136 if (ret) {
1137 ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
1138 "pginfo=%p type=%x num_pages=%lx num_4k=%lx kpage=%p",
1139 e_mr, pginfo, pginfo->type, pginfo->num_pages,
1140			 pginfo->num_4k, kpage);
1141 goto ehca_rereg_mr_rereg1_exit1;
1142 }
1143 rpage = virt_to_abs(kpage);
1144 if (!rpage) {
1145 ehca_err(&shca->ib_device, "kpage=%p", kpage);
1146 ret = -EFAULT;
1147 goto ehca_rereg_mr_rereg1_exit1;
1148 }
1149 h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
1150 (u64)iova_start, size, hipz_acl,
1151 e_pd->fw_pd, rpage, &hipzout);
1152 if (h_ret != H_SUCCESS) {
1153 /*
1154		 * reregistration unsuccessful; retry with the 3-hCall path,
1155		 * which is required e.g. in case of H_MR_CONDITION
1156		 * (MW bound or MR is shared)
1157 */
1158 ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
1159 "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
1160 *pginfo = pginfo_save;
1161 ret = -EAGAIN;
1162 } else if ((u64*)hipzout.vaddr != iova_start) {
1163 ehca_err(&shca->ib_device, "PHYP changed iova_start in "
1164 "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p "
1165 "mr_handle=%lx lkey=%x lkey_out=%x", iova_start,
1166 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
1167 e_mr->ib.ib_mr.lkey, hipzout.lkey);
1168 ret = -EFAULT;
1169 } else {
1170 /*
1171 * successful reregistration
1172 * note: start and start_out are identical for eServer HCAs
1173 */
1174 e_mr->num_pages = pginfo->num_pages;
1175 e_mr->num_4k = pginfo->num_4k;
1176 e_mr->start = iova_start;
1177 e_mr->size = size;
1178 e_mr->acl = acl;
1179 *lkey = hipzout.lkey;
1180 *rkey = hipzout.rkey;
1181 }
1182
1183ehca_rereg_mr_rereg1_exit1:
1184 kfree(kpage);
1185ehca_rereg_mr_rereg1_exit0:
1186 if ( ret && (ret != -EAGAIN) )
1187 ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
1188 "pginfo=%p num_pages=%lx num_4k=%lx",
1189 ret, *lkey, *rkey, pginfo, pginfo->num_pages,
1190 pginfo->num_4k);
1191 return ret;
1192} /* end ehca_rereg_mr_rereg1() */
1193
1194/*----------------------------------------------------------------------*/
1195
1196int ehca_rereg_mr(struct ehca_shca *shca,
1197 struct ehca_mr *e_mr,
1198 u64 *iova_start,
1199 u64 size,
1200 int acl,
1201 struct ehca_pd *e_pd,
1202 struct ehca_mr_pginfo *pginfo,
1203 u32 *lkey,
1204 u32 *rkey)
1205{
1206 int ret = 0;
1207 u64 h_ret;
1208 int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */
1209 int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */
1210
1211 /* first determine reregistration hCall(s) */
1212 if ((pginfo->num_4k > 512) || (e_mr->num_4k > 512) ||
1213 (pginfo->num_4k > e_mr->num_4k)) {
1214 ehca_dbg(&shca->ib_device, "Rereg3 case, pginfo->num_4k=%lx "
1215 "e_mr->num_4k=%x", pginfo->num_4k, e_mr->num_4k);
1216 rereg_1_hcall = 0;
1217 rereg_3_hcall = 1;
1218 }
1219
1220 if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */
1221 rereg_1_hcall = 0;
1222 rereg_3_hcall = 1;
1223 e_mr->flags &= ~EHCA_MR_FLAG_MAXMR;
1224 ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p",
1225 e_mr);
1226 }
1227
1228 if (rereg_1_hcall) {
1229 ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size,
1230 acl, e_pd, pginfo, lkey, rkey);
1231 if (ret) {
1232 if (ret == -EAGAIN)
1233 rereg_3_hcall = 1;
1234 else
1235 goto ehca_rereg_mr_exit0;
1236 }
1237 }
1238
1239 if (rereg_3_hcall) {
1240 struct ehca_mr save_mr;
1241
1242 /* first deregister old MR */
1243 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
1244 if (h_ret != H_SUCCESS) {
1245 ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1246 "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx "
1247 "mr->lkey=%x",
1248 h_ret, e_mr, shca->ipz_hca_handle.handle,
1249 e_mr->ipz_mr_handle.handle,
1250 e_mr->ib.ib_mr.lkey);
1251 ret = ehca_mrmw_map_hrc_free_mr(h_ret);
1252 goto ehca_rereg_mr_exit0;
1253 }
1254 /* clean ehca_mr_t, without changing struct ib_mr and lock */
1255 save_mr = *e_mr;
1256 ehca_mr_deletenew(e_mr);
1257
1258 /* set some MR values */
1259 e_mr->flags = save_mr.flags;
1260 e_mr->fmr_page_size = save_mr.fmr_page_size;
1261 e_mr->fmr_max_pages = save_mr.fmr_max_pages;
1262 e_mr->fmr_max_maps = save_mr.fmr_max_maps;
1263 e_mr->fmr_map_cnt = save_mr.fmr_map_cnt;
1264
1265 ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl,
1266 e_pd, pginfo, lkey, rkey);
1267 if (ret) {
1268 u32 offset = (u64)(&e_mr->flags) - (u64)e_mr;
1269 memcpy(&e_mr->flags, &(save_mr.flags),
1270 sizeof(struct ehca_mr) - offset);
1271 goto ehca_rereg_mr_exit0;
1272 }
1273 }
1274
1275ehca_rereg_mr_exit0:
1276 if (ret)
1277 ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
1278 "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
1279 "num_pages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
1280 "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
1281 acl, e_pd, pginfo, pginfo->num_pages, *lkey, *rkey,
1282 rereg_1_hcall, rereg_3_hcall);
1283 return ret;
1284} /* end ehca_rereg_mr() */
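/*
 * Decision sketch (summary of the code above, not additional behaviour):
 * ehca_rereg_mr() prefers the single hipz_h_reregister_pmr hCall (Rereg1)
 * and falls back to the 3-hCall path (hipz_h_free_resource_mr +
 * hipz_h_alloc_resource_mr + hipz_h_register_rpage_mr) when the old or
 * new mapping covers more than 512 4 KB pages, when the new mapping needs
 * more 4 KB pages than the old one, when the MR is the internal max-MR,
 * or when Rereg1 itself returns -EAGAIN, e.g. for H_MR_CONDITION because
 * a memory window is still bound or the MR is shared.
 */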
1285
1286/*----------------------------------------------------------------------*/
1287
1288int ehca_unmap_one_fmr(struct ehca_shca *shca,
1289 struct ehca_mr *e_fmr)
1290{
1291 int ret = 0;
1292 u64 h_ret;
1293 int rereg_1_hcall = 1; /* 1: use hipz_mr_reregister directly */
1294 int rereg_3_hcall = 0; /* 1: use 3 hipz calls for unmapping */
1295 struct ehca_pd *e_pd =
1296 container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd);
1297 struct ehca_mr save_fmr;
1298 u32 tmp_lkey, tmp_rkey;
1299 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
1300 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
1301
1302 /* first check if reregistration hCall can be used for unmap */
1303 if (e_fmr->fmr_max_pages > 512) {
1304 rereg_1_hcall = 0;
1305 rereg_3_hcall = 1;
1306 }
1307
1308 if (rereg_1_hcall) {
1309 /*
1310 * note: after using rereg hcall with len=0,
1311 * rereg hcall must be used again for registering pages
1312 */
1313 h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0,
1314 0, 0, e_pd->fw_pd, 0, &hipzout);
1315 if (h_ret != H_SUCCESS) {
1316 /*
1317			 * should not happen: the length was checked above,
1318			 * FMRs are not shared and no MW can be bound to an FMR
1319 */
1320 ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
1321 "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
1322 "mr_hndl=%lx lkey=%x lkey_out=%x",
1323 h_ret, e_fmr, shca->ipz_hca_handle.handle,
1324 e_fmr->ipz_mr_handle.handle,
1325 e_fmr->ib.ib_fmr.lkey, hipzout.lkey);
1326 rereg_3_hcall = 1;
1327 } else {
1328 /* successful reregistration */
1329 e_fmr->start = NULL;
1330 e_fmr->size = 0;
1331 tmp_lkey = hipzout.lkey;
1332 tmp_rkey = hipzout.rkey;
1333 }
1334 }
1335
1336 if (rereg_3_hcall) {
1339 /* first free old FMR */
1340 h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
1341 if (h_ret != H_SUCCESS) {
1342 ehca_err(&shca->ib_device, "hipz_free_mr failed, "
1343 "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
1344 "lkey=%x",
1345 h_ret, e_fmr, shca->ipz_hca_handle.handle,
1346 e_fmr->ipz_mr_handle.handle,
1347 e_fmr->ib.ib_fmr.lkey);
1348 ret = ehca_mrmw_map_hrc_free_mr(h_ret);
1349 goto ehca_unmap_one_fmr_exit0;
1350 }
1351 /* clean ehca_mr_t, without changing lock */
1352 save_fmr = *e_fmr;
1353 ehca_mr_deletenew(e_fmr);
1354
1355 /* set some MR values */
1356 e_fmr->flags = save_fmr.flags;
1357 e_fmr->fmr_page_size = save_fmr.fmr_page_size;
1358 e_fmr->fmr_max_pages = save_fmr.fmr_max_pages;
1359 e_fmr->fmr_max_maps = save_fmr.fmr_max_maps;
1360 e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt;
1361 e_fmr->acl = save_fmr.acl;
1362
1363 pginfo.type = EHCA_MR_PGI_FMR;
1364 pginfo.num_pages = 0;
1365 pginfo.num_4k = 0;
1366 ret = ehca_reg_mr(shca, e_fmr, NULL,
1367 (e_fmr->fmr_max_pages * e_fmr->fmr_page_size),
1368 e_fmr->acl, e_pd, &pginfo, &tmp_lkey,
1369 &tmp_rkey);
1370 if (ret) {
1371 u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr;
1372			memcpy(&e_fmr->flags, &(save_fmr.flags),
1373 sizeof(struct ehca_mr) - offset);
1374 goto ehca_unmap_one_fmr_exit0;
1375 }
1376 }
1377
1378ehca_unmap_one_fmr_exit0:
1379 if (ret)
1380 ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
1381 "fmr_max_pages=%x rereg_1_hcall=%x rereg_3_hcall=%x",
1382 ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages,
1383 rereg_1_hcall, rereg_3_hcall);
1384 return ret;
1385} /* end ehca_unmap_one_fmr() */
1386
1387/*----------------------------------------------------------------------*/
1388
1389int ehca_reg_smr(struct ehca_shca *shca,
1390 struct ehca_mr *e_origmr,
1391 struct ehca_mr *e_newmr,
1392 u64 *iova_start,
1393 int acl,
1394 struct ehca_pd *e_pd,
1395 u32 *lkey, /*OUT*/
1396 u32 *rkey) /*OUT*/
1397{
1398 int ret = 0;
1399 u64 h_ret;
1400 u32 hipz_acl;
1401 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
1402
1403 ehca_mrmw_map_acl(acl, &hipz_acl);
1404 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
1405
1406 h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1407 (u64)iova_start, hipz_acl, e_pd->fw_pd,
1408 &hipzout);
1409 if (h_ret != H_SUCCESS) {
1410 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
1411 "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
1412 "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
1413 h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
1414 shca->ipz_hca_handle.handle,
1415 e_origmr->ipz_mr_handle.handle,
1416 e_origmr->ib.ib_mr.lkey);
1417 ret = ehca_mrmw_map_hrc_reg_smr(h_ret);
1418 goto ehca_reg_smr_exit0;
1419 }
1420 /* successful registration */
1421 e_newmr->num_pages = e_origmr->num_pages;
1422 e_newmr->num_4k = e_origmr->num_4k;
1423 e_newmr->start = iova_start;
1424 e_newmr->size = e_origmr->size;
1425 e_newmr->acl = acl;
1426 e_newmr->ipz_mr_handle = hipzout.handle;
1427 *lkey = hipzout.lkey;
1428 *rkey = hipzout.rkey;
1429 return 0;
1430
1431ehca_reg_smr_exit0:
1432 if (ret)
1433 ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p "
1434 "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
1435 ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
1436 return ret;
1437} /* end ehca_reg_smr() */
1438
1439/*----------------------------------------------------------------------*/
1440
1441/* register internal max-MR to internal SHCA */
1442int ehca_reg_internal_maxmr(
1443 struct ehca_shca *shca,
1444 struct ehca_pd *e_pd,
1445 struct ehca_mr **e_maxmr) /*OUT*/
1446{
1447 int ret;
1448 struct ehca_mr *e_mr;
1449 u64 *iova_start;
1450 u64 size_maxmr;
1451 struct ehca_mr_pginfo pginfo={0,0,0,0,0,0,0,NULL,0,NULL,NULL,0,NULL,0};
1452 struct ib_phys_buf ib_pbuf;
1453 u32 num_pages_mr;
1454 u32 num_pages_4k; /* 4k portion "pages" */
1455
1456 e_mr = ehca_mr_new();
1457 if (!e_mr) {
1458 ehca_err(&shca->ib_device, "out of memory");
1459 ret = -ENOMEM;
1460 goto ehca_reg_internal_maxmr_exit0;
1461 }
1462 e_mr->flags |= EHCA_MR_FLAG_MAXMR;
1463
1464 /* register internal max-MR on HCA */
1465 size_maxmr = (u64)high_memory - PAGE_OFFSET;
1466 iova_start = (u64*)KERNELBASE;
1467 ib_pbuf.addr = 0;
1468 ib_pbuf.size = size_maxmr;
1469 num_pages_mr = ((((u64)iova_start % PAGE_SIZE) + size_maxmr +
1470 PAGE_SIZE - 1) / PAGE_SIZE);
1471 num_pages_4k = ((((u64)iova_start % EHCA_PAGESIZE) + size_maxmr +
1472 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE);
1473
1474 pginfo.type = EHCA_MR_PGI_PHYS;
1475 pginfo.num_pages = num_pages_mr;
1476 pginfo.num_4k = num_pages_4k;
1477 pginfo.num_phys_buf = 1;
1478 pginfo.phys_buf_array = &ib_pbuf;
1479
1480 ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
1481 &pginfo, &e_mr->ib.ib_mr.lkey,
1482 &e_mr->ib.ib_mr.rkey);
1483 if (ret) {
1484 ehca_err(&shca->ib_device, "reg of internal max MR failed, "
1485 "e_mr=%p iova_start=%p size_maxmr=%lx num_pages_mr=%x "
1486 "num_pages_4k=%x", e_mr, iova_start, size_maxmr,
1487 num_pages_mr, num_pages_4k);
1488 goto ehca_reg_internal_maxmr_exit1;
1489 }
1490
1491 /* successful registration of all pages */
1492 e_mr->ib.ib_mr.device = e_pd->ib_pd.device;
1493 e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
1494 e_mr->ib.ib_mr.uobject = NULL;
1495 atomic_inc(&(e_pd->ib_pd.usecnt));
1496 atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
1497 *e_maxmr = e_mr;
1498 return 0;
1499
1500ehca_reg_internal_maxmr_exit1:
1501 ehca_mr_delete(e_mr);
1502ehca_reg_internal_maxmr_exit0:
1503 if (ret)
1504 ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p",
1505 ret, shca, e_pd, e_maxmr);
1506 return ret;
1507} /* end ehca_reg_internal_maxmr() */
1508
1509/*----------------------------------------------------------------------*/
1510
1511int ehca_reg_maxmr(struct ehca_shca *shca,
1512 struct ehca_mr *e_newmr,
1513 u64 *iova_start,
1514 int acl,
1515 struct ehca_pd *e_pd,
1516 u32 *lkey,
1517 u32 *rkey)
1518{
1519 u64 h_ret;
1520 struct ehca_mr *e_origmr = shca->maxmr;
1521 u32 hipz_acl;
1522 struct ehca_mr_hipzout_parms hipzout = {{0},0,0,0,0,0};
1523
1524 ehca_mrmw_map_acl(acl, &hipz_acl);
1525 ehca_mrmw_set_pgsize_hipz_acl(&hipz_acl);
1526
1527 h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr,
1528 (u64)iova_start, hipz_acl, e_pd->fw_pd,
1529 &hipzout);
1530 if (h_ret != H_SUCCESS) {
1531 ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
1532 "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
1533 h_ret, e_origmr, shca->ipz_hca_handle.handle,
1534 e_origmr->ipz_mr_handle.handle,
1535 e_origmr->ib.ib_mr.lkey);
1536 return ehca_mrmw_map_hrc_reg_smr(h_ret);
1537 }
1538 /* successful registration */
1539 e_newmr->num_pages = e_origmr->num_pages;
1540 e_newmr->num_4k = e_origmr->num_4k;
1541 e_newmr->start = iova_start;
1542 e_newmr->size = e_origmr->size;
1543 e_newmr->acl = acl;
1544 e_newmr->ipz_mr_handle = hipzout.handle;
1545 *lkey = hipzout.lkey;
1546 *rkey = hipzout.rkey;
1547 return 0;
1548} /* end ehca_reg_maxmr() */
1549
1550/*----------------------------------------------------------------------*/
1551
1552int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
1553{
1554 int ret;
1555 struct ehca_mr *e_maxmr;
1556 struct ib_pd *ib_pd;
1557
1558 if (!shca->maxmr) {
1559 ehca_err(&shca->ib_device, "bad call, shca=%p", shca);
1560 ret = -EINVAL;
1561 goto ehca_dereg_internal_maxmr_exit0;
1562 }
1563
1564 e_maxmr = shca->maxmr;
1565 ib_pd = e_maxmr->ib.ib_mr.pd;
1566 shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */
1567
1568 ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
1569 if (ret) {
1570 ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
1571 "ret=%x e_maxmr=%p shca=%p lkey=%x",
1572 ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
1573 shca->maxmr = e_maxmr;
1574 goto ehca_dereg_internal_maxmr_exit0;
1575 }
1576
1577 atomic_dec(&ib_pd->usecnt);
1578
1579ehca_dereg_internal_maxmr_exit0:
1580 if (ret)
1581 ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p",
1582 ret, shca, shca->maxmr);
1583 return ret;
1584} /* end ehca_dereg_internal_maxmr() */
1585
1586/*----------------------------------------------------------------------*/
1587
1588/*
1589 * check the physical buffer array of the MR verbs for validity and
1590 * calculate the MR size
1591 */
1592int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
1593 int num_phys_buf,
1594 u64 *iova_start,
1595 u64 *size)
1596{
1597 struct ib_phys_buf *pbuf = phys_buf_array;
1598 u64 size_count = 0;
1599 u32 i;
1600
1601 if (num_phys_buf == 0) {
1602 ehca_gen_err("bad phys buf array len, num_phys_buf=0");
1603 return -EINVAL;
1604 }
1605 /* check first buffer */
1606 if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
1607 ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
1608 "pbuf->addr=%lx pbuf->size=%lx",
1609 iova_start, pbuf->addr, pbuf->size);
1610 return -EINVAL;
1611 }
1612 if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
1613 (num_phys_buf > 1)) {
1614 ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx "
1615 "pbuf->size=%lx", pbuf->addr, pbuf->size);
1616 return -EINVAL;
1617 }
1618
1619 for (i = 0; i < num_phys_buf; i++) {
1620 if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
1621 ehca_gen_err("bad address, i=%x pbuf->addr=%lx "
1622 "pbuf->size=%lx",
1623 i, pbuf->addr, pbuf->size);
1624 return -EINVAL;
1625 }
1626 if (((i > 0) && /* not 1st */
1627 (i < (num_phys_buf - 1)) && /* not last */
1628 (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
1629 ehca_gen_err("bad size, i=%x pbuf->size=%lx",
1630 i, pbuf->size);
1631 return -EINVAL;
1632 }
1633 size_count += pbuf->size;
1634 pbuf++;
1635 }
1636
1637 *size = size_count;
1638 return 0;
1639} /* end ehca_mr_chk_buf_and_calc_size() */
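/*
 * Example of a buffer list that passes the checks above (hypothetical
 * values, assuming a 4 KB PAGE_SIZE and a page-aligned iova_start):
 *
 *	struct ib_phys_buf pbufs[] = {
 *		{ .addr = 0x1000,  .size = 0x3000 },
 *		{ .addr = 0x10000, .size = 0x2000 },
 *		{ .addr = 0x20000, .size = 0x0800 },
 *	};
 *
 * The first buffer ends on a page boundary, the middle buffer is page
 * aligned and a whole number of pages, and only the last buffer may end
 * mid-page; *size becomes 0x5800.
 */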
1640
1641/*----------------------------------------------------------------------*/
1642
1643/* check the page list of the map FMR verb for validity */
1644int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
1645 u64 *page_list,
1646 int list_len)
1647{
1648 u32 i;
1649 u64 *page;
1650
1651 if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) {
1652 ehca_gen_err("bad list_len, list_len=%x "
1653 "e_fmr->fmr_max_pages=%x fmr=%p",
1654 list_len, e_fmr->fmr_max_pages, e_fmr);
1655 return -EINVAL;
1656 }
1657
1658 /* each page must be aligned */
1659 page = page_list;
1660 for (i = 0; i < list_len; i++) {
1661 if (*page % e_fmr->fmr_page_size) {
1662 ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p "
1663 "fmr_page_size=%x", i, *page, page, e_fmr,
1664 e_fmr->fmr_page_size);
1665 return -EINVAL;
1666 }
1667 page++;
1668 }
1669
1670 return 0;
1671} /* end ehca_fmr_check_page_list() */
1672
1673/*----------------------------------------------------------------------*/
1674
1675/* set up the page buffer from the page info */
1676int ehca_set_pagebuf(struct ehca_mr *e_mr,
1677 struct ehca_mr_pginfo *pginfo,
1678 u32 number,
1679 u64 *kpage)
1680{
1681 int ret = 0;
1682 struct ib_umem_chunk *prev_chunk;
1683 struct ib_umem_chunk *chunk;
1684 struct ib_phys_buf *pbuf;
1685 u64 *fmrlist;
1686 u64 num4k, pgaddr, offs4k;
1687 u32 i = 0;
1688 u32 j = 0;
1689
1690 if (pginfo->type == EHCA_MR_PGI_PHYS) {
1691 /* loop over desired phys_buf_array entries */
1692 while (i < number) {
1693 pbuf = pginfo->phys_buf_array + pginfo->next_buf;
1694 num4k = ((pbuf->addr % EHCA_PAGESIZE) + pbuf->size +
1695 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
1696 offs4k = (pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
1697 while (pginfo->next_4k < offs4k + num4k) {
1698 /* sanity check */
1699 if ((pginfo->page_cnt >= pginfo->num_pages) ||
1700 (pginfo->page_4k_cnt >= pginfo->num_4k)) {
1701 ehca_gen_err("page_cnt >= num_pages, "
1702 "page_cnt=%lx "
1703 "num_pages=%lx "
1704 "page_4k_cnt=%lx "
1705 "num_4k=%lx i=%x",
1706 pginfo->page_cnt,
1707 pginfo->num_pages,
1708 pginfo->page_4k_cnt,
1709 pginfo->num_4k, i);
1710 ret = -EFAULT;
1711 goto ehca_set_pagebuf_exit0;
1712 }
1713 *kpage = phys_to_abs(
1714 (pbuf->addr & EHCA_PAGEMASK)
1715 + (pginfo->next_4k * EHCA_PAGESIZE));
1716 if ( !(*kpage) && pbuf->addr ) {
1717 ehca_gen_err("pbuf->addr=%lx "
1718 "pbuf->size=%lx "
1719 "next_4k=%lx", pbuf->addr,
1720 pbuf->size,
1721 pginfo->next_4k);
1722 ret = -EFAULT;
1723 goto ehca_set_pagebuf_exit0;
1724 }
1725 (pginfo->page_4k_cnt)++;
1726 (pginfo->next_4k)++;
1727 if (pginfo->next_4k %
1728 (PAGE_SIZE / EHCA_PAGESIZE) == 0)
1729 (pginfo->page_cnt)++;
1730 kpage++;
1731 i++;
1732 if (i >= number) break;
1733 }
1734 if (pginfo->next_4k >= offs4k + num4k) {
1735 (pginfo->next_buf)++;
1736 pginfo->next_4k = 0;
1737 }
1738 }
1739 } else if (pginfo->type == EHCA_MR_PGI_USER) {
1740 /* loop over desired chunk entries */
1741 chunk = pginfo->next_chunk;
1742 prev_chunk = pginfo->next_chunk;
1743 list_for_each_entry_continue(chunk,
1744 (&(pginfo->region->chunk_list)),
1745 list) {
1746 for (i = pginfo->next_nmap; i < chunk->nmap; ) {
1747 pgaddr = ( page_to_pfn(chunk->page_list[i].page)
1748 << PAGE_SHIFT );
1749 *kpage = phys_to_abs(pgaddr +
1750 (pginfo->next_4k *
1751 EHCA_PAGESIZE));
1752 if ( !(*kpage) ) {
1753 ehca_gen_err("pgaddr=%lx "
1754 "chunk->page_list[i]=%lx "
1755 "i=%x next_4k=%lx mr=%p",
1756 pgaddr,
1757 (u64)sg_dma_address(
1758 &chunk->
1759 page_list[i]),
1760 i, pginfo->next_4k, e_mr);
1761 ret = -EFAULT;
1762 goto ehca_set_pagebuf_exit0;
1763 }
1764 (pginfo->page_4k_cnt)++;
1765 (pginfo->next_4k)++;
1766 kpage++;
1767 if (pginfo->next_4k %
1768 (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
1769 (pginfo->page_cnt)++;
1770 (pginfo->next_nmap)++;
1771 pginfo->next_4k = 0;
1772 i++;
1773 }
1774 j++;
1775 if (j >= number) break;
1776 }
1777 if ((pginfo->next_nmap >= chunk->nmap) &&
1778 (j >= number)) {
1779 pginfo->next_nmap = 0;
1780 prev_chunk = chunk;
1781 break;
1782 } else if (pginfo->next_nmap >= chunk->nmap) {
1783 pginfo->next_nmap = 0;
1784 prev_chunk = chunk;
1785 } else if (j >= number)
1786 break;
1787 else
1788 prev_chunk = chunk;
1789 }
1790 pginfo->next_chunk =
1791 list_prepare_entry(prev_chunk,
1792 (&(pginfo->region->chunk_list)),
1793 list);
1794 } else if (pginfo->type == EHCA_MR_PGI_FMR) {
1795 /* loop over desired page_list entries */
1796 fmrlist = pginfo->page_list + pginfo->next_listelem;
1797 for (i = 0; i < number; i++) {
1798 *kpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
1799 pginfo->next_4k * EHCA_PAGESIZE);
1800 if ( !(*kpage) ) {
1801 ehca_gen_err("*fmrlist=%lx fmrlist=%p "
1802 "next_listelem=%lx next_4k=%lx",
1803 *fmrlist, fmrlist,
1804 pginfo->next_listelem,
1805 pginfo->next_4k);
1806 ret = -EFAULT;
1807 goto ehca_set_pagebuf_exit0;
1808 }
1809 (pginfo->page_4k_cnt)++;
1810 (pginfo->next_4k)++;
1811 kpage++;
1812 if (pginfo->next_4k %
1813 (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
1814 (pginfo->page_cnt)++;
1815 (pginfo->next_listelem)++;
1816 fmrlist++;
1817 pginfo->next_4k = 0;
1818 }
1819 }
1820 } else {
1821 ehca_gen_err("bad pginfo->type=%x", pginfo->type);
1822 ret = -EFAULT;
1823 goto ehca_set_pagebuf_exit0;
1824 }
1825
1826ehca_set_pagebuf_exit0:
1827 if (ret)
1828 ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
1829 "num_4k=%lx next_buf=%lx next_4k=%lx number=%x "
1830 "kpage=%p page_cnt=%lx page_4k_cnt=%lx i=%x "
1831 "next_listelem=%lx region=%p next_chunk=%p "
1832 "next_nmap=%lx", ret, e_mr, pginfo, pginfo->type,
1833 pginfo->num_pages, pginfo->num_4k,
1834 pginfo->next_buf, pginfo->next_4k, number, kpage,
1835 pginfo->page_cnt, pginfo->page_4k_cnt, i,
1836 pginfo->next_listelem, pginfo->region,
1837 pginfo->next_chunk, pginfo->next_nmap);
1838 return ret;
1839} /* end ehca_set_pagebuf() */
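/*
 * Illustrative note on the 4 KB bookkeeping above (no new behaviour): with
 * a 64 KB kernel PAGE_SIZE each MR "page" consists of
 * PAGE_SIZE / EHCA_PAGESIZE == 16 consecutive 4 KB HCA pages, which is why
 * page_cnt is only advanced on every 16th increment of next_4k in the
 * EHCA_MR_PGI_PHYS and EHCA_MR_PGI_USER cases.
 */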
1840
1841/*----------------------------------------------------------------------*/
1842
1843/* set up one page from the page info page buffer */
1844int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
1845 struct ehca_mr_pginfo *pginfo,
1846 u64 *rpage)
1847{
1848 int ret = 0;
1849 struct ib_phys_buf *tmp_pbuf;
1850 u64 *fmrlist;
1851 struct ib_umem_chunk *chunk;
1852 struct ib_umem_chunk *prev_chunk;
1853 u64 pgaddr, num4k, offs4k;
1854
1855 if (pginfo->type == EHCA_MR_PGI_PHYS) {
1856 /* sanity check */
1857 if ((pginfo->page_cnt >= pginfo->num_pages) ||
1858 (pginfo->page_4k_cnt >= pginfo->num_4k)) {
1859 ehca_gen_err("page_cnt >= num_pages, page_cnt=%lx "
1860 "num_pages=%lx page_4k_cnt=%lx num_4k=%lx",
1861 pginfo->page_cnt, pginfo->num_pages,
1862 pginfo->page_4k_cnt, pginfo->num_4k);
1863 ret = -EFAULT;
1864 goto ehca_set_pagebuf_1_exit0;
1865 }
1866 tmp_pbuf = pginfo->phys_buf_array + pginfo->next_buf;
1867 num4k = ((tmp_pbuf->addr % EHCA_PAGESIZE) + tmp_pbuf->size +
1868 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE;
1869 offs4k = (tmp_pbuf->addr & ~PAGE_MASK) / EHCA_PAGESIZE;
1870 *rpage = phys_to_abs((tmp_pbuf->addr & EHCA_PAGEMASK) +
1871 (pginfo->next_4k * EHCA_PAGESIZE));
1872 if ( !(*rpage) && tmp_pbuf->addr ) {
1873 ehca_gen_err("tmp_pbuf->addr=%lx"
1874 " tmp_pbuf->size=%lx next_4k=%lx",
1875 tmp_pbuf->addr, tmp_pbuf->size,
1876 pginfo->next_4k);
1877 ret = -EFAULT;
1878 goto ehca_set_pagebuf_1_exit0;
1879 }
1880 (pginfo->page_4k_cnt)++;
1881 (pginfo->next_4k)++;
1882 if (pginfo->next_4k % (PAGE_SIZE / EHCA_PAGESIZE) == 0)
1883 (pginfo->page_cnt)++;
1884 if (pginfo->next_4k >= offs4k + num4k) {
1885 (pginfo->next_buf)++;
1886 pginfo->next_4k = 0;
1887 }
1888 } else if (pginfo->type == EHCA_MR_PGI_USER) {
1889 chunk = pginfo->next_chunk;
1890 prev_chunk = pginfo->next_chunk;
1891 list_for_each_entry_continue(chunk,
1892 (&(pginfo->region->chunk_list)),
1893 list) {
1894 pgaddr = ( page_to_pfn(chunk->page_list[
1895 pginfo->next_nmap].page)
1896 << PAGE_SHIFT);
1897 *rpage = phys_to_abs(pgaddr +
1898 (pginfo->next_4k * EHCA_PAGESIZE));
1899 if ( !(*rpage) ) {
1900 ehca_gen_err("pgaddr=%lx chunk->page_list[]=%lx"
1901 " next_nmap=%lx next_4k=%lx mr=%p",
1902 pgaddr, (u64)sg_dma_address(
1903 &chunk->page_list[
1904 pginfo->
1905 next_nmap]),
1906 pginfo->next_nmap, pginfo->next_4k,
1907 e_mr);
1908 ret = -EFAULT;
1909 goto ehca_set_pagebuf_1_exit0;
1910 }
1911 (pginfo->page_4k_cnt)++;
1912 (pginfo->next_4k)++;
1913 if (pginfo->next_4k %
1914 (PAGE_SIZE / EHCA_PAGESIZE) == 0) {
1915 (pginfo->page_cnt)++;
1916 (pginfo->next_nmap)++;
1917 pginfo->next_4k = 0;
1918 }
1919 if (pginfo->next_nmap >= chunk->nmap) {
1920 pginfo->next_nmap = 0;
1921 prev_chunk = chunk;
1922 }
1923 break;
1924 }
1925 pginfo->next_chunk =
1926 list_prepare_entry(prev_chunk,
1927 (&(pginfo->region->chunk_list)),
1928 list);
1929 } else if (pginfo->type == EHCA_MR_PGI_FMR) {
1930 fmrlist = pginfo->page_list + pginfo->next_listelem;
1931 *rpage = phys_to_abs((*fmrlist & EHCA_PAGEMASK) +
1932 pginfo->next_4k * EHCA_PAGESIZE);
1933 if ( !(*rpage) ) {
1934 ehca_gen_err("*fmrlist=%lx fmrlist=%p "
1935 "next_listelem=%lx next_4k=%lx",
1936 *fmrlist, fmrlist, pginfo->next_listelem,
1937 pginfo->next_4k);
1938 ret = -EFAULT;
1939 goto ehca_set_pagebuf_1_exit0;
1940 }
1941 (pginfo->page_4k_cnt)++;
1942 (pginfo->next_4k)++;
1943 if (pginfo->next_4k %
1944 (e_mr->fmr_page_size / EHCA_PAGESIZE) == 0) {
1945 (pginfo->page_cnt)++;
1946 (pginfo->next_listelem)++;
1947 pginfo->next_4k = 0;
1948 }
1949 } else {
1950 ehca_gen_err("bad pginfo->type=%x", pginfo->type);
1951 ret = -EFAULT;
1952 goto ehca_set_pagebuf_1_exit0;
1953 }
1954
1955ehca_set_pagebuf_1_exit0:
1956 if (ret)
1957 ehca_gen_err("ret=%x e_mr=%p pginfo=%p type=%x num_pages=%lx "
1958 "num_4k=%lx next_buf=%lx next_4k=%lx rpage=%p "
1959 "page_cnt=%lx page_4k_cnt=%lx next_listelem=%lx "
1960 "region=%p next_chunk=%p next_nmap=%lx", ret, e_mr,
1961 pginfo, pginfo->type, pginfo->num_pages,
1962 pginfo->num_4k, pginfo->next_buf, pginfo->next_4k,
1963 rpage, pginfo->page_cnt, pginfo->page_4k_cnt,
1964 pginfo->next_listelem, pginfo->region,
1965 pginfo->next_chunk, pginfo->next_nmap);
1966 return ret;
1967} /* end ehca_set_pagebuf_1() */
1968
1969/*----------------------------------------------------------------------*/
1970
1971/*
1972 * check whether an MR is a max-MR, i.e. spans the whole of memory;
1973 * returns 1 if it is a max-MR, else 0
1974 */
1975int ehca_mr_is_maxmr(u64 size,
1976 u64 *iova_start)
1977{
1978	/* an MR is treated as a max-MR only if it matches the following: */
1979 if ((size == ((u64)high_memory - PAGE_OFFSET)) &&
1980 (iova_start == (void*)KERNELBASE)) {
1981 ehca_gen_dbg("this is a max-MR");
1982 return 1;
1983 } else
1984 return 0;
1985} /* end ehca_mr_is_maxmr() */
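/*
 * Note (derived from ehca_reg_internal_maxmr() above): the internal max-MR
 * is registered with iova_start == (u64 *)KERNELBASE and
 * size == (u64)high_memory - PAGE_OFFSET, so exactly that pair is what
 * this helper recognizes.
 */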
1986
1987/*----------------------------------------------------------------------*/
1988
1989/* map IB access control bits to hipz access control; used for both MR and MW */
1990void ehca_mrmw_map_acl(int ib_acl,
1991 u32 *hipz_acl)
1992{
1993 *hipz_acl = 0;
1994 if (ib_acl & IB_ACCESS_REMOTE_READ)
1995 *hipz_acl |= HIPZ_ACCESSCTRL_R_READ;
1996 if (ib_acl & IB_ACCESS_REMOTE_WRITE)
1997 *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE;
1998 if (ib_acl & IB_ACCESS_REMOTE_ATOMIC)
1999 *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC;
2000 if (ib_acl & IB_ACCESS_LOCAL_WRITE)
2001 *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE;
2002 if (ib_acl & IB_ACCESS_MW_BIND)
2003 *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND;
2004} /* end ehca_mrmw_map_acl() */
2005
2006/*----------------------------------------------------------------------*/
2007
2008/* sets page size in hipz access control for MR/MW. */
2009void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl) /*INOUT*/
2010{
2011 return; /* HCA supports only 4k */
2012} /* end ehca_mrmw_set_pgsize_hipz_acl() */
2013
2014/*----------------------------------------------------------------------*/
2015
2016/*
2017 * reverse map hipz access control to IB access control bits;
2018 * used for both MR and MW.
2019 */
2020void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
2021 int *ib_acl) /*OUT*/
2022{
2023 *ib_acl = 0;
2024 if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ)
2025 *ib_acl |= IB_ACCESS_REMOTE_READ;
2026 if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE)
2027 *ib_acl |= IB_ACCESS_REMOTE_WRITE;
2028 if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC)
2029 *ib_acl |= IB_ACCESS_REMOTE_ATOMIC;
2030 if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE)
2031 *ib_acl |= IB_ACCESS_LOCAL_WRITE;
2032 if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND)
2033 *ib_acl |= IB_ACCESS_MW_BIND;
2034} /* end ehca_mrmw_reverse_map_acl() */
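/*
 * Illustrative round trip (hypothetical local variables, not driver code):
 *
 *	u32 hipz_acl;
 *	int ib_acl;
 *
 *	ehca_mrmw_map_acl(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ,
 *			  &hipz_acl);
 *	ehca_mrmw_reverse_map_acl(&hipz_acl, &ib_acl);
 *
 * leaves ib_acl == IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ again,
 * since the two helpers above are inverses for the access bits they handle.
 */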
2035
2036
2037/*----------------------------------------------------------------------*/
2038
2039/*
2040 * map HIPZ rc to IB retcodes for MR/MW allocations
2041 * Used for hipz_mr_reg_alloc and hipz_mw_alloc.
2042 */
2043int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
2044{
2045 switch (hipz_rc) {
2046 case H_SUCCESS: /* successful completion */
2047 return 0;
2048 case H_ADAPTER_PARM: /* invalid adapter handle */
2049 case H_RT_PARM: /* invalid resource type */
2050 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
2051 case H_MLENGTH_PARM: /* invalid memory length */
2052 case H_MEM_ACCESS_PARM: /* invalid access controls */
2053 case H_CONSTRAINED: /* resource constraint */
2054 return -EINVAL;
2055 case H_BUSY: /* long busy */
2056 return -EBUSY;
2057 default:
2058 return -EINVAL;
2059 }
2060} /* end ehca_mrmw_map_hrc_alloc() */
2061
2062/*----------------------------------------------------------------------*/
2063
2064/*
2065 * map HIPZ rc to IB retcodes for MR register rpage
2066 * Used for hipz_h_register_rpage_mr at registering last page
2067 */
2068int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc)
2069{
2070 switch (hipz_rc) {
2071 case H_SUCCESS: /* registration complete */
2072 return 0;
2073 case H_PAGE_REGISTERED: /* page registered */
2074 case H_ADAPTER_PARM: /* invalid adapter handle */
2075 case H_RH_PARM: /* invalid resource handle */
2076/* case H_QT_PARM: invalid queue type */
2077 case H_PARAMETER: /*
2078 * invalid logical address,
2079					 * or count zero or greater than 512
2080 */
2081 case H_TABLE_FULL: /* page table full */
2082 case H_HARDWARE: /* HCA not operational */
2083 return -EINVAL;
2084 case H_BUSY: /* long busy */
2085 return -EBUSY;
2086 default:
2087 return -EINVAL;
2088 }
2089} /* end ehca_mrmw_map_hrc_rrpg_last() */
2090
2091/*----------------------------------------------------------------------*/
2092
2093/*
2094 * map HIPZ rc to IB retcodes for MR register rpage
2095 * Used for hipz_h_register_rpage_mr at registering one page, but not last page
2096 */
2097int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc)
2098{
2099 switch (hipz_rc) {
2100 case H_PAGE_REGISTERED: /* page registered */
2101 return 0;
2102 case H_SUCCESS: /* registration complete */
2103 case H_ADAPTER_PARM: /* invalid adapter handle */
2104 case H_RH_PARM: /* invalid resource handle */
2105/* case H_QT_PARM: invalid queue type */
2106 case H_PARAMETER: /*
2107 * invalid logical address,
2108					 * or count zero or greater than 512
2109 */
2110 case H_TABLE_FULL: /* page table full */
2111 case H_HARDWARE: /* HCA not operational */
2112 return -EINVAL;
2113 case H_BUSY: /* long busy */
2114 return -EBUSY;
2115 default:
2116 return -EINVAL;
2117 }
2118} /* end ehca_mrmw_map_hrc_rrpg_notlast() */
2119
2120/*----------------------------------------------------------------------*/
2121
2122/* map HIPZ rc to IB retcodes for MR query. Used for hipz_mr_query. */
2123int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc)
2124{
2125 switch (hipz_rc) {
2126 case H_SUCCESS: /* successful completion */
2127 return 0;
2128 case H_ADAPTER_PARM: /* invalid adapter handle */
2129 case H_RH_PARM: /* invalid resource handle */
2130 return -EINVAL;
2131 case H_BUSY: /* long busy */
2132 return -EBUSY;
2133 default:
2134 return -EINVAL;
2135 }
2136} /* end ehca_mrmw_map_hrc_query_mr() */
2137
2138/*----------------------------------------------------------------------*/
2139/*----------------------------------------------------------------------*/
2140
2141/*
2142 * map HIPZ rc to IB retcodes for freeing MR resource
2143 * Used for hipz_h_free_resource_mr
2144 */
2145int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc)
2146{
2147 switch (hipz_rc) {
2148 case H_SUCCESS: /* resource freed */
2149 return 0;
2150 case H_ADAPTER_PARM: /* invalid adapter handle */
2151 case H_RH_PARM: /* invalid resource handle */
2152 case H_R_STATE: /* invalid resource state */
2153 case H_HARDWARE: /* HCA not operational */
2154 return -EINVAL;
2155 case H_RESOURCE: /* Resource in use */
2156 case H_BUSY: /* long busy */
2157 return -EBUSY;
2158 default:
2159 return -EINVAL;
2160 }
2161} /* end ehca_mrmw_map_hrc_free_mr() */
2162
2163/*----------------------------------------------------------------------*/
2164
2165/*
2166 * map HIPZ rc to IB retcodes for freeing MW resource
2167 * Used for hipz_h_free_resource_mw
2168 */
2169int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc)
2170{
2171 switch (hipz_rc) {
2172 case H_SUCCESS: /* resource freed */
2173 return 0;
2174 case H_ADAPTER_PARM: /* invalid adapter handle */
2175 case H_RH_PARM: /* invalid resource handle */
2176 case H_R_STATE: /* invalid resource state */
2177 case H_HARDWARE: /* HCA not operational */
2178 return -EINVAL;
2179 case H_RESOURCE: /* Resource in use */
2180 case H_BUSY: /* long busy */
2181 return -EBUSY;
2182 default:
2183 return -EINVAL;
2184 }
2185} /* end ehca_mrmw_map_hrc_free_mw() */
2186
2187/*----------------------------------------------------------------------*/
2188
2189/*
2190 * map HIPZ rc to IB retcodes for SMR registrations
2191 * Used for hipz_h_register_smr.
2192 */
2193int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc)
2194{
2195 switch (hipz_rc) {
2196 case H_SUCCESS: /* successful completion */
2197 return 0;
2198 case H_ADAPTER_PARM: /* invalid adapter handle */
2199 case H_RH_PARM: /* invalid resource handle */
2200 case H_MEM_PARM: /* invalid MR virtual address */
2201 case H_MEM_ACCESS_PARM: /* invalid access controls */
2202 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
2203 return -EINVAL;
2204 case H_BUSY: /* long busy */
2205 return -EBUSY;
2206 default:
2207 return -EINVAL;
2208 }
2209} /* end ehca_mrmw_map_hrc_reg_smr() */
2210
2211/*----------------------------------------------------------------------*/
2212
2213/*
2214 * MR destructor and constructor, used by the reregister MR verb:
2215 * resets all fields of an ehca_mr to 0, except the embedded
2216 * struct ib_mr and the spinlock
2217 */
2218void ehca_mr_deletenew(struct ehca_mr *mr)
2219{
2220 mr->flags = 0;
2221 mr->num_pages = 0;
2222 mr->num_4k = 0;
2223 mr->acl = 0;
2224 mr->start = NULL;
2225 mr->fmr_page_size = 0;
2226 mr->fmr_max_pages = 0;
2227 mr->fmr_max_maps = 0;
2228 mr->fmr_map_cnt = 0;
2229 memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle));
2230 memset(&mr->galpas, 0, sizeof(mr->galpas));
2231 mr->nr_of_pages = 0;
2232 mr->pagearray = NULL;
2233} /* end ehca_mr_deletenew() */
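/*
 * Sketch of the intended reregister flow (hypothetical; presumably reached
 * via ehca_rereg_mr(), and the new_* variable names here are placeholders):
 * wipe the software state of the MR while keeping its ib_mr and spinlock
 * alive, then register the region again with the new parameters:
 *
 *	ehca_mr_deletenew(e_mr);
 *	ret = ehca_reg_mr(shca, e_mr, new_iova_start, new_size, new_acl,
 *			  e_pd, pginfo, &lkey, &rkey);
 */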
2234
2235int ehca_init_mrmw_cache(void)
2236{
2237 mr_cache = kmem_cache_create("ehca_cache_mr",
2238 sizeof(struct ehca_mr), 0,
2239 SLAB_HWCACHE_ALIGN,
2240 NULL, NULL);
2241 if (!mr_cache)
2242 return -ENOMEM;
2243 mw_cache = kmem_cache_create("ehca_cache_mw",
2244 sizeof(struct ehca_mw), 0,
2245 SLAB_HWCACHE_ALIGN,
2246 NULL, NULL);
2247 if (!mw_cache) {
2248 kmem_cache_destroy(mr_cache);
2249 mr_cache = NULL;
2250 return -ENOMEM;
2251 }
2252 return 0;
2253}
2254
2255void ehca_cleanup_mrmw_cache(void)
2256{
2257 if (mr_cache)
2258 kmem_cache_destroy(mr_cache);
2259 if (mw_cache)
2260 kmem_cache_destroy(mw_cache);
2261}
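/*
 * Lifecycle sketch (the caller is assumed to be the driver's module
 * init/exit path, which is not shown in this file): the MR/MW caches must
 * exist before the first MR/MW verb runs, and cleanup is safe to call
 * unconditionally because it checks the pointers for NULL:
 *
 *	if (ehca_init_mrmw_cache())
 *		return -ENOMEM;		// neither cache is left allocated
 *	// ... MR/MW verbs allocate ehca_mr/ehca_mw objects from the caches ...
 *	ehca_cleanup_mrmw_cache();
 */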
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h
new file mode 100644
index 000000000000..d936e40a5748
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h
@@ -0,0 +1,140 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * MR/MW declarations and inline functions
5 *
6 * Authors: Dietmar Decker <ddecker@de.ibm.com>
7 * Christoph Raisch <raisch@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#ifndef _EHCA_MRMW_H_
43#define _EHCA_MRMW_H_
44
45int ehca_reg_mr(struct ehca_shca *shca,
46 struct ehca_mr *e_mr,
47 u64 *iova_start,
48 u64 size,
49 int acl,
50 struct ehca_pd *e_pd,
51 struct ehca_mr_pginfo *pginfo,
52 u32 *lkey,
53 u32 *rkey);
54
55int ehca_reg_mr_rpages(struct ehca_shca *shca,
56 struct ehca_mr *e_mr,
57 struct ehca_mr_pginfo *pginfo);
58
59int ehca_rereg_mr(struct ehca_shca *shca,
60 struct ehca_mr *e_mr,
61 u64 *iova_start,
62 u64 size,
63 int mr_access_flags,
64 struct ehca_pd *e_pd,
65 struct ehca_mr_pginfo *pginfo,
66 u32 *lkey,
67 u32 *rkey);
68
69int ehca_unmap_one_fmr(struct ehca_shca *shca,
70 struct ehca_mr *e_fmr);
71
72int ehca_reg_smr(struct ehca_shca *shca,
73 struct ehca_mr *e_origmr,
74 struct ehca_mr *e_newmr,
75 u64 *iova_start,
76 int acl,
77 struct ehca_pd *e_pd,
78 u32 *lkey,
79 u32 *rkey);
80
81int ehca_reg_internal_maxmr(struct ehca_shca *shca,
82 struct ehca_pd *e_pd,
83 struct ehca_mr **maxmr);
84
85int ehca_reg_maxmr(struct ehca_shca *shca,
86 struct ehca_mr *e_newmr,
87 u64 *iova_start,
88 int acl,
89 struct ehca_pd *e_pd,
90 u32 *lkey,
91 u32 *rkey);
92
93int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
94
95int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
96 int num_phys_buf,
97 u64 *iova_start,
98 u64 *size);
99
100int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
101 u64 *page_list,
102 int list_len);
103
104int ehca_set_pagebuf(struct ehca_mr *e_mr,
105 struct ehca_mr_pginfo *pginfo,
106 u32 number,
107 u64 *kpage);
108
109int ehca_set_pagebuf_1(struct ehca_mr *e_mr,
110 struct ehca_mr_pginfo *pginfo,
111 u64 *rpage);
112
113int ehca_mr_is_maxmr(u64 size,
114 u64 *iova_start);
115
116void ehca_mrmw_map_acl(int ib_acl,
117 u32 *hipz_acl);
118
119void ehca_mrmw_set_pgsize_hipz_acl(u32 *hipz_acl);
120
121void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl,
122 int *ib_acl);
123
124int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc);
125
126int ehca_mrmw_map_hrc_rrpg_last(const u64 hipz_rc);
127
128int ehca_mrmw_map_hrc_rrpg_notlast(const u64 hipz_rc);
129
130int ehca_mrmw_map_hrc_query_mr(const u64 hipz_rc);
131
132int ehca_mrmw_map_hrc_free_mr(const u64 hipz_rc);
133
134int ehca_mrmw_map_hrc_free_mw(const u64 hipz_rc);
135
136int ehca_mrmw_map_hrc_reg_smr(const u64 hipz_rc);
137
138void ehca_mr_deletenew(struct ehca_mr *mr);
139
140#endif /*_EHCA_MRMW_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c
new file mode 100644
index 000000000000..2c3cdc6f7b39
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_pd.c
@@ -0,0 +1,114 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * PD functions
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 *
8 * Copyright (c) 2005 IBM Corporation
9 *
10 * All rights reserved.
11 *
12 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
13 * BSD.
14 *
15 * OpenIB BSD License
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are met:
19 *
20 * Redistributions of source code must retain the above copyright notice, this
21 * list of conditions and the following disclaimer.
22 *
23 * Redistributions in binary form must reproduce the above copyright notice,
24 * this list of conditions and the following disclaimer in the documentation
25 * and/or other materials
26 * provided with the distribution.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41#include <asm/current.h>
42
43#include "ehca_tools.h"
44#include "ehca_iverbs.h"
45
46static struct kmem_cache *pd_cache;
47
48struct ib_pd *ehca_alloc_pd(struct ib_device *device,
49 struct ib_ucontext *context, struct ib_udata *udata)
50{
51 struct ehca_pd *pd;
52
53 pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL);
54 if (!pd) {
55 ehca_err(device, "device=%p context=%p out of memory",
56 device, context);
57 return ERR_PTR(-ENOMEM);
58 }
59
60 memset(pd, 0, sizeof(struct ehca_pd));
61 pd->ownpid = current->tgid;
62
63 /*
64 * Kernel PD: when context == NULL
65 * User PD: when context != NULL
66 */
67 if (!context) {
68 /*
69 * Kernel PDs after init always reuse
70 * the one created in ehca_shca_reopen()
71 */
72 struct ehca_shca *shca = container_of(device, struct ehca_shca,
73 ib_device);
74 pd->fw_pd.value = shca->pd->fw_pd.value;
75 } else
76 pd->fw_pd.value = (u64)pd;
77
78 return &pd->ib_pd;
79}
80
81int ehca_dealloc_pd(struct ib_pd *pd)
82{
83 u32 cur_pid = current->tgid;
84 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
85
86 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
87 my_pd->ownpid != cur_pid) {
88 ehca_err(pd->device, "Invalid caller pid=%x ownpid=%x",
89 cur_pid, my_pd->ownpid);
90 return -EINVAL;
91 }
92
93 kmem_cache_free(pd_cache,
94 container_of(pd, struct ehca_pd, ib_pd));
95
96 return 0;
97}
98
99int ehca_init_pd_cache(void)
100{
101 pd_cache = kmem_cache_create("ehca_cache_pd",
102 sizeof(struct ehca_pd), 0,
103 SLAB_HWCACHE_ALIGN,
104 NULL, NULL);
105 if (!pd_cache)
106 return -ENOMEM;
107 return 0;
108}
109
110void ehca_cleanup_pd_cache(void)
111{
112 if (pd_cache)
113 kmem_cache_destroy(pd_cache);
114}
diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h
new file mode 100644
index 000000000000..8707d297ce4c
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qes.h
@@ -0,0 +1,259 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Hardware request structures
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Reinhard Ernst <rernst@de.ibm.com>
8 * Christoph Raisch <raisch@de.ibm.com>
9 *
10 * Copyright (c) 2005 IBM Corporation
11 *
12 * All rights reserved.
13 *
14 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
15 * BSD.
16 *
17 * OpenIB BSD License
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are met:
21 *
22 * Redistributions of source code must retain the above copyright notice, this
23 * list of conditions and the following disclaimer.
24 *
25 * Redistributions in binary form must reproduce the above copyright notice,
26 * this list of conditions and the following disclaimer in the documentation
27 * and/or other materials
28 * provided with the distribution.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43
44#ifndef _EHCA_QES_H_
45#define _EHCA_QES_H_
46
47#include "ehca_tools.h"
48
49/* virtual scatter gather entry to specify remote addresses with length */
50struct ehca_vsgentry {
51 u64 vaddr;
52 u32 lkey;
53 u32 length;
54};
55
56#define GRH_FLAG_MASK EHCA_BMASK_IBM(7,7)
57#define GRH_IPVERSION_MASK EHCA_BMASK_IBM(0,3)
58#define GRH_TCLASS_MASK EHCA_BMASK_IBM(4,12)
59#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13,31)
60#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32,47)
61#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48,55)
62#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56,63)
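/*
 * Bitfield sketch (assumes the EHCA_BMASK_* helpers from ehca_tools.h and
 * IBM bit numbering, i.e. bit 0 is the MSB of the 64-bit word): the masks
 * above describe the first GRH quadword, so building it would look roughly
 * like this, with the values shown only as examples:
 *
 *	u64 grh_qword0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6)
 *		       | EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B)
 *		       | EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, 0xFF);
 */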
63
64/*
65 * Unreliable Datagram Address Vector Format
66 * see IBTA Vol1 chapter 8.3 Global Routing Header
67 */
68struct ehca_ud_av {
69 u8 sl;
70 u8 lnh;
71 u16 dlid;
72 u8 reserved1;
73 u8 reserved2;
74 u8 reserved3;
75 u8 slid_path_bits;
76 u8 reserved4;
77 u8 ipd;
78 u8 reserved5;
79 u8 pmtu;
80 u32 reserved6;
81 u64 reserved7;
82 union {
83 struct {
84 u64 word_0; /* IPVer field: always 6 */
85 /* NextHdr field: should be 0x1B for IB transport */
86 u64 word_1;
87 u64 word_2;
88 u64 word_3;
89 u64 word_4;
90 } grh;
91 struct {
92 u32 wd_0;
93 u32 wd_1;
94 /* DWord_1 --> SGID */
95
96 u32 sgid_wd3;
97 u32 sgid_wd2;
98
99 u32 sgid_wd1;
100 u32 sgid_wd0;
101 /* DWord_3 --> DGID */
102
103 u32 dgid_wd3;
104 u32 dgid_wd2;
105
106 u32 dgid_wd1;
107 u32 dgid_wd0;
108 } grh_l;
109 };
110};
111
112/* maximum number of sg entries allowed in a WQE */
113#define MAX_WQE_SG_ENTRIES 252
114
115#define WQE_OPTYPE_SEND 0x80
116#define WQE_OPTYPE_RDMAREAD 0x40
117#define WQE_OPTYPE_RDMAWRITE 0x20
118#define WQE_OPTYPE_CMPSWAP 0x10
119#define WQE_OPTYPE_FETCHADD 0x08
120#define WQE_OPTYPE_BIND 0x04
121
122#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80
123#define WQE_WRFLAG_FENCE 0x40
124#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20
125#define WQE_WRFLAG_SOLIC_EVENT 0x10
126
127#define WQEF_CACHE_HINT 0x80
128#define WQEF_CACHE_HINT_RD_WR 0x40
129#define WQEF_TIMED_WQE 0x20
130#define WQEF_PURGE 0x08
131#define WQEF_HIGH_NIBBLE 0xF0
132
133#define MW_BIND_ACCESSCTRL_R_WRITE 0x40
134#define MW_BIND_ACCESSCTRL_R_READ 0x20
135#define MW_BIND_ACCESSCTRL_R_ATOMIC 0x10
136
137struct ehca_wqe {
138 u64 work_request_id;
139 u8 optype;
140 u8 wr_flag;
141 u16 pkeyi;
142 u8 wqef;
143 u8 nr_of_data_seg;
144 u16 wqe_provided_slid;
145 u32 destination_qp_number;
146 u32 resync_psn_sqp;
147 u32 local_ee_context_qkey;
148 u32 immediate_data;
149 union {
150 struct {
151 u64 remote_virtual_adress;
152 u32 rkey;
153 u32 reserved;
154 u64 atomic_1st_op_dma_len;
155 u64 atomic_2nd_op;
156 struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
157
158 } nud;
159 struct {
160 u64 ehca_ud_av_ptr;
161 u64 reserved1;
162 u64 reserved2;
163 u64 reserved3;
164 struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
165 } ud_avp;
166 struct {
167 struct ehca_ud_av ud_av;
168 struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES -
169 2];
170 } ud_av;
171 struct {
172 u64 reserved0;
173 u64 reserved1;
174 u64 reserved2;
175 u64 reserved3;
176 struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES];
177 } all_rcv;
178
179 struct {
180 u64 reserved;
181 u32 rkey;
182 u32 old_rkey;
183 u64 reserved1;
184 u64 reserved2;
185 u64 virtual_address;
186 u32 reserved3;
187 u32 length;
188 u32 reserved4;
189 u16 reserved5;
190 u8 reserved6;
191 u8 lr_ctl;
192 u32 lkey;
193 u32 reserved7;
194 u64 reserved8;
195 u64 reserved9;
196 u64 reserved10;
197 u64 reserved11;
198 } bind;
199 struct {
200 u64 reserved12;
201 u64 reserved13;
202 u32 size;
203 u32 start;
204 } inline_data;
205 } u;
206
207};
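/*
 * Size sketch: every union member ends in a variable-length sg_list, so the
 * actual WQE stride is not sizeof(struct ehca_wqe) but the offset of the
 * first unused scatter/gather entry. ehca_create_qp() computes it per QP
 * type, e.g. (nr_send_sges is a placeholder for the negotiated SGE count):
 *
 *	swqe_size = offsetof(struct ehca_wqe,
 *			     u.nud.sg_list[nr_send_sges]);	// RC/UC
 *	swqe_size = offsetof(struct ehca_wqe,
 *			     u.ud_av.sg_list[nr_send_sges]);	// UD/GSI/SMI
 */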
208
209#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0,0)
210#define WC_IMM_DATA EHCA_BMASK_IBM(1,1)
211#define WC_GRH_PRESENT EHCA_BMASK_IBM(2,2)
212#define WC_SE_BIT EHCA_BMASK_IBM(3,3)
213#define WC_STATUS_ERROR_BIT 0x80000000
214#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800
215#define WC_STATUS_PURGE_BIT 0x10
216
217struct ehca_cqe {
218 u64 work_request_id;
219 u8 optype;
220 u8 w_completion_flags;
221 u16 reserved1;
222 u32 nr_bytes_transferred;
223 u32 immediate_data;
224 u32 local_qp_number;
225 u8 freed_resource_count;
226 u8 service_level;
227 u16 wqe_count;
228 u32 qp_token;
229 u32 qkey_ee_token;
230 u32 remote_qp_number;
231 u16 dlid;
232 u16 rlid;
233 u16 reserved2;
234 u16 pkey_index;
235 u32 cqe_timestamp;
236 u32 wqe_timestamp;
237 u8 wqe_timestamp_valid;
238 u8 reserved3;
239 u8 reserved4;
240 u8 cqe_flags;
241 u32 status;
242};
243
244struct ehca_eqe {
245 u64 entry;
246};
247
248struct ehca_mrte {
249 u64 starting_va;
250 u64 length; /* length of memory region in bytes*/
251 u32 pd;
252 u8 key_instance;
253 u8 pagesize;
254 u8 mr_control;
255 u8 local_remote_access_ctrl;
256 u8 reserved[0x20 - 0x18];
257 u64 at_pointer[4];
258};
259#endif /*_EHCA_QES_H_*/
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
new file mode 100644
index 000000000000..4394123cdbd7
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -0,0 +1,1507 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * QP functions
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com>
9 * Heiko J Schick <schickhj@de.ibm.com>
10 *
11 * Copyright (c) 2005 IBM Corporation
12 *
13 * All rights reserved.
14 *
15 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
16 * BSD.
17 *
18 * OpenIB BSD License
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions are met:
22 *
23 * Redistributions of source code must retain the above copyright notice, this
24 * list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright notice,
27 * this list of conditions and the following disclaimer in the documentation
28 * and/or other materials
29 * provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
39 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGE.
42 */
43
44
45#include <asm/current.h>
46
47#include "ehca_classes.h"
48#include "ehca_tools.h"
49#include "ehca_qes.h"
50#include "ehca_iverbs.h"
51#include "hcp_if.h"
52#include "hipz_fns.h"
53
54static struct kmem_cache *qp_cache;
55
56/*
57 * attributes not supported by query qp
58 */
59#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
60 IB_QP_MAX_QP_RD_ATOMIC | \
61 IB_QP_ACCESS_FLAGS | \
62 IB_QP_EN_SQD_ASYNC_NOTIFY)
63
64/*
65 * ehca (internal) qp state values
66 */
67enum ehca_qp_state {
68 EHCA_QPS_RESET = 1,
69 EHCA_QPS_INIT = 2,
70 EHCA_QPS_RTR = 3,
71 EHCA_QPS_RTS = 5,
72 EHCA_QPS_SQD = 6,
73 EHCA_QPS_SQE = 8,
74 EHCA_QPS_ERR = 128
75};
76
77/*
78 * qp state transitions as defined by IB Arch Rel 1.1 page 431
79 */
80enum ib_qp_statetrans {
81 IB_QPST_ANY2RESET,
82 IB_QPST_ANY2ERR,
83 IB_QPST_RESET2INIT,
84 IB_QPST_INIT2RTR,
85 IB_QPST_INIT2INIT,
86 IB_QPST_RTR2RTS,
87 IB_QPST_RTS2SQD,
88 IB_QPST_RTS2RTS,
89 IB_QPST_SQD2RTS,
90 IB_QPST_SQE2RTS,
91 IB_QPST_SQD2SQD,
92 IB_QPST_MAX /* nr of transitions, this must be last!!! */
93};
94
95/*
96 * ib2ehca_qp_state maps IB to ehca qp_state
97 * returns ehca qp state corresponding to given ib qp state
98 */
99static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state)
100{
101 switch (ib_qp_state) {
102 case IB_QPS_RESET:
103 return EHCA_QPS_RESET;
104 case IB_QPS_INIT:
105 return EHCA_QPS_INIT;
106 case IB_QPS_RTR:
107 return EHCA_QPS_RTR;
108 case IB_QPS_RTS:
109 return EHCA_QPS_RTS;
110 case IB_QPS_SQD:
111 return EHCA_QPS_SQD;
112 case IB_QPS_SQE:
113 return EHCA_QPS_SQE;
114 case IB_QPS_ERR:
115 return EHCA_QPS_ERR;
116 default:
117 ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state);
118 return -EINVAL;
119 }
120}
121
122/*
123 * ehca2ib_qp_state maps ehca to IB qp_state
124 * returns ib qp state corresponding to given ehca qp state
125 */
126static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state
127 ehca_qp_state)
128{
129 switch (ehca_qp_state) {
130 case EHCA_QPS_RESET:
131 return IB_QPS_RESET;
132 case EHCA_QPS_INIT:
133 return IB_QPS_INIT;
134 case EHCA_QPS_RTR:
135 return IB_QPS_RTR;
136 case EHCA_QPS_RTS:
137 return IB_QPS_RTS;
138 case EHCA_QPS_SQD:
139 return IB_QPS_SQD;
140 case EHCA_QPS_SQE:
141 return IB_QPS_SQE;
142 case EHCA_QPS_ERR:
143 return IB_QPS_ERR;
144 default:
145 ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state);
146 return -EINVAL;
147 }
148}
149
150/*
151 * ehca_qp_type used as index for req_attr and opt_attr of
152 * struct ehca_modqp_statetrans
153 */
154enum ehca_qp_type {
155 QPT_RC = 0,
156 QPT_UC = 1,
157 QPT_UD = 2,
158 QPT_SQP = 3,
159 QPT_MAX
160};
161
162/*
163 * ib2ehcaqptype maps Ib to ehca qp_type
164 * returns ehca qp type corresponding to ib qp type
165 */
166static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype)
167{
168 switch (ibqptype) {
169 case IB_QPT_SMI:
170 case IB_QPT_GSI:
171 return QPT_SQP;
172 case IB_QPT_RC:
173 return QPT_RC;
174 case IB_QPT_UC:
175 return QPT_UC;
176 case IB_QPT_UD:
177 return QPT_UD;
178 default:
179 ehca_gen_err("Invalid ibqptype=%x", ibqptype);
180 return -EINVAL;
181 }
182}
183
184static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
185 int ib_tostate)
186{
187 int index = -EINVAL;
188 switch (ib_tostate) {
189 case IB_QPS_RESET:
190 index = IB_QPST_ANY2RESET;
191 break;
192 case IB_QPS_INIT:
193 switch (ib_fromstate) {
194 case IB_QPS_RESET:
195 index = IB_QPST_RESET2INIT;
196 break;
197 case IB_QPS_INIT:
198 index = IB_QPST_INIT2INIT;
199 break;
200 }
201 break;
202 case IB_QPS_RTR:
203 if (ib_fromstate == IB_QPS_INIT)
204 index = IB_QPST_INIT2RTR;
205 break;
206 case IB_QPS_RTS:
207 switch (ib_fromstate) {
208 case IB_QPS_RTR:
209 index = IB_QPST_RTR2RTS;
210 break;
211 case IB_QPS_RTS:
212 index = IB_QPST_RTS2RTS;
213 break;
214 case IB_QPS_SQD:
215 index = IB_QPST_SQD2RTS;
216 break;
217 case IB_QPS_SQE:
218 index = IB_QPST_SQE2RTS;
219 break;
220 }
221 break;
222 case IB_QPS_SQD:
223 if (ib_fromstate == IB_QPS_RTS)
224 index = IB_QPST_RTS2SQD;
225 break;
226 case IB_QPS_SQE:
227 break;
228 case IB_QPS_ERR:
229 index = IB_QPST_ANY2ERR;
230 break;
231 default:
232 break;
233 }
234 return index;
235}
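/*
 * Usage sketch: the caller passes the (current, new) IB state pair and gets
 * a transition index back, or a negative value for a pair the table does
 * not allow, e.g.
 *
 *	get_modqp_statetrans(IB_QPS_RESET, IB_QPS_INIT) == IB_QPST_RESET2INIT
 *	get_modqp_statetrans(IB_QPS_SQE,   IB_QPS_SQD)  == -EINVAL
 */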
236
237enum ehca_service_type {
238 ST_RC = 0,
239 ST_UC = 1,
240 ST_RD = 2,
241 ST_UD = 3
242};
243
244/*
245 * ibqptype2servicetype returns hcp service type corresponding to given
246 * ib qp type used by create_qp()
247 */
248static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
249{
250 switch (ibqptype) {
251 case IB_QPT_SMI:
252 case IB_QPT_GSI:
253 return ST_UD;
254 case IB_QPT_RC:
255 return ST_RC;
256 case IB_QPT_UC:
257 return ST_UC;
258 case IB_QPT_UD:
259 return ST_UD;
260 case IB_QPT_RAW_IPV6:
261 return -EINVAL;
262 case IB_QPT_RAW_ETY:
263 return -EINVAL;
264 default:
265 ehca_gen_err("Invalid ibqptype=%x", ibqptype);
266 return -EINVAL;
267 }
268}
269
270/*
271 * init_qp_queues initializes/constructs r/squeue and registers queue pages.
272 */
273static inline int init_qp_queues(struct ehca_shca *shca,
274 struct ehca_qp *my_qp,
275 int nr_sq_pages,
276 int nr_rq_pages,
277 int swqe_size,
278 int rwqe_size,
279 int nr_send_sges, int nr_receive_sges)
280{
281 int ret, cnt, ipz_rc;
282 void *vpage;
283 u64 rpage, h_ret;
284 struct ib_device *ib_dev = &shca->ib_device;
285 struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
286
287 ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue,
288 nr_sq_pages,
289 EHCA_PAGESIZE, swqe_size, nr_send_sges);
290 if (!ipz_rc) {
291 ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x",
292 ipz_rc);
293 return -EBUSY;
294 }
295
296 ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue,
297 nr_rq_pages,
298 EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
299 if (!ipz_rc) {
300 ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
301 ipz_rc);
302 ret = -EBUSY;
303 goto init_qp_queues0;
304 }
305 /* register SQ pages */
306 for (cnt = 0; cnt < nr_sq_pages; cnt++) {
307 vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
308 if (!vpage) {
309 ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
310 "failed p_vpage= %p", vpage);
311 ret = -EINVAL;
312 goto init_qp_queues1;
313 }
314 rpage = virt_to_abs(vpage);
315
316 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
317 my_qp->ipz_qp_handle,
318 &my_qp->pf, 0, 0,
319 rpage, 1,
320 my_qp->galpas.kernel);
321 if (h_ret < H_SUCCESS) {
322 ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
323 " failed rc=%lx", h_ret);
324 ret = ehca2ib_return_code(h_ret);
325 goto init_qp_queues1;
326 }
327 }
328
329 ipz_qeit_reset(&my_qp->ipz_squeue);
330
331 /* register RQ pages */
332 for (cnt = 0; cnt < nr_rq_pages; cnt++) {
333 vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
334 if (!vpage) {
335 ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
336 "failed p_vpage = %p", vpage);
337 ret = -EINVAL;
338 goto init_qp_queues1;
339 }
340
341 rpage = virt_to_abs(vpage);
342
343 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
344 my_qp->ipz_qp_handle,
345 &my_qp->pf, 0, 1,
346 rpage, 1,my_qp->galpas.kernel);
347 if (h_ret < H_SUCCESS) {
348 ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
349 "rc=%lx", h_ret);
350 ret = ehca2ib_return_code(h_ret);
351 goto init_qp_queues1;
352 }
353 if (cnt == (nr_rq_pages - 1)) { /* last page! */
354 if (h_ret != H_SUCCESS) {
355 ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
356 "h_ret= %lx ", h_ret);
357 ret = ehca2ib_return_code(h_ret);
358 goto init_qp_queues1;
359 }
360 vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
361 if (vpage) {
362 ehca_err(ib_dev, "ipz_qpageit_get_inc() "
363 "should not succeed vpage=%p", vpage);
364 ret = -EINVAL;
365 goto init_qp_queues1;
366 }
367 } else {
368 if (h_ret != H_PAGE_REGISTERED) {
369 ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
370 "h_ret= %lx ", h_ret);
371 ret = ehca2ib_return_code(h_ret);
372 goto init_qp_queues1;
373 }
374 }
375 }
376
377 ipz_qeit_reset(&my_qp->ipz_rqueue);
378
379 return 0;
380
381init_qp_queues1:
382 ipz_queue_dtor(&my_qp->ipz_rqueue);
383init_qp_queues0:
384 ipz_queue_dtor(&my_qp->ipz_squeue);
385 return ret;
386}
387
388struct ib_qp *ehca_create_qp(struct ib_pd *pd,
389 struct ib_qp_init_attr *init_attr,
390 struct ib_udata *udata)
391{
392 static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
393 static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
394 struct ehca_qp *my_qp;
395 struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
396 struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
397 ib_device);
398 struct ib_ucontext *context = NULL;
399 u64 h_ret;
400 int max_send_sge, max_recv_sge, ret;
401
402 /* h_call's out parameters */
403 struct ehca_alloc_qp_parms parms;
404 u32 swqe_size = 0, rwqe_size = 0;
405 u8 daqp_completion, isdaqp;
406 unsigned long flags;
407
408 if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
409 init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
410 ehca_err(pd->device, "init_attr->sq_sig_type=%x not allowed",
411 init_attr->sq_sig_type);
412 return ERR_PTR(-EINVAL);
413 }
414
415 /* save daqp completion bits */
416 daqp_completion = init_attr->qp_type & 0x60;
417 /* save daqp bit */
418 isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
419 init_attr->qp_type = init_attr->qp_type & 0x1F;
420
421 if (init_attr->qp_type != IB_QPT_UD &&
422 init_attr->qp_type != IB_QPT_SMI &&
423 init_attr->qp_type != IB_QPT_GSI &&
424 init_attr->qp_type != IB_QPT_UC &&
425 init_attr->qp_type != IB_QPT_RC) {
426 ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
427 return ERR_PTR(-EINVAL);
428 }
429 if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
430 && isdaqp) {
431 ehca_err(pd->device, "unsupported LL QP Type=%x",
432 init_attr->qp_type);
433 return ERR_PTR(-EINVAL);
434 } else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
435 (init_attr->cap.max_send_wr > 255 ||
436 init_attr->cap.max_recv_wr > 255 )) {
437 ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
438 "or max_rq_wr=%x for QP Type=%x",
439 init_attr->cap.max_send_wr,
440 init_attr->cap.max_recv_wr,init_attr->qp_type);
441 return ERR_PTR(-EINVAL);
442 } else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
443 init_attr->cap.max_send_wr > 255) {
444 ehca_err(pd->device,
445 "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
446 init_attr->cap.max_send_wr, init_attr->qp_type);
447 return ERR_PTR(-EINVAL);
448 }
449
450 if (pd->uobject && udata)
451 context = pd->uobject->context;
452
453 my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL);
454 if (!my_qp) {
455 ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
456 return ERR_PTR(-ENOMEM);
457 }
458
459 memset(my_qp, 0, sizeof(struct ehca_qp));
460 memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
461 spin_lock_init(&my_qp->spinlock_s);
462 spin_lock_init(&my_qp->spinlock_r);
463
464 my_qp->recv_cq =
465 container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
466 my_qp->send_cq =
467 container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
468
469 my_qp->init_attr = *init_attr;
470
471 do {
472 if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
473 ret = -ENOMEM;
474 ehca_err(pd->device, "Can't reserve idr resources.");
475 goto create_qp_exit0;
476 }
477
478 spin_lock_irqsave(&ehca_qp_idr_lock, flags);
479 ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
480 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
481
482 } while (ret == -EAGAIN);
483
484 if (ret) {
485 ret = -ENOMEM;
486 ehca_err(pd->device, "Can't allocate new idr entry.");
487 goto create_qp_exit0;
488 }
489
490 parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
491 if (parms.servicetype < 0) {
492 ret = -EINVAL;
493 ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
494 goto create_qp_exit0;
495 }
496
497 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
498 parms.sigtype = HCALL_SIGT_EVERY;
499 else
500 parms.sigtype = HCALL_SIGT_BY_WQE;
501
502 /* UD_AV CIRCUMVENTION */
503 max_send_sge = init_attr->cap.max_send_sge;
504 max_recv_sge = init_attr->cap.max_recv_sge;
505 if (IB_QPT_UD == init_attr->qp_type ||
506 IB_QPT_GSI == init_attr->qp_type ||
507 IB_QPT_SMI == init_attr->qp_type) {
508 max_send_sge += 2;
509 max_recv_sge += 2;
510 }
511
512 parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
513 parms.daqp_ctrl = isdaqp | daqp_completion;
514 parms.pd = my_pd->fw_pd;
515 parms.max_recv_sge = max_recv_sge;
516 parms.max_send_sge = max_send_sge;
517
518 h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
519
520 if (h_ret != H_SUCCESS) {
521 ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
522 h_ret);
523 ret = ehca2ib_return_code(h_ret);
524 goto create_qp_exit1;
525 }
526
527 switch (init_attr->qp_type) {
528 case IB_QPT_RC:
529 if (isdaqp == 0) {
530 swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
531 (parms.act_nr_send_sges)]);
532 rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
533 (parms.act_nr_recv_sges)]);
534 } else { /* for daqp we need to use msg size, not wqe size */
535 swqe_size = da_rc_msg_size[max_send_sge];
536 rwqe_size = da_rc_msg_size[max_recv_sge];
537 parms.act_nr_send_sges = 1;
538 parms.act_nr_recv_sges = 1;
539 }
540 break;
541 case IB_QPT_UC:
542 swqe_size = offsetof(struct ehca_wqe,
543 u.nud.sg_list[parms.act_nr_send_sges]);
544 rwqe_size = offsetof(struct ehca_wqe,
545 u.nud.sg_list[parms.act_nr_recv_sges]);
546 break;
547
548 case IB_QPT_UD:
549 case IB_QPT_GSI:
550 case IB_QPT_SMI:
551 /* UD circumvention */
552 parms.act_nr_recv_sges -= 2;
553 parms.act_nr_send_sges -= 2;
554 if (isdaqp) {
555 swqe_size = da_ud_sq_msg_size[max_send_sge];
556 rwqe_size = da_rc_msg_size[max_recv_sge];
557 parms.act_nr_send_sges = 1;
558 parms.act_nr_recv_sges = 1;
559 } else {
560 swqe_size = offsetof(struct ehca_wqe,
561 u.ud_av.sg_list[parms.act_nr_send_sges]);
562 rwqe_size = offsetof(struct ehca_wqe,
563 u.ud_av.sg_list[parms.act_nr_recv_sges]);
564 }
565
566 if (IB_QPT_GSI == init_attr->qp_type ||
567 IB_QPT_SMI == init_attr->qp_type) {
568 parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
569 parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
570 parms.act_nr_send_sges = init_attr->cap.max_send_sge;
571 parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
572 my_qp->real_qp_num =
573 (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
574 }
575
576 break;
577
578 default:
579 break;
580 }
581
582 /* initializes r/squeue and registers queue pages */
583 ret = init_qp_queues(shca, my_qp,
584 parms.nr_sq_pages, parms.nr_rq_pages,
585 swqe_size, rwqe_size,
586 parms.act_nr_send_sges, parms.act_nr_recv_sges);
587 if (ret) {
588 ehca_err(pd->device,
589 "Couldn't initialize r/squeue and pages ret=%x", ret);
590 goto create_qp_exit2;
591 }
592
593 my_qp->ib_qp.pd = &my_pd->ib_pd;
594 my_qp->ib_qp.device = my_pd->ib_pd.device;
595
596 my_qp->ib_qp.recv_cq = init_attr->recv_cq;
597 my_qp->ib_qp.send_cq = init_attr->send_cq;
598
599 my_qp->ib_qp.qp_num = my_qp->real_qp_num;
600 my_qp->ib_qp.qp_type = init_attr->qp_type;
601
602 my_qp->qp_type = init_attr->qp_type;
603 my_qp->ib_qp.srq = init_attr->srq;
604
605 my_qp->ib_qp.qp_context = init_attr->qp_context;
606 my_qp->ib_qp.event_handler = init_attr->event_handler;
607
608 init_attr->cap.max_inline_data = 0; /* not supported yet */
609 init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
610 init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
611 init_attr->cap.max_send_sge = parms.act_nr_send_sges;
612 init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
613
614 /* NOTE: define_aqp0() not supported yet */
615 if (init_attr->qp_type == IB_QPT_GSI) {
616 h_ret = ehca_define_sqp(shca, my_qp, init_attr);
617 if (h_ret != H_SUCCESS) {
618 ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
619 h_ret);
620 ret = ehca2ib_return_code(h_ret);
621 goto create_qp_exit3;
622 }
623 }
624 if (init_attr->send_cq) {
625 struct ehca_cq *cq = container_of(init_attr->send_cq,
626 struct ehca_cq, ib_cq);
627 ret = ehca_cq_assign_qp(cq, my_qp);
628 if (ret) {
629 ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
630 ret);
631 goto create_qp_exit3;
632 }
633 my_qp->send_cq = cq;
634 }
635 /* copy queues, galpa data to user space */
636 if (context && udata) {
637 struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
638 struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
639 struct ehca_create_qp_resp resp;
640 struct vm_area_struct * vma;
641 memset(&resp, 0, sizeof(resp));
642
643 resp.qp_num = my_qp->real_qp_num;
644 resp.token = my_qp->token;
645 resp.qp_type = my_qp->qp_type;
646 resp.qkey = my_qp->qkey;
647 resp.real_qp_num = my_qp->real_qp_num;
648 /* rqueue properties */
649 resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
650 resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
651 resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
652 resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
653 resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
654 ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x22000000,
655 ipz_rqueue->queue_length,
656 (void**)&resp.ipz_rqueue.queue,
657 &vma);
658 if (ret) {
659 ehca_err(pd->device, "Could not mmap rqueue pages");
660 goto create_qp_exit3;
661 }
662 my_qp->uspace_rqueue = resp.ipz_rqueue.queue;
663 /* squeue properties */
664 resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
665 resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
666 resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
667 resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
668 resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
669 ret = ehca_mmap_nopage(((u64)(my_qp->token) << 32) | 0x23000000,
670 ipz_squeue->queue_length,
671 (void**)&resp.ipz_squeue.queue,
672 &vma);
673 if (ret) {
674 ehca_err(pd->device, "Could not mmap squeue pages");
675 goto create_qp_exit4;
676 }
677 my_qp->uspace_squeue = resp.ipz_squeue.queue;
678 /* fw_handle */
679 resp.galpas = my_qp->galpas;
680 ret = ehca_mmap_register(my_qp->galpas.user.fw_handle,
681 (void**)&resp.galpas.kernel.fw_handle,
682 &vma);
683 if (ret) {
684 ehca_err(pd->device, "Could not mmap fw_handle");
685 goto create_qp_exit5;
686 }
687 my_qp->uspace_fwh = (u64)resp.galpas.kernel.fw_handle;
688
689 if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
690 ehca_err(pd->device, "Copy to udata failed");
691 ret = -EINVAL;
692 goto create_qp_exit6;
693 }
694 }
695
696 return &my_qp->ib_qp;
697
698create_qp_exit6:
699 ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
700
701create_qp_exit5:
702 ehca_munmap(my_qp->uspace_squeue, my_qp->ipz_squeue.queue_length);
703
704create_qp_exit4:
705 ehca_munmap(my_qp->uspace_rqueue, my_qp->ipz_rqueue.queue_length);
706
707create_qp_exit3:
708 ipz_queue_dtor(&my_qp->ipz_rqueue);
709 ipz_queue_dtor(&my_qp->ipz_squeue);
710
711create_qp_exit2:
712 hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
713
714create_qp_exit1:
715 spin_lock_irqsave(&ehca_qp_idr_lock, flags);
716 idr_remove(&ehca_qp_idr, my_qp->token);
717 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
718
719create_qp_exit0:
720 kmem_cache_free(qp_cache, my_qp);
721 return ERR_PTR(ret);
722}
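/*
 * Consumer-side sketch (hypothetical kernel ULP, not part of this driver):
 * ehca_create_qp() is reached through the generic verbs entry point, so a
 * kernel caller only fills struct ib_qp_init_attr and calls ib_create_qp();
 * the CQs and PD are assumed to have been created beforehand:
 *
 *	struct ib_qp_init_attr init_attr = {
 *		.send_cq     = send_cq,
 *		.recv_cq     = recv_cq,
 *		.cap         = { .max_send_wr  = 64, .max_recv_wr  = 64,
 *				 .max_send_sge = 1,  .max_recv_sge = 1 },
 *		.sq_sig_type = IB_SIGNAL_REQ_WR,
 *		.qp_type     = IB_QPT_RC,
 *	};
 *	struct ib_qp *qp = ib_create_qp(pd, &init_attr);
 *	if (IS_ERR(qp))
 *		return PTR_ERR(qp);
 */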
723
724/*
725 * prepare_sqe_rts is called by internal_modify_qp() at the sqe -> rts transition;
726 * it sets the purge bit of the bad wqe and all subsequent wqes to avoid reentering sqe
727 * returns total number of bad wqes in bad_wqe_cnt
728 */
729static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
730 int *bad_wqe_cnt)
731{
732 u64 h_ret;
733 struct ipz_queue *squeue;
734 void *bad_send_wqe_p, *bad_send_wqe_v;
735 void *squeue_start_p, *squeue_end_p;
736 void *squeue_start_v, *squeue_end_v;
737 struct ehca_wqe *wqe;
738 int qp_num = my_qp->ib_qp.qp_num;
739
740 /* get send wqe pointer */
741 h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
742 my_qp->ipz_qp_handle, &my_qp->pf,
743 &bad_send_wqe_p, NULL, 2);
744 if (h_ret != H_SUCCESS) {
745 ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
746 " ehca_qp=%p qp_num=%x h_ret=%lx",
747 my_qp, qp_num, h_ret);
748 return ehca2ib_return_code(h_ret);
749 }
750 bad_send_wqe_p = (void*)((u64)bad_send_wqe_p & (~(1L<<63)));
751 ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
752 qp_num, bad_send_wqe_p);
753 /* convert wqe pointer to vadr */
754 bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
755 if (ehca_debug_level)
756 ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
757 squeue = &my_qp->ipz_squeue;
758 squeue_start_p = (void*)virt_to_abs(ipz_qeit_calc(squeue, 0L));
759 squeue_end_p = squeue_start_p+squeue->queue_length;
760 squeue_start_v = abs_to_virt((u64)squeue_start_p);
761 squeue_end_v = abs_to_virt((u64)squeue_end_p);
762 ehca_dbg(&shca->ib_device, "qp_num=%x squeue_start_v=%p squeue_end_v=%p",
763 qp_num, squeue_start_v, squeue_end_v);
764
765 /* loop sets wqe's purge bit */
766 wqe = (struct ehca_wqe*)bad_send_wqe_v;
767 *bad_wqe_cnt = 0;
768 while (wqe->optype != 0xff && wqe->wqef != 0xff) {
769 if (ehca_debug_level)
770 ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
771 wqe->nr_of_data_seg = 0; /* suppress data access */
772 wqe->wqef = WQEF_PURGE; /* WQE to be purged */
773 wqe = (struct ehca_wqe*)((u8*)wqe+squeue->qe_size);
774 *bad_wqe_cnt = (*bad_wqe_cnt)+1;
775 if ((void*)wqe >= squeue_end_v) {
776 wqe = squeue_start_v;
777 }
778 }
779 /*
780 * the bad wqe will be reprocessed and ignored when poll_cq() is called,
781 * i.e. the number of wqes with flush error status is one less
782 */
783 ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
784 qp_num, (*bad_wqe_cnt)-1);
785 wqe->wqef = 0;
786
787 return 0;
788}
789
790/*
791 * internal_modify_qp with circumvention to handle aqp0 properly
792 * smi_reset2init indicates if this is an internal reset-to-init-call for
793 * smi. This flag must always be zero if called from ehca_modify_qp()!
794 * This internal func was introduced to avoid recursion of ehca_modify_qp()!
795 */
796static int internal_modify_qp(struct ib_qp *ibqp,
797 struct ib_qp_attr *attr,
798 int attr_mask, int smi_reset2init)
799{
800 enum ib_qp_state qp_cur_state, qp_new_state;
801 int cnt, qp_attr_idx, ret = 0;
802 enum ib_qp_statetrans statetrans;
803 struct hcp_modify_qp_control_block *mqpcb;
804 struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
805 struct ehca_shca *shca =
806 container_of(ibqp->pd->device, struct ehca_shca, ib_device);
807 u64 update_mask;
808 u64 h_ret;
809 int bad_wqe_cnt = 0;
810 int squeue_locked = 0;
811 unsigned long spl_flags = 0;
812
813 /* do query_qp to obtain current attr values */
814 mqpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL);
815 if (mqpcb == NULL) {
816 ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
817 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
818 return -ENOMEM;
819 }
820
821 h_ret = hipz_h_query_qp(shca->ipz_hca_handle,
822 my_qp->ipz_qp_handle,
823 &my_qp->pf,
824 mqpcb, my_qp->galpas.kernel);
825 if (h_ret != H_SUCCESS) {
826 ehca_err(ibqp->device, "hipz_h_query_qp() failed "
827 "ehca_qp=%p qp_num=%x h_ret=%lx",
828 my_qp, ibqp->qp_num, h_ret);
829 ret = ehca2ib_return_code(h_ret);
830 goto modify_qp_exit1;
831 }
832
833 qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state);
834
835 if (qp_cur_state == -EINVAL) { /* invalid qp state */
836 ret = -EINVAL;
837 ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x "
838 "ehca_qp=%p qp_num=%x",
839 mqpcb->qp_state, my_qp, ibqp->qp_num);
840 goto modify_qp_exit1;
841 }
842 /*
843 * circumvention to set aqp0 initial state to init
844 * as expected by IB spec
845 */
846 if (smi_reset2init == 0 &&
847 ibqp->qp_type == IB_QPT_SMI &&
848 qp_cur_state == IB_QPS_RESET &&
849 (attr_mask & IB_QP_STATE) &&
850 attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */
851 struct ib_qp_attr smiqp_attr = {
852 .qp_state = IB_QPS_INIT,
853 .port_num = my_qp->init_attr.port_num,
854 .pkey_index = 0,
855 .qkey = 0
856 };
857 int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT |
858 IB_QP_PKEY_INDEX | IB_QP_QKEY;
859 int smirc = internal_modify_qp(
860 ibqp, &smiqp_attr, smiqp_attr_mask, 1);
861 if (smirc) {
862 ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
863 "ehca_modify_qp() rc=%x", smirc);
864 ret = H_PARAMETER;
865 goto modify_qp_exit1;
866 }
867 qp_cur_state = IB_QPS_INIT;
868 ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded");
869 }
870 /* check that the supplied current state matches the actual current state */
871 if ((attr_mask & IB_QP_CUR_STATE) &&
872 qp_cur_state != attr->cur_qp_state) {
873 ret = -EINVAL;
874 ehca_err(ibqp->device,
875 "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>"
876 " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x",
877 attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num);
878 goto modify_qp_exit1;
879 }
880
881 ehca_dbg(ibqp->device,"ehca_qp=%p qp_num=%x current qp_state=%x "
882 "new qp_state=%x attribute_mask=%x",
883 my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask);
884
885 qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
886 if (!smi_reset2init &&
887 !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
888 attr_mask)) {
889 ret = -EINVAL;
890 ehca_err(ibqp->device,
891 "Invalid qp transition new_state=%x cur_state=%x "
892 "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state,
893 qp_cur_state, my_qp, ibqp->qp_num, attr_mask);
894 goto modify_qp_exit1;
895 }
896
897 if ((mqpcb->qp_state = ib2ehca_qp_state(qp_new_state)))
898 update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
899 else {
900 ret = -EINVAL;
901 ehca_err(ibqp->device, "Invalid new qp state=%x "
902 "ehca_qp=%p qp_num=%x",
903 qp_new_state, my_qp, ibqp->qp_num);
904 goto modify_qp_exit1;
905 }
906
907 /* retrieve state transition struct to get req and opt attrs */
908 statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state);
909 if (statetrans < 0) {
910 ret = -EINVAL;
911 ehca_err(ibqp->device, "<INVALID STATE CHANGE> qp_cur_state=%x "
912 "new_qp_state=%x State_xsition=%x ehca_qp=%p "
913 "qp_num=%x", qp_cur_state, qp_new_state,
914 statetrans, my_qp, ibqp->qp_num);
915 goto modify_qp_exit1;
916 }
917
918 qp_attr_idx = ib2ehcaqptype(ibqp->qp_type);
919
920 if (qp_attr_idx < 0) {
921 ret = qp_attr_idx;
922 ehca_err(ibqp->device,
923 "Invalid QP type=%x ehca_qp=%p qp_num=%x",
924 ibqp->qp_type, my_qp, ibqp->qp_num);
925 goto modify_qp_exit1;
926 }
927
928 ehca_dbg(ibqp->device,
929 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
930 my_qp, ibqp->qp_num, statetrans);
931
932 /* sqe -> rts: set purge bit of bad wqe before actual trans */
933 if ((my_qp->qp_type == IB_QPT_UD ||
934 my_qp->qp_type == IB_QPT_GSI ||
935 my_qp->qp_type == IB_QPT_SMI) &&
936 statetrans == IB_QPST_SQE2RTS) {
937 /* mark next free wqe if kernel */
938 if (my_qp->uspace_squeue == 0) {
939 struct ehca_wqe *wqe;
940 /* lock send queue */
941 spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
942 squeue_locked = 1;
943 /* mark next free wqe */
944 wqe = (struct ehca_wqe*)
945 ipz_qeit_get(&my_qp->ipz_squeue);
946 wqe->optype = wqe->wqef = 0xff;
947 ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p",
948 ibqp->qp_num, wqe);
949 }
950 ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
951 if (ret) {
952 ehca_err(ibqp->device, "prepare_sqe_rts() failed "
953 "ehca_qp=%p qp_num=%x ret=%x",
954 my_qp, ibqp->qp_num, ret);
955 goto modify_qp_exit2;
956 }
957 }
958
959 /*
960 * enable RDMA_Atomic_Control if reset->init and reliable connection;
961 * this is necessary since gen2 does not provide that flag,
962 * but pHyp requires it
963 */
964 if (statetrans == IB_QPST_RESET2INIT &&
965 (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) {
966 mqpcb->rdma_atomic_ctrl = 3;
967 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1);
968 }
969 /* circumvention: pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */
970 if (statetrans == IB_QPST_INIT2RTR &&
971 (ibqp->qp_type == IB_QPT_UC) &&
972 !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) {
973 mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */
974 update_mask |=
975 EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
976 }
977
978 if (attr_mask & IB_QP_PKEY_INDEX) {
979 mqpcb->prim_p_key_idx = attr->pkey_index;
980 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
981 }
982 if (attr_mask & IB_QP_PORT) {
983 if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
984 ret = -EINVAL;
985 ehca_err(ibqp->device, "Invalid port=%x. "
986 "ehca_qp=%p qp_num=%x num_ports=%x",
987 attr->port_num, my_qp, ibqp->qp_num,
988 shca->num_ports);
989 goto modify_qp_exit2;
990 }
991 mqpcb->prim_phys_port = attr->port_num;
992 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
993 }
994 if (attr_mask & IB_QP_QKEY) {
995 mqpcb->qkey = attr->qkey;
996 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1);
997 }
998 if (attr_mask & IB_QP_AV) {
999 int ah_mult = ib_rate_to_mult(attr->ah_attr.static_rate);
1000 int ehca_mult = ib_rate_to_mult(shca->sport[my_qp->
1001 init_attr.port_num].rate);
1002
1003 mqpcb->dlid = attr->ah_attr.dlid;
1004 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1);
1005 mqpcb->source_path_bits = attr->ah_attr.src_path_bits;
1006 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1);
1007 mqpcb->service_level = attr->ah_attr.sl;
1008 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1);
1009
1010 if (ah_mult < ehca_mult)
1011 mqpcb->max_static_rate = (ah_mult > 0) ?
1012 ((ehca_mult - 1) / ah_mult) : 0;
1013 else
1014 mqpcb->max_static_rate = 0;
1015
1016 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1);
1017
1018 /*
1019 * only if the GRH flag is set may we set SOURCE_GID_IDX
1020 * and DEST_GID; otherwise pHyp will return H_ATTR_PARM!
1021 */
1022 if (attr->ah_attr.ah_flags == IB_AH_GRH) {
1023 mqpcb->send_grh_flag = 1 << 31;
1024 update_mask |=
1025 EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
1026 mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index;
1027 update_mask |=
1028 EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1);
1029
1030 for (cnt = 0; cnt < 16; cnt++)
1031 mqpcb->dest_gid.byte[cnt] =
1032 attr->ah_attr.grh.dgid.raw[cnt];
1033
1034 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1);
1035 mqpcb->flow_label = attr->ah_attr.grh.flow_label;
1036 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1);
1037 mqpcb->hop_limit = attr->ah_attr.grh.hop_limit;
1038 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1);
1039 mqpcb->traffic_class = attr->ah_attr.grh.traffic_class;
1040 update_mask |=
1041 EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1);
1042 }
1043 }
1044
1045 if (attr_mask & IB_QP_PATH_MTU) {
1046 mqpcb->path_mtu = attr->path_mtu;
1047 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
1048 }
1049 if (attr_mask & IB_QP_TIMEOUT) {
1050 mqpcb->timeout = attr->timeout;
1051 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1);
1052 }
1053 if (attr_mask & IB_QP_RETRY_CNT) {
1054 mqpcb->retry_count = attr->retry_cnt;
1055 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1);
1056 }
1057 if (attr_mask & IB_QP_RNR_RETRY) {
1058 mqpcb->rnr_retry_count = attr->rnr_retry;
1059 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1);
1060 }
1061 if (attr_mask & IB_QP_RQ_PSN) {
1062 mqpcb->receive_psn = attr->rq_psn;
1063 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1);
1064 }
1065 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1066 mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ?
1067 attr->max_dest_rd_atomic : 2;
1068 update_mask |=
1069 EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1);
1070 }
1071 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1072 mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ?
1073 attr->max_rd_atomic : 2;
1074 update_mask |=
1075 EHCA_BMASK_SET
1076 (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1);
1077 }
1078 if (attr_mask & IB_QP_ALT_PATH) {
1079 int ah_mult = ib_rate_to_mult(attr->alt_ah_attr.static_rate);
1080 int ehca_mult = ib_rate_to_mult(
1081 shca->sport[my_qp->init_attr.port_num].rate);
1082
1083 mqpcb->dlid_al = attr->alt_ah_attr.dlid;
1084 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
1085 mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
1086 update_mask |=
1087 EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
1088 mqpcb->service_level_al = attr->alt_ah_attr.sl;
1089 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
1090
1091 if (ah_mult < ehca_mult)
1092 mqpcb->max_static_rate_al = (ah_mult > 0) ?
1093 ((ehca_mult - 1) / ah_mult) : 0;
1094 else
1095 mqpcb->max_static_rate_al = 0;
1096
1097 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1);
1098
1099 /*
1100 * only if the GRH flag is set may we set SOURCE_GID_IDX
1101 * and DEST_GID; otherwise pHyp will return H_ATTR_PARM!
1102 */
1103 if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
1104 mqpcb->send_grh_flag_al = 1 << 31;
1105 update_mask |=
1106 EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
1107 mqpcb->source_gid_idx_al =
1108 attr->alt_ah_attr.grh.sgid_index;
1109 update_mask |=
1110 EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
1111
1112 for (cnt = 0; cnt < 16; cnt++)
1113 mqpcb->dest_gid_al.byte[cnt] =
1114 attr->alt_ah_attr.grh.dgid.raw[cnt];
1115
1116 update_mask |=
1117 EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
1118 mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
1119 update_mask |=
1120 EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
1121 mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
1122 update_mask |=
1123 EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
1124 mqpcb->traffic_class_al =
1125 attr->alt_ah_attr.grh.traffic_class;
1126 update_mask |=
1127 EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
1128 }
1129 }
1130
1131 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
1132 mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer;
1133 update_mask |=
1134 EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1);
1135 }
1136
1137 if (attr_mask & IB_QP_SQ_PSN) {
1138 mqpcb->send_psn = attr->sq_psn;
1139 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1);
1140 }
1141
1142 if (attr_mask & IB_QP_DEST_QPN) {
1143 mqpcb->dest_qp_nr = attr->dest_qp_num;
1144 update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1);
1145 }
1146
1147 if (attr_mask & IB_QP_PATH_MIG_STATE) {
1148 mqpcb->path_migration_state = attr->path_mig_state;
1149 update_mask |=
1150 EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
1151 }
1152
1153 if (attr_mask & IB_QP_CAP) {
1154 mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1;
1155 update_mask |=
1156 EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1);
1157 mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1;
1158 update_mask |=
1159 EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1);
1160 /* no support for max_send/recv_sge yet */
1161 }
1162
1163 if (ehca_debug_level)
1164 ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num);
1165
1166 h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
1167 my_qp->ipz_qp_handle,
1168 &my_qp->pf,
1169 update_mask,
1170 mqpcb, my_qp->galpas.kernel);
1171
1172 if (h_ret != H_SUCCESS) {
1173 ret = ehca2ib_return_code(h_ret);
1174 ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
1175 "ehca_qp=%p qp_num=%x",h_ret, my_qp, ibqp->qp_num);
1176 goto modify_qp_exit2;
1177 }
1178
1179 if ((my_qp->qp_type == IB_QPT_UD ||
1180 my_qp->qp_type == IB_QPT_GSI ||
1181 my_qp->qp_type == IB_QPT_SMI) &&
1182 statetrans == IB_QPST_SQE2RTS) {
1183 /* ring doorbell to reprocess wqes */
1184 iosync(); /* serialize GAL register access */
1185 hipz_update_sqa(my_qp, bad_wqe_cnt-1);
1186 ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt);
1187 }
1188
1189 if (statetrans == IB_QPST_RESET2INIT ||
1190 statetrans == IB_QPST_INIT2INIT) {
1191 mqpcb->qp_enable = 1;
1192 mqpcb->qp_state = EHCA_QPS_INIT;
1193 update_mask = 0;
1194 update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
1195
1196 h_ret = hipz_h_modify_qp(shca->ipz_hca_handle,
1197 my_qp->ipz_qp_handle,
1198 &my_qp->pf,
1199 update_mask,
1200 mqpcb,
1201 my_qp->galpas.kernel);
1202
1203 if (h_ret != H_SUCCESS) {
1204 ret = ehca2ib_return_code(h_ret);
1205 ehca_err(ibqp->device, "ENABLE in context of "
1206 "RESET_2_INIT failed! Maybe you didn't get "
1207 "a LID h_ret=%lx ehca_qp=%p qp_num=%x",
1208 h_ret, my_qp, ibqp->qp_num);
1209 goto modify_qp_exit2;
1210 }
1211 }
1212
1213 if (statetrans == IB_QPST_ANY2RESET) {
1214 ipz_qeit_reset(&my_qp->ipz_rqueue);
1215 ipz_qeit_reset(&my_qp->ipz_squeue);
1216 }
1217
1218 if (attr_mask & IB_QP_QKEY)
1219 my_qp->qkey = attr->qkey;
1220
1221modify_qp_exit2:
1222 if (squeue_locked) { /* this means: sqe -> rts */
1223 spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
1224 my_qp->sqerr_purgeflag = 1;
1225 }
1226
1227modify_qp_exit1:
1228 kfree(mqpcb);
1229
1230 return ret;
1231}
1232
1233int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1234 struct ib_udata *udata)
1235{
1236 struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
1237 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
1238 ib_pd);
1239 u32 cur_pid = current->tgid;
1240
1241 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
1242 my_pd->ownpid != cur_pid) {
1243 ehca_err(ibqp->pd->device, "Invalid caller pid=%x ownpid=%x",
1244 cur_pid, my_pd->ownpid);
1245 return -EINVAL;
1246 }
1247
1248 return internal_modify_qp(ibqp, attr, attr_mask, 0);
1249}
1250
1251int ehca_query_qp(struct ib_qp *qp,
1252 struct ib_qp_attr *qp_attr,
1253 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
1254{
1255 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
1256 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
1257 ib_pd);
1258 struct ehca_shca *shca = container_of(qp->device, struct ehca_shca,
1259 ib_device);
1260 struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
1261 struct hcp_modify_qp_control_block *qpcb;
1262 u32 cur_pid = current->tgid;
1263 int cnt, ret = 0;
1264 u64 h_ret;
1265
1266 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
1267 my_pd->ownpid != cur_pid) {
1268 ehca_err(qp->device, "Invalid caller pid=%x ownpid=%x",
1269 cur_pid, my_pd->ownpid);
1270 return -EINVAL;
1271 }
1272
1273 if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) {
1274 ehca_err(qp->device,"Invalid attribute mask "
1275 "ehca_qp=%p qp_num=%x qp_attr_mask=%x ",
1276 my_qp, qp->qp_num, qp_attr_mask);
1277 return -EINVAL;
1278 }
1279
1280 qpcb = kzalloc(H_CB_ALIGNMENT, GFP_KERNEL );
1281 if (!qpcb) {
1282 ehca_err(qp->device,"Out of memory for qpcb "
1283 "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num);
1284 return -ENOMEM;
1285 }
1286
1287 h_ret = hipz_h_query_qp(adapter_handle,
1288 my_qp->ipz_qp_handle,
1289 &my_qp->pf,
1290 qpcb, my_qp->galpas.kernel);
1291
1292 if (h_ret != H_SUCCESS) {
1293 ret = ehca2ib_return_code(h_ret);
1294 ehca_err(qp->device,"hipz_h_query_qp() failed "
1295 "ehca_qp=%p qp_num=%x h_ret=%lx",
1296 my_qp, qp->qp_num, h_ret);
1297 goto query_qp_exit1;
1298 }
1299
1300 qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state);
1301 qp_attr->qp_state = qp_attr->cur_qp_state;
1302
1303 if (qp_attr->cur_qp_state == -EINVAL) {
1304 ret = -EINVAL;
1305 ehca_err(qp->device,"Got invalid ehca_qp_state=%x "
1306 "ehca_qp=%p qp_num=%x",
1307 qpcb->qp_state, my_qp, qp->qp_num);
1308 goto query_qp_exit1;
1309 }
1310
1311 if (qp_attr->qp_state == IB_QPS_SQD)
1312 qp_attr->sq_draining = 1;
1313
1314 qp_attr->qkey = qpcb->qkey;
1315 qp_attr->path_mtu = qpcb->path_mtu;
1316 qp_attr->path_mig_state = qpcb->path_migration_state;
1317 qp_attr->rq_psn = qpcb->receive_psn;
1318 qp_attr->sq_psn = qpcb->send_psn;
1319 qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
1320 qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1;
1321 qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1;
1322 /* UD_AV CIRCUMVENTION */
1323 if (my_qp->qp_type == IB_QPT_UD) {
1324 qp_attr->cap.max_send_sge =
1325 qpcb->actual_nr_sges_in_sq_wqe - 2;
1326 qp_attr->cap.max_recv_sge =
1327 qpcb->actual_nr_sges_in_rq_wqe - 2;
1328 } else {
1329 qp_attr->cap.max_send_sge =
1330 qpcb->actual_nr_sges_in_sq_wqe;
1331 qp_attr->cap.max_recv_sge =
1332 qpcb->actual_nr_sges_in_rq_wqe;
1333 }
1334
1335 qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size;
1336 qp_attr->dest_qp_num = qpcb->dest_qp_nr;
1337
1338 qp_attr->pkey_index =
1339 EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx);
1340
1341 qp_attr->port_num =
1342 EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port);
1343
1344 qp_attr->timeout = qpcb->timeout;
1345 qp_attr->retry_cnt = qpcb->retry_count;
1346 qp_attr->rnr_retry = qpcb->rnr_retry_count;
1347
1348 qp_attr->alt_pkey_index =
1349 EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx);
1350
1351 qp_attr->alt_port_num = qpcb->alt_phys_port;
1352 qp_attr->alt_timeout = qpcb->timeout_al;
1353
1354 /* primary av */
1355 qp_attr->ah_attr.sl = qpcb->service_level;
1356
1357 if (qpcb->send_grh_flag) {
1358 qp_attr->ah_attr.ah_flags = IB_AH_GRH;
1359 }
1360
1361 qp_attr->ah_attr.static_rate = qpcb->max_static_rate;
1362 qp_attr->ah_attr.dlid = qpcb->dlid;
1363 qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits;
1364 qp_attr->ah_attr.port_num = qp_attr->port_num;
1365
1366 /* primary GRH */
1367 qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class;
1368 qp_attr->ah_attr.grh.hop_limit = qpcb->hop_limit;
1369 qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx;
1370 qp_attr->ah_attr.grh.flow_label = qpcb->flow_label;
1371
1372 for (cnt = 0; cnt < 16; cnt++)
1373 qp_attr->ah_attr.grh.dgid.raw[cnt] =
1374 qpcb->dest_gid.byte[cnt];
1375
1376 /* alternate AV */
1377 qp_attr->alt_ah_attr.sl = qpcb->service_level_al;
1378 if (qpcb->send_grh_flag_al) {
1379 qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH;
1380 }
1381
1382 qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al;
1383 qp_attr->alt_ah_attr.dlid = qpcb->dlid_al;
1384 qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al;
1385
1386 /* alternate GRH */
1387 qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al;
1388 qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al;
1389 qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al;
1390 qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al;
1391
1392 for (cnt = 0; cnt < 16; cnt++)
1393 qp_attr->alt_ah_attr.grh.dgid.raw[cnt] =
1394 qpcb->dest_gid_al.byte[cnt];
1395
1396 /* return init attributes given in ehca_create_qp */
1397 if (qp_init_attr)
1398 *qp_init_attr = my_qp->init_attr;
1399
1400 if (ehca_debug_level)
1401 ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num);
1402
1403query_qp_exit1:
1404 kfree(qpcb);
1405
1406 return ret;
1407}
1408
1409int ehca_destroy_qp(struct ib_qp *ibqp)
1410{
1411 struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
1412 struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
1413 ib_device);
1414 struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
1415 ib_pd);
1416 u32 cur_pid = current->tgid;
1417 u32 qp_num = ibqp->qp_num;
1418 int ret;
1419 u64 h_ret;
1420 u8 port_num;
1421 enum ib_qp_type qp_type;
1422 unsigned long flags;
1423
1424 if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
1425 my_pd->ownpid != cur_pid) {
1426 ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
1427 cur_pid, my_pd->ownpid);
1428 return -EINVAL;
1429 }
1430
1431 if (my_qp->send_cq) {
1432 ret = ehca_cq_unassign_qp(my_qp->send_cq,
1433 my_qp->real_qp_num);
1434 if (ret) {
1435 ehca_err(ibqp->device, "Couldn't unassign qp from "
1436 "send_cq ret=%x qp_num=%x cq_num=%x", ret,
1437 my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
1438 return ret;
1439 }
1440 }
1441
1442 spin_lock_irqsave(&ehca_qp_idr_lock, flags);
1443 idr_remove(&ehca_qp_idr, my_qp->token);
1444 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
1445
1446 /* un-mmap if vma alloc */
1447 if (my_qp->uspace_rqueue) {
1448 ret = ehca_munmap(my_qp->uspace_rqueue,
1449 my_qp->ipz_rqueue.queue_length);
1450 if (ret)
1451 ehca_err(ibqp->device, "Could not munmap rqueue "
1452 "qp_num=%x", qp_num);
1453 ret = ehca_munmap(my_qp->uspace_squeue,
1454 my_qp->ipz_squeue.queue_length);
1455 if (ret)
1456 ehca_err(ibqp->device, "Could not munmap squeue "
1457 "qp_num=%x", qp_num);
1458 ret = ehca_munmap(my_qp->uspace_fwh, EHCA_PAGESIZE);
1459 if (ret)
1460 ehca_err(ibqp->device, "Could not munmap fwh qp_num=%x",
1461 qp_num);
1462 }
1463
1464 h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
1465 if (h_ret != H_SUCCESS) {
1466 ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
1467 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
1468 return ehca2ib_return_code(h_ret);
1469 }
1470
1471 port_num = my_qp->init_attr.port_num;
1472 qp_type = my_qp->init_attr.qp_type;
1473
1474 /* no support for IB_QPT_SMI yet */
1475 if (qp_type == IB_QPT_GSI) {
1476 struct ib_event event;
1477 ehca_info(ibqp->device, "device %s: port %x is inactive.",
1478 shca->ib_device.name, port_num);
1479 event.device = &shca->ib_device;
1480 event.event = IB_EVENT_PORT_ERR;
1481 event.element.port_num = port_num;
1482 shca->sport[port_num - 1].port_state = IB_PORT_DOWN;
1483 ib_dispatch_event(&event);
1484 }
1485
1486 ipz_queue_dtor(&my_qp->ipz_rqueue);
1487 ipz_queue_dtor(&my_qp->ipz_squeue);
1488 kmem_cache_free(qp_cache, my_qp);
1489 return 0;
1490}
1491
1492int ehca_init_qp_cache(void)
1493{
1494 qp_cache = kmem_cache_create("ehca_cache_qp",
1495 sizeof(struct ehca_qp), 0,
1496 SLAB_HWCACHE_ALIGN,
1497 NULL, NULL);
1498 if (!qp_cache)
1499 return -ENOMEM;
1500 return 0;
1501}
1502
1503void ehca_cleanup_qp_cache(void)
1504{
1505 if (qp_cache)
1506 kmem_cache_destroy(qp_cache);
1507}
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
new file mode 100644
index 000000000000..b46bda1bf85d
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -0,0 +1,653 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * post_send/recv, poll_cq, req_notify
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com>
9 *
10 * Copyright (c) 2005 IBM Corporation
11 *
12 * All rights reserved.
13 *
14 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
15 * BSD.
16 *
17 * OpenIB BSD License
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are met:
21 *
22 * Redistributions of source code must retain the above copyright notice, this
23 * list of conditions and the following disclaimer.
24 *
25 * Redistributions in binary form must reproduce the above copyright notice,
26 * this list of conditions and the following disclaimer in the documentation
27 * and/or other materials
28 * provided with the distribution.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43
44#include <asm-powerpc/system.h>
45#include "ehca_classes.h"
46#include "ehca_tools.h"
47#include "ehca_qes.h"
48#include "ehca_iverbs.h"
49#include "hcp_if.h"
50#include "hipz_fns.h"
51
52static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
53 struct ehca_wqe *wqe_p,
54 struct ib_recv_wr *recv_wr)
55{
56 u8 cnt_ds;
57 if (unlikely((recv_wr->num_sge < 0) ||
58 (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) {
59 ehca_gen_err("Invalid number of WQE SGE. "
 60			     "num_sge=%x max_nr_of_sg=%x",
61 recv_wr->num_sge, ipz_rqueue->act_nr_of_sg);
62 return -EINVAL; /* invalid SG list length */
63 }
64
65 /* clear wqe header until sglist */
66 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
67
68 wqe_p->work_request_id = recv_wr->wr_id;
69 wqe_p->nr_of_data_seg = recv_wr->num_sge;
70
71 for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) {
72 wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr =
73 recv_wr->sg_list[cnt_ds].addr;
74 wqe_p->u.all_rcv.sg_list[cnt_ds].lkey =
75 recv_wr->sg_list[cnt_ds].lkey;
76 wqe_p->u.all_rcv.sg_list[cnt_ds].length =
77 recv_wr->sg_list[cnt_ds].length;
78 }
79
80 if (ehca_debug_level) {
81 ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", ipz_rqueue);
82 ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
83 }
84
85 return 0;
86}
87
88#if defined(DEBUG_GSI_SEND_WR)
89
90/* need ib_mad struct */
91#include <rdma/ib_mad.h>
92
93static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
94{
 95	int idx = 0;
96 int j;
97 while (send_wr) {
98 struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
99 struct ib_sge *sge = send_wr->sg_list;
100 ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
101 "send_flags=%x opcode=%x",idx, send_wr->wr_id,
102 send_wr->num_sge, send_wr->send_flags,
103 send_wr->opcode);
104 if (mad_hdr) {
105 ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
106 "mgmt_class=%x class_version=%x method=%x "
107 "status=%x class_specific=%x tid=%lx "
108 "attr_id=%x resv=%x attr_mod=%x",
109 idx, mad_hdr->base_version,
110 mad_hdr->mgmt_class,
111 mad_hdr->class_version, mad_hdr->method,
112 mad_hdr->status, mad_hdr->class_specific,
113 mad_hdr->tid, mad_hdr->attr_id,
114 mad_hdr->resv,
115 mad_hdr->attr_mod);
116 }
117 for (j = 0; j < send_wr->num_sge; j++) {
118 u8 *data = (u8 *) abs_to_virt(sge->addr);
119 ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
120 "lkey=%x",
121 idx, j, data, sge->length, sge->lkey);
122 /* assume length is n*16 */
123 ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
124 idx, j);
125 sge++;
126 } /* eof for j */
127 idx++;
128 send_wr = send_wr->next;
129 } /* eof while send_wr */
130}
131
132#endif /* DEBUG_GSI_SEND_WR */
133
134static inline int ehca_write_swqe(struct ehca_qp *qp,
135 struct ehca_wqe *wqe_p,
136 const struct ib_send_wr *send_wr)
137{
138 u32 idx;
139 u64 dma_length;
140 struct ehca_av *my_av;
141 u32 remote_qkey = send_wr->wr.ud.remote_qkey;
142
143 if (unlikely((send_wr->num_sge < 0) ||
144 (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) {
145 ehca_gen_err("Invalid number of WQE SGE. "
146			     "num_sge=%x max_nr_of_sg=%x",
147 send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg);
148 return -EINVAL; /* invalid SG list length */
149 }
150
151 /* clear wqe header until sglist */
152 memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list));
153
154 wqe_p->work_request_id = send_wr->wr_id;
155
156 switch (send_wr->opcode) {
157 case IB_WR_SEND:
158 case IB_WR_SEND_WITH_IMM:
159 wqe_p->optype = WQE_OPTYPE_SEND;
160 break;
161 case IB_WR_RDMA_WRITE:
162 case IB_WR_RDMA_WRITE_WITH_IMM:
163 wqe_p->optype = WQE_OPTYPE_RDMAWRITE;
164 break;
165 case IB_WR_RDMA_READ:
166 wqe_p->optype = WQE_OPTYPE_RDMAREAD;
167 break;
168 default:
169 ehca_gen_err("Invalid opcode=%x", send_wr->opcode);
170 return -EINVAL; /* invalid opcode */
171 }
172
173 wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE;
174
175 wqe_p->wr_flag = 0;
176
177 if (send_wr->send_flags & IB_SEND_SIGNALED)
178 wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
179
180 if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
181 send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
182 /* this might not work as long as HW does not support it */
183 wqe_p->immediate_data = be32_to_cpu(send_wr->imm_data);
184 wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT;
185 }
186
187 wqe_p->nr_of_data_seg = send_wr->num_sge;
188
189 switch (qp->qp_type) {
190 case IB_QPT_SMI:
191 case IB_QPT_GSI:
192		/* no break is intentional here */
193 case IB_QPT_UD:
194 /* IB 1.2 spec C10-15 compliance */
195 if (send_wr->wr.ud.remote_qkey & 0x80000000)
196 remote_qkey = qp->qkey;
197
198 wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
199 wqe_p->local_ee_context_qkey = remote_qkey;
200 if (!send_wr->wr.ud.ah) {
201 ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
202 return -EINVAL;
203 }
204 my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
205 wqe_p->u.ud_av.ud_av = my_av->av;
206
207 /*
208 * omitted check of IB_SEND_INLINE
209 * since HW does not support it
210 */
211 for (idx = 0; idx < send_wr->num_sge; idx++) {
212 wqe_p->u.ud_av.sg_list[idx].vaddr =
213 send_wr->sg_list[idx].addr;
214 wqe_p->u.ud_av.sg_list[idx].lkey =
215 send_wr->sg_list[idx].lkey;
216 wqe_p->u.ud_av.sg_list[idx].length =
217 send_wr->sg_list[idx].length;
218 } /* eof for idx */
219 if (qp->qp_type == IB_QPT_SMI ||
220 qp->qp_type == IB_QPT_GSI)
221 wqe_p->u.ud_av.ud_av.pmtu = 1;
222 if (qp->qp_type == IB_QPT_GSI) {
223 wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
224#ifdef DEBUG_GSI_SEND_WR
225 trace_send_wr_ud(send_wr);
226#endif /* DEBUG_GSI_SEND_WR */
227 }
228 break;
229
230 case IB_QPT_UC:
231 if (send_wr->send_flags & IB_SEND_FENCE)
232 wqe_p->wr_flag |= WQE_WRFLAG_FENCE;
233 /* no break is intentional here */
234 case IB_QPT_RC:
235 /* TODO: atomic not implemented */
236 wqe_p->u.nud.remote_virtual_adress =
237 send_wr->wr.rdma.remote_addr;
238 wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
239
240 /*
241 * omitted checking of IB_SEND_INLINE
242 * since HW does not support it
243 */
244 dma_length = 0;
245 for (idx = 0; idx < send_wr->num_sge; idx++) {
246 wqe_p->u.nud.sg_list[idx].vaddr =
247 send_wr->sg_list[idx].addr;
248 wqe_p->u.nud.sg_list[idx].lkey =
249 send_wr->sg_list[idx].lkey;
250 wqe_p->u.nud.sg_list[idx].length =
251 send_wr->sg_list[idx].length;
252 dma_length += send_wr->sg_list[idx].length;
253 } /* eof idx */
254 wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
255
256 break;
257
258 default:
259 ehca_gen_err("Invalid qptype=%x", qp->qp_type);
260 return -EINVAL;
261 }
262
263 if (ehca_debug_level) {
264 ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp);
265 ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe");
266 }
267 return 0;
268}
269
270/* map_ib_wc_status converts raw cqe_status to ib_wc_status */
271static inline void map_ib_wc_status(u32 cqe_status,
272 enum ib_wc_status *wc_status)
273{
274 if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) {
275 switch (cqe_status & 0x3F) {
276 case 0x01:
277 case 0x21:
278 *wc_status = IB_WC_LOC_LEN_ERR;
279 break;
280 case 0x02:
281 case 0x22:
282 *wc_status = IB_WC_LOC_QP_OP_ERR;
283 break;
284 case 0x03:
285 case 0x23:
286 *wc_status = IB_WC_LOC_EEC_OP_ERR;
287 break;
288 case 0x04:
289 case 0x24:
290 *wc_status = IB_WC_LOC_PROT_ERR;
291 break;
292 case 0x05:
293 case 0x25:
294 *wc_status = IB_WC_WR_FLUSH_ERR;
295 break;
296 case 0x06:
297 *wc_status = IB_WC_MW_BIND_ERR;
298 break;
299 case 0x07: /* remote error - look into bits 20:24 */
300 switch ((cqe_status
301 & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) {
302 case 0x0:
303 /*
304 * PSN Sequence Error!
305 * couldn't find a matching status!
306 */
307 *wc_status = IB_WC_GENERAL_ERR;
308 break;
309 case 0x1:
310 *wc_status = IB_WC_REM_INV_REQ_ERR;
311 break;
312 case 0x2:
313 *wc_status = IB_WC_REM_ACCESS_ERR;
314 break;
315 case 0x3:
316 *wc_status = IB_WC_REM_OP_ERR;
317 break;
318 case 0x4:
319 *wc_status = IB_WC_REM_INV_RD_REQ_ERR;
320 break;
321 }
322 break;
323 case 0x08:
324 *wc_status = IB_WC_RETRY_EXC_ERR;
325 break;
326 case 0x09:
327 *wc_status = IB_WC_RNR_RETRY_EXC_ERR;
328 break;
329 case 0x0A:
330 case 0x2D:
331 *wc_status = IB_WC_REM_ABORT_ERR;
332 break;
333 case 0x0B:
334 case 0x2E:
335 *wc_status = IB_WC_INV_EECN_ERR;
336 break;
337 case 0x0C:
338 case 0x2F:
339 *wc_status = IB_WC_INV_EEC_STATE_ERR;
340 break;
341 case 0x0D:
342 *wc_status = IB_WC_BAD_RESP_ERR;
343 break;
344 case 0x10:
345 /* WQE purged */
346 *wc_status = IB_WC_WR_FLUSH_ERR;
347 break;
348 default:
349 *wc_status = IB_WC_FATAL_ERR;
350
351 }
352 } else
353 *wc_status = IB_WC_SUCCESS;
354}
355
356int ehca_post_send(struct ib_qp *qp,
357 struct ib_send_wr *send_wr,
358 struct ib_send_wr **bad_send_wr)
359{
360 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
361 struct ib_send_wr *cur_send_wr;
362 struct ehca_wqe *wqe_p;
363 int wqe_cnt = 0;
364 int ret = 0;
365 unsigned long spl_flags;
366
367 /* LOCK the QUEUE */
368 spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
369
370 /* loop processes list of send reqs */
371 for (cur_send_wr = send_wr; cur_send_wr != NULL;
372 cur_send_wr = cur_send_wr->next) {
373 u64 start_offset = my_qp->ipz_squeue.current_q_offset;
374 /* get pointer next to free WQE */
375 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
376 if (unlikely(!wqe_p)) {
377 /* too many posted work requests: queue overflow */
378 if (bad_send_wr)
379 *bad_send_wr = cur_send_wr;
380 if (wqe_cnt == 0) {
381 ret = -ENOMEM;
382 ehca_err(qp->device, "Too many posted WQEs "
383 "qp_num=%x", qp->qp_num);
384 }
385 goto post_send_exit0;
386 }
387 /* write a SEND WQE into the QUEUE */
388 ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
389 /*
390 * if something failed,
391 * reset the free entry pointer to the start value
392 */
393 if (unlikely(ret)) {
394 my_qp->ipz_squeue.current_q_offset = start_offset;
395			if (bad_send_wr) *bad_send_wr = cur_send_wr;
396 if (wqe_cnt == 0) {
397 ret = -EINVAL;
398 ehca_err(qp->device, "Could not write WQE "
399 "qp_num=%x", qp->qp_num);
400 }
401 goto post_send_exit0;
402 }
403 wqe_cnt++;
404 ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
405 my_qp, qp->qp_num, wqe_cnt);
406 } /* eof for cur_send_wr */
407
408post_send_exit0:
409 /* UNLOCK the QUEUE */
410 spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
411 iosync(); /* serialize GAL register access */
412 hipz_update_sqa(my_qp, wqe_cnt);
413 return ret;
414}
415
416int ehca_post_recv(struct ib_qp *qp,
417 struct ib_recv_wr *recv_wr,
418 struct ib_recv_wr **bad_recv_wr)
419{
420 struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
421 struct ib_recv_wr *cur_recv_wr;
422 struct ehca_wqe *wqe_p;
423 int wqe_cnt = 0;
424 int ret = 0;
425 unsigned long spl_flags;
426
427 /* LOCK the QUEUE */
428 spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
429
430	/* loop processes list of recv reqs */
431 for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
432 cur_recv_wr = cur_recv_wr->next) {
433 u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
434 /* get pointer next to free WQE */
435 wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
436 if (unlikely(!wqe_p)) {
437 /* too many posted work requests: queue overflow */
438 if (bad_recv_wr)
439 *bad_recv_wr = cur_recv_wr;
440 if (wqe_cnt == 0) {
441 ret = -ENOMEM;
442 ehca_err(qp->device, "Too many posted WQEs "
443 "qp_num=%x", qp->qp_num);
444 }
445 goto post_recv_exit0;
446 }
447 /* write a RECV WQE into the QUEUE */
448 ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr);
449 /*
450 * if something failed,
451 * reset the free entry pointer to the start value
452 */
453 if (unlikely(ret)) {
454 my_qp->ipz_rqueue.current_q_offset = start_offset;
455			if (bad_recv_wr) *bad_recv_wr = cur_recv_wr;
456 if (wqe_cnt == 0) {
457 ret = -EINVAL;
458 ehca_err(qp->device, "Could not write WQE "
459 "qp_num=%x", qp->qp_num);
460 }
461 goto post_recv_exit0;
462 }
463 wqe_cnt++;
464 ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
465 my_qp, qp->qp_num, wqe_cnt);
466 } /* eof for cur_recv_wr */
467
468post_recv_exit0:
469 spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
470 iosync(); /* serialize GAL register access */
471 hipz_update_rqa(my_qp, wqe_cnt);
472 return ret;
473}
474
475/*
476 * ib_wc_opcode table converts the eHCA WC opcode to the IB one.
477 * Since zero marks an invalid opcode, each valid entry stores the IB opcode
478 * plus one, so the value read from the table must be decremented by one.
479 */
480static const u8 ib_wc_opcode[255] = {
481 [0x01] = IB_WC_RECV+1,
482 [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
483 [0x04] = IB_WC_BIND_MW+1,
484 [0x08] = IB_WC_FETCH_ADD+1,
485 [0x10] = IB_WC_COMP_SWAP+1,
486 [0x20] = IB_WC_RDMA_WRITE+1,
487 [0x40] = IB_WC_RDMA_READ+1,
488 [0x80] = IB_WC_SEND+1
489};
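As an aside, the zero-means-invalid encoding above is easy to misread in isolation. The stand-alone sketch below is editorial and not part of the driver; the DEMO_* names, the table contents, and main() are invented purely for illustration, but the store-plus-one / subtract-one-on-lookup pattern is the same one ehca_poll_cq_one() applies to this table.

#include <stdio.h>

enum demo_wc_opcode { DEMO_WC_SEND, DEMO_WC_RDMA_WRITE, DEMO_WC_RECV };

/* sparse table indexed by a hypothetical hardware opcode */
static const unsigned char demo_opcode_tbl[256] = {
	[0x01] = DEMO_WC_RECV + 1,       /* valid entries store opcode + 1 */
	[0x20] = DEMO_WC_RDMA_WRITE + 1,
	[0x80] = DEMO_WC_SEND + 1,
};

int main(void)
{
	unsigned int hw_op = 0x20;       /* pretend the CQE reported 0x20 */

	if (demo_opcode_tbl[hw_op] == 0) {   /* a zero slot means "invalid" */
		printf("hw opcode 0x%x is invalid\n", hw_op);
		return 1;
	}
	/* subtract one to undo the +1 encoding, as ehca_poll_cq_one() does */
	printf("hw opcode 0x%x -> demo wc opcode %d\n",
	       hw_op, demo_opcode_tbl[hw_op] - 1);
	return 0;
}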
490
491/* internal function to poll one entry of cq */
492static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
493{
494 int ret = 0;
495 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
496 struct ehca_cqe *cqe;
497 int cqe_count = 0;
498
499poll_cq_one_read_cqe:
500 cqe = (struct ehca_cqe *)
501 ipz_qeit_get_inc_valid(&my_cq->ipz_queue);
502 if (!cqe) {
503 ret = -EAGAIN;
504 ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
505 "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
506 goto poll_cq_one_exit0;
507 }
508
509 /* prevents loads being reordered across this point */
510 rmb();
511
512 cqe_count++;
513 if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
514 struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
515 int purgeflag;
516 unsigned long spl_flags;
517 if (!qp) {
518 ehca_err(cq->device, "cq_num=%x qp_num=%x "
519 "could not find qp -> ignore cqe",
520 my_cq->cq_number, cqe->local_qp_number);
521 ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x",
522 my_cq->cq_number, cqe->local_qp_number);
523 /* ignore this purged cqe */
524 goto poll_cq_one_read_cqe;
525 }
526 spin_lock_irqsave(&qp->spinlock_s, spl_flags);
527 purgeflag = qp->sqerr_purgeflag;
528 spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
529
530 if (purgeflag) {
531 ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
532 "src_qp=%x",
533 cqe->local_qp_number, cqe->remote_qp_number);
534 if (ehca_debug_level)
535 ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x",
536 cqe->local_qp_number,
537 cqe->remote_qp_number);
538 /*
539			 * ignore this CQE to avoid duplicate CQEs for the bad WQE
540			 * that caused the SQ error, and turn off the purge flag
541 */
542 qp->sqerr_purgeflag = 0;
543 goto poll_cq_one_read_cqe;
544 }
545 }
546
547 /* tracing cqe */
548 if (ehca_debug_level) {
549 ehca_dbg(cq->device,
550 "Received COMPLETION ehca_cq=%p cq_num=%x -----",
551 my_cq, my_cq->cq_number);
552 ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
553 my_cq, my_cq->cq_number);
554 ehca_dbg(cq->device,
555 "ehca_cq=%p cq_num=%x -------------------------",
556 my_cq, my_cq->cq_number);
557 }
558
559 /* we got a completion! */
560 wc->wr_id = cqe->work_request_id;
561
562 /* eval ib_wc_opcode */
563 wc->opcode = ib_wc_opcode[cqe->optype]-1;
564 if (unlikely(wc->opcode == -1)) {
565 ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x "
566 "ehca_cq=%p cq_num=%x",
567 cqe->optype, cqe->status, my_cq, my_cq->cq_number);
568 /* dump cqe for other infos */
569 ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x",
570 my_cq, my_cq->cq_number);
571 /* update also queue adder to throw away this entry!!! */
572 goto poll_cq_one_exit0;
573 }
574 /* eval ib_wc_status */
575 if (unlikely(cqe->status & WC_STATUS_ERROR_BIT)) {
576 /* complete with errors */
577 map_ib_wc_status(cqe->status, &wc->status);
578 wc->vendor_err = wc->status;
579 } else
580 wc->status = IB_WC_SUCCESS;
581
582 wc->qp_num = cqe->local_qp_number;
583 wc->byte_len = cqe->nr_bytes_transferred;
584 wc->pkey_index = cqe->pkey_index;
585 wc->slid = cqe->rlid;
586 wc->dlid_path_bits = cqe->dlid;
587 wc->src_qp = cqe->remote_qp_number;
588 wc->wc_flags = cqe->w_completion_flags;
589 wc->imm_data = cpu_to_be32(cqe->immediate_data);
590 wc->sl = cqe->service_level;
591
592 if (wc->status != IB_WC_SUCCESS)
593 ehca_dbg(cq->device,
594 "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
595 "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
596 "cqe=%p", my_cq, my_cq->cq_number, cqe->optype,
597 cqe->status, cqe->local_qp_number,
598 cqe->remote_qp_number, cqe->work_request_id, cqe);
599
600poll_cq_one_exit0:
601 if (cqe_count > 0)
602 hipz_update_feca(my_cq, cqe_count);
603
604 return ret;
605}
606
607int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
608{
609 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
610 int nr;
611 struct ib_wc *current_wc = wc;
612 int ret = 0;
613 unsigned long spl_flags;
614
615 if (num_entries < 1) {
616 ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
617 "cq_num=%x", num_entries, my_cq, my_cq->cq_number);
618 ret = -EINVAL;
619 goto poll_cq_exit0;
620 }
621
622 spin_lock_irqsave(&my_cq->spinlock, spl_flags);
623 for (nr = 0; nr < num_entries; nr++) {
624 ret = ehca_poll_cq_one(cq, current_wc);
625 if (ret)
626 break;
627 current_wc++;
628 } /* eof for nr */
629 spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
630 if (ret == -EAGAIN || !ret)
631 ret = nr;
632
633poll_cq_exit0:
634 return ret;
635}
636
637int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify cq_notify)
638{
639 struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
640
641 switch (cq_notify) {
642 case IB_CQ_SOLICITED:
643 hipz_set_cqx_n0(my_cq, 1);
644 break;
645 case IB_CQ_NEXT_COMP:
646 hipz_set_cqx_n1(my_cq, 1);
647 break;
648 default:
649 return -EINVAL;
650 }
651
652 return 0;
653}
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
new file mode 100644
index 000000000000..9f16e9c79394
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -0,0 +1,111 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * SQP functions
5 *
6 * Authors: Khadija Souissi <souissi@de.ibm.com>
7 * Heiko J Schick <schickhj@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42
43#include <linux/module.h>
44#include <linux/err.h>
45#include "ehca_classes.h"
46#include "ehca_tools.h"
47#include "ehca_qes.h"
48#include "ehca_iverbs.h"
49#include "hcp_if.h"
50
51
52/**
53 * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When the special
54 * queue pair is created successfully, the corresponding port becomes active.
55 *
56 * Defining special queue pair 0 (SMI QP) is not yet supported.
57 *
58 * @qp_init_attr: Queue pair init attributes with port and queue pair type
59 */
60
61u64 ehca_define_sqp(struct ehca_shca *shca,
62 struct ehca_qp *ehca_qp,
63 struct ib_qp_init_attr *qp_init_attr)
64{
65 u32 pma_qp_nr, bma_qp_nr;
66 u64 ret;
67 u8 port = qp_init_attr->port_num;
68 int counter;
69
70 shca->sport[port - 1].port_state = IB_PORT_DOWN;
71
72 switch (qp_init_attr->qp_type) {
73 case IB_QPT_SMI:
74 /* function not supported yet */
75 break;
76 case IB_QPT_GSI:
77 ret = hipz_h_define_aqp1(shca->ipz_hca_handle,
78 ehca_qp->ipz_qp_handle,
79 ehca_qp->galpas.kernel,
80 (u32) qp_init_attr->port_num,
81 &pma_qp_nr, &bma_qp_nr);
82
83 if (ret != H_SUCCESS) {
84 ehca_err(&shca->ib_device,
85 "Can't define AQP1 for port %x. rc=%lx",
86 port, ret);
87 return ret;
88 }
89 break;
90 default:
91 ehca_err(&shca->ib_device, "invalid qp_type=%x",
92 qp_init_attr->qp_type);
93 return H_PARAMETER;
94 }
95
96 for (counter = 0;
97 shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
98 counter < ehca_port_act_time;
99 counter++) {
100 ehca_dbg(&shca->ib_device, "... wait until port %x is active",
101 port);
102 msleep_interruptible(1000);
103 }
104
105 if (counter == ehca_port_act_time) {
106 ehca_err(&shca->ib_device, "Port %x is not active.", port);
107 return H_HARDWARE;
108 }
109
110 return H_SUCCESS;
111}
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
new file mode 100644
index 000000000000..9f56bb846d93
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -0,0 +1,172 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * auxiliary functions
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Khadija Souissi <souissik@de.ibm.com>
9 * Waleri Fomin <fomin@de.ibm.com>
10 * Heiko J Schick <schickhj@de.ibm.com>
11 *
12 * Copyright (c) 2005 IBM Corporation
13 *
14 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
15 * BSD.
16 *
17 * OpenIB BSD License
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are met:
21 *
22 * Redistributions of source code must retain the above copyright notice, this
23 * list of conditions and the following disclaimer.
24 *
25 * Redistributions in binary form must reproduce the above copyright notice,
26 * this list of conditions and the following disclaimer in the documentation
27 * and/or other materials
28 * provided with the distribution.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43
44#ifndef EHCA_TOOLS_H
45#define EHCA_TOOLS_H
46
47#include <linux/kernel.h>
48#include <linux/spinlock.h>
49#include <linux/delay.h>
50#include <linux/idr.h>
51#include <linux/kthread.h>
52#include <linux/mm.h>
53#include <linux/mman.h>
54#include <linux/module.h>
55#include <linux/moduleparam.h>
56#include <linux/vmalloc.h>
57#include <linux/version.h>
58#include <linux/notifier.h>
59#include <linux/cpu.h>
60#include <linux/device.h>
61
62#include <asm/abs_addr.h>
63#include <asm/ibmebus.h>
64#include <asm/io.h>
65#include <asm/pgtable.h>
66
67extern int ehca_debug_level;
68
69#define ehca_dbg(ib_dev, format, arg...) \
70 do { \
71 if (unlikely(ehca_debug_level)) \
72 dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
73 "PU%04x EHCA_DBG:%s " format "\n", \
74 get_paca()->paca_index, __FUNCTION__, \
75 ## arg); \
76 } while (0)
77
78#define ehca_info(ib_dev, format, arg...) \
79 dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
80 get_paca()->paca_index, __FUNCTION__, ## arg)
81
82#define ehca_warn(ib_dev, format, arg...) \
83 dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
84 get_paca()->paca_index, __FUNCTION__, ## arg)
85
86#define ehca_err(ib_dev, format, arg...) \
87 dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
88 get_paca()->paca_index, __FUNCTION__, ## arg)
89
90/* use this one only if no ib_dev available */
91#define ehca_gen_dbg(format, arg...) \
92 do { \
93 if (unlikely(ehca_debug_level)) \
94 printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n",\
95 get_paca()->paca_index, __FUNCTION__, ## arg); \
96 } while (0)
97
98#define ehca_gen_warn(format, arg...) \
99 do { \
100 if (unlikely(ehca_debug_level)) \
101 printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n",\
102 get_paca()->paca_index, __FUNCTION__, ## arg); \
103 } while (0)
104
105#define ehca_gen_err(format, arg...) \
106 printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
107 get_paca()->paca_index, __FUNCTION__, ## arg)
108
109/**
110 * ehca_dmp - printk a memory block, whose length is n*8 bytes.
111 * Each line has the following layout:
112 * <format string> adr=X ofs=Y <8 bytes hex> <8 bytes hex>
113 */
114#define ehca_dmp(adr, len, format, args...) \
115 do { \
116 unsigned int x; \
117 unsigned int l = (unsigned int)(len); \
118 unsigned char *deb = (unsigned char*)(adr); \
119 for (x = 0; x < l; x += 16) { \
120 printk("EHCA_DMP:%s" format \
121 " adr=%p ofs=%04x %016lx %016lx\n", \
122 __FUNCTION__, ##args, deb, x, \
123 *((u64 *)&deb[0]), *((u64 *)&deb[8])); \
124 deb += 16; \
125 } \
126 } while (0)
127
128/* define a bitmask, little endian version */
129#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
130
131/* define a bitmask, the ibm way... */
132#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1))
133
134/* internal function, don't use */
135#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
136
137/* internal function, don't use */
138#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
139
140/**
141 * EHCA_BMASK_SET - return value shifted and masked by mask
142 * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable
143 * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask
144 * in variable
145 */
146#define EHCA_BMASK_SET(mask,value) \
147 ((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
148
149/**
150 * EHCA_BMASK_GET - extract a parameter from value by mask
151 */
152#define EHCA_BMASK_GET(mask,value) \
153 (EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))
154
155
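For readers unfamiliar with the (pos << 16) + length encoding used by these helpers, the following stand-alone user-space sketch is an editorial illustration, not part of the header: the macro bodies are copied from the lines above, while MY_FIELD and the test value are invented. It shows a worked set/get round trip for a mask defined in IBM bit numbering (bits 40..47, which land at little-endian bit positions 16..23).

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* copied verbatim from ehca_tools.h above */
#define EHCA_BMASK(pos,length) (((pos)<<16)+(length))
#define EHCA_BMASK_IBM(from,to) (((63-to)<<16)+((to)-(from)+1))
#define EHCA_BMASK_SHIFTPOS(mask) (((mask)>>16)&0xffff)
#define EHCA_BMASK_MASK(mask) (0xffffffffffffffffULL >> ((64-(mask))&0xffff))
#define EHCA_BMASK_SET(mask,value) \
	((EHCA_BMASK_MASK(mask) & ((u64)(value)))<<EHCA_BMASK_SHIFTPOS(mask))
#define EHCA_BMASK_GET(mask,value) \
	(EHCA_BMASK_MASK(mask)& (((u64)(value))>>EHCA_BMASK_SHIFTPOS(mask)))

/* invented example field: 8 bits wide, shift position 16 */
#define MY_FIELD EHCA_BMASK_IBM(40, 47)

int main(void)
{
	u64 reg = 0;

	/* place 0xAB into bits 16..23 of reg */
	reg |= EHCA_BMASK_SET(MY_FIELD, 0xAB);
	printf("reg   = 0x%016llx\n", (unsigned long long)reg);
	/* extract the field again -> prints 0xab */
	printf("field = 0x%llx\n",
	       (unsigned long long)EHCA_BMASK_GET(MY_FIELD, reg));
	return 0;
}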
156/* Converts ehca to ib return code */
157static inline int ehca2ib_return_code(u64 ehca_rc)
158{
159 switch (ehca_rc) {
160 case H_SUCCESS:
161 return 0;
162 case H_BUSY:
163 return -EBUSY;
164 case H_NO_MEM:
165 return -ENOMEM;
166 default:
167 return -EINVAL;
168 }
169}
170
171
172#endif /* EHCA_TOOLS_H */
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
new file mode 100644
index 000000000000..e08764e4aef2
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -0,0 +1,392 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * userspace support verbs
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Heiko J Schick <schickhj@de.ibm.com>
9 *
10 * Copyright (c) 2005 IBM Corporation
11 *
12 * All rights reserved.
13 *
14 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
15 * BSD.
16 *
17 * OpenIB BSD License
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are met:
21 *
22 * Redistributions of source code must retain the above copyright notice, this
23 * list of conditions and the following disclaimer.
24 *
25 * Redistributions in binary form must reproduce the above copyright notice,
26 * this list of conditions and the following disclaimer in the documentation
27 * and/or other materials
28 * provided with the distribution.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43#include <asm/current.h>
44
45#include "ehca_classes.h"
46#include "ehca_iverbs.h"
47#include "ehca_mrmw.h"
48#include "ehca_tools.h"
49#include "hcp_if.h"
50
51struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device,
52 struct ib_udata *udata)
53{
54 struct ehca_ucontext *my_context;
55
56 my_context = kzalloc(sizeof *my_context, GFP_KERNEL);
57 if (!my_context) {
58 ehca_err(device, "Out of memory device=%p", device);
59 return ERR_PTR(-ENOMEM);
60 }
61
62 return &my_context->ib_ucontext;
63}
64
65int ehca_dealloc_ucontext(struct ib_ucontext *context)
66{
67 kfree(container_of(context, struct ehca_ucontext, ib_ucontext));
68 return 0;
69}
70
71struct page *ehca_nopage(struct vm_area_struct *vma,
72 unsigned long address, int *type)
73{
74 struct page *mypage = NULL;
75 u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
76 u32 idr_handle = fileoffset >> 32;
77 u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */
78 u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
79 u32 cur_pid = current->tgid;
80 unsigned long flags;
81 struct ehca_cq *cq;
82 struct ehca_qp *qp;
83 struct ehca_pd *pd;
84 u64 offset;
85 void *vaddr;
86
87 switch (q_type) {
88 case 1: /* CQ */
89 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
90 cq = idr_find(&ehca_cq_idr, idr_handle);
91 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
92
93 /* make sure this mmap really belongs to the authorized user */
94 if (!cq) {
95 ehca_gen_err("cq is NULL ret=NOPAGE_SIGBUS");
96 return NOPAGE_SIGBUS;
97 }
98
99 if (cq->ownpid != cur_pid) {
100 ehca_err(cq->ib_cq.device,
101 "Invalid caller pid=%x ownpid=%x",
102 cur_pid, cq->ownpid);
103 return NOPAGE_SIGBUS;
104 }
105
106 if (rsrc_type == 2) {
107 ehca_dbg(cq->ib_cq.device, "cq=%p cq queuearea", cq);
108 offset = address - vma->vm_start;
109 vaddr = ipz_qeit_calc(&cq->ipz_queue, offset);
110 ehca_dbg(cq->ib_cq.device, "offset=%lx vaddr=%p",
111 offset, vaddr);
112 mypage = virt_to_page(vaddr);
113 }
114 break;
115
116 case 2: /* QP */
117 spin_lock_irqsave(&ehca_qp_idr_lock, flags);
118 qp = idr_find(&ehca_qp_idr, idr_handle);
119 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
120
121 /* make sure this mmap really belongs to the authorized user */
122 if (!qp) {
123 ehca_gen_err("qp is NULL ret=NOPAGE_SIGBUS");
124 return NOPAGE_SIGBUS;
125 }
126
127 pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
128 if (pd->ownpid != cur_pid) {
129 ehca_err(qp->ib_qp.device,
130 "Invalid caller pid=%x ownpid=%x",
131 cur_pid, pd->ownpid);
132 return NOPAGE_SIGBUS;
133 }
134
135 if (rsrc_type == 2) { /* rqueue */
136 ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueuearea", qp);
137 offset = address - vma->vm_start;
138 vaddr = ipz_qeit_calc(&qp->ipz_rqueue, offset);
139 ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
140 offset, vaddr);
141 mypage = virt_to_page(vaddr);
142 } else if (rsrc_type == 3) { /* squeue */
143 ehca_dbg(qp->ib_qp.device, "qp=%p qp squeuearea", qp);
144 offset = address - vma->vm_start;
145 vaddr = ipz_qeit_calc(&qp->ipz_squeue, offset);
146 ehca_dbg(qp->ib_qp.device, "offset=%lx vaddr=%p",
147 offset, vaddr);
148 mypage = virt_to_page(vaddr);
149 }
150 break;
151
152 default:
153 ehca_gen_err("bad queue type %x", q_type);
154 return NOPAGE_SIGBUS;
155 }
156
157 if (!mypage) {
158 ehca_gen_err("Invalid page adr==NULL ret=NOPAGE_SIGBUS");
159 return NOPAGE_SIGBUS;
160 }
161 get_page(mypage);
162
163 return mypage;
164}
165
166static struct vm_operations_struct ehcau_vm_ops = {
167 .nopage = ehca_nopage,
168};
169
170int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
171{
172 u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
173 u32 idr_handle = fileoffset >> 32;
174 u32 q_type = (fileoffset >> 28) & 0xF; /* CQ, QP,... */
175 u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
176 u32 cur_pid = current->tgid;
177 u32 ret;
178 u64 vsize, physical;
179 unsigned long flags;
180 struct ehca_cq *cq;
181 struct ehca_qp *qp;
182 struct ehca_pd *pd;
183
184 switch (q_type) {
185 case 1: /* CQ */
186 spin_lock_irqsave(&ehca_cq_idr_lock, flags);
187 cq = idr_find(&ehca_cq_idr, idr_handle);
188 spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
189
190 /* make sure this mmap really belongs to the authorized user */
191 if (!cq)
192 return -EINVAL;
193
194 if (cq->ownpid != cur_pid) {
195 ehca_err(cq->ib_cq.device,
196 "Invalid caller pid=%x ownpid=%x",
197 cur_pid, cq->ownpid);
198 return -ENOMEM;
199 }
200
201 if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
202 return -EINVAL;
203
204 switch (rsrc_type) {
205 case 1: /* galpa fw handle */
206 ehca_dbg(cq->ib_cq.device, "cq=%p cq triggerarea", cq);
207 vma->vm_flags |= VM_RESERVED;
208 vsize = vma->vm_end - vma->vm_start;
209 if (vsize != EHCA_PAGESIZE) {
210 ehca_err(cq->ib_cq.device, "invalid vsize=%lx",
211 vma->vm_end - vma->vm_start);
212 return -EINVAL;
213 }
214
215 physical = cq->galpas.user.fw_handle;
216 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
217 vma->vm_flags |= VM_IO | VM_RESERVED;
218
219 ehca_dbg(cq->ib_cq.device,
220 "vsize=%lx physical=%lx", vsize, physical);
221 ret = remap_pfn_range(vma, vma->vm_start,
222 physical >> PAGE_SHIFT, vsize,
223 vma->vm_page_prot);
224 if (ret) {
225 ehca_err(cq->ib_cq.device,
226 "remap_pfn_range() failed ret=%x",
227 ret);
228 return -ENOMEM;
229 }
230 break;
231
232 case 2: /* cq queue_addr */
233 ehca_dbg(cq->ib_cq.device, "cq=%p cq q_addr", cq);
234 vma->vm_flags |= VM_RESERVED;
235 vma->vm_ops = &ehcau_vm_ops;
236 break;
237
238 default:
239 ehca_err(cq->ib_cq.device, "bad resource type %x",
240 rsrc_type);
241 return -EINVAL;
242 }
243 break;
244
245 case 2: /* QP */
246 spin_lock_irqsave(&ehca_qp_idr_lock, flags);
247 qp = idr_find(&ehca_qp_idr, idr_handle);
248 spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
249
250 /* make sure this mmap really belongs to the authorized user */
251 if (!qp)
252 return -EINVAL;
253
254 pd = container_of(qp->ib_qp.pd, struct ehca_pd, ib_pd);
255 if (pd->ownpid != cur_pid) {
256 ehca_err(qp->ib_qp.device,
257 "Invalid caller pid=%x ownpid=%x",
258 cur_pid, pd->ownpid);
259 return -ENOMEM;
260 }
261
262 if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
263 return -EINVAL;
264
265 switch (rsrc_type) {
266 case 1: /* galpa fw handle */
267 ehca_dbg(qp->ib_qp.device, "qp=%p qp triggerarea", qp);
268 vma->vm_flags |= VM_RESERVED;
269 vsize = vma->vm_end - vma->vm_start;
270 if (vsize != EHCA_PAGESIZE) {
271 ehca_err(qp->ib_qp.device, "invalid vsize=%lx",
272 vma->vm_end - vma->vm_start);
273 return -EINVAL;
274 }
275
276 physical = qp->galpas.user.fw_handle;
277 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
278 vma->vm_flags |= VM_IO | VM_RESERVED;
279
280 ehca_dbg(qp->ib_qp.device, "vsize=%lx physical=%lx",
281 vsize, physical);
282 ret = remap_pfn_range(vma, vma->vm_start,
283 physical >> PAGE_SHIFT, vsize,
284 vma->vm_page_prot);
285 if (ret) {
286 ehca_err(qp->ib_qp.device,
287 "remap_pfn_range() failed ret=%x",
288 ret);
289 return -ENOMEM;
290 }
291 break;
292
293 case 2: /* qp rqueue_addr */
294 ehca_dbg(qp->ib_qp.device, "qp=%p qp rqueue_addr", qp);
295 vma->vm_flags |= VM_RESERVED;
296 vma->vm_ops = &ehcau_vm_ops;
297 break;
298
299 case 3: /* qp squeue_addr */
300 ehca_dbg(qp->ib_qp.device, "qp=%p qp squeue_addr", qp);
301 vma->vm_flags |= VM_RESERVED;
302 vma->vm_ops = &ehcau_vm_ops;
303 break;
304
305 default:
306 ehca_err(qp->ib_qp.device, "bad resource type %x",
307 rsrc_type);
308 return -EINVAL;
309 }
310 break;
311
312 default:
313 ehca_gen_err("bad queue type %x", q_type);
314 return -EINVAL;
315 }
316
317 return 0;
318}
319
320int ehca_mmap_nopage(u64 foffset, u64 length, void **mapped,
321 struct vm_area_struct **vma)
322{
323 down_write(&current->mm->mmap_sem);
324 *mapped = (void*)do_mmap(NULL,0, length, PROT_WRITE,
325 MAP_SHARED | MAP_ANONYMOUS,
326 foffset);
327 up_write(&current->mm->mmap_sem);
328 if (!(*mapped)) {
329 ehca_gen_err("couldn't mmap foffset=%lx length=%lx",
330 foffset, length);
331 return -EINVAL;
332 }
333
334 *vma = find_vma(current->mm, (u64)*mapped);
335 if (!(*vma)) {
336 down_write(&current->mm->mmap_sem);
337 do_munmap(current->mm, 0, length);
338 up_write(&current->mm->mmap_sem);
339 ehca_gen_err("couldn't find vma queue=%p", *mapped);
340 return -EINVAL;
341 }
342 (*vma)->vm_flags |= VM_RESERVED;
343 (*vma)->vm_ops = &ehcau_vm_ops;
344
345 return 0;
346}
347
348int ehca_mmap_register(u64 physical, void **mapped,
349 struct vm_area_struct **vma)
350{
351 int ret;
352 unsigned long vsize;
353 /* ehca hw supports only 4k page */
354 ret = ehca_mmap_nopage(0, EHCA_PAGESIZE, mapped, vma);
355 if (ret) {
356		ehca_gen_err("couldn't mmap physical=%lx", physical);
357 return ret;
358 }
359
360 (*vma)->vm_flags |= VM_RESERVED;
361 vsize = (*vma)->vm_end - (*vma)->vm_start;
362 if (vsize != EHCA_PAGESIZE) {
363 ehca_gen_err("invalid vsize=%lx",
364 (*vma)->vm_end - (*vma)->vm_start);
365 return -EINVAL;
366 }
367
368 (*vma)->vm_page_prot = pgprot_noncached((*vma)->vm_page_prot);
369 (*vma)->vm_flags |= VM_IO | VM_RESERVED;
370
371 ret = remap_pfn_range((*vma), (*vma)->vm_start,
372 physical >> PAGE_SHIFT, vsize,
373 (*vma)->vm_page_prot);
374 if (ret) {
375 ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
376 return -ENOMEM;
377 }
378
379 return 0;
380
381}
382
383int ehca_munmap(unsigned long addr, size_t len) {
384 int ret = 0;
385 struct mm_struct *mm = current->mm;
386 if (mm) {
387 down_write(&mm->mmap_sem);
388 ret = do_munmap(mm, addr, len);
389 up_write(&mm->mmap_sem);
390 }
391 return ret;
392}
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
new file mode 100644
index 000000000000..3fb46e67df87
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -0,0 +1,874 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Firmware Infiniband Interface code for POWER
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Gerd Bayer <gerd.bayer@de.ibm.com>
9 * Waleri Fomin <fomin@de.ibm.com>
10 *
11 * Copyright (c) 2005 IBM Corporation
12 *
13 * All rights reserved.
14 *
15 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
16 * BSD.
17 *
18 * OpenIB BSD License
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions are met:
22 *
23 * Redistributions of source code must retain the above copyright notice, this
24 * list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright notice,
27 * this list of conditions and the following disclaimer in the documentation
28 * and/or other materials
29 * provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
39 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGE.
42 */
43
44#include <asm/hvcall.h>
45#include "ehca_tools.h"
46#include "hcp_if.h"
47#include "hcp_phyp.h"
48#include "hipz_fns.h"
49#include "ipz_pt_fn.h"
50
51#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11)
52#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12)
53#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15)
54#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18)
55#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21)
56#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23)
57#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31)
58#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63)
59
60#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15)
61#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31)
62#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39)
63#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47)
64
65#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31)
66#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63)
67#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15)
68#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31)
69
70#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31)
71#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63)
72
73/* direct access qp controls */
74#define DAQP_CTRL_ENABLE 0x01
75#define DAQP_CTRL_SEND_COMP 0x20
76#define DAQP_CTRL_RECV_COMP 0x40
77
78static u32 get_longbusy_msecs(int longbusy_rc)
79{
80 switch (longbusy_rc) {
81 case H_LONG_BUSY_ORDER_1_MSEC:
82 return 1;
83 case H_LONG_BUSY_ORDER_10_MSEC:
84 return 10;
85 case H_LONG_BUSY_ORDER_100_MSEC:
86 return 100;
87 case H_LONG_BUSY_ORDER_1_SEC:
88 return 1000;
89 case H_LONG_BUSY_ORDER_10_SEC:
90 return 10000;
91 case H_LONG_BUSY_ORDER_100_SEC:
92 return 100000;
93 default:
94 return 1;
95 }
96}
97
98static long ehca_plpar_hcall_norets(unsigned long opcode,
99 unsigned long arg1,
100 unsigned long arg2,
101 unsigned long arg3,
102 unsigned long arg4,
103 unsigned long arg5,
104 unsigned long arg6,
105 unsigned long arg7)
106{
107 long ret;
108 int i, sleep_msecs;
109
110 ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
111 "arg5=%lx arg6=%lx arg7=%lx",
112 opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
113
114 for (i = 0; i < 5; i++) {
115 ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
116 arg5, arg6, arg7);
117
118 if (H_IS_LONG_BUSY(ret)) {
119 sleep_msecs = get_longbusy_msecs(ret);
120 msleep_interruptible(sleep_msecs);
121 continue;
122 }
123
124 if (ret < H_SUCCESS)
125 ehca_gen_err("opcode=%lx ret=%lx"
126 " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
127 " arg5=%lx arg6=%lx arg7=%lx ",
128 opcode, ret,
129 arg1, arg2, arg3, arg4, arg5,
130 arg6, arg7);
131
132 ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
133 return ret;
134
135 }
136
137 return H_BUSY;
138}
139
140static long ehca_plpar_hcall9(unsigned long opcode,
141 unsigned long *outs, /* array of 9 outputs */
142 unsigned long arg1,
143 unsigned long arg2,
144 unsigned long arg3,
145 unsigned long arg4,
146 unsigned long arg5,
147 unsigned long arg6,
148 unsigned long arg7,
149 unsigned long arg8,
150 unsigned long arg9)
151{
152 long ret;
153 int i, sleep_msecs;
154
155 ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
156 "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
157 opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
158 arg8, arg9);
159
160 for (i = 0; i < 5; i++) {
161 ret = plpar_hcall9(opcode, outs,
162 arg1, arg2, arg3, arg4, arg5,
163 arg6, arg7, arg8, arg9);
164
165 if (H_IS_LONG_BUSY(ret)) {
166 sleep_msecs = get_longbusy_msecs(ret);
167 msleep_interruptible(sleep_msecs);
168 continue;
169 }
170
171 if (ret < H_SUCCESS)
172 ehca_gen_err("opcode=%lx ret=%lx"
173 " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
174 " arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
175 " arg9=%lx"
176 " out1=%lx out2=%lx out3=%lx out4=%lx"
177 " out5=%lx out6=%lx out7=%lx out8=%lx"
178 " out9=%lx",
179 opcode, ret,
180 arg1, arg2, arg3, arg4, arg5,
181 arg6, arg7, arg8, arg9,
182 outs[0], outs[1], outs[2], outs[3],
183 outs[4], outs[5], outs[6], outs[7],
184 outs[8]);
185
186 ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
187 "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
188 "out9=%lx",
189 opcode, ret, outs[0], outs[1], outs[2], outs[3],
190 outs[4], outs[5], outs[6], outs[7], outs[8]);
191 return ret;
192
193 }
194
195 return H_BUSY;
196}
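Both wrappers above implement the same retry policy: at most five attempts, sleeping for the interval suggested by the H_LONG_BUSY_* return code before retrying, and reporting H_BUSY if the firmware never settles. The stand-alone sketch below is editorial only; fake_hcall(), is_long_busy(), and the constant values are invented stand-ins for plpar_hcall_norets()/H_IS_LONG_BUSY(), and usleep() stands in for msleep_interruptible(). It reproduces just that control flow in user space.

#include <stdio.h>
#include <unistd.h>

#define H_SUCCESS                 0
#define H_BUSY                    1
#define H_LONG_BUSY_ORDER_10_MSEC 9901   /* made-up value for the demo */

static int fake_calls;

/* pretend firmware that is "long busy" twice, then succeeds */
static long fake_hcall(void)
{
	return (++fake_calls < 3) ? H_LONG_BUSY_ORDER_10_MSEC : H_SUCCESS;
}

static int is_long_busy(long rc)
{
	return rc == H_LONG_BUSY_ORDER_10_MSEC;
}

int main(void)
{
	long ret;
	int i;

	/* same shape as the wrappers above: at most 5 attempts, sleeping
	 * for the hinted interval whenever the call reports long busy */
	for (i = 0; i < 5; i++) {
		ret = fake_hcall();
		if (is_long_busy(ret)) {
			usleep(10 * 1000);       /* 10 ms, per the hint */
			continue;
		}
		printf("attempt %d returned %ld\n", i + 1, ret);
		return ret == H_SUCCESS ? 0 : 1;
	}

	printf("still busy after 5 attempts -> H_BUSY\n");
	return 1;
}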
197u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
198 struct ehca_pfeq *pfeq,
199 const u32 neq_control,
200 const u32 number_of_entries,
201 struct ipz_eq_handle *eq_handle,
202 u32 *act_nr_of_entries,
203 u32 *act_pages,
204 u32 *eq_ist)
205{
206 u64 ret;
207 u64 outs[PLPAR_HCALL9_BUFSIZE];
208 u64 allocate_controls;
209
210 /* resource type */
211 allocate_controls = 3ULL;
212
213 /* ISN is associated */
214 if (neq_control != 1)
215 allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
216 else /* notification event queue */
217 allocate_controls = (1ULL << 63) | allocate_controls;
218
219 ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
220 adapter_handle.handle, /* r4 */
221 allocate_controls, /* r5 */
222 number_of_entries, /* r6 */
223 0, 0, 0, 0, 0, 0);
224 eq_handle->handle = outs[0];
225 *act_nr_of_entries = (u32)outs[3];
226 *act_pages = (u32)outs[4];
227 *eq_ist = (u32)outs[5];
228
229 if (ret == H_NOT_ENOUGH_RESOURCES)
230 ehca_gen_err("Not enough resource - ret=%lx ", ret);
231
232 return ret;
233}
234
235u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
236 struct ipz_eq_handle eq_handle,
237 const u64 event_mask)
238{
239 return ehca_plpar_hcall_norets(H_RESET_EVENTS,
240 adapter_handle.handle, /* r4 */
241 eq_handle.handle, /* r5 */
242 event_mask, /* r6 */
243 0, 0, 0, 0);
244}
245
246u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
247 struct ehca_cq *cq,
248 struct ehca_alloc_cq_parms *param)
249{
250 u64 ret;
251 u64 outs[PLPAR_HCALL9_BUFSIZE];
252
253 ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
254 adapter_handle.handle, /* r4 */
255 2, /* r5 */
256 param->eq_handle.handle, /* r6 */
257 cq->token, /* r7 */
258 param->nr_cqe, /* r8 */
259 0, 0, 0, 0);
260 cq->ipz_cq_handle.handle = outs[0];
261 param->act_nr_of_entries = (u32)outs[3];
262 param->act_pages = (u32)outs[4];
263
264 if (ret == H_SUCCESS)
265 hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
266
267 if (ret == H_NOT_ENOUGH_RESOURCES)
268 ehca_gen_err("Not enough resources. ret=%lx", ret);
269
270 return ret;
271}
272
273u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
274 struct ehca_qp *qp,
275 struct ehca_alloc_qp_parms *parms)
276{
277 u64 ret;
278 u64 allocate_controls;
279 u64 max_r10_reg;
280 u64 outs[PLPAR_HCALL9_BUFSIZE];
281 u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
282 u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
283 int daqp_ctrl = parms->daqp_ctrl;
284
285 allocate_controls =
286 EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
287 (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
288 | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
289 | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
290 | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
291 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
292 (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
293 | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
294 (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
295 | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
296 parms->ud_av_l_key_ctl)
297 | EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
298
299 max_r10_reg =
300 EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
301 max_nr_send_wqes)
302 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
303 max_nr_receive_wqes)
304 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
305 parms->max_send_sge)
306 | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
307 parms->max_recv_sge);
308
309 ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
310 adapter_handle.handle, /* r4 */
311 allocate_controls, /* r5 */
312 qp->send_cq->ipz_cq_handle.handle,
313 qp->recv_cq->ipz_cq_handle.handle,
314 parms->ipz_eq_handle.handle,
315 ((u64)qp->token << 32) | parms->pd.value,
316 max_r10_reg, /* r10 */
317 parms->ud_av_l_key_ctl, /* r11 */
318 0);
319 qp->ipz_qp_handle.handle = outs[0];
320 qp->real_qp_num = (u32)outs[1];
321	parms->act_nr_send_wqes =
322 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
323 parms->act_nr_recv_wqes =
324 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
325 parms->act_nr_send_sges =
326 (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
327 parms->act_nr_recv_sges =
328 (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
329 parms->nr_sq_pages =
330 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
331 parms->nr_rq_pages =
332 (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
333
334 if (ret == H_SUCCESS)
335 hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
336
337 if (ret == H_NOT_ENOUGH_RESOURCES)
338 ehca_gen_err("Not enough resources. ret=%lx", ret);
339
340 return ret;
341}
342
343u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
344 const u8 port_id,
345 struct hipz_query_port *query_port_response_block)
346{
347 u64 ret;
348 u64 r_cb = virt_to_abs(query_port_response_block);
349
350 if (r_cb & (EHCA_PAGESIZE-1)) {
351 ehca_gen_err("response block not page aligned");
352 return H_PARAMETER;
353 }
354
355 ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
356 adapter_handle.handle, /* r4 */
357 port_id, /* r5 */
358 r_cb, /* r6 */
359 0, 0, 0, 0);
360
361 if (ehca_debug_level)
362 ehca_dmp(query_port_response_block, 64, "response_block");
363
364 return ret;
365}
366
367u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
368 struct hipz_query_hca *query_hca_rblock)
369{
370 u64 r_cb = virt_to_abs(query_hca_rblock);
371
372 if (r_cb & (EHCA_PAGESIZE-1)) {
373 ehca_gen_err("response_block=%p not page aligned",
374 query_hca_rblock);
375 return H_PARAMETER;
376 }
377
378 return ehca_plpar_hcall_norets(H_QUERY_HCA,
379 adapter_handle.handle, /* r4 */
380 r_cb, /* r5 */
381 0, 0, 0, 0, 0);
382}
383
384u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
385 const u8 pagesize,
386 const u8 queue_type,
387 const u64 resource_handle,
388 const u64 logical_address_of_page,
389 u64 count)
390{
391 return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
392 adapter_handle.handle, /* r4 */
393 queue_type | pagesize << 8, /* r5 */
394 resource_handle, /* r6 */
395 logical_address_of_page, /* r7 */
396 count, /* r8 */
397 0, 0);
398}
399
400u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
401 const struct ipz_eq_handle eq_handle,
402 struct ehca_pfeq *pfeq,
403 const u8 pagesize,
404 const u8 queue_type,
405 const u64 logical_address_of_page,
406 const u64 count)
407{
408 if (count != 1) {
409		ehca_gen_err("Page counter=%lx", count);
410 return H_PARAMETER;
411 }
412 return hipz_h_register_rpage(adapter_handle,
413 pagesize,
414 queue_type,
415 eq_handle.handle,
416 logical_address_of_page, count);
417}
418
419u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
420 u32 ist)
421{
422 u64 ret;
423 ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
424 adapter_handle.handle, /* r4 */
425 ist, /* r5 */
426 0, 0, 0, 0, 0);
427
428 if (ret != H_SUCCESS && ret != H_BUSY)
429 ehca_gen_err("Could not query interrupt state.");
430
431 return ret;
432}
433
434u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
435 const struct ipz_cq_handle cq_handle,
436 struct ehca_pfcq *pfcq,
437 const u8 pagesize,
438 const u8 queue_type,
439 const u64 logical_address_of_page,
440 const u64 count,
441 const struct h_galpa gal)
442{
443 if (count != 1) {
444 ehca_gen_err("Page counter=%lx", count);
445 return H_PARAMETER;
446 }
447
448 return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
449 cq_handle.handle, logical_address_of_page,
450 count);
451}
452
453u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
454 const struct ipz_qp_handle qp_handle,
455 struct ehca_pfqp *pfqp,
456 const u8 pagesize,
457 const u8 queue_type,
458 const u64 logical_address_of_page,
459 const u64 count,
460 const struct h_galpa galpa)
461{
462 if (count != 1) {
463 ehca_gen_err("Page counter=%lx", count);
464 return H_PARAMETER;
465 }
466
467	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
468				     qp_handle.handle, logical_address_of_page,
469 count);
470}
471
472u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
473 const struct ipz_qp_handle qp_handle,
474 struct ehca_pfqp *pfqp,
475 void **log_addr_next_sq_wqe2processed,
476 void **log_addr_next_rq_wqe2processed,
477 int dis_and_get_function_code)
478{
479 u64 ret;
480 u64 outs[PLPAR_HCALL9_BUFSIZE];
481
482 ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
483 adapter_handle.handle, /* r4 */
484 dis_and_get_function_code, /* r5 */
485 qp_handle.handle, /* r6 */
486 0, 0, 0, 0, 0, 0);
487 if (log_addr_next_sq_wqe2processed)
488		*log_addr_next_sq_wqe2processed = (void *)outs[0];
489 if (log_addr_next_rq_wqe2processed)
490		*log_addr_next_rq_wqe2processed = (void *)outs[1];
491
492 return ret;
493}
494
495u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
496 const struct ipz_qp_handle qp_handle,
497 struct ehca_pfqp *pfqp,
498 const u64 update_mask,
499 struct hcp_modify_qp_control_block *mqpcb,
500 struct h_galpa gal)
501{
502 u64 ret;
503 u64 outs[PLPAR_HCALL9_BUFSIZE];
504 ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
505 adapter_handle.handle, /* r4 */
506 qp_handle.handle, /* r5 */
507 update_mask, /* r6 */
508 virt_to_abs(mqpcb), /* r7 */
509 0, 0, 0, 0, 0);
510
511 if (ret == H_NOT_ENOUGH_RESOURCES)
512 ehca_gen_err("Insufficient resources ret=%lx", ret);
513
514 return ret;
515}
516
517u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
518 const struct ipz_qp_handle qp_handle,
519 struct ehca_pfqp *pfqp,
520 struct hcp_modify_qp_control_block *qqpcb,
521 struct h_galpa gal)
522{
523 return ehca_plpar_hcall_norets(H_QUERY_QP,
524 adapter_handle.handle, /* r4 */
525 qp_handle.handle, /* r5 */
526 virt_to_abs(qqpcb), /* r6 */
527 0, 0, 0, 0);
528}
529
530u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
531 struct ehca_qp *qp)
532{
533 u64 ret;
534 u64 outs[PLPAR_HCALL9_BUFSIZE];
535
536 ret = hcp_galpas_dtor(&qp->galpas);
537 if (ret) {
538 ehca_gen_err("Could not destruct qp->galpas");
539 return H_RESOURCE;
540 }
541 ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
542 adapter_handle.handle, /* r4 */
543 /* function code */
544 1, /* r5 */
545 qp->ipz_qp_handle.handle, /* r6 */
546 0, 0, 0, 0, 0, 0);
547 if (ret == H_HARDWARE)
548 ehca_gen_err("HCA not operational. ret=%lx", ret);
549
550 ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
551 adapter_handle.handle, /* r4 */
552 qp->ipz_qp_handle.handle, /* r5 */
553 0, 0, 0, 0, 0);
554
555 if (ret == H_RESOURCE)
556 ehca_gen_err("Resource still in use. ret=%lx", ret);
557
558 return ret;
559}
560
561u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
562 const struct ipz_qp_handle qp_handle,
563 struct h_galpa gal,
564 u32 port)
565{
566 return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
567 adapter_handle.handle, /* r4 */
568 qp_handle.handle, /* r5 */
569 port, /* r6 */
570 0, 0, 0, 0);
571}
572
573u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
574 const struct ipz_qp_handle qp_handle,
575 struct h_galpa gal,
576		       u32 port, u32 *pma_qp_nr,
577		       u32 *bma_qp_nr)
578{
579 u64 ret;
580 u64 outs[PLPAR_HCALL9_BUFSIZE];
581
582 ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
583 adapter_handle.handle, /* r4 */
584 qp_handle.handle, /* r5 */
585 port, /* r6 */
586 0, 0, 0, 0, 0, 0);
587 *pma_qp_nr = (u32)outs[0];
588 *bma_qp_nr = (u32)outs[1];
589
590 if (ret == H_ALIAS_EXIST)
591 ehca_gen_err("AQP1 already exists. ret=%lx", ret);
592
593 return ret;
594}
595
596u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
597 const struct ipz_qp_handle qp_handle,
598 struct h_galpa gal,
599 u16 mcg_dlid,
600 u64 subnet_prefix, u64 interface_id)
601{
602 u64 ret;
603
604 ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
605 adapter_handle.handle, /* r4 */
606 qp_handle.handle, /* r5 */
607 mcg_dlid, /* r6 */
608 interface_id, /* r7 */
609 subnet_prefix, /* r8 */
610 0, 0);
611
612 if (ret == H_NOT_ENOUGH_RESOURCES)
613 ehca_gen_err("Not enough resources. ret=%lx", ret);
614
615 return ret;
616}
617
618u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
619 const struct ipz_qp_handle qp_handle,
620 struct h_galpa gal,
621 u16 mcg_dlid,
622 u64 subnet_prefix, u64 interface_id)
623{
624 return ehca_plpar_hcall_norets(H_DETACH_MCQP,
625 adapter_handle.handle, /* r4 */
626 qp_handle.handle, /* r5 */
627 mcg_dlid, /* r6 */
628 interface_id, /* r7 */
629 subnet_prefix, /* r8 */
630 0, 0);
631}
632
633u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
634 struct ehca_cq *cq,
635 u8 force_flag)
636{
637 u64 ret;
638
639 ret = hcp_galpas_dtor(&cq->galpas);
640 if (ret) {
641		ehca_gen_err("Could not destruct cq->galpas");
642 return H_RESOURCE;
643 }
644
645 ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
646 adapter_handle.handle, /* r4 */
647 cq->ipz_cq_handle.handle, /* r5 */
648 force_flag != 0 ? 1L : 0L, /* r6 */
649 0, 0, 0, 0);
650
651 if (ret == H_RESOURCE)
652 ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
653
654 return ret;
655}
656
657u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
658 struct ehca_eq *eq)
659{
660 u64 ret;
661
662 ret = hcp_galpas_dtor(&eq->galpas);
663 if (ret) {
664 ehca_gen_err("Could not destruct eq->galpas");
665 return H_RESOURCE;
666 }
667
668 ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
669 adapter_handle.handle, /* r4 */
670 eq->ipz_eq_handle.handle, /* r5 */
671 0, 0, 0, 0, 0);
672
673 if (ret == H_RESOURCE)
674 ehca_gen_err("Resource in use. ret=%lx ", ret);
675
676 return ret;
677}
678
679u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
680 const struct ehca_mr *mr,
681 const u64 vaddr,
682 const u64 length,
683 const u32 access_ctrl,
684 const struct ipz_pd pd,
685 struct ehca_mr_hipzout_parms *outparms)
686{
687 u64 ret;
688 u64 outs[PLPAR_HCALL9_BUFSIZE];
689
690 ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
691 adapter_handle.handle, /* r4 */
692 5, /* r5 */
693 vaddr, /* r6 */
694 length, /* r7 */
695 (((u64)access_ctrl) << 32ULL), /* r8 */
696 pd.value, /* r9 */
697 0, 0, 0);
698 outparms->handle.handle = outs[0];
699 outparms->lkey = (u32)outs[2];
700 outparms->rkey = (u32)outs[3];
701
702 return ret;
703}
704
705u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
706 const struct ehca_mr *mr,
707 const u8 pagesize,
708 const u8 queue_type,
709 const u64 logical_address_of_page,
710 const u64 count)
711{
712 u64 ret;
713
714 if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
715 ehca_gen_err("logical_address_of_page not on a 4k boundary "
716 "adapter_handle=%lx mr=%p mr_handle=%lx "
717 "pagesize=%x queue_type=%x "
718 "logical_address_of_page=%lx count=%lx",
719 adapter_handle.handle, mr,
720 mr->ipz_mr_handle.handle, pagesize, queue_type,
721 logical_address_of_page, count);
722 ret = H_PARAMETER;
723 } else
724 ret = hipz_h_register_rpage(adapter_handle, pagesize,
725 queue_type,
726 mr->ipz_mr_handle.handle,
727 logical_address_of_page, count);
728 return ret;
729}
730
731u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
732 const struct ehca_mr *mr,
733 struct ehca_mr_hipzout_parms *outparms)
734{
735 u64 ret;
736 u64 outs[PLPAR_HCALL9_BUFSIZE];
737
738 ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
739 adapter_handle.handle, /* r4 */
740 mr->ipz_mr_handle.handle, /* r5 */
741 0, 0, 0, 0, 0, 0, 0);
742 outparms->len = outs[0];
743 outparms->vaddr = outs[1];
744 outparms->acl = outs[4] >> 32;
745 outparms->lkey = (u32)(outs[5] >> 32);
746 outparms->rkey = (u32)(outs[5] & (0xffffffff));
747
748 return ret;
749}
750
751u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
752 const struct ehca_mr *mr)
753{
754 return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
755 adapter_handle.handle, /* r4 */
756 mr->ipz_mr_handle.handle, /* r5 */
757 0, 0, 0, 0, 0);
758}
759
760u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
761 const struct ehca_mr *mr,
762 const u64 vaddr_in,
763 const u64 length,
764 const u32 access_ctrl,
765 const struct ipz_pd pd,
766 const u64 mr_addr_cb,
767 struct ehca_mr_hipzout_parms *outparms)
768{
769 u64 ret;
770 u64 outs[PLPAR_HCALL9_BUFSIZE];
771
772 ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
773 adapter_handle.handle, /* r4 */
774 mr->ipz_mr_handle.handle, /* r5 */
775 vaddr_in, /* r6 */
776 length, /* r7 */
777 /* r8 */
778 ((((u64)access_ctrl) << 32ULL) | pd.value),
779 mr_addr_cb, /* r9 */
780 0, 0, 0);
781 outparms->vaddr = outs[1];
782 outparms->lkey = (u32)outs[2];
783 outparms->rkey = (u32)outs[3];
784
785 return ret;
786}
787
788u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
789 const struct ehca_mr *mr,
790 const struct ehca_mr *orig_mr,
791 const u64 vaddr_in,
792 const u32 access_ctrl,
793 const struct ipz_pd pd,
794 struct ehca_mr_hipzout_parms *outparms)
795{
796 u64 ret;
797 u64 outs[PLPAR_HCALL9_BUFSIZE];
798
799 ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
800 adapter_handle.handle, /* r4 */
801 orig_mr->ipz_mr_handle.handle, /* r5 */
802 vaddr_in, /* r6 */
803 (((u64)access_ctrl) << 32ULL), /* r7 */
804 pd.value, /* r8 */
805 0, 0, 0, 0);
806 outparms->handle.handle = outs[0];
807 outparms->lkey = (u32)outs[2];
808 outparms->rkey = (u32)outs[3];
809
810 return ret;
811}
812
813u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
814 const struct ehca_mw *mw,
815 const struct ipz_pd pd,
816 struct ehca_mw_hipzout_parms *outparms)
817{
818 u64 ret;
819 u64 outs[PLPAR_HCALL9_BUFSIZE];
820
821 ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
822 adapter_handle.handle, /* r4 */
823 6, /* r5 */
824 pd.value, /* r6 */
825 0, 0, 0, 0, 0, 0);
826 outparms->handle.handle = outs[0];
827 outparms->rkey = (u32)outs[3];
828
829 return ret;
830}
831
832u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
833 const struct ehca_mw *mw,
834 struct ehca_mw_hipzout_parms *outparms)
835{
836 u64 ret;
837 u64 outs[PLPAR_HCALL9_BUFSIZE];
838
839 ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
840 adapter_handle.handle, /* r4 */
841 mw->ipz_mw_handle.handle, /* r5 */
842 0, 0, 0, 0, 0, 0, 0);
843 outparms->rkey = (u32)outs[3];
844
845 return ret;
846}
847
848u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
849 const struct ehca_mw *mw)
850{
851 return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
852 adapter_handle.handle, /* r4 */
853 mw->ipz_mw_handle.handle, /* r5 */
854 0, 0, 0, 0, 0);
855}
856
857u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
858		      const u64 resource_handle,
859 void *rblock,
860 unsigned long *byte_count)
861{
862 u64 r_cb = virt_to_abs(rblock);
863
864 if (r_cb & (EHCA_PAGESIZE-1)) {
865 ehca_gen_err("rblock not page aligned.");
866 return H_PARAMETER;
867 }
868
869 return ehca_plpar_hcall_norets(H_ERROR_DATA,
870 adapter_handle.handle,
871				       resource_handle,
872 r_cb,
873 0, 0, 0, 0);
874}
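
The wrappers in hcp_if.c all follow the same shape: pack arguments into hcall registers r4..r12, call the firmware, and return the raw H_* code for the verbs layer to interpret. Below is a minimal sketch of a caller of hipz_h_alloc_resource_cq, using only the fields visible above; the CQ token and requested depth are illustrative assumptions, not taken from the patch.

#include <linux/string.h>
#include "hcp_if.h"

static u64 example_create_cq(struct ipz_adapter_handle adapter,
			     struct ehca_cq *my_cq,
			     struct ipz_eq_handle eq)
{
	struct ehca_alloc_cq_parms param;
	u64 hret;

	memset(&param, 0, sizeof(param));
	param.eq_handle = eq;		/* completion events go to this EQ */
	param.nr_cqe = 256;		/* requested CQ depth (illustrative) */
	my_cq->token = 1;		/* illustrative CQ token */

	hret = hipz_h_alloc_resource_cq(adapter, my_cq, &param);
	if (hret != H_SUCCESS)
		return hret;		/* firmware refused the allocation */

	/* param.act_nr_of_entries / param.act_pages now hold what was granted */
	return H_SUCCESS;
}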
diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h
new file mode 100644
index 000000000000..587ebd470959
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_if.h
@@ -0,0 +1,261 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Firmware Infiniband Interface code for POWER
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Gerd Bayer <gerd.bayer@de.ibm.com>
9 * Waleri Fomin <fomin@de.ibm.com>
10 *
11 * Copyright (c) 2005 IBM Corporation
12 *
13 * All rights reserved.
14 *
15 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
16 * BSD.
17 *
18 * OpenIB BSD License
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions are met:
22 *
23 * Redistributions of source code must retain the above copyright notice, this
24 * list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright notice,
27 * this list of conditions and the following disclaimer in the documentation
28 * and/or other materials
29 * provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
39 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGE.
42 */
43
44#ifndef __HCP_IF_H__
45#define __HCP_IF_H__
46
47#include "ehca_classes.h"
48#include "ehca_tools.h"
49#include "hipz_hw.h"
50
51/*
52 * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initializes
53 * resources, and creates the empty EQPT (ring).
54 */
55u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
56 struct ehca_pfeq *pfeq,
57 const u32 neq_control,
58 const u32 number_of_entries,
59 struct ipz_eq_handle *eq_handle,
60			     u32 *act_nr_of_entries,
61			     u32 *act_pages,
62			     u32 *eq_ist);
63
64u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
65 struct ipz_eq_handle eq_handle,
66 const u64 event_mask);
67/*
68 * hipz_h_alloc_resource_cq allocates CQ resources in HW and FW, initializes
69 * resources, and creates the empty CQPT (ring).
70 */
71u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
72 struct ehca_cq *cq,
73 struct ehca_alloc_cq_parms *param);
74
75
76/*
77 * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
78 * initializes resources, and creates empty QPPTs (2 rings).
79 */
80u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
81 struct ehca_qp *qp,
82 struct ehca_alloc_qp_parms *parms);
83
84u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
85 const u8 port_id,
86 struct hipz_query_port *query_port_response_block);
87
88u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
89 struct hipz_query_hca *query_hca_rblock);
90
91/*
92 * hipz_h_register_rpage is the common internal helper for all
93 * H_REGISTER_RPAGES hcalls.
94 */
95u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
96 const u8 pagesize,
97 const u8 queue_type,
98 const u64 resource_handle,
99 const u64 logical_address_of_page,
100 u64 count);
101
102u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
103 const struct ipz_eq_handle eq_handle,
104 struct ehca_pfeq *pfeq,
105 const u8 pagesize,
106 const u8 queue_type,
107 const u64 logical_address_of_page,
108 const u64 count);
109
110u64 hipz_h_query_int_state(const struct ipz_adapter_handle
111			   adapter_handle,
112 u32 ist);
113
114u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
115 const struct ipz_cq_handle cq_handle,
116 struct ehca_pfcq *pfcq,
117 const u8 pagesize,
118 const u8 queue_type,
119 const u64 logical_address_of_page,
120 const u64 count,
121 const struct h_galpa gal);
122
123u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
124 const struct ipz_qp_handle qp_handle,
125 struct ehca_pfqp *pfqp,
126 const u8 pagesize,
127 const u8 queue_type,
128 const u64 logical_address_of_page,
129 const u64 count,
130 const struct h_galpa galpa);
131
132u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
133 const struct ipz_qp_handle qp_handle,
134 struct ehca_pfqp *pfqp,
135			       void **log_addr_next_sq_wqe2processed,
136			       void **log_addr_next_rq_wqe2processed,
137 int dis_and_get_function_code);
138enum hcall_sigt {
139 HCALL_SIGT_NO_CQE = 0,
140 HCALL_SIGT_BY_WQE = 1,
141 HCALL_SIGT_EVERY = 2
142};
143
144u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
145 const struct ipz_qp_handle qp_handle,
146 struct ehca_pfqp *pfqp,
147 const u64 update_mask,
148 struct hcp_modify_qp_control_block *mqpcb,
149 struct h_galpa gal);
150
151u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
152 const struct ipz_qp_handle qp_handle,
153 struct ehca_pfqp *pfqp,
154 struct hcp_modify_qp_control_block *qqpcb,
155 struct h_galpa gal);
156
157u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
158 struct ehca_qp *qp);
159
160u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
161 const struct ipz_qp_handle qp_handle,
162 struct h_galpa gal,
163 u32 port);
164
165u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
166 const struct ipz_qp_handle qp_handle,
167 struct h_galpa gal,
168		       u32 port, u32 *pma_qp_nr,
169		       u32 *bma_qp_nr);
170
171u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
172 const struct ipz_qp_handle qp_handle,
173 struct h_galpa gal,
174 u16 mcg_dlid,
175 u64 subnet_prefix, u64 interface_id);
176
177u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
178 const struct ipz_qp_handle qp_handle,
179 struct h_galpa gal,
180 u16 mcg_dlid,
181 u64 subnet_prefix, u64 interface_id);
182
183u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
184 struct ehca_cq *cq,
185 u8 force_flag);
186
187u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
188 struct ehca_eq *eq);
189
190/*
191 * hipz_h_alloc_resource_mr allocates MR resources in HW and FW and initializes
192 * resources.
193 */
194u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
195 const struct ehca_mr *mr,
196 const u64 vaddr,
197 const u64 length,
198 const u32 access_ctrl,
199 const struct ipz_pd pd,
200 struct ehca_mr_hipzout_parms *outparms);
201
202/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
203u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
204 const struct ehca_mr *mr,
205 const u8 pagesize,
206 const u8 queue_type,
207 const u64 logical_address_of_page,
208 const u64 count);
209
210/* hipz_h_query_mr queries MR in HW and FW */
211u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
212 const struct ehca_mr *mr,
213 struct ehca_mr_hipzout_parms *outparms);
214
215/* hipz_h_free_resource_mr frees MR resources in HW and FW */
216u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
217 const struct ehca_mr *mr);
218
219/* hipz_h_reregister_pmr reregisters MR in HW and FW */
220u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
221 const struct ehca_mr *mr,
222 const u64 vaddr_in,
223 const u64 length,
224 const u32 access_ctrl,
225 const struct ipz_pd pd,
226 const u64 mr_addr_cb,
227 struct ehca_mr_hipzout_parms *outparms);
228
229/* hipz_h_register_smr registers a shared MR in HW and FW */
230u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
231 const struct ehca_mr *mr,
232 const struct ehca_mr *orig_mr,
233 const u64 vaddr_in,
234 const u32 access_ctrl,
235 const struct ipz_pd pd,
236 struct ehca_mr_hipzout_parms *outparms);
237
238/*
239 * hipz_h_alloc_resource_mw allocates MW resources in HW and FW and initializes
240 * resources.
241 */
242u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
243 const struct ehca_mw *mw,
244 const struct ipz_pd pd,
245 struct ehca_mw_hipzout_parms *outparms);
246
247/* hipz_h_query_mw queries MW in HW and FW */
248u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
249 const struct ehca_mw *mw,
250 struct ehca_mw_hipzout_parms *outparms);
251
252/* hipz_h_free_resource_mw frees MW resources in HW and FW */
253u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
254 const struct ehca_mw *mw);
255
256u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
257		      const u64 resource_handle,
258 void *rblock,
259 unsigned long *byte_count);
260
261#endif /* __HCP_IF_H__ */
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c
new file mode 100644
index 000000000000..0b1a4772c78a
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.c
@@ -0,0 +1,80 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * load store abstraction for ehca register access with tracing
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#include "ehca_classes.h"
43#include "hipz_hw.h"
44
45int hcall_map_page(u64 physaddr, u64 *mapaddr)
46{
47 *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE));
48 return 0;
49}
50
51int hcall_unmap_page(u64 mapaddr)
52{
53	iounmap((volatile void __iomem *)mapaddr);
54 return 0;
55}
56
57int hcp_galpas_ctor(struct h_galpas *galpas,
58 u64 paddr_kernel, u64 paddr_user)
59{
60 int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle);
61 if (ret)
62 return ret;
63
64 galpas->user.fw_handle = paddr_user;
65
66 return 0;
67}
68
69int hcp_galpas_dtor(struct h_galpas *galpas)
70{
71 if (galpas->kernel.fw_handle) {
72 int ret = hcall_unmap_page(galpas->kernel.fw_handle);
73 if (ret)
74 return ret;
75 }
76
77 galpas->user.fw_handle = galpas->kernel.fw_handle = 0;
78
79 return 0;
80}
diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h
new file mode 100644
index 000000000000..5305c2a3ed94
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hcp_phyp.h
@@ -0,0 +1,90 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * Firmware calls
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
8 * Waleri Fomin <fomin@de.ibm.com>
9 * Gerd Bayer <gerd.bayer@de.ibm.com>
10 *
11 * Copyright (c) 2005 IBM Corporation
12 *
13 * All rights reserved.
14 *
15 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
16 * BSD.
17 *
18 * OpenIB BSD License
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions are met:
22 *
23 * Redistributions of source code must retain the above copyright notice, this
24 * list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright notice,
27 * this list of conditions and the following disclaimer in the documentation
28 * and/or other materials
29 * provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
39 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGE.
42 */
43
44#ifndef __HCP_PHYP_H__
45#define __HCP_PHYP_H__
46
47
48/*
49 * eHCA page (mapped into memory)
50 * resource to access eHCA register pages in CPU address space
51 */
52struct h_galpa {
53 u64 fw_handle;
54 /* for pSeries this is a 64bit memory address where
55 I/O memory is mapped into CPU address space (kv) */
56};
57
58/*
59 * resource to access eHCA address space registers, all types
60 */
61struct h_galpas {
62	u32 pid;		/* PID of user-space galpa checking */
63 struct h_galpa user; /* user space accessible resource,
64 set to 0 if unused */
65 struct h_galpa kernel; /* kernel space accessible resource,
66 set to 0 if unused */
67};
68
69static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset)
70{
71 u64 addr = galpa.fw_handle + offset;
72 return *(volatile u64 __force *)addr;
73}
74
75static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value)
76{
77 u64 addr = galpa.fw_handle + offset;
78 *(volatile u64 __force *)addr = value;
79}
80
81int hcp_galpas_ctor(struct h_galpas *galpas,
82 u64 paddr_kernel, u64 paddr_user);
83
84int hcp_galpas_dtor(struct h_galpas *galpas);
85
86int hcall_map_page(u64 physaddr, u64 *mapaddr);
87
88int hcall_unmap_page(u64 mapaddr);
89
90#endif /* __HCP_PHYP_H__ */
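
A galpa is simply the firmware register page mapped into the kernel address space by hcp_galpas_ctor(); hipz_galpa_load()/hipz_galpa_store() then perform plain 64-bit accesses at a byte offset into that page. A small sketch follows; the 0x08 offset is purely illustrative, real offsets come from the *_OFFSET() macros in hipz_fns*.h.

static inline void example_set_low_bit(struct h_galpa gal)
{
	u64 reg = hipz_galpa_load(gal, 0x08);	/* 0x08 is illustrative */

	hipz_galpa_store(gal, 0x08, reg | 0x1ULL);
}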
diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h
new file mode 100644
index 000000000000..9dac93d02140
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns.h
@@ -0,0 +1,68 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * HW abstraction register functions
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Reinhard Ernst <rernst@de.ibm.com>
8 *
9 * Copyright (c) 2005 IBM Corporation
10 *
11 * All rights reserved.
12 *
13 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
14 * BSD.
15 *
16 * OpenIB BSD License
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions are met:
20 *
21 * Redistributions of source code must retain the above copyright notice, this
22 * list of conditions and the following disclaimer.
23 *
24 * Redistributions in binary form must reproduce the above copyright notice,
25 * this list of conditions and the following disclaimer in the documentation
26 * and/or other materials
27 * provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
36 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
37 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 */
41
42#ifndef __HIPZ_FNS_H__
43#define __HIPZ_FNS_H__
44
45#include "ehca_classes.h"
46#include "hipz_hw.h"
47
48#include "hipz_fns_core.h"
49
50#define hipz_galpa_store_eq(gal, offset, value) \
51 hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
52
53#define hipz_galpa_load_eq(gal, offset) \
54 hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
55
56#define hipz_galpa_store_qped(gal, offset, value) \
57 hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
58
59#define hipz_galpa_load_qped(gal, offset) \
60 hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
61
62#define hipz_galpa_store_mrmw(gal, offset, value) \
63 hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
64
65#define hipz_galpa_load_mrmw(gal, offset) \
66 hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
67
68#endif /* __HIPZ_FNS_H__ */
diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h
new file mode 100644
index 000000000000..20898a153446
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_fns_core.h
@@ -0,0 +1,100 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * HW abstraction register functions
5 *
6 * Authors: Christoph Raisch <raisch@de.ibm.com>
7 * Heiko J Schick <schickhj@de.ibm.com>
8 * Hoang-Nam Nguyen <hnguyen@de.ibm.com>
9 * Reinhard Ernst <rernst@de.ibm.com>
10 *
11 * Copyright (c) 2005 IBM Corporation
12 *
13 * All rights reserved.
14 *
15 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
16 * BSD.
17 *
18 * OpenIB BSD License
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions are met:
22 *
23 * Redistributions of source code must retain the above copyright notice, this
24 * list of conditions and the following disclaimer.
25 *
26 * Redistributions in binary form must reproduce the above copyright notice,
27 * this list of conditions and the following disclaimer in the documentation
28 * and/or other materials
29 * provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
38 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
39 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 * POSSIBILITY OF SUCH DAMAGE.
42 */
43
44#ifndef __HIPZ_FNS_CORE_H__
45#define __HIPZ_FNS_CORE_H__
46
47#include "hcp_phyp.h"
48#include "hipz_hw.h"
49
50#define hipz_galpa_store_cq(gal, offset, value) \
51 hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value)
52
53#define hipz_galpa_load_cq(gal, offset) \
54 hipz_galpa_load(gal, CQTEMM_OFFSET(offset))
55
56#define hipz_galpa_store_qp(gal, offset, value) \
57	hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value)
58#define hipz_galpa_load_qp(gal, offset) \
59	hipz_galpa_load(gal, QPTEMM_OFFSET(offset))
60
61static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes)
62{
63 /* ringing doorbell :-) */
64 hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa,
65 EHCA_BMASK_SET(QPX_SQADDER, nr_wqes));
66}
67
68static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes)
69{
70 /* ringing doorbell :-) */
71 hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa,
72 EHCA_BMASK_SET(QPX_RQADDER, nr_wqes));
73}
74
75static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes)
76{
77 hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca,
78 EHCA_BMASK_SET(CQX_FECADDER, nr_cqes));
79}
80
81static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value)
82{
83 u64 cqx_n0_reg;
84
85 hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0,
86 EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT,
87 value));
88 cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0);
89}
90
91static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value)
92{
93 u64 cqx_n1_reg;
94
95 hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1,
96 EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value));
97 cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1);
98}
99
100#endif /* __HIPZ_FNS_CORE_H__ */
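
The doorbell helpers above reduce to a single store into the QP's register page: after new WQEs have been copied into the send or receive queue, the count of new entries is written to the SQ/RQ adder field. A minimal usage sketch (building the WQEs themselves is outside this header and is only assumed here):

static inline void example_ring_send_doorbell(struct ehca_qp *qp,
					      u16 nr_new_wqes)
{
	/* nr_new_wqes work requests are assumed to already sit in the SQ */
	hipz_update_sqa(qp, nr_new_wqes);	/* write count to QPX_SQADDER */
}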
diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h
new file mode 100644
index 000000000000..3fc92b031c50
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/hipz_hw.h
@@ -0,0 +1,388 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * eHCA register definitions
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Christoph Raisch <raisch@de.ibm.com>
8 * Reinhard Ernst <rernst@de.ibm.com>
9 *
10 * Copyright (c) 2005 IBM Corporation
11 *
12 * All rights reserved.
13 *
14 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
15 * BSD.
16 *
17 * OpenIB BSD License
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are met:
21 *
22 * Redistributions of source code must retain the above copyright notice, this
23 * list of conditions and the following disclaimer.
24 *
25 * Redistributions in binary form must reproduce the above copyright notice,
26 * this list of conditions and the following disclaimer in the documentation
27 * and/or other materials
28 * provided with the distribution.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43#ifndef __HIPZ_HW_H__
44#define __HIPZ_HW_H__
45
46#include "ehca_tools.h"
47
48/* QP Table Entry Memory Map */
49struct hipz_qptemm {
50 u64 qpx_hcr;
51 u64 qpx_c;
52 u64 qpx_herr;
53 u64 qpx_aer;
54/* 0x20*/
55 u64 qpx_sqa;
56 u64 qpx_sqc;
57 u64 qpx_rqa;
58 u64 qpx_rqc;
59/* 0x40*/
60 u64 qpx_st;
61 u64 qpx_pmstate;
62 u64 qpx_pmfa;
63 u64 qpx_pkey;
64/* 0x60*/
65 u64 qpx_pkeya;
66 u64 qpx_pkeyb;
67 u64 qpx_pkeyc;
68 u64 qpx_pkeyd;
69/* 0x80*/
70 u64 qpx_qkey;
71 u64 qpx_dqp;
72 u64 qpx_dlidp;
73 u64 qpx_portp;
74/* 0xa0*/
75 u64 qpx_slidp;
76 u64 qpx_slidpp;
77 u64 qpx_dlida;
78 u64 qpx_porta;
79/* 0xc0*/
80 u64 qpx_slida;
81 u64 qpx_slidpa;
82 u64 qpx_slvl;
83 u64 qpx_ipd;
84/* 0xe0*/
85 u64 qpx_mtu;
86 u64 qpx_lato;
87 u64 qpx_rlimit;
88 u64 qpx_rnrlimit;
89/* 0x100*/
90 u64 qpx_t;
91 u64 qpx_sqhp;
92 u64 qpx_sqptp;
93 u64 qpx_nspsn;
94/* 0x120*/
95 u64 qpx_nspsnhwm;
96 u64 reserved1;
97 u64 qpx_sdsi;
98 u64 qpx_sdsbc;
99/* 0x140*/
100 u64 qpx_sqwsize;
101 u64 qpx_sqwts;
102 u64 qpx_lsn;
103 u64 qpx_nssn;
104/* 0x160 */
105 u64 qpx_mor;
106 u64 qpx_cor;
107 u64 qpx_sqsize;
108 u64 qpx_erc;
109/* 0x180*/
110 u64 qpx_rnrrc;
111 u64 qpx_ernrwt;
112 u64 qpx_rnrresp;
113 u64 qpx_lmsna;
114/* 0x1a0 */
115 u64 qpx_sqhpc;
116 u64 qpx_sqcptp;
117 u64 qpx_sigt;
118 u64 qpx_wqecnt;
119/* 0x1c0*/
120 u64 qpx_rqhp;
121 u64 qpx_rqptp;
122 u64 qpx_rqsize;
123 u64 qpx_nrr;
124/* 0x1e0*/
125 u64 qpx_rdmac;
126 u64 qpx_nrpsn;
127 u64 qpx_lapsn;
128 u64 qpx_lcr;
129/* 0x200*/
130 u64 qpx_rwc;
131 u64 qpx_rwva;
132 u64 qpx_rdsi;
133 u64 qpx_rdsbc;
134/* 0x220*/
135 u64 qpx_rqwsize;
136 u64 qpx_crmsn;
137 u64 qpx_rdd;
138 u64 qpx_larpsn;
139/* 0x240*/
140 u64 qpx_pd;
141 u64 qpx_scqn;
142 u64 qpx_rcqn;
143 u64 qpx_aeqn;
144/* 0x260*/
145 u64 qpx_aaelog;
146 u64 qpx_ram;
147 u64 qpx_rdmaqe0;
148 u64 qpx_rdmaqe1;
149/* 0x280*/
150 u64 qpx_rdmaqe2;
151 u64 qpx_rdmaqe3;
152 u64 qpx_nrpsnhwm;
153/* 0x298*/
154 u64 reserved[(0x400 - 0x298) / 8];
155/* 0x400 extended data */
156 u64 reserved_ext[(0x500 - 0x400) / 8];
157/* 0x500 */
158 u64 reserved2[(0x1000 - 0x500) / 8];
159/* 0x1000 */
160};
161
162#define QPX_SQADDER EHCA_BMASK_IBM(48,63)
163#define QPX_RQADDER EHCA_BMASK_IBM(48,63)
164
165#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)
166
167/* MRMWPT Entry Memory Map */
168struct hipz_mrmwmm {
169 /* 0x00 */
170 u64 mrx_hcr;
171
172 u64 mrx_c;
173 u64 mrx_herr;
174 u64 mrx_aer;
175 /* 0x20 */
176 u64 mrx_pp;
177 u64 reserved1;
178 u64 reserved2;
179 u64 reserved3;
180 /* 0x40 */
181 u64 reserved4[(0x200 - 0x40) / 8];
182 /* 0x200 */
183 u64 mrx_ctl[64];
184
185};
186
187#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm,x)
188
189struct hipz_qpedmm {
190 /* 0x00 */
191 u64 reserved0[(0x400) / 8];
192 /* 0x400 */
193 u64 qpedx_phh;
194 u64 qpedx_ppsgp;
195 /* 0x410 */
196 u64 qpedx_ppsgu;
197 u64 qpedx_ppdgp;
198 /* 0x420 */
199 u64 qpedx_ppdgu;
200 u64 qpedx_aph;
201 /* 0x430 */
202 u64 qpedx_apsgp;
203 u64 qpedx_apsgu;
204 /* 0x440 */
205 u64 qpedx_apdgp;
206 u64 qpedx_apdgu;
207 /* 0x450 */
208 u64 qpedx_apav;
209 u64 qpedx_apsav;
210 /* 0x460 */
211 u64 qpedx_hcr;
212 u64 reserved1[4];
213 /* 0x488 */
214 u64 qpedx_rrl0;
215 /* 0x490 */
216 u64 qpedx_rrrkey0;
217 u64 qpedx_rrva0;
218 /* 0x4a0 */
219 u64 reserved2;
220 u64 qpedx_rrl1;
221 /* 0x4b0 */
222 u64 qpedx_rrrkey1;
223 u64 qpedx_rrva1;
224 /* 0x4c0 */
225 u64 reserved3;
226 u64 qpedx_rrl2;
227 /* 0x4d0 */
228 u64 qpedx_rrrkey2;
229 u64 qpedx_rrva2;
230 /* 0x4e0 */
231 u64 reserved4;
232 u64 qpedx_rrl3;
233 /* 0x4f0 */
234 u64 qpedx_rrrkey3;
235 u64 qpedx_rrva3;
236};
237
238#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm,x)
239
240/* CQ Table Entry Memory Map */
241struct hipz_cqtemm {
242 u64 cqx_hcr;
243 u64 cqx_c;
244 u64 cqx_herr;
245 u64 cqx_aer;
246/* 0x20 */
247 u64 cqx_ptp;
248 u64 cqx_tp;
249 u64 cqx_fec;
250 u64 cqx_feca;
251/* 0x40 */
252 u64 cqx_ep;
253 u64 cqx_eq;
254/* 0x50 */
255 u64 reserved1;
256 u64 cqx_n0;
257/* 0x60 */
258 u64 cqx_n1;
259 u64 reserved2[(0x1000 - 0x60) / 8];
260/* 0x1000 */
261};
262
263#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32,63)
264#define CQX_FECADDER EHCA_BMASK_IBM(32,63)
265#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0,0)
266#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0,0)
267
268#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm,x)
269
270/* EQ Table Entry Memory Map */
271struct hipz_eqtemm {
272 u64 eqx_hcr;
273 u64 eqx_c;
274
275 u64 eqx_herr;
276 u64 eqx_aer;
277/* 0x20 */
278 u64 eqx_ptp;
279 u64 eqx_tp;
280 u64 eqx_ssba;
281 u64 eqx_psba;
282
283/* 0x40 */
284 u64 eqx_cec;
285 u64 eqx_meql;
286 u64 eqx_xisbi;
287 u64 eqx_xisc;
288/* 0x60 */
289 u64 eqx_it;
290
291};
292
293#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm,x)
294
295/* access control defines for MR/MW */
296#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000
297#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000
298#define HIPZ_ACCESSCTRL_R_READ 0x00200000
299#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
300#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000
301
302/* query hca response block */
303struct hipz_query_hca {
304 u32 cur_reliable_dg;
305 u32 cur_qp;
306 u32 cur_cq;
307 u32 cur_eq;
308 u32 cur_mr;
309 u32 cur_mw;
310 u32 cur_ee_context;
311 u32 cur_mcast_grp;
312 u32 cur_qp_attached_mcast_grp;
313 u32 reserved1;
314 u32 cur_ipv6_qp;
315 u32 cur_eth_qp;
316 u32 cur_hp_mr;
317 u32 reserved2[3];
318 u32 max_rd_domain;
319 u32 max_qp;
320 u32 max_cq;
321 u32 max_eq;
322 u32 max_mr;
323 u32 max_hp_mr;
324 u32 max_mw;
325 u32 max_mrwpte;
326 u32 max_special_mrwpte;
327 u32 max_rd_ee_context;
328 u32 max_mcast_grp;
329 u32 max_total_mcast_qp_attach;
330 u32 max_mcast_qp_attach;
331 u32 max_raw_ipv6_qp;
332 u32 max_raw_ethy_qp;
333 u32 internal_clock_frequency;
334 u32 max_pd;
335 u32 max_ah;
336 u32 max_cqe;
337 u32 max_wqes_wq;
338 u32 max_partitions;
339 u32 max_rr_ee_context;
340 u32 max_rr_qp;
341 u32 max_rr_hca;
342 u32 max_act_wqs_ee_context;
343 u32 max_act_wqs_qp;
344 u32 max_sge;
345 u32 max_sge_rd;
346 u32 memory_page_size_supported;
347 u64 max_mr_size;
348 u32 local_ca_ack_delay;
349 u32 num_ports;
350 u32 vendor_id;
351 u32 vendor_part_id;
352 u32 hw_ver;
353 u64 node_guid;
354 u64 hca_cap_indicators;
355 u32 data_counter_register_size;
356 u32 max_shared_rq;
357 u32 max_isns_eq;
358 u32 max_neq;
359} __attribute__ ((packed));
360
361/* query port response block */
362struct hipz_query_port {
363 u32 state;
364 u32 bad_pkey_cntr;
365 u32 lmc;
366 u32 lid;
367 u32 subnet_timeout;
368 u32 qkey_viol_cntr;
369 u32 sm_sl;
370 u32 sm_lid;
371 u32 capability_mask;
372 u32 init_type_reply;
373 u32 pkey_tbl_len;
374 u32 gid_tbl_len;
375 u64 gid_prefix;
376 u32 port_nr;
377 u16 pkey_entries[16];
378 u8 reserved1[32];
379 u32 trent_size;
380 u32 trbuf_size;
381 u64 max_msg_sz;
382 u32 max_mtu;
383 u32 vl_cap;
384 u8 reserved2[1900];
385 u64 guid_entries[255];
386} __attribute__ ((packed));
387
388#endif /* __HIPZ_HW_H__ */
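
The HIPZ_ACCESSCTRL_* defines are single bits that are OR-ed into the access_ctrl word passed to hipz_h_alloc_resource_mr(), which shifts it into the upper half of r8. A hedged sketch of composing that word; the mapping from IB verbs access flags is an assumption for illustration only.

static inline u32 example_mr_access_ctrl(int remote_write, int remote_read,
					 int remote_atomic)
{
	u32 access_ctrl = HIPZ_ACCESSCTRL_L_WRITE;	/* local write */

	if (remote_write)
		access_ctrl |= HIPZ_ACCESSCTRL_R_WRITE;
	if (remote_read)
		access_ctrl |= HIPZ_ACCESSCTRL_R_READ;
	if (remote_atomic)
		access_ctrl |= HIPZ_ACCESSCTRL_R_ATOMIC;

	return access_ctrl;
}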
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
new file mode 100644
index 000000000000..e028ff1588cc
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -0,0 +1,149 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * internal queue handling
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Reinhard Ernst <rernst@de.ibm.com>
8 * Christoph Raisch <raisch@de.ibm.com>
9 *
10 * Copyright (c) 2005 IBM Corporation
11 *
12 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
13 * BSD.
14 *
15 * OpenIB BSD License
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are met:
19 *
20 * Redistributions of source code must retain the above copyright notice, this
21 * list of conditions and the following disclaimer.
22 *
23 * Redistributions in binary form must reproduce the above copyright notice,
24 * this list of conditions and the following disclaimer in the documentation
25 * and/or other materials
26 * provided with the distribution.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41#include "ehca_tools.h"
42#include "ipz_pt_fn.h"
43
44void *ipz_qpageit_get_inc(struct ipz_queue *queue)
45{
46 void *ret = ipz_qeit_get(queue);
47 queue->current_q_offset += queue->pagesize;
48 if (queue->current_q_offset > queue->queue_length) {
49 queue->current_q_offset -= queue->pagesize;
50 ret = NULL;
51 }
52 if (((u64)ret) % EHCA_PAGESIZE) {
53		ehca_gen_err("queue page not aligned to EHCA page boundary");
54 return NULL;
55 }
56 return ret;
57}
58
59void *ipz_qeit_eq_get_inc(struct ipz_queue *queue)
60{
61 void *ret = ipz_qeit_get(queue);
62 u64 last_entry_in_q = queue->queue_length - queue->qe_size;
63
64 queue->current_q_offset += queue->qe_size;
65 if (queue->current_q_offset > last_entry_in_q) {
66 queue->current_q_offset = 0;
67 queue->toggle_state = (~queue->toggle_state) & 1;
68 }
69
70 return ret;
71}
72
73int ipz_queue_ctor(struct ipz_queue *queue,
74 const u32 nr_of_pages,
75 const u32 pagesize, const u32 qe_size, const u32 nr_of_sg)
76{
77 int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
78 int f;
79
80 if (pagesize > PAGE_SIZE) {
81 ehca_gen_err("FATAL ERROR: pagesize=%x is greater "
82 "than kernel page size", pagesize);
83 return 0;
84 }
85 if (!pages_per_kpage) {
86 ehca_gen_err("FATAL ERROR: invalid kernel page size. "
87 "pages_per_kpage=%x", pages_per_kpage);
88 return 0;
89 }
90 queue->queue_length = nr_of_pages * pagesize;
91 queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *));
92 if (!queue->queue_pages) {
93		ehca_gen_err("vmalloc failed for queue_pages array");
94 return 0;
95 }
96 memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *));
97 /*
98 * allocate pages for queue:
99 * outer loop allocates whole kernel pages (page aligned) and
100 * inner loop divides a kernel page into smaller hca queue pages
101 */
102 f = 0;
103 while (f < nr_of_pages) {
104		u8 *kpage = (u8 *)get_zeroed_page(GFP_KERNEL);
105 int k;
106 if (!kpage)
107 goto ipz_queue_ctor_exit0; /*NOMEM*/
108 for (k = 0; k < pages_per_kpage && f < nr_of_pages; k++) {
109 (queue->queue_pages)[f] = (struct ipz_page *)kpage;
110 kpage += EHCA_PAGESIZE;
111 f++;
112 }
113 }
114
115 queue->current_q_offset = 0;
116 queue->qe_size = qe_size;
117 queue->act_nr_of_sg = nr_of_sg;
118 queue->pagesize = pagesize;
119 queue->toggle_state = 1;
120 return 1;
121
122 ipz_queue_ctor_exit0:
123 ehca_gen_err("Couldn't get alloc pages queue=%p f=%x nr_of_pages=%x",
124 queue, f, nr_of_pages);
125 for (f = 0; f < nr_of_pages; f += pages_per_kpage) {
126 if (!(queue->queue_pages)[f])
127 break;
128 free_page((unsigned long)(queue->queue_pages)[f]);
129 }
130 return 0;
131}
132
133int ipz_queue_dtor(struct ipz_queue *queue)
134{
135 int pages_per_kpage = PAGE_SIZE >> EHCA_PAGESHIFT;
136 int g;
137 int nr_pages;
138
139 if (!queue || !queue->queue_pages) {
140 ehca_gen_dbg("queue or queue_pages is NULL");
141 return 0;
142 }
143 nr_pages = queue->queue_length / queue->pagesize;
144 for (g = 0; g < nr_pages; g += pages_per_kpage)
145 free_page((unsigned long)(queue->queue_pages)[g]);
146 vfree(queue->queue_pages);
147
148 return 1;
149}
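
ipz_queue_ctor() returns 1 on success and 0 on failure (not an errno); it allocates whole kernel pages and carves them into EHCA_PAGESIZE chunks, and ipz_queue_dtor() releases both the pages and the page-pointer array. A small sketch of the pairing, with purely illustrative sizes:

#include <linux/errno.h>
#include "ipz_pt_fn.h"

static int example_queue_lifetime(struct ipz_queue *queue)
{
	/* 16 pages of 64-byte entries, no scatter/gather - illustrative only */
	if (!ipz_queue_ctor(queue, 16, EHCA_PAGESIZE, 64, 0))
		return -ENOMEM;

	/* ... produce/consume entries via the ipz_qeit_* iterators ... */

	ipz_queue_dtor(queue);
	return 0;
}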
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
new file mode 100644
index 000000000000..2f13509d5257
--- /dev/null
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.h
@@ -0,0 +1,247 @@
1/*
2 * IBM eServer eHCA Infiniband device driver for Linux on POWER
3 *
4 * internal queue handling
5 *
6 * Authors: Waleri Fomin <fomin@de.ibm.com>
7 * Reinhard Ernst <rernst@de.ibm.com>
8 * Christoph Raisch <raisch@de.ibm.com>
9 *
10 * Copyright (c) 2005 IBM Corporation
11 *
12 * All rights reserved.
13 *
14 * This source code is distributed under a dual license of GPL v2.0 and OpenIB
15 * BSD.
16 *
17 * OpenIB BSD License
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions are met:
21 *
22 * Redistributions of source code must retain the above copyright notice, this
23 * list of conditions and the following disclaimer.
24 *
25 * Redistributions in binary form must reproduce the above copyright notice,
26 * this list of conditions and the following disclaimer in the documentation
27 * and/or other materials
28 * provided with the distribution.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
37 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
38 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
40 * POSSIBILITY OF SUCH DAMAGE.
41 */
42
43#ifndef __IPZ_PT_FN_H__
44#define __IPZ_PT_FN_H__
45
46#define EHCA_PAGESHIFT 12
47#define EHCA_PAGESIZE 4096UL
48#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1))
49#define EHCA_PT_ENTRIES 512UL
50
51#include "ehca_tools.h"
52#include "ehca_qes.h"
53
54/* struct generic ehca page */
55struct ipz_page {
56 u8 entries[EHCA_PAGESIZE];
57};
58
59/* struct generic queue in linux kernel virtual memory (kv) */
60struct ipz_queue {
61 u64 current_q_offset; /* current queue entry */
62
63 struct ipz_page **queue_pages; /* array of pages belonging to queue */
64 u32 qe_size; /* queue entry size */
65 u32 act_nr_of_sg;
66 u32 queue_length; /* queue length allocated in bytes */
67 u32 pagesize;
68 u32 toggle_state; /* toggle flag - per page */
69 u32 dummy3; /* 64 bit alignment */
70};
71
72/*
73 * return current Queue Entry for a certain q_offset
74 * returns address (kv) of Queue Entry
75 */
76static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset)
77{
78 struct ipz_page *current_page;
79 if (q_offset >= queue->queue_length)
80 return NULL;
81 current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT];
82 return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)];
83}
84
85/*
86 * return current Queue Entry
87 * returns address (kv) of Queue Entry
88 */
89static inline void *ipz_qeit_get(struct ipz_queue *queue)
90{
91 return ipz_qeit_calc(queue, queue->current_q_offset);
92}
93
94/*
95 * return current Queue Page , increment Queue Page iterator from
96 * page to page in struct ipz_queue, last increment will return 0! and
97 * NOT wrap
98 * returns address (kv) of Queue Page
99 * warning don't use in parallel with ipz_qeit_get_inc()
100 */
101void *ipz_qpageit_get_inc(struct ipz_queue *queue);
102
103/*
104 * return current Queue Entry, increment Queue Entry iterator by one
105 * step in struct ipz_queue, will wrap in ringbuffer
106 * returns address (kv) of Queue Entry BEFORE increment
107 * warning don't use in parallel with ipz_qpageit_get_inc()
108 * warning unpredictable results may occur if steps>act_nr_of_queue_entries
109 */
110static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
111{
112 void *ret = ipz_qeit_get(queue);
113 queue->current_q_offset += queue->qe_size;
114 if (queue->current_q_offset >= queue->queue_length) {
115 queue->current_q_offset = 0;
116 /* toggle the valid flag */
117 queue->toggle_state = (~queue->toggle_state) & 1;
118 }
119
120 return ret;
121}
122
123/*
124 * return current Queue Entry, increment Queue Entry iterator by one
125 * step in struct ipz_queue, will wrap in ringbuffer
126 * returns address (kv) of Queue Entry BEFORE increment
127 * returns 0 and does not increment, if wrong valid state
128 * warning don't use in parallel with ipz_qpageit_get_inc()
129 * warning unpredictable results may occur if steps>act_nr_of_queue_entries
130 */
131static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
132{
133 struct ehca_cqe *cqe = ipz_qeit_get(queue);
134 u32 cqe_flags = cqe->cqe_flags;
135
136 if ((cqe_flags >> 7) != (queue->toggle_state & 1))
137 return NULL;
138
139 ipz_qeit_get_inc(queue);
140 return cqe;
141}
142
143/*
144 * returns and resets Queue Entry iterator
145 * returns address (kv) of first Queue Entry
146 */
147static inline void *ipz_qeit_reset(struct ipz_queue *queue)
148{
149 queue->current_q_offset = 0;
150 return ipz_qeit_get(queue);
151}
152
153/* struct generic page table */
154struct ipz_pt {
155 u64 entries[EHCA_PT_ENTRIES];
156};
157
158/* struct page table for a queue, only to be used in pf */
159struct ipz_qpt {
160 /* queue page tables (kv), use u64 because we know the element length */
161 u64 *qpts;
162 u32 n_qpts;
163 u32 n_ptes; /* number of page table entries */
164 u64 *current_pte_addr;
165};
166
167/*
168 * constructor for an ipz_queue_t, placement new for ipz_queue_t,
169 * new for all dependent data structures
170 * all QP Tables are the same
171 * flow:
172 * allocate+pin queue
173 * see ipz_qpt_ctor()
174 * returns true if ok, false if out of memory
175 */
176int ipz_queue_ctor(struct ipz_queue *queue, const u32 nr_of_pages,
177 const u32 pagesize, const u32 qe_size,
178 const u32 nr_of_sg);
179
180/*
181 * destructor for an ipz_queue_t
182 * -# free queue
183 * see ipz_queue_ctor()
184 * returns true if ok, false if queue was a NULL-ptr or free failed
185 */
186int ipz_queue_dtor(struct ipz_queue *queue);
187
188/*
189 * constructor for an ipz_qpt_t,
190 * placement new for struct ipz_queue, new for all dependent data structures
191 * all QP Tables are the same,
192 * flow:
193 * -# allocate+pin queue
194 * -# initialise ptcb
195 * -# allocate+pin PTs
196 * -# link PTs to a ring, according to HCA Arch, set bit62 if needed
197 * -# the ring must have room for exactly nr_of_PTEs
198 * see ipz_qpt_ctor()
199 */
200void ipz_qpt_ctor(struct ipz_qpt *qpt,
201 const u32 nr_of_qes,
202 const u32 pagesize,
203 const u32 qe_size,
204 const u8 lowbyte, const u8 toggle,
205 u32 * act_nr_of_QEs, u32 * act_nr_of_pages);
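
The flow description above mentions linking the page tables into a ring. As a very rough illustration of what such a ring looks like, the sketch below chains page-table pages through their last entry and closes the ring back to the first page; all names are invented here, and the real hardware-specific encoding (GX addresses, bit 62) is deliberately not reproduced.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define DEMO_PT_ENTRIES 512

struct demo_pt { uint64_t entries[DEMO_PT_ENTRIES]; };

static struct demo_pt **demo_link_pt_ring(unsigned n_pts)
{
	struct demo_pt **pts = calloc(n_pts, sizeof(*pts));
	unsigned i;

	if (!pts)
		return NULL;
	for (i = 0; i < n_pts; i++) {
		pts[i] = calloc(1, sizeof(*pts[i]));
		if (!pts[i])
			goto fail;
	}
	/* last entry of each PT page refers to the next PT page;
	 * the final page closes the ring by referring to the first */
	for (i = 0; i < n_pts; i++)
		pts[i]->entries[DEMO_PT_ENTRIES - 1] =
			(uint64_t)(uintptr_t)pts[(i + 1) % n_pts];
	return pts;

fail:
	while (i--)
		free(pts[i]);
	free(pts);
	return NULL;
}

int main(void)
{
	struct demo_pt **pts = demo_link_pt_ring(3);

	if (!pts)
		return 1;
	printf("PT 2 links back to PT 0: %s\n",
	       pts[2]->entries[DEMO_PT_ENTRIES - 1] ==
	       (uint64_t)(uintptr_t)pts[0] ? "yes" : "no");
	free(pts[0]); free(pts[1]); free(pts[2]);
	free(pts);
	return 0;
}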
206
207/*
208 * return current Queue Entry, increment Queue Entry iterator by one
209 * step in struct ipz_queue, will wrap in ringbuffer
210 * returns address (kv) of Queue Entry BEFORE increment
211 * warning: don't use in parallel with ipz_qpageit_get_inc()
212 * warning: unpredictable results may occur if steps > act_nr_of_queue_entries
213 * fix EQ page problems
214 */
215void *ipz_qeit_eq_get_inc(struct ipz_queue *queue);
216
217/*
218 * return current Event Queue Entry, increment Queue Entry iterator
219 * by one step in struct ipz_queue if valid, will wrap in ringbuffer
220 * returns address (kv) of Queue Entry BEFORE increment
221 * returns NULL and does not increment if the valid state is wrong
222 * warning: don't use in parallel with ipz_qpageit_get_inc()
223 * warning: unpredictable results may occur if steps > act_nr_of_queue_entries
224 */
225static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue)
226{
227 void *ret = ipz_qeit_get(queue);
228 u32 qe = *(u8 *) ret;
229 if ((qe >> 7) != (queue->toggle_state & 1))
230 return NULL;
231 ipz_qeit_eq_get_inc(queue); /* this is a good one */
232 return ret;
233}
234
235/* returns address (GX) of first queue page */
236static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt)
237{
238 return be64_to_cpu(qpt->qpts[0]);
239}
240
241/* returns address (kv) of first page of queue page table */
242static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt)
243{
244 return qpt->qpts;
245}
246
247#endif /* __IPZ_PT_FN_H__ */
diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig
index 1db9489f1e82..574a678e7fdd 100644
--- a/drivers/infiniband/hw/ipath/Kconfig
+++ b/drivers/infiniband/hw/ipath/Kconfig
@@ -1,16 +1,9 @@
1config IPATH_CORE
2 tristate "QLogic InfiniPath Driver"
3 depends on 64BIT && PCI_MSI && NET
4 ---help---
5 This is a low-level driver for QLogic InfiniPath host channel
6 adapters (HCAs) based on the HT-400 and PE-800 chips.
7
8config INFINIBAND_IPATH 1config INFINIBAND_IPATH
9 tristate "QLogic InfiniPath Verbs Driver" 2 tristate "QLogic InfiniPath Driver"
10 depends on IPATH_CORE && INFINIBAND 3 depends on PCI_MSI && 64BIT && INFINIBAND
11 ---help--- 4 ---help---
12 This is a driver that provides InfiniBand verbs support for 5 This is a driver for QLogic InfiniPath host channel adapters,
13 QLogic InfiniPath host channel adapters (HCAs). This 6 including InfiniBand verbs support. This driver allows these
14 allows these devices to be used with both kernel upper level 7 devices to be used with both kernel upper level protocols such
15 protocols such as IP-over-InfiniBand as well as with userspace 8 as IP-over-InfiniBand as well as with userspace applications
16 applications (in conjunction with InfiniBand userspace access). 9 (in conjunction with InfiniBand userspace access).
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index b0bf72864130..5e29cb0095e5 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -1,36 +1,35 @@
1EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \ 1EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \
2 -DIPATH_KERN_TYPE=0 2 -DIPATH_KERN_TYPE=0
3 3
4obj-$(CONFIG_IPATH_CORE) += ipath_core.o
5obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o 4obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o
6 5
7ipath_core-y := \ 6ib_ipath-y := \
7 ipath_cq.o \
8 ipath_diag.o \ 8 ipath_diag.o \
9 ipath_driver.o \ 9 ipath_driver.o \
10 ipath_eeprom.o \ 10 ipath_eeprom.o \
11 ipath_file_ops.o \ 11 ipath_file_ops.o \
12 ipath_fs.o \ 12 ipath_fs.o \
13 ipath_ht400.o \ 13 ipath_iba6110.o \
14 ipath_iba6120.o \
14 ipath_init_chip.o \ 15 ipath_init_chip.o \
15 ipath_intr.o \ 16 ipath_intr.o \
16 ipath_layer.o \
17 ipath_pe800.o \
18 ipath_stats.o \
19 ipath_sysfs.o \
20 ipath_user_pages.o
21
22ipath_core-$(CONFIG_X86_64) += ipath_wc_x86_64.o
23
24ib_ipath-y := \
25 ipath_cq.o \
26 ipath_keys.o \ 17 ipath_keys.o \
18 ipath_layer.o \
27 ipath_mad.o \ 19 ipath_mad.o \
20 ipath_mmap.o \
28 ipath_mr.o \ 21 ipath_mr.o \
29 ipath_qp.o \ 22 ipath_qp.o \
30 ipath_rc.o \ 23 ipath_rc.o \
31 ipath_ruc.o \ 24 ipath_ruc.o \
32 ipath_srq.o \ 25 ipath_srq.o \
26 ipath_stats.o \
27 ipath_sysfs.o \
33 ipath_uc.o \ 28 ipath_uc.o \
34 ipath_ud.o \ 29 ipath_ud.o \
35 ipath_verbs.o \ 30 ipath_user_pages.o \
36 ipath_verbs_mcast.o 31 ipath_verbs_mcast.o \
32 ipath_verbs.o
33
34ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
35ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 062bd392e7e5..f577905e3aca 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -106,9 +106,9 @@ struct infinipath_stats {
106 __u64 sps_ether_spkts; 106 __u64 sps_ether_spkts;
107 /* number of "ethernet" packets received by driver */ 107 /* number of "ethernet" packets received by driver */
108 __u64 sps_ether_rpkts; 108 __u64 sps_ether_rpkts;
109 /* number of SMA packets sent by driver */ 109 /* number of SMA packets sent by driver. Obsolete. */
110 __u64 sps_sma_spkts; 110 __u64 sps_sma_spkts;
111 /* number of SMA packets received by driver */ 111 /* number of SMA packets received by driver. Obsolete. */
112 __u64 sps_sma_rpkts; 112 __u64 sps_sma_rpkts;
113 /* number of times all ports rcvhdrq was full and packet dropped */ 113 /* number of times all ports rcvhdrq was full and packet dropped */
114 __u64 sps_hdrqfull; 114 __u64 sps_hdrqfull;
@@ -138,7 +138,7 @@ struct infinipath_stats {
138 __u64 sps_pageunlocks; 138 __u64 sps_pageunlocks;
139 /* 139 /*
140 * Number of packets dropped in kernel other than errors (ether 140 * Number of packets dropped in kernel other than errors (ether
141 * packets if ipath not configured, sma/mad, etc.) 141 * packets if ipath not configured, etc.)
142 */ 142 */
143 __u64 sps_krdrops; 143 __u64 sps_krdrops;
144 /* pad for future growth */ 144 /* pad for future growth */
@@ -153,8 +153,6 @@ struct infinipath_stats {
153#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */ 153#define IPATH_STATUS_DISABLED 0x2 /* hardware disabled */
154/* Device has been disabled via admin request */ 154/* Device has been disabled via admin request */
155#define IPATH_STATUS_ADMIN_DISABLED 0x4 155#define IPATH_STATUS_ADMIN_DISABLED 0x4
156#define IPATH_STATUS_OIB_SMA 0x8 /* ipath_mad kernel SMA running */
157#define IPATH_STATUS_SMA 0x10 /* user SMA running */
158/* Chip has been found and initted */ 156/* Chip has been found and initted */
159#define IPATH_STATUS_CHIP_PRESENT 0x20 157#define IPATH_STATUS_CHIP_PRESENT 0x20
160/* IB link is at ACTIVE, usable for data traffic */ 158/* IB link is at ACTIVE, usable for data traffic */
@@ -465,12 +463,11 @@ struct __ipath_sendpkt {
465 struct ipath_iovec sps_iov[4]; 463 struct ipath_iovec sps_iov[4];
466}; 464};
467 465
468/* Passed into SMA special file's ->read and ->write methods. */ 466/* Passed into diag data special file's ->write method. */
469struct ipath_sma_pkt 467struct ipath_diag_pkt {
470{ 468 __u32 unit;
471 __u32 unit; /* unit on which to send packet */ 469 __u64 data;
472 __u64 data; /* address of payload in userspace */ 470 __u32 len;
473 __u32 len; /* length of payload */
474}; 471};
475 472
476/* 473/*
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 3efee341c9bc..049221bc590e 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -42,20 +42,28 @@
42 * @entry: work completion entry to add 42 * @entry: work completion entry to add
43 * @sig: true if @entry is a solicited entry 43
44 * 44 *
45 * This may be called with one of the qp->s_lock or qp->r_rq.lock held. 45 * This may be called with qp->s_lock held.
46 */ 46 */
47void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited) 47void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
48{ 48{
49 struct ipath_cq_wc *wc = cq->queue;
49 unsigned long flags; 50 unsigned long flags;
51 u32 head;
50 u32 next; 52 u32 next;
51 53
52 spin_lock_irqsave(&cq->lock, flags); 54 spin_lock_irqsave(&cq->lock, flags);
53 55
54 if (cq->head == cq->ibcq.cqe) 56 /*
57 * Note that the head pointer might be writable by user processes.
58 * Take care to verify it is a sane value.
59 */
60 head = wc->head;
61 if (head >= (unsigned) cq->ibcq.cqe) {
62 head = cq->ibcq.cqe;
55 next = 0; 63 next = 0;
56 else 64 } else
57 next = cq->head + 1; 65 next = head + 1;
58 if (unlikely(next == cq->tail)) { 66 if (unlikely(next == wc->tail)) {
59 spin_unlock_irqrestore(&cq->lock, flags); 67 spin_unlock_irqrestore(&cq->lock, flags);
60 if (cq->ibcq.event_handler) { 68 if (cq->ibcq.event_handler) {
61 struct ib_event ev; 69 struct ib_event ev;
@@ -67,8 +75,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
67 } 75 }
68 return; 76 return;
69 } 77 }
70 cq->queue[cq->head] = *entry; 78 wc->queue[head] = *entry;
71 cq->head = next; 79 wc->head = next;
72 80
73 if (cq->notify == IB_CQ_NEXT_COMP || 81 if (cq->notify == IB_CQ_NEXT_COMP ||
74 (cq->notify == IB_CQ_SOLICITED && solicited)) { 82 (cq->notify == IB_CQ_SOLICITED && solicited)) {
@@ -101,19 +109,20 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
101int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) 109int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
102{ 110{
103 struct ipath_cq *cq = to_icq(ibcq); 111 struct ipath_cq *cq = to_icq(ibcq);
112 struct ipath_cq_wc *wc = cq->queue;
104 unsigned long flags; 113 unsigned long flags;
105 int npolled; 114 int npolled;
106 115
107 spin_lock_irqsave(&cq->lock, flags); 116 spin_lock_irqsave(&cq->lock, flags);
108 117
109 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { 118 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
110 if (cq->tail == cq->head) 119 if (wc->tail == wc->head)
111 break; 120 break;
112 *entry = cq->queue[cq->tail]; 121 *entry = wc->queue[wc->tail];
113 if (cq->tail == cq->ibcq.cqe) 122 if (wc->tail >= cq->ibcq.cqe)
114 cq->tail = 0; 123 wc->tail = 0;
115 else 124 else
116 cq->tail++; 125 wc->tail++;
117 } 126 }
118 127
119 spin_unlock_irqrestore(&cq->lock, flags); 128 spin_unlock_irqrestore(&cq->lock, flags);
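
The hunks above treat wc->head and wc->tail as untrusted, because the completion-queue page can be mmap()ed and overwritten by user space; both indices are clamped to the ring bounds before being used as array indices. The following standalone sketch shows the same defensive pattern with invented demo_* names, simplified types, and no locking — it is not the ipath code itself.

#include <stdio.h>
#include <string.h>

#define DEMO_CQE 8                       /* ring holds DEMO_CQE + 1 slots */

struct demo_wc {                         /* what would be the shared page */
	unsigned head;
	unsigned tail;
	int queue[DEMO_CQE + 1];
};

/* Returns 0 on success, -1 if the ring is full. */
static int demo_cq_enter(struct demo_wc *wc, int entry)
{
	unsigned head = wc->head;
	unsigned next;

	if (head > DEMO_CQE)                 /* untrusted: clamp to a sane value */
		head = DEMO_CQE;
	next = (head == DEMO_CQE) ? 0 : head + 1;
	if (next == wc->tail)                /* would overrun the consumer */
		return -1;
	wc->queue[head] = entry;
	wc->head = next;
	return 0;
}

static int demo_cq_poll(struct demo_wc *wc, int *entry)
{
	unsigned tail = wc->tail;

	if (tail > DEMO_CQE)                 /* untrusted: clamp */
		tail = DEMO_CQE;
	if (tail == wc->head)
		return 0;                    /* empty */
	*entry = wc->queue[tail];
	wc->tail = (tail == DEMO_CQE) ? 0 : tail + 1;
	return 1;
}

int main(void)
{
	struct demo_wc wc;
	int v;

	memset(&wc, 0, sizeof(wc));
	demo_cq_enter(&wc, 42);
	demo_cq_enter(&wc, 43);
	wc.tail = 0xdeadbeef;                /* simulate user space corrupting the index */
	while (demo_cq_poll(&wc, &v))        /* stays in bounds despite the bad index */
		printf("polled %d\n", v);
	return 0;
}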
@@ -160,38 +169,74 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
160{ 169{
161 struct ipath_ibdev *dev = to_idev(ibdev); 170 struct ipath_ibdev *dev = to_idev(ibdev);
162 struct ipath_cq *cq; 171 struct ipath_cq *cq;
163 struct ib_wc *wc; 172 struct ipath_cq_wc *wc;
164 struct ib_cq *ret; 173 struct ib_cq *ret;
165 174
166 if (entries > ib_ipath_max_cqes) { 175 if (entries < 1 || entries > ib_ipath_max_cqes) {
167 ret = ERR_PTR(-EINVAL); 176 ret = ERR_PTR(-EINVAL);
168 goto bail; 177 goto done;
169 } 178 }
170 179
171 if (dev->n_cqs_allocated == ib_ipath_max_cqs) { 180 if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
172 ret = ERR_PTR(-ENOMEM); 181 ret = ERR_PTR(-ENOMEM);
173 goto bail; 182 goto done;
174 } 183 }
175 184
176 /* 185 /* Allocate the completion queue structure. */
177 * Need to use vmalloc() if we want to support large #s of
178 * entries.
179 */
180 cq = kmalloc(sizeof(*cq), GFP_KERNEL); 186 cq = kmalloc(sizeof(*cq), GFP_KERNEL);
181 if (!cq) { 187 if (!cq) {
182 ret = ERR_PTR(-ENOMEM); 188 ret = ERR_PTR(-ENOMEM);
183 goto bail; 189 goto done;
184 } 190 }
185 191
186 /* 192 /*
187 * Need to use vmalloc() if we want to support large #s of entries. 193 * Allocate the completion queue entries and head/tail pointers.
194 * This is allocated separately so that it can be resized and
195 * also mapped into user space.
196 * We need to use vmalloc() in order to support mmap and large
197 * numbers of entries.
188 */ 198 */
189 wc = vmalloc(sizeof(*wc) * (entries + 1)); 199 wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
190 if (!wc) { 200 if (!wc) {
191 kfree(cq);
192 ret = ERR_PTR(-ENOMEM); 201 ret = ERR_PTR(-ENOMEM);
193 goto bail; 202 goto bail_cq;
194 } 203 }
204
205 /*
206 * Return the address of the WC as the offset to mmap.
207 * See ipath_mmap() for details.
208 */
209 if (udata && udata->outlen >= sizeof(__u64)) {
210 struct ipath_mmap_info *ip;
211 __u64 offset = (__u64) wc;
212 int err;
213
214 err = ib_copy_to_udata(udata, &offset, sizeof(offset));
215 if (err) {
216 ret = ERR_PTR(err);
217 goto bail_wc;
218 }
219
220 /* Allocate info for ipath_mmap(). */
221 ip = kmalloc(sizeof(*ip), GFP_KERNEL);
222 if (!ip) {
223 ret = ERR_PTR(-ENOMEM);
224 goto bail_wc;
225 }
226 cq->ip = ip;
227 ip->context = context;
228 ip->obj = wc;
229 kref_init(&ip->ref);
230 ip->mmap_cnt = 0;
231 ip->size = PAGE_ALIGN(sizeof(*wc) +
232 sizeof(struct ib_wc) * entries);
233 spin_lock_irq(&dev->pending_lock);
234 ip->next = dev->pending_mmaps;
235 dev->pending_mmaps = ip;
236 spin_unlock_irq(&dev->pending_lock);
237 } else
238 cq->ip = NULL;
239
195 /* 240 /*
196 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. 241 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
197 * The number of entries should be >= the number requested or return 242 * The number of entries should be >= the number requested or return
@@ -202,15 +247,22 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
202 cq->triggered = 0; 247 cq->triggered = 0;
203 spin_lock_init(&cq->lock); 248 spin_lock_init(&cq->lock);
204 tasklet_init(&cq->comptask, send_complete, (unsigned long)cq); 249 tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
205 cq->head = 0; 250 wc->head = 0;
206 cq->tail = 0; 251 wc->tail = 0;
207 cq->queue = wc; 252 cq->queue = wc;
208 253
209 ret = &cq->ibcq; 254 ret = &cq->ibcq;
210 255
211 dev->n_cqs_allocated++; 256 dev->n_cqs_allocated++;
257 goto done;
212 258
213bail: 259bail_wc:
260 vfree(wc);
261
262bail_cq:
263 kfree(cq);
264
265done:
214 return ret; 266 return ret;
215} 267}
216 268
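
The udata path above hands the kernel-virtual address of the vmalloc_user()ed completion ring back to user space as an opaque mmap offset, and queues an ipath_mmap_info record on dev->pending_mmaps so a later mmap() call can find the object by that cookie. A userspace sketch of just that bookkeeping follows; the names are invented and reference counting and locking are omitted.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct demo_mmap_info {
	uint64_t cookie;                 /* what the driver hands to user space */
	void *obj;                       /* the object to be mapped */
	size_t size;
	struct demo_mmap_info *next;
};

static struct demo_mmap_info *pending_mmaps;

static uint64_t demo_register_mmap(void *obj, size_t size)
{
	struct demo_mmap_info *ip = malloc(sizeof(*ip));

	if (!ip)
		return 0;
	ip->cookie = (uint64_t)(uintptr_t)obj;   /* driver uses the kv address */
	ip->obj = obj;
	ip->size = size;
	ip->next = pending_mmaps;
	pending_mmaps = ip;
	return ip->cookie;
}

static struct demo_mmap_info *demo_lookup_mmap(uint64_t cookie)
{
	struct demo_mmap_info *ip;

	for (ip = pending_mmaps; ip; ip = ip->next)
		if (ip->cookie == cookie)
			return ip;
	return NULL;
}

int main(void)
{
	int *wc = calloc(16, sizeof(*wc));       /* stand-in for the CQ ring */
	uint64_t cookie = demo_register_mmap(wc, 16 * sizeof(*wc));
	struct demo_mmap_info *ip = demo_lookup_mmap(cookie);

	printf("lookup %s, size %zu\n", ip ? "ok" : "failed", ip ? ip->size : 0);
	free(pending_mmaps);
	free(wc);
	return 0;
}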
@@ -229,7 +281,10 @@ int ipath_destroy_cq(struct ib_cq *ibcq)
229 281
230 tasklet_kill(&cq->comptask); 282 tasklet_kill(&cq->comptask);
231 dev->n_cqs_allocated--; 283 dev->n_cqs_allocated--;
232 vfree(cq->queue); 284 if (cq->ip)
285 kref_put(&cq->ip->ref, ipath_release_mmap_info);
286 else
287 vfree(cq->queue);
233 kfree(cq); 288 kfree(cq);
234 289
235 return 0; 290 return 0;
@@ -253,7 +308,7 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
253 spin_lock_irqsave(&cq->lock, flags); 308 spin_lock_irqsave(&cq->lock, flags);
254 /* 309 /*
255 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow 310 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
256 * any other transitions. 311 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
257 */ 312 */
258 if (cq->notify != IB_CQ_NEXT_COMP) 313 if (cq->notify != IB_CQ_NEXT_COMP)
259 cq->notify = notify; 314 cq->notify = notify;
@@ -264,46 +319,86 @@ int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
264int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) 319int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
265{ 320{
266 struct ipath_cq *cq = to_icq(ibcq); 321 struct ipath_cq *cq = to_icq(ibcq);
267 struct ib_wc *wc, *old_wc; 322 struct ipath_cq_wc *old_wc = cq->queue;
268 u32 n; 323 struct ipath_cq_wc *wc;
324 u32 head, tail, n;
269 int ret; 325 int ret;
270 326
327 if (cqe < 1 || cqe > ib_ipath_max_cqes) {
328 ret = -EINVAL;
329 goto bail;
330 }
331
271 /* 332 /*
272 * Need to use vmalloc() if we want to support large #s of entries. 333 * Need to use vmalloc() if we want to support large #s of entries.
273 */ 334 */
274 wc = vmalloc(sizeof(*wc) * (cqe + 1)); 335 wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
275 if (!wc) { 336 if (!wc) {
276 ret = -ENOMEM; 337 ret = -ENOMEM;
277 goto bail; 338 goto bail;
278 } 339 }
279 340
341 /*
342 * Return the address of the WC as the offset to mmap.
343 * See ipath_mmap() for details.
344 */
345 if (udata && udata->outlen >= sizeof(__u64)) {
346 __u64 offset = (__u64) wc;
347
348 ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
349 if (ret)
350 goto bail;
351 }
352
280 spin_lock_irq(&cq->lock); 353 spin_lock_irq(&cq->lock);
281 if (cq->head < cq->tail) 354 /*
282 n = cq->ibcq.cqe + 1 + cq->head - cq->tail; 355 * Make sure head and tail are sane since they
356 * might be user writable.
357 */
358 head = old_wc->head;
359 if (head > (u32) cq->ibcq.cqe)
360 head = (u32) cq->ibcq.cqe;
361 tail = old_wc->tail;
362 if (tail > (u32) cq->ibcq.cqe)
363 tail = (u32) cq->ibcq.cqe;
364 if (head < tail)
365 n = cq->ibcq.cqe + 1 + head - tail;
283 else 366 else
284 n = cq->head - cq->tail; 367 n = head - tail;
285 if (unlikely((u32)cqe < n)) { 368 if (unlikely((u32)cqe < n)) {
286 spin_unlock_irq(&cq->lock); 369 spin_unlock_irq(&cq->lock);
287 vfree(wc); 370 vfree(wc);
288 ret = -EOVERFLOW; 371 ret = -EOVERFLOW;
289 goto bail; 372 goto bail;
290 } 373 }
291 for (n = 0; cq->tail != cq->head; n++) { 374 for (n = 0; tail != head; n++) {
292 wc[n] = cq->queue[cq->tail]; 375 wc->queue[n] = old_wc->queue[tail];
293 if (cq->tail == cq->ibcq.cqe) 376 if (tail == (u32) cq->ibcq.cqe)
294 cq->tail = 0; 377 tail = 0;
295 else 378 else
296 cq->tail++; 379 tail++;
297 } 380 }
298 cq->ibcq.cqe = cqe; 381 cq->ibcq.cqe = cqe;
299 cq->head = n; 382 wc->head = n;
300 cq->tail = 0; 383 wc->tail = 0;
301 old_wc = cq->queue;
302 cq->queue = wc; 384 cq->queue = wc;
303 spin_unlock_irq(&cq->lock); 385 spin_unlock_irq(&cq->lock);
304 386
305 vfree(old_wc); 387 vfree(old_wc);
306 388
389 if (cq->ip) {
390 struct ipath_ibdev *dev = to_idev(ibcq->device);
391 struct ipath_mmap_info *ip = cq->ip;
392
393 ip->obj = wc;
394 ip->size = PAGE_ALIGN(sizeof(*wc) +
395 sizeof(struct ib_wc) * cqe);
396 spin_lock_irq(&dev->pending_lock);
397 ip->next = dev->pending_mmaps;
398 dev->pending_mmaps = ip;
399 spin_unlock_irq(&dev->pending_lock);
400 }
401
307 ret = 0; 402 ret = 0;
308 403
309bail: 404bail:
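
Resizing works by counting the live entries (with wrap), refusing to shrink below that count, and copying the survivors into the new ring starting at slot 0, so the new head is the count and the new tail is 0. Below is a simplified standalone sketch with invented names; the locking and the user-index clamping shown in the hunk above are omitted.

#include <stdio.h>
#include <stdlib.h>

static int demo_resize(int *queue, unsigned old_cqe, unsigned head,
		       unsigned tail, int **new_queue, unsigned new_cqe)
{
	unsigned n, i;
	int *wc;

	/* number of occupied slots, accounting for wrap */
	n = (head >= tail) ? head - tail : old_cqe + 1 + head - tail;
	if (new_cqe < n)
		return -1;                       /* would lose completions */

	wc = calloc(new_cqe + 1, sizeof(*wc));
	if (!wc)
		return -1;
	for (i = 0; tail != head; i++) {         /* copy live entries, oldest first */
		wc[i] = queue[tail];
		tail = (tail == old_cqe) ? 0 : tail + 1;
	}
	*new_queue = wc;                         /* new head = n, new tail = 0 */
	return (int)n;
}

int main(void)
{
	int old_q[4] = { 0, 0, 7, 8 };           /* cqe = 3: slots 0..3, head 0, tail 2 */
	int *new_q;
	int n = demo_resize(old_q, 3, 0, 2, &new_q, 7);

	if (n < 0)
		return 1;
	printf("moved %d entries: %d %d\n", n, new_q[0], new_q[1]);
	free(new_q);
	return 0;
}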
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index f415beda0d32..df69f0d80b8b 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -60,7 +60,6 @@
60#define __IPATH_USER_SEND 0x1000 /* use user mode send */ 60#define __IPATH_USER_SEND 0x1000 /* use user mode send */
61#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ 61#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
62#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ 62#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
63#define __IPATH_SMADBG 0x8000 /* sma packet debug */
64#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */ 63#define __IPATH_IPATHDBG 0x10000 /* Ethernet (IPATH) gen debug */
65#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */ 64#define __IPATH_IPATHWARN 0x20000 /* Ethernet (IPATH) warnings */
66#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */ 65#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
@@ -84,7 +83,6 @@
84/* print mmap/nopage stuff, not using VDBG any more */ 83/* print mmap/nopage stuff, not using VDBG any more */
85#define __IPATH_MMDBG 0x0 84#define __IPATH_MMDBG 0x0
86#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */ 85#define __IPATH_EPKTDBG 0x0 /* print ethernet packet data */
87#define __IPATH_SMADBG 0x0 /* process startup (init)/exit messages */
88#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */ 86#define __IPATH_IPATHDBG 0x0 /* Ethernet (IPATH) table dump on */
89#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */ 87#define __IPATH_IPATHWARN 0x0 /* Ethernet (IPATH) warnings on */
90#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */ 88#define __IPATH_IPATHERR 0x0 /* Ethernet (IPATH) errors on */
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 147dd89e21c9..28b6b46c106a 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -41,11 +41,11 @@
41 * through the /sys/bus/pci resource mmap interface. 41 * through the /sys/bus/pci resource mmap interface.
42 */ 42 */
43 43
44#include <linux/io.h>
44#include <linux/pci.h> 45#include <linux/pci.h>
45#include <asm/uaccess.h> 46#include <asm/uaccess.h>
46 47
47#include "ipath_kernel.h" 48#include "ipath_kernel.h"
48#include "ipath_layer.h"
49#include "ipath_common.h" 49#include "ipath_common.h"
50 50
51int ipath_diag_inuse; 51int ipath_diag_inuse;
@@ -274,6 +274,158 @@ bail:
274 return ret; 274 return ret;
275} 275}
276 276
277static ssize_t ipath_diagpkt_write(struct file *fp,
278 const char __user *data,
279 size_t count, loff_t *off);
280
281static struct file_operations diagpkt_file_ops = {
282 .owner = THIS_MODULE,
283 .write = ipath_diagpkt_write,
284};
285
286static struct cdev *diagpkt_cdev;
287static struct class_device *diagpkt_class_dev;
288
289int __init ipath_diagpkt_add(void)
290{
291 return ipath_cdev_init(IPATH_DIAGPKT_MINOR,
292 "ipath_diagpkt", &diagpkt_file_ops,
293 &diagpkt_cdev, &diagpkt_class_dev);
294}
295
296void __exit ipath_diagpkt_remove(void)
297{
298 ipath_cdev_cleanup(&diagpkt_cdev, &diagpkt_class_dev);
299}
300
301/**
302 * ipath_diagpkt_write - write an IB packet
303 * @fp: the diag data device file pointer
304 * @data: ipath_diag_pkt structure saying where to get the packet
305 * @count: size of data to write
306 * @off: unused by this code
307 */
308static ssize_t ipath_diagpkt_write(struct file *fp,
309 const char __user *data,
310 size_t count, loff_t *off)
311{
312 u32 __iomem *piobuf;
313 u32 plen, clen, pbufn;
314 struct ipath_diag_pkt dp;
315 u32 *tmpbuf = NULL;
316 struct ipath_devdata *dd;
317 ssize_t ret = 0;
318 u64 val;
319
320 if (count < sizeof(dp)) {
321 ret = -EINVAL;
322 goto bail;
323 }
324
325 if (copy_from_user(&dp, data, sizeof(dp))) {
326 ret = -EFAULT;
327 goto bail;
328 }
329
330 /* send count must be an exact number of dwords */
331 if (dp.len & 3) {
332 ret = -EINVAL;
333 goto bail;
334 }
335
336 clen = dp.len >> 2;
337
338 dd = ipath_lookup(dp.unit);
339 if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
340 !dd->ipath_kregbase) {
341 ipath_cdbg(VERBOSE, "illegal unit %u for diag data send\n",
342 dp.unit);
343 ret = -ENODEV;
344 goto bail;
345 }
346
347 if (ipath_diag_inuse && !diag_set_link &&
348 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
349 diag_set_link = 1;
350 ipath_cdbg(VERBOSE, "Trying to set link active for "
351 "diag pkt\n");
352 ipath_set_linkstate(dd, IPATH_IB_LINKARM);
353 ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
354 }
355
356 if (!(dd->ipath_flags & IPATH_INITTED)) {
357 /* no hardware, freeze, etc. */
358 ipath_cdbg(VERBOSE, "unit %u not usable\n", dd->ipath_unit);
359 ret = -ENODEV;
360 goto bail;
361 }
362 val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
363 if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
364 val != IPATH_IBSTATE_ACTIVE) {
365 ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
366 dd->ipath_unit, (unsigned long long) val);
367 ret = -EINVAL;
368 goto bail;
369 }
370
371 /* need total length before first word written */
372 /* +1 word is for the qword padding */
373 plen = sizeof(u32) + dp.len;
374
375 if ((plen + 4) > dd->ipath_ibmaxlen) {
376 ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
377 plen - 4, dd->ipath_ibmaxlen);
378 ret = -EINVAL;
379 goto bail; /* before writing pbc */
380 }
381 tmpbuf = vmalloc(plen);
382 if (!tmpbuf) {
383 dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
384 "failing\n");
385 ret = -ENOMEM;
386 goto bail;
387 }
388
389 if (copy_from_user(tmpbuf,
390 (const void __user *) (unsigned long) dp.data,
391 dp.len)) {
392 ret = -EFAULT;
393 goto bail;
394 }
395
396 piobuf = ipath_getpiobuf(dd, &pbufn);
397 if (!piobuf) {
398 ipath_cdbg(VERBOSE, "No PIO buffers avail for unit %u\n",
399 dd->ipath_unit);
400 ret = -EBUSY;
401 goto bail;
402 }
403
404 plen >>= 2; /* in dwords */
405
406 if (ipath_debug & __IPATH_PKTDBG)
407 ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
408 dd->ipath_unit, plen - 1, pbufn);
409
410 /* we have to flush after the PBC for correctness on some cpus
411 * or the WC buffer can be written out of order */
412 writeq(plen, piobuf);
413 ipath_flush_wc();
414 /* copy all but the trigger word, then flush, so it's written
415 * to the chip before the trigger word; then write the trigger word
416 * and flush again, so the packet is sent. */
417 __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
418 ipath_flush_wc();
419 __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
420 ipath_flush_wc();
421
422 ret = sizeof(dp);
423
424bail:
425 vfree(tmpbuf);
426 return ret;
427}
428
277static int ipath_diag_release(struct inode *in, struct file *fp) 429static int ipath_diag_release(struct inode *in, struct file *fp)
278{ 430{
279 mutex_lock(&ipath_mutex); 431 mutex_lock(&ipath_mutex);
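
The diag send path added above is all about write ordering on a write-combining mapping: the PBC/length word must reach the chip first, the packet body must land before the trigger word, and the trigger word goes last so the chip only launches a complete packet. The sketch below shows only that ordering; the flush is a stub standing in for the real write-combining barrier, all names are invented, and the fake buffer is an ordinary array rather than device memory.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint32_t fake_piobuf[64];             /* stands in for the chip's PIO buffer */

static void demo_flush_wc(void)
{
	/* placeholder: real code issues a write-combining flush here */
}

static void demo_send(const uint32_t *pkt, uint32_t ndwords)
{
	uint32_t plen = ndwords + 1;             /* +1 dword for the PBC/length word */

	fake_piobuf[0] = plen;                   /* PBC first ... */
	demo_flush_wc();                         /* ... and make sure it lands first */

	memcpy(&fake_piobuf[2], pkt, (ndwords - 1) * sizeof(*pkt));
	demo_flush_wc();                         /* body before the trigger word */

	fake_piobuf[2 + ndwords - 1] = pkt[ndwords - 1];  /* trigger word last */
	demo_flush_wc();
}

int main(void)
{
	uint32_t pkt[4] = { 0x11, 0x22, 0x33, 0x44 };

	demo_send(pkt, 4);
	printf("pbc=%u last=0x%x\n", (unsigned)fake_piobuf[0],
	       (unsigned)fake_piobuf[5]);
	return 0;
}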
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index f98518d912b5..2108466c7e33 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,7 +39,7 @@
39#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
40 40
41#include "ipath_kernel.h" 41#include "ipath_kernel.h"
42#include "ipath_layer.h" 42#include "ipath_verbs.h"
43#include "ipath_common.h" 43#include "ipath_common.h"
44 44
45static void ipath_update_pio_bufs(struct ipath_devdata *); 45static void ipath_update_pio_bufs(struct ipath_devdata *);
@@ -51,8 +51,6 @@ const char *ipath_get_unit_name(int unit)
51 return iname; 51 return iname;
52} 52}
53 53
54EXPORT_SYMBOL_GPL(ipath_get_unit_name);
55
56#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: " 54#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
57#define PFX IPATH_DRV_NAME ": " 55#define PFX IPATH_DRV_NAME ": "
58 56
@@ -60,13 +58,13 @@ EXPORT_SYMBOL_GPL(ipath_get_unit_name);
60 * The size has to be longer than this string, so we can append 58 * The size has to be longer than this string, so we can append
61 * board/chip information to it in the init code. 59 * board/chip information to it in the init code.
62 */ 60 */
63const char ipath_core_version[] = IPATH_IDSTR "\n"; 61const char ib_ipath_version[] = IPATH_IDSTR "\n";
64 62
65static struct idr unit_table; 63static struct idr unit_table;
66DEFINE_SPINLOCK(ipath_devs_lock); 64DEFINE_SPINLOCK(ipath_devs_lock);
67LIST_HEAD(ipath_dev_list); 65LIST_HEAD(ipath_dev_list);
68 66
69wait_queue_head_t ipath_sma_state_wait; 67wait_queue_head_t ipath_state_wait;
70 68
71unsigned ipath_debug = __IPATH_INFO; 69unsigned ipath_debug = __IPATH_INFO;
72 70
@@ -403,10 +401,10 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
403 /* setup the chip-specific functions, as early as possible. */ 401 /* setup the chip-specific functions, as early as possible. */
404 switch (ent->device) { 402 switch (ent->device) {
405 case PCI_DEVICE_ID_INFINIPATH_HT: 403 case PCI_DEVICE_ID_INFINIPATH_HT:
406 ipath_init_ht400_funcs(dd); 404 ipath_init_iba6110_funcs(dd);
407 break; 405 break;
408 case PCI_DEVICE_ID_INFINIPATH_PE800: 406 case PCI_DEVICE_ID_INFINIPATH_PE800:
409 ipath_init_pe800_funcs(dd); 407 ipath_init_iba6120_funcs(dd);
410 break; 408 break;
411 default: 409 default:
412 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " 410 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
@@ -440,7 +438,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
440 } 438 }
441 dd->ipath_pcirev = rev; 439 dd->ipath_pcirev = rev;
442 440
441#if defined(__powerpc__)
442 /* There isn't a generic way to specify writethrough mappings */
443 dd->ipath_kregbase = __ioremap(addr, len,
444 (_PAGE_NO_CACHE|_PAGE_WRITETHRU));
445#else
443 dd->ipath_kregbase = ioremap_nocache(addr, len); 446 dd->ipath_kregbase = ioremap_nocache(addr, len);
447#endif
444 448
445 if (!dd->ipath_kregbase) { 449 if (!dd->ipath_kregbase) {
446 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", 450 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
@@ -503,7 +507,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
503 ipathfs_add_device(dd); 507 ipathfs_add_device(dd);
504 ipath_user_add(dd); 508 ipath_user_add(dd);
505 ipath_diag_add(dd); 509 ipath_diag_add(dd);
506 ipath_layer_add(dd); 510 ipath_register_ib_device(dd);
507 511
508 goto bail; 512 goto bail;
509 513
@@ -532,7 +536,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
532 return; 536 return;
533 537
534 dd = pci_get_drvdata(pdev); 538 dd = pci_get_drvdata(pdev);
535 ipath_layer_remove(dd); 539 ipath_unregister_ib_device(dd->verbs_dev);
536 ipath_diag_remove(dd); 540 ipath_diag_remove(dd);
537 ipath_user_remove(dd); 541 ipath_user_remove(dd);
538 ipathfs_remove_device(dd); 542 ipathfs_remove_device(dd);
@@ -607,21 +611,23 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
607 * 611 *
608 * wait up to msecs milliseconds for IB link state change to occur for 612 * wait up to msecs milliseconds for IB link state change to occur for
609 * now, take the easy polling route. Currently used only by 613 * now, take the easy polling route. Currently used only by
610 * ipath_layer_set_linkstate. Returns 0 if state reached, otherwise 614 * ipath_set_linkstate. Returns 0 if state reached, otherwise
611 * -ETIMEDOUT state can have multiple states set, for any of several 615 * -ETIMEDOUT state can have multiple states set, for any of several
612 * transitions. 616 * transitions.
613 */ 617 */
614int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) 618static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
619 int msecs)
615{ 620{
616 dd->ipath_sma_state_wanted = state; 621 dd->ipath_state_wanted = state;
617 wait_event_interruptible_timeout(ipath_sma_state_wait, 622 wait_event_interruptible_timeout(ipath_state_wait,
618 (dd->ipath_flags & state), 623 (dd->ipath_flags & state),
619 msecs_to_jiffies(msecs)); 624 msecs_to_jiffies(msecs));
620 dd->ipath_sma_state_wanted = 0; 625 dd->ipath_state_wanted = 0;
621 626
622 if (!(dd->ipath_flags & state)) { 627 if (!(dd->ipath_flags & state)) {
623 u64 val; 628 u64 val;
624 ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n", 629 ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
630 " ms\n",
625 /* test INIT ahead of DOWN, both can be set */ 631 /* test INIT ahead of DOWN, both can be set */
626 (state & IPATH_LINKINIT) ? "INIT" : 632 (state & IPATH_LINKINIT) ? "INIT" :
627 ((state & IPATH_LINKDOWN) ? "DOWN" : 633 ((state & IPATH_LINKDOWN) ? "DOWN" :
@@ -807,58 +813,6 @@ bail:
807 return skb; 813 return skb;
808} 814}
809 815
810/**
811 * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
812 * @dd: the infinipath device
813 * @etail: the sk_buff number
814 * @tlen: the total packet length
815 * @hdr: the ethernet header
816 *
817 * Separate routine for better overall optimization
818 */
819static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
820 u32 tlen, struct ether_header *hdr)
821{
822 u32 elen;
823 u8 pad, *bthbytes;
824 struct sk_buff *skb, *nskb;
825
826 if (dd->ipath_port0_skbs &&
827 hdr->sub_opcode == IPATH_ITH4X_OPCODE_ENCAP) {
828 /*
829 * Allocate a new sk_buff to replace the one we give
830 * to the network stack.
831 */
832 nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
833 if (!nskb) {
834 /* count OK packets that we drop */
835 ipath_stats.sps_krdrops++;
836 return;
837 }
838
839 bthbytes = (u8 *) hdr->bth;
840 pad = (bthbytes[1] >> 4) & 3;
841 /* +CRC32 */
842 elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
843
844 skb = dd->ipath_port0_skbs[etail];
845 dd->ipath_port0_skbs[etail] = nskb;
846 skb_put(skb, elen);
847
848 dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
849 ((char __iomem *) dd->ipath_kregbase
850 + dd->ipath_rcvegrbase), 0,
851 virt_to_phys(nskb->data));
852
853 __ipath_layer_rcv(dd, hdr, skb);
854
855 /* another ether packet received */
856 ipath_stats.sps_ether_rpkts++;
857 }
858 else if (hdr->sub_opcode == IPATH_ITH4X_OPCODE_LID_ARP)
859 __ipath_layer_rcv_lid(dd, hdr);
860}
861
862static void ipath_rcv_hdrerr(struct ipath_devdata *dd, 816static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
863 u32 eflags, 817 u32 eflags,
864 u32 l, 818 u32 l,
@@ -972,26 +926,17 @@ reloop:
972 if (unlikely(eflags)) 926 if (unlikely(eflags))
973 ipath_rcv_hdrerr(dd, eflags, l, etail, rc); 927 ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
974 else if (etype == RCVHQ_RCV_TYPE_NON_KD) { 928 else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
975 int ret = __ipath_verbs_rcv(dd, rc + 1, 929 ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
976 ebuf, tlen); 930 if (dd->ipath_lli_counter)
977 if (ret == -ENODEV) 931 dd->ipath_lli_counter--;
978 ipath_cdbg(VERBOSE, 932 ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
979 "received IB packet, " 933 "qp=%x), len %x; ignored\n",
980 "not SMA (QP=%x)\n", qp); 934 etype, bthbytes[0], qp, tlen);
981 if (dd->ipath_lli_counter)
982 dd->ipath_lli_counter--;
983
984 } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
985 if (qp == IPATH_KD_QP &&
986 bthbytes[0] == ipath_layer_rcv_opcode &&
987 ebuf)
988 ipath_rcv_layer(dd, etail, tlen,
989 (struct ether_header *)hdr);
990 else
991 ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
992 "qp=%x), len %x; ignored\n",
993 etype, bthbytes[0], qp, tlen);
994 } 935 }
936 else if (etype == RCVHQ_RCV_TYPE_EAGER)
937 ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
938 "qp=%x), len %x; ignored\n",
939 etype, bthbytes[0], qp, tlen);
995 else if (etype == RCVHQ_RCV_TYPE_EXPECTED) 940 else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
996 ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", 941 ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
997 be32_to_cpu(hdr->bth[0]) & 0xff); 942 be32_to_cpu(hdr->bth[0]) & 0xff);
@@ -1024,7 +969,8 @@ reloop:
1024 */ 969 */
1025 if (l == hdrqtail || (i && !(i&0xf))) { 970 if (l == hdrqtail || (i && !(i&0xf))) {
1026 u64 lval; 971 u64 lval;
1027 if (l == hdrqtail) /* PE-800 interrupt only on last */ 972 if (l == hdrqtail)
973 /* request IBA6120 interrupt only on last */
1028 lval = dd->ipath_rhdrhead_intr_off | l; 974 lval = dd->ipath_rhdrhead_intr_off | l;
1029 else 975 else
1030 lval = l; 976 lval = l;
@@ -1038,7 +984,7 @@ reloop:
1038 } 984 }
1039 985
1040 if (!dd->ipath_rhdrhead_intr_off && !reloop) { 986 if (!dd->ipath_rhdrhead_intr_off && !reloop) {
1041 /* HT-400 workaround; we can have a race clearing chip 987 /* IBA6110 workaround; we can have a race clearing chip
1042 * interrupt with another interrupt about to be delivered, 988 * interrupt with another interrupt about to be delivered,
1043 * and can clear it before it is delivered on the GPIO 989 * and can clear it before it is delivered on the GPIO
1044 * workaround. By doing the extra check here for the 990 * workaround. By doing the extra check here for the
@@ -1211,7 +1157,7 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
1211 * 1157 *
1212 * do appropriate marking as busy, etc. 1158 * do appropriate marking as busy, etc.
1213 * returns buffer number if one found (>=0), negative number is error. 1159 * returns buffer number if one found (>=0), negative number is error.
1214 * Used by ipath_sma_send_pkt and ipath_layer_send 1160 * Used by ipath_layer_send
1215 */ 1161 */
1216u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum) 1162u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
1217{ 1163{
@@ -1317,13 +1263,6 @@ rescan:
1317 goto bail; 1263 goto bail;
1318 } 1264 }
1319 1265
1320 if (updated)
1321 /*
1322 * ran out of bufs, now some (at least this one we just
1323 * got) are now available, so tell the layered driver.
1324 */
1325 __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
1326
1327 /* 1266 /*
1328 * set next starting place. Since it's just an optimization, 1267 * set next starting place. Since it's just an optimization,
1329 * it doesn't matter who wins on this, so no locking 1268 * it doesn't matter who wins on this, so no locking
@@ -1500,7 +1439,7 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
1500 return ret; 1439 return ret;
1501} 1440}
1502 1441
1503void ipath_set_ib_lstate(struct ipath_devdata *dd, int which) 1442static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1504{ 1443{
1505 static const char *what[4] = { 1444 static const char *what[4] = {
1506 [0] = "DOWN", 1445 [0] = "DOWN",
@@ -1511,7 +1450,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1511 int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) & 1450 int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
1512 INFINIPATH_IBCC_LINKCMD_MASK; 1451 INFINIPATH_IBCC_LINKCMD_MASK;
1513 1452
1514 ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate " 1453 ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
1515 "is %s\n", dd->ipath_unit, 1454 "is %s\n", dd->ipath_unit,
1516 what[linkcmd], 1455 what[linkcmd],
1517 ipath_ibcstatus_str[ 1456 ipath_ibcstatus_str[
@@ -1520,7 +1459,7 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1520 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & 1459 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1521 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); 1460 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
1522 /* flush all queued sends when going to DOWN or INIT, to be sure that 1461 /* flush all queued sends when going to DOWN or INIT, to be sure that
1523 * they don't block SMA and other MAD packets */ 1462 * they don't block MAD packets */
1524 if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) { 1463 if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
1525 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1464 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1526 INFINIPATH_S_ABORT); 1465 INFINIPATH_S_ABORT);
@@ -1534,6 +1473,180 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1534 dd->ipath_ibcctrl | which); 1473 dd->ipath_ibcctrl | which);
1535} 1474}
1536 1475
1476int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
1477{
1478 u32 lstate;
1479 int ret;
1480
1481 switch (newstate) {
1482 case IPATH_IB_LINKDOWN:
1483 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
1484 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1485 /* don't wait */
1486 ret = 0;
1487 goto bail;
1488
1489 case IPATH_IB_LINKDOWN_SLEEP:
1490 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
1491 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1492 /* don't wait */
1493 ret = 0;
1494 goto bail;
1495
1496 case IPATH_IB_LINKDOWN_DISABLE:
1497 ipath_set_ib_lstate(dd,
1498 INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
1499 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1500 /* don't wait */
1501 ret = 0;
1502 goto bail;
1503
1504 case IPATH_IB_LINKINIT:
1505 if (dd->ipath_flags & IPATH_LINKINIT) {
1506 ret = 0;
1507 goto bail;
1508 }
1509 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
1510 INFINIPATH_IBCC_LINKCMD_SHIFT);
1511 lstate = IPATH_LINKINIT;
1512 break;
1513
1514 case IPATH_IB_LINKARM:
1515 if (dd->ipath_flags & IPATH_LINKARMED) {
1516 ret = 0;
1517 goto bail;
1518 }
1519 if (!(dd->ipath_flags &
1520 (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
1521 ret = -EINVAL;
1522 goto bail;
1523 }
1524 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
1525 INFINIPATH_IBCC_LINKCMD_SHIFT);
1526 /*
1527 * Since the port can transition to ACTIVE by receiving
1528 * a non VL 15 packet, wait for either state.
1529 */
1530 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
1531 break;
1532
1533 case IPATH_IB_LINKACTIVE:
1534 if (dd->ipath_flags & IPATH_LINKACTIVE) {
1535 ret = 0;
1536 goto bail;
1537 }
1538 if (!(dd->ipath_flags & IPATH_LINKARMED)) {
1539 ret = -EINVAL;
1540 goto bail;
1541 }
1542 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
1543 INFINIPATH_IBCC_LINKCMD_SHIFT);
1544 lstate = IPATH_LINKACTIVE;
1545 break;
1546
1547 default:
1548 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
1549 ret = -EINVAL;
1550 goto bail;
1551 }
1552 ret = ipath_wait_linkstate(dd, lstate, 2000);
1553
1554bail:
1555 return ret;
1556}
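
ipath_set_linkstate() above enforces a small state machine: the DOWN variants and INIT are always attempted, ARMED requires the link to already be at INIT or ACTIVE, and ACTIVE requires ARMED. A compact sketch of just those transition checks follows, with invented names and a plain enum instead of the driver's flag bits.

#include <stdio.h>

enum demo_lstate { DEMO_DOWN, DEMO_INIT, DEMO_ARMED, DEMO_ACTIVE };

/* Returns 1 if the requested transition is allowed from the current state. */
static int demo_linkstate_ok(enum demo_lstate cur, enum demo_lstate want)
{
	switch (want) {
	case DEMO_DOWN:
	case DEMO_INIT:
		return 1;                               /* always allowed */
	case DEMO_ARMED:
		return cur == DEMO_INIT || cur == DEMO_ACTIVE;
	case DEMO_ACTIVE:
		return cur == DEMO_ARMED;
	}
	return 0;
}

int main(void)
{
	printf("INIT->ARMED: %d\n", demo_linkstate_ok(DEMO_INIT, DEMO_ARMED));
	printf("DOWN->ACTIVE: %d\n", demo_linkstate_ok(DEMO_DOWN, DEMO_ACTIVE));
	return 0;
}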
1557
1558/**
1559 * ipath_set_mtu - set the MTU
1560 * @dd: the infinipath device
1561 * @arg: the new MTU
1562 *
1563 * we can handle "any" incoming size; the issue here is whether we
1564 * need to restrict our outgoing size. For now, we don't do any
1565 * sanity checking on this, and we don't deal with what happens to
1566 * programs that are already running when the size changes.
1567 * NOTE: changing the MTU will usually cause the IBC to go back to
1568 * link initialize (IPATH_IBSTATE_INIT) state...
1569 */
1570int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
1571{
1572 u32 piosize;
1573 int changed = 0;
1574 int ret;
1575
1576 /*
1577 * mtu is IB data payload max. It's the largest power of 2 less
1578 * than piosize (or even larger, since it only really controls the
1579 * largest we can receive; we can send the max of the mtu and
1580 * piosize). We check that it's one of the valid IB sizes.
1581 */
1582 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
1583 arg != 4096) {
1584 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
1585 ret = -EINVAL;
1586 goto bail;
1587 }
1588 if (dd->ipath_ibmtu == arg) {
1589 ret = 0; /* same as current */
1590 goto bail;
1591 }
1592
1593 piosize = dd->ipath_ibmaxlen;
1594 dd->ipath_ibmtu = arg;
1595
1596 if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
1597 /* Only if it's not the initial value (or reset to it) */
1598 if (piosize != dd->ipath_init_ibmaxlen) {
1599 dd->ipath_ibmaxlen = piosize;
1600 changed = 1;
1601 }
1602 } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
1603 piosize = arg + IPATH_PIO_MAXIBHDR;
1604 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
1605 "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
1606 arg);
1607 dd->ipath_ibmaxlen = piosize;
1608 changed = 1;
1609 }
1610
1611 if (changed) {
1612 /*
1613 * set the IBC maxpktlength to the size of our pio
1614 * buffers in words
1615 */
1616 u64 ibc = dd->ipath_ibcctrl;
1617 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
1618 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
1619
1620 piosize = piosize - 2 * sizeof(u32); /* ignore pbc */
1621 dd->ipath_ibmaxlen = piosize;
1622 piosize /= sizeof(u32); /* in words */
1623 /*
1624 * for ICRC, which we only send in diag test pkt mode, and
1625 * we don't need to worry about that for mtu
1626 */
1627 piosize += 1;
1628
1629 ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
1630 dd->ipath_ibcctrl = ibc;
1631 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1632 dd->ipath_ibcctrl);
1633 dd->ipath_f_tidtemplate(dd);
1634 }
1635
1636 ret = 0;
1637
1638bail:
1639 return ret;
1640}
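
The MTU path above accepts only the IB-legal sizes 256 through 4096 and then derives the maximum send length: either the full PIO buffer size, or the MTU plus a header allowance, whichever constraint applies. A simplified sketch of that calculation follows; DEMO_PIO_MAXIBHDR is an assumed stand-in for IPATH_PIO_MAXIBHDR and the "initial value" special case is omitted.

#include <stdio.h>

#define DEMO_PIO_MAXIBHDR 128            /* assumed header allowance, bytes */

static int demo_valid_ib_mtu(unsigned mtu)
{
	return mtu == 256 || mtu == 512 || mtu == 1024 ||
	       mtu == 2048 || mtu == 4096;
}

/* Returns the new ibmaxlen for a given PIO buffer size and MTU. */
static unsigned demo_ibmaxlen(unsigned piosize, unsigned mtu)
{
	if (mtu >= piosize - DEMO_PIO_MAXIBHDR)
		return piosize;                  /* bounded by the PIO buffer */
	return mtu + DEMO_PIO_MAXIBHDR;          /* bounded by the MTU */
}

int main(void)
{
	unsigned piosize = 2048 + DEMO_PIO_MAXIBHDR;

	printf("mtu 1000 valid: %d\n", demo_valid_ib_mtu(1000));
	printf("mtu 2048 -> ibmaxlen %u\n", demo_ibmaxlen(piosize, 2048));
	printf("mtu 1024 -> ibmaxlen %u\n", demo_ibmaxlen(piosize, 1024));
	return 0;
}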
1641
1642int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
1643{
1644 dd->ipath_lid = arg;
1645 dd->ipath_lmc = lmc;
1646
1647 return 0;
1648}
1649
1537/** 1650/**
1538 * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register 1651 * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
1539 * @dd: the infinipath device 1652 * @dd: the infinipath device
@@ -1637,13 +1750,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
1637 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE << 1750 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
1638 INFINIPATH_IBCC_LINKINITCMD_SHIFT); 1751 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
1639 1752
1640 /*
1641 * we are shutting down, so tell the layered driver. We don't do
1642 * this on just a link state change, much like ethernet, a cable
1643 * unplug, etc. doesn't change driver state
1644 */
1645 ipath_layer_intr(dd, IPATH_LAYER_INT_IF_DOWN);
1646
1647 /* disable IBC */ 1753 /* disable IBC */
1648 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; 1754 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
1649 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 1755 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
@@ -1743,7 +1849,7 @@ static int __init infinipath_init(void)
1743{ 1849{
1744 int ret; 1850 int ret;
1745 1851
1746 ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version); 1852 ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
1747 1853
1748 /* 1854 /*
1749 * These must be called before the driver is registered with 1855 * These must be called before the driver is registered with
@@ -1776,8 +1882,18 @@ static int __init infinipath_init(void)
1776 goto bail_group; 1882 goto bail_group;
1777 } 1883 }
1778 1884
1885 ret = ipath_diagpkt_add();
1886 if (ret < 0) {
1887 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
1888 "diag data device: error %d\n", -ret);
1889 goto bail_ipathfs;
1890 }
1891
1779 goto bail; 1892 goto bail;
1780 1893
1894bail_ipathfs:
1895 ipath_exit_ipathfs();
1896
1781bail_group: 1897bail_group:
1782 ipath_driver_remove_group(&ipath_driver.driver); 1898 ipath_driver_remove_group(&ipath_driver.driver);
1783 1899
@@ -1888,6 +2004,8 @@ static void __exit infinipath_cleanup(void)
1888 struct ipath_devdata *dd, *tmp; 2004 struct ipath_devdata *dd, *tmp;
1889 unsigned long flags; 2005 unsigned long flags;
1890 2006
2007 ipath_diagpkt_remove();
2008
1891 ipath_exit_ipathfs(); 2009 ipath_exit_ipathfs();
1892 2010
1893 ipath_driver_remove_group(&ipath_driver.driver); 2011 ipath_driver_remove_group(&ipath_driver.driver);
@@ -1998,5 +2116,22 @@ bail:
1998 return ret; 2116 return ret;
1999} 2117}
2000 2118
2119int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
2120{
2121 u64 val;
2122 if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK) {
2123 return -1;
2124 }
2125 if (dd->ipath_rx_pol_inv != new_pol_inv) {
2126 dd->ipath_rx_pol_inv = new_pol_inv;
2127 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
2128 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
2129 INFINIPATH_XGXS_RX_POL_SHIFT);
2130 val |= ((u64)dd->ipath_rx_pol_inv) <<
2131 INFINIPATH_XGXS_RX_POL_SHIFT;
2132 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
2133 }
2134 return 0;
2135}
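
ipath_set_rx_pol_inv() above is a standard read-modify-write of a register bit field: read, clear the field at its shift, OR in the new value, write back, and reject values that don't fit the mask. The same pattern in a standalone sketch, with the register access simulated by a plain variable and all names invented:

#include <stdio.h>
#include <stdint.h>

#define DEMO_RX_POL_MASK  0x3ULL
#define DEMO_RX_POL_SHIFT 20

static uint64_t fake_xgxsconfig = 0x123456789abcdefULL;

static int demo_set_rx_pol_inv(uint8_t new_pol_inv)
{
	uint64_t val;

	if (new_pol_inv > DEMO_RX_POL_MASK)
		return -1;                               /* value doesn't fit the field */

	val = fake_xgxsconfig;                           /* "read the register" */
	val &= ~(DEMO_RX_POL_MASK << DEMO_RX_POL_SHIFT); /* clear the field */
	val |= (uint64_t)new_pol_inv << DEMO_RX_POL_SHIFT;
	fake_xgxsconfig = val;                           /* "write it back" */
	return 0;
}

int main(void)
{
	demo_set_rx_pol_inv(2);
	printf("field now %llu\n",
	       (unsigned long long)((fake_xgxsconfig >> DEMO_RX_POL_SHIFT) &
				    DEMO_RX_POL_MASK));
	return 0;
}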
2001module_init(infinipath_init); 2136module_init(infinipath_init);
2002module_exit(infinipath_cleanup); 2137module_exit(infinipath_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index bbaa70e57db1..29930e22318e 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -39,7 +39,6 @@
39#include <asm/pgtable.h> 39#include <asm/pgtable.h>
40 40
41#include "ipath_kernel.h" 41#include "ipath_kernel.h"
42#include "ipath_layer.h"
43#include "ipath_common.h" 42#include "ipath_common.h"
44 43
45static int ipath_open(struct inode *, struct file *); 44static int ipath_open(struct inode *, struct file *);
@@ -985,15 +984,17 @@ static int mmap_piobufs(struct vm_area_struct *vma,
985 * write combining behavior we want on the PIO buffers! 984 * write combining behavior we want on the PIO buffers!
986 */ 985 */
987 986
988 if (vma->vm_flags & VM_READ) { 987#if defined(__powerpc__)
989 dev_info(&dd->pcidev->dev, 988 /* There isn't a generic way to specify writethrough mappings */
990 "Can't map piobufs as readable (flags=%lx)\n", 989 pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
991 vma->vm_flags); 990 pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
992 ret = -EPERM; 991 pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
993 goto bail; 992#endif
994 }
995 993
996 /* don't allow them to later change to readable with mprotect */ 994 /*
995 * don't allow them to later change to readable with mprotect (for when
996 * not initially mapped readable, as is normally the case)
997 */
997 vma->vm_flags &= ~VM_MAYREAD; 998 vma->vm_flags &= ~VM_MAYREAD;
998 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; 999 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
999 1000
@@ -1109,7 +1110,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1109 ret = mmap_rcvegrbufs(vma, pd); 1110 ret = mmap_rcvegrbufs(vma, pd);
1110 else if (pgaddr == (u64) pd->port_rcvhdrq_phys) { 1111 else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
1111 /* 1112 /*
1112 * The rcvhdrq itself; readonly except on HT-400 (so have 1113 * The rcvhdrq itself; readonly except on HT (so have
1113 * to allow writable mapping), multiple pages, contiguous 1114 * to allow writable mapping), multiple pages, contiguous
1114 * from an i/o perspective. 1115 * from an i/o perspective.
1115 */ 1116 */
@@ -1149,6 +1150,7 @@ static unsigned int ipath_poll(struct file *fp,
1149 struct ipath_portdata *pd; 1150 struct ipath_portdata *pd;
1150 u32 head, tail; 1151 u32 head, tail;
1151 int bit; 1152 int bit;
1153 unsigned pollflag = 0;
1152 struct ipath_devdata *dd; 1154 struct ipath_devdata *dd;
1153 1155
1154 pd = port_fp(fp); 1156 pd = port_fp(fp);
@@ -1185,9 +1187,12 @@ static unsigned int ipath_poll(struct file *fp,
1185 clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); 1187 clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1186 pd->port_rcvwait_to++; 1188 pd->port_rcvwait_to++;
1187 } 1189 }
1190 else
1191 pollflag = POLLIN | POLLRDNORM;
1188 } 1192 }
1189 else { 1193 else {
1190 /* it's already happened; don't do wait_event overhead */ 1194 /* it's already happened; don't do wait_event overhead */
1195 pollflag = POLLIN | POLLRDNORM;
1191 pd->port_rcvnowait++; 1196 pd->port_rcvnowait++;
1192 } 1197 }
1193 1198
@@ -1195,7 +1200,7 @@ static unsigned int ipath_poll(struct file *fp,
1195 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 1200 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1196 dd->ipath_rcvctrl); 1201 dd->ipath_rcvctrl);
1197 1202
1198 return 0; 1203 return pollflag;
1199} 1204}
1200 1205
1201static int try_alloc_port(struct ipath_devdata *dd, int port, 1206static int try_alloc_port(struct ipath_devdata *dd, int port,
@@ -1297,14 +1302,14 @@ static int find_best_unit(struct file *fp)
1297 * This code is present to allow a knowledgeable person to 1302 * This code is present to allow a knowledgeable person to
1298 * specify the layout of processes to processors before opening 1303 * specify the layout of processes to processors before opening
1299 * this driver, and then we'll assign the process to the "closest" 1304 * this driver, and then we'll assign the process to the "closest"
1300 * HT-400 to that processor (we assume reasonable connectivity, 1305 * InfiniPath chip to that processor (we assume reasonable connectivity,
1301 * for now). This code assumes that if affinity has been set 1306 * for now). This code assumes that if affinity has been set
1302 * before this point, that at most one cpu is set; for now this 1307 * before this point, that at most one cpu is set; for now this
1303 * is reasonable. I check for both cpus_empty() and cpus_full(), 1308 * is reasonable. I check for both cpus_empty() and cpus_full(),
1304 * in case some kernel variant sets none of the bits when no 1309 * in case some kernel variant sets none of the bits when no
1305 * affinity is set. 2.6.11 and 12 kernels have all present 1310 * affinity is set. 2.6.11 and 12 kernels have all present
1306 * cpus set. Some day we'll have to fix it up further to handle 1311 * cpus set. Some day we'll have to fix it up further to handle
1307 * a cpu subset. This algorithm fails for two HT-400's connected 1312 * a cpu subset. This algorithm fails for two HT chips connected
1308 * in tunnel fashion. Eventually this needs real topology 1313 * in tunnel fashion. Eventually this needs real topology
1309 * information. There may be some issues with dual core numbering 1314 * information. There may be some issues with dual core numbering
1310 * as well. This needs more work prior to release. 1315 * as well. This needs more work prior to release.
@@ -1815,7 +1820,7 @@ int ipath_user_add(struct ipath_devdata *dd)
1815 if (ret < 0) { 1820 if (ret < 0) {
1816 ipath_dev_err(dd, "Could not create wildcard " 1821 ipath_dev_err(dd, "Could not create wildcard "
1817 "minor: error %d\n", -ret); 1822 "minor: error %d\n", -ret);
1818 goto bail_sma; 1823 goto bail_user;
1819 } 1824 }
1820 1825
1821 atomic_set(&user_setup, 1); 1826 atomic_set(&user_setup, 1);
@@ -1831,7 +1836,7 @@ int ipath_user_add(struct ipath_devdata *dd)
1831 1836
1832 goto bail; 1837 goto bail;
1833 1838
1834bail_sma: 1839bail_user:
1835 user_cleanup(); 1840 user_cleanup();
1836bail: 1841bail:
1837 return ret; 1842 return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 0936d8e8d704..a5eb30a06a5c 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -191,8 +191,8 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
191 portinfo[4] = (dd->ipath_lid << 16); 191 portinfo[4] = (dd->ipath_lid << 16);
192 192
193 /* 193 /*
194 * Notimpl yet SMLID (should we store this in the driver, in case 194 * Notimpl yet SMLID.
195 * SMA dies?) CapabilityMask is 0, we don't support any of these 195 * CapabilityMask is 0, we don't support any of these
196 * DiagCode is 0; we don't store any diag info for now Notimpl yet 196 * DiagCode is 0; we don't store any diag info for now Notimpl yet
197 * M_KeyLeasePeriod (we don't support M_Key) 197 * M_KeyLeasePeriod (we don't support M_Key)
198 */ 198 */
diff --git a/drivers/infiniband/hw/ipath/ipath_ht400.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 3db015da6e77..bf2455a6d562 100644
--- a/drivers/infiniband/hw/ipath/ipath_ht400.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -33,7 +33,7 @@
33 33
34/* 34/*
35 * This file contains all of the code that is specific to the InfiniPath 35 * This file contains all of the code that is specific to the InfiniPath
36 * HT-400 chip. 36 * HT chip.
37 */ 37 */
38 38
39#include <linux/pci.h> 39#include <linux/pci.h>
@@ -43,7 +43,7 @@
43#include "ipath_registers.h" 43#include "ipath_registers.h"
44 44
45/* 45/*
46 * This lists the InfiniPath HT400 registers, in the actual chip layout. 46 * This lists the InfiniPath registers, in the actual chip layout.
47 * This structure should never be directly accessed. 47 * This structure should never be directly accessed.
48 * 48 *
49 * The names are in InterCap form because they're taken straight from 49 * The names are in InterCap form because they're taken straight from
@@ -461,8 +461,9 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
461 * times. 461 * times.
462 */ 462 */
463 if (dd->ipath_flags & IPATH_INITTED) { 463 if (dd->ipath_flags & IPATH_INITTED) {
464 ipath_dev_err(dd, "Fatal Error (freeze " 464 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
465 "mode), no longer usable\n"); 465 "mode), no longer usable, SN %.16s\n",
466 dd->ipath_serial);
466 isfatal = 1; 467 isfatal = 1;
467 } 468 }
468 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; 469 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
@@ -537,7 +538,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
537 if (hwerrs & INFINIPATH_HWE_HTCMISCERR7) 538 if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
538 strlcat(msg, "[HT core Misc7]", msgl); 539 strlcat(msg, "[HT core Misc7]", msgl);
539 if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { 540 if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
540 strlcat(msg, "[Memory BIST test failed, HT-400 unusable]", 541 strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
541 msgl); 542 msgl);
542 /* ignore from now on, so disable until driver reloaded */ 543 /* ignore from now on, so disable until driver reloaded */
543 dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; 544 dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED;
@@ -553,7 +554,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
553 554
554 if (hwerrs & _IPATH_PLL_FAIL) { 555 if (hwerrs & _IPATH_PLL_FAIL) {
555 snprintf(bitsmsg, sizeof bitsmsg, 556 snprintf(bitsmsg, sizeof bitsmsg,
556 "[PLL failed (%llx), HT-400 unusable]", 557 "[PLL failed (%llx), InfiniPath hardware unusable]",
557 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL)); 558 (unsigned long long) (hwerrs & _IPATH_PLL_FAIL));
558 strlcat(msg, bitsmsg, msgl); 559 strlcat(msg, bitsmsg, msgl);
559 /* ignore from now on, so disable until driver reloaded */ 560 /* ignore from now on, so disable until driver reloaded */
@@ -610,18 +611,18 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
610 break; 611 break;
611 case 5: 612 case 5:
612 /* 613 /*
613 * HT-460 original production board; two production levels, with 614 * original production board; two production levels, with
614 * different serial number ranges. See ipath_ht_early_init() for 615 * different serial number ranges. See ipath_ht_early_init() for
615 * case where we enable IPATH_GPIO_INTR for later serial # range. 616 * case where we enable IPATH_GPIO_INTR for later serial # range.
616 */ 617 */
617 n = "InfiniPath_HT-460"; 618 n = "InfiniPath_QHT7040";
618 break; 619 break;
619 case 6: 620 case 6:
620 n = "OEM_Board_3"; 621 n = "OEM_Board_3";
621 break; 622 break;
622 case 7: 623 case 7:
623 /* HT-460 small form factor production board */ 624 /* small form factor production board */
624 n = "InfiniPath_HT-465"; 625 n = "InfiniPath_QHT7140";
625 break; 626 break;
626 case 8: 627 case 8:
627 n = "LS/X-1"; 628 n = "LS/X-1";
@@ -633,7 +634,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
633 n = "OEM_Board_2"; 634 n = "OEM_Board_2";
634 break; 635 break;
635 case 11: 636 case 11:
636 n = "InfiniPath_HT-470"; 637 n = "InfiniPath_HT-470"; /* obsoleted */
637 break; 638 break;
638 case 12: 639 case 12:
639 n = "OEM_Board_4"; 640 n = "OEM_Board_4";
@@ -641,7 +642,7 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
641 default: /* don't know, just print the number */ 642 default: /* don't know, just print the number */
642 ipath_dev_err(dd, "Don't yet know about board " 643 ipath_dev_err(dd, "Don't yet know about board "
643 "with ID %u\n", boardrev); 644 "with ID %u\n", boardrev);
644 snprintf(name, namelen, "Unknown_InfiniPath_HT-4xx_%u", 645 snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
645 boardrev); 646 boardrev);
646 break; 647 break;
647 } 648 }
@@ -650,11 +651,10 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
650 651
651 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { 652 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) {
652 /* 653 /*
653 * This version of the driver only supports the HT-400 654 * This version of the driver only supports Rev 3.2 and 3.3
654 * Rev 3.2
655 */ 655 */
656 ipath_dev_err(dd, 656 ipath_dev_err(dd,
657 "Unsupported HT-400 revision %u.%u!\n", 657 "Unsupported InfiniPath hardware revision %u.%u!\n",
658 dd->ipath_majrev, dd->ipath_minrev); 658 dd->ipath_majrev, dd->ipath_minrev);
659 ret = 1; 659 ret = 1;
660 goto bail; 660 goto bail;
@@ -738,7 +738,7 @@ static void ipath_check_htlink(struct ipath_devdata *dd)
738 738
739static int ipath_setup_ht_reset(struct ipath_devdata *dd) 739static int ipath_setup_ht_reset(struct ipath_devdata *dd)
740{ 740{
741 ipath_dbg("No reset possible for HT-400\n"); 741 ipath_dbg("No reset possible for this InfiniPath hardware\n");
742 return 0; 742 return 0;
743} 743}
744 744
@@ -925,7 +925,7 @@ static int set_int_handler(struct ipath_devdata *dd, struct pci_dev *pdev,
925 925
926 /* 926 /*
927 * kernels with CONFIG_PCI_MSI set the vector in the irq field of 927 * kernels with CONFIG_PCI_MSI set the vector in the irq field of
928 * struct pci_device, so we use that to program the HT-400 internal 928 * struct pci_device, so we use that to program the internal
929 * interrupt register (not config space) with that value. The BIOS 929 * interrupt register (not config space) with that value. The BIOS
930 * must still have done the basic MSI setup. 930 * must still have done the basic MSI setup.
931 */ 931 */
@@ -1013,7 +1013,7 @@ bail:
1013 * @dd: the infinipath device 1013 * @dd: the infinipath device
1014 * 1014 *
1015 * Called during driver unload. 1015 * Called during driver unload.
1016 * This is currently a nop for the HT-400, not for all chips 1016 * This is currently a nop for the HT chip, not for all chips
1017 */ 1017 */
1018static void ipath_setup_ht_cleanup(struct ipath_devdata *dd) 1018static void ipath_setup_ht_cleanup(struct ipath_devdata *dd)
1019{ 1019{
@@ -1290,6 +1290,15 @@ static int ipath_ht_bringup_serdes(struct ipath_devdata *dd)
1290 val &= ~INFINIPATH_XGXS_RESET; 1290 val &= ~INFINIPATH_XGXS_RESET;
1291 change = 1; 1291 change = 1;
1292 } 1292 }
1293 if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
1294 INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
1295 /* need to compensate for Tx inversion in partner */
1296 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
1297 INFINIPATH_XGXS_RX_POL_SHIFT);
1298 val |= dd->ipath_rx_pol_inv <<
1299 INFINIPATH_XGXS_RX_POL_SHIFT;
1300 change = 1;
1301 }
1293 if (change) 1302 if (change)
1294 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); 1303 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
1295 1304
@@ -1470,7 +1479,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1470 dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; 1479 dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
1471 1480
1472 /* 1481 /*
1473 * For HT-400, we allocate a somewhat overly large eager buffer, 1482 * For HT, we allocate a somewhat overly large eager buffer,
1474 * such that we can guarantee that we can receive the largest 1483 * such that we can guarantee that we can receive the largest
1475 * packet that we can send out. To truly support a 4KB MTU, 1484 * packet that we can send out. To truly support a 4KB MTU,
1476 * we need to bump this to a large value. To date, other than 1485 * we need to bump this to a large value. To date, other than
@@ -1531,7 +1540,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1531 if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && 1540 if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
1532 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { 1541 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
1533 /* 1542 /*
1534 * Later production HT-460 has same changes as HT-465, so 1543 * Later production QHT7040 has same changes as QHT7140, so
1535 * can use GPIO interrupts. They have serial #'s starting 1544 * can use GPIO interrupts. They have serial #'s starting
1536 * with 128, rather than 112. 1545 * with 128, rather than 112.
1537 */ 1546 */
@@ -1560,13 +1569,13 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
1560} 1569}
1561 1570
1562/** 1571/**
1563 * ipath_init_ht400_funcs - set up the chip-specific function pointers 1572 * ipath_init_iba6110_funcs - set up the chip-specific function pointers
1564 * @dd: the infinipath device 1573 * @dd: the infinipath device
1565 * 1574 *
1566 * This is global, and is called directly at init to set up the 1575 * This is global, and is called directly at init to set up the
1567 * chip-specific function pointers for later use. 1576 * chip-specific function pointers for later use.
1568 */ 1577 */
1569void ipath_init_ht400_funcs(struct ipath_devdata *dd) 1578void ipath_init_iba6110_funcs(struct ipath_devdata *dd)
1570{ 1579{
1571 dd->ipath_f_intrsetup = ipath_ht_intconfig; 1580 dd->ipath_f_intrsetup = ipath_ht_intconfig;
1572 dd->ipath_f_bus = ipath_setup_ht_config; 1581 dd->ipath_f_bus = ipath_setup_ht_config;
diff --git a/drivers/infiniband/hw/ipath/ipath_pe800.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index b83f66d8262c..d86516d23df6 100644
--- a/drivers/infiniband/hw/ipath/ipath_pe800.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -32,7 +32,7 @@
32 */ 32 */
33/* 33/*
34 * This file contains all of the code that is specific to the 34 * This file contains all of the code that is specific to the
35 * InfiniPath PE-800 chip. 35 * InfiniPath PCIe chip.
36 */ 36 */
37 37
38#include <linux/interrupt.h> 38#include <linux/interrupt.h>
@@ -45,9 +45,9 @@
45 45
46/* 46/*
47 * This file contains all the chip-specific register information and 47 * This file contains all the chip-specific register information and
48 * access functions for the QLogic InfiniPath PE800, the PCI-Express chip. 48 * access functions for the QLogic InfiniPath PCI-Express chip.
49 * 49 *
50 * This lists the InfiniPath PE800 registers, in the actual chip layout. 50 * This lists the InfiniPath registers, in the actual chip layout.
51 * This structure should never be directly accessed. 51 * This structure should never be directly accessed.
52 */ 52 */
53struct _infinipath_do_not_use_kernel_regs { 53struct _infinipath_do_not_use_kernel_regs {
@@ -213,7 +213,6 @@ static const struct ipath_kregs ipath_pe_kregs = {
213 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), 213 .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0),
214 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), 214 .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0),
215 215
216 /* This group is pe-800-specific; and used only in this file */
217 /* The rcvpktled register controls one of the debug port signals, so 216 /* The rcvpktled register controls one of the debug port signals, so
218 * a packet activity LED can be connected to it. */ 217 * a packet activity LED can be connected to it. */
219 .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt), 218 .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt),
@@ -364,8 +363,9 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
364 * and we get here multiple times 363 * and we get here multiple times
365 */ 364 */
366 if (dd->ipath_flags & IPATH_INITTED) { 365 if (dd->ipath_flags & IPATH_INITTED) {
367 ipath_dev_err(dd, "Fatal Error (freeze " 366 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
368 "mode), no longer usable\n"); 367 "mode), no longer usable, SN %.16s\n",
368 dd->ipath_serial);
369 isfatal = 1; 369 isfatal = 1;
370 } 370 }
371 /* 371 /*
@@ -388,7 +388,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
388 *msg = '\0'; 388 *msg = '\0';
389 389
390 if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { 390 if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
391 strlcat(msg, "[Memory BIST test failed, PE-800 unusable]", 391 strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
392 msgl); 392 msgl);
393 /* ignore from now on, so disable until driver reloaded */ 393 /* ignore from now on, so disable until driver reloaded */
394 *dd->ipath_statusp |= IPATH_STATUS_HWERROR; 394 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
@@ -433,7 +433,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
433 433
434 if (hwerrs & _IPATH_PLL_FAIL) { 434 if (hwerrs & _IPATH_PLL_FAIL) {
435 snprintf(bitsmsg, sizeof bitsmsg, 435 snprintf(bitsmsg, sizeof bitsmsg,
436 "[PLL failed (%llx), PE-800 unusable]", 436 "[PLL failed (%llx), InfiniPath hardware unusable]",
437 (unsigned long long) hwerrs & _IPATH_PLL_FAIL); 437 (unsigned long long) hwerrs & _IPATH_PLL_FAIL);
438 strlcat(msg, bitsmsg, msgl); 438 strlcat(msg, bitsmsg, msgl);
439 /* ignore from now on, so disable until driver reloaded */ 439 /* ignore from now on, so disable until driver reloaded */
@@ -511,22 +511,25 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
511 n = "InfiniPath_Emulation"; 511 n = "InfiniPath_Emulation";
512 break; 512 break;
513 case 1: 513 case 1:
514 n = "InfiniPath_PE-800-Bringup"; 514 n = "InfiniPath_QLE7140-Bringup";
515 break; 515 break;
516 case 2: 516 case 2:
517 n = "InfiniPath_PE-880"; 517 n = "InfiniPath_QLE7140";
518 break; 518 break;
519 case 3: 519 case 3:
520 n = "InfiniPath_PE-850"; 520 n = "InfiniPath_QMI7140";
521 break; 521 break;
522 case 4: 522 case 4:
523 n = "InfiniPath_PE-860"; 523 n = "InfiniPath_QEM7140";
524 break;
525 case 5:
526 n = "InfiniPath_QMH7140";
524 break; 527 break;
525 default: 528 default:
526 ipath_dev_err(dd, 529 ipath_dev_err(dd,
527 "Don't yet know about board with ID %u\n", 530 "Don't yet know about board with ID %u\n",
528 boardrev); 531 boardrev);
529 snprintf(name, namelen, "Unknown_InfiniPath_PE-8xx_%u", 532 snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
530 boardrev); 533 boardrev);
531 break; 534 break;
532 } 535 }
@@ -534,7 +537,7 @@ static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
534 snprintf(name, namelen, "%s", n); 537 snprintf(name, namelen, "%s", n);
535 538
536 if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) { 539 if (dd->ipath_majrev != 4 || !dd->ipath_minrev || dd->ipath_minrev>2) {
537 ipath_dev_err(dd, "Unsupported PE-800 revision %u.%u!\n", 540 ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
538 dd->ipath_majrev, dd->ipath_minrev); 541 dd->ipath_majrev, dd->ipath_minrev);
539 ret = 1; 542 ret = 1;
540 } else 543 } else
@@ -651,6 +654,15 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
651 val &= ~INFINIPATH_XGXS_RESET; 654 val &= ~INFINIPATH_XGXS_RESET;
652 change = 1; 655 change = 1;
653 } 656 }
657 if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
658 INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
659 /* need to compensate for Tx inversion in partner */
660 val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
661 INFINIPATH_XGXS_RX_POL_SHIFT);
662 val |= dd->ipath_rx_pol_inv <<
663 INFINIPATH_XGXS_RX_POL_SHIFT;
664 change = 1;
665 }
654 if (change) 666 if (change)
655 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); 667 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
656 668
@@ -705,7 +717,7 @@ static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
705 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); 717 ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
706} 718}
707 719
708/* this is not yet needed on the PE800, so just return 0. */ 720/* this is not yet needed on this chip, so just return 0. */
709static int ipath_pe_intconfig(struct ipath_devdata *dd) 721static int ipath_pe_intconfig(struct ipath_devdata *dd)
710{ 722{
711 return 0; 723 return 0;
@@ -759,8 +771,8 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
759 * 771 *
760 * This is called during driver unload. 772 * This is called during driver unload.
761 * We do the pci_disable_msi here, not in generic code, because it 773 * We do the pci_disable_msi here, not in generic code, because it
762 * isn't used for the HT-400. If we do end up needing pci_enable_msi 774 * isn't used for the HT chips. If we do end up needing pci_enable_msi
763 * at some point in the future for HT-400, we'll move the call back 775 * at some point in the future for HT, we'll move the call back
764 * into the main init_one code. 776 * into the main init_one code.
765 */ 777 */
766static void ipath_setup_pe_cleanup(struct ipath_devdata *dd) 778static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
@@ -780,10 +792,10 @@ static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
780 * late in 2.6.16). 792 * late in 2.6.16).
781 * All that can be done is to edit the kernel source to remove the quirk 793 * All that can be done is to edit the kernel source to remove the quirk
782 * check until that is fixed. 794 * check until that is fixed.
783 * We do not need to call enable_msi() for our HyperTransport chip (HT-400), 795 * We do not need to call enable_msi() for our HyperTransport chip,
784 * even those it uses MSI, and we want to avoid the quirk warning, so 796 * even though it uses MSI, and we want to avoid the quirk warning, so
785 * So we call enable_msi only for the PE-800. If we do end up needing 797 * So we call enable_msi only for PCIe. If we do end up needing
786 * pci_enable_msi at some point in the future for HT-400, we'll move the 798 * pci_enable_msi at some point in the future for HT, we'll move the
787 * call back into the main init_one code. 799 * call back into the main init_one code.
788 * We save the msi lo and hi values, so we can restore them after 800 * We save the msi lo and hi values, so we can restore them after
789 * chip reset (the kernel PCI infrastructure doesn't yet handle that 801 * chip reset (the kernel PCI infrastructure doesn't yet handle that
@@ -971,8 +983,7 @@ static int ipath_setup_pe_reset(struct ipath_devdata *dd)
971 int ret; 983 int ret;
972 984
973 /* Use ERROR so it shows up in logs, etc. */ 985 /* Use ERROR so it shows up in logs, etc. */
974 ipath_dev_err(dd, "Resetting PE-800 unit %u\n", 986 ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
975 dd->ipath_unit);
976 /* keep chip from being accessed in a few places */ 987 /* keep chip from being accessed in a few places */
977 dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT); 988 dd->ipath_flags &= ~(IPATH_INITTED|IPATH_PRESENT);
978 val = dd->ipath_control | INFINIPATH_C_RESET; 989 val = dd->ipath_control | INFINIPATH_C_RESET;
@@ -1078,7 +1089,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
1078 * @port: the port 1089 * @port: the port
1079 * 1090 *
1080 * clear all TID entries for a port, expected and eager. 1091 * clear all TID entries for a port, expected and eager.
1081 * Used from ipath_close(). On PE800, TIDs are only 32 bits, 1092 * Used from ipath_close(). On this chip, TIDs are only 32 bits,
1082 * not 64, but they are still on 64 bit boundaries, so tidbase 1093 * not 64, but they are still on 64 bit boundaries, so tidbase
1083 * is declared as u64 * for the pointer math, even though we write 32 bits 1094 * is declared as u64 * for the pointer math, even though we write 32 bits
1084 */ 1095 */
@@ -1148,9 +1159,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
1148 dd->ipath_flags |= IPATH_4BYTE_TID; 1159 dd->ipath_flags |= IPATH_4BYTE_TID;
1149 1160
1150 /* 1161 /*
1151 * For openib, we need to be able to handle an IB header of 96 bytes 1162 * For openfabrics, we need to be able to handle an IB header of
1152 * or 24 dwords. HT-400 has arbitrary sized receive buffers, so we 1163 * 24 dwords. HT chip has arbitrary sized receive buffers, so we
1153 * made them the same size as the PIO buffers. The PE-800 does not 1164 * made them the same size as the PIO buffers. This chip does not
1154 * handle arbitrary size buffers, so we need the header large enough 1165 * handle arbitrary size buffers, so we need the header large enough
1155 * to handle largest IB header, but still have room for a 2KB MTU 1166 * to handle largest IB header, but still have room for a 2KB MTU
1156 * standard IB packet. 1167 * standard IB packet.
@@ -1158,11 +1169,10 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
1158 dd->ipath_rcvhdrentsize = 24; 1169 dd->ipath_rcvhdrentsize = 24;
1159 dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; 1170 dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE;
1160 1171
1161 /* For HT-400, we allocate a somewhat overly large eager buffer, 1172 /*
1162 * such that we can guarantee that we can receive the largest packet 1173 * To truly support a 4KB MTU (for usermode), we need to
1163 * that we can send out. To truly support a 4KB MTU, we need to 1174 * bump this to a larger value. For now, we use them for
1164 * bump this to a larger value. We'll do this when I get around to 1175 * the kernel only.
1165 * testing 4KB sends on the PE-800, which I have not yet done.
1166 */ 1176 */
1167 dd->ipath_rcvegrbufsize = 2048; 1177 dd->ipath_rcvegrbufsize = 2048;
1168 /* 1178 /*
@@ -1175,9 +1185,9 @@ static int ipath_pe_early_init(struct ipath_devdata *dd)
1175 dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; 1185 dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
1176 1186
1177 /* 1187 /*
1178 * For PE-800, we can request a receive interrupt for 1 or 1188 * We can request a receive interrupt for 1 or
1179 * more packets from current offset. For now, we set this 1189 * more packets from current offset. For now, we set this
1180 * up for a single packet, to match the HT-400 behavior. 1190 * up for a single packet.
1181 */ 1191 */
1182 dd->ipath_rhdrhead_intr_off = 1ULL<<32; 1192 dd->ipath_rhdrhead_intr_off = 1ULL<<32;
1183 1193
@@ -1216,13 +1226,13 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
1216} 1226}
1217 1227
1218/** 1228/**
1219 * ipath_init_pe800_funcs - set up the chip-specific function pointers 1229 * ipath_init_iba6120_funcs - set up the chip-specific function pointers
1220 * @dd: the infinipath device 1230 * @dd: the infinipath device
1221 * 1231 *
1222 * This is global, and is called directly at init to set up the 1232 * This is global, and is called directly at init to set up the
1223 * chip-specific function pointers for later use. 1233 * chip-specific function pointers for later use.
1224 */ 1234 */
1225void ipath_init_pe800_funcs(struct ipath_devdata *dd) 1235void ipath_init_iba6120_funcs(struct ipath_devdata *dd)
1226{ 1236{
1227 dd->ipath_f_intrsetup = ipath_pe_intconfig; 1237 dd->ipath_f_intrsetup = ipath_pe_intconfig;
1228 dd->ipath_f_bus = ipath_setup_pe_config; 1238 dd->ipath_f_bus = ipath_setup_pe_config;
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 414cdd1d80a6..44669dc2e22d 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -53,8 +53,8 @@ module_param_named(cfgports, ipath_cfgports, ushort, S_IRUGO);
53MODULE_PARM_DESC(cfgports, "Set max number of ports to use"); 53MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
54 54
55/* 55/*
56 * Number of buffers reserved for driver (layered drivers and SMA 56 * Number of buffers reserved for driver (verbs and layered drivers.)
57 * send). Reserved at end of buffer list. Initialized based on 57 * Reserved at end of buffer list. Initialized based on
58 * number of PIO buffers if not set via module interface. 58 * number of PIO buffers if not set via module interface.
59 * The problem with this is that it's global, but we'll use different 59 * The problem with this is that it's global, but we'll use different
60 * numbers for different chip types. So the default value is not 60 * numbers for different chip types. So the default value is not
@@ -80,7 +80,7 @@ MODULE_PARM_DESC(kpiobufs, "Set number of PIO buffers for driver");
80 * 80 *
81 * Allocate the eager TID buffers and program them into infinipath. 81 * Allocate the eager TID buffers and program them into infinipath.
82 * We use the network layer alloc_skb() allocator to allocate the 82 * We use the network layer alloc_skb() allocator to allocate the
83 * memory, and either use the buffers as is for things like SMA 83 * memory, and either use the buffers as is for things like verbs
84 * packets, or pass the buffers up to the ipath layered driver and 84 * packets, or pass the buffers up to the ipath layered driver and
85 * thence the network layer, replacing them as we do so (see 85 * thence the network layer, replacing them as we do so (see
86 * ipath_rcv_layer()). 86 * ipath_rcv_layer()).
@@ -240,7 +240,11 @@ static int init_chip_first(struct ipath_devdata *dd,
240 "only supports %u\n", ipath_cfgports, 240 "only supports %u\n", ipath_cfgports,
241 dd->ipath_portcnt); 241 dd->ipath_portcnt);
242 } 242 }
243 dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports, 243 /*
244 * Allocate full portcnt array, rather than just cfgports, because
245 * cleanup iterates across all possible ports.
246 */
247 dd->ipath_pd = kzalloc(sizeof(*dd->ipath_pd) * dd->ipath_portcnt,
244 GFP_KERNEL); 248 GFP_KERNEL);
245 249
246 if (!dd->ipath_pd) { 250 if (!dd->ipath_pd) {
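The allocation change above sizes the ipath_pd array by ipath_portcnt instead of ipath_cfgports because, as the new comment notes, cleanup iterates across all possible ports. A small standalone illustration of the rule (names invented for the sketch): the array must be sized by the same bound the teardown loop uses.

	#include <stdlib.h>

	struct port_data { void *rcvhdrq; };

	/* size by the teardown bound (portcnt), not the configured subset */
	static struct port_data *alloc_ports(size_t portcnt)
	{
		return calloc(portcnt, sizeof(struct port_data));
	}

	static void free_ports(struct port_data *pd, size_t portcnt)
	{
		size_t i;

		for (i = 0; i < portcnt; i++)	/* visits every possible port */
			free(pd[i].rcvhdrq);
		free(pd);
	}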
@@ -446,9 +450,9 @@ static void enable_chip(struct ipath_devdata *dd,
446 u32 val; 450 u32 val;
447 int i; 451 int i;
448 452
449 if (!reinit) { 453 if (!reinit)
450 init_waitqueue_head(&ipath_sma_state_wait); 454 init_waitqueue_head(&ipath_state_wait);
451 } 455
452 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 456 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
453 dd->ipath_rcvctrl); 457 dd->ipath_rcvctrl);
454 458
@@ -687,7 +691,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
687 dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2) 691 dd->ipath_pioavregs = ALIGN(val, sizeof(u64) * BITS_PER_BYTE / 2)
688 / (sizeof(u64) * BITS_PER_BYTE / 2); 692 / (sizeof(u64) * BITS_PER_BYTE / 2);
689 if (ipath_kpiobufs == 0) { 693 if (ipath_kpiobufs == 0) {
690 /* not set by user, or set explictly to default */ 694 /* not set by user (this is default) */
691 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128) 695 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 128)
692 kpiobufs = 32; 696 kpiobufs = 32;
693 else 697 else
@@ -946,6 +950,7 @@ static int ipath_set_kpiobufs(const char *str, struct kernel_param *kp)
946 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val; 950 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - val;
947 } 951 }
948 952
953 ipath_kpiobufs = val;
949 ret = 0; 954 ret = 0;
950bail: 955bail:
951 spin_unlock_irqrestore(&ipath_devs_lock, flags); 956 spin_unlock_irqrestore(&ipath_devs_lock, flags);
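The one-line addition in ipath_set_kpiobufs() stores the value that was actually applied back into the exported ipath_kpiobufs variable, so a later read of the module parameter reflects the setting in use rather than only the raw request. An illustrative sketch of that pattern, with names invented for the example:

	static unsigned long kpiobufs_in_use;

	static int set_kpiobufs_sketch(unsigned long requested, unsigned long avail)
	{
		if (requested > avail)
			requested = avail;	/* clamp to what the chip provides */
		kpiobufs_in_use = requested;	/* publish the value actually applied */
		return 0;
	}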
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 280e732660a1..49bf7bb15b04 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -34,7 +34,7 @@
34#include <linux/pci.h> 34#include <linux/pci.h>
35 35
36#include "ipath_kernel.h" 36#include "ipath_kernel.h"
37#include "ipath_layer.h" 37#include "ipath_verbs.h"
38#include "ipath_common.h" 38#include "ipath_common.h"
39 39
40/* These are all rcv-related errors which we want to count for stats */ 40/* These are all rcv-related errors which we want to count for stats */
@@ -201,7 +201,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
201 ib_linkstate(lstate)); 201 ib_linkstate(lstate));
202 } 202 }
203 else 203 else
204 ipath_cdbg(SMA, "Unit %u link state %s, last " 204 ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
205 "was %s\n", dd->ipath_unit, 205 "was %s\n", dd->ipath_unit,
206 ib_linkstate(lstate), 206 ib_linkstate(lstate),
207 ib_linkstate((unsigned) 207 ib_linkstate((unsigned)
@@ -213,7 +213,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
213 if (lstate == IPATH_IBSTATE_INIT || 213 if (lstate == IPATH_IBSTATE_INIT ||
214 lstate == IPATH_IBSTATE_ARM || 214 lstate == IPATH_IBSTATE_ARM ||
215 lstate == IPATH_IBSTATE_ACTIVE) 215 lstate == IPATH_IBSTATE_ACTIVE)
216 ipath_cdbg(SMA, "Unit %u link state down" 216 ipath_cdbg(VERBOSE, "Unit %u link state down"
217 " (state 0x%x), from %s\n", 217 " (state 0x%x), from %s\n",
218 dd->ipath_unit, 218 dd->ipath_unit,
219 (u32)val & IPATH_IBSTATE_MASK, 219 (u32)val & IPATH_IBSTATE_MASK,
@@ -269,7 +269,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
269 INFINIPATH_IBCS_LINKSTATE_MASK) 269 INFINIPATH_IBCS_LINKSTATE_MASK)
270 == INFINIPATH_IBCS_L_STATE_ACTIVE) 270 == INFINIPATH_IBCS_L_STATE_ACTIVE)
271 /* if from up to down be more vocal */ 271 /* if from up to down be more vocal */
272 ipath_cdbg(SMA, 272 ipath_cdbg(VERBOSE,
273 "Unit %u link now down (%s)\n", 273 "Unit %u link now down (%s)\n",
274 dd->ipath_unit, 274 dd->ipath_unit,
275 ipath_ibcstatus_str[ltstate]); 275 ipath_ibcstatus_str[ltstate]);
@@ -289,8 +289,6 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
289 *dd->ipath_statusp |= 289 *dd->ipath_statusp |=
290 IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; 290 IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
291 dd->ipath_f_setextled(dd, lstate, ltstate); 291 dd->ipath_f_setextled(dd, lstate, ltstate);
292
293 __ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
294 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) { 292 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
295 /* 293 /*
296 * set INIT and DOWN. Down is checked by most of the other 294 * set INIT and DOWN. Down is checked by most of the other
@@ -598,11 +596,11 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
598 596
599 if (!noprint && *msg) 597 if (!noprint && *msg)
600 ipath_dev_err(dd, "%s error\n", msg); 598 ipath_dev_err(dd, "%s error\n", msg);
601 if (dd->ipath_sma_state_wanted & dd->ipath_flags) { 599 if (dd->ipath_state_wanted & dd->ipath_flags) {
602 ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, " 600 ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
603 "waking\n", dd->ipath_sma_state_wanted, 601 "waking\n", dd->ipath_state_wanted,
604 dd->ipath_flags); 602 dd->ipath_flags);
605 wake_up_interruptible(&ipath_sma_state_wait); 603 wake_up_interruptible(&ipath_state_wait);
606 } 604 }
607 605
608 return chkerrpkts; 606 return chkerrpkts;
@@ -708,11 +706,7 @@ static void handle_layer_pioavail(struct ipath_devdata *dd)
708{ 706{
709 int ret; 707 int ret;
710 708
711 ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE); 709 ret = ipath_ib_piobufavail(dd->verbs_dev);
712 if (ret > 0)
713 goto set;
714
715 ret = __ipath_verbs_piobufavail(dd);
716 if (ret > 0) 710 if (ret > 0)
717 goto set; 711 goto set;
718 712
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index e9f374fb641e..a8a56276ff1d 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -132,12 +132,6 @@ struct _ipath_layer {
132 void *l_arg; 132 void *l_arg;
133}; 133};
134 134
135/* Verbs layer interface */
136struct _verbs_layer {
137 void *l_arg;
138 struct timer_list l_timer;
139};
140
141struct ipath_devdata { 135struct ipath_devdata {
142 struct list_head ipath_list; 136 struct list_head ipath_list;
143 137
@@ -198,7 +192,8 @@ struct ipath_devdata {
198 void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64); 192 void (*ipath_f_setextled)(struct ipath_devdata *, u64, u64);
199 /* fill out chip-specific fields */ 193 /* fill out chip-specific fields */
200 int (*ipath_f_get_base_info)(struct ipath_portdata *, void *); 194 int (*ipath_f_get_base_info)(struct ipath_portdata *, void *);
201 struct _verbs_layer verbs_layer; 195 struct ipath_ibdev *verbs_dev;
196 struct timer_list verbs_timer;
202 /* total dwords sent (summed from counter) */ 197 /* total dwords sent (summed from counter) */
203 u64 ipath_sword; 198 u64 ipath_sword;
204 /* total dwords rcvd (summed from counter) */ 199 /* total dwords rcvd (summed from counter) */
@@ -241,7 +236,7 @@ struct ipath_devdata {
241 u64 ipath_tidtemplate; 236 u64 ipath_tidtemplate;
242 /* value to write to free TIDs */ 237 /* value to write to free TIDs */
243 u64 ipath_tidinvalid; 238 u64 ipath_tidinvalid;
244 /* PE-800 rcv interrupt setup */ 239 /* IBA6120 rcv interrupt setup */
245 u64 ipath_rhdrhead_intr_off; 240 u64 ipath_rhdrhead_intr_off;
246 241
247 /* size of memory at ipath_kregbase */ 242 /* size of memory at ipath_kregbase */
@@ -250,8 +245,8 @@ struct ipath_devdata {
250 u32 ipath_pioavregs; 245 u32 ipath_pioavregs;
251 /* IPATH_POLL, etc. */ 246 /* IPATH_POLL, etc. */
252 u32 ipath_flags; 247 u32 ipath_flags;
253 /* ipath_flags sma is waiting for */ 248 /* ipath_flags driver is waiting for */
254 u32 ipath_sma_state_wanted; 249 u32 ipath_state_wanted;
255 /* last buffer for user use, first buf for kernel use is this 250 /* last buffer for user use, first buf for kernel use is this
256 * index. */ 251 * index. */
257 u32 ipath_lastport_piobuf; 252 u32 ipath_lastport_piobuf;
@@ -311,10 +306,6 @@ struct ipath_devdata {
311 u32 ipath_pcibar0; 306 u32 ipath_pcibar0;
312 /* so we can rewrite it after a chip reset */ 307 /* so we can rewrite it after a chip reset */
313 u32 ipath_pcibar1; 308 u32 ipath_pcibar1;
314 /* sequential tries for SMA send and no bufs */
315 u32 ipath_nosma_bufs;
316 /* duration (seconds) ipath_nosma_bufs set */
317 u32 ipath_nosma_secs;
318 309
319 /* HT/PCI Vendor ID (here for NodeInfo) */ 310 /* HT/PCI Vendor ID (here for NodeInfo) */
320 u16 ipath_vendorid; 311 u16 ipath_vendorid;
@@ -512,6 +503,8 @@ struct ipath_devdata {
512 u8 ipath_pci_cacheline; 503 u8 ipath_pci_cacheline;
513 /* LID mask control */ 504 /* LID mask control */
514 u8 ipath_lmc; 505 u8 ipath_lmc;
506 /* Rx Polarity inversion (compensate for ~tx on partner) */
507 u8 ipath_rx_pol_inv;
515 508
516 /* local link integrity counter */ 509 /* local link integrity counter */
517 u32 ipath_lli_counter; 510 u32 ipath_lli_counter;
@@ -523,18 +516,6 @@ extern struct list_head ipath_dev_list;
523extern spinlock_t ipath_devs_lock; 516extern spinlock_t ipath_devs_lock;
524extern struct ipath_devdata *ipath_lookup(int unit); 517extern struct ipath_devdata *ipath_lookup(int unit);
525 518
526extern u16 ipath_layer_rcv_opcode;
527extern int __ipath_layer_intr(struct ipath_devdata *, u32);
528extern int ipath_layer_intr(struct ipath_devdata *, u32);
529extern int __ipath_layer_rcv(struct ipath_devdata *, void *,
530 struct sk_buff *);
531extern int __ipath_layer_rcv_lid(struct ipath_devdata *, void *);
532extern int __ipath_verbs_piobufavail(struct ipath_devdata *);
533extern int __ipath_verbs_rcv(struct ipath_devdata *, void *, void *, u32);
534
535void ipath_layer_add(struct ipath_devdata *);
536void ipath_layer_remove(struct ipath_devdata *);
537
538int ipath_init_chip(struct ipath_devdata *, int); 519int ipath_init_chip(struct ipath_devdata *, int);
539int ipath_enable_wc(struct ipath_devdata *dd); 520int ipath_enable_wc(struct ipath_devdata *dd);
540void ipath_disable_wc(struct ipath_devdata *dd); 521void ipath_disable_wc(struct ipath_devdata *dd);
@@ -549,9 +530,8 @@ void ipath_cdev_cleanup(struct cdev **cdevp,
549 530
550int ipath_diag_add(struct ipath_devdata *); 531int ipath_diag_add(struct ipath_devdata *);
551void ipath_diag_remove(struct ipath_devdata *); 532void ipath_diag_remove(struct ipath_devdata *);
552void ipath_diag_bringup_link(struct ipath_devdata *);
553 533
554extern wait_queue_head_t ipath_sma_state_wait; 534extern wait_queue_head_t ipath_state_wait;
555 535
556int ipath_user_add(struct ipath_devdata *dd); 536int ipath_user_add(struct ipath_devdata *dd);
557void ipath_user_remove(struct ipath_devdata *dd); 537void ipath_user_remove(struct ipath_devdata *dd);
@@ -582,12 +562,14 @@ void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
582 562
583int ipath_parse_ushort(const char *str, unsigned short *valp); 563int ipath_parse_ushort(const char *str, unsigned short *valp);
584 564
585int ipath_wait_linkstate(struct ipath_devdata *, u32, int);
586void ipath_set_ib_lstate(struct ipath_devdata *, int);
587void ipath_kreceive(struct ipath_devdata *); 565void ipath_kreceive(struct ipath_devdata *);
588int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned); 566int ipath_setrcvhdrsize(struct ipath_devdata *, unsigned);
589int ipath_reset_device(int); 567int ipath_reset_device(int);
590void ipath_get_faststats(unsigned long); 568void ipath_get_faststats(unsigned long);
569int ipath_set_linkstate(struct ipath_devdata *, u8);
570int ipath_set_mtu(struct ipath_devdata *, u16);
571int ipath_set_lid(struct ipath_devdata *, u32, u8);
572int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
591 573
592/* for use in system calls, where we want to know device type, etc. */ 574/* for use in system calls, where we want to know device type, etc. */
593#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data) 575#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
@@ -642,10 +624,8 @@ void ipath_free_data(struct ipath_portdata *dd);
642int ipath_waitfor_mdio_cmdready(struct ipath_devdata *); 624int ipath_waitfor_mdio_cmdready(struct ipath_devdata *);
643int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *); 625int ipath_waitfor_complete(struct ipath_devdata *, ipath_kreg, u64, u64 *);
644u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); 626u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
645/* init PE-800-specific func */ 627void ipath_init_iba6120_funcs(struct ipath_devdata *);
646void ipath_init_pe800_funcs(struct ipath_devdata *); 628void ipath_init_iba6110_funcs(struct ipath_devdata *);
647/* init HT-400-specific func */
648void ipath_init_ht400_funcs(struct ipath_devdata *);
649void ipath_get_eeprom_info(struct ipath_devdata *); 629void ipath_get_eeprom_info(struct ipath_devdata *);
650u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 630u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
651 631
@@ -801,7 +781,7 @@ static inline u32 ipath_read_creg32(const struct ipath_devdata *dd,
801 781
802struct device_driver; 782struct device_driver;
803 783
804extern const char ipath_core_version[]; 784extern const char ib_ipath_version[];
805 785
806int ipath_driver_create_group(struct device_driver *); 786int ipath_driver_create_group(struct device_driver *);
807void ipath_driver_remove_group(struct device_driver *); 787void ipath_driver_remove_group(struct device_driver *);
@@ -810,6 +790,9 @@ int ipath_device_create_group(struct device *, struct ipath_devdata *);
810void ipath_device_remove_group(struct device *, struct ipath_devdata *); 790void ipath_device_remove_group(struct device *, struct ipath_devdata *);
811int ipath_expose_reset(struct device *); 791int ipath_expose_reset(struct device *);
812 792
793int ipath_diagpkt_add(void);
794void ipath_diagpkt_remove(void);
795
813int ipath_init_ipathfs(void); 796int ipath_init_ipathfs(void);
814void ipath_exit_ipathfs(void); 797void ipath_exit_ipathfs(void);
815int ipathfs_add_device(struct ipath_devdata *); 798int ipathfs_add_device(struct ipath_devdata *);
@@ -831,10 +814,10 @@ const char *ipath_get_unit_name(int unit);
831 814
832extern struct mutex ipath_mutex; 815extern struct mutex ipath_mutex;
833 816
834#define IPATH_DRV_NAME "ipath_core" 817#define IPATH_DRV_NAME "ib_ipath"
835#define IPATH_MAJOR 233 818#define IPATH_MAJOR 233
836#define IPATH_USER_MINOR_BASE 0 819#define IPATH_USER_MINOR_BASE 0
837#define IPATH_SMA_MINOR 128 820#define IPATH_DIAGPKT_MINOR 127
838#define IPATH_DIAG_MINOR_BASE 129 821#define IPATH_DIAG_MINOR_BASE 129
839#define IPATH_NMINORS 255 822#define IPATH_NMINORS 255
840 823
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index a5ca279370aa..ba1b93226caa 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -34,6 +34,7 @@
34#include <asm/io.h> 34#include <asm/io.h>
35 35
36#include "ipath_verbs.h" 36#include "ipath_verbs.h"
37#include "ipath_kernel.h"
37 38
38/** 39/**
39 * ipath_alloc_lkey - allocate an lkey 40 * ipath_alloc_lkey - allocate an lkey
@@ -60,7 +61,7 @@ int ipath_alloc_lkey(struct ipath_lkey_table *rkt, struct ipath_mregion *mr)
60 r = (r + 1) & (rkt->max - 1); 61 r = (r + 1) & (rkt->max - 1);
61 if (r == n) { 62 if (r == n) {
62 spin_unlock_irqrestore(&rkt->lock, flags); 63 spin_unlock_irqrestore(&rkt->lock, flags);
63 _VERBS_INFO("LKEY table full\n"); 64 ipath_dbg(KERN_INFO "LKEY table full\n");
64 ret = 0; 65 ret = 0;
65 goto bail; 66 goto bail;
66 } 67 }
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.c b/drivers/infiniband/hw/ipath/ipath_layer.c
index b28c6f81c731..e46aa4ed2a7e 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.c
+++ b/drivers/infiniband/hw/ipath/ipath_layer.c
@@ -42,26 +42,20 @@
42 42
43#include "ipath_kernel.h" 43#include "ipath_kernel.h"
44#include "ipath_layer.h" 44#include "ipath_layer.h"
45#include "ipath_verbs.h"
45#include "ipath_common.h" 46#include "ipath_common.h"
46 47
47/* Acquire before ipath_devs_lock. */ 48/* Acquire before ipath_devs_lock. */
48static DEFINE_MUTEX(ipath_layer_mutex); 49static DEFINE_MUTEX(ipath_layer_mutex);
49 50
50static int ipath_verbs_registered;
51
52u16 ipath_layer_rcv_opcode; 51u16 ipath_layer_rcv_opcode;
53 52
54static int (*layer_intr)(void *, u32); 53static int (*layer_intr)(void *, u32);
55static int (*layer_rcv)(void *, void *, struct sk_buff *); 54static int (*layer_rcv)(void *, void *, struct sk_buff *);
56static int (*layer_rcv_lid)(void *, void *); 55static int (*layer_rcv_lid)(void *, void *);
57static int (*verbs_piobufavail)(void *);
58static void (*verbs_rcv)(void *, void *, void *, u32);
59 56
60static void *(*layer_add_one)(int, struct ipath_devdata *); 57static void *(*layer_add_one)(int, struct ipath_devdata *);
61static void (*layer_remove_one)(void *); 58static void (*layer_remove_one)(void *);
62static void *(*verbs_add_one)(int, struct ipath_devdata *);
63static void (*verbs_remove_one)(void *);
64static void (*verbs_timer_cb)(void *);
65 59
66int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg) 60int __ipath_layer_intr(struct ipath_devdata *dd, u32 arg)
67{ 61{
@@ -107,302 +101,16 @@ int __ipath_layer_rcv_lid(struct ipath_devdata *dd, void *hdr)
107 return ret; 101 return ret;
108} 102}
109 103
110int __ipath_verbs_piobufavail(struct ipath_devdata *dd) 104void ipath_layer_lid_changed(struct ipath_devdata *dd)
111{
112 int ret = -ENODEV;
113
114 if (dd->verbs_layer.l_arg && verbs_piobufavail)
115 ret = verbs_piobufavail(dd->verbs_layer.l_arg);
116
117 return ret;
118}
119
120int __ipath_verbs_rcv(struct ipath_devdata *dd, void *rc, void *ebuf,
121 u32 tlen)
122{
123 int ret = -ENODEV;
124
125 if (dd->verbs_layer.l_arg && verbs_rcv) {
126 verbs_rcv(dd->verbs_layer.l_arg, rc, ebuf, tlen);
127 ret = 0;
128 }
129
130 return ret;
131}
132
133int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 newstate)
134{ 105{
135 u32 lstate;
136 int ret;
137
138 switch (newstate) {
139 case IPATH_IB_LINKDOWN:
140 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
141 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
142 /* don't wait */
143 ret = 0;
144 goto bail;
145
146 case IPATH_IB_LINKDOWN_SLEEP:
147 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
148 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
149 /* don't wait */
150 ret = 0;
151 goto bail;
152
153 case IPATH_IB_LINKDOWN_DISABLE:
154 ipath_set_ib_lstate(dd,
155 INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
156 INFINIPATH_IBCC_LINKINITCMD_SHIFT);
157 /* don't wait */
158 ret = 0;
159 goto bail;
160
161 case IPATH_IB_LINKINIT:
162 if (dd->ipath_flags & IPATH_LINKINIT) {
163 ret = 0;
164 goto bail;
165 }
166 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_INIT <<
167 INFINIPATH_IBCC_LINKCMD_SHIFT);
168 lstate = IPATH_LINKINIT;
169 break;
170
171 case IPATH_IB_LINKARM:
172 if (dd->ipath_flags & IPATH_LINKARMED) {
173 ret = 0;
174 goto bail;
175 }
176 if (!(dd->ipath_flags &
177 (IPATH_LINKINIT | IPATH_LINKACTIVE))) {
178 ret = -EINVAL;
179 goto bail;
180 }
181 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
182 INFINIPATH_IBCC_LINKCMD_SHIFT);
183 /*
184 * Since the port can transition to ACTIVE by receiving
185 * a non VL 15 packet, wait for either state.
186 */
187 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
188 break;
189
190 case IPATH_IB_LINKACTIVE:
191 if (dd->ipath_flags & IPATH_LINKACTIVE) {
192 ret = 0;
193 goto bail;
194 }
195 if (!(dd->ipath_flags & IPATH_LINKARMED)) {
196 ret = -EINVAL;
197 goto bail;
198 }
199 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
200 INFINIPATH_IBCC_LINKCMD_SHIFT);
201 lstate = IPATH_LINKACTIVE;
202 break;
203
204 default:
205 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
206 ret = -EINVAL;
207 goto bail;
208 }
209 ret = ipath_wait_linkstate(dd, lstate, 2000);
210
211bail:
212 return ret;
213}
214
215EXPORT_SYMBOL_GPL(ipath_layer_set_linkstate);
216
217/**
218 * ipath_layer_set_mtu - set the MTU
219 * @dd: the infinipath device
220 * @arg: the new MTU
221 *
222 * we can handle "any" incoming size, the issue here is whether we
223 * need to restrict our outgoing size. For now, we don't do any
224 * sanity checking on this, and we don't deal with what happens to
225 * programs that are already running when the size changes.
226 * NOTE: changing the MTU will usually cause the IBC to go back to
227 * link initialize (IPATH_IBSTATE_INIT) state...
228 */
229int ipath_layer_set_mtu(struct ipath_devdata *dd, u16 arg)
230{
231 u32 piosize;
232 int changed = 0;
233 int ret;
234
235 /*
236 * mtu is IB data payload max. It's the largest power of 2 less
237 * than piosize (or even larger, since it only really controls the
238 * largest we can receive; we can send the max of the mtu and
239 * piosize). We check that it's one of the valid IB sizes.
240 */
241 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
242 arg != 4096) {
243 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
244 ret = -EINVAL;
245 goto bail;
246 }
247 if (dd->ipath_ibmtu == arg) {
248 ret = 0; /* same as current */
249 goto bail;
250 }
251
252 piosize = dd->ipath_ibmaxlen;
253 dd->ipath_ibmtu = arg;
254
255 if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
256 /* Only if it's not the initial value (or reset to it) */
257 if (piosize != dd->ipath_init_ibmaxlen) {
258 dd->ipath_ibmaxlen = piosize;
259 changed = 1;
260 }
261 } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
262 piosize = arg + IPATH_PIO_MAXIBHDR;
263 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
264 "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
265 arg);
266 dd->ipath_ibmaxlen = piosize;
267 changed = 1;
268 }
269
270 if (changed) {
271 /*
272 * set the IBC maxpktlength to the size of our pio
273 * buffers in words
274 */
275 u64 ibc = dd->ipath_ibcctrl;
276 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
277 INFINIPATH_IBCC_MAXPKTLEN_SHIFT);
278
279 piosize = piosize - 2 * sizeof(u32); /* ignore pbc */
280 dd->ipath_ibmaxlen = piosize;
281 piosize /= sizeof(u32); /* in words */
282 /*
283 * for ICRC, which we only send in diag test pkt mode, and
284 * we don't need to worry about that for mtu
285 */
286 piosize += 1;
287
288 ibc |= piosize << INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
289 dd->ipath_ibcctrl = ibc;
290 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
291 dd->ipath_ibcctrl);
292 dd->ipath_f_tidtemplate(dd);
293 }
294
295 ret = 0;
296
297bail:
298 return ret;
299}
300
301EXPORT_SYMBOL_GPL(ipath_layer_set_mtu);
302
303int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
304{
305 dd->ipath_lid = arg;
306 dd->ipath_lmc = lmc;
307
308 mutex_lock(&ipath_layer_mutex); 106 mutex_lock(&ipath_layer_mutex);
309 107
310 if (dd->ipath_layer.l_arg && layer_intr) 108 if (dd->ipath_layer.l_arg && layer_intr)
311 layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID); 109 layer_intr(dd->ipath_layer.l_arg, IPATH_LAYER_INT_LID);
312 110
313 mutex_unlock(&ipath_layer_mutex); 111 mutex_unlock(&ipath_layer_mutex);
314
315 return 0;
316}
317
318EXPORT_SYMBOL_GPL(ipath_set_lid);
319
320int ipath_layer_set_guid(struct ipath_devdata *dd, __be64 guid)
321{
322 /* XXX - need to inform anyone who cares this just happened. */
323 dd->ipath_guid = guid;
324 return 0;
325}
326
327EXPORT_SYMBOL_GPL(ipath_layer_set_guid);
328
329__be64 ipath_layer_get_guid(struct ipath_devdata *dd)
330{
331 return dd->ipath_guid;
332}
333
334EXPORT_SYMBOL_GPL(ipath_layer_get_guid);
335
336u32 ipath_layer_get_nguid(struct ipath_devdata *dd)
337{
338 return dd->ipath_nguid;
339}
340
341EXPORT_SYMBOL_GPL(ipath_layer_get_nguid);
342
343u32 ipath_layer_get_majrev(struct ipath_devdata *dd)
344{
345 return dd->ipath_majrev;
346} 112}
347 113
348EXPORT_SYMBOL_GPL(ipath_layer_get_majrev);
349
350u32 ipath_layer_get_minrev(struct ipath_devdata *dd)
351{
352 return dd->ipath_minrev;
353}
354
355EXPORT_SYMBOL_GPL(ipath_layer_get_minrev);
356
357u32 ipath_layer_get_pcirev(struct ipath_devdata *dd)
358{
359 return dd->ipath_pcirev;
360}
361
362EXPORT_SYMBOL_GPL(ipath_layer_get_pcirev);
363
364u32 ipath_layer_get_flags(struct ipath_devdata *dd)
365{
366 return dd->ipath_flags;
367}
368
369EXPORT_SYMBOL_GPL(ipath_layer_get_flags);
370
371struct device *ipath_layer_get_device(struct ipath_devdata *dd)
372{
373 return &dd->pcidev->dev;
374}
375
376EXPORT_SYMBOL_GPL(ipath_layer_get_device);
377
378u16 ipath_layer_get_deviceid(struct ipath_devdata *dd)
379{
380 return dd->ipath_deviceid;
381}
382
383EXPORT_SYMBOL_GPL(ipath_layer_get_deviceid);
384
385u32 ipath_layer_get_vendorid(struct ipath_devdata *dd)
386{
387 return dd->ipath_vendorid;
388}
389
390EXPORT_SYMBOL_GPL(ipath_layer_get_vendorid);
391
392u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd)
393{
394 return dd->ipath_lastibcstat;
395}
396
397EXPORT_SYMBOL_GPL(ipath_layer_get_lastibcstat);
398
399u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd)
400{
401 return dd->ipath_ibmtu;
402}
403
404EXPORT_SYMBOL_GPL(ipath_layer_get_ibmtu);
405
406void ipath_layer_add(struct ipath_devdata *dd) 114void ipath_layer_add(struct ipath_devdata *dd)
407{ 115{
408 mutex_lock(&ipath_layer_mutex); 116 mutex_lock(&ipath_layer_mutex);
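The large removal above strips the verbs plumbing and several helpers out of ipath_layer.c, among them ipath_layer_set_linkstate() and ipath_layer_set_mtu(); the new ipath_set_linkstate() and ipath_set_mtu() declarations in the ipath_kernel.h hunk suggest that logic now lives in the core driver. The removed MTU code also spells out how the IBC maximum packet length is derived from the PIO buffer size; a small self-contained sketch of that arithmetic (function name invented for the example):

	#include <stdint.h>

	/* PIO buffer size in bytes -> IBC max packet length in dwords:
	 * drop the 2-dword PBC, convert to 32-bit words, then add one
	 * word of room for the ICRC used in diag test-packet mode. */
	static uint32_t ibc_maxpktlen_words(uint32_t piosize_bytes)
	{
		piosize_bytes -= 2 * sizeof(uint32_t);
		piosize_bytes /= sizeof(uint32_t);
		return piosize_bytes + 1;
	}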
@@ -411,10 +119,6 @@ void ipath_layer_add(struct ipath_devdata *dd)
411 dd->ipath_layer.l_arg = 119 dd->ipath_layer.l_arg =
412 layer_add_one(dd->ipath_unit, dd); 120 layer_add_one(dd->ipath_unit, dd);
413 121
414 if (verbs_add_one)
415 dd->verbs_layer.l_arg =
416 verbs_add_one(dd->ipath_unit, dd);
417
418 mutex_unlock(&ipath_layer_mutex); 122 mutex_unlock(&ipath_layer_mutex);
419} 123}
420 124
@@ -427,11 +131,6 @@ void ipath_layer_remove(struct ipath_devdata *dd)
427 dd->ipath_layer.l_arg = NULL; 131 dd->ipath_layer.l_arg = NULL;
428 } 132 }
429 133
430 if (dd->verbs_layer.l_arg && verbs_remove_one) {
431 verbs_remove_one(dd->verbs_layer.l_arg);
432 dd->verbs_layer.l_arg = NULL;
433 }
434
435 mutex_unlock(&ipath_layer_mutex); 134 mutex_unlock(&ipath_layer_mutex);
436} 135}
437 136
@@ -463,9 +162,6 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
463 if (dd->ipath_layer.l_arg) 162 if (dd->ipath_layer.l_arg)
464 continue; 163 continue;
465 164
466 if (!(*dd->ipath_statusp & IPATH_STATUS_SMA))
467 *dd->ipath_statusp |= IPATH_STATUS_OIB_SMA;
468
469 spin_unlock_irqrestore(&ipath_devs_lock, flags); 165 spin_unlock_irqrestore(&ipath_devs_lock, flags);
470 dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd); 166 dd->ipath_layer.l_arg = l_add(dd->ipath_unit, dd);
471 spin_lock_irqsave(&ipath_devs_lock, flags); 167 spin_lock_irqsave(&ipath_devs_lock, flags);
@@ -509,107 +205,6 @@ void ipath_layer_unregister(void)
509 205
510EXPORT_SYMBOL_GPL(ipath_layer_unregister); 206EXPORT_SYMBOL_GPL(ipath_layer_unregister);
511 207
512static void __ipath_verbs_timer(unsigned long arg)
513{
514 struct ipath_devdata *dd = (struct ipath_devdata *) arg;
515
516 /*
517 * If port 0 receive packet interrupts are not available, or
518 * can be missed, poll the receive queue
519 */
520 if (dd->ipath_flags & IPATH_POLL_RX_INTR)
521 ipath_kreceive(dd);
522
523 /* Handle verbs layer timeouts. */
524 if (dd->verbs_layer.l_arg && verbs_timer_cb)
525 verbs_timer_cb(dd->verbs_layer.l_arg);
526
527 mod_timer(&dd->verbs_layer.l_timer, jiffies + 1);
528}
529
530/**
531 * ipath_verbs_register - verbs layer registration
532 * @l_piobufavail: callback for when PIO buffers become available
533 * @l_rcv: callback for receiving a packet
534 * @l_timer_cb: timer callback
535 * @ipath_devdata: device data structure is put here
536 */
537int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
538 void (*l_remove)(void *arg),
539 int (*l_piobufavail) (void *arg),
540 void (*l_rcv) (void *arg, void *rhdr,
541 void *data, u32 tlen),
542 void (*l_timer_cb) (void *arg))
543{
544 struct ipath_devdata *dd, *tmp;
545 unsigned long flags;
546
547 mutex_lock(&ipath_layer_mutex);
548
549 verbs_add_one = l_add;
550 verbs_remove_one = l_remove;
551 verbs_piobufavail = l_piobufavail;
552 verbs_rcv = l_rcv;
553 verbs_timer_cb = l_timer_cb;
554
555 spin_lock_irqsave(&ipath_devs_lock, flags);
556
557 list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
558 if (!(dd->ipath_flags & IPATH_INITTED))
559 continue;
560
561 if (dd->verbs_layer.l_arg)
562 continue;
563
564 spin_unlock_irqrestore(&ipath_devs_lock, flags);
565 dd->verbs_layer.l_arg = l_add(dd->ipath_unit, dd);
566 spin_lock_irqsave(&ipath_devs_lock, flags);
567 }
568
569 spin_unlock_irqrestore(&ipath_devs_lock, flags);
570 mutex_unlock(&ipath_layer_mutex);
571
572 ipath_verbs_registered = 1;
573
574 return 0;
575}
576
577EXPORT_SYMBOL_GPL(ipath_verbs_register);
578
579void ipath_verbs_unregister(void)
580{
581 struct ipath_devdata *dd, *tmp;
582 unsigned long flags;
583
584 mutex_lock(&ipath_layer_mutex);
585 spin_lock_irqsave(&ipath_devs_lock, flags);
586
587 list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
588 *dd->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
589
590 if (dd->verbs_layer.l_arg && verbs_remove_one) {
591 spin_unlock_irqrestore(&ipath_devs_lock, flags);
592 verbs_remove_one(dd->verbs_layer.l_arg);
593 spin_lock_irqsave(&ipath_devs_lock, flags);
594 dd->verbs_layer.l_arg = NULL;
595 }
596 }
597
598 spin_unlock_irqrestore(&ipath_devs_lock, flags);
599
600 verbs_add_one = NULL;
601 verbs_remove_one = NULL;
602 verbs_piobufavail = NULL;
603 verbs_rcv = NULL;
604 verbs_timer_cb = NULL;
605
606 ipath_verbs_registered = 0;
607
608 mutex_unlock(&ipath_layer_mutex);
609}
610
611EXPORT_SYMBOL_GPL(ipath_verbs_unregister);
612
613int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax) 208int ipath_layer_open(struct ipath_devdata *dd, u32 * pktmax)
614{ 209{
615 int ret; 210 int ret;
@@ -698,390 +293,6 @@ u16 ipath_layer_get_bcast(struct ipath_devdata *dd)
698 293
699EXPORT_SYMBOL_GPL(ipath_layer_get_bcast); 294EXPORT_SYMBOL_GPL(ipath_layer_get_bcast);
700 295
701u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd)
702{
703 return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
704}
705
706EXPORT_SYMBOL_GPL(ipath_layer_get_cr_errpkey);
707
708static void update_sge(struct ipath_sge_state *ss, u32 length)
709{
710 struct ipath_sge *sge = &ss->sge;
711
712 sge->vaddr += length;
713 sge->length -= length;
714 sge->sge_length -= length;
715 if (sge->sge_length == 0) {
716 if (--ss->num_sge)
717 *sge = *ss->sg_list++;
718 } else if (sge->length == 0 && sge->mr != NULL) {
719 if (++sge->n >= IPATH_SEGSZ) {
720 if (++sge->m >= sge->mr->mapsz)
721 return;
722 sge->n = 0;
723 }
724 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
725 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
726 }
727}
728
729#ifdef __LITTLE_ENDIAN
730static inline u32 get_upper_bits(u32 data, u32 shift)
731{
732 return data >> shift;
733}
734
735static inline u32 set_upper_bits(u32 data, u32 shift)
736{
737 return data << shift;
738}
739
740static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
741{
742 data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
743 data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
744 return data;
745}
746#else
747static inline u32 get_upper_bits(u32 data, u32 shift)
748{
749 return data << shift;
750}
751
752static inline u32 set_upper_bits(u32 data, u32 shift)
753{
754 return data >> shift;
755}
756
757static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
758{
759 data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
760 data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
761 return data;
762}
763#endif
764
765static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
766 u32 length)
767{
768 u32 extra = 0;
769 u32 data = 0;
770 u32 last;
771
772 while (1) {
773 u32 len = ss->sge.length;
774 u32 off;
775
776 BUG_ON(len == 0);
777 if (len > length)
778 len = length;
779 if (len > ss->sge.sge_length)
780 len = ss->sge.sge_length;
781 /* If the source address is not aligned, try to align it. */
782 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
783 if (off) {
784 u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
785 ~(sizeof(u32) - 1));
786 u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
787 u32 y;
788
789 y = sizeof(u32) - off;
790 if (len > y)
791 len = y;
792 if (len + extra >= sizeof(u32)) {
793 data |= set_upper_bits(v, extra *
794 BITS_PER_BYTE);
795 len = sizeof(u32) - extra;
796 if (len == length) {
797 last = data;
798 break;
799 }
800 __raw_writel(data, piobuf);
801 piobuf++;
802 extra = 0;
803 data = 0;
804 } else {
805 /* Clear unused upper bytes */
806 data |= clear_upper_bytes(v, len, extra);
807 if (len == length) {
808 last = data;
809 break;
810 }
811 extra += len;
812 }
813 } else if (extra) {
814 /* Source address is aligned. */
815 u32 *addr = (u32 *) ss->sge.vaddr;
816 int shift = extra * BITS_PER_BYTE;
817 int ushift = 32 - shift;
818 u32 l = len;
819
820 while (l >= sizeof(u32)) {
821 u32 v = *addr;
822
823 data |= set_upper_bits(v, shift);
824 __raw_writel(data, piobuf);
825 data = get_upper_bits(v, ushift);
826 piobuf++;
827 addr++;
828 l -= sizeof(u32);
829 }
830 /*
831 * We still have 'extra' number of bytes leftover.
832 */
833 if (l) {
834 u32 v = *addr;
835
836 if (l + extra >= sizeof(u32)) {
837 data |= set_upper_bits(v, shift);
838 len -= l + extra - sizeof(u32);
839 if (len == length) {
840 last = data;
841 break;
842 }
843 __raw_writel(data, piobuf);
844 piobuf++;
845 extra = 0;
846 data = 0;
847 } else {
848 /* Clear unused upper bytes */
849 data |= clear_upper_bytes(v, l,
850 extra);
851 if (len == length) {
852 last = data;
853 break;
854 }
855 extra += l;
856 }
857 } else if (len == length) {
858 last = data;
859 break;
860 }
861 } else if (len == length) {
862 u32 w;
863
864 /*
865 * Need to round up for the last dword in the
866 * packet.
867 */
868 w = (len + 3) >> 2;
869 __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
870 piobuf += w - 1;
871 last = ((u32 *) ss->sge.vaddr)[w - 1];
872 break;
873 } else {
874 u32 w = len >> 2;
875
876 __iowrite32_copy(piobuf, ss->sge.vaddr, w);
877 piobuf += w;
878
879 extra = len & (sizeof(u32) - 1);
880 if (extra) {
881 u32 v = ((u32 *) ss->sge.vaddr)[w];
882
883 /* Clear unused upper bytes */
884 data = clear_upper_bytes(v, extra, 0);
885 }
886 }
887 update_sge(ss, len);
888 length -= len;
889 }
890 /* Update address before sending packet. */
891 update_sge(ss, length);
892	/* must flush everything early, before the trigger word */
893 ipath_flush_wc();
894 __raw_writel(last, piobuf);
895 /* be sure trigger word is written */
896 ipath_flush_wc();
897}
898
899/**
900 * ipath_verbs_send - send a packet from the verbs layer
901 * @dd: the infinipath device
902 * @hdrwords: the number of words in the header
903 * @hdr: the packet header
904 * @len: the length of the packet in bytes
905 * @ss: the SGE to send
906 *
907 * This is like ipath_sma_send_pkt() in that we need to be able to send
908 * packets after the chip is initialized (MADs) but also like
909 * ipath_layer_send_hdr() since it's used by the verbs layer.
910 */
911int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
912 u32 *hdr, u32 len, struct ipath_sge_state *ss)
913{
914 u32 __iomem *piobuf;
915 u32 plen;
916 int ret;
917
918 /* +1 is for the qword padding of pbc */
919 plen = hdrwords + ((len + 3) >> 2) + 1;
920 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
921 ipath_dbg("packet len 0x%x too long, failing\n", plen);
922 ret = -EINVAL;
923 goto bail;
924 }
925
926 /* Get a PIO buffer to use. */
927 piobuf = ipath_getpiobuf(dd, NULL);
928 if (unlikely(piobuf == NULL)) {
929 ret = -EBUSY;
930 goto bail;
931 }
932
933 /*
934 * Write len to control qword, no flags.
935	 * We have to flush after the PBC for correctness on some CPUs,
936	 * or the WC buffer can be written out of order.
937 */
938 writeq(plen, piobuf);
939 ipath_flush_wc();
940 piobuf += 2;
941 if (len == 0) {
942 /*
943 * If there is just the header portion, must flush before
944 * writing last word of header for correctness, and after
945 * the last header word (trigger word).
946 */
947 __iowrite32_copy(piobuf, hdr, hdrwords - 1);
948 ipath_flush_wc();
949 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
950 ipath_flush_wc();
951 ret = 0;
952 goto bail;
953 }
954
955 __iowrite32_copy(piobuf, hdr, hdrwords);
956 piobuf += hdrwords;
957
958 /* The common case is aligned and contained in one segment. */
959 if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
960 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
961 u32 w;
962 u32 *addr = (u32 *) ss->sge.vaddr;
963
964 /* Update address before sending packet. */
965 update_sge(ss, len);
966 /* Need to round up for the last dword in the packet. */
967 w = (len + 3) >> 2;
968 __iowrite32_copy(piobuf, addr, w - 1);
969		/* must flush everything early, before the trigger word */
970 ipath_flush_wc();
971 __raw_writel(addr[w - 1], piobuf + w - 1);
972 /* be sure trigger word is written */
973 ipath_flush_wc();
974 ret = 0;
975 goto bail;
976 }
977 copy_io(piobuf, ss, len);
978 ret = 0;
979
980bail:
981 return ret;
982}
983
984EXPORT_SYMBOL_GPL(ipath_verbs_send);
985
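ipath_verbs_send() sizes the packet in 32-bit words: the header word count, plus the payload rounded up to whole dwords, plus one for the PBC qword padding noted in the comment above. A tiny stand-alone check of that arithmetic (the helper name is illustrative, not driver API):

#include <stdio.h>
#include <stdint.h>

/* dwords needed for one PIO packet, as computed in ipath_verbs_send() */
static uint32_t pio_packet_dwords(uint32_t hdrwords, uint32_t len)
{
	return hdrwords + ((len + 3) >> 2) + 1;
}

int main(void)
{
	/* a 7-dword header with a 13-byte payload: 7 + 4 + 1 = 12 dwords */
	printf("%u\n", pio_packet_dwords(7, 13));
	return 0;
}

The subsequent (plen << 2) > dd->ipath_ibmaxlen test converts that dword count back to bytes before comparing against the device limit.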
986int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
987 u64 *rwords, u64 *spkts, u64 *rpkts,
988 u64 *xmit_wait)
989{
990 int ret;
991
992 if (!(dd->ipath_flags & IPATH_INITTED)) {
993 /* no hardware, freeze, etc. */
994 ipath_dbg("unit %u not usable\n", dd->ipath_unit);
995 ret = -EINVAL;
996 goto bail;
997 }
998 *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
999 *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1000 *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1001 *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1002 *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
1003
1004 ret = 0;
1005
1006bail:
1007 return ret;
1008}
1009
1010EXPORT_SYMBOL_GPL(ipath_layer_snapshot_counters);
1011
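ipath_layer_snapshot_counters() only reads the free-running chip counters, so consumers that need clearable counts take a baseline and subtract later samples from it, exactly as the PMA handlers further down do. A hedged in-driver sketch of that pattern, using the ipath_snapshot_counters() name this patch migrates to and assuming it keeps the same argument list (the caller itself is hypothetical):

/* Hypothetical consumer: sample and report the delta since a baseline,
 * the same trick the PMA code uses to emulate clearable counters on
 * hardware that never resets them. */
static void report_send_words(struct ipath_devdata *dd, u64 *baseline)
{
	u64 swords, rwords, spkts, rpkts, xwait;

	if (ipath_snapshot_counters(dd, &swords, &rwords,
				    &spkts, &rpkts, &xwait))
		return;		/* unit not initialized yet */

	printk(KERN_INFO "sent %llu words since baseline\n",
	       (unsigned long long)(swords - *baseline));
	*baseline = swords;
}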
1012/**
1013 * ipath_layer_get_counters - get various chip counters
1014 * @dd: the infinipath device
1015 * @cntrs: counters are placed here
1016 *
1017 * Return the counters needed by recv_pma_get_portcounters().
1018 */
1019int ipath_layer_get_counters(struct ipath_devdata *dd,
1020 struct ipath_layer_counters *cntrs)
1021{
1022 int ret;
1023
1024 if (!(dd->ipath_flags & IPATH_INITTED)) {
1025 /* no hardware, freeze, etc. */
1026 ipath_dbg("unit %u not usable\n", dd->ipath_unit);
1027 ret = -EINVAL;
1028 goto bail;
1029 }
1030 cntrs->symbol_error_counter =
1031 ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
1032 cntrs->link_error_recovery_counter =
1033 ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
1034 /*
1035 * The link downed counter counts when the other side downs the
1036 * connection. We add in the number of times we downed the link
1037 * due to local link integrity errors to compensate.
1038 */
1039 cntrs->link_downed_counter =
1040 ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
1041 cntrs->port_rcv_errors =
1042 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
1043 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
1044 ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
1045 ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
1046 ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
1047 ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
1048 ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
1049 ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
1050 ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
1051 cntrs->port_rcv_remphys_errors =
1052 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
1053 cntrs->port_xmit_discards =
1054 ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
1055 cntrs->port_xmit_data =
1056 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
1057 cntrs->port_rcv_data =
1058 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1059 cntrs->port_xmit_packets =
1060 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1061 cntrs->port_rcv_packets =
1062 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1063 cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
1064 cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
1065
1066 ret = 0;
1067
1068bail:
1069 return ret;
1070}
1071
1072EXPORT_SYMBOL_GPL(ipath_layer_get_counters);
1073
1074int ipath_layer_want_buffer(struct ipath_devdata *dd)
1075{
1076 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
1077 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1078 dd->ipath_sendctrl);
1079
1080 return 0;
1081}
1082
1083EXPORT_SYMBOL_GPL(ipath_layer_want_buffer);
1084
1085int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr) 296int ipath_layer_send_hdr(struct ipath_devdata *dd, struct ether_header *hdr)
1086{ 297{
1087 int ret = 0; 298 int ret = 0;
@@ -1153,389 +364,3 @@ int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd)
1153} 364}
1154 365
1155EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int); 366EXPORT_SYMBOL_GPL(ipath_layer_set_piointbufavail_int);
1156
1157int ipath_layer_enable_timer(struct ipath_devdata *dd)
1158{
1159 /*
1160	 * HT-400 has a design flaw where the chip's and the kernel's ideas
1161	 * of the tail register don't always agree, and therefore we won't
1162 * get an interrupt on the next packet received.
1163 * If the board supports per packet receive interrupts, use it.
1164 * Otherwise, the timer function periodically checks for packets
1165 * to cover this case.
1166 * Either way, the timer is needed for verbs layer related
1167 * processing.
1168 */
1169 if (dd->ipath_flags & IPATH_GPIO_INTR) {
1170 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1171 0x2074076542310ULL);
1172 /* Enable GPIO bit 2 interrupt */
1173 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1174 (u64) (1 << 2));
1175 }
1176
1177 init_timer(&dd->verbs_layer.l_timer);
1178 dd->verbs_layer.l_timer.function = __ipath_verbs_timer;
1179 dd->verbs_layer.l_timer.data = (unsigned long)dd;
1180 dd->verbs_layer.l_timer.expires = jiffies + 1;
1181 add_timer(&dd->verbs_layer.l_timer);
1182
1183 return 0;
1184}
1185
1186EXPORT_SYMBOL_GPL(ipath_layer_enable_timer);
1187
1188int ipath_layer_disable_timer(struct ipath_devdata *dd)
1189{
1190 /* Disable GPIO bit 2 interrupt */
1191 if (dd->ipath_flags & IPATH_GPIO_INTR)
1192 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
1193
1194 del_timer_sync(&dd->verbs_layer.l_timer);
1195
1196 return 0;
1197}
1198
1199EXPORT_SYMBOL_GPL(ipath_layer_disable_timer);
1200
1201/**
1202 * ipath_layer_set_verbs_flags - set the verbs layer flags
1203 * @dd: the infinipath device
1204 * @flags: the flags to set
1205 */
1206int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags)
1207{
1208 struct ipath_devdata *ss;
1209 unsigned long lflags;
1210
1211 spin_lock_irqsave(&ipath_devs_lock, lflags);
1212
1213 list_for_each_entry(ss, &ipath_dev_list, ipath_list) {
1214 if (!(ss->ipath_flags & IPATH_INITTED))
1215 continue;
1216 if ((flags & IPATH_VERBS_KERNEL_SMA) &&
1217 !(*ss->ipath_statusp & IPATH_STATUS_SMA))
1218 *ss->ipath_statusp |= IPATH_STATUS_OIB_SMA;
1219 else
1220 *ss->ipath_statusp &= ~IPATH_STATUS_OIB_SMA;
1221 }
1222
1223 spin_unlock_irqrestore(&ipath_devs_lock, lflags);
1224
1225 return 0;
1226}
1227
1228EXPORT_SYMBOL_GPL(ipath_layer_set_verbs_flags);
1229
1230/**
1231 * ipath_layer_get_npkeys - return the size of the PKEY table for port 0
1232 * @dd: the infinipath device
1233 */
1234unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd)
1235{
1236 return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1237}
1238
1239EXPORT_SYMBOL_GPL(ipath_layer_get_npkeys);
1240
1241/**
1242 * ipath_layer_get_pkey - return the indexed PKEY from the port 0 PKEY table
1243 * @dd: the infinipath device
1244 * @index: the PKEY index
1245 */
1246unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index)
1247{
1248 unsigned ret;
1249
1250 if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1251 ret = 0;
1252 else
1253 ret = dd->ipath_pd[0]->port_pkeys[index];
1254
1255 return ret;
1256}
1257
1258EXPORT_SYMBOL_GPL(ipath_layer_get_pkey);
1259
1260/**
1261 * ipath_layer_get_pkeys - return the PKEY table for port 0
1262 * @dd: the infinipath device
1263 * @pkeys: the pkey table is placed here
1264 */
1265int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
1266{
1267 struct ipath_portdata *pd = dd->ipath_pd[0];
1268
1269 memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
1270
1271 return 0;
1272}
1273
1274EXPORT_SYMBOL_GPL(ipath_layer_get_pkeys);
1275
1276/**
1277 * rm_pkey - decrement the reference count for the given PKEY
1278 * @dd: the infinipath device
1279 * @key: the PKEY index
1280 *
1281 * Return true if this was the last reference and the hardware table entry
1282 * needs to be changed.
1283 */
1284static int rm_pkey(struct ipath_devdata *dd, u16 key)
1285{
1286 int i;
1287 int ret;
1288
1289 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
1290 if (dd->ipath_pkeys[i] != key)
1291 continue;
1292 if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
1293 dd->ipath_pkeys[i] = 0;
1294 ret = 1;
1295 goto bail;
1296 }
1297 break;
1298 }
1299
1300 ret = 0;
1301
1302bail:
1303 return ret;
1304}
1305
1306/**
1307 * add_pkey - add the given PKEY to the hardware table
1308 * @dd: the infinipath device
1309 * @key: the PKEY
1310 *
1311 * Return an error code if unable to add the entry, zero if no change,
1312 * or 1 if the hardware PKEY register needs to be updated.
1313 */
1314static int add_pkey(struct ipath_devdata *dd, u16 key)
1315{
1316 int i;
1317 u16 lkey = key & 0x7FFF;
1318 int any = 0;
1319 int ret;
1320
1321 if (lkey == 0x7FFF) {
1322 ret = 0;
1323 goto bail;
1324 }
1325
1326 /* Look for an empty slot or a matching PKEY. */
1327 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
1328 if (!dd->ipath_pkeys[i]) {
1329 any++;
1330 continue;
1331 }
1332 /* If it matches exactly, try to increment the ref count */
1333 if (dd->ipath_pkeys[i] == key) {
1334 if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
1335 ret = 0;
1336 goto bail;
1337 }
1338 /* Lost the race. Look for an empty slot below. */
1339 atomic_dec(&dd->ipath_pkeyrefs[i]);
1340 any++;
1341 }
1342 /*
1343 * It makes no sense to have both the limited and unlimited
1344 * PKEY set at the same time since the unlimited one will
1345 * disable the limited one.
1346 */
1347 if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
1348 ret = -EEXIST;
1349 goto bail;
1350 }
1351 }
1352 if (!any) {
1353 ret = -EBUSY;
1354 goto bail;
1355 }
1356 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
1357 if (!dd->ipath_pkeys[i] &&
1358 atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
1359 /* for ipathstats, etc. */
1360 ipath_stats.sps_pkeys[i] = lkey;
1361 dd->ipath_pkeys[i] = key;
1362 ret = 1;
1363 goto bail;
1364 }
1365 }
1366 ret = -EBUSY;
1367
1368bail:
1369 return ret;
1370}
1371
1372/**
1373 * ipath_layer_set_pkeys - set the PKEY table for port 0
1374 * @dd: the infinipath device
1375 * @pkeys: the PKEY table
1376 */
1377int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 * pkeys)
1378{
1379 struct ipath_portdata *pd;
1380 int i;
1381 int changed = 0;
1382
1383 pd = dd->ipath_pd[0];
1384
1385 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
1386 u16 key = pkeys[i];
1387 u16 okey = pd->port_pkeys[i];
1388
1389 if (key == okey)
1390 continue;
1391 /*
1392 * The value of this PKEY table entry is changing.
1393 * Remove the old entry in the hardware's array of PKEYs.
1394 */
1395 if (okey & 0x7FFF)
1396 changed |= rm_pkey(dd, okey);
1397 if (key & 0x7FFF) {
1398 int ret = add_pkey(dd, key);
1399
1400 if (ret < 0)
1401 key = 0;
1402 else
1403 changed |= ret;
1404 }
1405 pd->port_pkeys[i] = key;
1406 }
1407 if (changed) {
1408 u64 pkey;
1409
1410 pkey = (u64) dd->ipath_pkeys[0] |
1411 ((u64) dd->ipath_pkeys[1] << 16) |
1412 ((u64) dd->ipath_pkeys[2] << 32) |
1413 ((u64) dd->ipath_pkeys[3] << 48);
1414 ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
1415 (unsigned long long) pkey);
1416 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
1417 pkey);
1418 }
1419 return 0;
1420}
1421
1422EXPORT_SYMBOL_GPL(ipath_layer_set_pkeys);
1423
1424/**
1425 * ipath_layer_get_linkdowndefaultstate - get the default linkdown state
1426 * @dd: the infinipath device
1427 *
1428 * Returns zero if the default is POLL, 1 if the default is SLEEP.
1429 */
1430int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd)
1431{
1432 return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
1433}
1434
1435EXPORT_SYMBOL_GPL(ipath_layer_get_linkdowndefaultstate);
1436
1437/**
1438 * ipath_layer_set_linkdowndefaultstate - set the default linkdown state
1439 * @dd: the infinipath device
1440 * @sleep: the new state
1441 *
1442 * Note that this will only take effect when the link state changes.
1443 */
1444int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
1445 int sleep)
1446{
1447 if (sleep)
1448 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
1449 else
1450 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
1451 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1452 dd->ipath_ibcctrl);
1453 return 0;
1454}
1455
1456EXPORT_SYMBOL_GPL(ipath_layer_set_linkdowndefaultstate);
1457
1458int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd)
1459{
1460 return (dd->ipath_ibcctrl >>
1461 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
1462 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
1463}
1464
1465EXPORT_SYMBOL_GPL(ipath_layer_get_phyerrthreshold);
1466
1467/**
1468 * ipath_layer_set_phyerrthreshold - set the physical error threshold
1469 * @dd: the infinipath device
1470 * @n: the new threshold
1471 *
1472 * Note that this will only take effect when the link state changes.
1473 */
1474int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
1475{
1476 unsigned v;
1477
1478 v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
1479 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
1480 if (v != n) {
1481 dd->ipath_ibcctrl &=
1482 ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
1483 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
1484 dd->ipath_ibcctrl |=
1485 (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
1486 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1487 dd->ipath_ibcctrl);
1488 }
1489 return 0;
1490}
1491
1492EXPORT_SYMBOL_GPL(ipath_layer_set_phyerrthreshold);
1493
1494int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd)
1495{
1496 return (dd->ipath_ibcctrl >>
1497 INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
1498 INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
1499}
1500
1501EXPORT_SYMBOL_GPL(ipath_layer_get_overrunthreshold);
1502
1503/**
1504 * ipath_layer_set_overrunthreshold - set the overrun threshold
1505 * @dd: the infinipath device
1506 * @n: the new threshold
1507 *
1508 * Note that this will only take effect when the link state changes.
1509 */
1510int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
1511{
1512 unsigned v;
1513
1514 v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
1515 INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
1516 if (v != n) {
1517 dd->ipath_ibcctrl &=
1518 ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
1519 INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
1520 dd->ipath_ibcctrl |=
1521 (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
1522 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1523 dd->ipath_ibcctrl);
1524 }
1525 return 0;
1526}
1527
1528EXPORT_SYMBOL_GPL(ipath_layer_set_overrunthreshold);
1529
1530int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
1531 size_t namelen)
1532{
1533 return dd->ipath_f_get_boardname(dd, name, namelen);
1534}
1535EXPORT_SYMBOL_GPL(ipath_layer_get_boardname);
1536
1537u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd)
1538{
1539 return dd->ipath_rcvhdrentsize;
1540}
1541EXPORT_SYMBOL_GPL(ipath_layer_get_rcvhdrentsize);
diff --git a/drivers/infiniband/hw/ipath/ipath_layer.h b/drivers/infiniband/hw/ipath/ipath_layer.h
index 71485096fcac..3854a4eae684 100644
--- a/drivers/infiniband/hw/ipath/ipath_layer.h
+++ b/drivers/infiniband/hw/ipath/ipath_layer.h
@@ -40,73 +40,9 @@
40 */ 40 */
41 41
42struct sk_buff; 42struct sk_buff;
43struct ipath_sge_state;
44struct ipath_devdata; 43struct ipath_devdata;
45struct ether_header; 44struct ether_header;
46 45
47struct ipath_layer_counters {
48 u64 symbol_error_counter;
49 u64 link_error_recovery_counter;
50 u64 link_downed_counter;
51 u64 port_rcv_errors;
52 u64 port_rcv_remphys_errors;
53 u64 port_xmit_discards;
54 u64 port_xmit_data;
55 u64 port_rcv_data;
56 u64 port_xmit_packets;
57 u64 port_rcv_packets;
58 u32 local_link_integrity_errors;
59 u32 excessive_buffer_overrun_errors;
60};
61
62/*
63 * A segment is a linear region of low physical memory.
64 * XXX Maybe we should use phys addr here and kmap()/kunmap().
65 * Used by the verbs layer.
66 */
67struct ipath_seg {
68 void *vaddr;
69 size_t length;
70};
71
72/* The number of ipath_segs that fit in a page. */
73#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
74
75struct ipath_segarray {
76 struct ipath_seg segs[IPATH_SEGSZ];
77};
78
79struct ipath_mregion {
80 u64 user_base; /* User's address for this region */
81 u64 iova; /* IB start address of this region */
82 size_t length;
83 u32 lkey;
84 u32 offset; /* offset (bytes) to start of region */
85 int access_flags;
86 u32 max_segs; /* number of ipath_segs in all the arrays */
87 u32 mapsz; /* size of the map array */
88 struct ipath_segarray *map[0]; /* the segments */
89};
90
91/*
92 * These keep track of the copy progress within a memory region.
93 * Used by the verbs layer.
94 */
95struct ipath_sge {
96 struct ipath_mregion *mr;
97 void *vaddr; /* current pointer into the segment */
98 u32 sge_length; /* length of the SGE */
99 u32 length; /* remaining length of the segment */
100 u16 m; /* current index: mr->map[m] */
101 u16 n; /* current index: mr->map[m]->segs[n] */
102};
103
104struct ipath_sge_state {
105 struct ipath_sge *sg_list; /* next SGE to be used if any */
106 struct ipath_sge sge; /* progress state for the current SGE */
107 u8 num_sge;
108};
109
110int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *), 46int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
111 void (*l_remove)(void *), 47 void (*l_remove)(void *),
112 int (*l_intr)(void *, u32), 48 int (*l_intr)(void *, u32),
@@ -114,62 +50,14 @@ int ipath_layer_register(void *(*l_add)(int, struct ipath_devdata *),
114 struct sk_buff *), 50 struct sk_buff *),
115 u16 rcv_opcode, 51 u16 rcv_opcode,
116 int (*l_rcv_lid)(void *, void *)); 52 int (*l_rcv_lid)(void *, void *));
117int ipath_verbs_register(void *(*l_add)(int, struct ipath_devdata *),
118 void (*l_remove)(void *arg),
119 int (*l_piobufavail)(void *arg),
120 void (*l_rcv)(void *arg, void *rhdr,
121 void *data, u32 tlen),
122 void (*l_timer_cb)(void *arg));
123void ipath_layer_unregister(void); 53void ipath_layer_unregister(void);
124void ipath_verbs_unregister(void);
125int ipath_layer_open(struct ipath_devdata *, u32 * pktmax); 54int ipath_layer_open(struct ipath_devdata *, u32 * pktmax);
126u16 ipath_layer_get_lid(struct ipath_devdata *dd); 55u16 ipath_layer_get_lid(struct ipath_devdata *dd);
127int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *); 56int ipath_layer_get_mac(struct ipath_devdata *dd, u8 *);
128u16 ipath_layer_get_bcast(struct ipath_devdata *dd); 57u16 ipath_layer_get_bcast(struct ipath_devdata *dd);
129u32 ipath_layer_get_cr_errpkey(struct ipath_devdata *dd);
130int ipath_layer_set_linkstate(struct ipath_devdata *dd, u8 state);
131int ipath_layer_set_mtu(struct ipath_devdata *, u16);
132int ipath_set_lid(struct ipath_devdata *, u32, u8);
133int ipath_layer_send_hdr(struct ipath_devdata *dd, 58int ipath_layer_send_hdr(struct ipath_devdata *dd,
134 struct ether_header *hdr); 59 struct ether_header *hdr);
135int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
136 u32 * hdr, u32 len, struct ipath_sge_state *ss);
137int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd); 60int ipath_layer_set_piointbufavail_int(struct ipath_devdata *dd);
138int ipath_layer_get_boardname(struct ipath_devdata *dd, char *name,
139 size_t namelen);
140int ipath_layer_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
141 u64 *rwords, u64 *spkts, u64 *rpkts,
142 u64 *xmit_wait);
143int ipath_layer_get_counters(struct ipath_devdata *dd,
144 struct ipath_layer_counters *cntrs);
145int ipath_layer_want_buffer(struct ipath_devdata *dd);
146int ipath_layer_set_guid(struct ipath_devdata *, __be64 guid);
147__be64 ipath_layer_get_guid(struct ipath_devdata *);
148u32 ipath_layer_get_nguid(struct ipath_devdata *);
149u32 ipath_layer_get_majrev(struct ipath_devdata *);
150u32 ipath_layer_get_minrev(struct ipath_devdata *);
151u32 ipath_layer_get_pcirev(struct ipath_devdata *);
152u32 ipath_layer_get_flags(struct ipath_devdata *dd);
153struct device *ipath_layer_get_device(struct ipath_devdata *dd);
154u16 ipath_layer_get_deviceid(struct ipath_devdata *dd);
155u32 ipath_layer_get_vendorid(struct ipath_devdata *);
156u64 ipath_layer_get_lastibcstat(struct ipath_devdata *dd);
157u32 ipath_layer_get_ibmtu(struct ipath_devdata *dd);
158int ipath_layer_enable_timer(struct ipath_devdata *dd);
159int ipath_layer_disable_timer(struct ipath_devdata *dd);
160int ipath_layer_set_verbs_flags(struct ipath_devdata *dd, unsigned flags);
161unsigned ipath_layer_get_npkeys(struct ipath_devdata *dd);
162unsigned ipath_layer_get_pkey(struct ipath_devdata *dd, unsigned index);
163int ipath_layer_get_pkeys(struct ipath_devdata *dd, u16 *pkeys);
164int ipath_layer_set_pkeys(struct ipath_devdata *dd, u16 *pkeys);
165int ipath_layer_get_linkdowndefaultstate(struct ipath_devdata *dd);
166int ipath_layer_set_linkdowndefaultstate(struct ipath_devdata *dd,
167 int sleep);
168int ipath_layer_get_phyerrthreshold(struct ipath_devdata *dd);
169int ipath_layer_set_phyerrthreshold(struct ipath_devdata *dd, unsigned n);
170int ipath_layer_get_overrunthreshold(struct ipath_devdata *dd);
171int ipath_layer_set_overrunthreshold(struct ipath_devdata *dd, unsigned n);
172u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
173 61
174/* ipath_ether interrupt values */ 62/* ipath_ether interrupt values */
175#define IPATH_LAYER_INT_IF_UP 0x2 63#define IPATH_LAYER_INT_IF_UP 0x2
@@ -178,9 +66,6 @@ u32 ipath_layer_get_rcvhdrentsize(struct ipath_devdata *dd);
178#define IPATH_LAYER_INT_SEND_CONTINUE 0x10 66#define IPATH_LAYER_INT_SEND_CONTINUE 0x10
179#define IPATH_LAYER_INT_BCAST 0x40 67#define IPATH_LAYER_INT_BCAST 0x40
180 68
181/* _verbs_layer.l_flags */
182#define IPATH_VERBS_KERNEL_SMA 0x1
183
184extern unsigned ipath_debug; /* debugging bit mask */ 69extern unsigned ipath_debug; /* debugging bit mask */
185 70
186#endif /* _IPATH_LAYER_H */ 71#endif /* _IPATH_LAYER_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index d3402341b7d0..72d1db89db8f 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -101,15 +101,15 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
101 nip->num_ports = ibdev->phys_port_cnt; 101 nip->num_ports = ibdev->phys_port_cnt;
102 /* This is already in network order */ 102 /* This is already in network order */
103 nip->sys_guid = to_idev(ibdev)->sys_image_guid; 103 nip->sys_guid = to_idev(ibdev)->sys_image_guid;
104 nip->node_guid = ipath_layer_get_guid(dd); 104 nip->node_guid = dd->ipath_guid;
105 nip->port_guid = nip->sys_guid; 105 nip->port_guid = nip->sys_guid;
106 nip->partition_cap = cpu_to_be16(ipath_layer_get_npkeys(dd)); 106 nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
107 nip->device_id = cpu_to_be16(ipath_layer_get_deviceid(dd)); 107 nip->device_id = cpu_to_be16(dd->ipath_deviceid);
108 majrev = ipath_layer_get_majrev(dd); 108 majrev = dd->ipath_majrev;
109 minrev = ipath_layer_get_minrev(dd); 109 minrev = dd->ipath_minrev;
110 nip->revision = cpu_to_be32((majrev << 16) | minrev); 110 nip->revision = cpu_to_be32((majrev << 16) | minrev);
111 nip->local_port_num = port; 111 nip->local_port_num = port;
112 vendor = ipath_layer_get_vendorid(dd); 112 vendor = dd->ipath_vendorid;
113 nip->vendor_id[0] = 0; 113 nip->vendor_id[0] = 0;
114 nip->vendor_id[1] = vendor >> 8; 114 nip->vendor_id[1] = vendor >> 8;
115 nip->vendor_id[2] = vendor; 115 nip->vendor_id[2] = vendor;
@@ -133,13 +133,89 @@ static int recv_subn_get_guidinfo(struct ib_smp *smp,
133 */ 133 */
134 if (startgx == 0) 134 if (startgx == 0)
135 /* The first is a copy of the read-only HW GUID. */ 135 /* The first is a copy of the read-only HW GUID. */
136 *p = ipath_layer_get_guid(to_idev(ibdev)->dd); 136 *p = to_idev(ibdev)->dd->ipath_guid;
137 else 137 else
138 smp->status |= IB_SMP_INVALID_FIELD; 138 smp->status |= IB_SMP_INVALID_FIELD;
139 139
140 return reply(smp); 140 return reply(smp);
141} 141}
142 142
143
144static int get_overrunthreshold(struct ipath_devdata *dd)
145{
146 return (dd->ipath_ibcctrl >>
147 INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
148 INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
149}
150
151/**
152 * set_overrunthreshold - set the overrun threshold
153 * @dd: the infinipath device
154 * @n: the new threshold
155 *
156 * Note that this will only take effect when the link state changes.
157 */
158static int set_overrunthreshold(struct ipath_devdata *dd, unsigned n)
159{
160 unsigned v;
161
162 v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT) &
163 INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK;
164 if (v != n) {
165 dd->ipath_ibcctrl &=
166 ~(INFINIPATH_IBCC_OVERRUNTHRESHOLD_MASK <<
167 INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
168 dd->ipath_ibcctrl |=
169 (u64) n << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT;
170 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
171 dd->ipath_ibcctrl);
172 }
173 return 0;
174}
175
176static int get_phyerrthreshold(struct ipath_devdata *dd)
177{
178 return (dd->ipath_ibcctrl >>
179 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
180 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
181}
182
183/**
184 * set_phyerrthreshold - set the physical error threshold
185 * @dd: the infinipath device
186 * @n: the new threshold
187 *
188 * Note that this will only take effect when the link state changes.
189 */
190static int set_phyerrthreshold(struct ipath_devdata *dd, unsigned n)
191{
192 unsigned v;
193
194 v = (dd->ipath_ibcctrl >> INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
195 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
196 if (v != n) {
197 dd->ipath_ibcctrl &=
198 ~(INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK <<
199 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT);
200 dd->ipath_ibcctrl |=
201 (u64) n << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT;
202 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
203 dd->ipath_ibcctrl);
204 }
205 return 0;
206}
207
208/**
209 * get_linkdowndefaultstate - get the default linkdown state
210 * @dd: the infinipath device
211 *
212 * Returns zero if the default is POLL, 1 if the default is SLEEP.
213 */
214static int get_linkdowndefaultstate(struct ipath_devdata *dd)
215{
216 return !!(dd->ipath_ibcctrl & INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE);
217}
218
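The threshold helpers above are all the same read-modify-write on a packed ibcctrl field: extract the field at its shift and, if the value changed, clear the old bits, OR in the new value, and write the register back. A stand-alone illustration of that pattern with made-up mask and shift values (not the real INFINIPATH_IBCC_* constants):

#include <stdio.h>
#include <stdint.h>

/* illustrative field layout only */
#define FIELD_MASK  0xFULL
#define FIELD_SHIFT 8

static uint64_t set_field(uint64_t reg, unsigned int n)
{
	uint64_t cur = (reg >> FIELD_SHIFT) & FIELD_MASK;

	if (cur != n) {
		reg &= ~(FIELD_MASK << FIELD_SHIFT);	/* clear old value */
		reg |= (uint64_t)n << FIELD_SHIFT;	/* insert new one  */
	}
	return reg;
}

int main(void)
{
	uint64_t reg = 0xa50;	/* field currently holds 0xa */

	/* prints 0x0000000000000350 */
	printf("0x%016llx\n", (unsigned long long)set_field(reg, 0x3));
	return 0;
}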
143static int recv_subn_get_portinfo(struct ib_smp *smp, 219static int recv_subn_get_portinfo(struct ib_smp *smp,
144 struct ib_device *ibdev, u8 port) 220 struct ib_device *ibdev, u8 port)
145{ 221{
@@ -166,7 +242,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
166 (dev->mkeyprot_resv_lmc >> 6) == 0) 242 (dev->mkeyprot_resv_lmc >> 6) == 0)
167 pip->mkey = dev->mkey; 243 pip->mkey = dev->mkey;
168 pip->gid_prefix = dev->gid_prefix; 244 pip->gid_prefix = dev->gid_prefix;
169 lid = ipath_layer_get_lid(dev->dd); 245 lid = dev->dd->ipath_lid;
170 pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; 246 pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
171 pip->sm_lid = cpu_to_be16(dev->sm_lid); 247 pip->sm_lid = cpu_to_be16(dev->sm_lid);
172 pip->cap_mask = cpu_to_be32(dev->port_cap_flags); 248 pip->cap_mask = cpu_to_be32(dev->port_cap_flags);
@@ -177,14 +253,14 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
177 pip->link_width_supported = 3; /* 1x or 4x */ 253 pip->link_width_supported = 3; /* 1x or 4x */
178 pip->link_width_active = 2; /* 4x */ 254 pip->link_width_active = 2; /* 4x */
179 pip->linkspeed_portstate = 0x10; /* 2.5Gbps */ 255 pip->linkspeed_portstate = 0x10; /* 2.5Gbps */
180 ibcstat = ipath_layer_get_lastibcstat(dev->dd); 256 ibcstat = dev->dd->ipath_lastibcstat;
181 pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1; 257 pip->linkspeed_portstate |= ((ibcstat >> 4) & 0x3) + 1;
182 pip->portphysstate_linkdown = 258 pip->portphysstate_linkdown =
183 (ipath_cvt_physportstate[ibcstat & 0xf] << 4) | 259 (ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
184 (ipath_layer_get_linkdowndefaultstate(dev->dd) ? 1 : 2); 260 (get_linkdowndefaultstate(dev->dd) ? 1 : 2);
185 pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc; 261 pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
186 pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */ 262 pip->linkspeedactive_enabled = 0x11; /* 2.5Gbps, 2.5Gbps */
187 switch (ipath_layer_get_ibmtu(dev->dd)) { 263 switch (dev->dd->ipath_ibmtu) {
188 case 4096: 264 case 4096:
189 mtu = IB_MTU_4096; 265 mtu = IB_MTU_4096;
190 break; 266 break;
@@ -217,7 +293,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
217 pip->mkey_violations = cpu_to_be16(dev->mkey_violations); 293 pip->mkey_violations = cpu_to_be16(dev->mkey_violations);
218 /* P_KeyViolations are counted by hardware. */ 294 /* P_KeyViolations are counted by hardware. */
219 pip->pkey_violations = 295 pip->pkey_violations =
220 cpu_to_be16((ipath_layer_get_cr_errpkey(dev->dd) - 296 cpu_to_be16((ipath_get_cr_errpkey(dev->dd) -
221 dev->z_pkey_violations) & 0xFFFF); 297 dev->z_pkey_violations) & 0xFFFF);
222 pip->qkey_violations = cpu_to_be16(dev->qkey_violations); 298 pip->qkey_violations = cpu_to_be16(dev->qkey_violations);
223 /* Only the hardware GUID is supported for now */ 299 /* Only the hardware GUID is supported for now */
@@ -226,8 +302,8 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
226 /* 32.768 usec. response time (guessing) */ 302 /* 32.768 usec. response time (guessing) */
227 pip->resv_resptimevalue = 3; 303 pip->resv_resptimevalue = 3;
228 pip->localphyerrors_overrunerrors = 304 pip->localphyerrors_overrunerrors =
229 (ipath_layer_get_phyerrthreshold(dev->dd) << 4) | 305 (get_phyerrthreshold(dev->dd) << 4) |
230 ipath_layer_get_overrunthreshold(dev->dd); 306 get_overrunthreshold(dev->dd);
231 /* pip->max_credit_hint; */ 307 /* pip->max_credit_hint; */
232 /* pip->link_roundtrip_latency[3]; */ 308 /* pip->link_roundtrip_latency[3]; */
233 309
@@ -237,6 +313,20 @@ bail:
237 return ret; 313 return ret;
238} 314}
239 315
316/**
317 * get_pkeys - return the PKEY table for port 0
318 * @dd: the infinipath device
319 * @pkeys: the pkey table is placed here
320 */
321static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
322{
323 struct ipath_portdata *pd = dd->ipath_pd[0];
324
325 memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
326
327 return 0;
328}
329
240static int recv_subn_get_pkeytable(struct ib_smp *smp, 330static int recv_subn_get_pkeytable(struct ib_smp *smp,
241 struct ib_device *ibdev) 331 struct ib_device *ibdev)
242{ 332{
@@ -249,9 +339,9 @@ static int recv_subn_get_pkeytable(struct ib_smp *smp,
249 memset(smp->data, 0, sizeof(smp->data)); 339 memset(smp->data, 0, sizeof(smp->data));
250 if (startpx == 0) { 340 if (startpx == 0) {
251 struct ipath_ibdev *dev = to_idev(ibdev); 341 struct ipath_ibdev *dev = to_idev(ibdev);
252 unsigned i, n = ipath_layer_get_npkeys(dev->dd); 342 unsigned i, n = ipath_get_npkeys(dev->dd);
253 343
254 ipath_layer_get_pkeys(dev->dd, p); 344 get_pkeys(dev->dd, p);
255 345
256 for (i = 0; i < n; i++) 346 for (i = 0; i < n; i++)
257 q[i] = cpu_to_be16(p[i]); 347 q[i] = cpu_to_be16(p[i]);
@@ -269,6 +359,24 @@ static int recv_subn_set_guidinfo(struct ib_smp *smp,
269} 359}
270 360
271/** 361/**
362 * set_linkdowndefaultstate - set the default linkdown state
363 * @dd: the infinipath device
364 * @sleep: the new state
365 *
366 * Note that this will only take effect when the link state changes.
367 */
368static int set_linkdowndefaultstate(struct ipath_devdata *dd, int sleep)
369{
370 if (sleep)
371 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
372 else
373 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE;
374 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
375 dd->ipath_ibcctrl);
376 return 0;
377}
378
379/**
272 * recv_subn_set_portinfo - set port information 380 * recv_subn_set_portinfo - set port information
273 * @smp: the incoming SM packet 381 * @smp: the incoming SM packet
274 * @ibdev: the infiniband device 382 * @ibdev: the infiniband device
@@ -290,7 +398,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
290 u8 state; 398 u8 state;
291 u16 lstate; 399 u16 lstate;
292 u32 mtu; 400 u32 mtu;
293 int ret; 401 int ret, ore;
294 402
295 if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt) 403 if (be32_to_cpu(smp->attr_mod) > ibdev->phys_port_cnt)
296 goto err; 404 goto err;
@@ -304,7 +412,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
304 dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); 412 dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
305 413
306 lid = be16_to_cpu(pip->lid); 414 lid = be16_to_cpu(pip->lid);
307 if (lid != ipath_layer_get_lid(dev->dd)) { 415 if (lid != dev->dd->ipath_lid) {
308 /* Must be a valid unicast LID address. */ 416 /* Must be a valid unicast LID address. */
309 if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE) 417 if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
310 goto err; 418 goto err;
@@ -342,11 +450,11 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
342 case 0: /* NOP */ 450 case 0: /* NOP */
343 break; 451 break;
344 case 1: /* SLEEP */ 452 case 1: /* SLEEP */
345 if (ipath_layer_set_linkdowndefaultstate(dev->dd, 1)) 453 if (set_linkdowndefaultstate(dev->dd, 1))
346 goto err; 454 goto err;
347 break; 455 break;
348 case 2: /* POLL */ 456 case 2: /* POLL */
349 if (ipath_layer_set_linkdowndefaultstate(dev->dd, 0)) 457 if (set_linkdowndefaultstate(dev->dd, 0))
350 goto err; 458 goto err;
351 break; 459 break;
352 default: 460 default:
@@ -376,7 +484,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
376 /* XXX We have already partially updated our state! */ 484 /* XXX We have already partially updated our state! */
377 goto err; 485 goto err;
378 } 486 }
379 ipath_layer_set_mtu(dev->dd, mtu); 487 ipath_set_mtu(dev->dd, mtu);
380 488
381 dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF; 489 dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
382 490
@@ -392,20 +500,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
392 * later. 500 * later.
393 */ 501 */
394 if (pip->pkey_violations == 0) 502 if (pip->pkey_violations == 0)
395 dev->z_pkey_violations = 503 dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
396 ipath_layer_get_cr_errpkey(dev->dd);
397 504
398 if (pip->qkey_violations == 0) 505 if (pip->qkey_violations == 0)
399 dev->qkey_violations = 0; 506 dev->qkey_violations = 0;
400 507
401 if (ipath_layer_set_phyerrthreshold( 508 ore = pip->localphyerrors_overrunerrors;
402 dev->dd, 509 if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
403 (pip->localphyerrors_overrunerrors >> 4) & 0xF))
404 goto err; 510 goto err;
405 511
406 if (ipath_layer_set_overrunthreshold( 512 if (set_overrunthreshold(dev->dd, (ore & 0xF)))
407 dev->dd,
408 (pip->localphyerrors_overrunerrors & 0xF)))
409 goto err; 513 goto err;
410 514
411 dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; 515 dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -423,7 +527,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
423 * is down or is being set to down. 527 * is down or is being set to down.
424 */ 528 */
425 state = pip->linkspeed_portstate & 0xF; 529 state = pip->linkspeed_portstate & 0xF;
426 flags = ipath_layer_get_flags(dev->dd); 530 flags = dev->dd->ipath_flags;
427 lstate = (pip->portphysstate_linkdown >> 4) & 0xF; 531 lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
428 if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) 532 if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
429 goto err; 533 goto err;
@@ -439,7 +543,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
439 /* FALLTHROUGH */ 543 /* FALLTHROUGH */
440 case IB_PORT_DOWN: 544 case IB_PORT_DOWN:
441 if (lstate == 0) 545 if (lstate == 0)
442 if (ipath_layer_get_linkdowndefaultstate(dev->dd)) 546 if (get_linkdowndefaultstate(dev->dd))
443 lstate = IPATH_IB_LINKDOWN_SLEEP; 547 lstate = IPATH_IB_LINKDOWN_SLEEP;
444 else 548 else
445 lstate = IPATH_IB_LINKDOWN; 549 lstate = IPATH_IB_LINKDOWN;
@@ -451,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
451 lstate = IPATH_IB_LINKDOWN_DISABLE; 555 lstate = IPATH_IB_LINKDOWN_DISABLE;
452 else 556 else
453 goto err; 557 goto err;
454 ipath_layer_set_linkstate(dev->dd, lstate); 558 ipath_set_linkstate(dev->dd, lstate);
455 if (flags & IPATH_LINKACTIVE) { 559 if (flags & IPATH_LINKACTIVE) {
456 event.event = IB_EVENT_PORT_ERR; 560 event.event = IB_EVENT_PORT_ERR;
457 ib_dispatch_event(&event); 561 ib_dispatch_event(&event);
@@ -460,7 +564,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
460 case IB_PORT_ARMED: 564 case IB_PORT_ARMED:
461 if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE))) 565 if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
462 break; 566 break;
463 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKARM); 567 ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
464 if (flags & IPATH_LINKACTIVE) { 568 if (flags & IPATH_LINKACTIVE) {
465 event.event = IB_EVENT_PORT_ERR; 569 event.event = IB_EVENT_PORT_ERR;
466 ib_dispatch_event(&event); 570 ib_dispatch_event(&event);
@@ -469,7 +573,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
469 case IB_PORT_ACTIVE: 573 case IB_PORT_ACTIVE:
470 if (!(flags & IPATH_LINKARMED)) 574 if (!(flags & IPATH_LINKARMED))
471 break; 575 break;
472 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE); 576 ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
473 event.event = IB_EVENT_PORT_ACTIVE; 577 event.event = IB_EVENT_PORT_ACTIVE;
474 ib_dispatch_event(&event); 578 ib_dispatch_event(&event);
475 break; 579 break;
@@ -493,6 +597,152 @@ done:
493 return ret; 597 return ret;
494} 598}
495 599
600/**
601 * rm_pkey - decrement the reference count for the given PKEY
602 * @dd: the infinipath device
603 * @key: the PKEY index
604 *
605 * Return true if this was the last reference and the hardware table entry
606 * needs to be changed.
607 */
608static int rm_pkey(struct ipath_devdata *dd, u16 key)
609{
610 int i;
611 int ret;
612
613 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
614 if (dd->ipath_pkeys[i] != key)
615 continue;
616 if (atomic_dec_and_test(&dd->ipath_pkeyrefs[i])) {
617 dd->ipath_pkeys[i] = 0;
618 ret = 1;
619 goto bail;
620 }
621 break;
622 }
623
624 ret = 0;
625
626bail:
627 return ret;
628}
629
630/**
631 * add_pkey - add the given PKEY to the hardware table
632 * @dd: the infinipath device
633 * @key: the PKEY
634 *
635 * Return an error code if unable to add the entry, zero if no change,
636 * or 1 if the hardware PKEY register needs to be updated.
637 */
638static int add_pkey(struct ipath_devdata *dd, u16 key)
639{
640 int i;
641 u16 lkey = key & 0x7FFF;
642 int any = 0;
643 int ret;
644
645 if (lkey == 0x7FFF) {
646 ret = 0;
647 goto bail;
648 }
649
650 /* Look for an empty slot or a matching PKEY. */
651 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
652 if (!dd->ipath_pkeys[i]) {
653 any++;
654 continue;
655 }
656 /* If it matches exactly, try to increment the ref count */
657 if (dd->ipath_pkeys[i] == key) {
658 if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
659 ret = 0;
660 goto bail;
661 }
662 /* Lost the race. Look for an empty slot below. */
663 atomic_dec(&dd->ipath_pkeyrefs[i]);
664 any++;
665 }
666 /*
667 * It makes no sense to have both the limited and unlimited
668 * PKEY set at the same time since the unlimited one will
669 * disable the limited one.
670 */
671 if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
672 ret = -EEXIST;
673 goto bail;
674 }
675 }
676 if (!any) {
677 ret = -EBUSY;
678 goto bail;
679 }
680 for (i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
681 if (!dd->ipath_pkeys[i] &&
682 atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
683 /* for ipathstats, etc. */
684 ipath_stats.sps_pkeys[i] = lkey;
685 dd->ipath_pkeys[i] = key;
686 ret = 1;
687 goto bail;
688 }
689 }
690 ret = -EBUSY;
691
692bail:
693 return ret;
694}
695
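add_pkey() compares keys with bit 15 stripped, because that bit is the IB full/limited membership flag rather than part of the key itself. A quick stand-alone check of the masking (sample key values, illustrative only):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t full    = 0xFFFF;	/* default PKEY, full membership     */
	uint16_t limited = 0x7FFF;	/* same base key, limited membership */

	/* add_pkey() matches on key & 0x7FFF, so these collide ... */
	printf("same base key: %d\n", (full & 0x7FFF) == (limited & 0x7FFF));
	/* ... which is why adding the limited form of a key already present
	 * as a full member returns -EEXIST, and a bare 0x7FFF is treated as
	 * the invalid default and silently skipped. */
	return 0;
}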
696/**
697 * set_pkeys - set the PKEY table for port 0
698 * @dd: the infinipath device
699 * @pkeys: the PKEY table
700 */
701static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
702{
703 struct ipath_portdata *pd;
704 int i;
705 int changed = 0;
706
707 pd = dd->ipath_pd[0];
708
709 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
710 u16 key = pkeys[i];
711 u16 okey = pd->port_pkeys[i];
712
713 if (key == okey)
714 continue;
715 /*
716 * The value of this PKEY table entry is changing.
717 * Remove the old entry in the hardware's array of PKEYs.
718 */
719 if (okey & 0x7FFF)
720 changed |= rm_pkey(dd, okey);
721 if (key & 0x7FFF) {
722 int ret = add_pkey(dd, key);
723
724 if (ret < 0)
725 key = 0;
726 else
727 changed |= ret;
728 }
729 pd->port_pkeys[i] = key;
730 }
731 if (changed) {
732 u64 pkey;
733
734 pkey = (u64) dd->ipath_pkeys[0] |
735 ((u64) dd->ipath_pkeys[1] << 16) |
736 ((u64) dd->ipath_pkeys[2] << 32) |
737 ((u64) dd->ipath_pkeys[3] << 48);
738 ipath_cdbg(VERBOSE, "p0 new pkey reg %llx\n",
739 (unsigned long long) pkey);
740 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
741 pkey);
742 }
743 return 0;
744}
745
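set_pkeys() then folds the four 16-bit hardware slots into the single 64-bit partition key register, lowest slot in the lowest word. The shift-and-OR packing as a runnable fragment (the table contents are arbitrary sample keys):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* arbitrary sample table; slot 0 holds the default full-member key */
	uint16_t pkeys[4] = { 0xFFFF, 0x8001, 0x0000, 0x0000 };

	uint64_t reg = (uint64_t)pkeys[0] |
		       ((uint64_t)pkeys[1] << 16) |
		       ((uint64_t)pkeys[2] << 32) |
		       ((uint64_t)pkeys[3] << 48);

	/* prints 0x000000008001ffff, the value written to kr_partitionkey */
	printf("0x%016llx\n", (unsigned long long)reg);
	return 0;
}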
496static int recv_subn_set_pkeytable(struct ib_smp *smp, 746static int recv_subn_set_pkeytable(struct ib_smp *smp,
497 struct ib_device *ibdev) 747 struct ib_device *ibdev)
498{ 748{
@@ -500,13 +750,12 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
500 __be16 *p = (__be16 *) smp->data; 750 __be16 *p = (__be16 *) smp->data;
501 u16 *q = (u16 *) smp->data; 751 u16 *q = (u16 *) smp->data;
502 struct ipath_ibdev *dev = to_idev(ibdev); 752 struct ipath_ibdev *dev = to_idev(ibdev);
503 unsigned i, n = ipath_layer_get_npkeys(dev->dd); 753 unsigned i, n = ipath_get_npkeys(dev->dd);
504 754
505 for (i = 0; i < n; i++) 755 for (i = 0; i < n; i++)
506 q[i] = be16_to_cpu(p[i]); 756 q[i] = be16_to_cpu(p[i]);
507 757
508 if (startpx != 0 || 758 if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
509 ipath_layer_set_pkeys(dev->dd, q) != 0)
510 smp->status |= IB_SMP_INVALID_FIELD; 759 smp->status |= IB_SMP_INVALID_FIELD;
511 760
512 return recv_subn_get_pkeytable(smp, ibdev); 761 return recv_subn_get_pkeytable(smp, ibdev);
@@ -844,10 +1093,10 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp,
844 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) 1093 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
845 pmp->data; 1094 pmp->data;
846 struct ipath_ibdev *dev = to_idev(ibdev); 1095 struct ipath_ibdev *dev = to_idev(ibdev);
847 struct ipath_layer_counters cntrs; 1096 struct ipath_verbs_counters cntrs;
848 u8 port_select = p->port_select; 1097 u8 port_select = p->port_select;
849 1098
850 ipath_layer_get_counters(dev->dd, &cntrs); 1099 ipath_get_counters(dev->dd, &cntrs);
851 1100
852 /* Adjust counters for any resets done. */ 1101 /* Adjust counters for any resets done. */
853 cntrs.symbol_error_counter -= dev->z_symbol_error_counter; 1102 cntrs.symbol_error_counter -= dev->z_symbol_error_counter;
@@ -944,8 +1193,8 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp,
944 u64 swords, rwords, spkts, rpkts, xwait; 1193 u64 swords, rwords, spkts, rpkts, xwait;
945 u8 port_select = p->port_select; 1194 u8 port_select = p->port_select;
946 1195
947 ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, 1196 ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
948 &rpkts, &xwait); 1197 &rpkts, &xwait);
949 1198
950 /* Adjust counters for any resets done. */ 1199 /* Adjust counters for any resets done. */
951 swords -= dev->z_port_xmit_data; 1200 swords -= dev->z_port_xmit_data;
@@ -978,13 +1227,13 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp,
978 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) 1227 struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
979 pmp->data; 1228 pmp->data;
980 struct ipath_ibdev *dev = to_idev(ibdev); 1229 struct ipath_ibdev *dev = to_idev(ibdev);
981 struct ipath_layer_counters cntrs; 1230 struct ipath_verbs_counters cntrs;
982 1231
983 /* 1232 /*
984 * Since the HW doesn't support clearing counters, we save the 1233 * Since the HW doesn't support clearing counters, we save the
985 * current count and subtract it from future responses. 1234 * current count and subtract it from future responses.
986 */ 1235 */
987 ipath_layer_get_counters(dev->dd, &cntrs); 1236 ipath_get_counters(dev->dd, &cntrs);
988 1237
989 if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) 1238 if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR)
990 dev->z_symbol_error_counter = cntrs.symbol_error_counter; 1239 dev->z_symbol_error_counter = cntrs.symbol_error_counter;
@@ -1041,8 +1290,8 @@ static int recv_pma_set_portcounters_ext(struct ib_perf *pmp,
1041 struct ipath_ibdev *dev = to_idev(ibdev); 1290 struct ipath_ibdev *dev = to_idev(ibdev);
1042 u64 swords, rwords, spkts, rpkts, xwait; 1291 u64 swords, rwords, spkts, rpkts, xwait;
1043 1292
1044 ipath_layer_snapshot_counters(dev->dd, &swords, &rwords, &spkts, 1293 ipath_snapshot_counters(dev->dd, &swords, &rwords, &spkts,
1045 &rpkts, &xwait); 1294 &rpkts, &xwait);
1046 1295
1047 if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) 1296 if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA)
1048 dev->z_port_xmit_data = swords; 1297 dev->z_port_xmit_data = swords;
diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c
new file mode 100644
index 000000000000..11b7378ff214
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_mmap.c
@@ -0,0 +1,122 @@
1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/config.h>
34#include <linux/module.h>
35#include <linux/vmalloc.h>
36#include <linux/mm.h>
37#include <linux/errno.h>
38#include <asm/pgtable.h>
39
40#include "ipath_verbs.h"
41
42/**
43 * ipath_release_mmap_info - free mmap info structure
44 * @ref: a pointer to the kref within struct ipath_mmap_info
45 */
46void ipath_release_mmap_info(struct kref *ref)
47{
48 struct ipath_mmap_info *ip =
49 container_of(ref, struct ipath_mmap_info, ref);
50
51 vfree(ip->obj);
52 kfree(ip);
53}
54
55/*
56 * open and close keep track of how many times the CQ is mapped,
57 * to avoid releasing it.
58 */
59static void ipath_vma_open(struct vm_area_struct *vma)
60{
61 struct ipath_mmap_info *ip = vma->vm_private_data;
62
63 kref_get(&ip->ref);
64 ip->mmap_cnt++;
65}
66
67static void ipath_vma_close(struct vm_area_struct *vma)
68{
69 struct ipath_mmap_info *ip = vma->vm_private_data;
70
71 ip->mmap_cnt--;
72 kref_put(&ip->ref, ipath_release_mmap_info);
73}
74
75static struct vm_operations_struct ipath_vm_ops = {
76 .open = ipath_vma_open,
77 .close = ipath_vma_close,
78};
79
80/**
81 * ipath_mmap - create a new mmap region
82 * @context: the IB user context of the process making the mmap() call
83 * @vma: the VMA to be initialized
84 * Return zero if the mmap is OK. Otherwise, return an errno.
85 */
86int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
87{
88 struct ipath_ibdev *dev = to_idev(context->device);
89 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
90 unsigned long size = vma->vm_end - vma->vm_start;
91 struct ipath_mmap_info *ip, **pp;
92 int ret = -EINVAL;
93
94 /*
95 * Search the device's list of objects waiting for a mmap call.
96 * Normally, this list is very short since a call to create a
97 * CQ, QP, or SRQ is soon followed by a call to mmap().
98 */
99 spin_lock_irq(&dev->pending_lock);
100 for (pp = &dev->pending_mmaps; (ip = *pp); pp = &ip->next) {
101 /* Only the creator is allowed to mmap the object */
102 if (context != ip->context || (void *) offset != ip->obj)
103 continue;
104 /* Don't allow a mmap larger than the object. */
105 if (size > ip->size)
106 break;
107
108 *pp = ip->next;
109 spin_unlock_irq(&dev->pending_lock);
110
111 ret = remap_vmalloc_range(vma, ip->obj, 0);
112 if (ret)
113 goto done;
114 vma->vm_ops = &ipath_vm_ops;
115 vma->vm_private_data = ip;
116 ipath_vma_open(vma);
117 goto done;
118 }
119 spin_unlock_irq(&dev->pending_lock);
120done:
121 return ret;
122}
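ipath_mmap() only consumes the pending list; the producers are the CQ/QP/SRQ creation paths, which vmalloc() the shared object, fill in an ipath_mmap_info, and chain it onto dev->pending_mmaps so the later mmap() call can find it. A hedged sketch of that producer side, using only the fields this file reads (the helper is hypothetical and locking/error handling are trimmed):

/* Hypothetical producer: queue a vmalloc()ed object for a later mmap().
 * Fills exactly the fields ipath_mmap() and the vm_ops above consult:
 * context, obj, size, mmap_cnt, ref and next. */
static void queue_for_mmap(struct ipath_ibdev *dev,
			   struct ib_ucontext *context,
			   void *obj, u32 size)
{
	struct ipath_mmap_info *ip;

	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
	if (!ip)
		return;

	ip->context = context;
	ip->obj = obj;			/* freed by ipath_release_mmap_info() */
	ip->size = size;
	ip->mmap_cnt = 0;
	kref_init(&ip->ref);

	spin_lock_irq(&dev->pending_lock);
	ip->next = dev->pending_mmaps;	/* push onto the pending list */
	dev->pending_mmaps = ip;
	spin_unlock_irq(&dev->pending_lock);
}

Userspace then hands the object's kernel address back as the mmap offset, which is what the (void *) offset != ip->obj comparison in ipath_mmap() matches on.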
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 4ac31a5da330..b36f6fb3e37a 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -36,6 +36,18 @@
36 36
37#include "ipath_verbs.h" 37#include "ipath_verbs.h"
38 38
39/* Fast memory region */
40struct ipath_fmr {
41 struct ib_fmr ibfmr;
42 u8 page_shift;
43 struct ipath_mregion mr; /* must be last */
44};
45
46static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
47{
48 return container_of(ibfmr, struct ipath_fmr, ibfmr);
49}
50
39/** 51/**
40 * ipath_get_dma_mr - get a DMA memory region 52 * ipath_get_dma_mr - get a DMA memory region
41 * @pd: protection domain for this memory region 53 * @pd: protection domain for this memory region
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 83e557be591e..224b0f40767f 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -35,7 +35,7 @@
35#include <linux/vmalloc.h> 35#include <linux/vmalloc.h>
36 36
37#include "ipath_verbs.h" 37#include "ipath_verbs.h"
38#include "ipath_common.h" 38#include "ipath_kernel.h"
39 39
40#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) 40#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE)
41#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) 41#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
@@ -44,19 +44,6 @@
44#define find_next_offset(map, off) find_next_zero_bit((map)->page, \ 44#define find_next_offset(map, off) find_next_zero_bit((map)->page, \
45 BITS_PER_PAGE, off) 45 BITS_PER_PAGE, off)
46 46
47#define TRANS_INVALID 0
48#define TRANS_ANY2RST 1
49#define TRANS_RST2INIT 2
50#define TRANS_INIT2INIT 3
51#define TRANS_INIT2RTR 4
52#define TRANS_RTR2RTS 5
53#define TRANS_RTS2RTS 6
54#define TRANS_SQERR2RTS 7
55#define TRANS_ANY2ERR 8
56#define TRANS_RTS2SQD 9 /* XXX Wait for expected ACKs & signal event */
57#define TRANS_SQD2SQD 10 /* error if not drained & parameter change */
58#define TRANS_SQD2RTS 11 /* error if not drained */
59
60/* 47/*
61 * Convert the AETH credit code into the number of credits. 48 * Convert the AETH credit code into the number of credits.
62 */ 49 */
@@ -287,7 +274,7 @@ void ipath_free_all_qps(struct ipath_qp_table *qpt)
287 free_qpn(qpt, qp->ibqp.qp_num); 274 free_qpn(qpt, qp->ibqp.qp_num);
288 if (!atomic_dec_and_test(&qp->refcount) || 275 if (!atomic_dec_and_test(&qp->refcount) ||
289 !ipath_destroy_qp(&qp->ibqp)) 276 !ipath_destroy_qp(&qp->ibqp))
290 _VERBS_INFO("QP memory leak!\n"); 277 ipath_dbg(KERN_INFO "QP memory leak!\n");
291 qp = nqp; 278 qp = nqp;
292 } 279 }
293 } 280 }
@@ -355,8 +342,10 @@ static void ipath_reset_qp(struct ipath_qp *qp)
355 qp->s_last = 0; 342 qp->s_last = 0;
356 qp->s_ssn = 1; 343 qp->s_ssn = 1;
357 qp->s_lsn = 0; 344 qp->s_lsn = 0;
358 qp->r_rq.head = 0; 345 if (qp->r_rq.wq) {
359 qp->r_rq.tail = 0; 346 qp->r_rq.wq->head = 0;
347 qp->r_rq.wq->tail = 0;
348 }
360 qp->r_reuse_sge = 0; 349 qp->r_reuse_sge = 0;
361} 350}
362 351
@@ -373,8 +362,8 @@ void ipath_error_qp(struct ipath_qp *qp)
373 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 362 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
374 struct ib_wc wc; 363 struct ib_wc wc;
375 364
376 _VERBS_INFO("QP%d/%d in error state\n", 365 ipath_dbg(KERN_INFO "QP%d/%d in error state\n",
377 qp->ibqp.qp_num, qp->remote_qpn); 366 qp->ibqp.qp_num, qp->remote_qpn);
378 367
379 spin_lock(&dev->pending_lock); 368 spin_lock(&dev->pending_lock);
380 /* XXX What if its already removed by the timeout code? */ 369 /* XXX What if its already removed by the timeout code? */
@@ -410,15 +399,32 @@ void ipath_error_qp(struct ipath_qp *qp)
410 qp->s_hdrwords = 0; 399 qp->s_hdrwords = 0;
411 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 400 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
412 401
413 wc.opcode = IB_WC_RECV; 402 if (qp->r_rq.wq) {
414 spin_lock(&qp->r_rq.lock); 403 struct ipath_rwq *wq;
415 while (qp->r_rq.tail != qp->r_rq.head) { 404 u32 head;
416 wc.wr_id = get_rwqe_ptr(&qp->r_rq, qp->r_rq.tail)->wr_id; 405 u32 tail;
417 if (++qp->r_rq.tail >= qp->r_rq.size) 406
418 qp->r_rq.tail = 0; 407 spin_lock(&qp->r_rq.lock);
419 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); 408
409 /* sanity check pointers before trusting them */
410 wq = qp->r_rq.wq;
411 head = wq->head;
412 if (head >= qp->r_rq.size)
413 head = 0;
414 tail = wq->tail;
415 if (tail >= qp->r_rq.size)
416 tail = 0;
417 wc.opcode = IB_WC_RECV;
418 while (tail != head) {
419 wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
420 if (++tail >= qp->r_rq.size)
421 tail = 0;
422 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
423 }
424 wq->tail = tail;
425
426 spin_unlock(&qp->r_rq.lock);
420 } 427 }
421 spin_unlock(&qp->r_rq.lock);
422} 428}
423 429
424/** 430/**
@@ -426,11 +432,12 @@ void ipath_error_qp(struct ipath_qp *qp)
426 * @ibqp: the queue pair who's attributes we're modifying 432 * @ibqp: the queue pair who's attributes we're modifying
427 * @attr: the new attributes 433 * @attr: the new attributes
428 * @attr_mask: the mask of attributes to modify 434 * @attr_mask: the mask of attributes to modify
435 * @udata: user data for ipathverbs.so
429 * 436 *
430 * Returns 0 on success, otherwise returns an errno. 437 * Returns 0 on success, otherwise returns an errno.
431 */ 438 */
432int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 439int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
433 int attr_mask) 440 int attr_mask, struct ib_udata *udata)
434{ 441{
435 struct ipath_ibdev *dev = to_idev(ibqp->device); 442 struct ipath_ibdev *dev = to_idev(ibqp->device);
436 struct ipath_qp *qp = to_iqp(ibqp); 443 struct ipath_qp *qp = to_iqp(ibqp);
@@ -448,19 +455,46 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
448 attr_mask)) 455 attr_mask))
449 goto inval; 456 goto inval;
450 457
451 if (attr_mask & IB_QP_AV) 458 if (attr_mask & IB_QP_AV) {
452 if (attr->ah_attr.dlid == 0 || 459 if (attr->ah_attr.dlid == 0 ||
453 attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE) 460 attr->ah_attr.dlid >= IPATH_MULTICAST_LID_BASE)
454 goto inval; 461 goto inval;
455 462
463 if ((attr->ah_attr.ah_flags & IB_AH_GRH) &&
464 (attr->ah_attr.grh.sgid_index > 1))
465 goto inval;
466 }
467
456 if (attr_mask & IB_QP_PKEY_INDEX) 468 if (attr_mask & IB_QP_PKEY_INDEX)
457 if (attr->pkey_index >= ipath_layer_get_npkeys(dev->dd)) 469 if (attr->pkey_index >= ipath_get_npkeys(dev->dd))
458 goto inval; 470 goto inval;
459 471
460 if (attr_mask & IB_QP_MIN_RNR_TIMER) 472 if (attr_mask & IB_QP_MIN_RNR_TIMER)
461 if (attr->min_rnr_timer > 31) 473 if (attr->min_rnr_timer > 31)
462 goto inval; 474 goto inval;
463 475
476 if (attr_mask & IB_QP_PORT)
477 if (attr->port_num == 0 ||
478 attr->port_num > ibqp->device->phys_port_cnt)
479 goto inval;
480
481 if (attr_mask & IB_QP_PATH_MTU)
482 if (attr->path_mtu > IB_MTU_4096)
483 goto inval;
484
485 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
486 if (attr->max_dest_rd_atomic > 1)
487 goto inval;
488
489 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
490 if (attr->max_rd_atomic > 1)
491 goto inval;
492
493 if (attr_mask & IB_QP_PATH_MIG_STATE)
494 if (attr->path_mig_state != IB_MIG_MIGRATED &&
495 attr->path_mig_state != IB_MIG_REARM)
496 goto inval;
497
464 switch (new_state) { 498 switch (new_state) {
465 case IB_QPS_RESET: 499 case IB_QPS_RESET:
466 ipath_reset_qp(qp); 500 ipath_reset_qp(qp);
@@ -511,6 +545,9 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
511 if (attr_mask & IB_QP_MIN_RNR_TIMER) 545 if (attr_mask & IB_QP_MIN_RNR_TIMER)
512 qp->r_min_rnr_timer = attr->min_rnr_timer; 546 qp->r_min_rnr_timer = attr->min_rnr_timer;
513 547
548 if (attr_mask & IB_QP_TIMEOUT)
549 qp->timeout = attr->timeout;
550
514 if (attr_mask & IB_QP_QKEY) 551 if (attr_mask & IB_QP_QKEY)
515 qp->qkey = attr->qkey; 552 qp->qkey = attr->qkey;
516 553
@@ -543,7 +580,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
543 attr->dest_qp_num = qp->remote_qpn; 580 attr->dest_qp_num = qp->remote_qpn;
544 attr->qp_access_flags = qp->qp_access_flags; 581 attr->qp_access_flags = qp->qp_access_flags;
545 attr->cap.max_send_wr = qp->s_size - 1; 582 attr->cap.max_send_wr = qp->s_size - 1;
546 attr->cap.max_recv_wr = qp->r_rq.size - 1; 583 attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
547 attr->cap.max_send_sge = qp->s_max_sge; 584 attr->cap.max_send_sge = qp->s_max_sge;
548 attr->cap.max_recv_sge = qp->r_rq.max_sge; 585 attr->cap.max_recv_sge = qp->r_rq.max_sge;
549 attr->cap.max_inline_data = 0; 586 attr->cap.max_inline_data = 0;
@@ -557,7 +594,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
557 attr->max_dest_rd_atomic = 1; 594 attr->max_dest_rd_atomic = 1;
558 attr->min_rnr_timer = qp->r_min_rnr_timer; 595 attr->min_rnr_timer = qp->r_min_rnr_timer;
559 attr->port_num = 1; 596 attr->port_num = 1;
560 attr->timeout = 0; 597 attr->timeout = qp->timeout;
561 attr->retry_cnt = qp->s_retry_cnt; 598 attr->retry_cnt = qp->s_retry_cnt;
562 attr->rnr_retry = qp->s_rnr_retry; 599 attr->rnr_retry = qp->s_rnr_retry;
563 attr->alt_port_num = 0; 600 attr->alt_port_num = 0;
@@ -569,9 +606,10 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
569 init_attr->recv_cq = qp->ibqp.recv_cq; 606 init_attr->recv_cq = qp->ibqp.recv_cq;
570 init_attr->srq = qp->ibqp.srq; 607 init_attr->srq = qp->ibqp.srq;
571 init_attr->cap = attr->cap; 608 init_attr->cap = attr->cap;
572 init_attr->sq_sig_type = 609 if (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR))
573 (qp->s_flags & (1 << IPATH_S_SIGNAL_REQ_WR)) 610 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
574 ? IB_SIGNAL_REQ_WR : 0; 611 else
612 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
575 init_attr->qp_type = qp->ibqp.qp_type; 613 init_attr->qp_type = qp->ibqp.qp_type;
576 init_attr->port_num = 1; 614 init_attr->port_num = 1;
577 return 0; 615 return 0;
@@ -596,13 +634,23 @@ __be32 ipath_compute_aeth(struct ipath_qp *qp)
596 } else { 634 } else {
597 u32 min, max, x; 635 u32 min, max, x;
598 u32 credits; 636 u32 credits;
599 637 struct ipath_rwq *wq = qp->r_rq.wq;
638 u32 head;
639 u32 tail;
640
641 /* sanity check pointers before trusting them */
642 head = wq->head;
643 if (head >= qp->r_rq.size)
644 head = 0;
645 tail = wq->tail;
646 if (tail >= qp->r_rq.size)
647 tail = 0;
600 /* 648 /*
601 * Compute the number of credits available (RWQEs). 649 * Compute the number of credits available (RWQEs).
602 * XXX Not holding the r_rq.lock here so there is a small 650 * XXX Not holding the r_rq.lock here so there is a small
603 * chance that the pair of reads are not atomic. 651 * chance that the pair of reads are not atomic.
604 */ 652 */
605 credits = qp->r_rq.head - qp->r_rq.tail; 653 credits = head - tail;
606 if ((int)credits < 0) 654 if ((int)credits < 0)
607 credits += qp->r_rq.size; 655 credits += qp->r_rq.size;
608 /* 656 /*
@@ -679,27 +727,37 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
679 case IB_QPT_UD: 727 case IB_QPT_UD:
680 case IB_QPT_SMI: 728 case IB_QPT_SMI:
681 case IB_QPT_GSI: 729 case IB_QPT_GSI:
682 qp = kmalloc(sizeof(*qp), GFP_KERNEL); 730 sz = sizeof(*qp);
731 if (init_attr->srq) {
732 struct ipath_srq *srq = to_isrq(init_attr->srq);
733
734 sz += sizeof(*qp->r_sg_list) *
735 srq->rq.max_sge;
736 } else
737 sz += sizeof(*qp->r_sg_list) *
738 init_attr->cap.max_recv_sge;
739 qp = kmalloc(sz, GFP_KERNEL);
683 if (!qp) { 740 if (!qp) {
684 vfree(swq);
685 ret = ERR_PTR(-ENOMEM); 741 ret = ERR_PTR(-ENOMEM);
686 goto bail; 742 goto bail_swq;
687 } 743 }
688 if (init_attr->srq) { 744 if (init_attr->srq) {
745 sz = 0;
689 qp->r_rq.size = 0; 746 qp->r_rq.size = 0;
690 qp->r_rq.max_sge = 0; 747 qp->r_rq.max_sge = 0;
691 qp->r_rq.wq = NULL; 748 qp->r_rq.wq = NULL;
749 init_attr->cap.max_recv_wr = 0;
750 init_attr->cap.max_recv_sge = 0;
692 } else { 751 } else {
693 qp->r_rq.size = init_attr->cap.max_recv_wr + 1; 752 qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
694 qp->r_rq.max_sge = init_attr->cap.max_recv_sge; 753 qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
695 sz = (sizeof(struct ipath_sge) * qp->r_rq.max_sge) + 754 sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
696 sizeof(struct ipath_rwqe); 755 sizeof(struct ipath_rwqe);
697 qp->r_rq.wq = vmalloc(qp->r_rq.size * sz); 756 qp->r_rq.wq = vmalloc_user(sizeof(struct ipath_rwq) +
757 qp->r_rq.size * sz);
698 if (!qp->r_rq.wq) { 758 if (!qp->r_rq.wq) {
699 kfree(qp);
700 vfree(swq);
701 ret = ERR_PTR(-ENOMEM); 759 ret = ERR_PTR(-ENOMEM);
702 goto bail; 760 goto bail_qp;
703 } 761 }
704 } 762 }
705 763
@@ -719,24 +777,19 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
719 qp->s_wq = swq; 777 qp->s_wq = swq;
720 qp->s_size = init_attr->cap.max_send_wr + 1; 778 qp->s_size = init_attr->cap.max_send_wr + 1;
721 qp->s_max_sge = init_attr->cap.max_send_sge; 779 qp->s_max_sge = init_attr->cap.max_send_sge;
722 qp->s_flags = init_attr->sq_sig_type == IB_SIGNAL_REQ_WR ? 780 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
723 1 << IPATH_S_SIGNAL_REQ_WR : 0; 781 qp->s_flags = 1 << IPATH_S_SIGNAL_REQ_WR;
782 else
783 qp->s_flags = 0;
724 dev = to_idev(ibpd->device); 784 dev = to_idev(ibpd->device);
725 err = ipath_alloc_qpn(&dev->qp_table, qp, 785 err = ipath_alloc_qpn(&dev->qp_table, qp,
726 init_attr->qp_type); 786 init_attr->qp_type);
727 if (err) { 787 if (err) {
728 vfree(swq);
729 vfree(qp->r_rq.wq);
730 kfree(qp);
731 ret = ERR_PTR(err); 788 ret = ERR_PTR(err);
732 goto bail; 789 goto bail_rwq;
733 } 790 }
791 qp->ip = NULL;
734 ipath_reset_qp(qp); 792 ipath_reset_qp(qp);
735
736 /* Tell the core driver that the kernel SMA is present. */
737 if (init_attr->qp_type == IB_QPT_SMI)
738 ipath_layer_set_verbs_flags(dev->dd,
739 IPATH_VERBS_KERNEL_SMA);
740 break; 793 break;
741 794
742 default: 795 default:
@@ -747,8 +800,63 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
747 800
748 init_attr->cap.max_inline_data = 0; 801 init_attr->cap.max_inline_data = 0;
749 802
803 /*
804 * Return the address of the RWQ as the offset to mmap.
805 * See ipath_mmap() for details.
806 */
807 if (udata && udata->outlen >= sizeof(__u64)) {
808 struct ipath_mmap_info *ip;
809 __u64 offset = (__u64) qp->r_rq.wq;
810 int err;
811
812 err = ib_copy_to_udata(udata, &offset, sizeof(offset));
813 if (err) {
814 ret = ERR_PTR(err);
815 goto bail_rwq;
816 }
817
818 if (qp->r_rq.wq) {
819 /* Allocate info for ipath_mmap(). */
820 ip = kmalloc(sizeof(*ip), GFP_KERNEL);
821 if (!ip) {
822 ret = ERR_PTR(-ENOMEM);
823 goto bail_rwq;
824 }
825 qp->ip = ip;
826 ip->context = ibpd->uobject->context;
827 ip->obj = qp->r_rq.wq;
828 kref_init(&ip->ref);
829 ip->mmap_cnt = 0;
830 ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
831 qp->r_rq.size * sz);
832 spin_lock_irq(&dev->pending_lock);
833 ip->next = dev->pending_mmaps;
834 dev->pending_mmaps = ip;
835 spin_unlock_irq(&dev->pending_lock);
836 }
837 }
838
839 spin_lock(&dev->n_qps_lock);
840 if (dev->n_qps_allocated == ib_ipath_max_qps) {
841 spin_unlock(&dev->n_qps_lock);
842 ret = ERR_PTR(-ENOMEM);
843 goto bail_ip;
844 }
845
846 dev->n_qps_allocated++;
847 spin_unlock(&dev->n_qps_lock);
848
750 ret = &qp->ibqp; 849 ret = &qp->ibqp;
850 goto bail;
751 851
852bail_ip:
853 kfree(qp->ip);
854bail_rwq:
855 vfree(qp->r_rq.wq);
856bail_qp:
857 kfree(qp);
858bail_swq:
859 vfree(swq);
752bail: 860bail:
753 return ret; 861 return ret;
754} 862}
@@ -768,15 +876,12 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
768 struct ipath_ibdev *dev = to_idev(ibqp->device); 876 struct ipath_ibdev *dev = to_idev(ibqp->device);
769 unsigned long flags; 877 unsigned long flags;
770 878
771 /* Tell the core driver that the kernel SMA is gone. */ 879 spin_lock_irqsave(&qp->s_lock, flags);
772 if (qp->ibqp.qp_type == IB_QPT_SMI)
773 ipath_layer_set_verbs_flags(dev->dd, 0);
774
775 spin_lock_irqsave(&qp->r_rq.lock, flags);
776 spin_lock(&qp->s_lock);
777 qp->state = IB_QPS_ERR; 880 qp->state = IB_QPS_ERR;
778 spin_unlock(&qp->s_lock); 881 spin_unlock_irqrestore(&qp->s_lock, flags);
779 spin_unlock_irqrestore(&qp->r_rq.lock, flags); 882 spin_lock(&dev->n_qps_lock);
883 dev->n_qps_allocated--;
884 spin_unlock(&dev->n_qps_lock);
780 885
781 /* Stop the sending tasklet. */ 886 /* Stop the sending tasklet. */
782 tasklet_kill(&qp->s_task); 887 tasklet_kill(&qp->s_task);
@@ -797,8 +902,11 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
797 if (atomic_read(&qp->refcount) != 0) 902 if (atomic_read(&qp->refcount) != 0)
798 ipath_free_qp(&dev->qp_table, qp); 903 ipath_free_qp(&dev->qp_table, qp);
799 904
905 if (qp->ip)
906 kref_put(&qp->ip->ref, ipath_release_mmap_info);
907 else
908 vfree(qp->r_rq.wq);
800 vfree(qp->s_wq); 909 vfree(qp->s_wq);
801 vfree(qp->r_rq.wq);
802 kfree(qp); 910 kfree(qp);
803 return 0; 911 return 0;
804} 912}
@@ -850,8 +958,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
850 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 958 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
851 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); 959 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
852 960
853 _VERBS_INFO("Send queue error on QP%d/%d: err: %d\n", 961 ipath_dbg(KERN_INFO "Send queue error on QP%d/%d: err: %d\n",
854 qp->ibqp.qp_num, qp->remote_qpn, wc->status); 962 qp->ibqp.qp_num, qp->remote_qpn, wc->status);
855 963
856 spin_lock(&dev->pending_lock); 964 spin_lock(&dev->pending_lock);
857 /* XXX What if its already removed by the timeout code? */ 965 /* XXX What if its already removed by the timeout code? */
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 774d1615ce2f..a08654042c03 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -32,7 +32,7 @@
32 */ 32 */
33 33
34#include "ipath_verbs.h" 34#include "ipath_verbs.h"
35#include "ipath_common.h" 35#include "ipath_kernel.h"
36 36
37/* cut down ridiculously long IB macro names */ 37/* cut down ridiculously long IB macro names */
38#define OP(x) IB_OPCODE_RC_##x 38#define OP(x) IB_OPCODE_RC_##x
@@ -540,7 +540,7 @@ static void send_rc_ack(struct ipath_qp *qp)
540 lrh0 = IPATH_LRH_GRH; 540 lrh0 = IPATH_LRH_GRH;
541 } 541 }
542 /* read pkey_index w/o lock (its atomic) */ 542 /* read pkey_index w/o lock (its atomic) */
543 bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); 543 bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
544 if (qp->r_nak_state) 544 if (qp->r_nak_state)
545 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) | 545 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
546 (qp->r_nak_state << 546 (qp->r_nak_state <<
@@ -557,7 +557,7 @@ static void send_rc_ack(struct ipath_qp *qp)
557 hdr.lrh[0] = cpu_to_be16(lrh0); 557 hdr.lrh[0] = cpu_to_be16(lrh0);
558 hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); 558 hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
559 hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); 559 hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
560 hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); 560 hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
561 ohdr->bth[0] = cpu_to_be32(bth0); 561 ohdr->bth[0] = cpu_to_be32(bth0);
562 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); 562 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
563 ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); 563 ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);
@@ -1323,8 +1323,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1323 * the eager header buffer size to 56 bytes so the last 4 1323 * the eager header buffer size to 56 bytes so the last 4
1324 * bytes of the BTH header (PSN) is in the data buffer. 1324 * bytes of the BTH header (PSN) is in the data buffer.
1325 */ 1325 */
1326 header_in_data = 1326 header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
1327 ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
1328 if (header_in_data) { 1327 if (header_in_data) {
1329 psn = be32_to_cpu(((__be32 *) data)[0]); 1328 psn = be32_to_cpu(((__be32 *) data)[0]);
1330 data += sizeof(__be32); 1329 data += sizeof(__be32);
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 89df8f5ea998..6e23b3d632b8 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -36,8 +36,7 @@
36 36
37/* 37/*
38 * This file should only be included by kernel source, and by the diags. It 38 * This file should only be included by kernel source, and by the diags. It
39 * defines the registers, and their contents, for the InfiniPath HT-400 39 * defines the registers, and their contents, for InfiniPath chips.
40 * chip.
41 */ 40 */
42 41
43/* 42/*
@@ -283,10 +282,12 @@
283#define INFINIPATH_XGXS_RESET 0x7ULL 282#define INFINIPATH_XGXS_RESET 0x7ULL
284#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL 283#define INFINIPATH_XGXS_MDIOADDR_MASK 0xfULL
285#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4 284#define INFINIPATH_XGXS_MDIOADDR_SHIFT 4
285#define INFINIPATH_XGXS_RX_POL_SHIFT 19
286#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
286 287
287#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ 288#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
288 289
289/* TID entries (memory), HT400-only */ 290/* TID entries (memory), HT-only */
290#define INFINIPATH_RT_VALID 0x8000000000000000ULL 291#define INFINIPATH_RT_VALID 0x8000000000000000ULL
291#define INFINIPATH_RT_ADDR_SHIFT 0 292#define INFINIPATH_RT_ADDR_SHIFT 0
292#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF 293#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 772bc59fb85c..5c1da2d25e03 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -32,7 +32,7 @@
32 */ 32 */
33 33
34#include "ipath_verbs.h" 34#include "ipath_verbs.h"
35#include "ipath_common.h" 35#include "ipath_kernel.h"
36 36
37/* 37/*
38 * Convert the AETH RNR timeout code into the number of milliseconds. 38 * Convert the AETH RNR timeout code into the number of milliseconds.
@@ -106,6 +106,54 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
106 spin_unlock_irqrestore(&dev->pending_lock, flags); 106 spin_unlock_irqrestore(&dev->pending_lock, flags);
107} 107}
108 108
109static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
110{
111 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
112 int user = to_ipd(qp->ibqp.pd)->user;
113 int i, j, ret;
114 struct ib_wc wc;
115
116 qp->r_len = 0;
117 for (i = j = 0; i < wqe->num_sge; i++) {
118 if (wqe->sg_list[i].length == 0)
119 continue;
120 /* Check LKEY */
121 if ((user && wqe->sg_list[i].lkey == 0) ||
122 !ipath_lkey_ok(&dev->lk_table,
123 &qp->r_sg_list[j], &wqe->sg_list[i],
124 IB_ACCESS_LOCAL_WRITE))
125 goto bad_lkey;
126 qp->r_len += wqe->sg_list[i].length;
127 j++;
128 }
129 qp->r_sge.sge = qp->r_sg_list[0];
130 qp->r_sge.sg_list = qp->r_sg_list + 1;
131 qp->r_sge.num_sge = j;
132 ret = 1;
133 goto bail;
134
135bad_lkey:
136 wc.wr_id = wqe->wr_id;
137 wc.status = IB_WC_LOC_PROT_ERR;
138 wc.opcode = IB_WC_RECV;
139 wc.vendor_err = 0;
140 wc.byte_len = 0;
141 wc.imm_data = 0;
142 wc.qp_num = qp->ibqp.qp_num;
143 wc.src_qp = 0;
144 wc.wc_flags = 0;
145 wc.pkey_index = 0;
146 wc.slid = 0;
147 wc.sl = 0;
148 wc.dlid_path_bits = 0;
149 wc.port_num = 0;
150 /* Signal solicited completion event. */
151 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
152 ret = 0;
153bail:
154 return ret;
155}
156
109/** 157/**
110 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE 158 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
111 * @qp: the QP 159 * @qp: the QP
@@ -119,71 +167,71 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
119{ 167{
120 unsigned long flags; 168 unsigned long flags;
121 struct ipath_rq *rq; 169 struct ipath_rq *rq;
170 struct ipath_rwq *wq;
122 struct ipath_srq *srq; 171 struct ipath_srq *srq;
123 struct ipath_rwqe *wqe; 172 struct ipath_rwqe *wqe;
124 int ret = 1; 173 void (*handler)(struct ib_event *, void *);
174 u32 tail;
175 int ret;
125 176
126 if (!qp->ibqp.srq) { 177 if (qp->ibqp.srq) {
178 srq = to_isrq(qp->ibqp.srq);
179 handler = srq->ibsrq.event_handler;
180 rq = &srq->rq;
181 } else {
182 srq = NULL;
183 handler = NULL;
127 rq = &qp->r_rq; 184 rq = &qp->r_rq;
128 spin_lock_irqsave(&rq->lock, flags);
129
130 if (unlikely(rq->tail == rq->head)) {
131 ret = 0;
132 goto done;
133 }
134 wqe = get_rwqe_ptr(rq, rq->tail);
135 qp->r_wr_id = wqe->wr_id;
136 if (!wr_id_only) {
137 qp->r_sge.sge = wqe->sg_list[0];
138 qp->r_sge.sg_list = wqe->sg_list + 1;
139 qp->r_sge.num_sge = wqe->num_sge;
140 qp->r_len = wqe->length;
141 }
142 if (++rq->tail >= rq->size)
143 rq->tail = 0;
144 goto done;
145 } 185 }
146 186
147 srq = to_isrq(qp->ibqp.srq);
148 rq = &srq->rq;
149 spin_lock_irqsave(&rq->lock, flags); 187 spin_lock_irqsave(&rq->lock, flags);
150 188 wq = rq->wq;
151 if (unlikely(rq->tail == rq->head)) { 189 tail = wq->tail;
152 ret = 0; 190 /* Validate tail before using it since it is user writable. */
153 goto done; 191 if (tail >= rq->size)
154 } 192 tail = 0;
155 wqe = get_rwqe_ptr(rq, rq->tail); 193 do {
194 if (unlikely(tail == wq->head)) {
195 spin_unlock_irqrestore(&rq->lock, flags);
196 ret = 0;
197 goto bail;
198 }
199 wqe = get_rwqe_ptr(rq, tail);
200 if (++tail >= rq->size)
201 tail = 0;
202 } while (!wr_id_only && !init_sge(qp, wqe));
156 qp->r_wr_id = wqe->wr_id; 203 qp->r_wr_id = wqe->wr_id;
157 if (!wr_id_only) { 204 wq->tail = tail;
158 qp->r_sge.sge = wqe->sg_list[0]; 205
159 qp->r_sge.sg_list = wqe->sg_list + 1; 206 ret = 1;
160 qp->r_sge.num_sge = wqe->num_sge; 207 if (handler) {
161 qp->r_len = wqe->length;
162 }
163 if (++rq->tail >= rq->size)
164 rq->tail = 0;
165 if (srq->ibsrq.event_handler) {
166 struct ib_event ev;
167 u32 n; 208 u32 n;
168 209
169 if (rq->head < rq->tail) 210 /*
170 n = rq->size + rq->head - rq->tail; 211 * validate head pointer value and compute
212 * the number of remaining WQEs.
213 */
214 n = wq->head;
215 if (n >= rq->size)
216 n = 0;
217 if (n < tail)
218 n += rq->size - tail;
171 else 219 else
172 n = rq->head - rq->tail; 220 n -= tail;
173 if (n < srq->limit) { 221 if (n < srq->limit) {
222 struct ib_event ev;
223
174 srq->limit = 0; 224 srq->limit = 0;
175 spin_unlock_irqrestore(&rq->lock, flags); 225 spin_unlock_irqrestore(&rq->lock, flags);
176 ev.device = qp->ibqp.device; 226 ev.device = qp->ibqp.device;
177 ev.element.srq = qp->ibqp.srq; 227 ev.element.srq = qp->ibqp.srq;
178 ev.event = IB_EVENT_SRQ_LIMIT_REACHED; 228 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
179 srq->ibsrq.event_handler(&ev, 229 handler(&ev, srq->ibsrq.srq_context);
180 srq->ibsrq.srq_context);
181 goto bail; 230 goto bail;
182 } 231 }
183 } 232 }
184
185done:
186 spin_unlock_irqrestore(&rq->lock, flags); 233 spin_unlock_irqrestore(&rq->lock, flags);
234
187bail: 235bail:
188 return ret; 236 return ret;
189} 237}
@@ -422,6 +470,15 @@ done:
422 wake_up(&qp->wait); 470 wake_up(&qp->wait);
423} 471}
424 472
473static int want_buffer(struct ipath_devdata *dd)
474{
475 set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
476 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
477 dd->ipath_sendctrl);
478
479 return 0;
480}
481
425/** 482/**
426 * ipath_no_bufs_available - tell the layer driver we need buffers 483 * ipath_no_bufs_available - tell the layer driver we need buffers
427 * @qp: the QP that caused the problem 484 * @qp: the QP that caused the problem
@@ -438,7 +495,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
438 list_add_tail(&qp->piowait, &dev->piowait); 495 list_add_tail(&qp->piowait, &dev->piowait);
439 spin_unlock_irqrestore(&dev->pending_lock, flags); 496 spin_unlock_irqrestore(&dev->pending_lock, flags);
440 /* 497 /*
441 * Note that as soon as ipath_layer_want_buffer() is called and 498 * Note that as soon as want_buffer() is called and
442 * possibly before it returns, ipath_ib_piobufavail() 499 * possibly before it returns, ipath_ib_piobufavail()
443 * could be called. If we are still in the tasklet function, 500 * could be called. If we are still in the tasklet function,
444 * tasklet_hi_schedule() will not call us until the next time 501 * tasklet_hi_schedule() will not call us until the next time
@@ -448,7 +505,7 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
448 */ 505 */
449 clear_bit(IPATH_S_BUSY, &qp->s_flags); 506 clear_bit(IPATH_S_BUSY, &qp->s_flags);
450 tasklet_unlock(&qp->s_task); 507 tasklet_unlock(&qp->s_task);
451 ipath_layer_want_buffer(dev->dd); 508 want_buffer(dev->dd);
452 dev->n_piowait++; 509 dev->n_piowait++;
453} 510}
454 511
@@ -563,7 +620,7 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
563 hdr->hop_limit = grh->hop_limit; 620 hdr->hop_limit = grh->hop_limit;
564 /* The SGID is 32-bit aligned. */ 621 /* The SGID is 32-bit aligned. */
565 hdr->sgid.global.subnet_prefix = dev->gid_prefix; 622 hdr->sgid.global.subnet_prefix = dev->gid_prefix;
566 hdr->sgid.global.interface_id = ipath_layer_get_guid(dev->dd); 623 hdr->sgid.global.interface_id = dev->dd->ipath_guid;
567 hdr->dgid = grh->dgid; 624 hdr->dgid = grh->dgid;
568 625
569 /* GRH header size in 32-bit words. */ 626 /* GRH header size in 32-bit words. */
@@ -595,8 +652,7 @@ void ipath_do_ruc_send(unsigned long data)
595 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags)) 652 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
596 goto bail; 653 goto bail;
597 654
598 if (unlikely(qp->remote_ah_attr.dlid == 655 if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
599 ipath_layer_get_lid(dev->dd))) {
600 ipath_ruc_loopback(qp); 656 ipath_ruc_loopback(qp);
601 goto clear; 657 goto clear;
602 } 658 }
@@ -663,8 +719,8 @@ again:
663 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); 719 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
664 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + 720 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
665 SIZE_OF_CRC); 721 SIZE_OF_CRC);
666 qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd)); 722 qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
667 bth0 |= ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); 723 bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
668 bth0 |= extra_bytes << 20; 724 bth0 |= extra_bytes << 20;
669 ohdr->bth[0] = cpu_to_be32(bth0); 725 ohdr->bth[0] = cpu_to_be32(bth0);
670 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); 726 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index f760434660bd..941e866d9517 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -48,66 +48,39 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
48 struct ib_recv_wr **bad_wr) 48 struct ib_recv_wr **bad_wr)
49{ 49{
50 struct ipath_srq *srq = to_isrq(ibsrq); 50 struct ipath_srq *srq = to_isrq(ibsrq);
51 struct ipath_ibdev *dev = to_idev(ibsrq->device); 51 struct ipath_rwq *wq;
52 unsigned long flags; 52 unsigned long flags;
53 int ret; 53 int ret;
54 54
55 for (; wr; wr = wr->next) { 55 for (; wr; wr = wr->next) {
56 struct ipath_rwqe *wqe; 56 struct ipath_rwqe *wqe;
57 u32 next; 57 u32 next;
58 int i, j; 58 int i;
59 59
60 if (wr->num_sge > srq->rq.max_sge) { 60 if ((unsigned) wr->num_sge > srq->rq.max_sge) {
61 *bad_wr = wr; 61 *bad_wr = wr;
62 ret = -ENOMEM; 62 ret = -ENOMEM;
63 goto bail; 63 goto bail;
64 } 64 }
65 65
66 spin_lock_irqsave(&srq->rq.lock, flags); 66 spin_lock_irqsave(&srq->rq.lock, flags);
67 next = srq->rq.head + 1; 67 wq = srq->rq.wq;
68 next = wq->head + 1;
68 if (next >= srq->rq.size) 69 if (next >= srq->rq.size)
69 next = 0; 70 next = 0;
70 if (next == srq->rq.tail) { 71 if (next == wq->tail) {
71 spin_unlock_irqrestore(&srq->rq.lock, flags); 72 spin_unlock_irqrestore(&srq->rq.lock, flags);
72 *bad_wr = wr; 73 *bad_wr = wr;
73 ret = -ENOMEM; 74 ret = -ENOMEM;
74 goto bail; 75 goto bail;
75 } 76 }
76 77
77 wqe = get_rwqe_ptr(&srq->rq, srq->rq.head); 78 wqe = get_rwqe_ptr(&srq->rq, wq->head);
78 wqe->wr_id = wr->wr_id; 79 wqe->wr_id = wr->wr_id;
79 wqe->sg_list[0].mr = NULL; 80 wqe->num_sge = wr->num_sge;
80 wqe->sg_list[0].vaddr = NULL; 81 for (i = 0; i < wr->num_sge; i++)
81 wqe->sg_list[0].length = 0; 82 wqe->sg_list[i] = wr->sg_list[i];
82 wqe->sg_list[0].sge_length = 0; 83 wq->head = next;
83 wqe->length = 0;
84 for (i = 0, j = 0; i < wr->num_sge; i++) {
85 /* Check LKEY */
86 if (to_ipd(srq->ibsrq.pd)->user &&
87 wr->sg_list[i].lkey == 0) {
88 spin_unlock_irqrestore(&srq->rq.lock,
89 flags);
90 *bad_wr = wr;
91 ret = -EINVAL;
92 goto bail;
93 }
94 if (wr->sg_list[i].length == 0)
95 continue;
96 if (!ipath_lkey_ok(&dev->lk_table,
97 &wqe->sg_list[j],
98 &wr->sg_list[i],
99 IB_ACCESS_LOCAL_WRITE)) {
100 spin_unlock_irqrestore(&srq->rq.lock,
101 flags);
102 *bad_wr = wr;
103 ret = -EINVAL;
104 goto bail;
105 }
106 wqe->length += wr->sg_list[i].length;
107 j++;
108 }
109 wqe->num_sge = j;
110 srq->rq.head = next;
111 spin_unlock_irqrestore(&srq->rq.lock, flags); 84 spin_unlock_irqrestore(&srq->rq.lock, flags);
112 } 85 }
113 ret = 0; 86 ret = 0;
@@ -133,53 +106,95 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
133 106
134 if (dev->n_srqs_allocated == ib_ipath_max_srqs) { 107 if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
135 ret = ERR_PTR(-ENOMEM); 108 ret = ERR_PTR(-ENOMEM);
136 goto bail; 109 goto done;
137 } 110 }
138 111
139 if (srq_init_attr->attr.max_wr == 0) { 112 if (srq_init_attr->attr.max_wr == 0) {
140 ret = ERR_PTR(-EINVAL); 113 ret = ERR_PTR(-EINVAL);
141 goto bail; 114 goto done;
142 } 115 }
143 116
144 if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) || 117 if ((srq_init_attr->attr.max_sge > ib_ipath_max_srq_sges) ||
145 (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) { 118 (srq_init_attr->attr.max_wr > ib_ipath_max_srq_wrs)) {
146 ret = ERR_PTR(-EINVAL); 119 ret = ERR_PTR(-EINVAL);
147 goto bail; 120 goto done;
148 } 121 }
149 122
150 srq = kmalloc(sizeof(*srq), GFP_KERNEL); 123 srq = kmalloc(sizeof(*srq), GFP_KERNEL);
151 if (!srq) { 124 if (!srq) {
152 ret = ERR_PTR(-ENOMEM); 125 ret = ERR_PTR(-ENOMEM);
153 goto bail; 126 goto done;
154 } 127 }
155 128
156 /* 129 /*
157 * Need to use vmalloc() if we want to support large #s of entries. 130 * Need to use vmalloc() if we want to support large #s of entries.
158 */ 131 */
159 srq->rq.size = srq_init_attr->attr.max_wr + 1; 132 srq->rq.size = srq_init_attr->attr.max_wr + 1;
160 sz = sizeof(struct ipath_sge) * srq_init_attr->attr.max_sge + 133 srq->rq.max_sge = srq_init_attr->attr.max_sge;
134 sz = sizeof(struct ib_sge) * srq->rq.max_sge +
161 sizeof(struct ipath_rwqe); 135 sizeof(struct ipath_rwqe);
162 srq->rq.wq = vmalloc(srq->rq.size * sz); 136 srq->rq.wq = vmalloc_user(sizeof(struct ipath_rwq) + srq->rq.size * sz);
163 if (!srq->rq.wq) { 137 if (!srq->rq.wq) {
164 kfree(srq);
165 ret = ERR_PTR(-ENOMEM); 138 ret = ERR_PTR(-ENOMEM);
166 goto bail; 139 goto bail_srq;
167 } 140 }
168 141
169 /* 142 /*
143 * Return the address of the RWQ as the offset to mmap.
144 * See ipath_mmap() for details.
145 */
146 if (udata && udata->outlen >= sizeof(__u64)) {
147 struct ipath_mmap_info *ip;
148 __u64 offset = (__u64) srq->rq.wq;
149 int err;
150
151 err = ib_copy_to_udata(udata, &offset, sizeof(offset));
152 if (err) {
153 ret = ERR_PTR(err);
154 goto bail_wq;
155 }
156
157 /* Allocate info for ipath_mmap(). */
158 ip = kmalloc(sizeof(*ip), GFP_KERNEL);
159 if (!ip) {
160 ret = ERR_PTR(-ENOMEM);
161 goto bail_wq;
162 }
163 srq->ip = ip;
164 ip->context = ibpd->uobject->context;
165 ip->obj = srq->rq.wq;
166 kref_init(&ip->ref);
167 ip->mmap_cnt = 0;
168 ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
169 srq->rq.size * sz);
170 spin_lock_irq(&dev->pending_lock);
171 ip->next = dev->pending_mmaps;
172 dev->pending_mmaps = ip;
173 spin_unlock_irq(&dev->pending_lock);
174 } else
175 srq->ip = NULL;
176
177 /*
170 * ib_create_srq() will initialize srq->ibsrq. 178 * ib_create_srq() will initialize srq->ibsrq.
171 */ 179 */
172 spin_lock_init(&srq->rq.lock); 180 spin_lock_init(&srq->rq.lock);
173 srq->rq.head = 0; 181 srq->rq.wq->head = 0;
174 srq->rq.tail = 0; 182 srq->rq.wq->tail = 0;
175 srq->rq.max_sge = srq_init_attr->attr.max_sge; 183 srq->rq.max_sge = srq_init_attr->attr.max_sge;
176 srq->limit = srq_init_attr->attr.srq_limit; 184 srq->limit = srq_init_attr->attr.srq_limit;
177 185
186 dev->n_srqs_allocated++;
187
178 ret = &srq->ibsrq; 188 ret = &srq->ibsrq;
189 goto done;
179 190
180 dev->n_srqs_allocated++; 191bail_wq:
192 vfree(srq->rq.wq);
181 193
182bail: 194bail_srq:
195 kfree(srq);
196
197done:
183 return ret; 198 return ret;
184} 199}
185 200
@@ -188,83 +203,130 @@ bail:
188 * @ibsrq: the SRQ to modify 203 * @ibsrq: the SRQ to modify
189 * @attr: the new attributes of the SRQ 204 * @attr: the new attributes of the SRQ
190 * @attr_mask: indicates which attributes to modify 205 * @attr_mask: indicates which attributes to modify
206 * @udata: user data for ipathverbs.so
191 */ 207 */
192int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 208int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
193 enum ib_srq_attr_mask attr_mask) 209 enum ib_srq_attr_mask attr_mask,
210 struct ib_udata *udata)
194{ 211{
195 struct ipath_srq *srq = to_isrq(ibsrq); 212 struct ipath_srq *srq = to_isrq(ibsrq);
196 unsigned long flags; 213 int ret = 0;
197 int ret;
198 214
199 if (attr_mask & IB_SRQ_MAX_WR) 215 if (attr_mask & IB_SRQ_MAX_WR) {
200 if ((attr->max_wr > ib_ipath_max_srq_wrs) || 216 struct ipath_rwq *owq;
201 (attr->max_sge > srq->rq.max_sge)) { 217 struct ipath_rwq *wq;
202 ret = -EINVAL; 218 struct ipath_rwqe *p;
203 goto bail; 219 u32 sz, size, n, head, tail;
204 }
205 220
206 if (attr_mask & IB_SRQ_LIMIT) 221 /* Check that the requested sizes are below the limits. */
207 if (attr->srq_limit >= srq->rq.size) { 222 if ((attr->max_wr > ib_ipath_max_srq_wrs) ||
223 ((attr_mask & IB_SRQ_LIMIT) ?
224 attr->srq_limit : srq->limit) > attr->max_wr) {
208 ret = -EINVAL; 225 ret = -EINVAL;
209 goto bail; 226 goto bail;
210 } 227 }
211 228
212 if (attr_mask & IB_SRQ_MAX_WR) {
213 struct ipath_rwqe *wq, *p;
214 u32 sz, size, n;
215
216 sz = sizeof(struct ipath_rwqe) + 229 sz = sizeof(struct ipath_rwqe) +
217 attr->max_sge * sizeof(struct ipath_sge); 230 srq->rq.max_sge * sizeof(struct ib_sge);
218 size = attr->max_wr + 1; 231 size = attr->max_wr + 1;
219 wq = vmalloc(size * sz); 232 wq = vmalloc_user(sizeof(struct ipath_rwq) + size * sz);
220 if (!wq) { 233 if (!wq) {
221 ret = -ENOMEM; 234 ret = -ENOMEM;
222 goto bail; 235 goto bail;
223 } 236 }
224 237
225 spin_lock_irqsave(&srq->rq.lock, flags); 238 /*
226 if (srq->rq.head < srq->rq.tail) 239 * Return the address of the RWQ as the offset to mmap.
227 n = srq->rq.size + srq->rq.head - srq->rq.tail; 240 * See ipath_mmap() for details.
241 */
242 if (udata && udata->inlen >= sizeof(__u64)) {
243 __u64 offset_addr;
244 __u64 offset = (__u64) wq;
245
246 ret = ib_copy_from_udata(&offset_addr, udata,
247 sizeof(offset_addr));
248 if (ret) {
249 vfree(wq);
250 goto bail;
251 }
252 udata->outbuf = (void __user *) offset_addr;
253 ret = ib_copy_to_udata(udata, &offset,
254 sizeof(offset));
255 if (ret) {
256 vfree(wq);
257 goto bail;
258 }
259 }
260
261 spin_lock_irq(&srq->rq.lock);
262 /*
263 * validate head pointer value and compute
264 * the number of remaining WQEs.
265 */
266 owq = srq->rq.wq;
267 head = owq->head;
268 if (head >= srq->rq.size)
269 head = 0;
270 tail = owq->tail;
271 if (tail >= srq->rq.size)
272 tail = 0;
273 n = head;
274 if (n < tail)
275 n += srq->rq.size - tail;
228 else 276 else
229 n = srq->rq.head - srq->rq.tail; 277 n -= tail;
230 if (size <= n || size <= srq->limit) { 278 if (size <= n) {
231 spin_unlock_irqrestore(&srq->rq.lock, flags); 279 spin_unlock_irq(&srq->rq.lock);
232 vfree(wq); 280 vfree(wq);
233 ret = -EINVAL; 281 ret = -EINVAL;
234 goto bail; 282 goto bail;
235 } 283 }
236 n = 0; 284 n = 0;
237 p = wq; 285 p = wq->wq;
238 while (srq->rq.tail != srq->rq.head) { 286 while (tail != head) {
239 struct ipath_rwqe *wqe; 287 struct ipath_rwqe *wqe;
240 int i; 288 int i;
241 289
242 wqe = get_rwqe_ptr(&srq->rq, srq->rq.tail); 290 wqe = get_rwqe_ptr(&srq->rq, tail);
243 p->wr_id = wqe->wr_id; 291 p->wr_id = wqe->wr_id;
244 p->length = wqe->length;
245 p->num_sge = wqe->num_sge; 292 p->num_sge = wqe->num_sge;
246 for (i = 0; i < wqe->num_sge; i++) 293 for (i = 0; i < wqe->num_sge; i++)
247 p->sg_list[i] = wqe->sg_list[i]; 294 p->sg_list[i] = wqe->sg_list[i];
248 n++; 295 n++;
249 p = (struct ipath_rwqe *)((char *) p + sz); 296 p = (struct ipath_rwqe *)((char *) p + sz);
250 if (++srq->rq.tail >= srq->rq.size) 297 if (++tail >= srq->rq.size)
251 srq->rq.tail = 0; 298 tail = 0;
252 } 299 }
253 vfree(srq->rq.wq);
254 srq->rq.wq = wq; 300 srq->rq.wq = wq;
255 srq->rq.size = size; 301 srq->rq.size = size;
256 srq->rq.head = n; 302 wq->head = n;
257 srq->rq.tail = 0; 303 wq->tail = 0;
258 srq->rq.max_sge = attr->max_sge; 304 if (attr_mask & IB_SRQ_LIMIT)
259 spin_unlock_irqrestore(&srq->rq.lock, flags); 305 srq->limit = attr->srq_limit;
260 } 306 spin_unlock_irq(&srq->rq.lock);
261 307
262 if (attr_mask & IB_SRQ_LIMIT) { 308 vfree(owq);
263 spin_lock_irqsave(&srq->rq.lock, flags); 309
264 srq->limit = attr->srq_limit; 310 if (srq->ip) {
265 spin_unlock_irqrestore(&srq->rq.lock, flags); 311 struct ipath_mmap_info *ip = srq->ip;
312 struct ipath_ibdev *dev = to_idev(srq->ibsrq.device);
313
314 ip->obj = wq;
315 ip->size = PAGE_ALIGN(sizeof(struct ipath_rwq) +
316 size * sz);
317 spin_lock_irq(&dev->pending_lock);
318 ip->next = dev->pending_mmaps;
319 dev->pending_mmaps = ip;
320 spin_unlock_irq(&dev->pending_lock);
321 }
322 } else if (attr_mask & IB_SRQ_LIMIT) {
323 spin_lock_irq(&srq->rq.lock);
324 if (attr->srq_limit >= srq->rq.size)
325 ret = -EINVAL;
326 else
327 srq->limit = attr->srq_limit;
328 spin_unlock_irq(&srq->rq.lock);
266 } 329 }
267 ret = 0;
268 330
269bail: 331bail:
270 return ret; 332 return ret;
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 70351b7e35c0..30a825928fcf 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -271,33 +271,6 @@ void ipath_get_faststats(unsigned long opaque)
271 } 271 }
272 } 272 }
273 273
274 if (dd->ipath_nosma_bufs) {
275 dd->ipath_nosma_secs += 5;
276 if (dd->ipath_nosma_secs >= 30) {
277 ipath_cdbg(SMA, "No SMA bufs avail %u seconds; "
278 "cancelling pending sends\n",
279 dd->ipath_nosma_secs);
280 /*
281 * issue an abort as well, in case we have a packet
282 * stuck in launch fifo. This could corrupt an
283 * outgoing user packet in the worst case,
284 * but this is a pretty catastrophic, anyway.
285 */
286 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
287 INFINIPATH_S_ABORT);
288 ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
289 dd->ipath_piobcnt2k +
290 dd->ipath_piobcnt4k -
291 dd->ipath_lastport_piobuf);
292 /* start again, if necessary */
293 dd->ipath_nosma_secs = 0;
294 } else
295 ipath_cdbg(SMA, "No SMA bufs avail %u tries, "
296 "after %u seconds\n",
297 dd->ipath_nosma_bufs,
298 dd->ipath_nosma_secs);
299 }
300
301done: 274done:
302 mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); 275 mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
303} 276}
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b98821d7801d..e299148c4b68 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -35,7 +35,6 @@
35#include <linux/pci.h> 35#include <linux/pci.h>
36 36
37#include "ipath_kernel.h" 37#include "ipath_kernel.h"
38#include "ipath_layer.h"
39#include "ipath_common.h" 38#include "ipath_common.h"
40 39
41/** 40/**
@@ -76,7 +75,7 @@ bail:
76static ssize_t show_version(struct device_driver *dev, char *buf) 75static ssize_t show_version(struct device_driver *dev, char *buf)
77{ 76{
78 /* The string printed here is already newline-terminated. */ 77 /* The string printed here is already newline-terminated. */
79 return scnprintf(buf, PAGE_SIZE, "%s", ipath_core_version); 78 return scnprintf(buf, PAGE_SIZE, "%s", ib_ipath_version);
80} 79}
81 80
82static ssize_t show_num_units(struct device_driver *dev, char *buf) 81static ssize_t show_num_units(struct device_driver *dev, char *buf)
@@ -108,8 +107,8 @@ static const char *ipath_status_str[] = {
108 "Initted", 107 "Initted",
109 "Disabled", 108 "Disabled",
110 "Admin_Disabled", 109 "Admin_Disabled",
111 "OIB_SMA", 110 "", /* This used to be the old "OIB_SMA" status. */
112 "SMA", 111 "", /* This used to be the old "SMA" status. */
113 "Present", 112 "Present",
114 "IB_link_up", 113 "IB_link_up",
115 "IB_configured", 114 "IB_configured",
@@ -227,7 +226,6 @@ static ssize_t store_mlid(struct device *dev,
227 unit = dd->ipath_unit; 226 unit = dd->ipath_unit;
228 227
229 dd->ipath_mlid = mlid; 228 dd->ipath_mlid = mlid;
230 ipath_layer_intr(dd, IPATH_LAYER_INT_BCAST);
231 229
232 goto bail; 230 goto bail;
233invalid: 231invalid:
@@ -467,7 +465,7 @@ static ssize_t store_link_state(struct device *dev,
467 if (ret < 0) 465 if (ret < 0)
468 goto invalid; 466 goto invalid;
469 467
470 r = ipath_layer_set_linkstate(dd, state); 468 r = ipath_set_linkstate(dd, state);
471 if (r < 0) { 469 if (r < 0) {
472 ret = r; 470 ret = r;
473 goto bail; 471 goto bail;
@@ -502,7 +500,7 @@ static ssize_t store_mtu(struct device *dev,
502 if (ret < 0) 500 if (ret < 0)
503 goto invalid; 501 goto invalid;
504 502
505 r = ipath_layer_set_mtu(dd, mtu); 503 r = ipath_set_mtu(dd, mtu);
506 if (r < 0) 504 if (r < 0)
507 ret = r; 505 ret = r;
508 506
@@ -563,6 +561,33 @@ bail:
563 return ret; 561 return ret;
564} 562}
565 563
564static ssize_t store_rx_pol_inv(struct device *dev,
565 struct device_attribute *attr,
566 const char *buf,
567 size_t count)
568{
569 struct ipath_devdata *dd = dev_get_drvdata(dev);
570 int ret, r;
571 u16 val;
572
573 ret = ipath_parse_ushort(buf, &val);
574 if (ret < 0)
575 goto invalid;
576
577 r = ipath_set_rx_pol_inv(dd, val);
578 if (r < 0) {
579 ret = r;
580 goto bail;
581 }
582
583 goto bail;
584invalid:
585 ipath_dev_err(dd, "attempt to set invalid Rx Polarity invert\n");
586bail:
587 return ret;
588}
589
590
566static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); 591static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
567static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); 592static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
568 593
@@ -589,6 +614,7 @@ static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
589static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL); 614static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
590static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); 615static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
591static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); 616static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
617static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
592 618
593static struct attribute *dev_attributes[] = { 619static struct attribute *dev_attributes[] = {
594 &dev_attr_guid.attr, 620 &dev_attr_guid.attr,
@@ -603,6 +629,7 @@ static struct attribute *dev_attributes[] = {
603 &dev_attr_boardversion.attr, 629 &dev_attr_boardversion.attr,
604 &dev_attr_unit.attr, 630 &dev_attr_unit.attr,
605 &dev_attr_enabled.attr, 631 &dev_attr_enabled.attr,
632 &dev_attr_rx_pol_inv.attr,
606 NULL 633 NULL
607}; 634};
608 635
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index c33abea2d5a7..0fd3cded16ba 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -32,7 +32,7 @@
32 */ 32 */
33 33
34#include "ipath_verbs.h" 34#include "ipath_verbs.h"
35#include "ipath_common.h" 35#include "ipath_kernel.h"
36 36
37/* cut down ridiculously long IB macro names */ 37/* cut down ridiculously long IB macro names */
38#define OP(x) IB_OPCODE_UC_##x 38#define OP(x) IB_OPCODE_UC_##x
@@ -261,8 +261,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
261 * size to 56 bytes so the last 4 bytes of 261 * size to 56 bytes so the last 4 bytes of
262 * the BTH header (PSN) is in the data buffer. 262 * the BTH header (PSN) is in the data buffer.
263 */ 263 */
264 header_in_data = 264 header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
265 ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
266 if (header_in_data) { 265 if (header_in_data) {
267 psn = be32_to_cpu(((__be32 *) data)[0]); 266 psn = be32_to_cpu(((__be32 *) data)[0]);
268 data += sizeof(__be32); 267 data += sizeof(__be32);
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 3466129af804..6991d1d74e3c 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -34,7 +34,54 @@
34#include <rdma/ib_smi.h> 34#include <rdma/ib_smi.h>
35 35
36#include "ipath_verbs.h" 36#include "ipath_verbs.h"
37#include "ipath_common.h" 37#include "ipath_kernel.h"
38
39static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
40 u32 *lengthp, struct ipath_sge_state *ss)
41{
42 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
43 int user = to_ipd(qp->ibqp.pd)->user;
44 int i, j, ret;
45 struct ib_wc wc;
46
47 *lengthp = 0;
48 for (i = j = 0; i < wqe->num_sge; i++) {
49 if (wqe->sg_list[i].length == 0)
50 continue;
51 /* Check LKEY */
52 if ((user && wqe->sg_list[i].lkey == 0) ||
53 !ipath_lkey_ok(&dev->lk_table,
54 j ? &ss->sg_list[j - 1] : &ss->sge,
55 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
56 goto bad_lkey;
57 *lengthp += wqe->sg_list[i].length;
58 j++;
59 }
60 ss->num_sge = j;
61 ret = 1;
62 goto bail;
63
64bad_lkey:
65 wc.wr_id = wqe->wr_id;
66 wc.status = IB_WC_LOC_PROT_ERR;
67 wc.opcode = IB_WC_RECV;
68 wc.vendor_err = 0;
69 wc.byte_len = 0;
70 wc.imm_data = 0;
71 wc.qp_num = qp->ibqp.qp_num;
72 wc.src_qp = 0;
73 wc.wc_flags = 0;
74 wc.pkey_index = 0;
75 wc.slid = 0;
76 wc.sl = 0;
77 wc.dlid_path_bits = 0;
78 wc.port_num = 0;
79 /* Signal solicited completion event. */
80 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
81 ret = 0;
82bail:
83 return ret;
84}
38 85
39/** 86/**
40 * ipath_ud_loopback - handle send on loopback QPs 87 * ipath_ud_loopback - handle send on loopback QPs
@@ -46,6 +93,8 @@
46 * 93 *
47 * This is called from ipath_post_ud_send() to forward a WQE addressed 94 * This is called from ipath_post_ud_send() to forward a WQE addressed
48 * to the same HCA. 95 * to the same HCA.
96 * Note that the receive interrupt handler may be calling ipath_ud_rcv()
97 * while this is being called.
49 */ 98 */
50static void ipath_ud_loopback(struct ipath_qp *sqp, 99static void ipath_ud_loopback(struct ipath_qp *sqp,
51 struct ipath_sge_state *ss, 100 struct ipath_sge_state *ss,
@@ -60,7 +109,11 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
60 struct ipath_srq *srq; 109 struct ipath_srq *srq;
61 struct ipath_sge_state rsge; 110 struct ipath_sge_state rsge;
62 struct ipath_sge *sge; 111 struct ipath_sge *sge;
112 struct ipath_rwq *wq;
63 struct ipath_rwqe *wqe; 113 struct ipath_rwqe *wqe;
114 void (*handler)(struct ib_event *, void *);
115 u32 tail;
116 u32 rlen;
64 117
65 qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn); 118 qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
66 if (!qp) 119 if (!qp)
@@ -94,6 +147,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
94 wc->imm_data = 0; 147 wc->imm_data = 0;
95 } 148 }
96 149
150 if (wr->num_sge > 1) {
151 rsge.sg_list = kmalloc((wr->num_sge - 1) *
152 sizeof(struct ipath_sge),
153 GFP_ATOMIC);
154 } else
155 rsge.sg_list = NULL;
156
97 /* 157 /*
98 * Get the next work request entry to find where to put the data. 158 * Get the next work request entry to find where to put the data.
99 * Note that it is safe to drop the lock after changing rq->tail 159 * Note that it is safe to drop the lock after changing rq->tail
@@ -101,37 +161,52 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
101 */ 161 */
102 if (qp->ibqp.srq) { 162 if (qp->ibqp.srq) {
103 srq = to_isrq(qp->ibqp.srq); 163 srq = to_isrq(qp->ibqp.srq);
164 handler = srq->ibsrq.event_handler;
104 rq = &srq->rq; 165 rq = &srq->rq;
105 } else { 166 } else {
106 srq = NULL; 167 srq = NULL;
168 handler = NULL;
107 rq = &qp->r_rq; 169 rq = &qp->r_rq;
108 } 170 }
171
109 spin_lock_irqsave(&rq->lock, flags); 172 spin_lock_irqsave(&rq->lock, flags);
110 if (rq->tail == rq->head) { 173 wq = rq->wq;
111 spin_unlock_irqrestore(&rq->lock, flags); 174 tail = wq->tail;
112 dev->n_pkt_drops++; 175 while (1) {
113 goto done; 176 if (unlikely(tail == wq->head)) {
177 spin_unlock_irqrestore(&rq->lock, flags);
178 dev->n_pkt_drops++;
179 goto bail_sge;
180 }
181 wqe = get_rwqe_ptr(rq, tail);
182 if (++tail >= rq->size)
183 tail = 0;
184 if (init_sge(qp, wqe, &rlen, &rsge))
185 break;
186 wq->tail = tail;
114 } 187 }
115 /* Silently drop packets which are too big. */ 188 /* Silently drop packets which are too big. */
116 wqe = get_rwqe_ptr(rq, rq->tail); 189 if (wc->byte_len > rlen) {
117 if (wc->byte_len > wqe->length) {
118 spin_unlock_irqrestore(&rq->lock, flags); 190 spin_unlock_irqrestore(&rq->lock, flags);
119 dev->n_pkt_drops++; 191 dev->n_pkt_drops++;
120 goto done; 192 goto bail_sge;
121 } 193 }
194 wq->tail = tail;
122 wc->wr_id = wqe->wr_id; 195 wc->wr_id = wqe->wr_id;
123 rsge.sge = wqe->sg_list[0]; 196 if (handler) {
124 rsge.sg_list = wqe->sg_list + 1;
125 rsge.num_sge = wqe->num_sge;
126 if (++rq->tail >= rq->size)
127 rq->tail = 0;
128 if (srq && srq->ibsrq.event_handler) {
129 u32 n; 197 u32 n;
130 198
131 if (rq->head < rq->tail) 199 /*
132 n = rq->size + rq->head - rq->tail; 200 * validate head pointer value and compute
201 * the number of remaining WQEs.
202 */
203 n = wq->head;
204 if (n >= rq->size)
205 n = 0;
206 if (n < tail)
207 n += rq->size - tail;
133 else 208 else
134 n = rq->head - rq->tail; 209 n -= tail;
135 if (n < srq->limit) { 210 if (n < srq->limit) {
136 struct ib_event ev; 211 struct ib_event ev;
137 212
@@ -140,12 +215,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
140 ev.device = qp->ibqp.device; 215 ev.device = qp->ibqp.device;
141 ev.element.srq = qp->ibqp.srq; 216 ev.element.srq = qp->ibqp.srq;
142 ev.event = IB_EVENT_SRQ_LIMIT_REACHED; 217 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
143 srq->ibsrq.event_handler(&ev, 218 handler(&ev, srq->ibsrq.srq_context);
144 srq->ibsrq.srq_context);
145 } else 219 } else
146 spin_unlock_irqrestore(&rq->lock, flags); 220 spin_unlock_irqrestore(&rq->lock, flags);
147 } else 221 } else
148 spin_unlock_irqrestore(&rq->lock, flags); 222 spin_unlock_irqrestore(&rq->lock, flags);
223
149 ah_attr = &to_iah(wr->wr.ud.ah)->attr; 224 ah_attr = &to_iah(wr->wr.ud.ah)->attr;
150 if (ah_attr->ah_flags & IB_AH_GRH) { 225 if (ah_attr->ah_flags & IB_AH_GRH) {
151 ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); 226 ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
@@ -186,7 +261,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
186 wc->src_qp = sqp->ibqp.qp_num; 261 wc->src_qp = sqp->ibqp.qp_num;
187 /* XXX do we know which pkey matched? Only needed for GSI. */ 262 /* XXX do we know which pkey matched? Only needed for GSI. */
188 wc->pkey_index = 0; 263 wc->pkey_index = 0;
189 wc->slid = ipath_layer_get_lid(dev->dd) | 264 wc->slid = dev->dd->ipath_lid |
190 (ah_attr->src_path_bits & 265 (ah_attr->src_path_bits &
191 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1)); 266 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
192 wc->sl = ah_attr->sl; 267 wc->sl = ah_attr->sl;
@@ -196,6 +271,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
196 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc, 271 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
197 wr->send_flags & IB_SEND_SOLICITED); 272 wr->send_flags & IB_SEND_SOLICITED);
198 273
274bail_sge:
275 kfree(rsge.sg_list);
199done: 276done:
200 if (atomic_dec_and_test(&qp->refcount)) 277 if (atomic_dec_and_test(&qp->refcount))
201 wake_up(&qp->wait); 278 wake_up(&qp->wait);
@@ -276,7 +353,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
276 ss.num_sge++; 353 ss.num_sge++;
277 } 354 }
278 /* Check for invalid packet size. */ 355 /* Check for invalid packet size. */
279 if (len > ipath_layer_get_ibmtu(dev->dd)) { 356 if (len > dev->dd->ipath_ibmtu) {
280 ret = -EINVAL; 357 ret = -EINVAL;
281 goto bail; 358 goto bail;
282 } 359 }
@@ -298,7 +375,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
298 dev->n_unicast_xmit++; 375 dev->n_unicast_xmit++;
299 lid = ah_attr->dlid & 376 lid = ah_attr->dlid &
300 ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 377 ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
301 if (unlikely(lid == ipath_layer_get_lid(dev->dd))) { 378 if (unlikely(lid == dev->dd->ipath_lid)) {
302 /* 379 /*
303 * Pass in an uninitialized ib_wc to save stack 380 * Pass in an uninitialized ib_wc to save stack
304 * space. 381 * space.
@@ -327,7 +404,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
327 qp->s_hdr.u.l.grh.sgid.global.subnet_prefix = 404 qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
328 dev->gid_prefix; 405 dev->gid_prefix;
329 qp->s_hdr.u.l.grh.sgid.global.interface_id = 406 qp->s_hdr.u.l.grh.sgid.global.interface_id =
330 ipath_layer_get_guid(dev->dd); 407 dev->dd->ipath_guid;
331 qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid; 408 qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
332 /* 409 /*
333 * Don't worry about sending to locally attached multicast 410 * Don't worry about sending to locally attached multicast
@@ -357,7 +434,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
357 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); 434 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
358 qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ 435 qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
359 qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC); 436 qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
360 lid = ipath_layer_get_lid(dev->dd); 437 lid = dev->dd->ipath_lid;
361 if (lid) { 438 if (lid) {
362 lid |= ah_attr->src_path_bits & 439 lid |= ah_attr->src_path_bits &
363 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 440 ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
@@ -368,7 +445,7 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
368 bth0 |= 1 << 23; 445 bth0 |= 1 << 23;
369 bth0 |= extra_bytes << 20; 446 bth0 |= extra_bytes << 20;
370 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : 447 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
371 ipath_layer_get_pkey(dev->dd, qp->s_pkey_index); 448 ipath_get_pkey(dev->dd, qp->s_pkey_index);
372 ohdr->bth[0] = cpu_to_be32(bth0); 449 ohdr->bth[0] = cpu_to_be32(bth0);
373 /* 450 /*
374 * Use the multicast QP if the destination LID is a multicast LID. 451 * Use the multicast QP if the destination LID is a multicast LID.
@@ -433,13 +510,9 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
433 int opcode; 510 int opcode;
434 u32 hdrsize; 511 u32 hdrsize;
435 u32 pad; 512 u32 pad;
436 unsigned long flags;
437 struct ib_wc wc; 513 struct ib_wc wc;
438 u32 qkey; 514 u32 qkey;
439 u32 src_qp; 515 u32 src_qp;
440 struct ipath_rq *rq;
441 struct ipath_srq *srq;
442 struct ipath_rwqe *wqe;
443 u16 dlid; 516 u16 dlid;
444 int header_in_data; 517 int header_in_data;
445 518
@@ -458,8 +531,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
458 * the eager header buffer size to 56 bytes so the last 12 531 * the eager header buffer size to 56 bytes so the last 12
459 * bytes of the IB header is in the data buffer. 532 * bytes of the IB header is in the data buffer.
460 */ 533 */
461 header_in_data = 534 header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
462 ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
463 if (header_in_data) { 535 if (header_in_data) {
464 qkey = be32_to_cpu(((__be32 *) data)[1]); 536 qkey = be32_to_cpu(((__be32 *) data)[1]);
465 src_qp = be32_to_cpu(((__be32 *) data)[2]); 537 src_qp = be32_to_cpu(((__be32 *) data)[2]);
@@ -547,19 +619,10 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
547 619
548 /* 620 /*
549 * Get the next work request entry to find where to put the data. 621 * Get the next work request entry to find where to put the data.
550 * Note that it is safe to drop the lock after changing rq->tail
551 * since ipath_post_receive() won't fill the empty slot.
552 */ 622 */
553 if (qp->ibqp.srq) { 623 if (qp->r_reuse_sge)
554 srq = to_isrq(qp->ibqp.srq); 624 qp->r_reuse_sge = 0;
555 rq = &srq->rq; 625 else if (!ipath_get_rwqe(qp, 0)) {
556 } else {
557 srq = NULL;
558 rq = &qp->r_rq;
559 }
560 spin_lock_irqsave(&rq->lock, flags);
561 if (rq->tail == rq->head) {
562 spin_unlock_irqrestore(&rq->lock, flags);
563 /* 626 /*
564 * Count VL15 packets dropped due to no receive buffer. 627 * Count VL15 packets dropped due to no receive buffer.
565 * Otherwise, count them as buffer overruns since usually, 628 * Otherwise, count them as buffer overruns since usually,
@@ -573,39 +636,11 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
573 goto bail; 636 goto bail;
574 } 637 }
575 /* Silently drop packets which are too big. */ 638 /* Silently drop packets which are too big. */
576 wqe = get_rwqe_ptr(rq, rq->tail); 639 if (wc.byte_len > qp->r_len) {
577 if (wc.byte_len > wqe->length) { 640 qp->r_reuse_sge = 1;
578 spin_unlock_irqrestore(&rq->lock, flags);
579 dev->n_pkt_drops++; 641 dev->n_pkt_drops++;
580 goto bail; 642 goto bail;
581 } 643 }
582 wc.wr_id = wqe->wr_id;
583 qp->r_sge.sge = wqe->sg_list[0];
584 qp->r_sge.sg_list = wqe->sg_list + 1;
585 qp->r_sge.num_sge = wqe->num_sge;
586 if (++rq->tail >= rq->size)
587 rq->tail = 0;
588 if (srq && srq->ibsrq.event_handler) {
589 u32 n;
590
591 if (rq->head < rq->tail)
592 n = rq->size + rq->head - rq->tail;
593 else
594 n = rq->head - rq->tail;
595 if (n < srq->limit) {
596 struct ib_event ev;
597
598 srq->limit = 0;
599 spin_unlock_irqrestore(&rq->lock, flags);
600 ev.device = qp->ibqp.device;
601 ev.element.srq = qp->ibqp.srq;
602 ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
603 srq->ibsrq.event_handler(&ev,
604 srq->ibsrq.srq_context);
605 } else
606 spin_unlock_irqrestore(&rq->lock, flags);
607 } else
608 spin_unlock_irqrestore(&rq->lock, flags);
609 if (has_grh) { 644 if (has_grh) {
610 ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh, 645 ipath_copy_sge(&qp->r_sge, &hdr->u.l.grh,
611 sizeof(struct ib_grh)); 646 sizeof(struct ib_grh));
@@ -614,6 +649,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
614 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); 649 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
615 ipath_copy_sge(&qp->r_sge, data, 650 ipath_copy_sge(&qp->r_sge, data,
616 wc.byte_len - sizeof(struct ib_grh)); 651 wc.byte_len - sizeof(struct ib_grh));
652 wc.wr_id = qp->r_wr_id;
617 wc.status = IB_WC_SUCCESS; 653 wc.status = IB_WC_SUCCESS;
618 wc.opcode = IB_WC_RECV; 654 wc.opcode = IB_WC_RECV;
619 wc.vendor_err = 0; 655 wc.vendor_err = 0;
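
The hunk above replaces the open-coded RQ/SRQ dequeue with ipath_get_rwqe() and an r_reuse_sge flag: when a packet must be dropped after a receive entry was already claimed (for example because it is too long), the entry is kept and handed to the next packet instead of being completed in error. A minimal userspace sketch of that reuse pattern follows; the ring, flag, and sizes here are illustrative, not the driver's structures.

/*
 * Sketch: reuse the last dequeued receive entry if the previous packet
 * was dropped, so a drop never consumes a posted buffer.
 */
#include <stdio.h>
#include <string.h>

#define RING_ENTRIES 4
#define BUF_LEN      64

struct rx_ring {
        char bufs[RING_ENTRIES][BUF_LEN];
        unsigned head, tail;    /* producer/consumer indexes */
        int reuse_last;         /* previous entry dequeued but not used */
        unsigned last;          /* index of that entry */
};

static int rx_get_buf(struct rx_ring *r, unsigned *idx)
{
        if (r->reuse_last) {            /* reuse the dropped packet's entry */
                r->reuse_last = 0;
                *idx = r->last;
                return 0;
        }
        if (r->tail == r->head)         /* ring empty: no posted buffers */
                return -1;
        *idx = r->tail;
        r->last = r->tail;
        r->tail = (r->tail + 1) % RING_ENTRIES;
        return 0;
}

static void rx_packet(struct rx_ring *r, const char *data, size_t len)
{
        unsigned idx;

        if (rx_get_buf(r, &idx)) {
                printf("drop: no receive buffer\n");
                return;
        }
        if (len > BUF_LEN) {            /* too big: drop, but keep the entry */
                r->reuse_last = 1;
                printf("drop: packet too long, entry %u kept\n", idx);
                return;
        }
        memcpy(r->bufs[idx], data, len);
        printf("delivered %zu bytes into entry %u\n", len, idx);
}

int main(void)
{
        struct rx_ring r = { .head = 3, .tail = 0 };  /* 3 buffers posted */
        char big[100] = { 0 };

        rx_packet(&r, big, sizeof(big));   /* dropped, entry kept */
        rx_packet(&r, "hello", 5);         /* lands in the kept entry */
        return 0;
}
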
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index d70a9b6b5239..b8381c5e72bd 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -33,15 +33,13 @@
33 33
34#include <rdma/ib_mad.h> 34#include <rdma/ib_mad.h>
35#include <rdma/ib_user_verbs.h> 35#include <rdma/ib_user_verbs.h>
36#include <linux/io.h>
36#include <linux/utsname.h> 37#include <linux/utsname.h>
37 38
38#include "ipath_kernel.h" 39#include "ipath_kernel.h"
39#include "ipath_verbs.h" 40#include "ipath_verbs.h"
40#include "ipath_common.h" 41#include "ipath_common.h"
41 42
42/* Not static, because we don't want the compiler removing it */
43const char ipath_verbs_version[] = "ipath_verbs " IPATH_IDSTR;
44
45static unsigned int ib_ipath_qp_table_size = 251; 43static unsigned int ib_ipath_qp_table_size = 251;
46module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO); 44module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
47MODULE_PARM_DESC(qp_table_size, "QP table size"); 45MODULE_PARM_DESC(qp_table_size, "QP table size");
@@ -52,10 +50,6 @@ module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
52MODULE_PARM_DESC(lkey_table_size, 50MODULE_PARM_DESC(lkey_table_size,
53 "LKEY table size in bits (2^n, 1 <= n <= 23)"); 51 "LKEY table size in bits (2^n, 1 <= n <= 23)");
54 52
55unsigned int ib_ipath_debug; /* debug mask */
56module_param_named(debug, ib_ipath_debug, uint, S_IWUSR | S_IRUGO);
57MODULE_PARM_DESC(debug, "Verbs debug mask");
58
59static unsigned int ib_ipath_max_pds = 0xFFFF; 53static unsigned int ib_ipath_max_pds = 0xFFFF;
60module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO); 54module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
61MODULE_PARM_DESC(max_pds, 55MODULE_PARM_DESC(max_pds,
@@ -79,6 +73,10 @@ module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
79 S_IWUSR | S_IRUGO); 73 S_IWUSR | S_IRUGO);
80MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); 74MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
81 75
76unsigned int ib_ipath_max_qps = 16384;
77module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
78MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
79
82unsigned int ib_ipath_max_sges = 0x60; 80unsigned int ib_ipath_max_sges = 0x60;
83module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO); 81module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
84MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); 82MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
@@ -109,9 +107,9 @@ module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
109 uint, S_IWUSR | S_IRUGO); 107 uint, S_IWUSR | S_IRUGO);
110MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support"); 108MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
111 109
112MODULE_LICENSE("GPL"); 110static unsigned int ib_ipath_disable_sma;
113MODULE_AUTHOR("QLogic <support@pathscale.com>"); 111module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
114MODULE_DESCRIPTION("QLogic InfiniPath driver"); 112MODULE_PARM_DESC(ib_ipath_disable_sma, "Disable the SMA");
115 113
116const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { 114const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
117 [IB_QPS_RESET] = 0, 115 [IB_QPS_RESET] = 0,
@@ -125,6 +123,16 @@ const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
125 [IB_QPS_ERR] = 0, 123 [IB_QPS_ERR] = 0,
126}; 124};
127 125
126struct ipath_ucontext {
127 struct ib_ucontext ibucontext;
128};
129
130static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
131 *ibucontext)
132{
133 return container_of(ibucontext, struct ipath_ucontext, ibucontext);
134}
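
The to_iucontext() helper just added above follows the usual container_of() accessor idiom: the core stack hands back a pointer to the embedded struct ib_ucontext, and the driver recovers its private wrapper from it. A generic, self-contained sketch of the idiom; the macro and types here are simplified stand-ins, not the kernel definitions.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct base { int id; };

struct wrapper {
        long private_state;
        struct base base;      /* embedded member handed to other layers */
};

static struct wrapper *to_wrapper(struct base *b)
{
        return container_of(b, struct wrapper, base);
}

int main(void)
{
        struct wrapper w = { .private_state = 42, .base = { .id = 7 } };
        struct base *b = &w.base;              /* what the core layer sees */

        printf("%ld\n", to_wrapper(b)->private_state);   /* prints 42 */
        return 0;
}
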
135
128/* 136/*
129 * Translate ib_wr_opcode into ib_wc_opcode. 137 * Translate ib_wr_opcode into ib_wc_opcode.
130 */ 138 */
@@ -277,11 +285,12 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
277 struct ib_recv_wr **bad_wr) 285 struct ib_recv_wr **bad_wr)
278{ 286{
279 struct ipath_qp *qp = to_iqp(ibqp); 287 struct ipath_qp *qp = to_iqp(ibqp);
288 struct ipath_rwq *wq = qp->r_rq.wq;
280 unsigned long flags; 289 unsigned long flags;
281 int ret; 290 int ret;
282 291
283 /* Check that state is OK to post receive. */ 292 /* Check that state is OK to post receive. */
284 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK)) { 293 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
285 *bad_wr = wr; 294 *bad_wr = wr;
286 ret = -EINVAL; 295 ret = -EINVAL;
287 goto bail; 296 goto bail;
@@ -290,59 +299,31 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
290 for (; wr; wr = wr->next) { 299 for (; wr; wr = wr->next) {
291 struct ipath_rwqe *wqe; 300 struct ipath_rwqe *wqe;
292 u32 next; 301 u32 next;
293 int i, j; 302 int i;
294 303
295 if (wr->num_sge > qp->r_rq.max_sge) { 304 if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
296 *bad_wr = wr; 305 *bad_wr = wr;
297 ret = -ENOMEM; 306 ret = -ENOMEM;
298 goto bail; 307 goto bail;
299 } 308 }
300 309
301 spin_lock_irqsave(&qp->r_rq.lock, flags); 310 spin_lock_irqsave(&qp->r_rq.lock, flags);
302 next = qp->r_rq.head + 1; 311 next = wq->head + 1;
303 if (next >= qp->r_rq.size) 312 if (next >= qp->r_rq.size)
304 next = 0; 313 next = 0;
305 if (next == qp->r_rq.tail) { 314 if (next == wq->tail) {
306 spin_unlock_irqrestore(&qp->r_rq.lock, flags); 315 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
307 *bad_wr = wr; 316 *bad_wr = wr;
308 ret = -ENOMEM; 317 ret = -ENOMEM;
309 goto bail; 318 goto bail;
310 } 319 }
311 320
312 wqe = get_rwqe_ptr(&qp->r_rq, qp->r_rq.head); 321 wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
313 wqe->wr_id = wr->wr_id; 322 wqe->wr_id = wr->wr_id;
314 wqe->sg_list[0].mr = NULL; 323 wqe->num_sge = wr->num_sge;
315 wqe->sg_list[0].vaddr = NULL; 324 for (i = 0; i < wr->num_sge; i++)
316 wqe->sg_list[0].length = 0; 325 wqe->sg_list[i] = wr->sg_list[i];
317 wqe->sg_list[0].sge_length = 0; 326 wq->head = next;
318 wqe->length = 0;
319 for (i = 0, j = 0; i < wr->num_sge; i++) {
320 /* Check LKEY */
321 if (to_ipd(qp->ibqp.pd)->user &&
322 wr->sg_list[i].lkey == 0) {
323 spin_unlock_irqrestore(&qp->r_rq.lock,
324 flags);
325 *bad_wr = wr;
326 ret = -EINVAL;
327 goto bail;
328 }
329 if (wr->sg_list[i].length == 0)
330 continue;
331 if (!ipath_lkey_ok(
332 &to_idev(qp->ibqp.device)->lk_table,
333 &wqe->sg_list[j], &wr->sg_list[i],
334 IB_ACCESS_LOCAL_WRITE)) {
335 spin_unlock_irqrestore(&qp->r_rq.lock,
336 flags);
337 *bad_wr = wr;
338 ret = -EINVAL;
339 goto bail;
340 }
341 wqe->length += wr->sg_list[i].length;
342 j++;
343 }
344 wqe->num_sge = j;
345 qp->r_rq.head = next;
346 spin_unlock_irqrestore(&qp->r_rq.lock, flags); 327 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
347 } 328 }
348 ret = 0; 329 ret = 0;
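
The rewritten post-receive loop keeps head and tail in the shared struct ipath_rwq and treats the ring as full when advancing head would land on tail, so one slot always stays empty. A standalone sketch of that full/empty convention, with invented names and a fixed size:

/* One-slot-empty circular queue convention, as in the hunk above. */
#include <stdio.h>

#define QSIZE 8   /* usable capacity is QSIZE - 1 */

struct ring { unsigned head, tail; };

static int ring_post(struct ring *r)          /* producer */
{
        unsigned next = r->head + 1;

        if (next >= QSIZE)
                next = 0;
        if (next == r->tail)                  /* would catch the consumer: full */
                return -1;
        /* ... fill the entry at r->head here ... */
        r->head = next;
        return 0;
}

static int ring_consume(struct ring *r)       /* consumer */
{
        if (r->tail == r->head)               /* empty */
                return -1;
        /* ... use the entry at r->tail here ... */
        if (++r->tail >= QSIZE)
                r->tail = 0;
        return 0;
}

int main(void)
{
        struct ring r = { 0, 0 };
        int posted = 0;

        while (ring_post(&r) == 0)
                posted++;
        printf("posted %d of %d slots\n", posted, QSIZE);  /* 7 of 8 */
        while (ring_consume(&r) == 0)
                ;
        return 0;
}
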
@@ -377,6 +358,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
377 switch (qp->ibqp.qp_type) { 358 switch (qp->ibqp.qp_type) {
378 case IB_QPT_SMI: 359 case IB_QPT_SMI:
379 case IB_QPT_GSI: 360 case IB_QPT_GSI:
361 if (ib_ipath_disable_sma)
362 break;
363 /* FALLTHROUGH */
380 case IB_QPT_UD: 364 case IB_QPT_UD:
381 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); 365 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
382 break; 366 break;
@@ -395,7 +379,7 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
395} 379}
396 380
397/** 381/**
398 * ipath_ib_rcv - process and incoming packet 382 * ipath_ib_rcv - process an incoming packet
399 * @arg: the device pointer 383 * @arg: the device pointer
400 * @rhdr: the header of the packet 384 * @rhdr: the header of the packet
401 * @data: the packet data 385 * @data: the packet data
@@ -404,9 +388,9 @@ static void ipath_qp_rcv(struct ipath_ibdev *dev,
404 * This is called from ipath_kreceive() to process an incoming packet at 388 * This is called from ipath_kreceive() to process an incoming packet at
405 * interrupt level. Tlen is the length of the header + data + CRC in bytes. 389 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
406 */ 390 */
407static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen) 391void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
392 u32 tlen)
408{ 393{
409 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
410 struct ipath_ib_header *hdr = rhdr; 394 struct ipath_ib_header *hdr = rhdr;
411 struct ipath_other_headers *ohdr; 395 struct ipath_other_headers *ohdr;
412 struct ipath_qp *qp; 396 struct ipath_qp *qp;
@@ -427,7 +411,7 @@ static void ipath_ib_rcv(void *arg, void *rhdr, void *data, u32 tlen)
427 lid = be16_to_cpu(hdr->lrh[1]); 411 lid = be16_to_cpu(hdr->lrh[1]);
428 if (lid < IPATH_MULTICAST_LID_BASE) { 412 if (lid < IPATH_MULTICAST_LID_BASE) {
429 lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1); 413 lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
430 if (unlikely(lid != ipath_layer_get_lid(dev->dd))) { 414 if (unlikely(lid != dev->dd->ipath_lid)) {
431 dev->rcv_errors++; 415 dev->rcv_errors++;
432 goto bail; 416 goto bail;
433 } 417 }
@@ -495,9 +479,8 @@ bail:;
495 * This is called from ipath_do_rcv_timer() at interrupt level to check for 479 * This is called from ipath_do_rcv_timer() at interrupt level to check for
496 * QPs which need retransmits and to collect performance numbers. 480 * QPs which need retransmits and to collect performance numbers.
497 */ 481 */
498static void ipath_ib_timer(void *arg) 482void ipath_ib_timer(struct ipath_ibdev *dev)
499{ 483{
500 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
501 struct ipath_qp *resend = NULL; 484 struct ipath_qp *resend = NULL;
502 struct list_head *last; 485 struct list_head *last;
503 struct ipath_qp *qp; 486 struct ipath_qp *qp;
@@ -539,19 +522,19 @@ static void ipath_ib_timer(void *arg)
539 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && 522 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
540 --dev->pma_sample_start == 0) { 523 --dev->pma_sample_start == 0) {
541 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; 524 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
542 ipath_layer_snapshot_counters(dev->dd, &dev->ipath_sword, 525 ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
543 &dev->ipath_rword, 526 &dev->ipath_rword,
544 &dev->ipath_spkts, 527 &dev->ipath_spkts,
545 &dev->ipath_rpkts, 528 &dev->ipath_rpkts,
546 &dev->ipath_xmit_wait); 529 &dev->ipath_xmit_wait);
547 } 530 }
548 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { 531 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
549 if (dev->pma_sample_interval == 0) { 532 if (dev->pma_sample_interval == 0) {
550 u64 ta, tb, tc, td, te; 533 u64 ta, tb, tc, td, te;
551 534
552 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; 535 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
553 ipath_layer_snapshot_counters(dev->dd, &ta, &tb, 536 ipath_snapshot_counters(dev->dd, &ta, &tb,
554 &tc, &td, &te); 537 &tc, &td, &te);
555 538
556 dev->ipath_sword = ta - dev->ipath_sword; 539 dev->ipath_sword = ta - dev->ipath_sword;
557 dev->ipath_rword = tb - dev->ipath_rword; 540 dev->ipath_rword = tb - dev->ipath_rword;
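
The sampling logic above snapshots the free-running hardware counters when a sample window opens and subtracts the stored snapshot when it closes, so the PMA reports per-window deltas. A simplified model of that snapshot-and-subtract step; the counter and sampler here are invented.

/* Snapshot-and-subtract sampling of a free-running counter. */
#include <stdio.h>
#include <stdint.h>

static uint64_t hw_counter;                 /* stands in for a chip register */

static uint64_t read_hw_counter(void) { return hw_counter; }

struct sampler { uint64_t snap; };

static void sample_start(struct sampler *s)
{
        s->snap = read_hw_counter();        /* remember the starting value */
}

static uint64_t sample_end(struct sampler *s)
{
        return read_hw_counter() - s->snap; /* delta over the window */
}

int main(void)
{
        struct sampler s;

        hw_counter = 1000;
        sample_start(&s);
        hw_counter += 250;                  /* traffic during the window */
        printf("window delta: %llu\n",
               (unsigned long long)sample_end(&s));   /* 250 */
        return 0;
}
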
@@ -581,6 +564,362 @@ static void ipath_ib_timer(void *arg)
581 } 564 }
582} 565}
583 566
567static void update_sge(struct ipath_sge_state *ss, u32 length)
568{
569 struct ipath_sge *sge = &ss->sge;
570
571 sge->vaddr += length;
572 sge->length -= length;
573 sge->sge_length -= length;
574 if (sge->sge_length == 0) {
575 if (--ss->num_sge)
576 *sge = *ss->sg_list++;
577 } else if (sge->length == 0 && sge->mr != NULL) {
578 if (++sge->n >= IPATH_SEGSZ) {
579 if (++sge->m >= sge->mr->mapsz)
580 return;
581 sge->n = 0;
582 }
583 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
584 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
585 }
586}
587
588#ifdef __LITTLE_ENDIAN
589static inline u32 get_upper_bits(u32 data, u32 shift)
590{
591 return data >> shift;
592}
593
594static inline u32 set_upper_bits(u32 data, u32 shift)
595{
596 return data << shift;
597}
598
599static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
600{
601 data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
602 data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
603 return data;
604}
605#else
606static inline u32 get_upper_bits(u32 data, u32 shift)
607{
608 return data << shift;
609}
610
611static inline u32 set_upper_bits(u32 data, u32 shift)
612{
613 return data >> shift;
614}
615
616static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
617{
618 data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
619 data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
620 return data;
621}
622#endif
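
These helpers let copy_io() keep only the n valid bytes of a partially filled word and place them at byte offset off, independent of host endianness. A standalone check of the little-endian clear_upper_bytes() variant as written above, assuming 1 <= n and n + off <= 4 (which the caller guarantees):

#include <stdio.h>
#include <stdint.h>

#define BITS_PER_BYTE 8
typedef uint32_t u32;

static u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
        data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
        data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
        return data;
}

int main(void)
{
        /* 2 valid bytes (0x44, 0x33) placed at byte offset 1 of the word */
        printf("%08x\n", (unsigned)clear_upper_bytes(0x11223344, 2, 1)); /* 00334400 */
        /* 1 valid byte at offset 0: everything above byte 0 cleared      */
        printf("%08x\n", (unsigned)clear_upper_bytes(0x11223344, 1, 0)); /* 00000044 */
        return 0;
}
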
623
624static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
625 u32 length)
626{
627 u32 extra = 0;
628 u32 data = 0;
629 u32 last;
630
631 while (1) {
632 u32 len = ss->sge.length;
633 u32 off;
634
635 BUG_ON(len == 0);
636 if (len > length)
637 len = length;
638 if (len > ss->sge.sge_length)
639 len = ss->sge.sge_length;
640 /* If the source address is not aligned, try to align it. */
641 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
642 if (off) {
643 u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
644 ~(sizeof(u32) - 1));
645 u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
646 u32 y;
647
648 y = sizeof(u32) - off;
649 if (len > y)
650 len = y;
651 if (len + extra >= sizeof(u32)) {
652 data |= set_upper_bits(v, extra *
653 BITS_PER_BYTE);
654 len = sizeof(u32) - extra;
655 if (len == length) {
656 last = data;
657 break;
658 }
659 __raw_writel(data, piobuf);
660 piobuf++;
661 extra = 0;
662 data = 0;
663 } else {
664 /* Clear unused upper bytes */
665 data |= clear_upper_bytes(v, len, extra);
666 if (len == length) {
667 last = data;
668 break;
669 }
670 extra += len;
671 }
672 } else if (extra) {
673 /* Source address is aligned. */
674 u32 *addr = (u32 *) ss->sge.vaddr;
675 int shift = extra * BITS_PER_BYTE;
676 int ushift = 32 - shift;
677 u32 l = len;
678
679 while (l >= sizeof(u32)) {
680 u32 v = *addr;
681
682 data |= set_upper_bits(v, shift);
683 __raw_writel(data, piobuf);
684 data = get_upper_bits(v, ushift);
685 piobuf++;
686 addr++;
687 l -= sizeof(u32);
688 }
689 /*
690 * We still have 'extra' number of bytes leftover.
691 */
692 if (l) {
693 u32 v = *addr;
694
695 if (l + extra >= sizeof(u32)) {
696 data |= set_upper_bits(v, shift);
697 len -= l + extra - sizeof(u32);
698 if (len == length) {
699 last = data;
700 break;
701 }
702 __raw_writel(data, piobuf);
703 piobuf++;
704 extra = 0;
705 data = 0;
706 } else {
707 /* Clear unused upper bytes */
708 data |= clear_upper_bytes(v, l,
709 extra);
710 if (len == length) {
711 last = data;
712 break;
713 }
714 extra += l;
715 }
716 } else if (len == length) {
717 last = data;
718 break;
719 }
720 } else if (len == length) {
721 u32 w;
722
723 /*
724 * Need to round up for the last dword in the
725 * packet.
726 */
727 w = (len + 3) >> 2;
728 __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
729 piobuf += w - 1;
730 last = ((u32 *) ss->sge.vaddr)[w - 1];
731 break;
732 } else {
733 u32 w = len >> 2;
734
735 __iowrite32_copy(piobuf, ss->sge.vaddr, w);
736 piobuf += w;
737
738 extra = len & (sizeof(u32) - 1);
739 if (extra) {
740 u32 v = ((u32 *) ss->sge.vaddr)[w];
741
742 /* Clear unused upper bytes */
743 data = clear_upper_bytes(v, extra, 0);
744 }
745 }
746 update_sge(ss, len);
747 length -= len;
748 }
749 /* Update address before sending packet. */
750 update_sge(ss, length);
751 /* must flush early everything before trigger word */
752 ipath_flush_wc();
753 __raw_writel(last, piobuf);
754 /* be sure trigger word is written */
755 ipath_flush_wc();
756}
757
758/**
759 * ipath_verbs_send - send a packet
760 * @dd: the infinipath device
761 * @hdrwords: the number of words in the header
762 * @hdr: the packet header
763 * @len: the length of the packet in bytes
764 * @ss: the SGE to send
765 */
766int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
767 u32 *hdr, u32 len, struct ipath_sge_state *ss)
768{
769 u32 __iomem *piobuf;
770 u32 plen;
771 int ret;
772
773 /* +1 is for the qword padding of pbc */
774 plen = hdrwords + ((len + 3) >> 2) + 1;
775 if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
776 ipath_dbg("packet len 0x%x too long, failing\n", plen);
777 ret = -EINVAL;
778 goto bail;
779 }
780
781 /* Get a PIO buffer to use. */
782 piobuf = ipath_getpiobuf(dd, NULL);
783 if (unlikely(piobuf == NULL)) {
784 ret = -EBUSY;
785 goto bail;
786 }
787
788 /*
789 * Write len to control qword, no flags.
790 * We have to flush after the PBC for correctness on some cpus
791 * or WC buffer can be written out of order.
792 */
793 writeq(plen, piobuf);
794 ipath_flush_wc();
795 piobuf += 2;
796 if (len == 0) {
797 /*
798 * If there is just the header portion, must flush before
799 * writing last word of header for correctness, and after
800 * the last header word (trigger word).
801 */
802 __iowrite32_copy(piobuf, hdr, hdrwords - 1);
803 ipath_flush_wc();
804 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
805 ipath_flush_wc();
806 ret = 0;
807 goto bail;
808 }
809
810 __iowrite32_copy(piobuf, hdr, hdrwords);
811 piobuf += hdrwords;
812
813 /* The common case is aligned and contained in one segment. */
814 if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
815 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
816 u32 w;
817 u32 *addr = (u32 *) ss->sge.vaddr;
818
819 /* Update address before sending packet. */
820 update_sge(ss, len);
821 /* Need to round up for the last dword in the packet. */
822 w = (len + 3) >> 2;
823 __iowrite32_copy(piobuf, addr, w - 1);
824 /* must flush early everything before trigger word */
825 ipath_flush_wc();
826 __raw_writel(addr[w - 1], piobuf + w - 1);
827 /* be sure trigger word is written */
828 ipath_flush_wc();
829 ret = 0;
830 goto bail;
831 }
832 copy_io(piobuf, ss, len);
833 ret = 0;
834
835bail:
836 return ret;
837}
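
plen above is counted in 32-bit words: the header words, the payload rounded up to whole dwords, and one extra dword for the qword padding of the PBC; the length check then converts back to bytes with plen << 2. A worked example with invented values:

/* Worked example of the plen computation in ipath_verbs_send(). */
#include <stdio.h>

int main(void)
{
        unsigned hdrwords = 14;           /* header length in 32-bit words */
        unsigned len = 1025;              /* payload length in bytes       */
        unsigned ibmaxlen = 4096 + 128;   /* illustrative device limit     */

        /* +1 is the qword padding of the PBC, as in the driver comment */
        unsigned plen = hdrwords + ((len + 3) >> 2) + 1;

        printf("payload dwords: %u\n", (len + 3) >> 2);         /* 257        */
        printf("plen: %u words (%u bytes)\n", plen, plen << 2); /* 272, 1088  */
        printf("fits: %s\n", (plen << 2) > ibmaxlen ? "no" : "yes");
        return 0;
}
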
838
839int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
840 u64 *rwords, u64 *spkts, u64 *rpkts,
841 u64 *xmit_wait)
842{
843 int ret;
844
845 if (!(dd->ipath_flags & IPATH_INITTED)) {
846 /* no hardware, freeze, etc. */
847 ipath_dbg("unit %u not usable\n", dd->ipath_unit);
848 ret = -EINVAL;
849 goto bail;
850 }
851 *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
852 *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
853 *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
854 *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
855 *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
856
857 ret = 0;
858
859bail:
860 return ret;
861}
862
863/**
864 * ipath_get_counters - get various chip counters
865 * @dd: the infinipath device
866 * @cntrs: counters are placed here
867 *
868 * Return the counters needed by recv_pma_get_portcounters().
869 */
870int ipath_get_counters(struct ipath_devdata *dd,
871 struct ipath_verbs_counters *cntrs)
872{
873 int ret;
874
875 if (!(dd->ipath_flags & IPATH_INITTED)) {
876 /* no hardware, freeze, etc. */
877 ipath_dbg("unit %u not usable\n", dd->ipath_unit);
878 ret = -EINVAL;
879 goto bail;
880 }
881 cntrs->symbol_error_counter =
882 ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
883 cntrs->link_error_recovery_counter =
884 ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
885 /*
886 * The link downed counter counts when the other side downs the
887 * connection. We add in the number of times we downed the link
888 * due to local link integrity errors to compensate.
889 */
890 cntrs->link_downed_counter =
891 ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
892 cntrs->port_rcv_errors =
893 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
894 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
895 ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
896 ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
897 ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
898 ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
899 ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
900 ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
901 ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
902 cntrs->port_rcv_remphys_errors =
903 ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
904 cntrs->port_xmit_discards =
905 ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
906 cntrs->port_xmit_data =
907 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
908 cntrs->port_rcv_data =
909 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
910 cntrs->port_xmit_packets =
911 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
912 cntrs->port_rcv_packets =
913 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
914 cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
915 cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
916
917 ret = 0;
918
919bail:
920 return ret;
921}
922
584/** 923/**
585 * ipath_ib_piobufavail - callback when a PIO buffer is available 924 * ipath_ib_piobufavail - callback when a PIO buffer is available
586 * @arg: the device pointer 925 * @arg: the device pointer
@@ -591,9 +930,8 @@ static void ipath_ib_timer(void *arg)
591 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and 930 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
592 * return zero). 931 * return zero).
593 */ 932 */
594static int ipath_ib_piobufavail(void *arg) 933int ipath_ib_piobufavail(struct ipath_ibdev *dev)
595{ 934{
596 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
597 struct ipath_qp *qp; 935 struct ipath_qp *qp;
598 unsigned long flags; 936 unsigned long flags;
599 937
@@ -624,14 +962,14 @@ static int ipath_query_device(struct ib_device *ibdev,
624 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | 962 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
625 IB_DEVICE_SYS_IMAGE_GUID; 963 IB_DEVICE_SYS_IMAGE_GUID;
626 props->page_size_cap = PAGE_SIZE; 964 props->page_size_cap = PAGE_SIZE;
627 props->vendor_id = ipath_layer_get_vendorid(dev->dd); 965 props->vendor_id = dev->dd->ipath_vendorid;
628 props->vendor_part_id = ipath_layer_get_deviceid(dev->dd); 966 props->vendor_part_id = dev->dd->ipath_deviceid;
629 props->hw_ver = ipath_layer_get_pcirev(dev->dd); 967 props->hw_ver = dev->dd->ipath_pcirev;
630 968
631 props->sys_image_guid = dev->sys_image_guid; 969 props->sys_image_guid = dev->sys_image_guid;
632 970
633 props->max_mr_size = ~0ull; 971 props->max_mr_size = ~0ull;
634 props->max_qp = dev->qp_table.max; 972 props->max_qp = ib_ipath_max_qps;
635 props->max_qp_wr = ib_ipath_max_qp_wrs; 973 props->max_qp_wr = ib_ipath_max_qp_wrs;
636 props->max_sge = ib_ipath_max_sges; 974 props->max_sge = ib_ipath_max_sges;
637 props->max_cq = ib_ipath_max_cqs; 975 props->max_cq = ib_ipath_max_cqs;
@@ -647,7 +985,7 @@ static int ipath_query_device(struct ib_device *ibdev,
647 props->max_srq_sge = ib_ipath_max_srq_sges; 985 props->max_srq_sge = ib_ipath_max_srq_sges;
648 /* props->local_ca_ack_delay */ 986 /* props->local_ca_ack_delay */
649 props->atomic_cap = IB_ATOMIC_HCA; 987 props->atomic_cap = IB_ATOMIC_HCA;
650 props->max_pkeys = ipath_layer_get_npkeys(dev->dd); 988 props->max_pkeys = ipath_get_npkeys(dev->dd);
651 props->max_mcast_grp = ib_ipath_max_mcast_grps; 989 props->max_mcast_grp = ib_ipath_max_mcast_grps;
652 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; 990 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
653 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 991 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
@@ -672,12 +1010,17 @@ const u8 ipath_cvt_physportstate[16] = {
672 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6, 1010 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 6,
673}; 1011};
674 1012
1013u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
1014{
1015 return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
1016}
1017
675static int ipath_query_port(struct ib_device *ibdev, 1018static int ipath_query_port(struct ib_device *ibdev,
676 u8 port, struct ib_port_attr *props) 1019 u8 port, struct ib_port_attr *props)
677{ 1020{
678 struct ipath_ibdev *dev = to_idev(ibdev); 1021 struct ipath_ibdev *dev = to_idev(ibdev);
679 enum ib_mtu mtu; 1022 enum ib_mtu mtu;
680 u16 lid = ipath_layer_get_lid(dev->dd); 1023 u16 lid = dev->dd->ipath_lid;
681 u64 ibcstat; 1024 u64 ibcstat;
682 1025
683 memset(props, 0, sizeof(*props)); 1026 memset(props, 0, sizeof(*props));
@@ -685,16 +1028,16 @@ static int ipath_query_port(struct ib_device *ibdev,
685 props->lmc = dev->mkeyprot_resv_lmc & 7; 1028 props->lmc = dev->mkeyprot_resv_lmc & 7;
686 props->sm_lid = dev->sm_lid; 1029 props->sm_lid = dev->sm_lid;
687 props->sm_sl = dev->sm_sl; 1030 props->sm_sl = dev->sm_sl;
688 ibcstat = ipath_layer_get_lastibcstat(dev->dd); 1031 ibcstat = dev->dd->ipath_lastibcstat;
689 props->state = ((ibcstat >> 4) & 0x3) + 1; 1032 props->state = ((ibcstat >> 4) & 0x3) + 1;
690 /* See phys_state_show() */ 1033 /* See phys_state_show() */
691 props->phys_state = ipath_cvt_physportstate[ 1034 props->phys_state = ipath_cvt_physportstate[
692 ipath_layer_get_lastibcstat(dev->dd) & 0xf]; 1035 dev->dd->ipath_lastibcstat & 0xf];
693 props->port_cap_flags = dev->port_cap_flags; 1036 props->port_cap_flags = dev->port_cap_flags;
694 props->gid_tbl_len = 1; 1037 props->gid_tbl_len = 1;
695 props->max_msg_sz = 0x80000000; 1038 props->max_msg_sz = 0x80000000;
696 props->pkey_tbl_len = ipath_layer_get_npkeys(dev->dd); 1039 props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
697 props->bad_pkey_cntr = ipath_layer_get_cr_errpkey(dev->dd) - 1040 props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
698 dev->z_pkey_violations; 1041 dev->z_pkey_violations;
699 props->qkey_viol_cntr = dev->qkey_violations; 1042 props->qkey_viol_cntr = dev->qkey_violations;
700 props->active_width = IB_WIDTH_4X; 1043 props->active_width = IB_WIDTH_4X;
@@ -704,7 +1047,7 @@ static int ipath_query_port(struct ib_device *ibdev,
704 props->init_type_reply = 0; 1047 props->init_type_reply = 0;
705 1048
706 props->max_mtu = IB_MTU_4096; 1049 props->max_mtu = IB_MTU_4096;
707 switch (ipath_layer_get_ibmtu(dev->dd)) { 1050 switch (dev->dd->ipath_ibmtu) {
708 case 4096: 1051 case 4096:
709 mtu = IB_MTU_4096; 1052 mtu = IB_MTU_4096;
710 break; 1053 break;
@@ -763,7 +1106,7 @@ static int ipath_modify_port(struct ib_device *ibdev,
763 dev->port_cap_flags |= props->set_port_cap_mask; 1106 dev->port_cap_flags |= props->set_port_cap_mask;
764 dev->port_cap_flags &= ~props->clr_port_cap_mask; 1107 dev->port_cap_flags &= ~props->clr_port_cap_mask;
765 if (port_modify_mask & IB_PORT_SHUTDOWN) 1108 if (port_modify_mask & IB_PORT_SHUTDOWN)
766 ipath_layer_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); 1109 ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
767 if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) 1110 if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
768 dev->qkey_violations = 0; 1111 dev->qkey_violations = 0;
769 return 0; 1112 return 0;
@@ -780,7 +1123,7 @@ static int ipath_query_gid(struct ib_device *ibdev, u8 port,
780 goto bail; 1123 goto bail;
781 } 1124 }
782 gid->global.subnet_prefix = dev->gid_prefix; 1125 gid->global.subnet_prefix = dev->gid_prefix;
783 gid->global.interface_id = ipath_layer_get_guid(dev->dd); 1126 gid->global.interface_id = dev->dd->ipath_guid;
784 1127
785 ret = 0; 1128 ret = 0;
786 1129
@@ -803,18 +1146,22 @@ static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
803 * we allow allocations of more than we report for this value. 1146 * we allow allocations of more than we report for this value.
804 */ 1147 */
805 1148
806 if (dev->n_pds_allocated == ib_ipath_max_pds) { 1149 pd = kmalloc(sizeof *pd, GFP_KERNEL);
1150 if (!pd) {
807 ret = ERR_PTR(-ENOMEM); 1151 ret = ERR_PTR(-ENOMEM);
808 goto bail; 1152 goto bail;
809 } 1153 }
810 1154
811 pd = kmalloc(sizeof *pd, GFP_KERNEL); 1155 spin_lock(&dev->n_pds_lock);
812 if (!pd) { 1156 if (dev->n_pds_allocated == ib_ipath_max_pds) {
1157 spin_unlock(&dev->n_pds_lock);
1158 kfree(pd);
813 ret = ERR_PTR(-ENOMEM); 1159 ret = ERR_PTR(-ENOMEM);
814 goto bail; 1160 goto bail;
815 } 1161 }
816 1162
817 dev->n_pds_allocated++; 1163 dev->n_pds_allocated++;
1164 spin_unlock(&dev->n_pds_lock);
818 1165
819 /* ib_alloc_pd() will initialize pd->ibpd. */ 1166 /* ib_alloc_pd() will initialize pd->ibpd. */
820 pd->user = udata != NULL; 1167 pd->user = udata != NULL;
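
The reworked allocation path above does the possibly sleeping kmalloc() before taking the counting spinlock, then rechecks the limit under the lock and frees the object if the limit was reached. A userspace sketch of the same shape, with a pthread mutex standing in for the spinlock and an invented limit:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_OBJS 2

static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned n_allocated;

struct obj { int id; };

static struct obj *obj_alloc(void)
{
        struct obj *o = malloc(sizeof(*o));   /* may "sleep": done unlocked */

        if (!o)
                return NULL;

        pthread_mutex_lock(&count_lock);
        if (n_allocated == MAX_OBJS) {        /* recheck under the lock */
                pthread_mutex_unlock(&count_lock);
                free(o);
                return NULL;
        }
        n_allocated++;
        pthread_mutex_unlock(&count_lock);
        return o;
}

int main(void)
{
        /* demo only; the two successful allocations are not freed here */
        for (int i = 0; i < 4; i++)
                printf("alloc %d: %s\n", i, obj_alloc() ? "ok" : "limit");
        return 0;
}
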
@@ -830,7 +1177,9 @@ static int ipath_dealloc_pd(struct ib_pd *ibpd)
830 struct ipath_pd *pd = to_ipd(ibpd); 1177 struct ipath_pd *pd = to_ipd(ibpd);
831 struct ipath_ibdev *dev = to_idev(ibpd->device); 1178 struct ipath_ibdev *dev = to_idev(ibpd->device);
832 1179
1180 spin_lock(&dev->n_pds_lock);
833 dev->n_pds_allocated--; 1181 dev->n_pds_allocated--;
1182 spin_unlock(&dev->n_pds_lock);
834 1183
835 kfree(pd); 1184 kfree(pd);
836 1185
@@ -851,11 +1200,6 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
851 struct ib_ah *ret; 1200 struct ib_ah *ret;
852 struct ipath_ibdev *dev = to_idev(pd->device); 1201 struct ipath_ibdev *dev = to_idev(pd->device);
853 1202
854 if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
855 ret = ERR_PTR(-ENOMEM);
856 goto bail;
857 }
858
859 /* A multicast address requires a GRH (see ch. 8.4.1). */ 1203 /* A multicast address requires a GRH (see ch. 8.4.1). */
860 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && 1204 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
861 ah_attr->dlid != IPATH_PERMISSIVE_LID && 1205 ah_attr->dlid != IPATH_PERMISSIVE_LID &&
@@ -881,7 +1225,16 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
881 goto bail; 1225 goto bail;
882 } 1226 }
883 1227
1228 spin_lock(&dev->n_ahs_lock);
1229 if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
1230 spin_unlock(&dev->n_ahs_lock);
1231 kfree(ah);
1232 ret = ERR_PTR(-ENOMEM);
1233 goto bail;
1234 }
1235
884 dev->n_ahs_allocated++; 1236 dev->n_ahs_allocated++;
1237 spin_unlock(&dev->n_ahs_lock);
885 1238
886 /* ib_create_ah() will initialize ah->ibah. */ 1239 /* ib_create_ah() will initialize ah->ibah. */
887 ah->attr = *ah_attr; 1240 ah->attr = *ah_attr;
@@ -903,7 +1256,9 @@ static int ipath_destroy_ah(struct ib_ah *ibah)
903 struct ipath_ibdev *dev = to_idev(ibah->device); 1256 struct ipath_ibdev *dev = to_idev(ibah->device);
904 struct ipath_ah *ah = to_iah(ibah); 1257 struct ipath_ah *ah = to_iah(ibah);
905 1258
1259 spin_lock(&dev->n_ahs_lock);
906 dev->n_ahs_allocated--; 1260 dev->n_ahs_allocated--;
1261 spin_unlock(&dev->n_ahs_lock);
907 1262
908 kfree(ah); 1263 kfree(ah);
909 1264
@@ -919,25 +1274,50 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
919 return 0; 1274 return 0;
920} 1275}
921 1276
1277/**
1278 * ipath_get_npkeys - return the size of the PKEY table for port 0
1279 * @dd: the infinipath device
1280 */
1281unsigned ipath_get_npkeys(struct ipath_devdata *dd)
1282{
1283 return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1284}
1285
1286/**
1287 * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
1288 * @dd: the infinipath device
1289 * @index: the PKEY index
1290 */
1291unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
1292{
1293 unsigned ret;
1294
1295 if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1296 ret = 0;
1297 else
1298 ret = dd->ipath_pd[0]->port_pkeys[index];
1299
1300 return ret;
1301}
1302
922static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 1303static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
923 u16 *pkey) 1304 u16 *pkey)
924{ 1305{
925 struct ipath_ibdev *dev = to_idev(ibdev); 1306 struct ipath_ibdev *dev = to_idev(ibdev);
926 int ret; 1307 int ret;
927 1308
928 if (index >= ipath_layer_get_npkeys(dev->dd)) { 1309 if (index >= ipath_get_npkeys(dev->dd)) {
929 ret = -EINVAL; 1310 ret = -EINVAL;
930 goto bail; 1311 goto bail;
931 } 1312 }
932 1313
933 *pkey = ipath_layer_get_pkey(dev->dd, index); 1314 *pkey = ipath_get_pkey(dev->dd, index);
934 ret = 0; 1315 ret = 0;
935 1316
936bail: 1317bail:
937 return ret; 1318 return ret;
938} 1319}
939 1320
940
941/** 1321/**
 942 * ipath_alloc_ucontext - allocate a ucontext 1322 * ipath_alloc_ucontext - allocate a ucontext
943 * @ibdev: the infiniband device 1323 * @ibdev: the infiniband device
@@ -970,26 +1350,91 @@ static int ipath_dealloc_ucontext(struct ib_ucontext *context)
970 1350
971static int ipath_verbs_register_sysfs(struct ib_device *dev); 1351static int ipath_verbs_register_sysfs(struct ib_device *dev);
972 1352
1353static void __verbs_timer(unsigned long arg)
1354{
1355 struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1356
1357 /*
1358 * If port 0 receive packet interrupts are not available, or
1359 * can be missed, poll the receive queue
1360 */
1361 if (dd->ipath_flags & IPATH_POLL_RX_INTR)
1362 ipath_kreceive(dd);
1363
1364 /* Handle verbs layer timeouts. */
1365 ipath_ib_timer(dd->verbs_dev);
1366
1367 mod_timer(&dd->verbs_timer, jiffies + 1);
1368}
1369
1370static int enable_timer(struct ipath_devdata *dd)
1371{
1372 /*
1373 * Early chips had a design flaw where the chip and kernel idea
1374 * of the tail register don't always agree, and therefore we won't
1375 * get an interrupt on the next packet received.
1376 * If the board supports per packet receive interrupts, use it.
1377 * Otherwise, the timer function periodically checks for packets
1378 * to cover this case.
1379 * Either way, the timer is needed for verbs layer related
1380 * processing.
1381 */
1382 if (dd->ipath_flags & IPATH_GPIO_INTR) {
1383 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1384 0x2074076542310ULL);
1385 /* Enable GPIO bit 2 interrupt */
1386 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1387 (u64) (1 << 2));
1388 }
1389
1390 init_timer(&dd->verbs_timer);
1391 dd->verbs_timer.function = __verbs_timer;
1392 dd->verbs_timer.data = (unsigned long)dd;
1393 dd->verbs_timer.expires = jiffies + 1;
1394 add_timer(&dd->verbs_timer);
1395
1396 return 0;
1397}
1398
1399static int disable_timer(struct ipath_devdata *dd)
1400{
1401 /* Disable GPIO bit 2 interrupt */
1402 if (dd->ipath_flags & IPATH_GPIO_INTR)
1403 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
1404
1405 del_timer_sync(&dd->verbs_timer);
1406
1407 return 0;
1408}
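
The timer body added above polls the receive queue only on chips whose receive interrupts can be missed, and runs the verbs-layer timeout work on every tick either way. A trivial sketch of that decision, with an invented flag and stub work functions:

/* Sketch of the periodic-timer body: poll the receive queue only when
 * interrupts can be missed, always run the timeout work. */
#include <stdio.h>

#define POLL_RX_INTR 0x1      /* "receive interrupts may be missed" */

static void poll_receive_queue(void) { printf("  polled rx queue\n"); }
static void run_timeout_work(void)   { printf("  ran verbs timeouts\n"); }

static void timer_tick(unsigned flags)
{
        if (flags & POLL_RX_INTR)
                poll_receive_queue();
        run_timeout_work();       /* needed regardless of interrupt mode */
}

int main(void)
{
        printf("chip with reliable interrupts:\n");
        timer_tick(0);
        printf("chip that can miss rx interrupts:\n");
        timer_tick(POLL_RX_INTR);
        return 0;
}
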
1409
973/** 1410/**
974 * ipath_register_ib_device - register our device with the infiniband core 1411 * ipath_register_ib_device - register our device with the infiniband core
975 * @unit: the device number to register
976 * @dd: the device data structure 1412 * @dd: the device data structure
977 * Return the allocated ipath_ibdev pointer or NULL on error. 1413 * Return the allocated ipath_ibdev pointer or NULL on error.
978 */ 1414 */
979static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd) 1415int ipath_register_ib_device(struct ipath_devdata *dd)
980{ 1416{
981 struct ipath_layer_counters cntrs; 1417 struct ipath_verbs_counters cntrs;
982 struct ipath_ibdev *idev; 1418 struct ipath_ibdev *idev;
983 struct ib_device *dev; 1419 struct ib_device *dev;
984 int ret; 1420 int ret;
985 1421
986 idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); 1422 idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
987 if (idev == NULL) 1423 if (idev == NULL) {
1424 ret = -ENOMEM;
988 goto bail; 1425 goto bail;
1426 }
989 1427
990 dev = &idev->ibdev; 1428 dev = &idev->ibdev;
991 1429
992 /* Only need to initialize non-zero fields. */ 1430 /* Only need to initialize non-zero fields. */
1431 spin_lock_init(&idev->n_pds_lock);
1432 spin_lock_init(&idev->n_ahs_lock);
1433 spin_lock_init(&idev->n_cqs_lock);
1434 spin_lock_init(&idev->n_qps_lock);
1435 spin_lock_init(&idev->n_srqs_lock);
1436 spin_lock_init(&idev->n_mcast_grps_lock);
1437
993 spin_lock_init(&idev->qp_table.lock); 1438 spin_lock_init(&idev->qp_table.lock);
994 spin_lock_init(&idev->lk_table.lock); 1439 spin_lock_init(&idev->lk_table.lock);
995 idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); 1440 idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE);
@@ -1030,7 +1475,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
1030 idev->link_width_enabled = 3; /* 1x or 4x */ 1475 idev->link_width_enabled = 3; /* 1x or 4x */
1031 1476
1032 /* Snapshot current HW counters to "clear" them. */ 1477 /* Snapshot current HW counters to "clear" them. */
1033 ipath_layer_get_counters(dd, &cntrs); 1478 ipath_get_counters(dd, &cntrs);
1034 idev->z_symbol_error_counter = cntrs.symbol_error_counter; 1479 idev->z_symbol_error_counter = cntrs.symbol_error_counter;
1035 idev->z_link_error_recovery_counter = 1480 idev->z_link_error_recovery_counter =
1036 cntrs.link_error_recovery_counter; 1481 cntrs.link_error_recovery_counter;
@@ -1054,14 +1499,14 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
1054 * device types in the system, we can't be sure this is unique. 1499 * device types in the system, we can't be sure this is unique.
1055 */ 1500 */
1056 if (!sys_image_guid) 1501 if (!sys_image_guid)
1057 sys_image_guid = ipath_layer_get_guid(dd); 1502 sys_image_guid = dd->ipath_guid;
1058 idev->sys_image_guid = sys_image_guid; 1503 idev->sys_image_guid = sys_image_guid;
1059 idev->ib_unit = unit; 1504 idev->ib_unit = dd->ipath_unit;
1060 idev->dd = dd; 1505 idev->dd = dd;
1061 1506
1062 strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX); 1507 strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
1063 dev->owner = THIS_MODULE; 1508 dev->owner = THIS_MODULE;
1064 dev->node_guid = ipath_layer_get_guid(dd); 1509 dev->node_guid = dd->ipath_guid;
1065 dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION; 1510 dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
1066 dev->uverbs_cmd_mask = 1511 dev->uverbs_cmd_mask =
1067 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 1512 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -1093,9 +1538,9 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
1093 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 1538 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
1094 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 1539 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
1095 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); 1540 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
1096 dev->node_type = IB_NODE_CA; 1541 dev->node_type = RDMA_NODE_IB_CA;
1097 dev->phys_port_cnt = 1; 1542 dev->phys_port_cnt = 1;
1098 dev->dma_device = ipath_layer_get_device(dd); 1543 dev->dma_device = &dd->pcidev->dev;
1099 dev->class_dev.dev = dev->dma_device; 1544 dev->class_dev.dev = dev->dma_device;
1100 dev->query_device = ipath_query_device; 1545 dev->query_device = ipath_query_device;
1101 dev->modify_device = ipath_modify_device; 1546 dev->modify_device = ipath_modify_device;
@@ -1137,9 +1582,10 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
1137 dev->attach_mcast = ipath_multicast_attach; 1582 dev->attach_mcast = ipath_multicast_attach;
1138 dev->detach_mcast = ipath_multicast_detach; 1583 dev->detach_mcast = ipath_multicast_detach;
1139 dev->process_mad = ipath_process_mad; 1584 dev->process_mad = ipath_process_mad;
1585 dev->mmap = ipath_mmap;
1140 1586
1141 snprintf(dev->node_desc, sizeof(dev->node_desc), 1587 snprintf(dev->node_desc, sizeof(dev->node_desc),
1142 IPATH_IDSTR " %s kernel_SMA", system_utsname.nodename); 1588 IPATH_IDSTR " %s", system_utsname.nodename);
1143 1589
1144 ret = ib_register_device(dev); 1590 ret = ib_register_device(dev);
1145 if (ret) 1591 if (ret)
@@ -1148,7 +1594,7 @@ static void *ipath_register_ib_device(int unit, struct ipath_devdata *dd)
1148 if (ipath_verbs_register_sysfs(dev)) 1594 if (ipath_verbs_register_sysfs(dev))
1149 goto err_class; 1595 goto err_class;
1150 1596
1151 ipath_layer_enable_timer(dd); 1597 enable_timer(dd);
1152 1598
1153 goto bail; 1599 goto bail;
1154 1600
@@ -1160,37 +1606,32 @@ err_lk:
1160 kfree(idev->qp_table.table); 1606 kfree(idev->qp_table.table);
1161err_qp: 1607err_qp:
1162 ib_dealloc_device(dev); 1608 ib_dealloc_device(dev);
1163 _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n", 1609 ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
1164 unit, -ret);
1165 idev = NULL; 1610 idev = NULL;
1166 1611
1167bail: 1612bail:
1168 return idev; 1613 dd->verbs_dev = idev;
1614 return ret;
1169} 1615}
1170 1616
1171static void ipath_unregister_ib_device(void *arg) 1617void ipath_unregister_ib_device(struct ipath_ibdev *dev)
1172{ 1618{
1173 struct ipath_ibdev *dev = (struct ipath_ibdev *) arg;
1174 struct ib_device *ibdev = &dev->ibdev; 1619 struct ib_device *ibdev = &dev->ibdev;
1175 1620
1176 ipath_layer_disable_timer(dev->dd); 1621 disable_timer(dev->dd);
1177 1622
1178 ib_unregister_device(ibdev); 1623 ib_unregister_device(ibdev);
1179 1624
1180 if (!list_empty(&dev->pending[0]) || 1625 if (!list_empty(&dev->pending[0]) ||
1181 !list_empty(&dev->pending[1]) || 1626 !list_empty(&dev->pending[1]) ||
1182 !list_empty(&dev->pending[2])) 1627 !list_empty(&dev->pending[2]))
1183 _VERBS_ERROR("ipath%d pending list not empty!\n", 1628 ipath_dev_err(dev->dd, "pending list not empty!\n");
1184 dev->ib_unit);
1185 if (!list_empty(&dev->piowait)) 1629 if (!list_empty(&dev->piowait))
1186 _VERBS_ERROR("ipath%d piowait list not empty!\n", 1630 ipath_dev_err(dev->dd, "piowait list not empty!\n");
1187 dev->ib_unit);
1188 if (!list_empty(&dev->rnrwait)) 1631 if (!list_empty(&dev->rnrwait))
1189 _VERBS_ERROR("ipath%d rnrwait list not empty!\n", 1632 ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
1190 dev->ib_unit);
1191 if (!ipath_mcast_tree_empty()) 1633 if (!ipath_mcast_tree_empty())
1192 _VERBS_ERROR("ipath%d multicast table memory leak!\n", 1634 ipath_dev_err(dev->dd, "multicast table memory leak!\n");
1193 dev->ib_unit);
1194 /* 1635 /*
1195 * Note that ipath_unregister_ib_device() can be called before all 1636 * Note that ipath_unregister_ib_device() can be called before all
1196 * the QPs are destroyed! 1637 * the QPs are destroyed!
@@ -1201,25 +1642,12 @@ static void ipath_unregister_ib_device(void *arg)
1201 ib_dealloc_device(ibdev); 1642 ib_dealloc_device(ibdev);
1202} 1643}
1203 1644
1204static int __init ipath_verbs_init(void)
1205{
1206 return ipath_verbs_register(ipath_register_ib_device,
1207 ipath_unregister_ib_device,
1208 ipath_ib_piobufavail, ipath_ib_rcv,
1209 ipath_ib_timer);
1210}
1211
1212static void __exit ipath_verbs_cleanup(void)
1213{
1214 ipath_verbs_unregister();
1215}
1216
1217static ssize_t show_rev(struct class_device *cdev, char *buf) 1645static ssize_t show_rev(struct class_device *cdev, char *buf)
1218{ 1646{
1219 struct ipath_ibdev *dev = 1647 struct ipath_ibdev *dev =
1220 container_of(cdev, struct ipath_ibdev, ibdev.class_dev); 1648 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1221 1649
1222 return sprintf(buf, "%x\n", ipath_layer_get_pcirev(dev->dd)); 1650 return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
1223} 1651}
1224 1652
1225static ssize_t show_hca(struct class_device *cdev, char *buf) 1653static ssize_t show_hca(struct class_device *cdev, char *buf)
@@ -1228,7 +1656,7 @@ static ssize_t show_hca(struct class_device *cdev, char *buf)
1228 container_of(cdev, struct ipath_ibdev, ibdev.class_dev); 1656 container_of(cdev, struct ipath_ibdev, ibdev.class_dev);
1229 int ret; 1657 int ret;
1230 1658
1231 ret = ipath_layer_get_boardname(dev->dd, buf, 128); 1659 ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
1232 if (ret < 0) 1660 if (ret < 0)
1233 goto bail; 1661 goto bail;
1234 strcat(buf, "\n"); 1662 strcat(buf, "\n");
@@ -1305,6 +1733,3 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev)
1305bail: 1733bail:
1306 return ret; 1734 return ret;
1307} 1735}
1308
1309module_init(ipath_verbs_init);
1310module_exit(ipath_verbs_cleanup);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 2df684727dc1..09bbb3f9a217 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -38,10 +38,10 @@
38#include <linux/spinlock.h> 38#include <linux/spinlock.h>
39#include <linux/kernel.h> 39#include <linux/kernel.h>
40#include <linux/interrupt.h> 40#include <linux/interrupt.h>
41#include <linux/kref.h>
41#include <rdma/ib_pack.h> 42#include <rdma/ib_pack.h>
42 43
43#include "ipath_layer.h" 44#include "ipath_layer.h"
44#include "verbs_debug.h"
45 45
46#define QPN_MAX (1 << 24) 46#define QPN_MAX (1 << 24)
47#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) 47#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
@@ -50,7 +50,7 @@
50 * Increment this value if any changes that break userspace ABI 50 * Increment this value if any changes that break userspace ABI
51 * compatibility are made. 51 * compatibility are made.
52 */ 52 */
53#define IPATH_UVERBS_ABI_VERSION 1 53#define IPATH_UVERBS_ABI_VERSION 2
54 54
55/* 55/*
56 * Define an ib_cq_notify value that is not valid so we know when CQ 56 * Define an ib_cq_notify value that is not valid so we know when CQ
@@ -152,19 +152,6 @@ struct ipath_mcast {
152 int n_attached; 152 int n_attached;
153}; 153};
154 154
155/* Memory region */
156struct ipath_mr {
157 struct ib_mr ibmr;
158 struct ipath_mregion mr; /* must be last */
159};
160
161/* Fast memory region */
162struct ipath_fmr {
163 struct ib_fmr ibfmr;
164 u8 page_shift;
165 struct ipath_mregion mr; /* must be last */
166};
167
168/* Protection domain */ 155/* Protection domain */
169struct ipath_pd { 156struct ipath_pd {
170 struct ib_pd ibpd; 157 struct ib_pd ibpd;
@@ -178,58 +165,89 @@ struct ipath_ah {
178}; 165};
179 166
180/* 167/*
181 * Quick description of our CQ/QP locking scheme: 168 * This structure is used by ipath_mmap() to validate an offset
182 * 169 * when an mmap() request is made. The vm_area_struct then uses
183 * We have one global lock that protects dev->cq/qp_table. Each 170 * this as its vm_private_data.
184 * struct ipath_cq/qp also has its own lock. An individual qp lock 171 */
185 * may be taken inside of an individual cq lock. Both cqs attached to 172struct ipath_mmap_info {
186 * a qp may be locked, with the send cq locked first. No other 173 struct ipath_mmap_info *next;
187 * nesting should be done. 174 struct ib_ucontext *context;
188 * 175 void *obj;
189 * Each struct ipath_cq/qp also has an atomic_t ref count. The 176 struct kref ref;
190 * pointer from the cq/qp_table to the struct counts as one reference. 177 unsigned size;
191 * This reference also is good for access through the consumer API, so 178 unsigned mmap_cnt;
192 * modifying the CQ/QP etc doesn't need to take another reference. 179};
193 * Access because of a completion being polled does need a reference. 180
194 * 181/*
195 * Finally, each struct ipath_cq/qp has a wait_queue_head_t for the 182 * This structure is used to contain the head pointer, tail pointer,
196 * destroy function to sleep on. 183 * and completion queue entries as a single memory allocation so
197 * 184 * it can be mmap'ed into user space.
198 * This means that access from the consumer API requires nothing but
199 * taking the struct's lock.
200 *
201 * Access because of a completion event should go as follows:
202 * - lock cq/qp_table and look up struct
203 * - increment ref count in struct
204 * - drop cq/qp_table lock
205 * - lock struct, do your thing, and unlock struct
206 * - decrement ref count; if zero, wake up waiters
207 *
208 * To destroy a CQ/QP, we can do the following:
209 * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock
210 * - decrement ref count
211 * - wait_event until ref count is zero
212 *
 213 * It is the consumer's responsibility to make sure that no QP
214 * operations (WQE posting or state modification) are pending when the
215 * QP is destroyed. Also, the consumer must make sure that calls to
216 * qp_modify are serialized.
217 *
218 * Possible optimizations (wait for profile data to see if/where we
219 * have locks bouncing between CPUs):
220 * - split cq/qp table lock into n separate (cache-aligned) locks,
221 * indexed (say) by the page in the table
222 */ 185 */
186struct ipath_cq_wc {
187 u32 head; /* index of next entry to fill */
188 u32 tail; /* index of next ib_poll_cq() entry */
189 struct ib_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
190};
223 191
192/*
193 * The completion queue structure.
194 */
224struct ipath_cq { 195struct ipath_cq {
225 struct ib_cq ibcq; 196 struct ib_cq ibcq;
226 struct tasklet_struct comptask; 197 struct tasklet_struct comptask;
227 spinlock_t lock; 198 spinlock_t lock;
228 u8 notify; 199 u8 notify;
229 u8 triggered; 200 u8 triggered;
230 u32 head; /* new records added to the head */ 201 struct ipath_cq_wc *queue;
231 u32 tail; /* poll_cq() reads from here. */ 202 struct ipath_mmap_info *ip;
232 struct ib_wc *queue; /* this is actually ibcq.cqe + 1 */ 203};
204
205/*
206 * A segment is a linear region of low physical memory.
207 * XXX Maybe we should use phys addr here and kmap()/kunmap().
208 * Used by the verbs layer.
209 */
210struct ipath_seg {
211 void *vaddr;
212 size_t length;
213};
214
215/* The number of ipath_segs that fit in a page. */
216#define IPATH_SEGSZ (PAGE_SIZE / sizeof (struct ipath_seg))
217
218struct ipath_segarray {
219 struct ipath_seg segs[IPATH_SEGSZ];
220};
221
222struct ipath_mregion {
223 u64 user_base; /* User's address for this region */
224 u64 iova; /* IB start address of this region */
225 size_t length;
226 u32 lkey;
227 u32 offset; /* offset (bytes) to start of region */
228 int access_flags;
229 u32 max_segs; /* number of ipath_segs in all the arrays */
230 u32 mapsz; /* size of the map array */
231 struct ipath_segarray *map[0]; /* the segments */
232};
233
234/*
235 * These keep track of the copy progress within a memory region.
236 * Used by the verbs layer.
237 */
238struct ipath_sge {
239 struct ipath_mregion *mr;
240 void *vaddr; /* current pointer into the segment */
241 u32 sge_length; /* length of the SGE */
242 u32 length; /* remaining length of the segment */
243 u16 m; /* current index: mr->map[m] */
244 u16 n; /* current index: mr->map[m]->segs[n] */
245};
246
247/* Memory region */
248struct ipath_mr {
249 struct ib_mr ibmr;
250 struct ipath_mregion mr; /* must be last */
233}; 251};
234 252
235/* 253/*
@@ -248,32 +266,50 @@ struct ipath_swqe {
248 266
249/* 267/*
250 * Receive work request queue entry. 268 * Receive work request queue entry.
251 * The size of the sg_list is determined when the QP is created and stored 269 * The size of the sg_list is determined when the QP (or SRQ) is created
252 * in qp->r_max_sge. 270 * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
253 */ 271 */
254struct ipath_rwqe { 272struct ipath_rwqe {
255 u64 wr_id; 273 u64 wr_id;
256 u32 length; /* total length of data in sg_list */
257 u8 num_sge; 274 u8 num_sge;
258 struct ipath_sge sg_list[0]; 275 struct ib_sge sg_list[0];
259}; 276};
260 277
261struct ipath_rq { 278/*
262 spinlock_t lock; 279 * This structure is used to contain the head pointer, tail pointer,
280 * and receive work queue entries as a single memory allocation so
281 * it can be mmap'ed into user space.
282 * Note that the wq array elements are variable size so you can't
283 * just index into the array to get the N'th element;
284 * use get_rwqe_ptr() instead.
285 */
286struct ipath_rwq {
263 u32 head; /* new work requests posted to the head */ 287 u32 head; /* new work requests posted to the head */
264 u32 tail; /* receives pull requests from here. */ 288 u32 tail; /* receives pull requests from here. */
289 struct ipath_rwqe wq[0];
290};
291
292struct ipath_rq {
293 struct ipath_rwq *wq;
294 spinlock_t lock;
265 u32 size; /* size of RWQE array */ 295 u32 size; /* size of RWQE array */
266 u8 max_sge; 296 u8 max_sge;
267 struct ipath_rwqe *wq; /* RWQE array */
268}; 297};
269 298
270struct ipath_srq { 299struct ipath_srq {
271 struct ib_srq ibsrq; 300 struct ib_srq ibsrq;
272 struct ipath_rq rq; 301 struct ipath_rq rq;
302 struct ipath_mmap_info *ip;
273 /* send signal when number of RWQEs < limit */ 303 /* send signal when number of RWQEs < limit */
274 u32 limit; 304 u32 limit;
275}; 305};
276 306
307struct ipath_sge_state {
308 struct ipath_sge *sg_list; /* next SGE to be used if any */
309 struct ipath_sge sge; /* progress state for the current SGE */
310 u8 num_sge;
311};
312
277/* 313/*
278 * Variables prefixed with s_ are for the requester (sender). 314 * Variables prefixed with s_ are for the requester (sender).
279 * Variables prefixed with r_ are for the responder (receiver). 315 * Variables prefixed with r_ are for the responder (receiver).
@@ -293,6 +329,7 @@ struct ipath_qp {
293 atomic_t refcount; 329 atomic_t refcount;
294 wait_queue_head_t wait; 330 wait_queue_head_t wait;
295 struct tasklet_struct s_task; 331 struct tasklet_struct s_task;
332 struct ipath_mmap_info *ip;
296 struct ipath_sge_state *s_cur_sge; 333 struct ipath_sge_state *s_cur_sge;
297 struct ipath_sge_state s_sge; /* current send request data */ 334 struct ipath_sge_state s_sge; /* current send request data */
298 /* current RDMA read send data */ 335 /* current RDMA read send data */
@@ -334,6 +371,7 @@ struct ipath_qp {
334 u8 s_retry; /* requester retry counter */ 371 u8 s_retry; /* requester retry counter */
335 u8 s_rnr_retry; /* requester RNR retry counter */ 372 u8 s_rnr_retry; /* requester RNR retry counter */
336 u8 s_pkey_index; /* PKEY index to use */ 373 u8 s_pkey_index; /* PKEY index to use */
374 u8 timeout; /* Timeout for this QP */
337 enum ib_mtu path_mtu; 375 enum ib_mtu path_mtu;
338 u32 remote_qpn; 376 u32 remote_qpn;
339 u32 qkey; /* QKEY for this QP (for UD or RD) */ 377 u32 qkey; /* QKEY for this QP (for UD or RD) */
@@ -345,7 +383,8 @@ struct ipath_qp {
345 u32 s_ssn; /* SSN of tail entry */ 383 u32 s_ssn; /* SSN of tail entry */
346 u32 s_lsn; /* limit sequence number (credit) */ 384 u32 s_lsn; /* limit sequence number (credit) */
347 struct ipath_swqe *s_wq; /* send work queue */ 385 struct ipath_swqe *s_wq; /* send work queue */
348 struct ipath_rq r_rq; /* receive work queue */ 386 struct ipath_rq r_rq; /* receive work queue */
387 struct ipath_sge r_sg_list[0]; /* verified SGEs */
349}; 388};
350 389
351/* 390/*
@@ -369,15 +408,15 @@ static inline struct ipath_swqe *get_swqe_ptr(struct ipath_qp *qp,
369 408
370/* 409/*
371 * Since struct ipath_rwqe is not a fixed size, we can't simply index into 410 * Since struct ipath_rwqe is not a fixed size, we can't simply index into
372 * struct ipath_rq.wq. This function does the array index computation. 411 * struct ipath_rwq.wq. This function does the array index computation.
373 */ 412 */
374static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq, 413static inline struct ipath_rwqe *get_rwqe_ptr(struct ipath_rq *rq,
375 unsigned n) 414 unsigned n)
376{ 415{
377 return (struct ipath_rwqe *) 416 return (struct ipath_rwqe *)
378 ((char *) rq->wq + 417 ((char *) rq->wq->wq +
379 (sizeof(struct ipath_rwqe) + 418 (sizeof(struct ipath_rwqe) +
380 rq->max_sge * sizeof(struct ipath_sge)) * n); 419 rq->max_sge * sizeof(struct ib_sge)) * n);
381} 420}
382 421
383/* 422/*
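The updated get_rwqe_ptr() reflects the new layout: receive WQEs now live inside the mmap-able struct ipath_rwq, and each entry occupies sizeof(struct ipath_rwqe) plus max_sge trailing SGE slots, so the N'th entry has to be found by computing the stride rather than by plain array indexing. A minimal userspace sketch of that variable-stride lookup, using illustrative stand-in types rather than the driver's definitions:

#include <stdio.h>
#include <stdlib.h>

/* Stand-ins for the driver's ib_sge / ipath_rwqe; sizes are illustrative. */
struct sge  { unsigned long addr; unsigned int length; unsigned int lkey; };
struct rwqe { unsigned long wr_id; unsigned char num_sge; struct sge sg_list[]; };

/* Same idea as get_rwqe_ptr(): stride = fixed header + max_sge SGE slots. */
static struct rwqe *rwqe_ptr(void *base, unsigned int max_sge, unsigned int n)
{
	size_t stride = sizeof(struct rwqe) + max_sge * sizeof(struct sge);

	return (struct rwqe *)((char *)base + stride * n);
}

int main(void)
{
	unsigned int max_sge = 4, nwqe = 8;
	void *ring = calloc(nwqe, sizeof(struct rwqe) + max_sge * sizeof(struct sge));

	rwqe_ptr(ring, max_sge, 3)->wr_id = 42;	/* touch the fourth entry */
	printf("%lu\n", rwqe_ptr(ring, max_sge, 3)->wr_id);
	free(ring);
	return 0;
}

The hunk also switches the per-SGE size from struct ipath_sge to struct ib_sge, presumably because the mmap-ed entries now carry the user-visible SGE format.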
@@ -417,6 +456,7 @@ struct ipath_ibdev {
417 struct ib_device ibdev; 456 struct ib_device ibdev;
418 struct list_head dev_list; 457 struct list_head dev_list;
419 struct ipath_devdata *dd; 458 struct ipath_devdata *dd;
459 struct ipath_mmap_info *pending_mmaps;
420 int ib_unit; /* This is the device number */ 460 int ib_unit; /* This is the device number */
421 u16 sm_lid; /* in host order */ 461 u16 sm_lid; /* in host order */
422 u8 sm_sl; 462 u8 sm_sl;
@@ -435,11 +475,20 @@ struct ipath_ibdev {
435 __be64 sys_image_guid; /* in network order */ 475 __be64 sys_image_guid; /* in network order */
436 __be64 gid_prefix; /* in network order */ 476 __be64 gid_prefix; /* in network order */
437 __be64 mkey; 477 __be64 mkey;
478
438 u32 n_pds_allocated; /* number of PDs allocated for device */ 479 u32 n_pds_allocated; /* number of PDs allocated for device */
480 spinlock_t n_pds_lock;
439 u32 n_ahs_allocated; /* number of AHs allocated for device */ 481 u32 n_ahs_allocated; /* number of AHs allocated for device */
482 spinlock_t n_ahs_lock;
440 u32 n_cqs_allocated; /* number of CQs allocated for device */ 483 u32 n_cqs_allocated; /* number of CQs allocated for device */
484 spinlock_t n_cqs_lock;
485 u32 n_qps_allocated; /* number of QPs allocated for device */
486 spinlock_t n_qps_lock;
441 u32 n_srqs_allocated; /* number of SRQs allocated for device */ 487 u32 n_srqs_allocated; /* number of SRQs allocated for device */
488 spinlock_t n_srqs_lock;
442 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ 489 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
490 spinlock_t n_mcast_grps_lock;
491
443 u64 ipath_sword; /* total dwords sent (sample result) */ 492 u64 ipath_sword; /* total dwords sent (sample result) */
444 u64 ipath_rword; /* total dwords received (sample result) */ 493 u64 ipath_rword; /* total dwords received (sample result) */
445 u64 ipath_spkts; /* total packets sent (sample result) */ 494 u64 ipath_spkts; /* total packets sent (sample result) */
@@ -494,8 +543,19 @@ struct ipath_ibdev {
494 struct ipath_opcode_stats opstats[128]; 543 struct ipath_opcode_stats opstats[128];
495}; 544};
496 545
497struct ipath_ucontext { 546struct ipath_verbs_counters {
498 struct ib_ucontext ibucontext; 547 u64 symbol_error_counter;
548 u64 link_error_recovery_counter;
549 u64 link_downed_counter;
550 u64 port_rcv_errors;
551 u64 port_rcv_remphys_errors;
552 u64 port_xmit_discards;
553 u64 port_xmit_data;
554 u64 port_rcv_data;
555 u64 port_xmit_packets;
556 u64 port_rcv_packets;
557 u32 local_link_integrity_errors;
558 u32 excessive_buffer_overrun_errors;
499}; 559};
500 560
501static inline struct ipath_mr *to_imr(struct ib_mr *ibmr) 561static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
@@ -503,11 +563,6 @@ static inline struct ipath_mr *to_imr(struct ib_mr *ibmr)
503 return container_of(ibmr, struct ipath_mr, ibmr); 563 return container_of(ibmr, struct ipath_mr, ibmr);
504} 564}
505 565
506static inline struct ipath_fmr *to_ifmr(struct ib_fmr *ibfmr)
507{
508 return container_of(ibfmr, struct ipath_fmr, ibfmr);
509}
510
511static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd) 566static inline struct ipath_pd *to_ipd(struct ib_pd *ibpd)
512{ 567{
513 return container_of(ibpd, struct ipath_pd, ibpd); 568 return container_of(ibpd, struct ipath_pd, ibpd);
@@ -545,12 +600,6 @@ int ipath_process_mad(struct ib_device *ibdev,
545 struct ib_grh *in_grh, 600 struct ib_grh *in_grh,
546 struct ib_mad *in_mad, struct ib_mad *out_mad); 601 struct ib_mad *in_mad, struct ib_mad *out_mad);
547 602
548static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
549 *ibucontext)
550{
551 return container_of(ibucontext, struct ipath_ucontext, ibucontext);
552}
553
554/* 603/*
555 * Compare the lower 24 bits of the two values. 604 * Compare the lower 24 bits of the two values.
556 * Returns an integer <, ==, or > than zero. 605 * Returns an integer <, ==, or > than zero.
@@ -562,6 +611,13 @@ static inline int ipath_cmp24(u32 a, u32 b)
562 611
563struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid); 612struct ipath_mcast *ipath_mcast_find(union ib_gid *mgid);
564 613
614int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
615 u64 *rwords, u64 *spkts, u64 *rpkts,
616 u64 *xmit_wait);
617
618int ipath_get_counters(struct ipath_devdata *dd,
619 struct ipath_verbs_counters *cntrs);
620
565int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); 621int ipath_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
566 622
567int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); 623int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
@@ -579,7 +635,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
579int ipath_destroy_qp(struct ib_qp *ibqp); 635int ipath_destroy_qp(struct ib_qp *ibqp);
580 636
581int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 637int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
582 int attr_mask); 638 int attr_mask, struct ib_udata *udata);
583 639
584int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 640int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
585 int attr_mask, struct ib_qp_init_attr *init_attr); 641 int attr_mask, struct ib_qp_init_attr *init_attr);
@@ -592,6 +648,9 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
592 648
593void ipath_get_credit(struct ipath_qp *qp, u32 aeth); 649void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
594 650
651int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
652 u32 *hdr, u32 len, struct ipath_sge_state *ss);
653
595void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig); 654void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
596 655
597int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss, 656int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
@@ -638,7 +697,8 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
638 struct ib_udata *udata); 697 struct ib_udata *udata);
639 698
640int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 699int ipath_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
641 enum ib_srq_attr_mask attr_mask); 700 enum ib_srq_attr_mask attr_mask,
701 struct ib_udata *udata);
642 702
643int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); 703int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
644 704
@@ -680,6 +740,10 @@ int ipath_unmap_fmr(struct list_head *fmr_list);
680 740
681int ipath_dealloc_fmr(struct ib_fmr *ibfmr); 741int ipath_dealloc_fmr(struct ib_fmr *ibfmr);
682 742
743void ipath_release_mmap_info(struct kref *ref);
744
745int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
746
683void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev); 747void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev);
684 748
685void ipath_insert_rnr_queue(struct ipath_qp *qp); 749void ipath_insert_rnr_queue(struct ipath_qp *qp);
@@ -700,6 +764,22 @@ int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
700int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr, 764int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
701 u32 pmtu, u32 *bth0p, u32 *bth2p); 765 u32 pmtu, u32 *bth0p, u32 *bth2p);
702 766
767int ipath_register_ib_device(struct ipath_devdata *);
768
769void ipath_unregister_ib_device(struct ipath_ibdev *);
770
771void ipath_ib_rcv(struct ipath_ibdev *, void *, void *, u32);
772
773int ipath_ib_piobufavail(struct ipath_ibdev *);
774
775void ipath_ib_timer(struct ipath_ibdev *);
776
777unsigned ipath_get_npkeys(struct ipath_devdata *);
778
779u32 ipath_get_cr_errpkey(struct ipath_devdata *);
780
781unsigned ipath_get_pkey(struct ipath_devdata *, unsigned);
782
703extern const enum ib_wc_opcode ib_ipath_wc_opcode[]; 783extern const enum ib_wc_opcode ib_ipath_wc_opcode[];
704 784
705extern const u8 ipath_cvt_physportstate[]; 785extern const u8 ipath_cvt_physportstate[];
@@ -714,6 +794,8 @@ extern unsigned int ib_ipath_max_cqs;
714 794
715extern unsigned int ib_ipath_max_qp_wrs; 795extern unsigned int ib_ipath_max_qp_wrs;
716 796
797extern unsigned int ib_ipath_max_qps;
798
717extern unsigned int ib_ipath_max_sges; 799extern unsigned int ib_ipath_max_sges;
718 800
719extern unsigned int ib_ipath_max_mcast_grps; 801extern unsigned int ib_ipath_max_mcast_grps;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index ee0e1d96d723..085e28b939ec 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -207,12 +207,17 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
207 goto bail; 207 goto bail;
208 } 208 }
209 209
210 spin_lock(&dev->n_mcast_grps_lock);
210 if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) { 211 if (dev->n_mcast_grps_allocated == ib_ipath_max_mcast_grps) {
212 spin_unlock(&dev->n_mcast_grps_lock);
211 ret = ENOMEM; 213 ret = ENOMEM;
212 goto bail; 214 goto bail;
213 } 215 }
214 216
215 dev->n_mcast_grps_allocated++; 217 dev->n_mcast_grps_allocated++;
218 spin_unlock(&dev->n_mcast_grps_lock);
219
220 mcast->n_attached++;
216 221
217 list_add_tail_rcu(&mqp->list, &mcast->qp_list); 222 list_add_tail_rcu(&mqp->list, &mcast->qp_list);
218 223
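The lock taken here makes the limit check and the increment of n_mcast_grps_allocated a single atomic step; without it, two concurrent attaches could both pass the ib_ipath_max_mcast_grps test. A compact userspace analog of the same check-and-increment pattern (a pthread mutex standing in for the spinlock; names are illustrative):

#include <pthread.h>
#include <stdio.h>

/* Check-against-limit and increment must happen under one lock, or two
 * racing callers could both slip under the limit. */
struct limited_counter {
	pthread_mutex_t lock;
	unsigned int count, max;
};

static struct limited_counter grp_count = { PTHREAD_MUTEX_INITIALIZER, 0, 1 };

static int counter_try_get(struct limited_counter *c)
{
	int ok;

	pthread_mutex_lock(&c->lock);
	ok = c->count < c->max;
	if (ok)
		c->count++;
	pthread_mutex_unlock(&c->lock);
	return ok ? 0 : -1;
}

static void counter_put(struct limited_counter *c)
{
	pthread_mutex_lock(&c->lock);
	c->count--;
	pthread_mutex_unlock(&c->lock);
}

int main(void)
{
	int a = counter_try_get(&grp_count);
	int b = counter_try_get(&grp_count);

	printf("%d %d\n", a, b);		/* 0 -1: second caller exceeds max=1 */
	counter_put(&grp_count);
	printf("%d\n", counter_try_get(&grp_count));	/* 0 again after the put */
	return 0;
}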
@@ -343,7 +348,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
343 atomic_dec(&mcast->refcount); 348 atomic_dec(&mcast->refcount);
344 wait_event(mcast->wait, !atomic_read(&mcast->refcount)); 349 wait_event(mcast->wait, !atomic_read(&mcast->refcount));
345 ipath_mcast_free(mcast); 350 ipath_mcast_free(mcast);
351 spin_lock(&dev->n_mcast_grps_lock);
346 dev->n_mcast_grps_allocated--; 352 dev->n_mcast_grps_allocated--;
353 spin_unlock(&dev->n_mcast_grps_lock);
347 } 354 }
348 355
349 ret = 0; 356 ret = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
new file mode 100644
index 000000000000..036fde662aa9
--- /dev/null
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -0,0 +1,52 @@
1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*
34 * This file is conditionally built on PowerPC only. Otherwise weak symbol
35 * versions of the functions exported from here are used.
36 */
37
38#include "ipath_kernel.h"
39
40/**
41 * ipath_unordered_wc - indicate whether write combining is ordered
42 *
43 * PowerPC systems (at least those in the 970 processor family)
44 * write partially filled store buffers in address order, but will write
45 * completely filled store buffers in "random" order, and therefore must
46 * have serialization for correctness with current InfiniPath chips.
47 *
48 */
49int ipath_unordered_wc(void)
50{
51 return 1;
52}
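The header comment describes the build arrangement: this file is compiled only on PowerPC, and on other architectures weak default versions of the exported functions are linked instead. Stripped to its essentials, the weak-symbol mechanism looks like the sketch below (GCC/binutils semantics; the function name is reused purely for illustration):

#include <stdio.h>

/* Weak default: the linker uses this definition unless some other object
 * file supplies a strong (non-weak) definition of the same symbol, which
 * is how an arch-specific file can override the generic fallback. */
int __attribute__((weak)) ipath_unordered_wc(void)
{
	return 0;
}

int main(void)
{
	printf("write combining unordered: %d\n", ipath_unordered_wc());
	return 0;
}

Linking this against an object that provides a strong ipath_unordered_wc() returning 1, as the PowerPC file above does, makes the strong version win without any #ifdef in the callers.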
diff --git a/drivers/infiniband/hw/ipath/verbs_debug.h b/drivers/infiniband/hw/ipath/verbs_debug.h
deleted file mode 100644
index 6186676f2a16..000000000000
--- a/drivers/infiniband/hw/ipath/verbs_debug.h
+++ /dev/null
@@ -1,108 +0,0 @@
1/*
2 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef _VERBS_DEBUG_H
35#define _VERBS_DEBUG_H
36
37/*
38 * This file contains tracing code for the ib_ipath kernel module.
39 */
40#ifndef _VERBS_DEBUGGING /* tracing enabled or not */
41#define _VERBS_DEBUGGING 1
42#endif
43
44extern unsigned ib_ipath_debug;
45
46#define _VERBS_ERROR(fmt,...) \
47 do { \
48 printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
49 } while(0)
50
51#define _VERBS_UNIT_ERROR(unit,fmt,...) \
52 do { \
53 printk(KERN_ERR "%s: " fmt, "ib_ipath", ##__VA_ARGS__); \
54 } while(0)
55
56#if _VERBS_DEBUGGING
57
58/*
59 * Mask values for debugging. The scheme allows us to compile out any
60 * of the debug tracing stuff, and if compiled in, to enable or
61 * disable dynamically.
62 * This can be set at modprobe time also:
63 * modprobe ib_path ib_ipath_debug=3
64 */
65
66#define __VERBS_INFO 0x1 /* generic low verbosity stuff */
67#define __VERBS_DBG 0x2 /* generic debug */
68#define __VERBS_VDBG 0x4 /* verbose debug */
69#define __VERBS_SMADBG 0x8000 /* sma packet debug */
70
71#define _VERBS_INFO(fmt,...) \
72 do { \
73 if (unlikely(ib_ipath_debug&__VERBS_INFO)) \
74 printk(KERN_INFO "%s: " fmt,"ib_ipath", \
75 ##__VA_ARGS__); \
76 } while(0)
77
78#define _VERBS_DBG(fmt,...) \
79 do { \
80 if (unlikely(ib_ipath_debug&__VERBS_DBG)) \
81 printk(KERN_DEBUG "%s: " fmt, __func__, \
82 ##__VA_ARGS__); \
83 } while(0)
84
85#define _VERBS_VDBG(fmt,...) \
86 do { \
87 if (unlikely(ib_ipath_debug&__VERBS_VDBG)) \
88 printk(KERN_DEBUG "%s: " fmt, __func__, \
89 ##__VA_ARGS__); \
90 } while(0)
91
92#define _VERBS_SMADBG(fmt,...) \
93 do { \
94 if (unlikely(ib_ipath_debug&__VERBS_SMADBG)) \
95 printk(KERN_DEBUG "%s: " fmt, __func__, \
96 ##__VA_ARGS__); \
97 } while(0)
98
99#else /* ! _VERBS_DEBUGGING */
100
101#define _VERBS_INFO(fmt,...)
102#define _VERBS_DBG(fmt,...)
103#define _VERBS_VDBG(fmt,...)
104#define _VERBS_SMADBG(fmt,...)
105
106#endif /* _VERBS_DEBUGGING */
107
108#endif /* _VERBS_DEBUG_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index 25157f57a6d0..f930e55b58fc 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -41,9 +41,11 @@
41/* Trivial bitmap-based allocator */ 41/* Trivial bitmap-based allocator */
42u32 mthca_alloc(struct mthca_alloc *alloc) 42u32 mthca_alloc(struct mthca_alloc *alloc)
43{ 43{
44 unsigned long flags;
44 u32 obj; 45 u32 obj;
45 46
46 spin_lock(&alloc->lock); 47 spin_lock_irqsave(&alloc->lock, flags);
48
47 obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); 49 obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
48 if (obj >= alloc->max) { 50 if (obj >= alloc->max) {
49 alloc->top = (alloc->top + alloc->max) & alloc->mask; 51 alloc->top = (alloc->top + alloc->max) & alloc->mask;
@@ -56,19 +58,24 @@ u32 mthca_alloc(struct mthca_alloc *alloc)
56 } else 58 } else
57 obj = -1; 59 obj = -1;
58 60
59 spin_unlock(&alloc->lock); 61 spin_unlock_irqrestore(&alloc->lock, flags);
60 62
61 return obj; 63 return obj;
62} 64}
63 65
64void mthca_free(struct mthca_alloc *alloc, u32 obj) 66void mthca_free(struct mthca_alloc *alloc, u32 obj)
65{ 67{
68 unsigned long flags;
69
66 obj &= alloc->max - 1; 70 obj &= alloc->max - 1;
67 spin_lock(&alloc->lock); 71
72 spin_lock_irqsave(&alloc->lock, flags);
73
68 clear_bit(obj, alloc->table); 74 clear_bit(obj, alloc->table);
69 alloc->last = min(alloc->last, obj); 75 alloc->last = min(alloc->last, obj);
70 alloc->top = (alloc->top + alloc->max) & alloc->mask; 76 alloc->top = (alloc->top + alloc->max) & alloc->mask;
71 spin_unlock(&alloc->lock); 77
78 spin_unlock_irqrestore(&alloc->lock, flags);
72} 79}
73 80
74int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, 81int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
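The change above moves the allocator to the IRQ-saving lock variants, presumably because mthca_alloc()/mthca_free() can now be reached from contexts where taking a plain spin_lock() would be unsafe. The allocator itself is the classic "bitmap plus scan hint" scheme; a plain userspace analog (pthread mutex in place of the spinlock, and without the top/mask wraparound bookkeeping of the real code):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_OBJS 64

/* One bit per object, a 'last' hint so scans resume where the previous
 * one left off, and a single lock around the scan-and-set. */
struct bitmap_alloc {
	pthread_mutex_t lock;
	uint64_t table;		/* bit set => object in use */
	unsigned int last;	/* scan hint */
};

static struct bitmap_alloc alloc = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

static int bm_alloc(struct bitmap_alloc *a)
{
	int obj = -1;
	unsigned int i;

	pthread_mutex_lock(&a->lock);
	for (i = 0; i < MAX_OBJS; ++i) {
		unsigned int bit = (a->last + i) % MAX_OBJS;

		if (!(a->table & (1ULL << bit))) {
			a->table |= 1ULL << bit;
			a->last = (bit + 1) % MAX_OBJS;
			obj = bit;
			break;
		}
	}
	pthread_mutex_unlock(&a->lock);
	return obj;
}

static void bm_free(struct bitmap_alloc *a, unsigned int obj)
{
	pthread_mutex_lock(&a->lock);
	a->table &= ~(1ULL << obj);
	pthread_mutex_unlock(&a->lock);
}

int main(void)
{
	int x = bm_alloc(&alloc);
	int y = bm_alloc(&alloc);

	printf("%d %d\n", x, y);		/* 0 1 */
	bm_free(&alloc, x);
	printf("%d\n", bm_alloc(&alloc));	/* 2: scan resumes at the hint */
	return 0;
}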
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index e215041b2db9..69599455aca2 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -90,7 +90,7 @@ static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
90 case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS; 90 case MTHCA_RATE_TAVOR_1X: return IB_RATE_2_5_GBPS;
91 case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS; 91 case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
92 case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS; 92 case MTHCA_RATE_TAVOR_4X: return IB_RATE_10_GBPS;
93 default: return port_rate; 93 default: return mult_to_ib_rate(port_rate);
94 } 94 }
95} 95}
96 96
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index c3bec7490f52..cd044ea2dfa4 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -34,6 +34,7 @@
34 34
35#include <linux/jiffies.h> 35#include <linux/jiffies.h>
36#include <linux/timer.h> 36#include <linux/timer.h>
37#include <linux/workqueue.h>
37 38
38#include "mthca_dev.h" 39#include "mthca_dev.h"
39 40
@@ -48,9 +49,41 @@ enum {
48 49
49static DEFINE_SPINLOCK(catas_lock); 50static DEFINE_SPINLOCK(catas_lock);
50 51
52static LIST_HEAD(catas_list);
53static struct workqueue_struct *catas_wq;
54static struct work_struct catas_work;
55
56static int catas_reset_disable;
57module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
58MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
59
60static void catas_reset(void *work_ptr)
61{
62 struct mthca_dev *dev, *tmpdev;
63 LIST_HEAD(tlist);
64 int ret;
65
66 mutex_lock(&mthca_device_mutex);
67
68 spin_lock_irq(&catas_lock);
69 list_splice_init(&catas_list, &tlist);
70 spin_unlock_irq(&catas_lock);
71
72 list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
73 ret = __mthca_restart_one(dev->pdev);
74 if (ret)
75 mthca_err(dev, "Reset failed (%d)\n", ret);
76 else
77 mthca_dbg(dev, "Reset succeeded\n");
78 }
79
80 mutex_unlock(&mthca_device_mutex);
81}
82
51static void handle_catas(struct mthca_dev *dev) 83static void handle_catas(struct mthca_dev *dev)
52{ 84{
53 struct ib_event event; 85 struct ib_event event;
86 unsigned long flags;
54 const char *type; 87 const char *type;
55 int i; 88 int i;
56 89
@@ -82,6 +115,14 @@ static void handle_catas(struct mthca_dev *dev)
82 for (i = 0; i < dev->catas_err.size; ++i) 115 for (i = 0; i < dev->catas_err.size; ++i)
83 mthca_err(dev, " buf[%02x]: %08x\n", 116 mthca_err(dev, " buf[%02x]: %08x\n",
84 i, swab32(readl(dev->catas_err.map + i))); 117 i, swab32(readl(dev->catas_err.map + i)));
118
119 if (catas_reset_disable)
120 return;
121
122 spin_lock_irqsave(&catas_lock, flags);
123 list_add(&dev->catas_err.list, &catas_list);
124 queue_work(catas_wq, &catas_work);
125 spin_unlock_irqrestore(&catas_lock, flags);
85} 126}
86 127
87static void poll_catas(unsigned long dev_ptr) 128static void poll_catas(unsigned long dev_ptr)
@@ -135,6 +176,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
135 dev->catas_err.timer.data = (unsigned long) dev; 176 dev->catas_err.timer.data = (unsigned long) dev;
136 dev->catas_err.timer.function = poll_catas; 177 dev->catas_err.timer.function = poll_catas;
137 dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL; 178 dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
179 INIT_LIST_HEAD(&dev->catas_err.list);
138 add_timer(&dev->catas_err.timer); 180 add_timer(&dev->catas_err.timer);
139} 181}
140 182
@@ -153,4 +195,24 @@ void mthca_stop_catas_poll(struct mthca_dev *dev)
153 dev->catas_err.addr), 195 dev->catas_err.addr),
154 dev->catas_err.size * 4); 196 dev->catas_err.size * 4);
155 } 197 }
198
199 spin_lock_irq(&catas_lock);
200 list_del(&dev->catas_err.list);
201 spin_unlock_irq(&catas_lock);
202}
203
204int __init mthca_catas_init(void)
205{
206 INIT_WORK(&catas_work, catas_reset, NULL);
207
208 catas_wq = create_singlethread_workqueue("mthca_catas");
209 if (!catas_wq)
210 return -ENOMEM;
211
212 return 0;
213}
214
215void mthca_catas_cleanup(void)
216{
217 destroy_workqueue(catas_wq);
156} 218}
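The new catas_reset()/catas_wq machinery defers the actual restart out of the polling timer: timer context cannot sleep, while __mthca_restart_one() has to take mthca_device_mutex and re-probe the device, so handle_catas() only links the device onto a list and queues work. The shape of that hand-off, as a self-contained userspace analog (pthreads in place of a kernel workqueue; names are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct dev { const char *name; struct dev *next; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
static struct dev *pending;

/* The slow part: in the driver this is __mthca_restart_one(), which sleeps. */
static void slow_reset(struct dev *d)
{
	usleep(1000);
	printf("reset %s\n", d->name);
}

/* Worker side: splice the whole pending list out under the lock, then do
 * the slow resets with the lock dropped (the role of catas_reset()). */
static void *reset_worker(void *arg)
{
	(void)arg;
	for (;;) {
		struct dev *list;

		pthread_mutex_lock(&list_lock);
		while (!pending)
			pthread_cond_wait(&kick, &list_lock);
		list = pending;
		pending = NULL;
		pthread_mutex_unlock(&list_lock);

		for (; list; list = list->next)
			slow_reset(list);
	}
	return NULL;
}

/* Detector side: just link the device and wake the worker, nothing slow
 * (the role of the queue_work() call added to handle_catas()). */
static void report_catastrophe(struct dev *d)
{
	pthread_mutex_lock(&list_lock);
	d->next = pending;
	pending = d;
	pthread_cond_signal(&kick);
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	pthread_t t;
	struct dev d = { "mthca0", NULL };

	pthread_create(&t, NULL, reset_worker, NULL);
	report_catastrophe(&d);
	sleep(1);		/* let the worker run before exiting */
	return 0;
}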
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index deabc14b4ea4..99a94d710935 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -34,7 +34,7 @@
34 * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $ 34 * $Id: mthca_cmd.c 1349 2004-12-16 21:09:43Z roland $
35 */ 35 */
36 36
37#include <linux/sched.h> 37#include <linux/completion.h>
38#include <linux/pci.h> 38#include <linux/pci.h>
39#include <linux/errno.h> 39#include <linux/errno.h>
40#include <asm/io.h> 40#include <asm/io.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 3e27a084257e..e393681ba7d4 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -544,11 +544,11 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
544 wq = &(*cur_qp)->rq; 544 wq = &(*cur_qp)->rq;
545 wqe = be32_to_cpu(cqe->wqe); 545 wqe = be32_to_cpu(cqe->wqe);
546 wqe_index = wqe >> wq->wqe_shift; 546 wqe_index = wqe >> wq->wqe_shift;
547 /* 547 /*
548 * WQE addr == base - 1 might be reported in receive completion 548 * WQE addr == base - 1 might be reported in receive completion
549 * with error instead of (rq size - 1) by Sinai FW 1.0.800 and 549 * with error instead of (rq size - 1) by Sinai FW 1.0.800 and
550 * Arbel FW 5.1.400. This bug should be fixed in later FW revs. 550 * Arbel FW 5.1.400. This bug should be fixed in later FW revs.
551 */ 551 */
552 if (unlikely(wqe_index < 0)) 552 if (unlikely(wqe_index < 0))
553 wqe_index = wq->max - 1; 553 wqe_index = wq->max - 1;
554 entry->wr_id = (*cur_qp)->wrid[wqe_index]; 554 entry->wr_id = (*cur_qp)->wrid[wqe_index];
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index f8160b8de090..fe5cecf70fed 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -45,6 +45,7 @@
45#include <linux/dma-mapping.h> 45#include <linux/dma-mapping.h>
46#include <linux/timer.h> 46#include <linux/timer.h>
47#include <linux/mutex.h> 47#include <linux/mutex.h>
48#include <linux/list.h>
48 49
49#include <asm/semaphore.h> 50#include <asm/semaphore.h>
50 51
@@ -283,8 +284,11 @@ struct mthca_catas_err {
283 unsigned long stop; 284 unsigned long stop;
284 u32 size; 285 u32 size;
285 struct timer_list timer; 286 struct timer_list timer;
287 struct list_head list;
286}; 288};
287 289
290extern struct mutex mthca_device_mutex;
291
288struct mthca_dev { 292struct mthca_dev {
289 struct ib_device ib_dev; 293 struct ib_device ib_dev;
290 struct pci_dev *pdev; 294 struct pci_dev *pdev;
@@ -450,6 +454,9 @@ void mthca_unregister_device(struct mthca_dev *dev);
450 454
451void mthca_start_catas_poll(struct mthca_dev *dev); 455void mthca_start_catas_poll(struct mthca_dev *dev);
452void mthca_stop_catas_poll(struct mthca_dev *dev); 456void mthca_stop_catas_poll(struct mthca_dev *dev);
457int __mthca_restart_one(struct pci_dev *pdev);
458int mthca_catas_init(void);
459void mthca_catas_cleanup(void);
453 460
454int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); 461int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
455void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); 462void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
@@ -506,7 +513,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
506 struct ib_srq_attr *attr, struct mthca_srq *srq); 513 struct ib_srq_attr *attr, struct mthca_srq *srq);
507void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); 514void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
508int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 515int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
509 enum ib_srq_attr_mask attr_mask); 516 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
510int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); 517int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
511int mthca_max_srq_sge(struct mthca_dev *dev); 518int mthca_max_srq_sge(struct mthca_dev *dev);
512void mthca_srq_event(struct mthca_dev *dev, u32 srqn, 519void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
@@ -521,7 +528,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
521 enum ib_event_type event_type); 528 enum ib_event_type event_type);
522int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, 529int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
523 struct ib_qp_init_attr *qp_init_attr); 530 struct ib_qp_init_attr *qp_init_attr);
524int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); 531int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
532 struct ib_udata *udata);
525int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 533int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
526 struct ib_send_wr **bad_wr); 534 struct ib_send_wr **bad_wr);
527int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 535int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index d9bc030bcccc..45e106f14807 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -119,7 +119,7 @@ static void smp_snoop(struct ib_device *ibdev,
119 119
120 mthca_update_rate(to_mdev(ibdev), port_num); 120 mthca_update_rate(to_mdev(ibdev), port_num);
121 update_sm_ah(to_mdev(ibdev), port_num, 121 update_sm_ah(to_mdev(ibdev), port_num,
122 be16_to_cpu(pinfo->lid), 122 be16_to_cpu(pinfo->sm_lid),
123 pinfo->neighbormtu_mastersmsl & 0xf); 123 pinfo->neighbormtu_mastersmsl & 0xf);
124 124
125 event.device = ibdev; 125 event.device = ibdev;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 7b82c1907f04..47ea02148368 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -80,6 +80,8 @@ static int tune_pci = 0;
80module_param(tune_pci, int, 0444); 80module_param(tune_pci, int, 0444);
81MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero"); 81MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
82 82
83struct mutex mthca_device_mutex;
84
83static const char mthca_version[] __devinitdata = 85static const char mthca_version[] __devinitdata =
84 DRV_NAME ": Mellanox InfiniBand HCA driver v" 86 DRV_NAME ": Mellanox InfiniBand HCA driver v"
85 DRV_VERSION " (" DRV_RELDATE ")\n"; 87 DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -978,28 +980,15 @@ static struct {
978 MTHCA_FLAG_SINAI_OPT } 980 MTHCA_FLAG_SINAI_OPT }
979}; 981};
980 982
981static int __devinit mthca_init_one(struct pci_dev *pdev, 983static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
982 const struct pci_device_id *id)
983{ 984{
984 static int mthca_version_printed = 0;
985 int ddr_hidden = 0; 985 int ddr_hidden = 0;
986 int err; 986 int err;
987 struct mthca_dev *mdev; 987 struct mthca_dev *mdev;
988 988
989 if (!mthca_version_printed) {
990 printk(KERN_INFO "%s", mthca_version);
991 ++mthca_version_printed;
992 }
993
994 printk(KERN_INFO PFX "Initializing %s\n", 989 printk(KERN_INFO PFX "Initializing %s\n",
995 pci_name(pdev)); 990 pci_name(pdev));
996 991
997 if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
998 printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
999 pci_name(pdev), id->driver_data);
1000 return -ENODEV;
1001 }
1002
1003 err = pci_enable_device(pdev); 992 err = pci_enable_device(pdev);
1004 if (err) { 993 if (err) {
1005 dev_err(&pdev->dev, "Cannot enable PCI device, " 994 dev_err(&pdev->dev, "Cannot enable PCI device, "
@@ -1065,7 +1054,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
1065 1054
1066 mdev->pdev = pdev; 1055 mdev->pdev = pdev;
1067 1056
1068 mdev->mthca_flags = mthca_hca_table[id->driver_data].flags; 1057 mdev->mthca_flags = mthca_hca_table[hca_type].flags;
1069 if (ddr_hidden) 1058 if (ddr_hidden)
1070 mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; 1059 mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
1071 1060
@@ -1099,13 +1088,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
1099 if (err) 1088 if (err)
1100 goto err_cmd; 1089 goto err_cmd;
1101 1090
1102 if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) { 1091 if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
1103 mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n", 1092 mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
1104 (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, 1093 (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
1105 (int) (mdev->fw_ver & 0xffff), 1094 (int) (mdev->fw_ver & 0xffff),
1106 (int) (mthca_hca_table[id->driver_data].latest_fw >> 32), 1095 (int) (mthca_hca_table[hca_type].latest_fw >> 32),
1107 (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff, 1096 (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff,
1108 (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff)); 1097 (int) (mthca_hca_table[hca_type].latest_fw & 0xffff));
1109 mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n"); 1098 mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
1110 } 1099 }
1111 1100
@@ -1122,6 +1111,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
1122 goto err_unregister; 1111 goto err_unregister;
1123 1112
1124 pci_set_drvdata(pdev, mdev); 1113 pci_set_drvdata(pdev, mdev);
1114 mdev->hca_type = hca_type;
1125 1115
1126 return 0; 1116 return 0;
1127 1117
@@ -1166,7 +1156,7 @@ err_disable_pdev:
1166 return err; 1156 return err;
1167} 1157}
1168 1158
1169static void __devexit mthca_remove_one(struct pci_dev *pdev) 1159static void __mthca_remove_one(struct pci_dev *pdev)
1170{ 1160{
1171 struct mthca_dev *mdev = pci_get_drvdata(pdev); 1161 struct mthca_dev *mdev = pci_get_drvdata(pdev);
1172 u8 status; 1162 u8 status;
@@ -1211,6 +1201,51 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev)
1211 } 1201 }
1212} 1202}
1213 1203
1204int __mthca_restart_one(struct pci_dev *pdev)
1205{
1206 struct mthca_dev *mdev;
1207
1208 mdev = pci_get_drvdata(pdev);
1209 if (!mdev)
1210 return -ENODEV;
1211 __mthca_remove_one(pdev);
1212 return __mthca_init_one(pdev, mdev->hca_type);
1213}
1214
1215static int __devinit mthca_init_one(struct pci_dev *pdev,
1216 const struct pci_device_id *id)
1217{
1218 static int mthca_version_printed = 0;
1219 int ret;
1220
1221 mutex_lock(&mthca_device_mutex);
1222
1223 if (!mthca_version_printed) {
1224 printk(KERN_INFO "%s", mthca_version);
1225 ++mthca_version_printed;
1226 }
1227
1228 if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
1229 printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
1230 pci_name(pdev), id->driver_data);
1231 mutex_unlock(&mthca_device_mutex);
1232 return -ENODEV;
1233 }
1234
1235 ret = __mthca_init_one(pdev, id->driver_data);
1236
1237 mutex_unlock(&mthca_device_mutex);
1238
1239 return ret;
1240}
1241
1242static void __devexit mthca_remove_one(struct pci_dev *pdev)
1243{
1244 mutex_lock(&mthca_device_mutex);
1245 __mthca_remove_one(pdev);
1246 mutex_unlock(&mthca_device_mutex);
1247}
1248
1214static struct pci_device_id mthca_pci_table[] = { 1249static struct pci_device_id mthca_pci_table[] = {
1215 { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR), 1250 { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
1216 .driver_data = TAVOR }, 1251 .driver_data = TAVOR },
@@ -1248,13 +1283,24 @@ static int __init mthca_init(void)
1248{ 1283{
1249 int ret; 1284 int ret;
1250 1285
1286 mutex_init(&mthca_device_mutex);
1287 ret = mthca_catas_init();
1288 if (ret)
1289 return ret;
1290
1251 ret = pci_register_driver(&mthca_driver); 1291 ret = pci_register_driver(&mthca_driver);
1252 return ret < 0 ? ret : 0; 1292 if (ret < 0) {
1293 mthca_catas_cleanup();
1294 return ret;
1295 }
1296
1297 return 0;
1253} 1298}
1254 1299
1255static void __exit mthca_cleanup(void) 1300static void __exit mthca_cleanup(void)
1256{ 1301{
1257 pci_unregister_driver(&mthca_driver); 1302 pci_unregister_driver(&mthca_driver);
1303 mthca_catas_cleanup();
1258} 1304}
1259 1305
1260module_init(mthca_init); 1306module_init(mthca_init);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 265b1d1c4a62..981fe2eebdfa 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1288,7 +1288,7 @@ int mthca_register_device(struct mthca_dev *dev)
1288 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 1288 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
1289 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 1289 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
1290 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); 1290 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
1291 dev->ib_dev.node_type = IB_NODE_CA; 1291 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
1292 dev->ib_dev.phys_port_cnt = dev->limits.num_ports; 1292 dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
1293 dev->ib_dev.dma_device = &dev->pdev->dev; 1293 dev->ib_dev.dma_device = &dev->pdev->dev;
1294 dev->ib_dev.class_dev.dev = &dev->pdev->dev; 1294 dev->ib_dev.class_dev.dev = &dev->pdev->dev;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 2e8f6f36e0a5..5e5c58b9920b 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -408,7 +408,7 @@ static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
408 ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28; 408 ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
409 ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f; 409 ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
410 ib_ah_attr->static_rate = mthca_rate_to_ib(dev, 410 ib_ah_attr->static_rate = mthca_rate_to_ib(dev,
411 path->static_rate & 0x7, 411 path->static_rate & 0xf,
412 ib_ah_attr->port_num); 412 ib_ah_attr->port_num);
413 ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0; 413 ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
414 if (ib_ah_attr->ah_flags) { 414 if (ib_ah_attr->ah_flags) {
@@ -472,10 +472,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
472 if (qp->transport == RC || qp->transport == UC) { 472 if (qp->transport == RC || qp->transport == UC) {
473 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); 473 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
474 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); 474 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
475 qp_attr->alt_pkey_index =
476 be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
477 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
475 } 478 }
476 479
477 qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f; 480 qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
478 qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f; 481 qp_attr->port_num =
482 (be32_to_cpu(context->pri_path.port_pkey) >> 24) & 0x3;
479 483
480 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ 484 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
481 qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING; 485 qp_attr->sq_draining = mthca_state == MTHCA_QP_STATE_DRAINING;
@@ -486,11 +490,9 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
486 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); 490 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
487 qp_attr->min_rnr_timer = 491 qp_attr->min_rnr_timer =
488 (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; 492 (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
489 qp_attr->port_num = qp_attr->ah_attr.port_num;
490 qp_attr->timeout = context->pri_path.ackto >> 3; 493 qp_attr->timeout = context->pri_path.ackto >> 3;
491 qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; 494 qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
492 qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5; 495 qp_attr->rnr_retry = context->pri_path.rnr_retry >> 5;
493 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
494 qp_attr->alt_timeout = context->alt_path.ackto >> 3; 496 qp_attr->alt_timeout = context->alt_path.ackto >> 3;
495 qp_init_attr->cap = qp_attr->cap; 497 qp_init_attr->cap = qp_attr->cap;
496 498
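The query-QP fix moves the alternate-path attributes under the RC/UC check and reads port_num directly out of the primary path's port_pkey word instead of the address-vector copy. The decode itself is routine big-endian shift-and-mask extraction; the same idiom in isolation, with ntohl() standing in for be32_to_cpu() and a made-up example word:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical port_pkey word: port 2 in bits 31:24, pkey index 0x15 low. */
	uint32_t port_pkey_be = htonl((2u << 24) | 0x15);

	uint32_t v = ntohl(port_pkey_be);	/* be32_to_cpu() analog */
	unsigned int pkey_index = v & 0x7f;	/* low 7 bits, as in the hunk */
	unsigned int port_num = (v >> 24) & 0x3;	/* top byte, masked to 2 bits */

	printf("port %u, pkey index 0x%x\n", port_num, pkey_index);
	return 0;
}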
@@ -527,7 +529,8 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
527 return 0; 529 return 0;
528} 530}
529 531
530int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) 532int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
533 struct ib_udata *udata)
531{ 534{
532 struct mthca_dev *dev = to_mdev(ibqp->device); 535 struct mthca_dev *dev = to_mdev(ibqp->device);
533 struct mthca_qp *qp = to_mqp(ibqp); 536 struct mthca_qp *qp = to_mqp(ibqp);
@@ -842,11 +845,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
842 * entries and reinitialize the QP. 845 * entries and reinitialize the QP.
843 */ 846 */
844 if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) { 847 if (new_state == IB_QPS_RESET && !qp->ibqp.uobject) {
845 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, 848 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn,
846 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); 849 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
847 if (qp->ibqp.send_cq != qp->ibqp.recv_cq) 850 if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
848 mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq), qp->qpn, 851 mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq), qp->qpn, NULL);
849 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
850 852
851 mthca_wq_reset(&qp->sq); 853 mthca_wq_reset(&qp->sq);
852 qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); 854 qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b60a9d79ae54..0f316c87bf64 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -358,7 +358,7 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq)
358} 358}
359 359
360int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, 360int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
361 enum ib_srq_attr_mask attr_mask) 361 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
362{ 362{
363 struct mthca_dev *dev = to_mdev(ibsrq->device); 363 struct mthca_dev *dev = to_mdev(ibsrq->device);
364 struct mthca_srq *srq = to_msrq(ibsrq); 364 struct mthca_srq *srq = to_msrq(ibsrq);
diff --git a/drivers/infiniband/hw/mthca/mthca_uar.c b/drivers/infiniband/hw/mthca/mthca_uar.c
index 8e9219842be4..8b728486410d 100644
--- a/drivers/infiniband/hw/mthca/mthca_uar.c
+++ b/drivers/infiniband/hw/mthca/mthca_uar.c
@@ -60,7 +60,7 @@ int mthca_init_uar_table(struct mthca_dev *dev)
60 ret = mthca_alloc_init(&dev->uar_table.alloc, 60 ret = mthca_alloc_init(&dev->uar_table.alloc,
61 dev->limits.num_uars, 61 dev->limits.num_uars,
62 dev->limits.num_uars - 1, 62 dev->limits.num_uars - 1,
63 dev->limits.reserved_uars); 63 dev->limits.reserved_uars + 1);
64 if (ret) 64 if (ret)
65 return ret; 65 return ret;
66 66
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 474aa214ab57..0b8a79d53a00 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -336,6 +336,8 @@ static inline void ipoib_unregister_debugfs(void) { }
336extern int ipoib_sendq_size; 336extern int ipoib_sendq_size;
337extern int ipoib_recvq_size; 337extern int ipoib_recvq_size;
338 338
339extern struct ib_sa_client ipoib_sa_client;
340
339#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 341#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
340extern int ipoib_debug_level; 342extern int ipoib_debug_level;
341 343
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5033666b1481..f426a69d9a43 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -169,117 +169,129 @@ static int ipoib_ib_post_receives(struct net_device *dev)
169 return 0; 169 return 0;
170} 170}
171 171
172static void ipoib_ib_handle_wc(struct net_device *dev, 172static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
173 struct ib_wc *wc)
174{ 173{
175 struct ipoib_dev_priv *priv = netdev_priv(dev); 174 struct ipoib_dev_priv *priv = netdev_priv(dev);
176 unsigned int wr_id = wc->wr_id; 175 unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
176 struct sk_buff *skb;
177 dma_addr_t addr;
177 178
178 ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n", 179 ipoib_dbg_data(priv, "recv completion: id %d, op %d, status: %d\n",
179 wr_id, wc->opcode, wc->status); 180 wr_id, wc->opcode, wc->status);
180 181
181 if (wr_id & IPOIB_OP_RECV) { 182 if (unlikely(wr_id >= ipoib_recvq_size)) {
182 wr_id &= ~IPOIB_OP_RECV; 183 ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
183 184 wr_id, ipoib_recvq_size);
184 if (wr_id < ipoib_recvq_size) { 185 return;
185 struct sk_buff *skb = priv->rx_ring[wr_id].skb; 186 }
186 dma_addr_t addr = priv->rx_ring[wr_id].mapping;
187
188 if (unlikely(wc->status != IB_WC_SUCCESS)) {
189 if (wc->status != IB_WC_WR_FLUSH_ERR)
190 ipoib_warn(priv, "failed recv event "
191 "(status=%d, wrid=%d vend_err %x)\n",
192 wc->status, wr_id, wc->vendor_err);
193 dma_unmap_single(priv->ca->dma_device, addr,
194 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
195 dev_kfree_skb_any(skb);
196 priv->rx_ring[wr_id].skb = NULL;
197 return;
198 }
199 187
200 /* 188 skb = priv->rx_ring[wr_id].skb;
201 * If we can't allocate a new RX buffer, dump 189 addr = priv->rx_ring[wr_id].mapping;
202 * this packet and reuse the old buffer.
203 */
204 if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
205 ++priv->stats.rx_dropped;
206 goto repost;
207 }
208 190
209 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", 191 if (unlikely(wc->status != IB_WC_SUCCESS)) {
210 wc->byte_len, wc->slid); 192 if (wc->status != IB_WC_WR_FLUSH_ERR)
193 ipoib_warn(priv, "failed recv event "
194 "(status=%d, wrid=%d vend_err %x)\n",
195 wc->status, wr_id, wc->vendor_err);
196 dma_unmap_single(priv->ca->dma_device, addr,
197 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
198 dev_kfree_skb_any(skb);
199 priv->rx_ring[wr_id].skb = NULL;
200 return;
201 }
211 202
212 dma_unmap_single(priv->ca->dma_device, addr, 203 /*
213 IPOIB_BUF_SIZE, DMA_FROM_DEVICE); 204 * If we can't allocate a new RX buffer, dump
205 * this packet and reuse the old buffer.
206 */
207 if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
208 ++priv->stats.rx_dropped;
209 goto repost;
210 }
214 211
215 skb_put(skb, wc->byte_len); 212 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
216 skb_pull(skb, IB_GRH_BYTES); 213 wc->byte_len, wc->slid);
217 214
218 if (wc->slid != priv->local_lid || 215 dma_unmap_single(priv->ca->dma_device, addr,
219 wc->src_qp != priv->qp->qp_num) { 216 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
220 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
221 skb->mac.raw = skb->data;
222 skb_pull(skb, IPOIB_ENCAP_LEN);
223 217
224 dev->last_rx = jiffies; 218 skb_put(skb, wc->byte_len);
225 ++priv->stats.rx_packets; 219 skb_pull(skb, IB_GRH_BYTES);
226 priv->stats.rx_bytes += skb->len;
227 220
228 skb->dev = dev; 221 if (wc->slid != priv->local_lid ||
229 /* XXX get correct PACKET_ type here */ 222 wc->src_qp != priv->qp->qp_num) {
230 skb->pkt_type = PACKET_HOST; 223 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
231 netif_rx_ni(skb); 224 skb->mac.raw = skb->data;
232 } else { 225 skb_pull(skb, IPOIB_ENCAP_LEN);
233 ipoib_dbg_data(priv, "dropping loopback packet\n");
234 dev_kfree_skb_any(skb);
235 }
236 226
237 repost: 227 dev->last_rx = jiffies;
238 if (unlikely(ipoib_ib_post_receive(dev, wr_id))) 228 ++priv->stats.rx_packets;
239 ipoib_warn(priv, "ipoib_ib_post_receive failed " 229 priv->stats.rx_bytes += skb->len;
240 "for buf %d\n", wr_id);
241 } else
242 ipoib_warn(priv, "completion event with wrid %d\n",
243 wr_id);
244 230
231 skb->dev = dev;
232 /* XXX get correct PACKET_ type here */
233 skb->pkt_type = PACKET_HOST;
234 netif_rx_ni(skb);
245 } else { 235 } else {
246 struct ipoib_tx_buf *tx_req; 236 ipoib_dbg_data(priv, "dropping loopback packet\n");
247 unsigned long flags; 237 dev_kfree_skb_any(skb);
238 }
248 239
249 if (wr_id >= ipoib_sendq_size) { 240repost:
250 ipoib_warn(priv, "completion event with wrid %d (> %d)\n", 241 if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
251 wr_id, ipoib_sendq_size); 242 ipoib_warn(priv, "ipoib_ib_post_receive failed "
252 return; 243 "for buf %d\n", wr_id);
253 } 244}
254 245
255 ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id); 246static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
247{
248 struct ipoib_dev_priv *priv = netdev_priv(dev);
249 unsigned int wr_id = wc->wr_id;
250 struct ipoib_tx_buf *tx_req;
251 unsigned long flags;
256 252
257 tx_req = &priv->tx_ring[wr_id]; 253 ipoib_dbg_data(priv, "send completion: id %d, op %d, status: %d\n",
254 wr_id, wc->opcode, wc->status);
258 255
259 dma_unmap_single(priv->ca->dma_device, 256 if (unlikely(wr_id >= ipoib_sendq_size)) {
260 pci_unmap_addr(tx_req, mapping), 257 ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
261 tx_req->skb->len, 258 wr_id, ipoib_sendq_size);
262 DMA_TO_DEVICE); 259 return;
260 }
263 261
264 ++priv->stats.tx_packets; 262 tx_req = &priv->tx_ring[wr_id];
265 priv->stats.tx_bytes += tx_req->skb->len;
266 263
267 dev_kfree_skb_any(tx_req->skb); 264 dma_unmap_single(priv->ca->dma_device,
265 pci_unmap_addr(tx_req, mapping),
266 tx_req->skb->len,
267 DMA_TO_DEVICE);
268 268
269 spin_lock_irqsave(&priv->tx_lock, flags); 269 ++priv->stats.tx_packets;
270 ++priv->tx_tail; 270 priv->stats.tx_bytes += tx_req->skb->len;
271 if (netif_queue_stopped(dev) &&
272 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
273 priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
274 netif_wake_queue(dev);
275 spin_unlock_irqrestore(&priv->tx_lock, flags);
276 271
277 if (wc->status != IB_WC_SUCCESS && 272 dev_kfree_skb_any(tx_req->skb);
278 wc->status != IB_WC_WR_FLUSH_ERR) 273
279 ipoib_warn(priv, "failed send event " 274 spin_lock_irqsave(&priv->tx_lock, flags);
280 "(status=%d, wrid=%d vend_err %x)\n", 275 ++priv->tx_tail;
281 wc->status, wr_id, wc->vendor_err); 276 if (netif_queue_stopped(dev) &&
282 } 277 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
278 priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
279 netif_wake_queue(dev);
280 spin_unlock_irqrestore(&priv->tx_lock, flags);
281
282 if (wc->status != IB_WC_SUCCESS &&
283 wc->status != IB_WC_WR_FLUSH_ERR)
284 ipoib_warn(priv, "failed send event "
285 "(status=%d, wrid=%d vend_err %x)\n",
286 wc->status, wr_id, wc->vendor_err);
287}
288
289static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
290{
291 if (wc->wr_id & IPOIB_OP_RECV)
292 ipoib_ib_handle_rx_wc(dev, wc);
293 else
294 ipoib_ib_handle_tx_wc(dev, wc);
283} 295}
284 296
285void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) 297void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
@@ -320,7 +332,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
320 struct ipoib_tx_buf *tx_req; 332 struct ipoib_tx_buf *tx_req;
321 dma_addr_t addr; 333 dma_addr_t addr;
322 334
323 if (skb->len > dev->mtu + INFINIBAND_ALEN) { 335 if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) {
324 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", 336 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
325 skb->len, dev->mtu + INFINIBAND_ALEN); 337 skb->len, dev->mtu + INFINIBAND_ALEN);
326 ++priv->stats.tx_dropped; 338 ++priv->stats.tx_dropped;
@@ -619,8 +631,10 @@ void ipoib_ib_dev_flush(void *_dev)
619 * The device could have been brought down between the start and when 631 * The device could have been brought down between the start and when
620 * we get here, don't bring it back up if it's not configured up 632 * we get here, don't bring it back up if it's not configured up
621 */ 633 */
622 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 634 if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
623 ipoib_ib_dev_up(dev); 635 ipoib_ib_dev_up(dev);
636 ipoib_mcast_restart_task(dev);
637 }
624 638
625 mutex_lock(&priv->vlan_mutex); 639 mutex_lock(&priv->vlan_mutex);
626 640
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cf71d2a5515c..1eaf00e9862c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -40,7 +40,6 @@
40 40
41#include <linux/init.h> 41#include <linux/init.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/vmalloc.h>
44#include <linux/kernel.h> 43#include <linux/kernel.h>
45 44
46#include <linux/if_arp.h> /* For ARPHRD_xxx */ 45#include <linux/if_arp.h> /* For ARPHRD_xxx */
@@ -82,6 +81,8 @@ static const u8 ipv4_bcast_addr[] = {
82 81
83struct workqueue_struct *ipoib_workqueue; 82struct workqueue_struct *ipoib_workqueue;
84 83
84struct ib_sa_client ipoib_sa_client;
85
85static void ipoib_add_one(struct ib_device *device); 86static void ipoib_add_one(struct ib_device *device);
86static void ipoib_remove_one(struct ib_device *device); 87static void ipoib_remove_one(struct ib_device *device);
87 88
@@ -336,7 +337,8 @@ void ipoib_flush_paths(struct net_device *dev)
336 struct ipoib_path *path, *tp; 337 struct ipoib_path *path, *tp;
337 LIST_HEAD(remove_list); 338 LIST_HEAD(remove_list);
338 339
339 spin_lock_irq(&priv->lock); 340 spin_lock_irq(&priv->tx_lock);
341 spin_lock(&priv->lock);
340 342
341 list_splice(&priv->path_list, &remove_list); 343 list_splice(&priv->path_list, &remove_list);
342 INIT_LIST_HEAD(&priv->path_list); 344 INIT_LIST_HEAD(&priv->path_list);
@@ -347,12 +349,15 @@ void ipoib_flush_paths(struct net_device *dev)
347 list_for_each_entry_safe(path, tp, &remove_list, list) { 349 list_for_each_entry_safe(path, tp, &remove_list, list) {
348 if (path->query) 350 if (path->query)
349 ib_sa_cancel_query(path->query_id, path->query); 351 ib_sa_cancel_query(path->query_id, path->query);
350 spin_unlock_irq(&priv->lock); 352 spin_unlock(&priv->lock);
353 spin_unlock_irq(&priv->tx_lock);
351 wait_for_completion(&path->done); 354 wait_for_completion(&path->done);
352 path_free(dev, path); 355 path_free(dev, path);
353 spin_lock_irq(&priv->lock); 356 spin_lock_irq(&priv->tx_lock);
357 spin_lock(&priv->lock);
354 } 358 }
355 spin_unlock_irq(&priv->lock); 359 spin_unlock(&priv->lock);
360 spin_unlock_irq(&priv->tx_lock);
356} 361}
357 362
358static void path_rec_completion(int status, 363static void path_rec_completion(int status,
@@ -459,7 +464,7 @@ static int path_rec_start(struct net_device *dev,
459 init_completion(&path->done); 464 init_completion(&path->done);
460 465
461 path->query_id = 466 path->query_id =
462 ib_sa_path_rec_get(priv->ca, priv->port, 467 ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
463 &path->pathrec, 468 &path->pathrec,
464 IB_SA_PATH_REC_DGID | 469 IB_SA_PATH_REC_DGID |
465 IB_SA_PATH_REC_SGID | 470 IB_SA_PATH_REC_SGID |
@@ -615,7 +620,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
615 struct ipoib_neigh *neigh; 620 struct ipoib_neigh *neigh;
616 unsigned long flags; 621 unsigned long flags;
617 622
618 if (!spin_trylock_irqsave(&priv->tx_lock, flags)) 623 if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
619 return NETDEV_TX_LOCKED; 624 return NETDEV_TX_LOCKED;
620 625
621 /* 626 /*
@@ -628,7 +633,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
628 return NETDEV_TX_BUSY; 633 return NETDEV_TX_BUSY;
629 } 634 }
630 635
631 if (skb->dst && skb->dst->neighbour) { 636 if (likely(skb->dst && skb->dst->neighbour)) {
632 if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { 637 if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
633 ipoib_path_lookup(skb, dev); 638 ipoib_path_lookup(skb, dev);
634 goto out; 639 goto out;
@@ -1107,13 +1112,16 @@ static void ipoib_add_one(struct ib_device *device)
1107 struct ipoib_dev_priv *priv; 1112 struct ipoib_dev_priv *priv;
1108 int s, e, p; 1113 int s, e, p;
1109 1114
1115 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
1116 return;
1117
1110 dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); 1118 dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
1111 if (!dev_list) 1119 if (!dev_list)
1112 return; 1120 return;
1113 1121
1114 INIT_LIST_HEAD(dev_list); 1122 INIT_LIST_HEAD(dev_list);
1115 1123
1116 if (device->node_type == IB_NODE_SWITCH) { 1124 if (device->node_type == RDMA_NODE_IB_SWITCH) {
1117 s = 0; 1125 s = 0;
1118 e = 0; 1126 e = 0;
1119 } else { 1127 } else {
@@ -1137,6 +1145,9 @@ static void ipoib_remove_one(struct ib_device *device)
1137 struct ipoib_dev_priv *priv, *tmp; 1145 struct ipoib_dev_priv *priv, *tmp;
1138 struct list_head *dev_list; 1146 struct list_head *dev_list;
1139 1147
1148 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
1149 return;
1150
1140 dev_list = ib_get_client_data(device, &ipoib_client); 1151 dev_list = ib_get_client_data(device, &ipoib_client);
1141 1152
1142 list_for_each_entry_safe(priv, tmp, dev_list, list) { 1153 list_for_each_entry_safe(priv, tmp, dev_list, list) {
@@ -1181,13 +1192,16 @@ static int __init ipoib_init_module(void)
1181 goto err_fs; 1192 goto err_fs;
1182 } 1193 }
1183 1194
1195 ib_sa_register_client(&ipoib_sa_client);
1196
1184 ret = ib_register_client(&ipoib_client); 1197 ret = ib_register_client(&ipoib_client);
1185 if (ret) 1198 if (ret)
1186 goto err_wq; 1199 goto err_sa;
1187 1200
1188 return 0; 1201 return 0;
1189 1202
1190err_wq: 1203err_sa:
1204 ib_sa_unregister_client(&ipoib_sa_client);
1191 destroy_workqueue(ipoib_workqueue); 1205 destroy_workqueue(ipoib_workqueue);
1192 1206
1193err_fs: 1207err_fs:
@@ -1199,6 +1213,7 @@ err_fs:
1199static void __exit ipoib_cleanup_module(void) 1213static void __exit ipoib_cleanup_module(void)
1200{ 1214{
1201 ib_unregister_client(&ipoib_client); 1215 ib_unregister_client(&ipoib_client);
1216 ib_sa_unregister_client(&ipoib_sa_client);
1202 ipoib_unregister_debugfs(); 1217 ipoib_unregister_debugfs();
1203 destroy_workqueue(ipoib_workqueue); 1218 destroy_workqueue(ipoib_workqueue);
1204} 1219}
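
The ipoib_main.c changes above register an ib_sa_client at module init, splice it into the existing error-unwinding ladder (err_wq becomes err_sa), and unregister it in reverse order at cleanup. Here is a compact standalone sketch of that init/teardown pattern, assuming hypothetical register_sa()/register_client()/create_wq() helpers in place of the real ib_sa_register_client()/ib_register_client()/create_workqueue() calls.

#include <stdio.h>
#include <stdlib.h>

static int   register_sa(void)       { puts("sa registered");   return 0; }
static void  unregister_sa(void)     { puts("sa unregistered"); }
static int   register_client(void)   { puts("client registered"); return 0; }
static void  unregister_client(void) { puts("client unregistered"); }
static void *create_wq(void)         { return malloc(1); }
static void  destroy_wq(void *p)     { free(p); puts("wq destroyed"); }

static void *wq;

/* Acquire resources in order; on error, fall through labels that
 * release everything acquired so far, in reverse order. */
static int init_module_sketch(void)
{
        int ret;

        wq = create_wq();
        if (!wq)
                return -1;

        ret = register_sa();
        if (ret)
                goto err_wq;

        ret = register_client();
        if (ret)
                goto err_sa;

        return 0;

err_sa:
        unregister_sa();
err_wq:
        destroy_wq(wq);
        return ret;
}

/* Cleanup mirrors init, again in reverse order. */
static void cleanup_module_sketch(void)
{
        unregister_client();
        unregister_sa();
        destroy_wq(wq);
}

int main(void)
{
        if (init_module_sketch())
                return 1;
        cleanup_module_sketch();
        return 0;
}
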
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index b5e6a7be603d..3faa1820f0e9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -326,6 +326,7 @@ ipoib_mcast_sendonly_join_complete(int status,
326 326
327 /* Clear the busy flag so we try again */ 327 /* Clear the busy flag so we try again */
328 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); 328 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
329 mcast->query = NULL;
329 } 330 }
330 331
331 complete(&mcast->done); 332 complete(&mcast->done);
@@ -360,7 +361,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
360 361
361 init_completion(&mcast->done); 362 init_completion(&mcast->done);
362 363
363 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, 364 ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port, &rec,
364 IB_SA_MCMEMBER_REC_MGID | 365 IB_SA_MCMEMBER_REC_MGID |
365 IB_SA_MCMEMBER_REC_PORT_GID | 366 IB_SA_MCMEMBER_REC_PORT_GID |
366 IB_SA_MCMEMBER_REC_PKEY | 367 IB_SA_MCMEMBER_REC_PKEY |
@@ -471,22 +472,32 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
471 472
472 if (create) { 473 if (create) {
473 comp_mask |= 474 comp_mask |=
474 IB_SA_MCMEMBER_REC_QKEY | 475 IB_SA_MCMEMBER_REC_QKEY |
475 IB_SA_MCMEMBER_REC_SL | 476 IB_SA_MCMEMBER_REC_MTU_SELECTOR |
476 IB_SA_MCMEMBER_REC_FLOW_LABEL | 477 IB_SA_MCMEMBER_REC_MTU |
477 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 478 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
479 IB_SA_MCMEMBER_REC_RATE_SELECTOR |
480 IB_SA_MCMEMBER_REC_RATE |
481 IB_SA_MCMEMBER_REC_SL |
482 IB_SA_MCMEMBER_REC_FLOW_LABEL |
483 IB_SA_MCMEMBER_REC_HOP_LIMIT;
478 484
479 rec.qkey = priv->broadcast->mcmember.qkey; 485 rec.qkey = priv->broadcast->mcmember.qkey;
486 rec.mtu_selector = IB_SA_EQ;
487 rec.mtu = priv->broadcast->mcmember.mtu;
488 rec.traffic_class = priv->broadcast->mcmember.traffic_class;
489 rec.rate_selector = IB_SA_EQ;
490 rec.rate = priv->broadcast->mcmember.rate;
480 rec.sl = priv->broadcast->mcmember.sl; 491 rec.sl = priv->broadcast->mcmember.sl;
481 rec.flow_label = priv->broadcast->mcmember.flow_label; 492 rec.flow_label = priv->broadcast->mcmember.flow_label;
482 rec.traffic_class = priv->broadcast->mcmember.traffic_class; 493 rec.hop_limit = priv->broadcast->mcmember.hop_limit;
483 } 494 }
484 495
485 init_completion(&mcast->done); 496 init_completion(&mcast->done);
486 497
487 ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask, 498 ret = ib_sa_mcmember_rec_set(&ipoib_sa_client, priv->ca, priv->port,
488 mcast->backoff * 1000, GFP_ATOMIC, 499 &rec, comp_mask, mcast->backoff * 1000,
489 ipoib_mcast_join_complete, 500 GFP_ATOMIC, ipoib_mcast_join_complete,
490 mcast, &mcast->query); 501 mcast, &mcast->query);
491 502
492 if (ret < 0) { 503 if (ret < 0) {
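
The join hunk above widens the SA component mask when the group has to be created, copying MTU, rate, SL, flow label, traffic class and hop limit from the broadcast group record so the new group stays compatible with it. A sketch of the mask-plus-fields idea in plain C follows; the flag values, the selector constant and the trimmed-down mcmember_rec are invented for the example, while the real IB_SA_MCMEMBER_REC_* and IB_SA_EQ constants come from the kernel's ib_sa.h.

#include <stdint.h>
#include <stdio.h>

/* Illustrative component-mask bits and selector value only. */
enum {
        REC_MGID     = 1u << 0,
        REC_PORT_GID = 1u << 1,
        REC_PKEY     = 1u << 2,
        REC_QKEY     = 1u << 3,
        REC_MTU_SEL  = 1u << 4,
        REC_MTU      = 1u << 5,
        REC_SL       = 1u << 6,
        SEL_EXACTLY  = 2,
};

struct mcmember_rec {
        uint32_t qkey;
        uint8_t  mtu_selector, mtu, sl;
};

/* Only fields whose bits are set in comp_mask are meaningful to the
 * SA; when the group must be created, the extra parameters are copied
 * from the already-joined broadcast group. */
static uint32_t build_join_mask(int create, struct mcmember_rec *rec,
                                const struct mcmember_rec *bcast)
{
        uint32_t comp_mask = REC_MGID | REC_PORT_GID | REC_PKEY;

        if (create) {
                comp_mask |= REC_QKEY | REC_MTU_SEL | REC_MTU | REC_SL;
                rec->qkey         = bcast->qkey;
                rec->mtu_selector = SEL_EXACTLY;
                rec->mtu          = bcast->mtu;
                rec->sl           = bcast->sl;
        }
        return comp_mask;
}

int main(void)
{
        struct mcmember_rec rec = { 0 };
        const struct mcmember_rec bcast = { .qkey = 0x0b1b, .mtu = 4, .sl = 0 };

        printf("comp_mask = 0x%x\n", (unsigned)build_join_mask(1, &rec, &bcast));
        return 0;
}
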
@@ -527,7 +538,7 @@ void ipoib_mcast_join_task(void *dev_ptr)
527 priv->local_rate = attr.active_speed * 538 priv->local_rate = attr.active_speed *
528 ib_width_enum_to_int(attr.active_width); 539 ib_width_enum_to_int(attr.active_width);
529 } else 540 } else
530 ipoib_warn(priv, "ib_query_port failed\n"); 541 ipoib_warn(priv, "ib_query_port failed\n");
531 } 542 }
532 543
533 if (!priv->broadcast) { 544 if (!priv->broadcast) {
@@ -680,7 +691,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
680 * Just make one shot at leaving and don't wait for a reply; 691 * Just make one shot at leaving and don't wait for a reply;
681 * if we fail, too bad. 692 * if we fail, too bad.
682 */ 693 */
683 ret = ib_sa_mcmember_rec_delete(priv->ca, priv->port, &rec, 694 ret = ib_sa_mcmember_rec_delete(&ipoib_sa_client, priv->ca, priv->port, &rec,
684 IB_SA_MCMEMBER_REC_MGID | 695 IB_SA_MCMEMBER_REC_MGID |
685 IB_SA_MCMEMBER_REC_PORT_GID | 696 IB_SA_MCMEMBER_REC_PORT_GID |
686 IB_SA_MCMEMBER_REC_PKEY | 697 IB_SA_MCMEMBER_REC_PKEY |
@@ -794,7 +805,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
794 } 805 }
795 806
796 if (priv->broadcast) { 807 if (priv->broadcast) {
797 rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); 808 rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
798 list_add_tail(&priv->broadcast->list, &remove_list); 809 list_add_tail(&priv->broadcast->list, &remove_list);
799 priv->broadcast = NULL; 810 priv->broadcast = NULL;
800 } 811 }
diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
index fead87d1eff9..365a1b5f19e0 100644
--- a/drivers/infiniband/ulp/iser/Kconfig
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_ISER 1config INFINIBAND_ISER
2 tristate "ISCSI RDMA Protocol" 2 tristate "ISCSI RDMA Protocol"
3 depends on INFINIBAND && SCSI 3 depends on INFINIBAND && SCSI && INET
4 select SCSI_ISCSI_ATTRS 4 select SCSI_ISCSI_ATTRS
5 ---help--- 5 ---help---
6 Support for the ISCSI RDMA Protocol over InfiniBand. This 6 Support for the ISCSI RDMA Protocol over InfiniBand. This
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 101e407eaa43..2a14fe2e3226 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -545,6 +545,7 @@ static struct scsi_host_template iscsi_iser_sht = {
545 .queuecommand = iscsi_queuecommand, 545 .queuecommand = iscsi_queuecommand,
546 .can_queue = ISCSI_XMIT_CMDS_MAX - 1, 546 .can_queue = ISCSI_XMIT_CMDS_MAX - 1,
547 .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, 547 .sg_tablesize = ISCSI_ISER_SG_TABLESIZE,
548 .max_sectors = 1024,
548 .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, 549 .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN,
549 .eh_abort_handler = iscsi_eh_abort, 550 .eh_abort_handler = iscsi_eh_abort,
550 .eh_host_reset_handler = iscsi_eh_host_reset, 551 .eh_host_reset_handler = iscsi_eh_host_reset,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 7c3d0c96d889..2cf9ae0def1c 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -82,8 +82,12 @@
82 __func__ , ## arg); \ 82 __func__ , ## arg); \
83 } while (0) 83 } while (0)
84 84
85#define SHIFT_4K 12
86#define SIZE_4K (1UL << SHIFT_4K)
87#define MASK_4K (~(SIZE_4K-1))
88
 85 /* support up to 512KB in one RDMA */ 89 /* support up to 512KB in one RDMA */
REPLACED-BELOW
86#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> PAGE_SHIFT) 90#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K)
87#define ISCSI_ISER_MAX_LUN 256 91#define ISCSI_ISER_MAX_LUN 256
88#define ISCSI_ISER_MAX_CMD_LEN 16 92#define ISCSI_ISER_MAX_CMD_LEN 16
89 93
@@ -171,6 +175,7 @@ struct iser_mem_reg {
171 u64 va; 175 u64 va;
172 u64 len; 176 u64 len;
173 void *mem_h; 177 void *mem_h;
178 int is_fmr;
174}; 179};
175 180
176struct iser_regd_buf { 181struct iser_regd_buf {
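
The header now pins the RDMA page bookkeeping to 4K instead of PAGE_SIZE: SHIFT_4K/SIZE_4K/MASK_4K, and ISCSI_ISER_SG_TABLESIZE expressed in 4K pages. A standalone snippet showing what the macro arithmetic does (MASK_4K keeps the aligned base, ~MASK_4K keeps the in-page offset); the sample address is arbitrary.

#include <stdio.h>

#define SHIFT_4K 12
#define SIZE_4K  (1UL << SHIFT_4K)
#define MASK_4K  (~(SIZE_4K - 1))

#define IS_4K_ALIGNED(addr) ((((unsigned long)(addr)) & ~MASK_4K) == 0)

int main(void)
{
        unsigned long addr = 0x12345678;

        printf("page base:   0x%lx\n", addr & MASK_4K);    /* 0x12345000 */
        printf("page offset: 0x%lx\n", addr & ~MASK_4K);   /* 0x678      */
        printf("aligned:     %d\n", IS_4K_ALIGNED(addr));  /* 0          */

        /* 512KB of data expressed as 4K pages: 0x80000 >> 12 = 128 */
        printf("sg_tablesize: %lu\n", 0x80000UL >> SHIFT_4K);
        return 0;
}
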
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 31950a522a1c..d0b03f426581 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -42,6 +42,7 @@
42#include "iscsi_iser.h" 42#include "iscsi_iser.h"
43 43
44#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ 44#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
45
45/** 46/**
46 * Decrements the reference count for the 47 * Decrements the reference count for the
47 * registered buffer & releases it 48 * registered buffer & releases it
@@ -55,7 +56,7 @@ int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
55 if ((atomic_read(&regd_buf->ref_count) == 0) || 56 if ((atomic_read(&regd_buf->ref_count) == 0) ||
56 atomic_dec_and_test(&regd_buf->ref_count)) { 57 atomic_dec_and_test(&regd_buf->ref_count)) {
57 /* if we used the dma mr, unreg is just NOP */ 58 /* if we used the dma mr, unreg is just NOP */
58 if (regd_buf->reg.rkey != 0) 59 if (regd_buf->reg.is_fmr)
59 iser_unreg_mem(&regd_buf->reg); 60 iser_unreg_mem(&regd_buf->reg);
60 61
61 if (regd_buf->dma_addr) { 62 if (regd_buf->dma_addr) {
@@ -90,9 +91,9 @@ void iser_reg_single(struct iser_device *device,
90 BUG_ON(dma_mapping_error(dma_addr)); 91 BUG_ON(dma_mapping_error(dma_addr));
91 92
92 regd_buf->reg.lkey = device->mr->lkey; 93 regd_buf->reg.lkey = device->mr->lkey;
93 regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */
94 regd_buf->reg.len = regd_buf->data_size; 94 regd_buf->reg.len = regd_buf->data_size;
95 regd_buf->reg.va = dma_addr; 95 regd_buf->reg.va = dma_addr;
96 regd_buf->reg.is_fmr = 0;
96 97
97 regd_buf->dma_addr = dma_addr; 98 regd_buf->dma_addr = dma_addr;
98 regd_buf->direction = direction; 99 regd_buf->direction = direction;
@@ -239,7 +240,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
239 int i; 240 int i;
240 241
241 /* compute the offset of first element */ 242 /* compute the offset of first element */
242 page_vec->offset = (u64) sg[0].offset; 243 page_vec->offset = (u64) sg[0].offset & ~MASK_4K;
243 244
244 for (i = 0; i < data->dma_nents; i++) { 245 for (i = 0; i < data->dma_nents; i++) {
245 total_sz += sg_dma_len(&sg[i]); 246 total_sz += sg_dma_len(&sg[i]);
@@ -247,21 +248,30 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
247 first_addr = sg_dma_address(&sg[i]); 248 first_addr = sg_dma_address(&sg[i]);
248 last_addr = first_addr + sg_dma_len(&sg[i]); 249 last_addr = first_addr + sg_dma_len(&sg[i]);
249 250
250 start_aligned = !(first_addr & ~PAGE_MASK); 251 start_aligned = !(first_addr & ~MASK_4K);
251 end_aligned = !(last_addr & ~PAGE_MASK); 252 end_aligned = !(last_addr & ~MASK_4K);
252 253
253 /* continue to collect page fragments till aligned or SG ends */ 254 /* continue to collect page fragments till aligned or SG ends */
254 while (!end_aligned && (i + 1 < data->dma_nents)) { 255 while (!end_aligned && (i + 1 < data->dma_nents)) {
255 i++; 256 i++;
256 total_sz += sg_dma_len(&sg[i]); 257 total_sz += sg_dma_len(&sg[i]);
257 last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]); 258 last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]);
258 end_aligned = !(last_addr & ~PAGE_MASK); 259 end_aligned = !(last_addr & ~MASK_4K);
259 } 260 }
260 261
261 first_addr = first_addr & PAGE_MASK; 262 /* handle the 1st page in the 1st DMA element */
262 263 if (cur_page == 0) {
263 for (page = first_addr; page < last_addr; page += PAGE_SIZE) 264 page = first_addr & MASK_4K;
264 page_vec->pages[cur_page++] = page; 265 page_vec->pages[cur_page] = page;
266 cur_page++;
267 page += SIZE_4K;
268 } else
269 page = first_addr;
270
271 for (; page < last_addr; page += SIZE_4K) {
272 page_vec->pages[cur_page] = page;
273 cur_page++;
274 }
265 275
266 } 276 }
267 page_vec->data_size = total_sz; 277 page_vec->data_size = total_sz;
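
The hunk above rewrites the page-vector fill loop so that only the very first entry may be taken from a mid-page address (its 4K-aligned base is stored, the byte offset is kept in page_vec->offset), and every page address advances by SIZE_4K rather than PAGE_SIZE. Below is a simplified standalone version of that expansion, assuming, as iser_data_buf_aligned_len() guarantees on this path, that every segment after the first is already 4K aligned; struct dma_seg and the sample addresses in main() are made up.

#include <stdint.h>
#include <stdio.h>

#define SHIFT_4K 12
#define SIZE_4K  (1UL << SHIFT_4K)
#define MASK_4K  (~(SIZE_4K - 1))

struct dma_seg { uint64_t addr; uint64_t len; };

/* Expand contiguous DMA segments into a flat list of 4K page
 * addresses, much as iser_sg_to_page_vec() does for the FMR mapping. */
static int seg_to_page_vec(const struct dma_seg *seg, int nseg,
                           uint64_t *pages, int max_pages)
{
        int cur_page = 0;

        for (int i = 0; i < nseg; i++) {
                uint64_t first = seg[i].addr;
                uint64_t last  = seg[i].addr + seg[i].len;
                uint64_t page;

                if (cur_page == 0)
                        page = first & MASK_4K;  /* 1st page: aligned base   */
                else
                        page = first;            /* later segs start aligned */

                for (; page < last && cur_page < max_pages; page += SIZE_4K)
                        pages[cur_page++] = page;
        }
        return cur_page;
}

int main(void)
{
        struct dma_seg segs[] = { { 0x10000678, 0x2000 }, { 0x20000000, 0x1000 } };
        uint64_t pages[8];
        int n = seg_to_page_vec(segs, 2, pages, 8);

        for (int i = 0; i < n; i++)
                printf("page[%d] = 0x%llx\n", i, (unsigned long long)pages[i]);
        return 0;
}

The first segment above starts at offset 0x678 into a page, so three page entries come from it; its aligned base goes into the vector and the 0x678 is what the driver keeps in page_vec->offset.
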
@@ -269,8 +279,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
269 return cur_page; 279 return cur_page;
270} 280}
271 281
272#define MASK_4K ((1UL << 12) - 1) /* 0xFFF */ 282#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
273#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & MASK_4K) == 0)
274 283
275/** 284/**
276 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned 285 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
@@ -320,9 +329,9 @@ static void iser_data_buf_dump(struct iser_data_buf *data)
320 struct scatterlist *sg = (struct scatterlist *)data->buf; 329 struct scatterlist *sg = (struct scatterlist *)data->buf;
321 int i; 330 int i;
322 331
323 for (i = 0; i < data->size; i++) 332 for (i = 0; i < data->dma_nents; i++)
324 iser_err("sg[%d] dma_addr:0x%lX page:0x%p " 333 iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
325 "off:%d sz:%d dma_len:%d\n", 334 "off:0x%x sz:0x%x dma_len:0x%x\n",
326 i, (unsigned long)sg_dma_address(&sg[i]), 335 i, (unsigned long)sg_dma_address(&sg[i]),
327 sg[i].page, sg[i].offset, 336 sg[i].page, sg[i].offset,
328 sg[i].length,sg_dma_len(&sg[i])); 337 sg[i].length,sg_dma_len(&sg[i]));
@@ -352,7 +361,7 @@ static void iser_page_vec_build(struct iser_data_buf *data,
352 361
353 page_vec->length = page_vec_len; 362 page_vec->length = page_vec_len;
354 363
355 if (page_vec_len * PAGE_SIZE < page_vec->data_size) { 364 if (page_vec_len * SIZE_4K < page_vec->data_size) {
356 iser_err("page_vec too short to hold this SG\n"); 365 iser_err("page_vec too short to hold this SG\n");
357 iser_data_buf_dump(data); 366 iser_data_buf_dump(data);
358 iser_dump_page_vec(page_vec); 367 iser_dump_page_vec(page_vec);
@@ -370,15 +379,18 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
370 enum iser_data_dir cmd_dir) 379 enum iser_data_dir cmd_dir)
371{ 380{
372 struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; 381 struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn;
382 struct iser_device *device = ib_conn->device;
373 struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; 383 struct iser_data_buf *mem = &iser_ctask->data[cmd_dir];
374 struct iser_regd_buf *regd_buf; 384 struct iser_regd_buf *regd_buf;
375 int aligned_len; 385 int aligned_len;
376 int err; 386 int err;
387 int i;
388 struct scatterlist *sg;
377 389
378 regd_buf = &iser_ctask->rdma_regd[cmd_dir]; 390 regd_buf = &iser_ctask->rdma_regd[cmd_dir];
379 391
380 aligned_len = iser_data_buf_aligned_len(mem); 392 aligned_len = iser_data_buf_aligned_len(mem);
381 if (aligned_len != mem->size) { 393 if (aligned_len != mem->dma_nents) {
382 iser_err("rdma alignment violation %d/%d aligned\n", 394 iser_err("rdma alignment violation %d/%d aligned\n",
383 aligned_len, mem->size); 395 aligned_len, mem->size);
384 iser_data_buf_dump(mem); 396 iser_data_buf_dump(mem);
@@ -389,10 +401,38 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
389 mem = &iser_ctask->data_copy[cmd_dir]; 401 mem = &iser_ctask->data_copy[cmd_dir];
390 } 402 }
391 403
392 iser_page_vec_build(mem, ib_conn->page_vec); 404 /* if there a single dma entry, FMR is not needed */
393 err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg); 405 if (mem->dma_nents == 1) {
394 if (err) 406 sg = (struct scatterlist *)mem->buf;
395 return err; 407
408 regd_buf->reg.lkey = device->mr->lkey;
409 regd_buf->reg.rkey = device->mr->rkey;
410 regd_buf->reg.len = sg_dma_len(&sg[0]);
411 regd_buf->reg.va = sg_dma_address(&sg[0]);
412 regd_buf->reg.is_fmr = 0;
413
414 iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
415 "va: 0x%08lX sz: %ld]\n",
416 (unsigned int)regd_buf->reg.lkey,
417 (unsigned int)regd_buf->reg.rkey,
418 (unsigned long)regd_buf->reg.va,
419 (unsigned long)regd_buf->reg.len);
420 } else { /* use FMR for multiple dma entries */
421 iser_page_vec_build(mem, ib_conn->page_vec);
422 err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg);
423 if (err) {
424 iser_data_buf_dump(mem);
425 iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents,
426 ntoh24(iser_ctask->desc.iscsi_header.dlength));
427 iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
428 ib_conn->page_vec->data_size, ib_conn->page_vec->length,
429 ib_conn->page_vec->offset);
430 for (i=0 ; i<ib_conn->page_vec->length ; i++)
431 iser_err("page_vec[%d] = 0x%llx\n", i,
432 (unsigned long long) ib_conn->page_vec->pages[i]);
433 return err;
434 }
435 }
396 436
397 /* take a reference on this regd buf such that it will not be released * 437 /* take a reference on this regd buf such that it will not be released *
398 * (eg in send dto completion) before we get the scsi response */ 438 * (eg in send dto completion) before we get the scsi response */
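
iser_reg_rdma_mem() above now short-circuits the single-DMA-entry case: one contiguous range is already covered by the pre-registered DMA MR, so its lkey/rkey are reused and is_fmr stays 0, while multi-entry buffers still go through the page vector and FMR path. A rough sketch of that decision follows; the structures and key values are invented stand-ins for the driver's iser_mem_reg and ib_fmr_pool_map_phys() plumbing.

#include <stdint.h>
#include <stdio.h>

struct mem_reg { uint32_t lkey, rkey; uint64_t va, len; int is_fmr; };
struct dma_seg { uint64_t addr, len; };

static const uint32_t dma_mr_lkey = 0x100, dma_mr_rkey = 0x200;

/* Pretend FMR mapping: the real code builds a page vector and maps it
 * through the FMR pool. */
static int map_fmr(const struct dma_seg *segs, int n, struct mem_reg *reg)
{
        reg->lkey = 0x300;
        reg->rkey = 0x400;
        reg->va   = segs[0].addr;
        reg->len  = 0;
        for (int i = 0; i < n; i++)
                reg->len += segs[i].len;
        reg->is_fmr = 1;           /* needs explicit unregistration later */
        return 0;
}

static int reg_rdma_mem(const struct dma_seg *segs, int n, struct mem_reg *reg)
{
        if (n == 1) {
                /* One contiguous DMA range: the global DMA MR covers it,
                 * so no FMR (and no later unreg) is needed. */
                reg->lkey   = dma_mr_lkey;
                reg->rkey   = dma_mr_rkey;
                reg->va     = segs[0].addr;
                reg->len    = segs[0].len;
                reg->is_fmr = 0;
                return 0;
        }
        return map_fmr(segs, n, reg);
}

int main(void)
{
        struct dma_seg one = { 0x1000, 0x2000 };
        struct mem_reg reg;

        reg_rdma_mem(&one, 1, &reg);
        printf("is_fmr=%d len=0x%llx\n", reg.is_fmr, (unsigned long long)reg.len);
        return 0;
}
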
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 72febf1f8ff8..ecdca7fc1e4c 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -88,8 +88,9 @@ static int iser_create_device_ib_res(struct iser_device *device)
88 iser_cq_tasklet_fn, 88 iser_cq_tasklet_fn,
89 (unsigned long)device); 89 (unsigned long)device);
90 90
91 device->mr = ib_get_dma_mr(device->pd, 91 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
92 IB_ACCESS_LOCAL_WRITE); 92 IB_ACCESS_REMOTE_WRITE |
93 IB_ACCESS_REMOTE_READ);
93 if (IS_ERR(device->mr)) 94 if (IS_ERR(device->mr))
94 goto dma_mr_err; 95 goto dma_mr_err;
95 96
@@ -150,7 +151,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
150 } 151 }
151 ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1); 152 ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
152 153
153 params.page_shift = PAGE_SHIFT; 154 params.page_shift = SHIFT_4K;
154 /* when the first/last SG elements are not start/end * 155 /* when the first/last SG elements are not start/end *
155 * page aligned, the map would be of N+1 pages */ 156 * page aligned, the map would be of N+1 pages */
REPLACED-BELOW
156 params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; 157 params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
@@ -604,8 +605,9 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
604 605
605 mem_reg->lkey = mem->fmr->lkey; 606 mem_reg->lkey = mem->fmr->lkey;
606 mem_reg->rkey = mem->fmr->rkey; 607 mem_reg->rkey = mem->fmr->rkey;
607 mem_reg->len = page_vec->length * PAGE_SIZE; 608 mem_reg->len = page_vec->length * SIZE_4K;
608 mem_reg->va = io_addr; 609 mem_reg->va = io_addr;
610 mem_reg->is_fmr = 1;
609 mem_reg->mem_h = (void *)mem; 611 mem_reg->mem_h = (void *)mem;
610 612
611 mem_reg->va += page_vec->offset; 613 mem_reg->va += page_vec->offset;
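
The iser_verbs.c hunks above size the FMR pool with page_shift = SHIFT_4K and max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1, because a transfer that does not start on a 4K boundary can touch one extra page; the registered region is then described as page_vec->length * SIZE_4K bytes starting at io_addr plus the in-page offset. A short calculation illustrating the "+1"; the addresses are arbitrary.

#include <stdio.h>

#define SHIFT_4K 12
#define SIZE_4K  (1UL << SHIFT_4K)
#define MASK_4K  (~(SIZE_4K - 1))

/* Number of 4K pages touched by a byte range that may start mid-page. */
static unsigned long pages_spanned(unsigned long va, unsigned long len)
{
        unsigned long first = va & MASK_4K;
        unsigned long last  = (va + len - 1) & MASK_4K;

        return ((last - first) >> SHIFT_4K) + 1;
}

int main(void)
{
        printf("%lu\n", pages_spanned(0x10000000, 0x80000)); /* 128: aligned 512KB  */
        printf("%lu\n", pages_spanned(0x10000678, 0x80000)); /* 129: unaligned start */
        return 0;
}
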
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 8257d5a2c8f8..44b9e5be6687 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -96,6 +96,8 @@ static struct ib_client srp_client = {
96 .remove = srp_remove_one 96 .remove = srp_remove_one
97}; 97};
98 98
99static struct ib_sa_client srp_sa_client;
100
99static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) 101static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
100{ 102{
101 return (struct srp_target_port *) host->hostdata; 103 return (struct srp_target_port *) host->hostdata;
@@ -267,7 +269,8 @@ static int srp_lookup_path(struct srp_target_port *target)
267 269
268 init_completion(&target->done); 270 init_completion(&target->done);
269 271
270 target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev->dev, 272 target->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
273 target->srp_host->dev->dev,
271 target->srp_host->port, 274 target->srp_host->port,
272 &target->path, 275 &target->path,
273 IB_SA_PATH_REC_DGID | 276 IB_SA_PATH_REC_DGID |
@@ -330,7 +333,7 @@ static int srp_send_req(struct srp_target_port *target)
330 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | 333 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
331 SRP_BUF_FORMAT_INDIRECT); 334 SRP_BUF_FORMAT_INDIRECT);
332 /* 335 /*
333 * In the published SRP specification (draft rev. 16a), the 336 * In the published SRP specification (draft rev. 16a), the
334 * port identifier format is 8 bytes of ID extension followed 337 * port identifier format is 8 bytes of ID extension followed
335 * by 8 bytes of GUID. Older drafts put the two halves in the 338 * by 8 bytes of GUID. Older drafts put the two halves in the
336 * opposite order, so that the GUID comes first. 339 * opposite order, so that the GUID comes first.
@@ -799,13 +802,6 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
799 spin_unlock_irqrestore(target->scsi_host->host_lock, flags); 802 spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
800} 803}
801 804
802static void srp_reconnect_work(void *target_ptr)
803{
804 struct srp_target_port *target = target_ptr;
805
806 srp_reconnect_target(target);
807}
808
809static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) 805static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
810{ 806{
811 struct srp_iu *iu; 807 struct srp_iu *iu;
@@ -858,7 +854,6 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr)
858{ 854{
859 struct srp_target_port *target = target_ptr; 855 struct srp_target_port *target = target_ptr;
860 struct ib_wc wc; 856 struct ib_wc wc;
861 unsigned long flags;
862 857
863 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 858 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
864 while (ib_poll_cq(cq, 1, &wc) > 0) { 859 while (ib_poll_cq(cq, 1, &wc) > 0) {
@@ -866,10 +861,6 @@ static void srp_completion(struct ib_cq *cq, void *target_ptr)
866 printk(KERN_ERR PFX "failed %s status %d\n", 861 printk(KERN_ERR PFX "failed %s status %d\n",
867 wc.wr_id & SRP_OP_RECV ? "receive" : "send", 862 wc.wr_id & SRP_OP_RECV ? "receive" : "send",
868 wc.status); 863 wc.status);
869 spin_lock_irqsave(target->scsi_host->host_lock, flags);
870 if (target->state == SRP_TARGET_LIVE)
871 schedule_work(&target->work);
872 spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
873 break; 864 break;
874 } 865 }
875 866
@@ -1461,12 +1452,28 @@ static ssize_t show_zero_req_lim(struct class_device *cdev, char *buf)
1461 return sprintf(buf, "%d\n", target->zero_req_lim); 1452 return sprintf(buf, "%d\n", target->zero_req_lim);
1462} 1453}
1463 1454
1464static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 1455static ssize_t show_local_ib_port(struct class_device *cdev, char *buf)
1465static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 1456{
1466static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 1457 struct srp_target_port *target = host_to_target(class_to_shost(cdev));
1467static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 1458
1468static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 1459 return sprintf(buf, "%d\n", target->srp_host->port);
1469static CLASS_DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 1460}
1461
1462static ssize_t show_local_ib_device(struct class_device *cdev, char *buf)
1463{
1464 struct srp_target_port *target = host_to_target(class_to_shost(cdev));
1465
1466 return sprintf(buf, "%s\n", target->srp_host->dev->dev->name);
1467}
1468
1469static CLASS_DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
1470static CLASS_DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
1471static CLASS_DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
1472static CLASS_DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
1473static CLASS_DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
1474static CLASS_DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
1475static CLASS_DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
1476static CLASS_DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
1470 1477
1471static struct class_device_attribute *srp_host_attrs[] = { 1478static struct class_device_attribute *srp_host_attrs[] = {
1472 &class_device_attr_id_ext, 1479 &class_device_attr_id_ext,
@@ -1475,6 +1482,8 @@ static struct class_device_attribute *srp_host_attrs[] = {
1475 &class_device_attr_pkey, 1482 &class_device_attr_pkey,
1476 &class_device_attr_dgid, 1483 &class_device_attr_dgid,
1477 &class_device_attr_zero_req_lim, 1484 &class_device_attr_zero_req_lim,
1485 &class_device_attr_local_ib_port,
1486 &class_device_attr_local_ib_device,
1478 NULL 1487 NULL
1479}; 1488};
1480 1489
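
The SRP hunk above adds local_ib_port and local_ib_device as read-only attributes and lists them in the NULL-terminated srp_host_attrs[] table. A plain-C sketch of that table-of-show-callbacks pattern follows; struct attr, struct target and the "mthca0" name are illustrative stand-ins for class_device_attribute and the real driver objects.

#include <stdio.h>

struct target { int port; const char *dev_name; };

struct attr {
        const char *name;
        int (*show)(const struct target *t, char *buf, size_t len);
};

static int show_port(const struct target *t, char *buf, size_t len)
{
        return snprintf(buf, len, "%d\n", t->port);
}

static int show_device(const struct target *t, char *buf, size_t len)
{
        return snprintf(buf, len, "%s\n", t->dev_name);
}

static const struct attr attrs[] = {
        { "local_ib_port",   show_port   },
        { "local_ib_device", show_device },
        { NULL, NULL }                        /* table terminator */
};

int main(void)
{
        struct target t = { .port = 1, .dev_name = "mthca0" };
        char buf[64];

        for (const struct attr *a = attrs; a->name; a++) {
                a->show(&t, buf, sizeof buf);
                printf("%s: %s", a->name, buf);
        }
        return 0;
}
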
@@ -1705,8 +1714,6 @@ static ssize_t srp_create_target(struct class_device *class_dev,
1705 target->scsi_host = target_host; 1714 target->scsi_host = target_host;
1706 target->srp_host = host; 1715 target->srp_host = host;
1707 1716
1708 INIT_WORK(&target->work, srp_reconnect_work, target);
1709
1710 INIT_LIST_HEAD(&target->free_reqs); 1717 INIT_LIST_HEAD(&target->free_reqs);
1711 INIT_LIST_HEAD(&target->req_queue); 1718 INIT_LIST_HEAD(&target->req_queue);
1712 for (i = 0; i < SRP_SQ_SIZE; ++i) { 1719 for (i = 0; i < SRP_SQ_SIZE; ++i) {
@@ -1895,7 +1902,7 @@ static void srp_add_one(struct ib_device *device)
1895 if (IS_ERR(srp_dev->fmr_pool)) 1902 if (IS_ERR(srp_dev->fmr_pool))
1896 srp_dev->fmr_pool = NULL; 1903 srp_dev->fmr_pool = NULL;
1897 1904
1898 if (device->node_type == IB_NODE_SWITCH) { 1905 if (device->node_type == RDMA_NODE_IB_SWITCH) {
1899 s = 0; 1906 s = 0;
1900 e = 0; 1907 e = 0;
1901 } else { 1908 } else {
@@ -1994,9 +2001,12 @@ static int __init srp_init_module(void)
1994 return ret; 2001 return ret;
1995 } 2002 }
1996 2003
2004 ib_sa_register_client(&srp_sa_client);
2005
1997 ret = ib_register_client(&srp_client); 2006 ret = ib_register_client(&srp_client);
1998 if (ret) { 2007 if (ret) {
1999 printk(KERN_ERR PFX "couldn't register IB client\n"); 2008 printk(KERN_ERR PFX "couldn't register IB client\n");
2009 ib_sa_unregister_client(&srp_sa_client);
2000 class_unregister(&srp_class); 2010 class_unregister(&srp_class);
2001 return ret; 2011 return ret;
2002 } 2012 }
@@ -2007,6 +2017,7 @@ static int __init srp_init_module(void)
2007static void __exit srp_cleanup_module(void) 2017static void __exit srp_cleanup_module(void)
2008{ 2018{
2009 ib_unregister_client(&srp_client); 2019 ib_unregister_client(&srp_client);
2020 ib_sa_unregister_client(&srp_sa_client);
2010 class_unregister(&srp_class); 2021 class_unregister(&srp_class);
2011} 2022}
2012 2023