aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/cache.c1
-rw-r--r--drivers/infiniband/core/cm.c29
-rw-r--r--drivers/infiniband/core/device.c136
-rw-r--r--drivers/infiniband/core/umem.c5
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c7
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c16
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c236
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c6
-rw-r--r--drivers/infiniband/hw/mlx4/user.h5
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c165
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h49
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c206
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c118
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c40
23 files changed, 769 insertions, 270 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 558c9a0fc8b9..e85f7013de57 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -38,6 +38,7 @@
38#include <linux/module.h> 38#include <linux/module.h>
39#include <linux/errno.h> 39#include <linux/errno.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/workqueue.h>
41 42
42#include <rdma/ib_cache.h> 43#include <rdma/ib_cache.h>
43 44
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index eff591deeb46..40c004a2697e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -306,7 +306,9 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
306 do { 306 do {
307 spin_lock_irqsave(&cm.lock, flags); 307 spin_lock_irqsave(&cm.lock, flags);
308 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 308 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
309 next_id++, &id); 309 next_id, &id);
310 if (!ret)
311 next_id = ((unsigned) id + 1) & MAX_ID_MASK;
310 spin_unlock_irqrestore(&cm.lock, flags); 312 spin_unlock_irqrestore(&cm.lock, flags);
311 } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); 313 } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
312 314
@@ -1295,26 +1297,29 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
1295 1297
1296 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; 1298 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1297 1299
1298 /* Check for duplicate REQ and stale connections. */ 1300 /* Check for possible duplicate REQ. */
1299 spin_lock_irqsave(&cm.lock, flags); 1301 spin_lock_irqsave(&cm.lock, flags);
1300 timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); 1302 timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1301 if (!timewait_info)
1302 timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1303
1304 if (timewait_info) { 1303 if (timewait_info) {
1305 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, 1304 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1306 timewait_info->work.remote_id); 1305 timewait_info->work.remote_id);
1307 cm_cleanup_timewait(cm_id_priv->timewait_info);
1308 spin_unlock_irqrestore(&cm.lock, flags); 1306 spin_unlock_irqrestore(&cm.lock, flags);
1309 if (cur_cm_id_priv) { 1307 if (cur_cm_id_priv) {
1310 cm_dup_req_handler(work, cur_cm_id_priv); 1308 cm_dup_req_handler(work, cur_cm_id_priv);
1311 cm_deref_id(cur_cm_id_priv); 1309 cm_deref_id(cur_cm_id_priv);
1312 } else 1310 }
1313 cm_issue_rej(work->port, work->mad_recv_wc, 1311 return NULL;
1314 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, 1312 }
1315 NULL, 0); 1313
1316 listen_cm_id_priv = NULL; 1314 /* Check for stale connections. */
1317 goto out; 1315 timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1316 if (timewait_info) {
1317 cm_cleanup_timewait(cm_id_priv->timewait_info);
1318 spin_unlock_irqrestore(&cm.lock, flags);
1319 cm_issue_rej(work->port, work->mad_recv_wc,
1320 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1321 NULL, 0);
1322 return NULL;
1318 } 1323 }
1319 1324
1320 /* Find matching listen request. */ 1325 /* Find matching listen request. */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 592c90aa3183..3ada17c0f239 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -40,6 +40,7 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/init.h> 41#include <linux/init.h>
42#include <linux/mutex.h> 42#include <linux/mutex.h>
43#include <linux/workqueue.h>
43 44
44#include "core_priv.h" 45#include "core_priv.h"
45 46
@@ -149,6 +150,18 @@ static int alloc_name(char *name)
149 return 0; 150 return 0;
150} 151}
151 152
153static int start_port(struct ib_device *device)
154{
155 return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
156}
157
158
159static int end_port(struct ib_device *device)
160{
161 return (device->node_type == RDMA_NODE_IB_SWITCH) ?
162 0 : device->phys_port_cnt;
163}
164
152/** 165/**
153 * ib_alloc_device - allocate an IB device struct 166 * ib_alloc_device - allocate an IB device struct
154 * @size:size of structure to allocate 167 * @size:size of structure to allocate
@@ -208,6 +221,45 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
208 return 0; 221 return 0;
209} 222}
210 223
224static int read_port_table_lengths(struct ib_device *device)
225{
226 struct ib_port_attr *tprops = NULL;
227 int num_ports, ret = -ENOMEM;
228 u8 port_index;
229
230 tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
231 if (!tprops)
232 goto out;
233
234 num_ports = end_port(device) - start_port(device) + 1;
235
236 device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
237 GFP_KERNEL);
238 device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
239 GFP_KERNEL);
240 if (!device->pkey_tbl_len || !device->gid_tbl_len)
241 goto err;
242
243 for (port_index = 0; port_index < num_ports; ++port_index) {
244 ret = ib_query_port(device, port_index + start_port(device),
245 tprops);
246 if (ret)
247 goto err;
248 device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
249 device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
250 }
251
252 ret = 0;
253 goto out;
254
255err:
256 kfree(device->gid_tbl_len);
257 kfree(device->pkey_tbl_len);
258out:
259 kfree(tprops);
260 return ret;
261}
262
211/** 263/**
212 * ib_register_device - Register an IB device with IB core 264 * ib_register_device - Register an IB device with IB core
213 * @device:Device to register 265 * @device:Device to register
@@ -239,10 +291,19 @@ int ib_register_device(struct ib_device *device)
239 spin_lock_init(&device->event_handler_lock); 291 spin_lock_init(&device->event_handler_lock);
240 spin_lock_init(&device->client_data_lock); 292 spin_lock_init(&device->client_data_lock);
241 293
294 ret = read_port_table_lengths(device);
295 if (ret) {
296 printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
297 device->name);
298 goto out;
299 }
300
242 ret = ib_device_register_sysfs(device); 301 ret = ib_device_register_sysfs(device);
243 if (ret) { 302 if (ret) {
244 printk(KERN_WARNING "Couldn't register device %s with driver model\n", 303 printk(KERN_WARNING "Couldn't register device %s with driver model\n",
245 device->name); 304 device->name);
305 kfree(device->gid_tbl_len);
306 kfree(device->pkey_tbl_len);
246 goto out; 307 goto out;
247 } 308 }
248 309
@@ -284,6 +345,9 @@ void ib_unregister_device(struct ib_device *device)
284 345
285 list_del(&device->core_list); 346 list_del(&device->core_list);
286 347
348 kfree(device->gid_tbl_len);
349 kfree(device->pkey_tbl_len);
350
287 mutex_unlock(&device_mutex); 351 mutex_unlock(&device_mutex);
288 352
289 spin_lock_irqsave(&device->client_data_lock, flags); 353 spin_lock_irqsave(&device->client_data_lock, flags);
@@ -506,10 +570,7 @@ int ib_query_port(struct ib_device *device,
506 u8 port_num, 570 u8 port_num,
507 struct ib_port_attr *port_attr) 571 struct ib_port_attr *port_attr)
508{ 572{
509 if (device->node_type == RDMA_NODE_IB_SWITCH) { 573 if (port_num < start_port(device) || port_num > end_port(device))
510 if (port_num)
511 return -EINVAL;
512 } else if (port_num < 1 || port_num > device->phys_port_cnt)
513 return -EINVAL; 574 return -EINVAL;
514 575
515 return device->query_port(device, port_num, port_attr); 576 return device->query_port(device, port_num, port_attr);
@@ -581,10 +642,7 @@ int ib_modify_port(struct ib_device *device,
581 u8 port_num, int port_modify_mask, 642 u8 port_num, int port_modify_mask,
582 struct ib_port_modify *port_modify) 643 struct ib_port_modify *port_modify)
583{ 644{
584 if (device->node_type == RDMA_NODE_IB_SWITCH) { 645 if (port_num < start_port(device) || port_num > end_port(device))
585 if (port_num)
586 return -EINVAL;
587 } else if (port_num < 1 || port_num > device->phys_port_cnt)
588 return -EINVAL; 646 return -EINVAL;
589 647
590 return device->modify_port(device, port_num, port_modify_mask, 648 return device->modify_port(device, port_num, port_modify_mask,
@@ -592,6 +650,68 @@ int ib_modify_port(struct ib_device *device,
592} 650}
593EXPORT_SYMBOL(ib_modify_port); 651EXPORT_SYMBOL(ib_modify_port);
594 652
653/**
654 * ib_find_gid - Returns the port number and GID table index where
655 * a specified GID value occurs.
656 * @device: The device to query.
657 * @gid: The GID value to search for.
658 * @port_num: The port number of the device where the GID value was found.
659 * @index: The index into the GID table where the GID was found. This
660 * parameter may be NULL.
661 */
662int ib_find_gid(struct ib_device *device, union ib_gid *gid,
663 u8 *port_num, u16 *index)
664{
665 union ib_gid tmp_gid;
666 int ret, port, i;
667
668 for (port = start_port(device); port <= end_port(device); ++port) {
669 for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
670 ret = ib_query_gid(device, port, i, &tmp_gid);
671 if (ret)
672 return ret;
673 if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
674 *port_num = port;
675 if (index)
676 *index = i;
677 return 0;
678 }
679 }
680 }
681
682 return -ENOENT;
683}
684EXPORT_SYMBOL(ib_find_gid);
685
686/**
687 * ib_find_pkey - Returns the PKey table index where a specified
688 * PKey value occurs.
689 * @device: The device to query.
690 * @port_num: The port number of the device to search for the PKey.
691 * @pkey: The PKey value to search for.
692 * @index: The index into the PKey table where the PKey was found.
693 */
694int ib_find_pkey(struct ib_device *device,
695 u8 port_num, u16 pkey, u16 *index)
696{
697 int ret, i;
698 u16 tmp_pkey;
699
700 for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
701 ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
702 if (ret)
703 return ret;
704
705 if (pkey == tmp_pkey) {
706 *index = i;
707 return 0;
708 }
709 }
710
711 return -ENOENT;
712}
713EXPORT_SYMBOL(ib_find_pkey);
714
595static int __init ib_core_init(void) 715static int __init ib_core_init(void)
596{ 716{
597 int ret; 717 int ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index f32ca5fbb26b..b4aec5103c99 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -36,6 +36,7 @@
36 36
37#include <linux/mm.h> 37#include <linux/mm.h>
38#include <linux/dma-mapping.h> 38#include <linux/dma-mapping.h>
39#include <linux/sched.h>
39 40
40#include "uverbs.h" 41#include "uverbs.h"
41 42
@@ -209,8 +210,10 @@ void ib_umem_release(struct ib_umem *umem)
209 __ib_umem_release(umem->context->device, umem, 1); 210 __ib_umem_release(umem->context->device, umem, 1);
210 211
211 mm = get_task_mm(current); 212 mm = get_task_mm(current);
212 if (!mm) 213 if (!mm) {
214 kfree(umem);
213 return; 215 return;
216 }
214 217
215 diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; 218 diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
216 219
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 84c5bb498563..add79bd44e39 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -2050,13 +2050,10 @@ int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
2050 switch (hipz_rc) { 2050 switch (hipz_rc) {
2051 case H_SUCCESS: /* successful completion */ 2051 case H_SUCCESS: /* successful completion */
2052 return 0; 2052 return 0;
2053 case H_ADAPTER_PARM: /* invalid adapter handle */
2054 case H_RT_PARM: /* invalid resource type */
2055 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ 2053 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
2056 case H_MLENGTH_PARM: /* invalid memory length */
2057 case H_MEM_ACCESS_PARM: /* invalid access controls */
2058 case H_CONSTRAINED: /* resource constraint */ 2054 case H_CONSTRAINED: /* resource constraint */
2059 return -EINVAL; 2055 case H_NO_MEM:
2056 return -ENOMEM;
2060 case H_BUSY: /* long busy */ 2057 case H_BUSY: /* long busy */
2061 return -EBUSY; 2058 return -EBUSY;
2062 default: 2059 default:
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 7f0beec74f70..5766ae3a2029 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -331,7 +331,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
331 0); 331 0);
332 qp->ipz_qp_handle.handle = outs[0]; 332 qp->ipz_qp_handle.handle = outs[0];
333 qp->real_qp_num = (u32)outs[1]; 333 qp->real_qp_num = (u32)outs[1];
334 parms->act_nr_send_sges = 334 parms->act_nr_send_wqes =
335 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); 335 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
336 parms->act_nr_recv_wqes = 336 parms->act_nr_recv_wqes =
337 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); 337 (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 085e28b939ec..dd691cfa5079 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -165,10 +165,9 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
165{ 165{
166 struct rb_node **n = &mcast_tree.rb_node; 166 struct rb_node **n = &mcast_tree.rb_node;
167 struct rb_node *pn = NULL; 167 struct rb_node *pn = NULL;
168 unsigned long flags;
169 int ret; 168 int ret;
170 169
171 spin_lock_irqsave(&mcast_lock, flags); 170 spin_lock_irq(&mcast_lock);
172 171
173 while (*n) { 172 while (*n) {
174 struct ipath_mcast *tmcast; 173 struct ipath_mcast *tmcast;
@@ -228,7 +227,7 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
228 ret = 0; 227 ret = 0;
229 228
230bail: 229bail:
231 spin_unlock_irqrestore(&mcast_lock, flags); 230 spin_unlock_irq(&mcast_lock);
232 231
233 return ret; 232 return ret;
234} 233}
@@ -289,17 +288,16 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
289 struct ipath_mcast *mcast = NULL; 288 struct ipath_mcast *mcast = NULL;
290 struct ipath_mcast_qp *p, *tmp; 289 struct ipath_mcast_qp *p, *tmp;
291 struct rb_node *n; 290 struct rb_node *n;
292 unsigned long flags;
293 int last = 0; 291 int last = 0;
294 int ret; 292 int ret;
295 293
296 spin_lock_irqsave(&mcast_lock, flags); 294 spin_lock_irq(&mcast_lock);
297 295
298 /* Find the GID in the mcast table. */ 296 /* Find the GID in the mcast table. */
299 n = mcast_tree.rb_node; 297 n = mcast_tree.rb_node;
300 while (1) { 298 while (1) {
301 if (n == NULL) { 299 if (n == NULL) {
302 spin_unlock_irqrestore(&mcast_lock, flags); 300 spin_unlock_irq(&mcast_lock);
303 ret = -EINVAL; 301 ret = -EINVAL;
304 goto bail; 302 goto bail;
305 } 303 }
@@ -334,7 +332,7 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
334 break; 332 break;
335 } 333 }
336 334
337 spin_unlock_irqrestore(&mcast_lock, flags); 335 spin_unlock_irq(&mcast_lock);
338 336
339 if (p) { 337 if (p) {
340 /* 338 /*
@@ -348,9 +346,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
348 atomic_dec(&mcast->refcount); 346 atomic_dec(&mcast->refcount);
349 wait_event(mcast->wait, !atomic_read(&mcast->refcount)); 347 wait_event(mcast->wait, !atomic_read(&mcast->refcount));
350 ipath_mcast_free(mcast); 348 ipath_mcast_free(mcast);
351 spin_lock(&dev->n_mcast_grps_lock); 349 spin_lock_irq(&dev->n_mcast_grps_lock);
352 dev->n_mcast_grps_allocated--; 350 dev->n_mcast_grps_allocated--;
353 spin_unlock(&dev->n_mcast_grps_lock); 351 spin_unlock_irq(&dev->n_mcast_grps_lock);
354 } 352 }
355 353
356 ret = 0; 354 ret = 0;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5cd706908450..dc137dec2308 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -188,14 +188,32 @@ static int send_wqe_overhead(enum ib_qp_type type)
188 } 188 }
189} 189}
190 190
191static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, 191static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
192 enum ib_qp_type type, struct mlx4_ib_qp *qp) 192 struct mlx4_ib_qp *qp)
193{ 193{
194 /* Sanity check QP size before proceeding */ 194 /* Sanity check RQ size before proceeding */
195 if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
196 cap->max_recv_sge > dev->dev->caps.max_rq_sg)
197 return -EINVAL;
198
199 qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
200
201 qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
202 sizeof (struct mlx4_wqe_data_seg)));
203 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
204
205 cap->max_recv_wr = qp->rq.max;
206 cap->max_recv_sge = qp->rq.max_gs;
207
208 return 0;
209}
210
211static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
212 enum ib_qp_type type, struct mlx4_ib_qp *qp)
213{
214 /* Sanity check SQ size before proceeding */
195 if (cap->max_send_wr > dev->dev->caps.max_wqes || 215 if (cap->max_send_wr > dev->dev->caps.max_wqes ||
196 cap->max_recv_wr > dev->dev->caps.max_wqes ||
197 cap->max_send_sge > dev->dev->caps.max_sq_sg || 216 cap->max_send_sge > dev->dev->caps.max_sq_sg ||
198 cap->max_recv_sge > dev->dev->caps.max_rq_sg ||
199 cap->max_inline_data + send_wqe_overhead(type) + 217 cap->max_inline_data + send_wqe_overhead(type) +
200 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) 218 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
201 return -EINVAL; 219 return -EINVAL;
@@ -208,12 +226,7 @@ static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
208 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) 226 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
209 return -EINVAL; 227 return -EINVAL;
210 228
211 qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0; 229 qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1;
212 qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
213
214 qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
215 sizeof (struct mlx4_wqe_data_seg)));
216 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
217 230
218 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * 231 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
219 sizeof (struct mlx4_wqe_data_seg), 232 sizeof (struct mlx4_wqe_data_seg),
@@ -233,23 +246,31 @@ static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
233 qp->sq.offset = 0; 246 qp->sq.offset = 0;
234 } 247 }
235 248
236 cap->max_send_wr = qp->sq.max; 249 cap->max_send_wr = qp->sq.max;
237 cap->max_recv_wr = qp->rq.max; 250 cap->max_send_sge = qp->sq.max_gs;
238 cap->max_send_sge = qp->sq.max_gs;
239 cap->max_recv_sge = qp->rq.max_gs;
240 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) - 251 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
241 sizeof (struct mlx4_wqe_inline_seg); 252 sizeof (struct mlx4_wqe_inline_seg);
242 253
243 return 0; 254 return 0;
244} 255}
245 256
257static int set_user_sq_size(struct mlx4_ib_qp *qp,
258 struct mlx4_ib_create_qp *ucmd)
259{
260 qp->sq.max = 1 << ucmd->log_sq_bb_count;
261 qp->sq.wqe_shift = ucmd->log_sq_stride;
262
263 qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
264 (qp->sq.max << qp->sq.wqe_shift);
265
266 return 0;
267}
268
246static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, 269static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
247 struct ib_qp_init_attr *init_attr, 270 struct ib_qp_init_attr *init_attr,
248 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) 271 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
249{ 272{
250 struct mlx4_wqe_ctrl_seg *ctrl;
251 int err; 273 int err;
252 int i;
253 274
254 mutex_init(&qp->mutex); 275 mutex_init(&qp->mutex);
255 spin_lock_init(&qp->sq.lock); 276 spin_lock_init(&qp->sq.lock);
@@ -264,7 +285,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
264 qp->sq.head = 0; 285 qp->sq.head = 0;
265 qp->sq.tail = 0; 286 qp->sq.tail = 0;
266 287
267 err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp); 288 err = set_rq_size(dev, &init_attr->cap, qp);
268 if (err) 289 if (err)
269 goto err; 290 goto err;
270 291
@@ -276,6 +297,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
276 goto err; 297 goto err;
277 } 298 }
278 299
300 err = set_user_sq_size(qp, &ucmd);
301 if (err)
302 goto err;
303
279 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, 304 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
280 qp->buf_size, 0); 305 qp->buf_size, 0);
281 if (IS_ERR(qp->umem)) { 306 if (IS_ERR(qp->umem)) {
@@ -292,16 +317,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
292 if (err) 317 if (err)
293 goto err_mtt; 318 goto err_mtt;
294 319
295 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), 320 if (!init_attr->srq) {
296 ucmd.db_addr, &qp->db); 321 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
297 if (err) 322 ucmd.db_addr, &qp->db);
298 goto err_mtt; 323 if (err)
324 goto err_mtt;
325 }
299 } else { 326 } else {
300 err = mlx4_ib_db_alloc(dev, &qp->db, 0); 327 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
301 if (err) 328 if (err)
302 goto err; 329 goto err;
303 330
304 *qp->db.db = 0; 331 if (!init_attr->srq) {
332 err = mlx4_ib_db_alloc(dev, &qp->db, 0);
333 if (err)
334 goto err;
335
336 *qp->db.db = 0;
337 }
305 338
306 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { 339 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
307 err = -ENOMEM; 340 err = -ENOMEM;
@@ -317,11 +350,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
317 if (err) 350 if (err)
318 goto err_mtt; 351 goto err_mtt;
319 352
320 for (i = 0; i < qp->sq.max; ++i) {
321 ctrl = get_send_wqe(qp, i);
322 ctrl->owner_opcode = cpu_to_be32(1 << 31);
323 }
324
325 qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL); 353 qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL);
326 qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL); 354 qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL);
327 355
@@ -355,7 +383,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
355 return 0; 383 return 0;
356 384
357err_wrid: 385err_wrid:
358 if (pd->uobject) 386 if (pd->uobject && !init_attr->srq)
359 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); 387 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
360 else { 388 else {
361 kfree(qp->sq.wrid); 389 kfree(qp->sq.wrid);
@@ -372,7 +400,7 @@ err_buf:
372 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); 400 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
373 401
374err_db: 402err_db:
375 if (!pd->uobject) 403 if (!pd->uobject && !init_attr->srq)
376 mlx4_ib_db_free(dev, &qp->db); 404 mlx4_ib_db_free(dev, &qp->db);
377 405
378err: 406err:
@@ -450,14 +478,16 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
450 mlx4_mtt_cleanup(dev->dev, &qp->mtt); 478 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
451 479
452 if (is_user) { 480 if (is_user) {
453 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), 481 if (!qp->ibqp.srq)
454 &qp->db); 482 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
483 &qp->db);
455 ib_umem_release(qp->umem); 484 ib_umem_release(qp->umem);
456 } else { 485 } else {
457 kfree(qp->sq.wrid); 486 kfree(qp->sq.wrid);
458 kfree(qp->rq.wrid); 487 kfree(qp->rq.wrid);
459 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); 488 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
460 mlx4_ib_db_free(dev, &qp->db); 489 if (!qp->ibqp.srq)
490 mlx4_ib_db_free(dev, &qp->db);
461 } 491 }
462} 492}
463 493
@@ -573,7 +603,7 @@ static int to_mlx4_st(enum ib_qp_type type)
573 } 603 }
574} 604}
575 605
576static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr, 606static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
577 int attr_mask) 607 int attr_mask)
578{ 608{
579 u8 dest_rd_atomic; 609 u8 dest_rd_atomic;
@@ -603,7 +633,7 @@ static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *att
603 return cpu_to_be32(hw_access_flags); 633 return cpu_to_be32(hw_access_flags);
604} 634}
605 635
606static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr, 636static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
607 int attr_mask) 637 int attr_mask)
608{ 638{
609 if (attr_mask & IB_QP_PKEY_INDEX) 639 if (attr_mask & IB_QP_PKEY_INDEX)
@@ -619,7 +649,7 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
619 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); 649 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
620} 650}
621 651
622static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah, 652static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
623 struct mlx4_qp_path *path, u8 port) 653 struct mlx4_qp_path *path, u8 port)
624{ 654{
625 path->grh_mylmc = ah->src_path_bits & 0x7f; 655 path->grh_mylmc = ah->src_path_bits & 0x7f;
@@ -655,14 +685,14 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
655 return 0; 685 return 0;
656} 686}
657 687
658int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 688static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
659 int attr_mask, struct ib_udata *udata) 689 const struct ib_qp_attr *attr, int attr_mask,
690 enum ib_qp_state cur_state, enum ib_qp_state new_state)
660{ 691{
661 struct mlx4_ib_dev *dev = to_mdev(ibqp->device); 692 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
662 struct mlx4_ib_qp *qp = to_mqp(ibqp); 693 struct mlx4_ib_qp *qp = to_mqp(ibqp);
663 struct mlx4_qp_context *context; 694 struct mlx4_qp_context *context;
664 enum mlx4_qp_optpar optpar = 0; 695 enum mlx4_qp_optpar optpar = 0;
665 enum ib_qp_state cur_state, new_state;
666 int sqd_event; 696 int sqd_event;
667 int err = -EINVAL; 697 int err = -EINVAL;
668 698
@@ -670,34 +700,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
670 if (!context) 700 if (!context)
671 return -ENOMEM; 701 return -ENOMEM;
672 702
673 mutex_lock(&qp->mutex);
674
675 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
676 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
677
678 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
679 goto out;
680
681 if ((attr_mask & IB_QP_PKEY_INDEX) &&
682 attr->pkey_index >= dev->dev->caps.pkey_table_len) {
683 goto out;
684 }
685
686 if ((attr_mask & IB_QP_PORT) &&
687 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
688 goto out;
689 }
690
691 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
692 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
693 goto out;
694 }
695
696 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
697 attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
698 goto out;
699 }
700
701 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | 703 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
702 (to_mlx4_st(ibqp->qp_type) << 16)); 704 (to_mlx4_st(ibqp->qp_type) << 16));
703 context->flags |= cpu_to_be32(1 << 8); /* DE? */ 705 context->flags |= cpu_to_be32(1 << 8); /* DE? */
@@ -849,7 +851,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
849 if (ibqp->srq) 851 if (ibqp->srq)
850 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); 852 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
851 853
852 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 854 if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
853 context->db_rec_addr = cpu_to_be64(qp->db.dma); 855 context->db_rec_addr = cpu_to_be64(qp->db.dma);
854 856
855 if (cur_state == IB_QPS_INIT && 857 if (cur_state == IB_QPS_INIT &&
@@ -869,6 +871,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
869 else 871 else
870 sqd_event = 0; 872 sqd_event = 0;
871 873
874 /*
875 * Before passing a kernel QP to the HW, make sure that the
876 * ownership bits of the send queue are set so that the
877 * hardware doesn't start processing stale work requests.
878 */
879 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
880 struct mlx4_wqe_ctrl_seg *ctrl;
881 int i;
882
883 for (i = 0; i < qp->sq.max; ++i) {
884 ctrl = get_send_wqe(qp, i);
885 ctrl->owner_opcode = cpu_to_be32(1 << 31);
886 }
887 }
888
872 err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), 889 err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
873 to_mlx4_state(new_state), context, optpar, 890 to_mlx4_state(new_state), context, optpar,
874 sqd_event, &qp->mqp); 891 sqd_event, &qp->mqp);
@@ -916,15 +933,89 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
916 qp->rq.tail = 0; 933 qp->rq.tail = 0;
917 qp->sq.head = 0; 934 qp->sq.head = 0;
918 qp->sq.tail = 0; 935 qp->sq.tail = 0;
919 *qp->db.db = 0; 936 if (!ibqp->srq)
937 *qp->db.db = 0;
920 } 938 }
921 939
922out: 940out:
923 mutex_unlock(&qp->mutex);
924 kfree(context); 941 kfree(context);
925 return err; 942 return err;
926} 943}
927 944
945static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
946static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
947 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
948 IB_QP_PORT |
949 IB_QP_QKEY),
950 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
951 IB_QP_PORT |
952 IB_QP_ACCESS_FLAGS),
953 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
954 IB_QP_PORT |
955 IB_QP_ACCESS_FLAGS),
956 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
957 IB_QP_QKEY),
958 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
959 IB_QP_QKEY),
960};
961
962int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
963 int attr_mask, struct ib_udata *udata)
964{
965 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
966 struct mlx4_ib_qp *qp = to_mqp(ibqp);
967 enum ib_qp_state cur_state, new_state;
968 int err = -EINVAL;
969
970 mutex_lock(&qp->mutex);
971
972 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
973 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
974
975 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
976 goto out;
977
978 if ((attr_mask & IB_QP_PKEY_INDEX) &&
979 attr->pkey_index >= dev->dev->caps.pkey_table_len) {
980 goto out;
981 }
982
983 if ((attr_mask & IB_QP_PORT) &&
984 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
985 goto out;
986 }
987
988 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
989 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
990 goto out;
991 }
992
993 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
994 attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
995 goto out;
996 }
997
998 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
999 err = 0;
1000 goto out;
1001 }
1002
1003 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
1004 err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
1005 mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
1006 IB_QPS_RESET, IB_QPS_INIT);
1007 if (err)
1008 goto out;
1009 cur_state = IB_QPS_INIT;
1010 }
1011
1012 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
1013
1014out:
1015 mutex_unlock(&qp->mutex);
1016 return err;
1017}
1018
928static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 1019static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
929 void *wqe) 1020 void *wqe)
930{ 1021{
@@ -952,6 +1043,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
952 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; 1043 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
953 sqp->ud_header.grh.flow_label = 1044 sqp->ud_header.grh.flow_label =
954 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 1045 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1046 sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
955 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, 1047 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
956 ah->av.gid_index, &sqp->ud_header.grh.source_gid); 1048 ah->av.gid_index, &sqp->ud_header.grh.source_gid);
957 memcpy(sqp->ud_header.grh.destination_gid.raw, 1049 memcpy(sqp->ud_header.grh.destination_gid.raw,
@@ -1192,7 +1284,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1192 */ 1284 */
1193 wmb(); 1285 wmb();
1194 1286
1195 if (wr->opcode < 0 || wr->opcode > ARRAY_SIZE(mlx4_ib_opcode)) { 1287 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
1196 err = -EINVAL; 1288 err = -EINVAL;
1197 goto out; 1289 goto out;
1198 } 1290 }
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 42ab4a801d6a..12fac1c8989d 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -297,6 +297,12 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
297 break; 297 break;
298 } 298 }
299 299
300 if (unlikely(srq->head == srq->tail)) {
301 err = -ENOMEM;
302 *bad_wr = wr;
303 break;
304 }
305
300 srq->wrid[srq->head] = wr->wr_id; 306 srq->wrid[srq->head] = wr->wr_id;
301 307
302 next = get_wqe(srq, srq->head); 308 next = get_wqe(srq, srq->head);
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 5b8eddc9fa83..88c72d56368b 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -39,7 +39,7 @@
39 * Increment this value if any changes that break userspace ABI 39 * Increment this value if any changes that break userspace ABI
40 * compatibility are made. 40 * compatibility are made.
41 */ 41 */
42#define MLX4_IB_UVERBS_ABI_VERSION 1 42#define MLX4_IB_UVERBS_ABI_VERSION 2
43 43
44/* 44/*
45 * Make sure that all structs defined in this file remain laid out so 45 * Make sure that all structs defined in this file remain laid out so
@@ -87,6 +87,9 @@ struct mlx4_ib_create_srq_resp {
87struct mlx4_ib_create_qp { 87struct mlx4_ib_create_qp {
88 __u64 buf_addr; 88 __u64 buf_addr;
89 __u64 db_addr; 89 __u64 db_addr;
90 __u8 log_sq_bb_count;
91 __u8 log_sq_stride;
92 __u8 reserved[6];
90}; 93};
91 94
92#endif /* MLX4_IB_USER_H */ 95#endif /* MLX4_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 27caf3b0648a..4b111a852ff6 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -279,6 +279,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
279 (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff; 279 (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
280 header->grh.flow_label = 280 header->grh.flow_label =
281 ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); 281 ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
282 header->grh.hop_limit = ah->av->hop_limit;
282 ib_get_cached_gid(&dev->ib_dev, 283 ib_get_cached_gid(&dev->ib_dev,
283 be32_to_cpu(ah->av->port_pd) >> 24, 284 be32_to_cpu(ah->av->port_pd) >> 24,
284 ah->av->gid_index % dev->limits.gid_table_len, 285 ah->av->gid_index % dev->limits.gid_table_len,
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 71314460b11e..38102520ffb3 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -37,6 +37,7 @@
37#include <linux/completion.h> 37#include <linux/completion.h>
38#include <linux/pci.h> 38#include <linux/pci.h>
39#include <linux/errno.h> 39#include <linux/errno.h>
40#include <linux/sched.h>
40#include <asm/io.h> 41#include <asm/io.h>
41#include <rdma/ib_mad.h> 42#include <rdma/ib_mad.h>
42 43
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index ca224d018af2..be6e1e03bdab 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -37,6 +37,7 @@
37 */ 37 */
38 38
39#include <linux/hardirq.h> 39#include <linux/hardirq.h>
40#include <linux/sched.h>
40 41
41#include <asm/io.h> 42#include <asm/io.h>
42 43
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 773145e29947..aa563e61de65 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1250,12 +1250,14 @@ static void __mthca_remove_one(struct pci_dev *pdev)
1250int __mthca_restart_one(struct pci_dev *pdev) 1250int __mthca_restart_one(struct pci_dev *pdev)
1251{ 1251{
1252 struct mthca_dev *mdev; 1252 struct mthca_dev *mdev;
1253 int hca_type;
1253 1254
1254 mdev = pci_get_drvdata(pdev); 1255 mdev = pci_get_drvdata(pdev);
1255 if (!mdev) 1256 if (!mdev)
1256 return -ENODEV; 1257 return -ENODEV;
1258 hca_type = mdev->hca_type;
1257 __mthca_remove_one(pdev); 1259 __mthca_remove_one(pdev);
1258 return __mthca_init_one(pdev, mdev->hca_type); 1260 return __mthca_init_one(pdev, hca_type);
1259} 1261}
1260 1262
1261static int __devinit mthca_init_one(struct pci_dev *pdev, 1263static int __devinit mthca_init_one(struct pci_dev *pdev,
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 48f7c65e9aed..e61f3e626980 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -36,6 +36,7 @@
36 36
37#include <linux/mm.h> 37#include <linux/mm.h>
38#include <linux/scatterlist.h> 38#include <linux/scatterlist.h>
39#include <linux/sched.h>
39 40
40#include <asm/page.h> 41#include <asm/page.h>
41 42
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 72fabb822f1c..eef415b12b2e 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -37,6 +37,7 @@
37 37
38#include <linux/string.h> 38#include <linux/string.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/sched.h>
40 41
41#include <asm/io.h> 42#include <asm/io.h>
42 43
@@ -295,7 +296,7 @@ static int to_mthca_st(int transport)
295 } 296 }
296} 297}
297 298
298static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr, 299static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
299 int attr_mask) 300 int attr_mask)
300{ 301{
301 if (attr_mask & IB_QP_PKEY_INDEX) 302 if (attr_mask & IB_QP_PKEY_INDEX)
@@ -327,7 +328,7 @@ static void init_port(struct mthca_dev *dev, int port)
327 mthca_warn(dev, "INIT_IB returned status %02x.\n", status); 328 mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
328} 329}
329 330
330static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, 331static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
331 int attr_mask) 332 int attr_mask)
332{ 333{
333 u8 dest_rd_atomic; 334 u8 dest_rd_atomic;
@@ -510,7 +511,7 @@ out:
510 return err; 511 return err;
511} 512}
512 513
513static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, 514static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
514 struct mthca_qp_path *path, u8 port) 515 struct mthca_qp_path *path, u8 port)
515{ 516{
516 path->g_mylmc = ah->src_path_bits & 0x7f; 517 path->g_mylmc = ah->src_path_bits & 0x7f;
@@ -538,12 +539,12 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
538 return 0; 539 return 0;
539} 540}
540 541
541int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, 542static int __mthca_modify_qp(struct ib_qp *ibqp,
542 struct ib_udata *udata) 543 const struct ib_qp_attr *attr, int attr_mask,
544 enum ib_qp_state cur_state, enum ib_qp_state new_state)
543{ 545{
544 struct mthca_dev *dev = to_mdev(ibqp->device); 546 struct mthca_dev *dev = to_mdev(ibqp->device);
545 struct mthca_qp *qp = to_mqp(ibqp); 547 struct mthca_qp *qp = to_mqp(ibqp);
546 enum ib_qp_state cur_state, new_state;
547 struct mthca_mailbox *mailbox; 548 struct mthca_mailbox *mailbox;
548 struct mthca_qp_param *qp_param; 549 struct mthca_qp_param *qp_param;
549 struct mthca_qp_context *qp_context; 550 struct mthca_qp_context *qp_context;
@@ -551,60 +552,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
551 u8 status; 552 u8 status;
552 int err = -EINVAL; 553 int err = -EINVAL;
553 554
554 mutex_lock(&qp->mutex);
555
556 if (attr_mask & IB_QP_CUR_STATE) {
557 cur_state = attr->cur_qp_state;
558 } else {
559 spin_lock_irq(&qp->sq.lock);
560 spin_lock(&qp->rq.lock);
561 cur_state = qp->state;
562 spin_unlock(&qp->rq.lock);
563 spin_unlock_irq(&qp->sq.lock);
564 }
565
566 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
567
568 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
569 mthca_dbg(dev, "Bad QP transition (transport %d) "
570 "%d->%d with attr 0x%08x\n",
571 qp->transport, cur_state, new_state,
572 attr_mask);
573 goto out;
574 }
575
576 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
577 err = 0;
578 goto out;
579 }
580
581 if ((attr_mask & IB_QP_PKEY_INDEX) &&
582 attr->pkey_index >= dev->limits.pkey_table_len) {
583 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
584 attr->pkey_index, dev->limits.pkey_table_len-1);
585 goto out;
586 }
587
588 if ((attr_mask & IB_QP_PORT) &&
589 (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
590 mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
591 goto out;
592 }
593
594 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
595 attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
596 mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
597 attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
598 goto out;
599 }
600
601 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
602 attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
603 mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
604 attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
605 goto out;
606 }
607
608 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 555 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
609 if (IS_ERR(mailbox)) { 556 if (IS_ERR(mailbox)) {
610 err = PTR_ERR(mailbox); 557 err = PTR_ERR(mailbox);
@@ -891,6 +838,98 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
891 838
892out_mailbox: 839out_mailbox:
893 mthca_free_mailbox(dev, mailbox); 840 mthca_free_mailbox(dev, mailbox);
841out:
842 return err;
843}
844
845static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 };
846static const int dummy_init_attr_mask[] = {
847 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
848 IB_QP_PORT |
849 IB_QP_QKEY),
850 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
851 IB_QP_PORT |
852 IB_QP_ACCESS_FLAGS),
853 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
854 IB_QP_PORT |
855 IB_QP_ACCESS_FLAGS),
856 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
857 IB_QP_QKEY),
858 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
859 IB_QP_QKEY),
860};
861
862int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
863 struct ib_udata *udata)
864{
865 struct mthca_dev *dev = to_mdev(ibqp->device);
866 struct mthca_qp *qp = to_mqp(ibqp);
867 enum ib_qp_state cur_state, new_state;
868 int err = -EINVAL;
869
870 mutex_lock(&qp->mutex);
871 if (attr_mask & IB_QP_CUR_STATE) {
872 cur_state = attr->cur_qp_state;
873 } else {
874 spin_lock_irq(&qp->sq.lock);
875 spin_lock(&qp->rq.lock);
876 cur_state = qp->state;
877 spin_unlock(&qp->rq.lock);
878 spin_unlock_irq(&qp->sq.lock);
879 }
880
881 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
882
883 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
884 mthca_dbg(dev, "Bad QP transition (transport %d) "
885 "%d->%d with attr 0x%08x\n",
886 qp->transport, cur_state, new_state,
887 attr_mask);
888 goto out;
889 }
890
891 if ((attr_mask & IB_QP_PKEY_INDEX) &&
892 attr->pkey_index >= dev->limits.pkey_table_len) {
893 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
894 attr->pkey_index, dev->limits.pkey_table_len-1);
895 goto out;
896 }
897
898 if ((attr_mask & IB_QP_PORT) &&
899 (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
900 mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
901 goto out;
902 }
903
904 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
905 attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
906 mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
907 attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
908 goto out;
909 }
910
911 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
912 attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
913 mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
914 attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
915 goto out;
916 }
917
918 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
919 err = 0;
920 goto out;
921 }
922
923 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
924 err = __mthca_modify_qp(ibqp, &dummy_init_attr,
925 dummy_init_attr_mask[ibqp->qp_type],
926 IB_QPS_RESET, IB_QPS_INIT);
927 if (err)
928 goto out;
929 cur_state = IB_QPS_INIT;
930 }
931
932 err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
894 933
895out: 934out:
896 mutex_unlock(&qp->mutex); 935 mutex_unlock(&qp->mutex);
@@ -2245,10 +2284,10 @@ void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
2245 struct mthca_next_seg *next; 2284 struct mthca_next_seg *next;
2246 2285
2247 /* 2286 /*
2248 * For SRQs, all WQEs generate a CQE, so we're always at the 2287 * For SRQs, all receive WQEs generate a CQE, so we're always
2249 * end of the doorbell chain. 2288 * at the end of the doorbell chain.
2250 */ 2289 */
2251 if (qp->ibqp.srq) { 2290 if (qp->ibqp.srq && !is_send) {
2252 *new_wqe = 0; 2291 *new_wqe = 0;
2253 return; 2292 return;
2254 } 2293 }
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 61974b0296ca..b8f05a526673 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -34,6 +34,7 @@
34 34
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/string.h> 36#include <linux/string.h>
37#include <linux/sched.h>
37 38
38#include <asm/io.h> 39#include <asm/io.h>
39 40
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 87310eeb6df0..285c143115cc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -132,12 +132,46 @@ struct ipoib_cm_data {
132 __be32 mtu; 132 __be32 mtu;
133}; 133};
134 134
135/*
136 * Quoting 10.3.1 Queue Pair and EE Context States:
137 *
138 * Note, for QPs that are associated with an SRQ, the Consumer should take the
139 * QP through the Error State before invoking a Destroy QP or a Modify QP to the
140 * Reset State. The Consumer may invoke the Destroy QP without first performing
141 * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
142 * Last WQE Reached Event. However, if the Consumer does not wait for the
143 * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
144 * leakage may occur. Therefore, it is good programming practice to tear down a
145 * QP that is associated with an SRQ by using the following process:
146 *
147 * - Put the QP in the Error State
148 * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
149 * - either:
150 * drain the CQ by invoking the Poll CQ verb and either wait for CQ
151 * to be empty or the number of Poll CQ operations has exceeded
152 * CQ capacity size;
153 * - or
154 * post another WR that completes on the same CQ and wait for this
155 * WR to return as a WC;
156 * - and then invoke a Destroy QP or Reset QP.
157 *
158 * We use the second option and wait for a completion on the
159 * same CQ before destroying QPs attached to our SRQ.
160 */
161
162enum ipoib_cm_state {
163 IPOIB_CM_RX_LIVE,
164 IPOIB_CM_RX_ERROR, /* Ignored by stale task */
165 IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */
166};
167
135struct ipoib_cm_rx { 168struct ipoib_cm_rx {
136 struct ib_cm_id *id; 169 struct ib_cm_id *id;
137 struct ib_qp *qp; 170 struct ib_qp *qp;
138 struct list_head list; 171 struct list_head list;
139 struct net_device *dev; 172 struct net_device *dev;
140 unsigned long jiffies; 173 unsigned long jiffies;
174 enum ipoib_cm_state state;
141}; 175};
142 176
143struct ipoib_cm_tx { 177struct ipoib_cm_tx {
@@ -165,10 +199,15 @@ struct ipoib_cm_dev_priv {
165 struct ib_srq *srq; 199 struct ib_srq *srq;
166 struct ipoib_cm_rx_buf *srq_ring; 200 struct ipoib_cm_rx_buf *srq_ring;
167 struct ib_cm_id *id; 201 struct ib_cm_id *id;
168 struct list_head passive_ids; 202 struct list_head passive_ids; /* state: LIVE */
203 struct list_head rx_error_list; /* state: ERROR */
204 struct list_head rx_flush_list; /* state: FLUSH, drain not started */
205 struct list_head rx_drain_list; /* state: FLUSH, drain started */
206 struct list_head rx_reap_list; /* state: FLUSH, drain done */
169 struct work_struct start_task; 207 struct work_struct start_task;
170 struct work_struct reap_task; 208 struct work_struct reap_task;
171 struct work_struct skb_task; 209 struct work_struct skb_task;
210 struct work_struct rx_reap_task;
172 struct delayed_work stale_task; 211 struct delayed_work stale_task;
173 struct sk_buff_head skb_queue; 212 struct sk_buff_head skb_queue;
174 struct list_head start_list; 213 struct list_head start_list;
@@ -201,15 +240,17 @@ struct ipoib_dev_priv {
201 struct list_head multicast_list; 240 struct list_head multicast_list;
202 struct rb_root multicast_tree; 241 struct rb_root multicast_tree;
203 242
204 struct delayed_work pkey_task; 243 struct delayed_work pkey_poll_task;
205 struct delayed_work mcast_task; 244 struct delayed_work mcast_task;
206 struct work_struct flush_task; 245 struct work_struct flush_task;
207 struct work_struct restart_task; 246 struct work_struct restart_task;
208 struct delayed_work ah_reap_task; 247 struct delayed_work ah_reap_task;
248 struct work_struct pkey_event_task;
209 249
210 struct ib_device *ca; 250 struct ib_device *ca;
211 u8 port; 251 u8 port;
212 u16 pkey; 252 u16 pkey;
253 u16 pkey_index;
213 struct ib_pd *pd; 254 struct ib_pd *pd;
214 struct ib_mr *mr; 255 struct ib_mr *mr;
215 struct ib_cq *cq; 256 struct ib_cq *cq;
@@ -333,12 +374,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
333 374
334int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 375int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
335void ipoib_ib_dev_flush(struct work_struct *work); 376void ipoib_ib_dev_flush(struct work_struct *work);
377void ipoib_pkey_event(struct work_struct *work);
336void ipoib_ib_dev_cleanup(struct net_device *dev); 378void ipoib_ib_dev_cleanup(struct net_device *dev);
337 379
338int ipoib_ib_dev_open(struct net_device *dev); 380int ipoib_ib_dev_open(struct net_device *dev);
339int ipoib_ib_dev_up(struct net_device *dev); 381int ipoib_ib_dev_up(struct net_device *dev);
340int ipoib_ib_dev_down(struct net_device *dev, int flush); 382int ipoib_ib_dev_down(struct net_device *dev, int flush);
341int ipoib_ib_dev_stop(struct net_device *dev); 383int ipoib_ib_dev_stop(struct net_device *dev, int flush);
342 384
343int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 385int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
344void ipoib_dev_cleanup(struct net_device *dev); 386void ipoib_dev_cleanup(struct net_device *dev);
@@ -386,6 +428,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
386 428
387void ipoib_pkey_poll(struct work_struct *work); 429void ipoib_pkey_poll(struct work_struct *work);
388int ipoib_pkey_dev_delay_open(struct net_device *dev); 430int ipoib_pkey_dev_delay_open(struct net_device *dev);
431void ipoib_drain_cq(struct net_device *dev);
389 432
390#ifdef CONFIG_INFINIBAND_IPOIB_CM 433#ifdef CONFIG_INFINIBAND_IPOIB_CM
391 434
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index eec833b81e9b..076a0bbb63d7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -37,6 +37,7 @@
37#include <net/dst.h> 37#include <net/dst.h>
38#include <net/icmp.h> 38#include <net/icmp.h>
39#include <linux/icmpv6.h> 39#include <linux/icmpv6.h>
40#include <linux/delay.h>
40 41
41#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 42#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
42static int data_debug_level; 43static int data_debug_level;
@@ -62,6 +63,17 @@ struct ipoib_cm_id {
62 u32 remote_mtu; 63 u32 remote_mtu;
63}; 64};
64 65
66static struct ib_qp_attr ipoib_cm_err_attr = {
67 .qp_state = IB_QPS_ERR
68};
69
70#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
71
72static struct ib_send_wr ipoib_cm_rx_drain_wr = {
73 .wr_id = IPOIB_CM_RX_DRAIN_WRID,
74 .opcode = IB_WR_SEND,
75};
76
65static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 77static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
66 struct ib_cm_event *event); 78 struct ib_cm_event *event);
67 79
@@ -150,15 +162,54 @@ partial_error:
150 return NULL; 162 return NULL;
151} 163}
152 164
165static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
166{
167 struct ib_send_wr *bad_wr;
168 struct ipoib_cm_rx *p;
169
170 /* We only reserved 1 extra slot in CQ for drain WRs, so
171 * make sure we have at most 1 outstanding WR. */
172 if (list_empty(&priv->cm.rx_flush_list) ||
173 !list_empty(&priv->cm.rx_drain_list))
174 return;
175
176 /*
177 * QPs on flush list are error state. This way, a "flush
178 * error" WC will be immediately generated for each WR we post.
179 */
180 p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
181 if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
182 ipoib_warn(priv, "failed to post drain wr\n");
183
184 list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
185}
186
187static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
188{
189 struct ipoib_cm_rx *p = ctx;
190 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
191 unsigned long flags;
192
193 if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
194 return;
195
196 spin_lock_irqsave(&priv->lock, flags);
197 list_move(&p->list, &priv->cm.rx_flush_list);
198 p->state = IPOIB_CM_RX_FLUSH;
199 ipoib_cm_start_rx_drain(priv);
200 spin_unlock_irqrestore(&priv->lock, flags);
201}
202
153static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, 203static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
154 struct ipoib_cm_rx *p) 204 struct ipoib_cm_rx *p)
155{ 205{
156 struct ipoib_dev_priv *priv = netdev_priv(dev); 206 struct ipoib_dev_priv *priv = netdev_priv(dev);
157 struct ib_qp_init_attr attr = { 207 struct ib_qp_init_attr attr = {
158 .send_cq = priv->cq, /* does not matter, we never send anything */ 208 .event_handler = ipoib_cm_rx_event_handler,
209 .send_cq = priv->cq, /* For drain WR */
159 .recv_cq = priv->cq, 210 .recv_cq = priv->cq,
160 .srq = priv->cm.srq, 211 .srq = priv->cm.srq,
161 .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */ 212 .cap.max_send_wr = 1, /* For drain WR */
162 .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ 213 .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
163 .sq_sig_type = IB_SIGNAL_ALL_WR, 214 .sq_sig_type = IB_SIGNAL_ALL_WR,
164 .qp_type = IB_QPT_RC, 215 .qp_type = IB_QPT_RC,
@@ -198,6 +249,27 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
198 ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); 249 ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
199 return ret; 250 return ret;
200 } 251 }
252
253 /*
254 * Current Mellanox HCA firmware won't generate completions
255 * with error for drain WRs unless the QP has been moved to
256 * RTS first. This work-around leaves a window where a QP has
257 * moved to error asynchronously, but this will eventually get
258 * fixed in firmware, so let's not error out if modify QP
259 * fails.
260 */
261 qp_attr.qp_state = IB_QPS_RTS;
262 ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
263 if (ret) {
264 ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
265 return 0;
266 }
267 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
268 if (ret) {
269 ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
270 return 0;
271 }
272
201 return 0; 273 return 0;
202} 274}
203 275
@@ -256,6 +328,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
256 328
257 cm_id->context = p; 329 cm_id->context = p;
258 p->jiffies = jiffies; 330 p->jiffies = jiffies;
331 p->state = IPOIB_CM_RX_LIVE;
259 spin_lock_irq(&priv->lock); 332 spin_lock_irq(&priv->lock);
260 if (list_empty(&priv->cm.passive_ids)) 333 if (list_empty(&priv->cm.passive_ids))
261 queue_delayed_work(ipoib_workqueue, 334 queue_delayed_work(ipoib_workqueue,
@@ -277,7 +350,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
277{ 350{
278 struct ipoib_cm_rx *p; 351 struct ipoib_cm_rx *p;
279 struct ipoib_dev_priv *priv; 352 struct ipoib_dev_priv *priv;
280 int ret;
281 353
282 switch (event->event) { 354 switch (event->event) {
283 case IB_CM_REQ_RECEIVED: 355 case IB_CM_REQ_RECEIVED:
@@ -289,20 +361,9 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
289 case IB_CM_REJ_RECEIVED: 361 case IB_CM_REJ_RECEIVED:
290 p = cm_id->context; 362 p = cm_id->context;
291 priv = netdev_priv(p->dev); 363 priv = netdev_priv(p->dev);
292 spin_lock_irq(&priv->lock); 364 if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
293 if (list_empty(&p->list)) 365 ipoib_warn(priv, "unable to move qp to error state\n");
294 ret = 0; /* Connection is going away already. */ 366 /* Fall through */
295 else {
296 list_del_init(&p->list);
297 ret = -ECONNRESET;
298 }
299 spin_unlock_irq(&priv->lock);
300 if (ret) {
301 ib_destroy_qp(p->qp);
302 kfree(p);
303 return ret;
304 }
305 return 0;
306 default: 367 default:
307 return 0; 368 return 0;
308 } 369 }
@@ -354,8 +415,15 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
354 wr_id, wc->status); 415 wr_id, wc->status);
355 416
356 if (unlikely(wr_id >= ipoib_recvq_size)) { 417 if (unlikely(wr_id >= ipoib_recvq_size)) {
357 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", 418 if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
358 wr_id, ipoib_recvq_size); 419 spin_lock_irqsave(&priv->lock, flags);
420 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
421 ipoib_cm_start_rx_drain(priv);
422 queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
423 spin_unlock_irqrestore(&priv->lock, flags);
424 } else
425 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
426 wr_id, ipoib_recvq_size);
359 return; 427 return;
360 } 428 }
361 429
@@ -374,9 +442,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
374 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 442 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
375 spin_lock_irqsave(&priv->lock, flags); 443 spin_lock_irqsave(&priv->lock, flags);
376 p->jiffies = jiffies; 444 p->jiffies = jiffies;
377 /* Move this entry to list head, but do 445 /* Move this entry to list head, but do not re-add it
378 * not re-add it if it has been removed. */ 446 * if it has been moved out of list. */
379 if (!list_empty(&p->list)) 447 if (p->state == IPOIB_CM_RX_LIVE)
380 list_move(&p->list, &priv->cm.passive_ids); 448 list_move(&p->list, &priv->cm.passive_ids);
381 spin_unlock_irqrestore(&priv->lock, flags); 449 spin_unlock_irqrestore(&priv->lock, flags);
382 } 450 }
@@ -592,8 +660,7 @@ int ipoib_cm_dev_open(struct net_device *dev)
592 if (IS_ERR(priv->cm.id)) { 660 if (IS_ERR(priv->cm.id)) {
593 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); 661 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
594 ret = PTR_ERR(priv->cm.id); 662 ret = PTR_ERR(priv->cm.id);
595 priv->cm.id = NULL; 663 goto err_cm;
596 return ret;
597 } 664 }
598 665
599 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), 666 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
@@ -601,34 +668,76 @@ int ipoib_cm_dev_open(struct net_device *dev)
601 if (ret) { 668 if (ret) {
602 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, 669 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
603 IPOIB_CM_IETF_ID | priv->qp->qp_num); 670 IPOIB_CM_IETF_ID | priv->qp->qp_num);
604 ib_destroy_cm_id(priv->cm.id); 671 goto err_listen;
605 priv->cm.id = NULL;
606 return ret;
607 } 672 }
673
608 return 0; 674 return 0;
675
676err_listen:
677 ib_destroy_cm_id(priv->cm.id);
678err_cm:
679 priv->cm.id = NULL;
680 return ret;
609} 681}
610 682
611void ipoib_cm_dev_stop(struct net_device *dev) 683void ipoib_cm_dev_stop(struct net_device *dev)
612{ 684{
613 struct ipoib_dev_priv *priv = netdev_priv(dev); 685 struct ipoib_dev_priv *priv = netdev_priv(dev);
614 struct ipoib_cm_rx *p; 686 struct ipoib_cm_rx *p, *n;
687 unsigned long begin;
688 LIST_HEAD(list);
689 int ret;
615 690
616 if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id) 691 if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
617 return; 692 return;
618 693
619 ib_destroy_cm_id(priv->cm.id); 694 ib_destroy_cm_id(priv->cm.id);
620 priv->cm.id = NULL; 695 priv->cm.id = NULL;
696
621 spin_lock_irq(&priv->lock); 697 spin_lock_irq(&priv->lock);
622 while (!list_empty(&priv->cm.passive_ids)) { 698 while (!list_empty(&priv->cm.passive_ids)) {
623 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); 699 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
624 list_del_init(&p->list); 700 list_move(&p->list, &priv->cm.rx_error_list);
701 p->state = IPOIB_CM_RX_ERROR;
702 spin_unlock_irq(&priv->lock);
703 ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
704 if (ret)
705 ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
706 spin_lock_irq(&priv->lock);
707 }
708
709 /* Wait for all RX to be drained */
710 begin = jiffies;
711
712 while (!list_empty(&priv->cm.rx_error_list) ||
713 !list_empty(&priv->cm.rx_flush_list) ||
714 !list_empty(&priv->cm.rx_drain_list)) {
715 if (time_after(jiffies, begin + 5 * HZ)) {
716 ipoib_warn(priv, "RX drain timing out\n");
717
718 /*
719 * assume the HW is wedged and just free up everything.
720 */
721 list_splice_init(&priv->cm.rx_flush_list, &list);
722 list_splice_init(&priv->cm.rx_error_list, &list);
723 list_splice_init(&priv->cm.rx_drain_list, &list);
724 break;
725 }
625 spin_unlock_irq(&priv->lock); 726 spin_unlock_irq(&priv->lock);
727 msleep(1);
728 ipoib_drain_cq(dev);
729 spin_lock_irq(&priv->lock);
730 }
731
732 list_splice_init(&priv->cm.rx_reap_list, &list);
733
734 spin_unlock_irq(&priv->lock);
735
736 list_for_each_entry_safe(p, n, &list, list) {
626 ib_destroy_cm_id(p->id); 737 ib_destroy_cm_id(p->id);
627 ib_destroy_qp(p->qp); 738 ib_destroy_qp(p->qp);
628 kfree(p); 739 kfree(p);
629 spin_lock_irq(&priv->lock);
630 } 740 }
631 spin_unlock_irq(&priv->lock);
632 741
633 cancel_delayed_work(&priv->cm.stale_task); 742 cancel_delayed_work(&priv->cm.stale_task);
634} 743}
@@ -1079,24 +1188,44 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
1079 queue_work(ipoib_workqueue, &priv->cm.skb_task); 1188 queue_work(ipoib_workqueue, &priv->cm.skb_task);
1080} 1189}
1081 1190
1191static void ipoib_cm_rx_reap(struct work_struct *work)
1192{
1193 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1194 cm.rx_reap_task);
1195 struct ipoib_cm_rx *p, *n;
1196 LIST_HEAD(list);
1197
1198 spin_lock_irq(&priv->lock);
1199 list_splice_init(&priv->cm.rx_reap_list, &list);
1200 spin_unlock_irq(&priv->lock);
1201
1202 list_for_each_entry_safe(p, n, &list, list) {
1203 ib_destroy_cm_id(p->id);
1204 ib_destroy_qp(p->qp);
1205 kfree(p);
1206 }
1207}
1208
1082static void ipoib_cm_stale_task(struct work_struct *work) 1209static void ipoib_cm_stale_task(struct work_struct *work)
1083{ 1210{
1084 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1211 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1085 cm.stale_task.work); 1212 cm.stale_task.work);
1086 struct ipoib_cm_rx *p; 1213 struct ipoib_cm_rx *p;
1214 int ret;
1087 1215
1088 spin_lock_irq(&priv->lock); 1216 spin_lock_irq(&priv->lock);
1089 while (!list_empty(&priv->cm.passive_ids)) { 1217 while (!list_empty(&priv->cm.passive_ids)) {
1090 /* List if sorted by LRU, start from tail, 1218 /* List is sorted by LRU, start from tail,
1091 * stop when we see a recently used entry */ 1219 * stop when we see a recently used entry */
1092 p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); 1220 p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
1093 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) 1221 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
1094 break; 1222 break;
1095 list_del_init(&p->list); 1223 list_move(&p->list, &priv->cm.rx_error_list);
1224 p->state = IPOIB_CM_RX_ERROR;
1096 spin_unlock_irq(&priv->lock); 1225 spin_unlock_irq(&priv->lock);
1097 ib_destroy_cm_id(p->id); 1226 ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
1098 ib_destroy_qp(p->qp); 1227 if (ret)
1099 kfree(p); 1228 ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
1100 spin_lock_irq(&priv->lock); 1229 spin_lock_irq(&priv->lock);
1101 } 1230 }
1102 1231
@@ -1164,9 +1293,14 @@ int ipoib_cm_dev_init(struct net_device *dev)
1164 INIT_LIST_HEAD(&priv->cm.passive_ids); 1293 INIT_LIST_HEAD(&priv->cm.passive_ids);
1165 INIT_LIST_HEAD(&priv->cm.reap_list); 1294 INIT_LIST_HEAD(&priv->cm.reap_list);
1166 INIT_LIST_HEAD(&priv->cm.start_list); 1295 INIT_LIST_HEAD(&priv->cm.start_list);
1296 INIT_LIST_HEAD(&priv->cm.rx_error_list);
1297 INIT_LIST_HEAD(&priv->cm.rx_flush_list);
1298 INIT_LIST_HEAD(&priv->cm.rx_drain_list);
1299 INIT_LIST_HEAD(&priv->cm.rx_reap_list);
1167 INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); 1300 INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
1168 INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); 1301 INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
1169 INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); 1302 INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
1303 INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
1170 INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); 1304 INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
1171 1305
1172 skb_queue_head_init(&priv->cm.skb_queue); 1306 skb_queue_head_init(&priv->cm.skb_queue);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 68d72c6f7ffb..8404f05b2b6e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
448 struct ipoib_dev_priv *priv = netdev_priv(dev); 448 struct ipoib_dev_priv *priv = netdev_priv(dev);
449 int ret; 449 int ret;
450 450
451 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
452 ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
453 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
454 return -1;
455 }
456 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
457
451 ret = ipoib_init_qp(dev); 458 ret = ipoib_init_qp(dev);
452 if (ret) { 459 if (ret) {
453 ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); 460 ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
457 ret = ipoib_ib_post_receives(dev); 464 ret = ipoib_ib_post_receives(dev);
458 if (ret) { 465 if (ret) {
459 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); 466 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
460 ipoib_ib_dev_stop(dev); 467 ipoib_ib_dev_stop(dev, 1);
461 return -1; 468 return -1;
462 } 469 }
463 470
464 ret = ipoib_cm_dev_open(dev); 471 ret = ipoib_cm_dev_open(dev);
465 if (ret) { 472 if (ret) {
466 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); 473 ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
467 ipoib_ib_dev_stop(dev); 474 ipoib_ib_dev_stop(dev, 1);
468 return -1; 475 return -1;
469 } 476 }
470 477
@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
516 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { 523 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
517 mutex_lock(&pkey_mutex); 524 mutex_lock(&pkey_mutex);
518 set_bit(IPOIB_PKEY_STOP, &priv->flags); 525 set_bit(IPOIB_PKEY_STOP, &priv->flags);
519 cancel_delayed_work(&priv->pkey_task); 526 cancel_delayed_work(&priv->pkey_poll_task);
520 mutex_unlock(&pkey_mutex); 527 mutex_unlock(&pkey_mutex);
521 if (flush) 528 if (flush)
522 flush_workqueue(ipoib_workqueue); 529 flush_workqueue(ipoib_workqueue);
@@ -543,13 +550,30 @@ static int recvs_pending(struct net_device *dev)
543 return pending; 550 return pending;
544} 551}
545 552
546int ipoib_ib_dev_stop(struct net_device *dev) 553void ipoib_drain_cq(struct net_device *dev)
554{
555 struct ipoib_dev_priv *priv = netdev_priv(dev);
556 int i, n;
557 do {
558 n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
559 for (i = 0; i < n; ++i) {
560 if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
561 ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
562 else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
563 ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
564 else
565 ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
566 }
567 } while (n == IPOIB_NUM_WC);
568}
569
570int ipoib_ib_dev_stop(struct net_device *dev, int flush)
547{ 571{
548 struct ipoib_dev_priv *priv = netdev_priv(dev); 572 struct ipoib_dev_priv *priv = netdev_priv(dev);
549 struct ib_qp_attr qp_attr; 573 struct ib_qp_attr qp_attr;
550 unsigned long begin; 574 unsigned long begin;
551 struct ipoib_tx_buf *tx_req; 575 struct ipoib_tx_buf *tx_req;
552 int i, n; 576 int i;
553 577
554 clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); 578 clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
555 netif_poll_disable(dev); 579 netif_poll_disable(dev);
@@ -604,17 +628,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
604 goto timeout; 628 goto timeout;
605 } 629 }
606 630
607 do { 631 ipoib_drain_cq(dev);
608 n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
609 for (i = 0; i < n; ++i) {
610 if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
611 ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
612 else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
613 ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
614 else
615 ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
616 }
617 } while (n == IPOIB_NUM_WC);
618 632
619 msleep(1); 633 msleep(1);
620 } 634 }
@@ -629,7 +643,8 @@ timeout:
629 /* Wait for all AHs to be reaped */ 643 /* Wait for all AHs to be reaped */
630 set_bit(IPOIB_STOP_REAPER, &priv->flags); 644 set_bit(IPOIB_STOP_REAPER, &priv->flags);
631 cancel_delayed_work(&priv->ah_reap_task); 645 cancel_delayed_work(&priv->ah_reap_task);
632 flush_workqueue(ipoib_workqueue); 646 if (flush)
647 flush_workqueue(ipoib_workqueue);
633 648
634 begin = jiffies; 649 begin = jiffies;
635 650
@@ -673,13 +688,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
673 return 0; 688 return 0;
674} 689}
675 690
676void ipoib_ib_dev_flush(struct work_struct *work) 691static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
677{ 692{
678 struct ipoib_dev_priv *cpriv, *priv = 693 struct ipoib_dev_priv *cpriv;
679 container_of(work, struct ipoib_dev_priv, flush_task);
680 struct net_device *dev = priv->dev; 694 struct net_device *dev = priv->dev;
695 u16 new_index;
696
697 mutex_lock(&priv->vlan_mutex);
698
699 /*
700 * Flush any child interfaces too -- they might be up even if
701 * the parent is down.
702 */
703 list_for_each_entry(cpriv, &priv->child_intfs, list)
704 __ipoib_ib_dev_flush(cpriv, pkey_event);
681 705
682 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) { 706 mutex_unlock(&priv->vlan_mutex);
707
708 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
683 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); 709 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
684 return; 710 return;
685 } 711 }
@@ -689,10 +715,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
689 return; 715 return;
690 } 716 }
691 717
718 if (pkey_event) {
719 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
720 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
721 ipoib_ib_dev_down(dev, 0);
722 ipoib_pkey_dev_delay_open(dev);
723 return;
724 }
725 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
726
727 /* restart QP only if P_Key index is changed */
728 if (new_index == priv->pkey_index) {
729 ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
730 return;
731 }
732 priv->pkey_index = new_index;
733 }
734
692 ipoib_dbg(priv, "flushing\n"); 735 ipoib_dbg(priv, "flushing\n");
693 736
694 ipoib_ib_dev_down(dev, 0); 737 ipoib_ib_dev_down(dev, 0);
695 738
739 if (pkey_event) {
740 ipoib_ib_dev_stop(dev, 0);
741 ipoib_ib_dev_open(dev);
742 }
743
696 /* 744 /*
697 * The device could have been brought down between the start and when 745 * The device could have been brought down between the start and when
698 * we get here, don't bring it back up if it's not configured up 746 * we get here, don't bring it back up if it's not configured up
@@ -701,14 +749,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
701 ipoib_ib_dev_up(dev); 749 ipoib_ib_dev_up(dev);
702 ipoib_mcast_restart_task(&priv->restart_task); 750 ipoib_mcast_restart_task(&priv->restart_task);
703 } 751 }
752}
704 753
705 mutex_lock(&priv->vlan_mutex); 754void ipoib_ib_dev_flush(struct work_struct *work)
755{
756 struct ipoib_dev_priv *priv =
757 container_of(work, struct ipoib_dev_priv, flush_task);
706 758
707 /* Flush any child interfaces too */ 759 ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
708 list_for_each_entry(cpriv, &priv->child_intfs, list) 760 __ipoib_ib_dev_flush(priv, 0);
709 ipoib_ib_dev_flush(&cpriv->flush_task); 761}
710 762
711 mutex_unlock(&priv->vlan_mutex); 763void ipoib_pkey_event(struct work_struct *work)
764{
765 struct ipoib_dev_priv *priv =
766 container_of(work, struct ipoib_dev_priv, pkey_event_task);
767
768 ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
769 __ipoib_ib_dev_flush(priv, 1);
712} 770}
713 771
714void ipoib_ib_dev_cleanup(struct net_device *dev) 772void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -736,7 +794,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
736void ipoib_pkey_poll(struct work_struct *work) 794void ipoib_pkey_poll(struct work_struct *work)
737{ 795{
738 struct ipoib_dev_priv *priv = 796 struct ipoib_dev_priv *priv =
739 container_of(work, struct ipoib_dev_priv, pkey_task.work); 797 container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
740 struct net_device *dev = priv->dev; 798 struct net_device *dev = priv->dev;
741 799
742 ipoib_pkey_dev_check_presence(dev); 800 ipoib_pkey_dev_check_presence(dev);
@@ -747,7 +805,7 @@ void ipoib_pkey_poll(struct work_struct *work)
747 mutex_lock(&pkey_mutex); 805 mutex_lock(&pkey_mutex);
748 if (!test_bit(IPOIB_PKEY_STOP, &priv->flags)) 806 if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
749 queue_delayed_work(ipoib_workqueue, 807 queue_delayed_work(ipoib_workqueue,
750 &priv->pkey_task, 808 &priv->pkey_poll_task,
751 HZ); 809 HZ);
752 mutex_unlock(&pkey_mutex); 810 mutex_unlock(&pkey_mutex);
753 } 811 }
@@ -766,7 +824,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
766 mutex_lock(&pkey_mutex); 824 mutex_lock(&pkey_mutex);
767 clear_bit(IPOIB_PKEY_STOP, &priv->flags); 825 clear_bit(IPOIB_PKEY_STOP, &priv->flags);
768 queue_delayed_work(ipoib_workqueue, 826 queue_delayed_work(ipoib_workqueue,
769 &priv->pkey_task, 827 &priv->pkey_poll_task,
770 HZ); 828 HZ);
771 mutex_unlock(&pkey_mutex); 829 mutex_unlock(&pkey_mutex);
772 return 1; 830 return 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 0a428f2b05c7..894b1dcdf3eb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
107 return -EINVAL; 107 return -EINVAL;
108 108
109 if (ipoib_ib_dev_up(dev)) { 109 if (ipoib_ib_dev_up(dev)) {
110 ipoib_ib_dev_stop(dev); 110 ipoib_ib_dev_stop(dev, 1);
111 return -EINVAL; 111 return -EINVAL;
112 } 112 }
113 113
@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
152 flush_workqueue(ipoib_workqueue); 152 flush_workqueue(ipoib_workqueue);
153 153
154 ipoib_ib_dev_down(dev, 1); 154 ipoib_ib_dev_down(dev, 1);
155 ipoib_ib_dev_stop(dev); 155 ipoib_ib_dev_stop(dev, 1);
156 156
157 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 157 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
158 struct ipoib_dev_priv *cpriv; 158 struct ipoib_dev_priv *cpriv;
@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
988 INIT_LIST_HEAD(&priv->dead_ahs); 988 INIT_LIST_HEAD(&priv->dead_ahs);
989 INIT_LIST_HEAD(&priv->multicast_list); 989 INIT_LIST_HEAD(&priv->multicast_list);
990 990
991 INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll); 991 INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
992 INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
992 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); 993 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
993 INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); 994 INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
994 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 995 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 54fbead4de01..aae367057a56 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -524,7 +524,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
524 return; 524 return;
525 525
526 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 526 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
527 ipoib_warn(priv, "ib_gid_entry_get() failed\n"); 527 ipoib_warn(priv, "ib_query_gid() failed\n");
528 else 528 else
529 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 529 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
530 530
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5c3c6a43a52b..982eb88e27ec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -33,8 +33,6 @@
33 * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ 33 * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
34 */ 34 */
35 35
36#include <rdma/ib_cache.h>
37
38#include "ipoib.h" 36#include "ipoib.h"
39 37
40int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) 38int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
49 if (!qp_attr) 47 if (!qp_attr)
50 goto out; 48 goto out;
51 49
52 if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { 50 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
53 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 51 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
54 ret = -ENXIO; 52 ret = -ENXIO;
55 goto out; 53 goto out;
@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
94{ 92{
95 struct ipoib_dev_priv *priv = netdev_priv(dev); 93 struct ipoib_dev_priv *priv = netdev_priv(dev);
96 int ret; 94 int ret;
97 u16 pkey_index;
98 struct ib_qp_attr qp_attr; 95 struct ib_qp_attr qp_attr;
99 int attr_mask; 96 int attr_mask;
100 97
101 /* 98 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
102 * Search through the port P_Key table for the requested pkey value. 99 return -1;
103 * The port has to be assigned to the respective IB partition in
104 * advance.
105 */
106 ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
107 if (ret) {
108 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
109 return ret;
110 }
111 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
112 100
113 qp_attr.qp_state = IB_QPS_INIT; 101 qp_attr.qp_state = IB_QPS_INIT;
114 qp_attr.qkey = 0; 102 qp_attr.qkey = 0;
115 qp_attr.port_num = priv->port; 103 qp_attr.port_num = priv->port;
116 qp_attr.pkey_index = pkey_index; 104 qp_attr.pkey_index = priv->pkey_index;
117 attr_mask = 105 attr_mask =
118 IB_QP_QKEY | 106 IB_QP_QKEY |
119 IB_QP_PORT | 107 IB_QP_PORT |
@@ -185,7 +173,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
185 size = ipoib_sendq_size + ipoib_recvq_size + 1; 173 size = ipoib_sendq_size + ipoib_recvq_size + 1;
186 ret = ipoib_cm_dev_init(dev); 174 ret = ipoib_cm_dev_init(dev);
187 if (!ret) 175 if (!ret)
188 size += ipoib_recvq_size; 176 size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
189 177
190 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); 178 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
191 if (IS_ERR(priv->cq)) { 179 if (IS_ERR(priv->cq)) {
@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
259 struct ipoib_dev_priv *priv = 247 struct ipoib_dev_priv *priv =
260 container_of(handler, struct ipoib_dev_priv, event_handler); 248 container_of(handler, struct ipoib_dev_priv, event_handler);
261 249
262 if ((record->event == IB_EVENT_PORT_ERR || 250 if (record->element.port_num != priv->port)
263 record->event == IB_EVENT_PKEY_CHANGE || 251 return;
264 record->event == IB_EVENT_PORT_ACTIVE || 252
265 record->event == IB_EVENT_LID_CHANGE || 253 if (record->event == IB_EVENT_PORT_ERR ||
266 record->event == IB_EVENT_SM_CHANGE || 254 record->event == IB_EVENT_PORT_ACTIVE ||
267 record->event == IB_EVENT_CLIENT_REREGISTER) && 255 record->event == IB_EVENT_LID_CHANGE ||
268 record->element.port_num == priv->port) { 256 record->event == IB_EVENT_SM_CHANGE ||
257 record->event == IB_EVENT_CLIENT_REREGISTER) {
269 ipoib_dbg(priv, "Port state change event\n"); 258 ipoib_dbg(priv, "Port state change event\n");
270 queue_work(ipoib_workqueue, &priv->flush_task); 259 queue_work(ipoib_workqueue, &priv->flush_task);
260 } else if (record->event == IB_EVENT_PKEY_CHANGE) {
261 ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
262 queue_work(ipoib_workqueue, &priv->pkey_event_task);
271 } 263 }
272} 264}