aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/cm.c4
-rw-r--r--drivers/infiniband/core/device.c135
-rw-r--r--drivers/infiniband/core/umem.c4
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c16
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c181
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c6
-rw-r--r--drivers/infiniband/hw/mlx4/user.h5
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c158
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h49
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c203
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c87
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c40
17 files changed, 688 insertions, 221 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index eff591deeb46..e840434a96d8 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -306,7 +306,9 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
306 do { 306 do {
307 spin_lock_irqsave(&cm.lock, flags); 307 spin_lock_irqsave(&cm.lock, flags);
308 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 308 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
309 next_id++, &id); 309 next_id, &id);
310 if (!ret)
311 next_id = ((unsigned) id + 1) & MAX_ID_MASK;
310 spin_unlock_irqrestore(&cm.lock, flags); 312 spin_unlock_irqrestore(&cm.lock, flags);
311 } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); 313 } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
312 314
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index bcecf4ddbf00..3ada17c0f239 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -150,6 +150,18 @@ static int alloc_name(char *name)
150 return 0; 150 return 0;
151} 151}
152 152
153static int start_port(struct ib_device *device)
154{
155 return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
156}
157
158
159static int end_port(struct ib_device *device)
160{
161 return (device->node_type == RDMA_NODE_IB_SWITCH) ?
162 0 : device->phys_port_cnt;
163}
164
153/** 165/**
154 * ib_alloc_device - allocate an IB device struct 166 * ib_alloc_device - allocate an IB device struct
155 * @size:size of structure to allocate 167 * @size:size of structure to allocate
@@ -209,6 +221,45 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
209 return 0; 221 return 0;
210} 222}
211 223
224static int read_port_table_lengths(struct ib_device *device)
225{
226 struct ib_port_attr *tprops = NULL;
227 int num_ports, ret = -ENOMEM;
228 u8 port_index;
229
230 tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
231 if (!tprops)
232 goto out;
233
234 num_ports = end_port(device) - start_port(device) + 1;
235
236 device->pkey_tbl_len = kmalloc(sizeof *device->pkey_tbl_len * num_ports,
237 GFP_KERNEL);
238 device->gid_tbl_len = kmalloc(sizeof *device->gid_tbl_len * num_ports,
239 GFP_KERNEL);
240 if (!device->pkey_tbl_len || !device->gid_tbl_len)
241 goto err;
242
243 for (port_index = 0; port_index < num_ports; ++port_index) {
244 ret = ib_query_port(device, port_index + start_port(device),
245 tprops);
246 if (ret)
247 goto err;
248 device->pkey_tbl_len[port_index] = tprops->pkey_tbl_len;
249 device->gid_tbl_len[port_index] = tprops->gid_tbl_len;
250 }
251
252 ret = 0;
253 goto out;
254
255err:
256 kfree(device->gid_tbl_len);
257 kfree(device->pkey_tbl_len);
258out:
259 kfree(tprops);
260 return ret;
261}
262
212/** 263/**
213 * ib_register_device - Register an IB device with IB core 264 * ib_register_device - Register an IB device with IB core
214 * @device:Device to register 265 * @device:Device to register
@@ -240,10 +291,19 @@ int ib_register_device(struct ib_device *device)
240 spin_lock_init(&device->event_handler_lock); 291 spin_lock_init(&device->event_handler_lock);
241 spin_lock_init(&device->client_data_lock); 292 spin_lock_init(&device->client_data_lock);
242 293
294 ret = read_port_table_lengths(device);
295 if (ret) {
296 printk(KERN_WARNING "Couldn't create table lengths cache for device %s\n",
297 device->name);
298 goto out;
299 }
300
243 ret = ib_device_register_sysfs(device); 301 ret = ib_device_register_sysfs(device);
244 if (ret) { 302 if (ret) {
245 printk(KERN_WARNING "Couldn't register device %s with driver model\n", 303 printk(KERN_WARNING "Couldn't register device %s with driver model\n",
246 device->name); 304 device->name);
305 kfree(device->gid_tbl_len);
306 kfree(device->pkey_tbl_len);
247 goto out; 307 goto out;
248 } 308 }
249 309
@@ -285,6 +345,9 @@ void ib_unregister_device(struct ib_device *device)
285 345
286 list_del(&device->core_list); 346 list_del(&device->core_list);
287 347
348 kfree(device->gid_tbl_len);
349 kfree(device->pkey_tbl_len);
350
288 mutex_unlock(&device_mutex); 351 mutex_unlock(&device_mutex);
289 352
290 spin_lock_irqsave(&device->client_data_lock, flags); 353 spin_lock_irqsave(&device->client_data_lock, flags);
@@ -507,10 +570,7 @@ int ib_query_port(struct ib_device *device,
507 u8 port_num, 570 u8 port_num,
508 struct ib_port_attr *port_attr) 571 struct ib_port_attr *port_attr)
509{ 572{
510 if (device->node_type == RDMA_NODE_IB_SWITCH) { 573 if (port_num < start_port(device) || port_num > end_port(device))
511 if (port_num)
512 return -EINVAL;
513 } else if (port_num < 1 || port_num > device->phys_port_cnt)
514 return -EINVAL; 574 return -EINVAL;
515 575
516 return device->query_port(device, port_num, port_attr); 576 return device->query_port(device, port_num, port_attr);
@@ -582,10 +642,7 @@ int ib_modify_port(struct ib_device *device,
582 u8 port_num, int port_modify_mask, 642 u8 port_num, int port_modify_mask,
583 struct ib_port_modify *port_modify) 643 struct ib_port_modify *port_modify)
584{ 644{
585 if (device->node_type == RDMA_NODE_IB_SWITCH) { 645 if (port_num < start_port(device) || port_num > end_port(device))
586 if (port_num)
587 return -EINVAL;
588 } else if (port_num < 1 || port_num > device->phys_port_cnt)
589 return -EINVAL; 646 return -EINVAL;
590 647
591 return device->modify_port(device, port_num, port_modify_mask, 648 return device->modify_port(device, port_num, port_modify_mask,
@@ -593,6 +650,68 @@ int ib_modify_port(struct ib_device *device,
593} 650}
594EXPORT_SYMBOL(ib_modify_port); 651EXPORT_SYMBOL(ib_modify_port);
595 652
653/**
654 * ib_find_gid - Returns the port number and GID table index where
655 * a specified GID value occurs.
656 * @device: The device to query.
657 * @gid: The GID value to search for.
658 * @port_num: The port number of the device where the GID value was found.
659 * @index: The index into the GID table where the GID was found. This
660 * parameter may be NULL.
661 */
662int ib_find_gid(struct ib_device *device, union ib_gid *gid,
663 u8 *port_num, u16 *index)
664{
665 union ib_gid tmp_gid;
666 int ret, port, i;
667
668 for (port = start_port(device); port <= end_port(device); ++port) {
669 for (i = 0; i < device->gid_tbl_len[port - start_port(device)]; ++i) {
670 ret = ib_query_gid(device, port, i, &tmp_gid);
671 if (ret)
672 return ret;
673 if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
674 *port_num = port;
675 if (index)
676 *index = i;
677 return 0;
678 }
679 }
680 }
681
682 return -ENOENT;
683}
684EXPORT_SYMBOL(ib_find_gid);
685
686/**
687 * ib_find_pkey - Returns the PKey table index where a specified
688 * PKey value occurs.
689 * @device: The device to query.
690 * @port_num: The port number of the device to search for the PKey.
691 * @pkey: The PKey value to search for.
692 * @index: The index into the PKey table where the PKey was found.
693 */
694int ib_find_pkey(struct ib_device *device,
695 u8 port_num, u16 pkey, u16 *index)
696{
697 int ret, i;
698 u16 tmp_pkey;
699
700 for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
701 ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
702 if (ret)
703 return ret;
704
705 if (pkey == tmp_pkey) {
706 *index = i;
707 return 0;
708 }
709 }
710
711 return -ENOENT;
712}
713EXPORT_SYMBOL(ib_find_pkey);
714
596static int __init ib_core_init(void) 715static int __init ib_core_init(void)
597{ 716{
598 int ret; 717 int ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 96a16c0c08f8..b4aec5103c99 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -210,8 +210,10 @@ void ib_umem_release(struct ib_umem *umem)
210 __ib_umem_release(umem->context->device, umem, 1); 210 __ib_umem_release(umem->context->device, umem, 1);
211 211
212 mm = get_task_mm(current); 212 mm = get_task_mm(current);
213 if (!mm) 213 if (!mm) {
214 kfree(umem);
214 return; 215 return;
216 }
215 217
216 diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; 218 diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
217 219
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 84c5bb498563..add79bd44e39 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -2050,13 +2050,10 @@ int ehca_mrmw_map_hrc_alloc(const u64 hipz_rc)
2050 switch (hipz_rc) { 2050 switch (hipz_rc) {
2051 case H_SUCCESS: /* successful completion */ 2051 case H_SUCCESS: /* successful completion */
2052 return 0; 2052 return 0;
2053 case H_ADAPTER_PARM: /* invalid adapter handle */
2054 case H_RT_PARM: /* invalid resource type */
2055 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ 2053 case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
2056 case H_MLENGTH_PARM: /* invalid memory length */
2057 case H_MEM_ACCESS_PARM: /* invalid access controls */
2058 case H_CONSTRAINED: /* resource constraint */ 2054 case H_CONSTRAINED: /* resource constraint */
2059 return -EINVAL; 2055 case H_NO_MEM:
2056 return -ENOMEM;
2060 case H_BUSY: /* long busy */ 2057 case H_BUSY: /* long busy */
2061 return -EBUSY; 2058 return -EBUSY;
2062 default: 2059 default:
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 085e28b939ec..dd691cfa5079 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -165,10 +165,9 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
165{ 165{
166 struct rb_node **n = &mcast_tree.rb_node; 166 struct rb_node **n = &mcast_tree.rb_node;
167 struct rb_node *pn = NULL; 167 struct rb_node *pn = NULL;
168 unsigned long flags;
169 int ret; 168 int ret;
170 169
171 spin_lock_irqsave(&mcast_lock, flags); 170 spin_lock_irq(&mcast_lock);
172 171
173 while (*n) { 172 while (*n) {
174 struct ipath_mcast *tmcast; 173 struct ipath_mcast *tmcast;
@@ -228,7 +227,7 @@ static int ipath_mcast_add(struct ipath_ibdev *dev,
228 ret = 0; 227 ret = 0;
229 228
230bail: 229bail:
231 spin_unlock_irqrestore(&mcast_lock, flags); 230 spin_unlock_irq(&mcast_lock);
232 231
233 return ret; 232 return ret;
234} 233}
@@ -289,17 +288,16 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
289 struct ipath_mcast *mcast = NULL; 288 struct ipath_mcast *mcast = NULL;
290 struct ipath_mcast_qp *p, *tmp; 289 struct ipath_mcast_qp *p, *tmp;
291 struct rb_node *n; 290 struct rb_node *n;
292 unsigned long flags;
293 int last = 0; 291 int last = 0;
294 int ret; 292 int ret;
295 293
296 spin_lock_irqsave(&mcast_lock, flags); 294 spin_lock_irq(&mcast_lock);
297 295
298 /* Find the GID in the mcast table. */ 296 /* Find the GID in the mcast table. */
299 n = mcast_tree.rb_node; 297 n = mcast_tree.rb_node;
300 while (1) { 298 while (1) {
301 if (n == NULL) { 299 if (n == NULL) {
302 spin_unlock_irqrestore(&mcast_lock, flags); 300 spin_unlock_irq(&mcast_lock);
303 ret = -EINVAL; 301 ret = -EINVAL;
304 goto bail; 302 goto bail;
305 } 303 }
@@ -334,7 +332,7 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
334 break; 332 break;
335 } 333 }
336 334
337 spin_unlock_irqrestore(&mcast_lock, flags); 335 spin_unlock_irq(&mcast_lock);
338 336
339 if (p) { 337 if (p) {
340 /* 338 /*
@@ -348,9 +346,9 @@ int ipath_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
348 atomic_dec(&mcast->refcount); 346 atomic_dec(&mcast->refcount);
349 wait_event(mcast->wait, !atomic_read(&mcast->refcount)); 347 wait_event(mcast->wait, !atomic_read(&mcast->refcount));
350 ipath_mcast_free(mcast); 348 ipath_mcast_free(mcast);
351 spin_lock(&dev->n_mcast_grps_lock); 349 spin_lock_irq(&dev->n_mcast_grps_lock);
352 dev->n_mcast_grps_allocated--; 350 dev->n_mcast_grps_allocated--;
353 spin_unlock(&dev->n_mcast_grps_lock); 351 spin_unlock_irq(&dev->n_mcast_grps_lock);
354 } 352 }
355 353
356 ret = 0; 354 ret = 0;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5cd706908450..a824bc5f79fd 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -188,14 +188,32 @@ static int send_wqe_overhead(enum ib_qp_type type)
188 } 188 }
189} 189}
190 190
191static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, 191static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
192 enum ib_qp_type type, struct mlx4_ib_qp *qp) 192 struct mlx4_ib_qp *qp)
193{ 193{
194 /* Sanity check QP size before proceeding */ 194 /* Sanity check RQ size before proceeding */
195 if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
196 cap->max_recv_sge > dev->dev->caps.max_rq_sg)
197 return -EINVAL;
198
199 qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
200
201 qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
202 sizeof (struct mlx4_wqe_data_seg)));
203 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
204
205 cap->max_recv_wr = qp->rq.max;
206 cap->max_recv_sge = qp->rq.max_gs;
207
208 return 0;
209}
210
211static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
212 enum ib_qp_type type, struct mlx4_ib_qp *qp)
213{
214 /* Sanity check SQ size before proceeding */
195 if (cap->max_send_wr > dev->dev->caps.max_wqes || 215 if (cap->max_send_wr > dev->dev->caps.max_wqes ||
196 cap->max_recv_wr > dev->dev->caps.max_wqes ||
197 cap->max_send_sge > dev->dev->caps.max_sq_sg || 216 cap->max_send_sge > dev->dev->caps.max_sq_sg ||
198 cap->max_recv_sge > dev->dev->caps.max_rq_sg ||
199 cap->max_inline_data + send_wqe_overhead(type) + 217 cap->max_inline_data + send_wqe_overhead(type) +
200 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) 218 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
201 return -EINVAL; 219 return -EINVAL;
@@ -208,12 +226,7 @@ static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
208 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) 226 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
209 return -EINVAL; 227 return -EINVAL;
210 228
211 qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0; 229 qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 1;
212 qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
213
214 qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
215 sizeof (struct mlx4_wqe_data_seg)));
216 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
217 230
218 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * 231 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
219 sizeof (struct mlx4_wqe_data_seg), 232 sizeof (struct mlx4_wqe_data_seg),
@@ -233,16 +246,26 @@ static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
233 qp->sq.offset = 0; 246 qp->sq.offset = 0;
234 } 247 }
235 248
236 cap->max_send_wr = qp->sq.max; 249 cap->max_send_wr = qp->sq.max;
237 cap->max_recv_wr = qp->rq.max; 250 cap->max_send_sge = qp->sq.max_gs;
238 cap->max_send_sge = qp->sq.max_gs;
239 cap->max_recv_sge = qp->rq.max_gs;
240 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) - 251 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
241 sizeof (struct mlx4_wqe_inline_seg); 252 sizeof (struct mlx4_wqe_inline_seg);
242 253
243 return 0; 254 return 0;
244} 255}
245 256
257static int set_user_sq_size(struct mlx4_ib_qp *qp,
258 struct mlx4_ib_create_qp *ucmd)
259{
260 qp->sq.max = 1 << ucmd->log_sq_bb_count;
261 qp->sq.wqe_shift = ucmd->log_sq_stride;
262
263 qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
264 (qp->sq.max << qp->sq.wqe_shift);
265
266 return 0;
267}
268
246static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, 269static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
247 struct ib_qp_init_attr *init_attr, 270 struct ib_qp_init_attr *init_attr,
248 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) 271 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
@@ -264,7 +287,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
264 qp->sq.head = 0; 287 qp->sq.head = 0;
265 qp->sq.tail = 0; 288 qp->sq.tail = 0;
266 289
267 err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp); 290 err = set_rq_size(dev, &init_attr->cap, qp);
268 if (err) 291 if (err)
269 goto err; 292 goto err;
270 293
@@ -276,6 +299,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
276 goto err; 299 goto err;
277 } 300 }
278 301
302 err = set_user_sq_size(qp, &ucmd);
303 if (err)
304 goto err;
305
279 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, 306 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
280 qp->buf_size, 0); 307 qp->buf_size, 0);
281 if (IS_ERR(qp->umem)) { 308 if (IS_ERR(qp->umem)) {
@@ -297,6 +324,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
297 if (err) 324 if (err)
298 goto err_mtt; 325 goto err_mtt;
299 } else { 326 } else {
327 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
328 if (err)
329 goto err;
330
300 err = mlx4_ib_db_alloc(dev, &qp->db, 0); 331 err = mlx4_ib_db_alloc(dev, &qp->db, 0);
301 if (err) 332 if (err)
302 goto err; 333 goto err;
@@ -573,7 +604,7 @@ static int to_mlx4_st(enum ib_qp_type type)
573 } 604 }
574} 605}
575 606
576static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr, 607static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, const struct ib_qp_attr *attr,
577 int attr_mask) 608 int attr_mask)
578{ 609{
579 u8 dest_rd_atomic; 610 u8 dest_rd_atomic;
@@ -603,7 +634,7 @@ static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *att
603 return cpu_to_be32(hw_access_flags); 634 return cpu_to_be32(hw_access_flags);
604} 635}
605 636
606static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr, 637static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, const struct ib_qp_attr *attr,
607 int attr_mask) 638 int attr_mask)
608{ 639{
609 if (attr_mask & IB_QP_PKEY_INDEX) 640 if (attr_mask & IB_QP_PKEY_INDEX)
@@ -619,7 +650,7 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
619 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); 650 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
620} 651}
621 652
622static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah, 653static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
623 struct mlx4_qp_path *path, u8 port) 654 struct mlx4_qp_path *path, u8 port)
624{ 655{
625 path->grh_mylmc = ah->src_path_bits & 0x7f; 656 path->grh_mylmc = ah->src_path_bits & 0x7f;
@@ -655,14 +686,14 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
655 return 0; 686 return 0;
656} 687}
657 688
658int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 689static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
659 int attr_mask, struct ib_udata *udata) 690 const struct ib_qp_attr *attr, int attr_mask,
691 enum ib_qp_state cur_state, enum ib_qp_state new_state)
660{ 692{
661 struct mlx4_ib_dev *dev = to_mdev(ibqp->device); 693 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
662 struct mlx4_ib_qp *qp = to_mqp(ibqp); 694 struct mlx4_ib_qp *qp = to_mqp(ibqp);
663 struct mlx4_qp_context *context; 695 struct mlx4_qp_context *context;
664 enum mlx4_qp_optpar optpar = 0; 696 enum mlx4_qp_optpar optpar = 0;
665 enum ib_qp_state cur_state, new_state;
666 int sqd_event; 697 int sqd_event;
667 int err = -EINVAL; 698 int err = -EINVAL;
668 699
@@ -670,34 +701,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
670 if (!context) 701 if (!context)
671 return -ENOMEM; 702 return -ENOMEM;
672 703
673 mutex_lock(&qp->mutex);
674
675 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
676 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
677
678 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
679 goto out;
680
681 if ((attr_mask & IB_QP_PKEY_INDEX) &&
682 attr->pkey_index >= dev->dev->caps.pkey_table_len) {
683 goto out;
684 }
685
686 if ((attr_mask & IB_QP_PORT) &&
687 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
688 goto out;
689 }
690
691 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
692 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
693 goto out;
694 }
695
696 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
697 attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
698 goto out;
699 }
700
701 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | 704 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
702 (to_mlx4_st(ibqp->qp_type) << 16)); 705 (to_mlx4_st(ibqp->qp_type) << 16));
703 context->flags |= cpu_to_be32(1 << 8); /* DE? */ 706 context->flags |= cpu_to_be32(1 << 8); /* DE? */
@@ -920,11 +923,84 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
920 } 923 }
921 924
922out: 925out:
923 mutex_unlock(&qp->mutex);
924 kfree(context); 926 kfree(context);
925 return err; 927 return err;
926} 928}
927 929
930static const struct ib_qp_attr mlx4_ib_qp_attr = { .port_num = 1 };
931static const int mlx4_ib_qp_attr_mask_table[IB_QPT_UD + 1] = {
932 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
933 IB_QP_PORT |
934 IB_QP_QKEY),
935 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
936 IB_QP_PORT |
937 IB_QP_ACCESS_FLAGS),
938 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
939 IB_QP_PORT |
940 IB_QP_ACCESS_FLAGS),
941 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
942 IB_QP_QKEY),
943 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
944 IB_QP_QKEY),
945};
946
947int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
948 int attr_mask, struct ib_udata *udata)
949{
950 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
951 struct mlx4_ib_qp *qp = to_mqp(ibqp);
952 enum ib_qp_state cur_state, new_state;
953 int err = -EINVAL;
954
955 mutex_lock(&qp->mutex);
956
957 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
958 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
959
960 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
961 goto out;
962
963 if ((attr_mask & IB_QP_PKEY_INDEX) &&
964 attr->pkey_index >= dev->dev->caps.pkey_table_len) {
965 goto out;
966 }
967
968 if ((attr_mask & IB_QP_PORT) &&
969 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
970 goto out;
971 }
972
973 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
974 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
975 goto out;
976 }
977
978 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
979 attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
980 goto out;
981 }
982
983 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
984 err = 0;
985 goto out;
986 }
987
988 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
989 err = __mlx4_ib_modify_qp(ibqp, &mlx4_ib_qp_attr,
990 mlx4_ib_qp_attr_mask_table[ibqp->qp_type],
991 IB_QPS_RESET, IB_QPS_INIT);
992 if (err)
993 goto out;
994 cur_state = IB_QPS_INIT;
995 }
996
997 err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
998
999out:
1000 mutex_unlock(&qp->mutex);
1001 return err;
1002}
1003
928static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 1004static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
929 void *wqe) 1005 void *wqe)
930{ 1006{
@@ -952,6 +1028,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
952 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; 1028 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
953 sqp->ud_header.grh.flow_label = 1029 sqp->ud_header.grh.flow_label =
954 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 1030 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1031 sqp->ud_header.grh.hop_limit = ah->av.hop_limit;
955 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, 1032 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
956 ah->av.gid_index, &sqp->ud_header.grh.source_gid); 1033 ah->av.gid_index, &sqp->ud_header.grh.source_gid);
957 memcpy(sqp->ud_header.grh.destination_gid.raw, 1034 memcpy(sqp->ud_header.grh.destination_gid.raw,
@@ -1192,7 +1269,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1192 */ 1269 */
1193 wmb(); 1270 wmb();
1194 1271
1195 if (wr->opcode < 0 || wr->opcode > ARRAY_SIZE(mlx4_ib_opcode)) { 1272 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
1196 err = -EINVAL; 1273 err = -EINVAL;
1197 goto out; 1274 goto out;
1198 } 1275 }
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 42ab4a801d6a..12fac1c8989d 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -297,6 +297,12 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
297 break; 297 break;
298 } 298 }
299 299
300 if (unlikely(srq->head == srq->tail)) {
301 err = -ENOMEM;
302 *bad_wr = wr;
303 break;
304 }
305
300 srq->wrid[srq->head] = wr->wr_id; 306 srq->wrid[srq->head] = wr->wr_id;
301 307
302 next = get_wqe(srq, srq->head); 308 next = get_wqe(srq, srq->head);
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 5b8eddc9fa83..88c72d56368b 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -39,7 +39,7 @@
39 * Increment this value if any changes that break userspace ABI 39 * Increment this value if any changes that break userspace ABI
40 * compatibility are made. 40 * compatibility are made.
41 */ 41 */
42#define MLX4_IB_UVERBS_ABI_VERSION 1 42#define MLX4_IB_UVERBS_ABI_VERSION 2
43 43
44/* 44/*
45 * Make sure that all structs defined in this file remain laid out so 45 * Make sure that all structs defined in this file remain laid out so
@@ -87,6 +87,9 @@ struct mlx4_ib_create_srq_resp {
87struct mlx4_ib_create_qp { 87struct mlx4_ib_create_qp {
88 __u64 buf_addr; 88 __u64 buf_addr;
89 __u64 db_addr; 89 __u64 db_addr;
90 __u8 log_sq_bb_count;
91 __u8 log_sq_stride;
92 __u8 reserved[6];
90}; 93};
91 94
92#endif /* MLX4_IB_USER_H */ 95#endif /* MLX4_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 27caf3b0648a..4b111a852ff6 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -279,6 +279,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
279 (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff; 279 (be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20) & 0xff;
280 header->grh.flow_label = 280 header->grh.flow_label =
281 ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); 281 ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
282 header->grh.hop_limit = ah->av->hop_limit;
282 ib_get_cached_gid(&dev->ib_dev, 283 ib_get_cached_gid(&dev->ib_dev,
283 be32_to_cpu(ah->av->port_pd) >> 24, 284 be32_to_cpu(ah->av->port_pd) >> 24,
284 ah->av->gid_index % dev->limits.gid_table_len, 285 ah->av->gid_index % dev->limits.gid_table_len,
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 773145e29947..aa563e61de65 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -1250,12 +1250,14 @@ static void __mthca_remove_one(struct pci_dev *pdev)
1250int __mthca_restart_one(struct pci_dev *pdev) 1250int __mthca_restart_one(struct pci_dev *pdev)
1251{ 1251{
1252 struct mthca_dev *mdev; 1252 struct mthca_dev *mdev;
1253 int hca_type;
1253 1254
1254 mdev = pci_get_drvdata(pdev); 1255 mdev = pci_get_drvdata(pdev);
1255 if (!mdev) 1256 if (!mdev)
1256 return -ENODEV; 1257 return -ENODEV;
1258 hca_type = mdev->hca_type;
1257 __mthca_remove_one(pdev); 1259 __mthca_remove_one(pdev);
1258 return __mthca_init_one(pdev, mdev->hca_type); 1260 return __mthca_init_one(pdev, hca_type);
1259} 1261}
1260 1262
1261static int __devinit mthca_init_one(struct pci_dev *pdev, 1263static int __devinit mthca_init_one(struct pci_dev *pdev,
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 2741ded89297..027664979fe2 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -296,7 +296,7 @@ static int to_mthca_st(int transport)
296 } 296 }
297} 297}
298 298
299static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr, 299static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr,
300 int attr_mask) 300 int attr_mask)
301{ 301{
302 if (attr_mask & IB_QP_PKEY_INDEX) 302 if (attr_mask & IB_QP_PKEY_INDEX)
@@ -328,7 +328,7 @@ static void init_port(struct mthca_dev *dev, int port)
328 mthca_warn(dev, "INIT_IB returned status %02x.\n", status); 328 mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
329} 329}
330 330
331static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, 331static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr,
332 int attr_mask) 332 int attr_mask)
333{ 333{
334 u8 dest_rd_atomic; 334 u8 dest_rd_atomic;
@@ -511,7 +511,7 @@ out:
511 return err; 511 return err;
512} 512}
513 513
514static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah, 514static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
515 struct mthca_qp_path *path, u8 port) 515 struct mthca_qp_path *path, u8 port)
516{ 516{
517 path->g_mylmc = ah->src_path_bits & 0x7f; 517 path->g_mylmc = ah->src_path_bits & 0x7f;
@@ -539,12 +539,12 @@ static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
539 return 0; 539 return 0;
540} 540}
541 541
542int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, 542static int __mthca_modify_qp(struct ib_qp *ibqp,
543 struct ib_udata *udata) 543 const struct ib_qp_attr *attr, int attr_mask,
544 enum ib_qp_state cur_state, enum ib_qp_state new_state)
544{ 545{
545 struct mthca_dev *dev = to_mdev(ibqp->device); 546 struct mthca_dev *dev = to_mdev(ibqp->device);
546 struct mthca_qp *qp = to_mqp(ibqp); 547 struct mthca_qp *qp = to_mqp(ibqp);
547 enum ib_qp_state cur_state, new_state;
548 struct mthca_mailbox *mailbox; 548 struct mthca_mailbox *mailbox;
549 struct mthca_qp_param *qp_param; 549 struct mthca_qp_param *qp_param;
550 struct mthca_qp_context *qp_context; 550 struct mthca_qp_context *qp_context;
@@ -552,60 +552,6 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
552 u8 status; 552 u8 status;
553 int err = -EINVAL; 553 int err = -EINVAL;
554 554
555 mutex_lock(&qp->mutex);
556
557 if (attr_mask & IB_QP_CUR_STATE) {
558 cur_state = attr->cur_qp_state;
559 } else {
560 spin_lock_irq(&qp->sq.lock);
561 spin_lock(&qp->rq.lock);
562 cur_state = qp->state;
563 spin_unlock(&qp->rq.lock);
564 spin_unlock_irq(&qp->sq.lock);
565 }
566
567 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
568
569 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
570 mthca_dbg(dev, "Bad QP transition (transport %d) "
571 "%d->%d with attr 0x%08x\n",
572 qp->transport, cur_state, new_state,
573 attr_mask);
574 goto out;
575 }
576
577 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
578 err = 0;
579 goto out;
580 }
581
582 if ((attr_mask & IB_QP_PKEY_INDEX) &&
583 attr->pkey_index >= dev->limits.pkey_table_len) {
584 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
585 attr->pkey_index, dev->limits.pkey_table_len-1);
586 goto out;
587 }
588
589 if ((attr_mask & IB_QP_PORT) &&
590 (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
591 mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
592 goto out;
593 }
594
595 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
596 attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
597 mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
598 attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
599 goto out;
600 }
601
602 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
603 attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
604 mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
605 attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
606 goto out;
607 }
608
609 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); 555 mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
610 if (IS_ERR(mailbox)) { 556 if (IS_ERR(mailbox)) {
611 err = PTR_ERR(mailbox); 557 err = PTR_ERR(mailbox);
@@ -892,6 +838,98 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
892 838
893out_mailbox: 839out_mailbox:
894 mthca_free_mailbox(dev, mailbox); 840 mthca_free_mailbox(dev, mailbox);
841out:
842 return err;
843}
844
845static const struct ib_qp_attr dummy_init_attr = { .port_num = 1 };
846static const int dummy_init_attr_mask[] = {
847 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
848 IB_QP_PORT |
849 IB_QP_QKEY),
850 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
851 IB_QP_PORT |
852 IB_QP_ACCESS_FLAGS),
853 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
854 IB_QP_PORT |
855 IB_QP_ACCESS_FLAGS),
856 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
857 IB_QP_QKEY),
858 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
859 IB_QP_QKEY),
860};
861
862int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
863 struct ib_udata *udata)
864{
865 struct mthca_dev *dev = to_mdev(ibqp->device);
866 struct mthca_qp *qp = to_mqp(ibqp);
867 enum ib_qp_state cur_state, new_state;
868 int err = -EINVAL;
869
870 mutex_lock(&qp->mutex);
871 if (attr_mask & IB_QP_CUR_STATE) {
872 cur_state = attr->cur_qp_state;
873 } else {
874 spin_lock_irq(&qp->sq.lock);
875 spin_lock(&qp->rq.lock);
876 cur_state = qp->state;
877 spin_unlock(&qp->rq.lock);
878 spin_unlock_irq(&qp->sq.lock);
879 }
880
881 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
882
883 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
884 mthca_dbg(dev, "Bad QP transition (transport %d) "
885 "%d->%d with attr 0x%08x\n",
886 qp->transport, cur_state, new_state,
887 attr_mask);
888 goto out;
889 }
890
891 if ((attr_mask & IB_QP_PKEY_INDEX) &&
892 attr->pkey_index >= dev->limits.pkey_table_len) {
893 mthca_dbg(dev, "P_Key index (%u) too large. max is %d\n",
894 attr->pkey_index, dev->limits.pkey_table_len-1);
895 goto out;
896 }
897
898 if ((attr_mask & IB_QP_PORT) &&
899 (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
900 mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
901 goto out;
902 }
903
904 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
905 attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
906 mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
907 attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
908 goto out;
909 }
910
911 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
912 attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
913 mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
914 attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
915 goto out;
916 }
917
918 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
919 err = 0;
920 goto out;
921 }
922
923 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_ERR) {
924 err = __mthca_modify_qp(ibqp, &dummy_init_attr,
925 dummy_init_attr_mask[ibqp->qp_type],
926 IB_QPS_RESET, IB_QPS_INIT);
927 if (err)
928 goto out;
929 cur_state = IB_QPS_INIT;
930 }
931
932 err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
895 933
896out: 934out:
897 mutex_unlock(&qp->mutex); 935 mutex_unlock(&qp->mutex);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 87310eeb6df0..a0b3782c7625 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -132,12 +132,46 @@ struct ipoib_cm_data {
132 __be32 mtu; 132 __be32 mtu;
133}; 133};
134 134
135/*
136 * Quoting 10.3.1 Queue Pair and EE Context States:
137 *
138 * Note, for QPs that are associated with an SRQ, the Consumer should take the
139 * QP through the Error State before invoking a Destroy QP or a Modify QP to the
140 * Reset State. The Consumer may invoke the Destroy QP without first performing
141 * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
142 * Last WQE Reached Event. However, if the Consumer does not wait for the
143 * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
144 * leakage may occur. Therefore, it is good programming practice to tear down a
145 * QP that is associated with an SRQ by using the following process:
146 *
147 * - Put the QP in the Error State
148 * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
149 * - either:
150 * drain the CQ by invoking the Poll CQ verb and either wait for CQ
151 * to be empty or the number of Poll CQ operations has exceeded
152 * CQ capacity size;
153 * - or
154 * post another WR that completes on the same CQ and wait for this
155 * WR to return as a WC;
156 * - and then invoke a Destroy QP or Reset QP.
157 *
158 * We use the second option and wait for a completion on the
159 * rx_drain_qp before destroying QPs attached to our SRQ.
160 */
161
162enum ipoib_cm_state {
163 IPOIB_CM_RX_LIVE,
164 IPOIB_CM_RX_ERROR, /* Ignored by stale task */
165 IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */
166};
167
135struct ipoib_cm_rx { 168struct ipoib_cm_rx {
136 struct ib_cm_id *id; 169 struct ib_cm_id *id;
137 struct ib_qp *qp; 170 struct ib_qp *qp;
138 struct list_head list; 171 struct list_head list;
139 struct net_device *dev; 172 struct net_device *dev;
140 unsigned long jiffies; 173 unsigned long jiffies;
174 enum ipoib_cm_state state;
141}; 175};
142 176
143struct ipoib_cm_tx { 177struct ipoib_cm_tx {
@@ -165,10 +199,16 @@ struct ipoib_cm_dev_priv {
165 struct ib_srq *srq; 199 struct ib_srq *srq;
166 struct ipoib_cm_rx_buf *srq_ring; 200 struct ipoib_cm_rx_buf *srq_ring;
167 struct ib_cm_id *id; 201 struct ib_cm_id *id;
168 struct list_head passive_ids; 202 struct ib_qp *rx_drain_qp; /* generates WR described in 10.3.1 */
203 struct list_head passive_ids; /* state: LIVE */
204 struct list_head rx_error_list; /* state: ERROR */
205 struct list_head rx_flush_list; /* state: FLUSH, drain not started */
206 struct list_head rx_drain_list; /* state: FLUSH, drain started */
207 struct list_head rx_reap_list; /* state: FLUSH, drain done */
169 struct work_struct start_task; 208 struct work_struct start_task;
170 struct work_struct reap_task; 209 struct work_struct reap_task;
171 struct work_struct skb_task; 210 struct work_struct skb_task;
211 struct work_struct rx_reap_task;
172 struct delayed_work stale_task; 212 struct delayed_work stale_task;
173 struct sk_buff_head skb_queue; 213 struct sk_buff_head skb_queue;
174 struct list_head start_list; 214 struct list_head start_list;
@@ -201,15 +241,17 @@ struct ipoib_dev_priv {
201 struct list_head multicast_list; 241 struct list_head multicast_list;
202 struct rb_root multicast_tree; 242 struct rb_root multicast_tree;
203 243
204 struct delayed_work pkey_task; 244 struct delayed_work pkey_poll_task;
205 struct delayed_work mcast_task; 245 struct delayed_work mcast_task;
206 struct work_struct flush_task; 246 struct work_struct flush_task;
207 struct work_struct restart_task; 247 struct work_struct restart_task;
208 struct delayed_work ah_reap_task; 248 struct delayed_work ah_reap_task;
249 struct work_struct pkey_event_task;
209 250
210 struct ib_device *ca; 251 struct ib_device *ca;
211 u8 port; 252 u8 port;
212 u16 pkey; 253 u16 pkey;
254 u16 pkey_index;
213 struct ib_pd *pd; 255 struct ib_pd *pd;
214 struct ib_mr *mr; 256 struct ib_mr *mr;
215 struct ib_cq *cq; 257 struct ib_cq *cq;
@@ -333,12 +375,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
333 375
334int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 376int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
335void ipoib_ib_dev_flush(struct work_struct *work); 377void ipoib_ib_dev_flush(struct work_struct *work);
378void ipoib_pkey_event(struct work_struct *work);
336void ipoib_ib_dev_cleanup(struct net_device *dev); 379void ipoib_ib_dev_cleanup(struct net_device *dev);
337 380
338int ipoib_ib_dev_open(struct net_device *dev); 381int ipoib_ib_dev_open(struct net_device *dev);
339int ipoib_ib_dev_up(struct net_device *dev); 382int ipoib_ib_dev_up(struct net_device *dev);
340int ipoib_ib_dev_down(struct net_device *dev, int flush); 383int ipoib_ib_dev_down(struct net_device *dev, int flush);
341int ipoib_ib_dev_stop(struct net_device *dev); 384int ipoib_ib_dev_stop(struct net_device *dev, int flush);
342 385
343int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 386int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
344void ipoib_dev_cleanup(struct net_device *dev); 387void ipoib_dev_cleanup(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index eec833b81e9b..ffec794b7913 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -37,6 +37,7 @@
37#include <net/dst.h> 37#include <net/dst.h>
38#include <net/icmp.h> 38#include <net/icmp.h>
39#include <linux/icmpv6.h> 39#include <linux/icmpv6.h>
40#include <linux/delay.h>
40 41
41#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 42#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
42static int data_debug_level; 43static int data_debug_level;
@@ -62,6 +63,16 @@ struct ipoib_cm_id {
62 u32 remote_mtu; 63 u32 remote_mtu;
63}; 64};
64 65
66static struct ib_qp_attr ipoib_cm_err_attr = {
67 .qp_state = IB_QPS_ERR
68};
69
70#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
71
72static struct ib_recv_wr ipoib_cm_rx_drain_wr = {
73 .wr_id = IPOIB_CM_RX_DRAIN_WRID
74};
75
65static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 76static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
66 struct ib_cm_event *event); 77 struct ib_cm_event *event);
67 78
@@ -150,11 +161,44 @@ partial_error:
150 return NULL; 161 return NULL;
151} 162}
152 163
164static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
165{
166 struct ib_recv_wr *bad_wr;
167
168 /* rx_drain_qp send queue depth is 1, so
169 * make sure we have at most 1 outstanding WR. */
170 if (list_empty(&priv->cm.rx_flush_list) ||
171 !list_empty(&priv->cm.rx_drain_list))
172 return;
173
174 if (ib_post_recv(priv->cm.rx_drain_qp, &ipoib_cm_rx_drain_wr, &bad_wr))
175 ipoib_warn(priv, "failed to post rx_drain wr\n");
176
177 list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
178}
179
180static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
181{
182 struct ipoib_cm_rx *p = ctx;
183 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
184 unsigned long flags;
185
186 if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
187 return;
188
189 spin_lock_irqsave(&priv->lock, flags);
190 list_move(&p->list, &priv->cm.rx_flush_list);
191 p->state = IPOIB_CM_RX_FLUSH;
192 ipoib_cm_start_rx_drain(priv);
193 spin_unlock_irqrestore(&priv->lock, flags);
194}
195
153static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, 196static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
154 struct ipoib_cm_rx *p) 197 struct ipoib_cm_rx *p)
155{ 198{
156 struct ipoib_dev_priv *priv = netdev_priv(dev); 199 struct ipoib_dev_priv *priv = netdev_priv(dev);
157 struct ib_qp_init_attr attr = { 200 struct ib_qp_init_attr attr = {
201 .event_handler = ipoib_cm_rx_event_handler,
158 .send_cq = priv->cq, /* does not matter, we never send anything */ 202 .send_cq = priv->cq, /* does not matter, we never send anything */
159 .recv_cq = priv->cq, 203 .recv_cq = priv->cq,
160 .srq = priv->cm.srq, 204 .srq = priv->cm.srq,
@@ -256,6 +300,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
256 300
257 cm_id->context = p; 301 cm_id->context = p;
258 p->jiffies = jiffies; 302 p->jiffies = jiffies;
303 p->state = IPOIB_CM_RX_LIVE;
259 spin_lock_irq(&priv->lock); 304 spin_lock_irq(&priv->lock);
260 if (list_empty(&priv->cm.passive_ids)) 305 if (list_empty(&priv->cm.passive_ids))
261 queue_delayed_work(ipoib_workqueue, 306 queue_delayed_work(ipoib_workqueue,
@@ -277,7 +322,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
277{ 322{
278 struct ipoib_cm_rx *p; 323 struct ipoib_cm_rx *p;
279 struct ipoib_dev_priv *priv; 324 struct ipoib_dev_priv *priv;
280 int ret;
281 325
282 switch (event->event) { 326 switch (event->event) {
283 case IB_CM_REQ_RECEIVED: 327 case IB_CM_REQ_RECEIVED:
@@ -289,20 +333,9 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
289 case IB_CM_REJ_RECEIVED: 333 case IB_CM_REJ_RECEIVED:
290 p = cm_id->context; 334 p = cm_id->context;
291 priv = netdev_priv(p->dev); 335 priv = netdev_priv(p->dev);
292 spin_lock_irq(&priv->lock); 336 if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
293 if (list_empty(&p->list)) 337 ipoib_warn(priv, "unable to move qp to error state\n");
294 ret = 0; /* Connection is going away already. */ 338 /* Fall through */
295 else {
296 list_del_init(&p->list);
297 ret = -ECONNRESET;
298 }
299 spin_unlock_irq(&priv->lock);
300 if (ret) {
301 ib_destroy_qp(p->qp);
302 kfree(p);
303 return ret;
304 }
305 return 0;
306 default: 339 default:
307 return 0; 340 return 0;
308 } 341 }
@@ -354,8 +387,15 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
354 wr_id, wc->status); 387 wr_id, wc->status);
355 388
356 if (unlikely(wr_id >= ipoib_recvq_size)) { 389 if (unlikely(wr_id >= ipoib_recvq_size)) {
357 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", 390 if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
358 wr_id, ipoib_recvq_size); 391 spin_lock_irqsave(&priv->lock, flags);
392 list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
393 ipoib_cm_start_rx_drain(priv);
394 queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
395 spin_unlock_irqrestore(&priv->lock, flags);
396 } else
397 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
398 wr_id, ipoib_recvq_size);
359 return; 399 return;
360 } 400 }
361 401
@@ -374,9 +414,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
374 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 414 if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
375 spin_lock_irqsave(&priv->lock, flags); 415 spin_lock_irqsave(&priv->lock, flags);
376 p->jiffies = jiffies; 416 p->jiffies = jiffies;
377 /* Move this entry to list head, but do 417 /* Move this entry to list head, but do not re-add it
378 * not re-add it if it has been removed. */ 418 * if it has been moved out of list. */
379 if (!list_empty(&p->list)) 419 if (p->state == IPOIB_CM_RX_LIVE)
380 list_move(&p->list, &priv->cm.passive_ids); 420 list_move(&p->list, &priv->cm.passive_ids);
381 spin_unlock_irqrestore(&priv->lock, flags); 421 spin_unlock_irqrestore(&priv->lock, flags);
382 } 422 }
@@ -583,17 +623,43 @@ static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
583int ipoib_cm_dev_open(struct net_device *dev) 623int ipoib_cm_dev_open(struct net_device *dev)
584{ 624{
585 struct ipoib_dev_priv *priv = netdev_priv(dev); 625 struct ipoib_dev_priv *priv = netdev_priv(dev);
626 struct ib_qp_init_attr qp_init_attr = {
627 .send_cq = priv->cq, /* does not matter, we never send anything */
628 .recv_cq = priv->cq,
629 .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
630 .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
631 .cap.max_recv_wr = 1,
632 .cap.max_recv_sge = 1, /* FIXME: 0 Seems not to work */
633 .sq_sig_type = IB_SIGNAL_ALL_WR,
634 .qp_type = IB_QPT_UC,
635 };
586 int ret; 636 int ret;
587 637
588 if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) 638 if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
589 return 0; 639 return 0;
590 640
641 priv->cm.rx_drain_qp = ib_create_qp(priv->pd, &qp_init_attr);
642 if (IS_ERR(priv->cm.rx_drain_qp)) {
643 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
644 ret = PTR_ERR(priv->cm.rx_drain_qp);
645 return ret;
646 }
647
648 /*
649 * We put the QP in error state directly. This way, a "flush
650 * error" WC will be immediately generated for each WR we post.
651 */
652 ret = ib_modify_qp(priv->cm.rx_drain_qp, &ipoib_cm_err_attr, IB_QP_STATE);
653 if (ret) {
654 ipoib_warn(priv, "failed to modify drain QP to error: %d\n", ret);
655 goto err_qp;
656 }
657
591 priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); 658 priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
592 if (IS_ERR(priv->cm.id)) { 659 if (IS_ERR(priv->cm.id)) {
593 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); 660 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
594 ret = PTR_ERR(priv->cm.id); 661 ret = PTR_ERR(priv->cm.id);
595 priv->cm.id = NULL; 662 goto err_cm;
596 return ret;
597 } 663 }
598 664
599 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), 665 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
@@ -601,35 +667,79 @@ int ipoib_cm_dev_open(struct net_device *dev)
601 if (ret) { 667 if (ret) {
602 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, 668 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
603 IPOIB_CM_IETF_ID | priv->qp->qp_num); 669 IPOIB_CM_IETF_ID | priv->qp->qp_num);
604 ib_destroy_cm_id(priv->cm.id); 670 goto err_listen;
605 priv->cm.id = NULL;
606 return ret;
607 } 671 }
672
608 return 0; 673 return 0;
674
675err_listen:
676 ib_destroy_cm_id(priv->cm.id);
677err_cm:
678 priv->cm.id = NULL;
679err_qp:
680 ib_destroy_qp(priv->cm.rx_drain_qp);
681 return ret;
609} 682}
610 683
611void ipoib_cm_dev_stop(struct net_device *dev) 684void ipoib_cm_dev_stop(struct net_device *dev)
612{ 685{
613 struct ipoib_dev_priv *priv = netdev_priv(dev); 686 struct ipoib_dev_priv *priv = netdev_priv(dev);
614 struct ipoib_cm_rx *p; 687 struct ipoib_cm_rx *p, *n;
688 unsigned long begin;
689 LIST_HEAD(list);
690 int ret;
615 691
616 if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id) 692 if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
617 return; 693 return;
618 694
619 ib_destroy_cm_id(priv->cm.id); 695 ib_destroy_cm_id(priv->cm.id);
620 priv->cm.id = NULL; 696 priv->cm.id = NULL;
697
621 spin_lock_irq(&priv->lock); 698 spin_lock_irq(&priv->lock);
622 while (!list_empty(&priv->cm.passive_ids)) { 699 while (!list_empty(&priv->cm.passive_ids)) {
623 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); 700 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
624 list_del_init(&p->list); 701 list_move(&p->list, &priv->cm.rx_error_list);
702 p->state = IPOIB_CM_RX_ERROR;
625 spin_unlock_irq(&priv->lock); 703 spin_unlock_irq(&priv->lock);
704 ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
705 if (ret)
706 ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
707 spin_lock_irq(&priv->lock);
708 }
709
710 /* Wait for all RX to be drained */
711 begin = jiffies;
712
713 while (!list_empty(&priv->cm.rx_error_list) ||
714 !list_empty(&priv->cm.rx_flush_list) ||
715 !list_empty(&priv->cm.rx_drain_list)) {
716 if (!time_after(jiffies, begin + 5 * HZ)) {
717 ipoib_warn(priv, "RX drain timing out\n");
718
719 /*
720 * assume the HW is wedged and just free up everything.
721 */
722 list_splice_init(&priv->cm.rx_flush_list, &list);
723 list_splice_init(&priv->cm.rx_error_list, &list);
724 list_splice_init(&priv->cm.rx_drain_list, &list);
725 break;
726 }
727 spin_unlock_irq(&priv->lock);
728 msleep(1);
729 spin_lock_irq(&priv->lock);
730 }
731
732 list_splice_init(&priv->cm.rx_reap_list, &list);
733
734 spin_unlock_irq(&priv->lock);
735
736 list_for_each_entry_safe(p, n, &list, list) {
626 ib_destroy_cm_id(p->id); 737 ib_destroy_cm_id(p->id);
627 ib_destroy_qp(p->qp); 738 ib_destroy_qp(p->qp);
628 kfree(p); 739 kfree(p);
629 spin_lock_irq(&priv->lock);
630 } 740 }
631 spin_unlock_irq(&priv->lock);
632 741
742 ib_destroy_qp(priv->cm.rx_drain_qp);
633 cancel_delayed_work(&priv->cm.stale_task); 743 cancel_delayed_work(&priv->cm.stale_task);
634} 744}
635 745
@@ -1079,24 +1189,44 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
1079 queue_work(ipoib_workqueue, &priv->cm.skb_task); 1189 queue_work(ipoib_workqueue, &priv->cm.skb_task);
1080} 1190}
1081 1191
1192static void ipoib_cm_rx_reap(struct work_struct *work)
1193{
1194 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1195 cm.rx_reap_task);
1196 struct ipoib_cm_rx *p, *n;
1197 LIST_HEAD(list);
1198
1199 spin_lock_irq(&priv->lock);
1200 list_splice_init(&priv->cm.rx_reap_list, &list);
1201 spin_unlock_irq(&priv->lock);
1202
1203 list_for_each_entry_safe(p, n, &list, list) {
1204 ib_destroy_cm_id(p->id);
1205 ib_destroy_qp(p->qp);
1206 kfree(p);
1207 }
1208}
1209
1082static void ipoib_cm_stale_task(struct work_struct *work) 1210static void ipoib_cm_stale_task(struct work_struct *work)
1083{ 1211{
1084 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 1212 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1085 cm.stale_task.work); 1213 cm.stale_task.work);
1086 struct ipoib_cm_rx *p; 1214 struct ipoib_cm_rx *p;
1215 int ret;
1087 1216
1088 spin_lock_irq(&priv->lock); 1217 spin_lock_irq(&priv->lock);
1089 while (!list_empty(&priv->cm.passive_ids)) { 1218 while (!list_empty(&priv->cm.passive_ids)) {
1090 /* List if sorted by LRU, start from tail, 1219 /* List is sorted by LRU, start from tail,
1091 * stop when we see a recently used entry */ 1220 * stop when we see a recently used entry */
1092 p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); 1221 p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
1093 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) 1222 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
1094 break; 1223 break;
1095 list_del_init(&p->list); 1224 list_move(&p->list, &priv->cm.rx_error_list);
1225 p->state = IPOIB_CM_RX_ERROR;
1096 spin_unlock_irq(&priv->lock); 1226 spin_unlock_irq(&priv->lock);
1097 ib_destroy_cm_id(p->id); 1227 ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
1098 ib_destroy_qp(p->qp); 1228 if (ret)
1099 kfree(p); 1229 ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
1100 spin_lock_irq(&priv->lock); 1230 spin_lock_irq(&priv->lock);
1101 } 1231 }
1102 1232
@@ -1164,9 +1294,14 @@ int ipoib_cm_dev_init(struct net_device *dev)
1164 INIT_LIST_HEAD(&priv->cm.passive_ids); 1294 INIT_LIST_HEAD(&priv->cm.passive_ids);
1165 INIT_LIST_HEAD(&priv->cm.reap_list); 1295 INIT_LIST_HEAD(&priv->cm.reap_list);
1166 INIT_LIST_HEAD(&priv->cm.start_list); 1296 INIT_LIST_HEAD(&priv->cm.start_list);
1297 INIT_LIST_HEAD(&priv->cm.rx_error_list);
1298 INIT_LIST_HEAD(&priv->cm.rx_flush_list);
1299 INIT_LIST_HEAD(&priv->cm.rx_drain_list);
1300 INIT_LIST_HEAD(&priv->cm.rx_reap_list);
1167 INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); 1301 INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
1168 INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); 1302 INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
1169 INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); 1303 INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
1304 INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
1170 INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); 1305 INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
1171 1306
1172 skb_queue_head_init(&priv->cm.skb_queue); 1307 skb_queue_head_init(&priv->cm.skb_queue);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 68d72c6f7ffb..c1aad06eb4e9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
448 struct ipoib_dev_priv *priv = netdev_priv(dev); 448 struct ipoib_dev_priv *priv = netdev_priv(dev);
449 int ret; 449 int ret;
450 450
451 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
452 ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
453 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
454 return -1;
455 }
456 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
457
451 ret = ipoib_init_qp(dev); 458 ret = ipoib_init_qp(dev);
452 if (ret) { 459 if (ret) {
453 ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret); 460 ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
457 ret = ipoib_ib_post_receives(dev); 464 ret = ipoib_ib_post_receives(dev);
458 if (ret) { 465 if (ret) {
459 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); 466 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
460 ipoib_ib_dev_stop(dev); 467 ipoib_ib_dev_stop(dev, 1);
461 return -1; 468 return -1;
462 } 469 }
463 470
464 ret = ipoib_cm_dev_open(dev); 471 ret = ipoib_cm_dev_open(dev);
465 if (ret) { 472 if (ret) {
466 ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); 473 ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
467 ipoib_ib_dev_stop(dev); 474 ipoib_ib_dev_stop(dev, 1);
468 return -1; 475 return -1;
469 } 476 }
470 477
@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
516 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { 523 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
517 mutex_lock(&pkey_mutex); 524 mutex_lock(&pkey_mutex);
518 set_bit(IPOIB_PKEY_STOP, &priv->flags); 525 set_bit(IPOIB_PKEY_STOP, &priv->flags);
519 cancel_delayed_work(&priv->pkey_task); 526 cancel_delayed_work(&priv->pkey_poll_task);
520 mutex_unlock(&pkey_mutex); 527 mutex_unlock(&pkey_mutex);
521 if (flush) 528 if (flush)
522 flush_workqueue(ipoib_workqueue); 529 flush_workqueue(ipoib_workqueue);
@@ -543,7 +550,7 @@ static int recvs_pending(struct net_device *dev)
543 return pending; 550 return pending;
544} 551}
545 552
546int ipoib_ib_dev_stop(struct net_device *dev) 553int ipoib_ib_dev_stop(struct net_device *dev, int flush)
547{ 554{
548 struct ipoib_dev_priv *priv = netdev_priv(dev); 555 struct ipoib_dev_priv *priv = netdev_priv(dev);
549 struct ib_qp_attr qp_attr; 556 struct ib_qp_attr qp_attr;
@@ -629,7 +636,8 @@ timeout:
629 /* Wait for all AHs to be reaped */ 636 /* Wait for all AHs to be reaped */
630 set_bit(IPOIB_STOP_REAPER, &priv->flags); 637 set_bit(IPOIB_STOP_REAPER, &priv->flags);
631 cancel_delayed_work(&priv->ah_reap_task); 638 cancel_delayed_work(&priv->ah_reap_task);
632 flush_workqueue(ipoib_workqueue); 639 if (flush)
640 flush_workqueue(ipoib_workqueue);
633 641
634 begin = jiffies; 642 begin = jiffies;
635 643
@@ -673,13 +681,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
673 return 0; 681 return 0;
674} 682}
675 683
676void ipoib_ib_dev_flush(struct work_struct *work) 684static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
677{ 685{
678 struct ipoib_dev_priv *cpriv, *priv = 686 struct ipoib_dev_priv *cpriv;
679 container_of(work, struct ipoib_dev_priv, flush_task);
680 struct net_device *dev = priv->dev; 687 struct net_device *dev = priv->dev;
688 u16 new_index;
689
690 mutex_lock(&priv->vlan_mutex);
681 691
682 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) { 692 /*
693 * Flush any child interfaces too -- they might be up even if
694 * the parent is down.
695 */
696 list_for_each_entry(cpriv, &priv->child_intfs, list)
697 __ipoib_ib_dev_flush(cpriv, pkey_event);
698
699 mutex_unlock(&priv->vlan_mutex);
700
701 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
683 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); 702 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
684 return; 703 return;
685 } 704 }
@@ -689,10 +708,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
689 return; 708 return;
690 } 709 }
691 710
711 if (pkey_event) {
712 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
713 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
714 ipoib_ib_dev_down(dev, 0);
715 ipoib_pkey_dev_delay_open(dev);
716 return;
717 }
718 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
719
720 /* restart QP only if P_Key index is changed */
721 if (new_index == priv->pkey_index) {
722 ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
723 return;
724 }
725 priv->pkey_index = new_index;
726 }
727
692 ipoib_dbg(priv, "flushing\n"); 728 ipoib_dbg(priv, "flushing\n");
693 729
694 ipoib_ib_dev_down(dev, 0); 730 ipoib_ib_dev_down(dev, 0);
695 731
732 if (pkey_event) {
733 ipoib_ib_dev_stop(dev, 0);
734 ipoib_ib_dev_open(dev);
735 }
736
696 /* 737 /*
697 * The device could have been brought down between the start and when 738 * The device could have been brought down between the start and when
698 * we get here, don't bring it back up if it's not configured up 739 * we get here, don't bring it back up if it's not configured up
@@ -701,14 +742,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
701 ipoib_ib_dev_up(dev); 742 ipoib_ib_dev_up(dev);
702 ipoib_mcast_restart_task(&priv->restart_task); 743 ipoib_mcast_restart_task(&priv->restart_task);
703 } 744 }
745}
704 746
705 mutex_lock(&priv->vlan_mutex); 747void ipoib_ib_dev_flush(struct work_struct *work)
748{
749 struct ipoib_dev_priv *priv =
750 container_of(work, struct ipoib_dev_priv, flush_task);
706 751
707 /* Flush any child interfaces too */ 752 ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
708 list_for_each_entry(cpriv, &priv->child_intfs, list) 753 __ipoib_ib_dev_flush(priv, 0);
709 ipoib_ib_dev_flush(&cpriv->flush_task); 754}
710 755
711 mutex_unlock(&priv->vlan_mutex); 756void ipoib_pkey_event(struct work_struct *work)
757{
758 struct ipoib_dev_priv *priv =
759 container_of(work, struct ipoib_dev_priv, pkey_event_task);
760
761 ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
762 __ipoib_ib_dev_flush(priv, 1);
712} 763}
713 764
714void ipoib_ib_dev_cleanup(struct net_device *dev) 765void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -736,7 +787,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
736void ipoib_pkey_poll(struct work_struct *work) 787void ipoib_pkey_poll(struct work_struct *work)
737{ 788{
738 struct ipoib_dev_priv *priv = 789 struct ipoib_dev_priv *priv =
739 container_of(work, struct ipoib_dev_priv, pkey_task.work); 790 container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
740 struct net_device *dev = priv->dev; 791 struct net_device *dev = priv->dev;
741 792
742 ipoib_pkey_dev_check_presence(dev); 793 ipoib_pkey_dev_check_presence(dev);
@@ -747,7 +798,7 @@ void ipoib_pkey_poll(struct work_struct *work)
747 mutex_lock(&pkey_mutex); 798 mutex_lock(&pkey_mutex);
748 if (!test_bit(IPOIB_PKEY_STOP, &priv->flags)) 799 if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
749 queue_delayed_work(ipoib_workqueue, 800 queue_delayed_work(ipoib_workqueue,
750 &priv->pkey_task, 801 &priv->pkey_poll_task,
751 HZ); 802 HZ);
752 mutex_unlock(&pkey_mutex); 803 mutex_unlock(&pkey_mutex);
753 } 804 }
@@ -766,7 +817,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
766 mutex_lock(&pkey_mutex); 817 mutex_lock(&pkey_mutex);
767 clear_bit(IPOIB_PKEY_STOP, &priv->flags); 818 clear_bit(IPOIB_PKEY_STOP, &priv->flags);
768 queue_delayed_work(ipoib_workqueue, 819 queue_delayed_work(ipoib_workqueue,
769 &priv->pkey_task, 820 &priv->pkey_poll_task,
770 HZ); 821 HZ);
771 mutex_unlock(&pkey_mutex); 822 mutex_unlock(&pkey_mutex);
772 return 1; 823 return 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 0a428f2b05c7..894b1dcdf3eb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
107 return -EINVAL; 107 return -EINVAL;
108 108
109 if (ipoib_ib_dev_up(dev)) { 109 if (ipoib_ib_dev_up(dev)) {
110 ipoib_ib_dev_stop(dev); 110 ipoib_ib_dev_stop(dev, 1);
111 return -EINVAL; 111 return -EINVAL;
112 } 112 }
113 113
@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
152 flush_workqueue(ipoib_workqueue); 152 flush_workqueue(ipoib_workqueue);
153 153
154 ipoib_ib_dev_down(dev, 1); 154 ipoib_ib_dev_down(dev, 1);
155 ipoib_ib_dev_stop(dev); 155 ipoib_ib_dev_stop(dev, 1);
156 156
157 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 157 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
158 struct ipoib_dev_priv *cpriv; 158 struct ipoib_dev_priv *cpriv;
@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
988 INIT_LIST_HEAD(&priv->dead_ahs); 988 INIT_LIST_HEAD(&priv->dead_ahs);
989 INIT_LIST_HEAD(&priv->multicast_list); 989 INIT_LIST_HEAD(&priv->multicast_list);
990 990
991 INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll); 991 INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
992 INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
992 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); 993 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
993 INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush); 994 INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
994 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 995 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 54fbead4de01..aae367057a56 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -524,7 +524,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
524 return; 524 return;
525 525
526 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 526 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
527 ipoib_warn(priv, "ib_gid_entry_get() failed\n"); 527 ipoib_warn(priv, "ib_query_gid() failed\n");
528 else 528 else
529 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 529 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
530 530
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5c3c6a43a52b..982eb88e27ec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -33,8 +33,6 @@
33 * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $ 33 * $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
34 */ 34 */
35 35
36#include <rdma/ib_cache.h>
37
38#include "ipoib.h" 36#include "ipoib.h"
39 37
40int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid) 38int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
49 if (!qp_attr) 47 if (!qp_attr)
50 goto out; 48 goto out;
51 49
52 if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) { 50 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
53 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 51 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
54 ret = -ENXIO; 52 ret = -ENXIO;
55 goto out; 53 goto out;
@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
94{ 92{
95 struct ipoib_dev_priv *priv = netdev_priv(dev); 93 struct ipoib_dev_priv *priv = netdev_priv(dev);
96 int ret; 94 int ret;
97 u16 pkey_index;
98 struct ib_qp_attr qp_attr; 95 struct ib_qp_attr qp_attr;
99 int attr_mask; 96 int attr_mask;
100 97
101 /* 98 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
102 * Search through the port P_Key table for the requested pkey value. 99 return -1;
103 * The port has to be assigned to the respective IB partition in
104 * advance.
105 */
106 ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
107 if (ret) {
108 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
109 return ret;
110 }
111 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
112 100
113 qp_attr.qp_state = IB_QPS_INIT; 101 qp_attr.qp_state = IB_QPS_INIT;
114 qp_attr.qkey = 0; 102 qp_attr.qkey = 0;
115 qp_attr.port_num = priv->port; 103 qp_attr.port_num = priv->port;
116 qp_attr.pkey_index = pkey_index; 104 qp_attr.pkey_index = priv->pkey_index;
117 attr_mask = 105 attr_mask =
118 IB_QP_QKEY | 106 IB_QP_QKEY |
119 IB_QP_PORT | 107 IB_QP_PORT |
@@ -185,7 +173,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
185 size = ipoib_sendq_size + ipoib_recvq_size + 1; 173 size = ipoib_sendq_size + ipoib_recvq_size + 1;
186 ret = ipoib_cm_dev_init(dev); 174 ret = ipoib_cm_dev_init(dev);
187 if (!ret) 175 if (!ret)
188 size += ipoib_recvq_size; 176 size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
189 177
190 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0); 178 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
191 if (IS_ERR(priv->cq)) { 179 if (IS_ERR(priv->cq)) {
@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
259 struct ipoib_dev_priv *priv = 247 struct ipoib_dev_priv *priv =
260 container_of(handler, struct ipoib_dev_priv, event_handler); 248 container_of(handler, struct ipoib_dev_priv, event_handler);
261 249
262 if ((record->event == IB_EVENT_PORT_ERR || 250 if (record->element.port_num != priv->port)
263 record->event == IB_EVENT_PKEY_CHANGE || 251 return;
264 record->event == IB_EVENT_PORT_ACTIVE || 252
265 record->event == IB_EVENT_LID_CHANGE || 253 if (record->event == IB_EVENT_PORT_ERR ||
266 record->event == IB_EVENT_SM_CHANGE || 254 record->event == IB_EVENT_PORT_ACTIVE ||
267 record->event == IB_EVENT_CLIENT_REREGISTER) && 255 record->event == IB_EVENT_LID_CHANGE ||
268 record->element.port_num == priv->port) { 256 record->event == IB_EVENT_SM_CHANGE ||
257 record->event == IB_EVENT_CLIENT_REREGISTER) {
269 ipoib_dbg(priv, "Port state change event\n"); 258 ipoib_dbg(priv, "Port state change event\n");
270 queue_work(ipoib_workqueue, &priv->flush_task); 259 queue_work(ipoib_workqueue, &priv->flush_task);
260 } else if (record->event == IB_EVENT_PKEY_CHANGE) {
261 ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
262 queue_work(ipoib_workqueue, &priv->pkey_event_task);
271 } 263 }
272} 264}