path: root/drivers/infiniband
author	Jack Morgenstein <jackm@dev.mellanox.co.il>	2012-08-03 04:40:44 -0400
committer	Roland Dreier <roland@purestorage.com>	2012-09-30 23:33:34 -0400
commit	37bfc7c1e83f1589bcdc5918c7216422662644ee (patch)
tree	4428532211269f763f2f06c8e7227450ae6dd334 /drivers/infiniband
parent	54679e148287f0ca1bdd09264c908bacb9f19b3f (diff)
IB/mlx4: SR-IOV multiplex and demultiplex MADs
Special QPs are paravirtualized. vHCAs are not given direct access to
QP0/1. Rather, these QPs are operated by a special context hosted by the
PF, which mediates access to/from vHCAs. This is done by opening a
"tunnel" per vHCA port per QP0/1. A tunnel comprises a pair of UD QPs: a
"Tunnel QP" in the PF context and a "Proxy QP" in the vHCA. All vHCA MAD
traffic must pass through the corresponding tunnel. vHCA QPs cannot be
assigned to VL15 and are denied the well-known QKey.

Outgoing messages are "de-multiplexed" (i.e., directed to the wire via
the real special QP). Incoming messages are "multiplexed" (i.e., steered
by the PPF to the correct VF or to the PF).

QP0 access is restricted to the PF vHCA. VF vHCAs also have (virtual)
QP0s, but they never receive any SMPs, and all SMPs they send are
discarded. QP1 traffic is allowed for all vHCAs, but special care is
required to bridge the gap between the host and network views.
Specifically:

- Transaction IDs are mapped to guarantee uniqueness among vHCAs
- CM para-virtualization
  o Incoming requests are steered to the correct vHCA according to the
    embedded GID
  o Local communication IDs are mapped to ensure uniqueness among vHCAs
    (see the patch that adds CM paravirtualization)
- Multicast para-virtualization
  o The PF context aggregates membership state from all vHCAs
  o The SA is contacted only when the aggregate membership changes
  o If the aggregate does not change, the PF context provides the
    requesting vHCA with the proper response
    (see the patch that adds multicast group paravirtualization)

Incoming MADs are steered according to:
- the DGID, if a GRH is present
- the mapped transaction ID for response MADs
- the embedded GID in CM requests
- the remote communication ID in other CM messages

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
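To make the transaction-ID mapping above concrete, here is a minimal,
self-contained sketch. It is NOT part of the patch: struct mad_hdr,
tag_request_tid(), demux_response_tid() and the main() driver below are
hypothetical stand-ins for the real handling of struct ib_mad_hdr.tid in
mlx4_ib_multiplex_mad() and mlx4_ib_demux_mad() in the diff that follows;
the value 255 marking the PF/Dom0 mirrors the comment in that code.

/* Illustrative sketch only -- not taken from the patch. */
#include <stdint.h>
#include <stdio.h>

struct mad_hdr {                /* simplified stand-in for struct ib_mad_hdr */
        uint8_t  method;        /* bit 7 set => response MAD */
        uint64_t tid;           /* transaction ID (__be64 on the wire) */
};

/* Egress (multiplex): tag a request MAD with the sending vHCA's index. */
static void tag_request_tid(struct mad_hdr *hdr, uint8_t slave)
{
        uint8_t *slave_id = (uint8_t *)&hdr->tid;

        *slave_id = slave;      /* first TID byte carries the vHCA index */
}

/* Ingress (demultiplex): recover the vHCA from a response, restore the TID. */
static int demux_response_tid(struct mad_hdr *hdr, int pf_slave)
{
        uint8_t *slave_id = (uint8_t *)&hdr->tid;
        int slave = pf_slave;           /* default: the MAD is for the PF */

        if (hdr->method & 0x80) {       /* only responses carry a mapped TID */
                slave = *slave_id;
                if (slave != 255)       /* 255 indicates the PF/Dom0 itself */
                        *slave_id = 0;  /* remap the TID before delivery */
        }
        return slave;
}

int main(void)
{
        struct mad_hdr mad = { .method = 0x01, .tid = 0x1234 };

        tag_request_tid(&mad, 3);       /* vHCA 3 sends a request */
        mad.method |= 0x80;             /* pretend its response came back */
        printf("steer response to slave %d\n", demux_response_tid(&mad, 255));
        return 0;
}

The real driver additionally steers on the DGID, the embedded GID in CM
requests, and the remote communication ID, as listed above.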
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--	drivers/infiniband/hw/mlx4/mad.c	567
1 file changed, 565 insertions, 2 deletions
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 318d5bcf821b..8dfbf69f8370 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -32,6 +32,8 @@
 
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cache.h>
 
 #include <linux/mlx4/cmd.h>
 #include <linux/gfp.h>
@@ -300,6 +302,254 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
         }
 }
 
+static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
+                                    struct ib_sa_mad *sa_mad)
+{
+        return 0;
+}
+
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
+{
+        struct mlx4_ib_dev *dev = to_mdev(ibdev);
+        int i;
+
+        for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+                if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
+                        return i;
+        }
+        return -1;
+}
+
+
+static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix,
+                                 u8 *full_pk_ix, u8 *partial_pk_ix,
+                                 int *is_full_member)
+{
+        u16 search_pkey;
+        int fm;
+        int err = 0;
+        u16 pk;
+
+        err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey);
+        if (err)
+                return err;
+
+        fm = (search_pkey & 0x8000) ? 1 : 0;
+        if (fm) {
+                *full_pk_ix = ph_pkey_ix;
+                search_pkey &= 0x7FFF;
+        } else {
+                *partial_pk_ix = ph_pkey_ix;
+                search_pkey |= 0x8000;
+        }
+
+        if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk))
+                pk = 0xFFFF;
+
+        if (fm)
+                *partial_pk_ix = (pk & 0xFF);
+        else
+                *full_pk_ix = (pk & 0xFF);
+
+        *is_full_member = fm;
+        return err;
+}
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+                          enum ib_qp_type dest_qpt, struct ib_wc *wc,
+                          struct ib_grh *grh, struct ib_mad *mad)
+{
+        struct ib_sge list;
+        struct ib_send_wr wr, *bad_wr;
+        struct mlx4_ib_demux_pv_ctx *tun_ctx;
+        struct mlx4_ib_demux_pv_qp *tun_qp;
+        struct mlx4_rcv_tunnel_mad *tun_mad;
+        struct ib_ah_attr attr;
+        struct ib_ah *ah;
+        struct ib_qp *src_qp = NULL;
+        unsigned tun_tx_ix = 0;
+        int dqpn;
+        int ret = 0;
+        int i;
+        int is_full_member = 0;
+        u16 tun_pkey_ix;
+        u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0;
+
+        if (dest_qpt > IB_QPT_GSI)
+                return -EINVAL;
+
+        tun_ctx = dev->sriov.demux[port-1].tun[slave];
+
+        /* check if proxy qp created */
+        if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
+                return -EAGAIN;
+
+        /* QP0 forwarding only for Dom0 */
+        if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
+                return -EINVAL;
+
+        if (!dest_qpt)
+                tun_qp = &tun_ctx->qp[0];
+        else
+                tun_qp = &tun_ctx->qp[1];
+
+        /* compute pkey index for slave */
+        /* get physical pkey -- virtualized Dom0 pkey to phys*/
+        if (dest_qpt) {
+                ph_pkey_ix =
+                        dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index];
+
+                /* now, translate this to the slave pkey index */
+                ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix,
+                                            &partial_pk_ix, &is_full_member);
+                if (ret)
+                        return -EINVAL;
+
+                for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
+                        if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) ||
+                            (is_full_member &&
+                             (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix)))
+                                break;
+                }
+                if (i == dev->dev->caps.pkey_table_len[port])
+                        return -EINVAL;
+                tun_pkey_ix = i;
+        } else
+                tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+
+        dqpn = dev->dev->caps.sqp_start + 8 * slave + port + (dest_qpt * 2) - 1;
+
+        /* get tunnel tx data buf for slave */
+        src_qp = tun_qp->qp;
+
+        /* create ah. Just need an empty one with the port num for the post send.
+         * The driver will set the force loopback bit in post_send */
+        memset(&attr, 0, sizeof attr);
+        attr.port_num = port;
+        ah = ib_create_ah(tun_ctx->pd, &attr);
+        if (IS_ERR(ah))
+                return -ENOMEM;
+
+        /* allocate tunnel tx buf after pass failure returns */
+        spin_lock(&tun_qp->tx_lock);
+        if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
+            (MLX4_NUM_TUNNEL_BUFS - 1))
+                ret = -EAGAIN;
+        else
+                tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+        spin_unlock(&tun_qp->tx_lock);
+        if (ret)
+                goto out;
+
+        tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
+        if (tun_qp->tx_ring[tun_tx_ix].ah)
+                ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+        tun_qp->tx_ring[tun_tx_ix].ah = ah;
+        ib_dma_sync_single_for_cpu(&dev->ib_dev,
+                                   tun_qp->tx_ring[tun_tx_ix].buf.map,
+                                   sizeof (struct mlx4_rcv_tunnel_mad),
+                                   DMA_TO_DEVICE);
+
+        /* copy over to tunnel buffer */
+        if (grh)
+                memcpy(&tun_mad->grh, grh, sizeof *grh);
+        memcpy(&tun_mad->mad, mad, sizeof *mad);
+
+        /* adjust tunnel data */
+        tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
+        tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+        tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+        tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
+        tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+
+        ib_dma_sync_single_for_device(&dev->ib_dev,
+                                      tun_qp->tx_ring[tun_tx_ix].buf.map,
+                                      sizeof (struct mlx4_rcv_tunnel_mad),
+                                      DMA_TO_DEVICE);
+
+        list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
+        list.length = sizeof (struct mlx4_rcv_tunnel_mad);
+        list.lkey = tun_ctx->mr->lkey;
+
+        wr.wr.ud.ah = ah;
+        wr.wr.ud.port_num = port;
+        wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+        wr.wr.ud.remote_qpn = dqpn;
+        wr.next = NULL;
+        wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
+        wr.sg_list = &list;
+        wr.num_sge = 1;
+        wr.opcode = IB_WR_SEND;
+        wr.send_flags = IB_SEND_SIGNALED;
+
+        ret = ib_post_send(src_qp, &wr, &bad_wr);
+out:
+        if (ret)
+                ib_destroy_ah(ah);
+        return ret;
+}
+
+static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
+                             struct ib_wc *wc, struct ib_grh *grh,
+                             struct ib_mad *mad)
+{
+        struct mlx4_ib_dev *dev = to_mdev(ibdev);
+        int err;
+        int slave;
+        u8 *slave_id;
+
+        /* Initially assume that this mad is for us */
+        slave = mlx4_master_func_num(dev->dev);
+
+        /* See if the slave id is encoded in a response mad */
+        if (mad->mad_hdr.method & 0x80) {
+                slave_id = (u8 *) &mad->mad_hdr.tid;
+                slave = *slave_id;
+                if (slave != 255) /*255 indicates the dom0*/
+                        *slave_id = 0; /* remap tid */
+        }
+
+        /* If a grh is present, we demux according to it */
+        if (wc->wc_flags & IB_WC_GRH) {
+                slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
+                if (slave < 0) {
+                        mlx4_ib_warn(ibdev, "failed matching grh\n");
+                        return -ENOENT;
+                }
+        }
+        /* Class-specific handling */
+        switch (mad->mad_hdr.mgmt_class) {
+        case IB_MGMT_CLASS_SUBN_ADM:
+                if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
+                                             (struct ib_sa_mad *) mad))
+                        return 0;
+                break;
+        case IB_MGMT_CLASS_DEVICE_MGMT:
+                if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
+                        return 0;
+                break;
+        default:
+                /* Drop unsupported classes for slaves in tunnel mode */
+                if (slave != mlx4_master_func_num(dev->dev)) {
+                        pr_debug("dropping unsupported ingress mad from class:%d "
+                                 "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
+                        return 0;
+                }
+        }
+        /*make sure that no slave==255 was not handled yet.*/
+        if (slave >= dev->dev->caps.sqp_demux) {
+                mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+                             slave, dev->dev->caps.sqp_demux);
+                return -ENOENT;
+        }
+
+        err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+        if (err)
+                pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+                         slave, err);
+        return 0;
+}
+
 static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
                           struct ib_wc *in_wc, struct ib_grh *in_grh,
                           struct ib_mad *in_mad, struct ib_mad *out_mad)
@@ -611,6 +861,216 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
         return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
 }
 
+static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
+                                        int slave, struct ib_sa_mad *sa_mad)
+{
+        return 0;
+}
+
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+        int slave_start = dev->dev->caps.sqp_start + 8 * slave;
+
+        return (qpn >= slave_start && qpn <= slave_start + 1);
+}
+
+
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+                         enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
+                         u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+{
+        struct ib_sge list;
+        struct ib_send_wr wr, *bad_wr;
+        struct mlx4_ib_demux_pv_ctx *sqp_ctx;
+        struct mlx4_ib_demux_pv_qp *sqp;
+        struct mlx4_mad_snd_buf *sqp_mad;
+        struct ib_ah *ah;
+        struct ib_qp *send_qp = NULL;
+        unsigned wire_tx_ix = 0;
+        int ret = 0;
+        u16 wire_pkey_ix;
+        int src_qpnum;
+        u8 sgid_index;
+
+
+        sqp_ctx = dev->sriov.sqps[port-1];
+
+        /* check if proxy qp created */
+        if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
+                return -EAGAIN;
+
+        /* QP0 forwarding only for Dom0 */
+        if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
+                return -EINVAL;
+
+        if (dest_qpt == IB_QPT_SMI) {
+                src_qpnum = 0;
+                sqp = &sqp_ctx->qp[0];
+                wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
+        } else {
+                src_qpnum = 1;
+                sqp = &sqp_ctx->qp[1];
+                wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
+        }
+
+        send_qp = sqp->qp;
+
+        /* create ah */
+        sgid_index = attr->grh.sgid_index;
+        attr->grh.sgid_index = 0;
+        ah = ib_create_ah(sqp_ctx->pd, attr);
+        if (IS_ERR(ah))
+                return -ENOMEM;
+        attr->grh.sgid_index = sgid_index;
+        to_mah(ah)->av.ib.gid_index = sgid_index;
+        /* get rid of force-loopback bit */
+        to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+        spin_lock(&sqp->tx_lock);
+        if (sqp->tx_ix_head - sqp->tx_ix_tail >=
+            (MLX4_NUM_TUNNEL_BUFS - 1))
+                ret = -EAGAIN;
+        else
+                wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+        spin_unlock(&sqp->tx_lock);
+        if (ret)
+                goto out;
+
+        sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
+        if (sqp->tx_ring[wire_tx_ix].ah)
+                ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+        sqp->tx_ring[wire_tx_ix].ah = ah;
+        ib_dma_sync_single_for_cpu(&dev->ib_dev,
+                                   sqp->tx_ring[wire_tx_ix].buf.map,
+                                   sizeof (struct mlx4_mad_snd_buf),
+                                   DMA_TO_DEVICE);
+
+        memcpy(&sqp_mad->payload, mad, sizeof *mad);
+
+        ib_dma_sync_single_for_device(&dev->ib_dev,
+                                      sqp->tx_ring[wire_tx_ix].buf.map,
+                                      sizeof (struct mlx4_mad_snd_buf),
+                                      DMA_TO_DEVICE);
+
+        list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
+        list.length = sizeof (struct mlx4_mad_snd_buf);
+        list.lkey = sqp_ctx->mr->lkey;
+
+        wr.wr.ud.ah = ah;
+        wr.wr.ud.port_num = port;
+        wr.wr.ud.pkey_index = wire_pkey_ix;
+        wr.wr.ud.remote_qkey = qkey;
+        wr.wr.ud.remote_qpn = remote_qpn;
+        wr.next = NULL;
+        wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
+        wr.sg_list = &list;
+        wr.num_sge = 1;
+        wr.opcode = IB_WR_SEND;
+        wr.send_flags = IB_SEND_SIGNALED;
+
+        ret = ib_post_send(send_qp, &wr, &bad_wr);
+out:
+        if (ret)
+                ib_destroy_ah(ah);
+        return ret;
+}
+
+static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
+{
+        struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+        struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
+        int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
+        struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
+        struct mlx4_ib_ah ah;
+        struct ib_ah_attr ah_attr;
+        u8 *slave_id;
+        int slave;
+
+        /* Get slave that sent this packet */
+        if (wc->src_qp < dev->dev->caps.sqp_start ||
+            wc->src_qp >= dev->dev->caps.base_tunnel_sqpn ||
+            (wc->src_qp & 0x1) != ctx->port - 1 ||
+            wc->src_qp & 0x4) {
+                mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
+                return;
+        }
+        slave = ((wc->src_qp & ~0x7) - dev->dev->caps.sqp_start) / 8;
+        if (slave != ctx->slave) {
+                mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+                             "belongs to another slave\n", wc->src_qp);
+                return;
+        }
+        if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
+                mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
+                             "non-master trying to send QP0 packets\n", wc->src_qp);
+                return;
+        }
+
+        /* Map transaction ID */
+        ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
+                                   sizeof (struct mlx4_tunnel_mad),
+                                   DMA_FROM_DEVICE);
+        switch (tunnel->mad.mad_hdr.method) {
+        case IB_MGMT_METHOD_SET:
+        case IB_MGMT_METHOD_GET:
+        case IB_MGMT_METHOD_REPORT:
+        case IB_SA_METHOD_GET_TABLE:
+        case IB_SA_METHOD_DELETE:
+        case IB_SA_METHOD_GET_MULTI:
+        case IB_SA_METHOD_GET_TRACE_TBL:
+                slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
+                if (*slave_id) {
+                        mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
+                                     "class:%d slave:%d\n", *slave_id,
+                                     tunnel->mad.mad_hdr.mgmt_class, slave);
+                        return;
+                } else
+                        *slave_id = slave;
+        default:
+                /* nothing */;
+        }
+
+        /* Class-specific handling */
+        switch (tunnel->mad.mad_hdr.mgmt_class) {
+        case IB_MGMT_CLASS_SUBN_ADM:
+                if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
+                                                 (struct ib_sa_mad *) &tunnel->mad))
+                        return;
+                break;
+        case IB_MGMT_CLASS_DEVICE_MGMT:
+                if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
+                    tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
+                        return;
+                break;
+        default:
+                /* Drop unsupported classes for slaves in tunnel mode */
+                if (slave != mlx4_master_func_num(dev->dev)) {
+                        mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
+                                     "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
+                        return;
+                }
+        }
+
+        /* We are using standard ib_core services to send the mad, so generate a
+         * stadard address handle by decoding the tunnelled mlx4_ah fields */
+        memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
+        ah.ibah.device = ctx->ib_dev;
+        mlx4_ib_query_ah(&ah.ibah, &ah_attr);
+        if ((ah_attr.ah_flags & IB_AH_GRH) &&
+            (ah_attr.grh.sgid_index != slave)) {
+                mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
+                             slave, ah_attr.grh.sgid_index);
+                return;
+        }
+
+        mlx4_ib_send_to_wire(dev, slave, ctx->port,
+                             is_proxy_qp0(dev, wc->src_qp, slave) ?
+                             IB_QPT_SMI : IB_QPT_GSI,
+                             be16_to_cpu(tunnel->hdr.pkey_index),
+                             be32_to_cpu(tunnel->hdr.remote_qpn),
+                             be32_to_cpu(tunnel->hdr.qkey),
+                             &ah_attr, &tunnel->mad);
+}
+
 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                                  enum ib_qp_type qp_type, int is_tun)
 {
@@ -735,7 +1195,57 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
 
 static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
 {
-        /* dummy until next patch in series */
+        struct mlx4_ib_demux_pv_ctx *ctx;
+        struct mlx4_ib_demux_pv_qp *tun_qp;
+        struct ib_wc wc;
+        int ret;
+        ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+        ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+        while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+                tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+                if (wc.status == IB_WC_SUCCESS) {
+                        switch (wc.opcode) {
+                        case IB_WC_RECV:
+                                mlx4_ib_multiplex_mad(ctx, &wc);
+                                ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
+                                                             wc.wr_id &
+                                                             (MLX4_NUM_TUNNEL_BUFS - 1));
+                                if (ret)
+                                        pr_err("Failed reposting tunnel "
+                                               "buf:%lld\n", wc.wr_id);
+                                break;
+                        case IB_WC_SEND:
+                                pr_debug("received tunnel send completion:"
+                                         "wrid=0x%llx, status=0x%x\n",
+                                         wc.wr_id, wc.status);
+                                ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+                                              (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+                                tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+                                        = NULL;
+                                spin_lock(&tun_qp->tx_lock);
+                                tun_qp->tx_ix_tail++;
+                                spin_unlock(&tun_qp->tx_lock);
+
+                                break;
+                        default:
+                                break;
+                        }
+                } else {
+                        pr_debug("mlx4_ib: completion error in tunnel: %d."
+                                 " status = %d, wrid = 0x%llx\n",
+                                 ctx->slave, wc.status, wc.wr_id);
+                        if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+                                ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+                                              (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+                                tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+                                        = NULL;
+                                spin_lock(&tun_qp->tx_lock);
+                                tun_qp->tx_ix_tail++;
+                                spin_unlock(&tun_qp->tx_lock);
+                        }
+                }
+        }
 }
 
 static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
@@ -843,7 +1353,60 @@ err_qp:
  */
 static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
 {
-        /* dummy until next patch in series */
+        struct mlx4_ib_demux_pv_ctx *ctx;
+        struct mlx4_ib_demux_pv_qp *sqp;
+        struct ib_wc wc;
+        struct ib_grh *grh;
+        struct ib_mad *mad;
+
+        ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
+        ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+
+        while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
+                sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
+                if (wc.status == IB_WC_SUCCESS) {
+                        switch (wc.opcode) {
+                        case IB_WC_SEND:
+                                ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+                                              (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+                                sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+                                        = NULL;
+                                spin_lock(&sqp->tx_lock);
+                                sqp->tx_ix_tail++;
+                                spin_unlock(&sqp->tx_lock);
+                                break;
+                        case IB_WC_RECV:
+                                mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
+                                                (sqp->ring[wc.wr_id &
+                                                (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+                                grh = &(((struct mlx4_mad_rcv_buf *)
+                                                (sqp->ring[wc.wr_id &
+                                                (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+                                mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
+                                if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
+                                                           (MLX4_NUM_TUNNEL_BUFS - 1)))
+                                        pr_err("Failed reposting SQP "
+                                               "buf:%lld\n", wc.wr_id);
+                                break;
+                        default:
+                                BUG_ON(1);
+                                break;
+                        }
+                } else {
+                        pr_debug("mlx4_ib: completion error in tunnel: %d."
+                                 " status = %d, wrid = 0x%llx\n",
+                                 ctx->slave, wc.status, wc.wr_id);
+                        if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
+                                ib_destroy_ah(sqp->tx_ring[wc.wr_id &
+                                              (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+                                sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+                                        = NULL;
+                                spin_lock(&sqp->tx_lock);
+                                sqp->tx_ix_tail++;
+                                spin_unlock(&sqp->tx_lock);
+                        }
+                }
+        }
 }
 
 static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,