author    Jack Morgenstein <jackm@dev.mellanox.co.il>    2012-08-03 04:40:42 -0400
committer Roland Dreier <roland@purestorage.com>         2012-09-30 23:33:32 -0400
commit    fc06573dfaf8a33bc0533bb70c49de13fa5232a4 (patch)
tree      034d40d1b3f21a53ff52be78b8fcfea0222aa891 /drivers/infiniband
parent    e2c76824ca16a3e8443cc7b26abcb21af7c27b10 (diff)
IB/mlx4: Initialize SR-IOV IB support for slaves in master context
Allocate SR-IOV paravirtualization resources and MAD demuxing contexts on the master.

This has two parts. The first part is to initialize the structures to contain the contexts. This is done at master startup time in mlx4_ib_init_sriov().

The second part is to actually create the tunneling resources required on the master to support a slave. This is performed when the master detects that a slave has started up (the MLX4_DEV_EVENT_SLAVE_INIT event is generated when a slave initializes its comm channel).

For the master there is no such startup event, so it creates its own tunneling resources when it starts up. In addition, the master also creates the real special QPs. The ib_core layer on the master causes creation of proxy special QPs, since the master is also paravirtualized at the ib_core layer.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
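A quick way to see what the new tunnel machinery tracks per work request: the patch encodes the ring slot, the proxy QP type, and a receive flag into each 64-bit work-request ID (see the MLX4_TUN_* macros added to mad.c below). The following is a minimal userspace sketch of that convention, not kernel code; the macro values are taken from the patch, while decoding the low 32 bits as the ring slot is an inference from how mlx4_ib_post_pv_qp_buf() builds recv_wr.wr_id.

#include <stdint.h>
#include <stdio.h>

/* WRID layout used for the tunnel/proxy QP rings in this patch:
 * bit 34 flags a receive WR, bits 33:32 carry the proxy QP type
 * (IB_QPT_SMI = 0, IB_QPT_GSI = 1), and the low bits carry the
 * ring slot index that was posted. */
#define MLX4_TUN_SEND_WRID_SHIFT 34
#define MLX4_TUN_QPN_SHIFT 32
#define MLX4_TUN_WRID_RECV (((uint64_t) 1) << MLX4_TUN_SEND_WRID_SHIFT)
#define MLX4_TUN_SET_WRID_QPN(a) (((uint64_t) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)

int main(void)
{
	/* Encode: a receive WR for ring slot 5 on the GSI (QP1) proxy QP,
	 * mirroring how mlx4_ib_post_pv_qp_buf() builds recv_wr.wr_id. */
	uint64_t wr_id = (uint64_t) 5 | MLX4_TUN_WRID_RECV | MLX4_TUN_SET_WRID_QPN(1);

	/* Decode, as a completion handler would. */
	printf("is_recv=%u qpt=%u slot=%u\n",
	       (unsigned) MLX4_TUN_IS_RECV(wr_id),
	       (unsigned) MLX4_TUN_WRID_QPN(wr_id),
	       (unsigned) (wr_id & 0xffffffffu));
	return 0;
}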
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c      | 684
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c     |  80
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h  |  34
3 files changed, 791 insertions, 7 deletions
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 9c2ae7efd00f..e98849338a94 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -44,6 +44,35 @@ enum {
44 MLX4_IB_VENDOR_CLASS2 = 0xa
45};
46
47#define MLX4_TUN_SEND_WRID_SHIFT 34
48#define MLX4_TUN_QPN_SHIFT 32
49#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
50#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
51
52#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
53#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
54
55struct mlx4_mad_rcv_buf {
56 struct ib_grh grh;
57 u8 payload[256];
58} __packed;
59
60struct mlx4_mad_snd_buf {
61 u8 payload[256];
62} __packed;
63
64struct mlx4_tunnel_mad {
65 struct ib_grh grh;
66 struct mlx4_ib_tunnel_header hdr;
67 struct ib_mad mad;
68} __packed;
69
70struct mlx4_rcv_tunnel_mad {
71 struct mlx4_rcv_tunnel_hdr hdr;
72 struct ib_grh grh;
73 struct ib_mad mad;
74} __packed;
75
76int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
77 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
78 void *in_mad, void *response_mad)
@@ -516,3 +545,658 @@ void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
545
546 ib_dispatch_event(&event);
547}
548
549static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
550{
551 unsigned long flags;
552 struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
553 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
554 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
555 if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
556 queue_work(ctx->wq, &ctx->work);
557 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
558}
559
560static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
561 struct mlx4_ib_demux_pv_qp *tun_qp,
562 int index)
563{
564 struct ib_sge sg_list;
565 struct ib_recv_wr recv_wr, *bad_recv_wr;
566 int size;
567
568 size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
569 sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
570
571 sg_list.addr = tun_qp->ring[index].map;
572 sg_list.length = size;
573 sg_list.lkey = ctx->mr->lkey;
574
575 recv_wr.next = NULL;
576 recv_wr.sg_list = &sg_list;
577 recv_wr.num_sge = 1;
578 recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
579 MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
580 ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
581 size, DMA_FROM_DEVICE);
582 return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
583}
584
585static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
586 enum ib_qp_type qp_type, int is_tun)
587{
588 int i;
589 struct mlx4_ib_demux_pv_qp *tun_qp;
590 int rx_buf_size, tx_buf_size;
591
592 if (qp_type > IB_QPT_GSI)
593 return -EINVAL;
594
595 tun_qp = &ctx->qp[qp_type];
596
597 tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
598 GFP_KERNEL);
599 if (!tun_qp->ring)
600 return -ENOMEM;
601
602 tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
603 sizeof (struct mlx4_ib_tun_tx_buf),
604 GFP_KERNEL);
605 if (!tun_qp->tx_ring) {
606 kfree(tun_qp->ring);
607 tun_qp->ring = NULL;
608 return -ENOMEM;
609 }
610
611 if (is_tun) {
612 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
613 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
614 } else {
615 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
616 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
617 }
618
619 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
620 tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
621 if (!tun_qp->ring[i].addr)
622 goto err;
623 tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
624 tun_qp->ring[i].addr,
625 rx_buf_size,
626 DMA_FROM_DEVICE);
627 }
628
629 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
630 tun_qp->tx_ring[i].buf.addr =
631 kmalloc(tx_buf_size, GFP_KERNEL);
632 if (!tun_qp->tx_ring[i].buf.addr)
633 goto tx_err;
634 tun_qp->tx_ring[i].buf.map =
635 ib_dma_map_single(ctx->ib_dev,
636 tun_qp->tx_ring[i].buf.addr,
637 tx_buf_size,
638 DMA_TO_DEVICE);
639 tun_qp->tx_ring[i].ah = NULL;
640 }
641 spin_lock_init(&tun_qp->tx_lock);
642 tun_qp->tx_ix_head = 0;
643 tun_qp->tx_ix_tail = 0;
644 tun_qp->proxy_qpt = qp_type;
645
646 return 0;
647
648tx_err:
649 while (i > 0) {
650 --i;
651 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
652 tx_buf_size, DMA_TO_DEVICE);
653 kfree(tun_qp->tx_ring[i].buf.addr);
654 }
655 kfree(tun_qp->tx_ring);
656 tun_qp->tx_ring = NULL;
657 i = MLX4_NUM_TUNNEL_BUFS;
658err:
659 while (i > 0) {
660 --i;
661 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
662 rx_buf_size, DMA_FROM_DEVICE);
663 kfree(tun_qp->ring[i].addr);
664 }
665 kfree(tun_qp->ring);
666 tun_qp->ring = NULL;
667 return -ENOMEM;
668}
669
670static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
671 enum ib_qp_type qp_type, int is_tun)
672{
673 int i;
674 struct mlx4_ib_demux_pv_qp *tun_qp;
675 int rx_buf_size, tx_buf_size;
676
677 if (qp_type > IB_QPT_GSI)
678 return;
679
680 tun_qp = &ctx->qp[qp_type];
681 if (is_tun) {
682 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
683 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
684 } else {
685 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
686 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
687 }
688
689
690 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
691 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
692 rx_buf_size, DMA_FROM_DEVICE);
693 kfree(tun_qp->ring[i].addr);
694 }
695
696 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
697 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
698 tx_buf_size, DMA_TO_DEVICE);
699 kfree(tun_qp->tx_ring[i].buf.addr);
700 if (tun_qp->tx_ring[i].ah)
701 ib_destroy_ah(tun_qp->tx_ring[i].ah);
702 }
703 kfree(tun_qp->tx_ring);
704 kfree(tun_qp->ring);
705}
706
707static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
708{
709 /* dummy until next patch in series */
710}
711
712static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
713{
714 struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
715
716 /* It's worse than that! He's dead, Jim! */
717 pr_err("Fatal error (%d) on a MAD QP on port %d\n",
718 event->event, sqp->port);
719}
720
721static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
722 enum ib_qp_type qp_type, int create_tun)
723{
724 int i, ret;
725 struct mlx4_ib_demux_pv_qp *tun_qp;
726 struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
727 struct ib_qp_attr attr;
728 int qp_attr_mask_INIT;
729
730 if (qp_type > IB_QPT_GSI)
731 return -EINVAL;
732
733 tun_qp = &ctx->qp[qp_type];
734
735 memset(&qp_init_attr, 0, sizeof qp_init_attr);
736 qp_init_attr.init_attr.send_cq = ctx->cq;
737 qp_init_attr.init_attr.recv_cq = ctx->cq;
738 qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
739 qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
740 qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
741 qp_init_attr.init_attr.cap.max_send_sge = 1;
742 qp_init_attr.init_attr.cap.max_recv_sge = 1;
743 if (create_tun) {
744 qp_init_attr.init_attr.qp_type = IB_QPT_UD;
745 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
746 qp_init_attr.port = ctx->port;
747 qp_init_attr.slave = ctx->slave;
748 qp_init_attr.proxy_qp_type = qp_type;
749 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
750 IB_QP_QKEY | IB_QP_PORT;
751 } else {
752 qp_init_attr.init_attr.qp_type = qp_type;
753 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
754 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
755 }
756 qp_init_attr.init_attr.port_num = ctx->port;
757 qp_init_attr.init_attr.qp_context = ctx;
758 qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
759 tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
760 if (IS_ERR(tun_qp->qp)) {
761 ret = PTR_ERR(tun_qp->qp);
762 tun_qp->qp = NULL;
763 pr_err("Couldn't create %s QP (%d)\n",
764 create_tun ? "tunnel" : "special", ret);
765 return ret;
766 }
767
768 memset(&attr, 0, sizeof attr);
769 attr.qp_state = IB_QPS_INIT;
770 attr.pkey_index =
771 to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
772 attr.qkey = IB_QP1_QKEY;
773 attr.port_num = ctx->port;
774 ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
775 if (ret) {
776 pr_err("Couldn't change %s qp state to INIT (%d)\n",
777 create_tun ? "tunnel" : "special", ret);
778 goto err_qp;
779 }
780 attr.qp_state = IB_QPS_RTR;
781 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
782 if (ret) {
783 pr_err("Couldn't change %s qp state to RTR (%d)\n",
784 create_tun ? "tunnel" : "special", ret);
785 goto err_qp;
786 }
787 attr.qp_state = IB_QPS_RTS;
788 attr.sq_psn = 0;
789 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
790 if (ret) {
791 pr_err("Couldn't change %s qp state to RTS (%d)\n",
792 create_tun ? "tunnel" : "special", ret);
793 goto err_qp;
794 }
795
796 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
797 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
798 if (ret) {
799 pr_err(" mlx4_ib_post_pv_buf error"
800 " (err = %d, i = %d)\n", ret, i);
801 goto err_qp;
802 }
803 }
804 return 0;
805
806err_qp:
807 ib_destroy_qp(tun_qp->qp);
808 tun_qp->qp = NULL;
809 return ret;
810}
811
812/*
813 * IB MAD completion callback for real SQPs
814 */
815static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
816{
817 /* dummy until next patch in series */
818}
819
820static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
821 struct mlx4_ib_demux_pv_ctx **ret_ctx)
822{
823 struct mlx4_ib_demux_pv_ctx *ctx;
824
825 *ret_ctx = NULL;
826 ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
827 if (!ctx) {
828 pr_err("failed allocating pv resource context "
829 "for port %d, slave %d\n", port, slave);
830 return -ENOMEM;
831 }
832
833 ctx->ib_dev = &dev->ib_dev;
834 ctx->port = port;
835 ctx->slave = slave;
836 *ret_ctx = ctx;
837 return 0;
838}
839
840static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
841{
842 if (dev->sriov.demux[port - 1].tun[slave]) {
843 kfree(dev->sriov.demux[port - 1].tun[slave]);
844 dev->sriov.demux[port - 1].tun[slave] = NULL;
845 }
846}
847
848static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
849 int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
850{
851 int ret, cq_size;
852
853 ctx->state = DEMUX_PV_STATE_STARTING;
854 /* have QP0 only on port owner, and only if link layer is IB */
855 if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
856 rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
857 ctx->has_smi = 1;
858
859 if (ctx->has_smi) {
860 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
861 if (ret) {
862 pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
863 goto err_out;
864 }
865 }
866
867 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
868 if (ret) {
869 pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
870 goto err_out_qp0;
871 }
872
873 cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
874 if (ctx->has_smi)
875 cq_size *= 2;
876
877 ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
878 NULL, ctx, cq_size, 0);
879 if (IS_ERR(ctx->cq)) {
880 ret = PTR_ERR(ctx->cq);
881 pr_err("Couldn't create tunnel CQ (%d)\n", ret);
882 goto err_buf;
883 }
884
885 ctx->pd = ib_alloc_pd(ctx->ib_dev);
886 if (IS_ERR(ctx->pd)) {
887 ret = PTR_ERR(ctx->pd);
888 pr_err("Couldn't create tunnel PD (%d)\n", ret);
889 goto err_cq;
890 }
891
892 ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
893 if (IS_ERR(ctx->mr)) {
894 ret = PTR_ERR(ctx->mr);
895 pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
896 goto err_pd;
897 }
898
899 if (ctx->has_smi) {
900 ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
901 if (ret) {
902 pr_err("Couldn't create %s QP0 (%d)\n",
903 create_tun ? "tunnel for" : "", ret);
904 goto err_mr;
905 }
906 }
907
908 ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
909 if (ret) {
910 pr_err("Couldn't create %s QP1 (%d)\n",
911 create_tun ? "tunnel for" : "", ret);
912 goto err_qp0;
913 }
914
915 if (create_tun)
916 INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
917 else
918 INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
919
920 ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
921
922 ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
923 if (ret) {
924 pr_err("Couldn't arm tunnel cq (%d)\n", ret);
925 goto err_wq;
926 }
927 ctx->state = DEMUX_PV_STATE_ACTIVE;
928 return 0;
929
930err_wq:
931 ctx->wq = NULL;
932 ib_destroy_qp(ctx->qp[1].qp);
933 ctx->qp[1].qp = NULL;
934
935
936err_qp0:
937 if (ctx->has_smi)
938 ib_destroy_qp(ctx->qp[0].qp);
939 ctx->qp[0].qp = NULL;
940
941err_mr:
942 ib_dereg_mr(ctx->mr);
943 ctx->mr = NULL;
944
945err_pd:
946 ib_dealloc_pd(ctx->pd);
947 ctx->pd = NULL;
948
949err_cq:
950 ib_destroy_cq(ctx->cq);
951 ctx->cq = NULL;
952
953err_buf:
954 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
955
956err_out_qp0:
957 if (ctx->has_smi)
958 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
959err_out:
960 ctx->state = DEMUX_PV_STATE_DOWN;
961 return ret;
962}
963
964static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
965 struct mlx4_ib_demux_pv_ctx *ctx, int flush)
966{
967 if (!ctx)
968 return;
969 if (ctx->state > DEMUX_PV_STATE_DOWN) {
970 ctx->state = DEMUX_PV_STATE_DOWNING;
971 if (flush)
972 flush_workqueue(ctx->wq);
973 if (ctx->has_smi) {
974 ib_destroy_qp(ctx->qp[0].qp);
975 ctx->qp[0].qp = NULL;
976 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
977 }
978 ib_destroy_qp(ctx->qp[1].qp);
979 ctx->qp[1].qp = NULL;
980 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
981 ib_dereg_mr(ctx->mr);
982 ctx->mr = NULL;
983 ib_dealloc_pd(ctx->pd);
984 ctx->pd = NULL;
985 ib_destroy_cq(ctx->cq);
986 ctx->cq = NULL;
987 ctx->state = DEMUX_PV_STATE_DOWN;
988 }
989}
990
991static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
992 int port, int do_init)
993{
994 int ret = 0;
995
996 if (!do_init) {
997 /* for master, destroy real sqp resources */
998 if (slave == mlx4_master_func_num(dev->dev))
999 destroy_pv_resources(dev, slave, port,
1000 dev->sriov.sqps[port - 1], 1);
1001 /* destroy the tunnel qp resources */
1002 destroy_pv_resources(dev, slave, port,
1003 dev->sriov.demux[port - 1].tun[slave], 1);
1004 return 0;
1005 }
1006
1007 /* create the tunnel qp resources */
1008 ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
1009 dev->sriov.demux[port - 1].tun[slave]);
1010
1011 /* for master, create the real sqp resources */
1012 if (!ret && slave == mlx4_master_func_num(dev->dev))
1013 ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
1014 dev->sriov.sqps[port - 1]);
1015 return ret;
1016}
1017
1018void mlx4_ib_tunnels_update_work(struct work_struct *work)
1019{
1020 struct mlx4_ib_demux_work *dmxw;
1021
1022 dmxw = container_of(work, struct mlx4_ib_demux_work, work);
1023 mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
1024 dmxw->do_init);
1025 kfree(dmxw);
1026 return;
1027}
1028
1029static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
1030 struct mlx4_ib_demux_ctx *ctx,
1031 int port)
1032{
1033 char name[12];
1034 int ret = 0;
1035 int i;
1036
1037 ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
1038 sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
1039 if (!ctx->tun)
1040 return -ENOMEM;
1041
1042 ctx->dev = dev;
1043 ctx->port = port;
1044 ctx->ib_dev = &dev->ib_dev;
1045
1046 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1047 ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
1048 if (ret) {
1049 ret = -ENOMEM;
1050 goto err_wq;
1051 }
1052 }
1053
1054 snprintf(name, sizeof name, "mlx4_ibt%d", port);
1055 ctx->wq = create_singlethread_workqueue(name);
1056 if (!ctx->wq) {
1057 pr_err("Failed to create tunnelling WQ for port %d\n", port);
1058 ret = -ENOMEM;
1059 goto err_wq;
1060 }
1061
1062 snprintf(name, sizeof name, "mlx4_ibud%d", port);
1063 ctx->ud_wq = create_singlethread_workqueue(name);
1064 if (!ctx->ud_wq) {
1065 pr_err("Failed to create up/down WQ for port %d\n", port);
1066 ret = -ENOMEM;
1067 goto err_udwq;
1068 }
1069
1070 return 0;
1071
1072err_udwq:
1073 destroy_workqueue(ctx->wq);
1074 ctx->wq = NULL;
1075
1076err_wq:
1077 for (i = 0; i < dev->dev->caps.sqp_demux; i++)
1078 free_pv_object(dev, i, port);
1079 kfree(ctx->tun);
1080 ctx->tun = NULL;
1081 return ret;
1082}
1083
1084static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
1085{
1086 if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
1087 sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
1088 flush_workqueue(sqp_ctx->wq);
1089 if (sqp_ctx->has_smi) {
1090 ib_destroy_qp(sqp_ctx->qp[0].qp);
1091 sqp_ctx->qp[0].qp = NULL;
1092 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
1093 }
1094 ib_destroy_qp(sqp_ctx->qp[1].qp);
1095 sqp_ctx->qp[1].qp = NULL;
1096 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
1097 ib_dereg_mr(sqp_ctx->mr);
1098 sqp_ctx->mr = NULL;
1099 ib_dealloc_pd(sqp_ctx->pd);
1100 sqp_ctx->pd = NULL;
1101 ib_destroy_cq(sqp_ctx->cq);
1102 sqp_ctx->cq = NULL;
1103 sqp_ctx->state = DEMUX_PV_STATE_DOWN;
1104 }
1105}
1106
1107static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
1108{
1109 int i;
1110 if (ctx) {
1111 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1112 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1113 if (!ctx->tun[i])
1114 continue;
1115 if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
1116 ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
1117 }
1118 flush_workqueue(ctx->wq);
1119 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1120 destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
1121 free_pv_object(dev, i, ctx->port);
1122 }
1123 kfree(ctx->tun);
1124 destroy_workqueue(ctx->ud_wq);
1125 destroy_workqueue(ctx->wq);
1126 }
1127}
1128
1129static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
1130{
1131 int i;
1132
1133 if (!mlx4_is_master(dev->dev))
1134 return;
1135 /* initialize or tear down tunnel QPs for the master */
1136 for (i = 0; i < dev->dev->caps.num_ports; i++)
1137 mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
1138 return;
1139}
1140
1141int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
1142{
1143 int i = 0;
1144 int err;
1145
1146 if (!mlx4_is_mfunc(dev->dev))
1147 return 0;
1148
1149 dev->sriov.is_going_down = 0;
1150 spin_lock_init(&dev->sriov.going_down_lock);
1151
1152 mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
1153
1154 if (mlx4_is_slave(dev->dev)) {
1155 mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
1156 return 0;
1157 }
1158
1159 mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
1160 dev->dev->caps.sqp_demux);
1161 for (i = 0; i < dev->num_ports; i++) {
1162 err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
1163 &dev->sriov.sqps[i]);
1164 if (err)
1165 goto demux_err;
1166 err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
1167 if (err)
1168 goto demux_err;
1169 }
1170 mlx4_ib_master_tunnels(dev, 1);
1171 return 0;
1172
1173demux_err:
1174 while (i > 0) {
1175 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
1176 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
1177 --i;
1178 }
1179
1180 return err;
1181}
1182
1183void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
1184{
1185 int i;
1186 unsigned long flags;
1187
1188 if (!mlx4_is_mfunc(dev->dev))
1189 return;
1190
1191 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
1192 dev->sriov.is_going_down = 1;
1193 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
1194 if (mlx4_is_master(dev->dev))
1195 for (i = 0; i < dev->num_ports; i++) {
1196 flush_workqueue(dev->sriov.demux[i].ud_wq);
1197 mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
1198 kfree(dev->sriov.sqps[i]);
1199 dev->sriov.sqps[i] = NULL;
1200 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
1201 }
1202}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index cc05579ebce7..3f7f77f93a1c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1357,11 +1357,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1357 if (mlx4_ib_mad_init(ibdev))
1358 goto err_reg;
1359
1360 if (mlx4_ib_init_sriov(ibdev))
1361 goto err_mad;
1362
1363 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
1364 iboe->nb.notifier_call = mlx4_ib_netdev_event;
1365 err = register_netdevice_notifier(&iboe->nb);
1366 if (err)
1364 goto err_reg;
1367 goto err_sriov;
1368 }
1369
1370 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1379,6 +1382,12 @@ err_notif:
1382 pr_warn("failure unregistering notifier\n");
1383 flush_workqueue(wq);
1384
1385err_sriov:
1386 mlx4_ib_close_sriov(ibdev);
1387
1388err_mad:
1389 mlx4_ib_mad_cleanup(ibdev);
1390
1391err_reg:
1392 ib_unregister_device(&ibdev->ib_dev);
1393
@@ -1407,6 +1416,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1416 struct mlx4_ib_dev *ibdev = ibdev_ptr;
1417 int p;
1418
1419 mlx4_ib_close_sriov(ibdev);
1420 mlx4_ib_mad_cleanup(ibdev);
1421 ib_unregister_device(&ibdev->ib_dev);
1422 if (ibdev->iboe.nb.notifier_call) {
@@ -1428,6 +1438,51 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1438 ib_dealloc_device(&ibdev->ib_dev);
1439}
1440
1441static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
1442{
1443 struct mlx4_ib_demux_work **dm = NULL;
1444 struct mlx4_dev *dev = ibdev->dev;
1445 int i;
1446 unsigned long flags;
1447
1448 if (!mlx4_is_master(dev))
1449 return;
1450
1451 dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
1452 if (!dm) {
1453 pr_err("failed to allocate memory for tunneling qp update\n");
1454 goto out;
1455 }
1456
1457 for (i = 0; i < dev->caps.num_ports; i++) {
1458 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
1459 if (!dm[i]) {
1460 pr_err("failed to allocate memory for tunneling qp update work struct\n");
1461 for (i = 0; i < dev->caps.num_ports; i++) {
1462 if (dm[i])
1463 kfree(dm[i]);
1464 }
1465 goto out;
1466 }
1467 }
1468 /* initialize or tear down tunnel QPs for the slave */
1469 for (i = 0; i < dev->caps.num_ports; i++) {
1470 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
1471 dm[i]->port = i + 1;
1472 dm[i]->slave = slave;
1473 dm[i]->do_init = do_init;
1474 dm[i]->dev = ibdev;
1475 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
1476 if (!ibdev->sriov.is_going_down)
1477 queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
1478 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
1479 }
1480out:
1481 if (dm)
1482 kfree(dm);
1483 return;
1484}
1485
1486static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1487 enum mlx4_dev_event event, unsigned long param)
1488{
@@ -1435,22 +1490,23 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1490 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
1491 struct mlx4_eqe *eqe = NULL;
1492 struct ib_event_work *ew;
1438 int port = 0;
1493 int p = 0;
1494
1495 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
1496 eqe = (struct mlx4_eqe *)param;
1497 else
1443 port = (u8)param;
1498 p = (int) param;
1444
1445 if (port > ibdev->num_ports)
1446 return;
1499
1500 switch (event) {
1501 case MLX4_DEV_EVENT_PORT_UP:
1502 if (p > ibdev->num_ports)
1503 return;
1504 ibev.event = IB_EVENT_PORT_ACTIVE;
1505 break;
1506
1507 case MLX4_DEV_EVENT_PORT_DOWN:
1508 if (p > ibdev->num_ports)
1509 return;
1510 ibev.event = IB_EVENT_PORT_ERR;
1511 break;
1512
@@ -1472,12 +1528,22 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1528 handle_port_mgmt_change_event(&ew->work);
1529 return;
1530
1531 case MLX4_DEV_EVENT_SLAVE_INIT:
1532 /* here, p is the slave id */
1533 do_slave_init(ibdev, p, 1);
1534 return;
1535
1536 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
1537 /* here, p is the slave id */
1538 do_slave_init(ibdev, p, 0);
1539 return;
1540
1541 default:
1542 return;
1543 }
1544
1545 ibev.device = ibdev_ptr;
1480 ibev.element.port_num = port;
1546 ibev.element.port_num = (u8) p;
1547
1548 ib_dispatch_event(&ibev);
1549}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1248d576b031..137941d79870 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -176,6 +176,10 @@ enum mlx4_ib_qp_type {
176 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
177 MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
178
179enum {
180 MLX4_NUM_TUNNEL_BUFS = 256,
181};
182
183struct mlx4_ib_tunnel_header {
184 struct mlx4_av av;
185 __be32 remote_qpn;
@@ -263,6 +267,15 @@ struct mlx4_ib_ah {
267 union mlx4_ext_av av;
268};
269
270struct mlx4_ib_demux_work {
271 struct work_struct work;
272 struct mlx4_ib_dev *dev;
273 int slave;
274 int do_init;
275 u8 port;
276
277};
278
279struct mlx4_ib_tun_tx_buf {
280 struct mlx4_ib_buf buf;
281 struct ib_ah *ah;
@@ -278,9 +291,17 @@ struct mlx4_ib_demux_pv_qp {
291 unsigned tx_ix_tail;
292};
293
294enum mlx4_ib_demux_pv_state {
295 DEMUX_PV_STATE_DOWN,
296 DEMUX_PV_STATE_STARTING,
297 DEMUX_PV_STATE_ACTIVE,
298 DEMUX_PV_STATE_DOWNING,
299};
300
301struct mlx4_ib_demux_pv_ctx {
302 int port;
303 int slave;
304 enum mlx4_ib_demux_pv_state state;
305 int has_smi;
306 struct ib_device *ib_dev;
307 struct ib_cq *cq;
@@ -319,6 +340,13 @@ struct mlx4_ib_iboe {
340 union ib_gid gid_table[MLX4_MAX_PORTS][128];
341};
342
343struct pkey_mgt {
344 u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
345 u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
346 struct list_head pkey_port_list[MLX4_MFUNC_MAX];
347 struct kobject *device_parent[MLX4_MFUNC_MAX];
348};
349
350struct mlx4_ib_dev {
351 struct ib_device ib_dev;
352 struct mlx4_dev *dev;
@@ -340,6 +368,7 @@ struct mlx4_ib_dev {
368 int counters[MLX4_MAX_PORTS];
369 int *eq_table;
370 int eq_added;
371 struct pkey_mgt pkeys;
372};
373
374struct ib_event_work {
@@ -424,6 +453,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
453 return container_of(ibah, struct mlx4_ib_ah, ibah);
454}
455
456int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
457void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
458
459int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
460 struct mlx4_db *db);
461void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -515,4 +547,6 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
547void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
548 enum ib_event_type type);
549
550void mlx4_ib_tunnels_update_work(struct work_struct *work);
551
552#endif /* MLX4_IB_H */