 drivers/infiniband/hw/mlx4/mad.c         | 684 +++++++++++++++++++++++++++++-
 drivers/infiniband/hw/mlx4/main.c        |  80 ++++-
 drivers/infiniband/hw/mlx4/mlx4_ib.h     |  34 ++
 drivers/net/ethernet/mellanox/mlx4/cmd.c |   3 +
 include/linux/mlx4/device.h              |   3 +-
 include/linux/mlx4/driver.h              |   2 +
 6 files changed, 798 insertions(+), 8 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 9c2ae7efd00f..e98849338a94 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -44,6 +44,35 @@ enum {
 	MLX4_IB_VENDOR_CLASS2 = 0xa
 };
 
+#define MLX4_TUN_SEND_WRID_SHIFT 34
+#define MLX4_TUN_QPN_SHIFT 32
+#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
+#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
+
+#define MLX4_TUN_IS_RECV(a)  (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
+#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
+
+struct mlx4_mad_rcv_buf {
+	struct ib_grh grh;
+	u8 payload[256];
+} __packed;
+
+struct mlx4_mad_snd_buf {
+	u8 payload[256];
+} __packed;
+
+struct mlx4_tunnel_mad {
+	struct ib_grh grh;
+	struct mlx4_ib_tunnel_header hdr;
+	struct ib_mad mad;
+} __packed;
+
+struct mlx4_rcv_tunnel_mad {
+	struct mlx4_rcv_tunnel_hdr hdr;
+	struct ib_grh grh;
+	struct ib_mad mad;
+} __packed;
+
 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
 		 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
 		 void *in_mad, void *response_mad)
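
Note: the wr_id macros above pack everything the completion handler will need into the 64-bit work-request id: the ring-buffer index in the low 32 bits, the proxy QP type (IB_QPT_SMI = 0, IB_QPT_GSI = 1) in bits 32-33, and a receive flag in bit 34. A minimal sketch of the round trip (handle_recv() is hypothetical, shown for illustration only):

	u64 wr_id = (u64) index | MLX4_TUN_WRID_RECV |
		    MLX4_TUN_SET_WRID_QPN(IB_QPT_GSI);

	if (MLX4_TUN_IS_RECV(wr_id))			/* bit 34: recv vs. send */
		handle_recv(MLX4_TUN_WRID_QPN(wr_id),	/* bits 32-33: QP type */
			    (int) (wr_id & 0xffffffff));/* low bits: ring index */
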
@@ -516,3 +545,658 @@ void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
 
 	ib_dispatch_event(&event);
 }
+
+static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
+{
+	unsigned long flags;
+	struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+	struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+	if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+		queue_work(ctx->wq, &ctx->work);
+	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
+static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
+				  struct mlx4_ib_demux_pv_qp *tun_qp,
+				  int index)
+{
+	struct ib_sge sg_list;
+	struct ib_recv_wr recv_wr, *bad_recv_wr;
+	int size;
+
+	size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
+		sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
+
+	sg_list.addr = tun_qp->ring[index].map;
+	sg_list.length = size;
+	sg_list.lkey = ctx->mr->lkey;
+
+	recv_wr.next = NULL;
+	recv_wr.sg_list = &sg_list;
+	recv_wr.num_sge = 1;
+	recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
+		MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
+	ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
+				      size, DMA_FROM_DEVICE);
+	return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
+}
+
+static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+				 enum ib_qp_type qp_type, int is_tun)
+{
+	int i;
+	struct mlx4_ib_demux_pv_qp *tun_qp;
+	int rx_buf_size, tx_buf_size;
+
+	if (qp_type > IB_QPT_GSI)
+		return -EINVAL;
+
+	tun_qp = &ctx->qp[qp_type];
+
+	tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
+			       GFP_KERNEL);
+	if (!tun_qp->ring)
+		return -ENOMEM;
+
+	tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+				  sizeof (struct mlx4_ib_tun_tx_buf),
+				  GFP_KERNEL);
+	if (!tun_qp->tx_ring) {
+		kfree(tun_qp->ring);
+		tun_qp->ring = NULL;
+		return -ENOMEM;
+	}
+
+	if (is_tun) {
+		rx_buf_size = sizeof (struct mlx4_tunnel_mad);
+		tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
+	} else {
+		rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
+		tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
+	}
+
+	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+		tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
+		if (!tun_qp->ring[i].addr)
+			goto err;
+		tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
+							tun_qp->ring[i].addr,
+							rx_buf_size,
+							DMA_FROM_DEVICE);
+	}
+
+	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+		tun_qp->tx_ring[i].buf.addr =
+			kmalloc(tx_buf_size, GFP_KERNEL);
+		if (!tun_qp->tx_ring[i].buf.addr)
+			goto tx_err;
+		tun_qp->tx_ring[i].buf.map =
+			ib_dma_map_single(ctx->ib_dev,
+					  tun_qp->tx_ring[i].buf.addr,
+					  tx_buf_size,
+					  DMA_TO_DEVICE);
+		tun_qp->tx_ring[i].ah = NULL;
+	}
+	spin_lock_init(&tun_qp->tx_lock);
+	tun_qp->tx_ix_head = 0;
+	tun_qp->tx_ix_tail = 0;
+	tun_qp->proxy_qpt = qp_type;
+
+	return 0;
+
+tx_err:
+	while (i > 0) {
+		--i;
+		ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
+				    tx_buf_size, DMA_TO_DEVICE);
+		kfree(tun_qp->tx_ring[i].buf.addr);
+	}
+	kfree(tun_qp->tx_ring);
+	tun_qp->tx_ring = NULL;
+	i = MLX4_NUM_TUNNEL_BUFS;
+err:
+	while (i > 0) {
+		--i;
+		ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
+				    rx_buf_size, DMA_FROM_DEVICE);
+		kfree(tun_qp->ring[i].addr);
+	}
+	kfree(tun_qp->ring);
+	tun_qp->ring = NULL;
+	return -ENOMEM;
+}
+
+static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
+				    enum ib_qp_type qp_type, int is_tun)
+{
+	int i;
+	struct mlx4_ib_demux_pv_qp *tun_qp;
+	int rx_buf_size, tx_buf_size;
+
+	if (qp_type > IB_QPT_GSI)
+		return;
+
+	tun_qp = &ctx->qp[qp_type];
+	if (is_tun) {
+		rx_buf_size = sizeof (struct mlx4_tunnel_mad);
+		tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
+	} else {
+		rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
+		tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
+	}
+
+
+	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+		ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
+				    rx_buf_size, DMA_FROM_DEVICE);
+		kfree(tun_qp->ring[i].addr);
+	}
+
+	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+		ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
+				    tx_buf_size, DMA_TO_DEVICE);
+		kfree(tun_qp->tx_ring[i].buf.addr);
+		if (tun_qp->tx_ring[i].ah)
+			ib_destroy_ah(tun_qp->tx_ring[i].ah);
+	}
+	kfree(tun_qp->tx_ring);
+	kfree(tun_qp->ring);
+}
+
+static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
+{
+	/* dummy until next patch in series */
+}
+
+static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
+{
+	struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
+
+	/* It's worse than that! He's dead, Jim! */
+	pr_err("Fatal error (%d) on a MAD QP on port %d\n",
+	       event->event, sqp->port);
+}
+
+static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
+			 enum ib_qp_type qp_type, int create_tun)
+{
+	int i, ret;
+	struct mlx4_ib_demux_pv_qp *tun_qp;
+	struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
+	struct ib_qp_attr attr;
+	int qp_attr_mask_INIT;
+
+	if (qp_type > IB_QPT_GSI)
+		return -EINVAL;
+
+	tun_qp = &ctx->qp[qp_type];
+
+	memset(&qp_init_attr, 0, sizeof qp_init_attr);
+	qp_init_attr.init_attr.send_cq = ctx->cq;
+	qp_init_attr.init_attr.recv_cq = ctx->cq;
+	qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
+	qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+	qp_init_attr.init_attr.cap.max_send_sge = 1;
+	qp_init_attr.init_attr.cap.max_recv_sge = 1;
+	if (create_tun) {
+		qp_init_attr.init_attr.qp_type = IB_QPT_UD;
+		qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
+		qp_init_attr.port = ctx->port;
+		qp_init_attr.slave = ctx->slave;
+		qp_init_attr.proxy_qp_type = qp_type;
+		qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
+			   IB_QP_QKEY | IB_QP_PORT;
+	} else {
+		qp_init_attr.init_attr.qp_type = qp_type;
+		qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
+		qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
+	}
+	qp_init_attr.init_attr.port_num = ctx->port;
+	qp_init_attr.init_attr.qp_context = ctx;
+	qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
+	tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
+	if (IS_ERR(tun_qp->qp)) {
+		ret = PTR_ERR(tun_qp->qp);
+		tun_qp->qp = NULL;
+		pr_err("Couldn't create %s QP (%d)\n",
+		       create_tun ? "tunnel" : "special", ret);
+		return ret;
+	}
+
+	memset(&attr, 0, sizeof attr);
+	attr.qp_state = IB_QPS_INIT;
+	attr.pkey_index =
+		to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
+	attr.qkey = IB_QP1_QKEY;
+	attr.port_num = ctx->port;
+	ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
+	if (ret) {
+		pr_err("Couldn't change %s qp state to INIT (%d)\n",
+		       create_tun ? "tunnel" : "special", ret);
+		goto err_qp;
+	}
+	attr.qp_state = IB_QPS_RTR;
+	ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
+	if (ret) {
+		pr_err("Couldn't change %s qp state to RTR (%d)\n",
+		       create_tun ? "tunnel" : "special", ret);
+		goto err_qp;
+	}
+	attr.qp_state = IB_QPS_RTS;
+	attr.sq_psn = 0;
+	ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
+	if (ret) {
+		pr_err("Couldn't change %s qp state to RTS (%d)\n",
+		       create_tun ? "tunnel" : "special", ret);
+		goto err_qp;
+	}
+
+	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+		ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
+		if (ret) {
+			pr_err(" mlx4_ib_post_pv_buf error"
+			       " (err = %d, i = %d)\n", ret, i);
+			goto err_qp;
+		}
+	}
+	return 0;
+
+err_qp:
+	ib_destroy_qp(tun_qp->qp);
+	tun_qp->qp = NULL;
+	return ret;
+}
+
+/*
+ * IB MAD completion callback for real SQPs
+ */
+static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
+{
+	/* dummy until next patch in series */
+}
+
+static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
+			   struct mlx4_ib_demux_pv_ctx **ret_ctx)
+{
+	struct mlx4_ib_demux_pv_ctx *ctx;
+
+	*ret_ctx = NULL;
+	ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
+	if (!ctx) {
+		pr_err("failed allocating pv resource context "
+		       "for port %d, slave %d\n", port, slave);
+		return -ENOMEM;
+	}
+
+	ctx->ib_dev = &dev->ib_dev;
+	ctx->port = port;
+	ctx->slave = slave;
+	*ret_ctx = ctx;
+	return 0;
+}
+
+static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
+{
+	if (dev->sriov.demux[port - 1].tun[slave]) {
+		kfree(dev->sriov.demux[port - 1].tun[slave]);
+		dev->sriov.demux[port - 1].tun[slave] = NULL;
+	}
+}
+
+static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
+			       int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
+{
+	int ret, cq_size;
+
+	ctx->state = DEMUX_PV_STATE_STARTING;
+	/* have QP0 only on port owner, and only if link layer is IB */
+	if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
+	    rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+		ctx->has_smi = 1;
+
+	if (ctx->has_smi) {
+		ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
+		if (ret) {
+			pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
+			goto err_out;
+		}
+	}
+
+	ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
+	if (ret) {
+		pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
+		goto err_out_qp0;
+	}
+
+	cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+	if (ctx->has_smi)
+		cq_size *= 2;
+
+	ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+			       NULL, ctx, cq_size, 0);
+	if (IS_ERR(ctx->cq)) {
+		ret = PTR_ERR(ctx->cq);
+		pr_err("Couldn't create tunnel CQ (%d)\n", ret);
+		goto err_buf;
+	}
+
+	ctx->pd = ib_alloc_pd(ctx->ib_dev);
+	if (IS_ERR(ctx->pd)) {
+		ret = PTR_ERR(ctx->pd);
+		pr_err("Couldn't create tunnel PD (%d)\n", ret);
+		goto err_cq;
+	}
+
+	ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(ctx->mr)) {
+		ret = PTR_ERR(ctx->mr);
+		pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
+		goto err_pd;
+	}
+
+	if (ctx->has_smi) {
+		ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
+		if (ret) {
+			pr_err("Couldn't create %s QP0 (%d)\n",
+			       create_tun ? "tunnel for" : "", ret);
+			goto err_mr;
+		}
+	}
+
+	ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
+	if (ret) {
+		pr_err("Couldn't create %s QP1 (%d)\n",
+		       create_tun ? "tunnel for" : "", ret);
+		goto err_qp0;
+	}
+
+	if (create_tun)
+		INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
+	else
+		INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
+
+	ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+
+	ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
+	if (ret) {
+		pr_err("Couldn't arm tunnel cq (%d)\n", ret);
+		goto err_wq;
+	}
+	ctx->state = DEMUX_PV_STATE_ACTIVE;
+	return 0;
+
+err_wq:
+	ctx->wq = NULL;
+	ib_destroy_qp(ctx->qp[1].qp);
+	ctx->qp[1].qp = NULL;
+
+
+err_qp0:
+	if (ctx->has_smi)
+		ib_destroy_qp(ctx->qp[0].qp);
+	ctx->qp[0].qp = NULL;
+
+err_mr:
+	ib_dereg_mr(ctx->mr);
+	ctx->mr = NULL;
+
+err_pd:
+	ib_dealloc_pd(ctx->pd);
+	ctx->pd = NULL;
+
+err_cq:
+	ib_destroy_cq(ctx->cq);
+	ctx->cq = NULL;
+
+err_buf:
+	mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
+
+err_out_qp0:
+	if (ctx->has_smi)
+		mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
+err_out:
+	ctx->state = DEMUX_PV_STATE_DOWN;
+	return ret;
+}
+
+static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
+				 struct mlx4_ib_demux_pv_ctx *ctx, int flush)
+{
+	if (!ctx)
+		return;
+	if (ctx->state > DEMUX_PV_STATE_DOWN) {
+		ctx->state = DEMUX_PV_STATE_DOWNING;
+		if (flush)
+			flush_workqueue(ctx->wq);
+		if (ctx->has_smi) {
+			ib_destroy_qp(ctx->qp[0].qp);
+			ctx->qp[0].qp = NULL;
+			mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
+		}
+		ib_destroy_qp(ctx->qp[1].qp);
+		ctx->qp[1].qp = NULL;
+		mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
+		ib_dereg_mr(ctx->mr);
+		ctx->mr = NULL;
+		ib_dealloc_pd(ctx->pd);
+		ctx->pd = NULL;
+		ib_destroy_cq(ctx->cq);
+		ctx->cq = NULL;
+		ctx->state = DEMUX_PV_STATE_DOWN;
+	}
+}
+
+static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
+				  int port, int do_init)
+{
+	int ret = 0;
+
+	if (!do_init) {
+		/* for master, destroy real sqp resources */
+		if (slave == mlx4_master_func_num(dev->dev))
+			destroy_pv_resources(dev, slave, port,
+					     dev->sriov.sqps[port - 1], 1);
+		/* destroy the tunnel qp resources */
+		destroy_pv_resources(dev, slave, port,
+				     dev->sriov.demux[port - 1].tun[slave], 1);
+		return 0;
+	}
+
+	/* create the tunnel qp resources */
+	ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
+				  dev->sriov.demux[port - 1].tun[slave]);
+
+	/* for master, create the real sqp resources */
+	if (!ret && slave == mlx4_master_func_num(dev->dev))
+		ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
+					  dev->sriov.sqps[port - 1]);
+	return ret;
+}
+
+void mlx4_ib_tunnels_update_work(struct work_struct *work)
+{
+	struct mlx4_ib_demux_work *dmxw;
+
+	dmxw = container_of(work, struct mlx4_ib_demux_work, work);
+	mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
+			       dmxw->do_init);
+	kfree(dmxw);
+	return;
+}
+
+static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
+				   struct mlx4_ib_demux_ctx *ctx,
+				   int port)
+{
+	char name[12];
+	int ret = 0;
+	int i;
+
+	ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
+			   sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
+	if (!ctx->tun)
+		return -ENOMEM;
+
+	ctx->dev = dev;
+	ctx->port = port;
+	ctx->ib_dev = &dev->ib_dev;
+
+	for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+		ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
+		if (ret) {
+			ret = -ENOMEM;
+			goto err_wq;
+		}
+	}
+
+	snprintf(name, sizeof name, "mlx4_ibt%d", port);
+	ctx->wq = create_singlethread_workqueue(name);
+	if (!ctx->wq) {
+		pr_err("Failed to create tunnelling WQ for port %d\n", port);
+		ret = -ENOMEM;
+		goto err_wq;
+	}
+
+	snprintf(name, sizeof name, "mlx4_ibud%d", port);
+	ctx->ud_wq = create_singlethread_workqueue(name);
+	if (!ctx->ud_wq) {
+		pr_err("Failed to create up/down WQ for port %d\n", port);
+		ret = -ENOMEM;
+		goto err_udwq;
+	}
+
+	return 0;
+
+err_udwq:
+	destroy_workqueue(ctx->wq);
+	ctx->wq = NULL;
+
+err_wq:
+	for (i = 0; i < dev->dev->caps.sqp_demux; i++)
+		free_pv_object(dev, i, port);
+	kfree(ctx->tun);
+	ctx->tun = NULL;
+	return ret;
+}
+
+static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
+{
+	if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
+		sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
+		flush_workqueue(sqp_ctx->wq);
+		if (sqp_ctx->has_smi) {
+			ib_destroy_qp(sqp_ctx->qp[0].qp);
+			sqp_ctx->qp[0].qp = NULL;
+			mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
+		}
+		ib_destroy_qp(sqp_ctx->qp[1].qp);
+		sqp_ctx->qp[1].qp = NULL;
+		mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
+		ib_dereg_mr(sqp_ctx->mr);
+		sqp_ctx->mr = NULL;
+		ib_dealloc_pd(sqp_ctx->pd);
+		sqp_ctx->pd = NULL;
+		ib_destroy_cq(sqp_ctx->cq);
+		sqp_ctx->cq = NULL;
+		sqp_ctx->state = DEMUX_PV_STATE_DOWN;
+	}
+}
+
+static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
+{
+	int i;
+	if (ctx) {
+		struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+		for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+			if (!ctx->tun[i])
+				continue;
+			if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
+				ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
+		}
+		flush_workqueue(ctx->wq);
+		for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+			destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
+			free_pv_object(dev, i, ctx->port);
+		}
+		kfree(ctx->tun);
+		destroy_workqueue(ctx->ud_wq);
+		destroy_workqueue(ctx->wq);
+	}
+}
+
+static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
+{
+	int i;
+
+	if (!mlx4_is_master(dev->dev))
+		return;
+	/* initialize or tear down tunnel QPs for the master */
+	for (i = 0; i < dev->dev->caps.num_ports; i++)
+		mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
+	return;
+}
+
+int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
+{
+	int i = 0;
+	int err;
+
+	if (!mlx4_is_mfunc(dev->dev))
+		return 0;
+
+	dev->sriov.is_going_down = 0;
+	spin_lock_init(&dev->sriov.going_down_lock);
+
+	mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
+
+	if (mlx4_is_slave(dev->dev)) {
+		mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
+		return 0;
+	}
+
+	mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
+		     dev->dev->caps.sqp_demux);
+	for (i = 0; i < dev->num_ports; i++) {
+		err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
+				      &dev->sriov.sqps[i]);
+		if (err)
+			goto demux_err;
+		err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
+		if (err)
+			goto demux_err;
+	}
+	mlx4_ib_master_tunnels(dev, 1);
+	return 0;
+
+demux_err:
+	while (i > 0) {
+		free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
+		mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
+		--i;
+	}
+
+	return err;
+}
+
+void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
+{
+	int i;
+	unsigned long flags;
+
+	if (!mlx4_is_mfunc(dev->dev))
+		return;
+
+	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+	dev->sriov.is_going_down = 1;
+	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+	if (mlx4_is_master(dev->dev))
+		for (i = 0; i < dev->num_ports; i++) {
+			flush_workqueue(dev->sriov.demux[i].ud_wq);
+			mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
+			kfree(dev->sriov.sqps[i]);
+			dev->sriov.sqps[i] = NULL;
+			mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
+		}
+}
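
Note on create_pv_sqp() above: it drives the standard RESET -> INIT -> RTR -> RTS ladder. Because both the tunnel and proxy QPs are UD, RTR needs no address-vector or path setup and RTS only a starting send PSN, so each step passes a minimal attribute mask. A condensed sketch, with error reporting elided (bring_ud_qp_to_rts() is a hypothetical helper, not part of the patch):

	static int bring_ud_qp_to_rts(struct ib_qp *qp, u16 pkey_index, u8 port)
	{
		struct ib_qp_attr attr;
		int ret;

		memset(&attr, 0, sizeof attr);
		attr.qp_state = IB_QPS_INIT;
		attr.pkey_index = pkey_index;
		attr.qkey = IB_QP1_QKEY;
		attr.port_num = port;
		ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
				   IB_QP_QKEY | IB_QP_PORT);
		if (ret)
			return ret;

		attr.qp_state = IB_QPS_RTR;	/* UD: no path/AH required */
		ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
		if (ret)
			return ret;

		attr.qp_state = IB_QPS_RTS;	/* start sending from PSN 0 */
		attr.sq_psn = 0;
		return ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
	}
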
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index cc05579ebce7..3f7f77f93a1c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1357,11 +1357,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	if (mlx4_ib_mad_init(ibdev))
 		goto err_reg;
 
+	if (mlx4_ib_init_sriov(ibdev))
+		goto err_mad;
+
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
 		iboe->nb.notifier_call = mlx4_ib_netdev_event;
 		err = register_netdevice_notifier(&iboe->nb);
 		if (err)
-			goto err_reg;
+			goto err_sriov;
 	}
 
 	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1379,6 +1382,12 @@ err_notif:
 		pr_warn("failure unregistering notifier\n");
 	flush_workqueue(wq);
 
+err_sriov:
+	mlx4_ib_close_sriov(ibdev);
+
+err_mad:
+	mlx4_ib_mad_cleanup(ibdev);
+
 err_reg:
 	ib_unregister_device(&ibdev->ib_dev);
 
@@ -1407,6 +1416,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 	struct mlx4_ib_dev *ibdev = ibdev_ptr;
 	int p;
 
+	mlx4_ib_close_sriov(ibdev);
 	mlx4_ib_mad_cleanup(ibdev);
 	ib_unregister_device(&ibdev->ib_dev);
 	if (ibdev->iboe.nb.notifier_call) {
@@ -1428,6 +1438,51 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 	ib_dealloc_device(&ibdev->ib_dev);
 }
 
+static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
+{
+	struct mlx4_ib_demux_work **dm = NULL;
+	struct mlx4_dev *dev = ibdev->dev;
+	int i;
+	unsigned long flags;
+
+	if (!mlx4_is_master(dev))
+		return;
+
+	dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
+	if (!dm) {
+		pr_err("failed to allocate memory for tunneling qp update\n");
+		goto out;
+	}
+
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
+		if (!dm[i]) {
+			pr_err("failed to allocate memory for tunneling qp update work struct\n");
+			for (i = 0; i < dev->caps.num_ports; i++) {
+				if (dm[i])
+					kfree(dm[i]);
+			}
+			goto out;
+		}
+	}
+	/* initialize or tear down tunnel QPs for the slave */
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
+		dm[i]->port = i + 1;
+		dm[i]->slave = slave;
+		dm[i]->do_init = do_init;
+		dm[i]->dev = ibdev;
+		spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
+		if (!ibdev->sriov.is_going_down)
+			queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
+		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
+	}
+out:
+	if (dm)
+		kfree(dm);
+	return;
+}
+
 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
 			  enum mlx4_dev_event event, unsigned long param)
 {
@@ -1435,22 +1490,23 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
 	struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
 	struct mlx4_eqe *eqe = NULL;
 	struct ib_event_work *ew;
-	int port = 0;
+	int p = 0;
 
 	if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
 		eqe = (struct mlx4_eqe *)param;
 	else
-		port = (u8)param;
-
-	if (port > ibdev->num_ports)
-		return;
+		p = (int) param;
 
 	switch (event) {
 	case MLX4_DEV_EVENT_PORT_UP:
+		if (p > ibdev->num_ports)
+			return;
 		ibev.event = IB_EVENT_PORT_ACTIVE;
 		break;
 
 	case MLX4_DEV_EVENT_PORT_DOWN:
+		if (p > ibdev->num_ports)
+			return;
 		ibev.event = IB_EVENT_PORT_ERR;
 		break;
 
@@ -1472,12 +1528,22 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
 		handle_port_mgmt_change_event(&ew->work);
 		return;
 
+	case MLX4_DEV_EVENT_SLAVE_INIT:
+		/* here, p is the slave id */
+		do_slave_init(ibdev, p, 1);
+		return;
+
+	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+		/* here, p is the slave id */
+		do_slave_init(ibdev, p, 0);
+		return;
+
 	default:
 		return;
 	}
 
 	ibev.device	      = ibdev_ptr;
-	ibev.element.port_num = port;
+	ibev.element.port_num = (u8) p;
 
 	ib_dispatch_event(&ibev);
 }
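
The two new SLAVE_INIT/SLAVE_SHUTDOWN cases close the loop with the mlx4_dispatch_event() calls added in cmd.c below: the mlx4 core fans each event out to every registered mlx4_interface, so the slave id dispatched there arrives here as 'param'/'p'. A simplified sketch of the assumed core-side delivery path (from memory of drivers/net/ethernet/mellanox/mlx4/intf.c, not part of this patch):

	void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
				 unsigned long param)
	{
		struct mlx4_priv *priv = mlx4_priv(dev);
		struct mlx4_device_context *dev_ctx;
		unsigned long flags;

		spin_lock_irqsave(&priv->ctx_lock, flags);
		list_for_each_entry(dev_ctx, &priv->ctx_list, list)
			if (dev_ctx->intf->event)
				dev_ctx->intf->event(dev, dev_ctx->context,
						     type, param);
		spin_unlock_irqrestore(&priv->ctx_lock, flags);
	}

Because the handlers run under that spinlock, do_slave_init() cannot sleep; that is why its allocations use GFP_ATOMIC and the actual tunnel QP setup/teardown is deferred to the per-port ud_wq workqueue.
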
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1248d576b031..137941d79870 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -176,6 +176,10 @@ enum mlx4_ib_qp_type {
 	MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
 	MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
 
+enum {
+	MLX4_NUM_TUNNEL_BUFS	= 256,
+};
+
 struct mlx4_ib_tunnel_header {
 	struct mlx4_av av;
 	__be32 remote_qpn;
@@ -263,6 +267,15 @@ struct mlx4_ib_ah {
 	union mlx4_ext_av av;
 };
 
+struct mlx4_ib_demux_work {
+	struct work_struct	work;
+	struct mlx4_ib_dev     *dev;
+	int			slave;
+	int			do_init;
+	u8			port;
+
+};
+
 struct mlx4_ib_tun_tx_buf {
 	struct mlx4_ib_buf buf;
 	struct ib_ah *ah;
@@ -278,9 +291,17 @@ struct mlx4_ib_demux_pv_qp {
 	unsigned tx_ix_tail;
 };
 
+enum mlx4_ib_demux_pv_state {
+	DEMUX_PV_STATE_DOWN,
+	DEMUX_PV_STATE_STARTING,
+	DEMUX_PV_STATE_ACTIVE,
+	DEMUX_PV_STATE_DOWNING,
+};
+
 struct mlx4_ib_demux_pv_ctx {
 	int port;
 	int slave;
+	enum mlx4_ib_demux_pv_state state;
 	int has_smi;
 	struct ib_device *ib_dev;
 	struct ib_cq *cq;
@@ -319,6 +340,13 @@ struct mlx4_ib_iboe {
 	union ib_gid	gid_table[MLX4_MAX_PORTS][128];
 };
 
+struct pkey_mgt {
+	u8			virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+	u16			phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
+	struct list_head	pkey_port_list[MLX4_MFUNC_MAX];
+	struct kobject	       *device_parent[MLX4_MFUNC_MAX];
+};
+
 struct mlx4_ib_dev {
 	struct ib_device	ib_dev;
 	struct mlx4_dev	       *dev;
@@ -340,6 +368,7 @@ struct mlx4_ib_dev {
 	int counters[MLX4_MAX_PORTS];
 	int *eq_table;
 	int eq_added;
+	struct pkey_mgt pkeys;
 };
 
 struct ib_event_work {
@@ -424,6 +453,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
 	return container_of(ibah, struct mlx4_ib_ah, ibah);
 }
 
+int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
+void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
+
 int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
 			struct mlx4_db *db);
 void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -515,4 +547,6 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
 void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
 			    enum ib_event_type type);
 
+void mlx4_ib_tunnels_update_work(struct work_struct *work);
+
 #endif /* MLX4_IB_H */
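
For reference, the new pkeys table added to struct mlx4_ib_dev is what create_pv_sqp() in mad.c consults when moving a proxy QP to INIT: the slave's virtual P_Key index (index 0, the default P_Key, in this patch) is translated to a physical index before being programmed into the QP:

	attr.pkey_index =
		to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];

Nothing in this patch populates virt2phys_pkey or phys_pkey_cache; it only adds the storage, with the mapping filled in by later patches in the series.
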
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index c8fef4353021..cb9bebe28276 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1340,6 +1340,8 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
 	if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
 		goto inform_slave_state;
 
+	mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
+
 	/* write the version in the event field */
 	reply |= mlx4_comm_get_version();
 
@@ -1376,6 +1378,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
 			goto reset_slave;
 		slave_state[slave].vhcr_dma |= param;
 		slave_state[slave].active = true;
+		mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
 		break;
 	case MLX4_COMM_CMD_VHCR_POST:
 		if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index d5c82b7216de..b6b8d341b6c8 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -54,7 +54,8 @@ enum {
 };
 
 enum {
-	MLX4_MAX_PORTS		= 2
+	MLX4_MAX_PORTS		= 2,
+	MLX4_MAX_PORT_PKEYS	= 128
 };
 
 /* base qkey for use in sriov tunnel-qp/proxy-qp communication.
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index d813704b963b..c257e1b211be 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -45,6 +45,8 @@ enum mlx4_dev_event {
 	MLX4_DEV_EVENT_PORT_DOWN,
 	MLX4_DEV_EVENT_PORT_REINIT,
 	MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
+	MLX4_DEV_EVENT_SLAVE_INIT,
+	MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
 };
 
 struct mlx4_interface {