path: root/drivers/infiniband/hw/mlx4/qp.c
author	Jack Morgenstein <jackm@dev.mellanox.co.il>	2012-08-03 04:40:40 -0400
committer	Roland Dreier <roland@purestorage.com>	2012-09-30 23:33:30 -0400
commit	1ffeb2eb8be9936e9dc1f9af2d5f4c14d69a0d36 (patch)
tree	1e79a4a6c3955a21c7802618d58b3bd7605a937e /drivers/infiniband/hw/mlx4/qp.c
parent	73aaa7418f8069103ca56fc620b3cd16c5a37d6e (diff)
IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support
1. Introduce the basic SR-IOV paravirtualization context objects for multiplexing and demultiplexing MADs.
2. Introduce support for the new proxy and tunnel QP types.

This patch introduces the objects required by the master for managing QP paravirtualization for guests.

struct mlx4_ib_sriov is created by the master only. It is a container for the following:
1. All the info required by the PPF to multiplex and demultiplex MADs, including those from the PF (struct mlx4_ib_demux_ctx demux).
2. All the info required to manage alias GUIDs (i.e., the GUID at index 0 that each guest perceives; in fact, this is not the GUID which is actually at index 0, but the GUID which is at index[<VF number>] in the physical table).
3. Structures used to manage CM paravirtualization.
4. Structures for managing the real special QPs when running in SR-IOV mode. The real SQPs are controlled by the PPF in this case; all SQPs created and controlled by the ib core layer are proxy SQPs.

struct mlx4_ib_demux_ctx contains the per-port information needed to manage paravirtualization:
1. All multicast paravirt info.
2. All tunnel-QP paravirt info for the port.
3. GUID table and GUID prefix for the port.
4. Work queues.

struct mlx4_ib_demux_pv_ctx contains all the info for managing the paravirtualized QPs for one slave/port.

struct mlx4_ib_demux_pv_qp contains the info needed to run an individual QP (either tunnel QP or real SQP).

Note: We made use of the 2 most significant bits in enum mlx4_ib_qp_flags (based on enum ib_qp_create_flags in ib_verbs.h). We need these bits in the low-level driver for internal purposes.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <roland@purestorage.com>
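To picture how these objects nest, a rough C sketch of the hierarchy described above may help. The field names, array sizes, and types here are illustrative stand-ins (prefixed sketch_*), not the definitions this series adds to mlx4_ib.h; only subnet_prefix and guid_cache correspond to fields actually referenced in the qp.c hunks below.

	#include <linux/types.h>

	struct ib_qp;	/* opaque verbs QP handle */

	struct sketch_demux_pv_qp {		/* one tunnel QP or one real SQP */
		struct ib_qp *qp;
		/* receive/transmit buffer rings, QP type, ... */
	};

	struct sketch_demux_pv_ctx {		/* per slave, per port */
		int port;
		int slave;
		struct sketch_demux_pv_qp qp[2];	/* QP0 and QP1 */
		/* CQ, PD, MR, work queue for forwarding MADs, ... */
	};

	struct sketch_demux_ctx {		/* per port, master (PPF) only */
		int port;
		__be64 subnet_prefix;			/* GUID prefix for the port */
		__be64 guid_cache[128];			/* per-port GUID table cache (size assumed) */
		struct sketch_demux_pv_ctx **tun;	/* one tunnel context per slave */
		/* multicast paravirt state, work queues, ... */
	};

	struct sketch_ib_sriov {		/* one per device, master only */
		struct sketch_demux_ctx demux[2];	/* one per IB port (assumed two ports) */
		/* alias-GUID state, CM paravirt state, real-SQP handles, ... */
	};

The qp.c changes below consume this state mostly through sriov.demux[port - 1].subnet_prefix and .guid_cache[] when building UD headers on a multi-function device.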
Diffstat (limited to 'drivers/infiniband/hw/mlx4/qp.c')
-rw-r--r--	drivers/infiniband/hw/mlx4/qp.c	616
1 file changed, 543 insertions, 73 deletions
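The heart of the patch is the QP-type demultiplexing added to create_qp_common() (see that hunk below): ordinary special-QP creation on a multi-function device is silently turned into a proxy QP, while the master additionally creates tunnel QPs on behalf of each slave. A condensed, self-contained C sketch of that decision follows; the helper name and the flattened boolean parameters are stand-ins, not the driver's real signatures.

	/* Condensed restatement of the create_qp_common() type selection below;
	 * parameters are flattened booleans rather than the real mlx4 structures. */
	enum sketch_qp_type {
		SKETCH_QPT_PROXY_SMI_OWNER,	/* master's own QP0, proxied */
		SKETCH_QPT_PROXY_SMI,		/* slave QP0 proxy */
		SKETCH_QPT_PROXY_GSI,		/* QP1 proxy (master or slave) */
		SKETCH_QPT_TUN_SMI_OWNER,	/* tunnel QP backing the master's QP0 */
		SKETCH_QPT_TUN_SMI,		/* tunnel QP backing a slave's QP0 */
		SKETCH_QPT_TUN_GSI,		/* tunnel QP backing a QP1 */
		SKETCH_QPT_UNCHANGED,		/* keep the ib_verbs QP type as-is */
	};

	static enum sketch_qp_type
	sketch_pick_qp_type(int creating_sqp, int is_mfunc, int is_master,
			    int sriov_sqp_flag, int qp_is_gsi,
			    int tunnel_flag, int slave_is_master)
	{
		if (creating_sqp && is_mfunc && (!is_master || !sriov_sqp_flag)) {
			/* SQPs created through the normal verbs path become proxies. */
			if (qp_is_gsi)
				return SKETCH_QPT_PROXY_GSI;
			return is_master ? SKETCH_QPT_PROXY_SMI_OWNER
					 : SKETCH_QPT_PROXY_SMI;
		}
		if (!creating_sqp && tunnel_flag) {
			/* Tunnel QPs are created by the master only, one per
			 * slave/port/special-QP combination. */
			if (qp_is_gsi)
				return SKETCH_QPT_TUN_GSI;
			return slave_is_master ? SKETCH_QPT_TUN_SMI_OWNER
					       : SKETCH_QPT_TUN_SMI;
		}
		return SKETCH_QPT_UNCHANGED;
	}

The tunnel QPs then receive fixed QPNs computed from base_tunnel_sqpn, the slave number, the proxied QP type, and the port, which is why the new is_tunnel_qp() helper in the diff can recognize them purely by QPN range.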
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index f585eddef4b7..a8622510de42 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -38,6 +38,7 @@
38#include <rdma/ib_cache.h> 38#include <rdma/ib_cache.h>
39#include <rdma/ib_pack.h> 39#include <rdma/ib_pack.h>
40#include <rdma/ib_addr.h> 40#include <rdma/ib_addr.h>
41#include <rdma/ib_mad.h>
41 42
42#include <linux/mlx4/qp.h> 43#include <linux/mlx4/qp.h>
43 44
@@ -110,16 +111,38 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
110 return container_of(mqp, struct mlx4_ib_sqp, qp); 111 return container_of(mqp, struct mlx4_ib_sqp, qp);
111} 112}
112 113
114static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
115{
116 if (!mlx4_is_master(dev->dev))
117 return 0;
118
119 return qp->mqp.qpn >= dev->dev->caps.base_sqpn &&
120 qp->mqp.qpn < dev->dev->caps.base_sqpn +
121 8 + 16 * MLX4_MFUNC_MAX;
122}
123
113static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) 124static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
114{ 125{
115 return qp->mqp.qpn >= dev->dev->caps.sqp_start && 126 return ((mlx4_is_master(dev->dev) &&
116 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; 127 qp->mqp.qpn >= dev->dev->caps.base_sqpn &&
128 qp->mqp.qpn <= dev->dev->caps.base_sqpn + 3) ||
129 (qp->mqp.qpn >= dev->dev->caps.sqp_start &&
130 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3));
117} 131}
118 132
133/* used for INIT/CLOSE port logic */
119static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) 134static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
120{ 135{
121 return qp->mqp.qpn >= dev->dev->caps.sqp_start && 136 int qp0;
122 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; 137
138 /* qp0 is either the proxy qp0, or the real qp0 */
139 qp0 = (qp->mqp.qpn >= dev->dev->caps.sqp_start &&
140 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1) ||
141 (mlx4_is_master(dev->dev) &&
142 qp->mqp.qpn >= dev->dev->caps.base_sqpn &&
143 qp->mqp.qpn <= dev->dev->caps.base_sqpn + 1);
144
145 return qp0;
123} 146}
124 147
125static void *get_wqe(struct mlx4_ib_qp *qp, int offset) 148static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@@ -270,7 +293,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
270 } 293 }
271} 294}
272 295
273static int send_wqe_overhead(enum ib_qp_type type, u32 flags) 296static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
274{ 297{
275 /* 298 /*
276 * UD WQEs must have a datagram segment. 299 * UD WQEs must have a datagram segment.
@@ -279,19 +302,29 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
279 * header and space for the ICRC). 302 * header and space for the ICRC).
280 */ 303 */
281 switch (type) { 304 switch (type) {
282 case IB_QPT_UD: 305 case MLX4_IB_QPT_UD:
283 return sizeof (struct mlx4_wqe_ctrl_seg) + 306 return sizeof (struct mlx4_wqe_ctrl_seg) +
284 sizeof (struct mlx4_wqe_datagram_seg) + 307 sizeof (struct mlx4_wqe_datagram_seg) +
285 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); 308 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
286 case IB_QPT_UC: 309 case MLX4_IB_QPT_PROXY_SMI_OWNER:
310 case MLX4_IB_QPT_PROXY_SMI:
311 case MLX4_IB_QPT_PROXY_GSI:
312 return sizeof (struct mlx4_wqe_ctrl_seg) +
313 sizeof (struct mlx4_wqe_datagram_seg) + 64;
314 case MLX4_IB_QPT_TUN_SMI_OWNER:
315 case MLX4_IB_QPT_TUN_GSI:
316 return sizeof (struct mlx4_wqe_ctrl_seg) +
317 sizeof (struct mlx4_wqe_datagram_seg);
318
319 case MLX4_IB_QPT_UC:
287 return sizeof (struct mlx4_wqe_ctrl_seg) + 320 return sizeof (struct mlx4_wqe_ctrl_seg) +
288 sizeof (struct mlx4_wqe_raddr_seg); 321 sizeof (struct mlx4_wqe_raddr_seg);
289 case IB_QPT_RC: 322 case MLX4_IB_QPT_RC:
290 return sizeof (struct mlx4_wqe_ctrl_seg) + 323 return sizeof (struct mlx4_wqe_ctrl_seg) +
291 sizeof (struct mlx4_wqe_atomic_seg) + 324 sizeof (struct mlx4_wqe_atomic_seg) +
292 sizeof (struct mlx4_wqe_raddr_seg); 325 sizeof (struct mlx4_wqe_raddr_seg);
293 case IB_QPT_SMI: 326 case MLX4_IB_QPT_SMI:
294 case IB_QPT_GSI: 327 case MLX4_IB_QPT_GSI:
295 return sizeof (struct mlx4_wqe_ctrl_seg) + 328 return sizeof (struct mlx4_wqe_ctrl_seg) +
296 ALIGN(MLX4_IB_UD_HEADER_SIZE + 329 ALIGN(MLX4_IB_UD_HEADER_SIZE +
297 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, 330 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
@@ -345,7 +378,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
345} 378}
346 379
347static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, 380static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
348 enum ib_qp_type type, struct mlx4_ib_qp *qp) 381 enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
349{ 382{
350 int s; 383 int s;
351 384
@@ -360,7 +393,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
360 * For MLX transport we need 2 extra S/G entries: 393 * For MLX transport we need 2 extra S/G entries:
361 * one for the header and one for the checksum at the end 394 * one for the header and one for the checksum at the end
362 */ 395 */
363 if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && 396 if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
397 type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
364 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) 398 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
365 return -EINVAL; 399 return -EINVAL;
366 400
@@ -404,7 +438,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
404 */ 438 */
405 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && 439 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
406 qp->sq_signal_bits && BITS_PER_LONG == 64 && 440 qp->sq_signal_bits && BITS_PER_LONG == 64 &&
407 type != IB_QPT_SMI && type != IB_QPT_GSI) 441 type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
442 !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
443 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
408 qp->sq.wqe_shift = ilog2(64); 444 qp->sq.wqe_shift = ilog2(64);
409 else 445 else
410 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); 446 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
@@ -476,6 +512,54 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
476 return 0; 512 return 0;
477} 513}
478 514
515static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
516{
517 int i;
518
519 qp->sqp_proxy_rcv =
520 kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
521 GFP_KERNEL);
522 if (!qp->sqp_proxy_rcv)
523 return -ENOMEM;
524 for (i = 0; i < qp->rq.wqe_cnt; i++) {
525 qp->sqp_proxy_rcv[i].addr =
526 kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
527 GFP_KERNEL);
528 if (!qp->sqp_proxy_rcv[i].addr)
529 goto err;
530 qp->sqp_proxy_rcv[i].map =
531 ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
532 sizeof (struct mlx4_ib_proxy_sqp_hdr),
533 DMA_FROM_DEVICE);
534 }
535 return 0;
536
537err:
538 while (i > 0) {
539 --i;
540 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
541 sizeof (struct mlx4_ib_proxy_sqp_hdr),
542 DMA_FROM_DEVICE);
543 kfree(qp->sqp_proxy_rcv[i].addr);
544 }
545 kfree(qp->sqp_proxy_rcv);
546 qp->sqp_proxy_rcv = NULL;
547 return -ENOMEM;
548}
549
550static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
551{
552 int i;
553
554 for (i = 0; i < qp->rq.wqe_cnt; i++) {
555 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
556 sizeof (struct mlx4_ib_proxy_sqp_hdr),
557 DMA_FROM_DEVICE);
558 kfree(qp->sqp_proxy_rcv[i].addr);
559 }
560 kfree(qp->sqp_proxy_rcv);
561}
562
479static int qp_has_rq(struct ib_qp_init_attr *attr) 563static int qp_has_rq(struct ib_qp_init_attr *attr)
480{ 564{
481 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) 565 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
@@ -486,10 +570,71 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
486 570
487static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, 571static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
488 struct ib_qp_init_attr *init_attr, 572 struct ib_qp_init_attr *init_attr,
489 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) 573 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
490{ 574{
491 int qpn; 575 int qpn;
492 int err; 576 int err;
577 struct mlx4_ib_sqp *sqp;
578 struct mlx4_ib_qp *qp;
579 enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
580
581 /* When tunneling special qps, we use a plain UD qp */
582 if (sqpn) {
583 if (mlx4_is_mfunc(dev->dev) &&
584 (!mlx4_is_master(dev->dev) ||
585 !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
586 if (init_attr->qp_type == IB_QPT_GSI)
587 qp_type = MLX4_IB_QPT_PROXY_GSI;
588 else if (mlx4_is_master(dev->dev))
589 qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
590 else
591 qp_type = MLX4_IB_QPT_PROXY_SMI;
592 }
593 qpn = sqpn;
594 /* add extra sg entry for tunneling */
595 init_attr->cap.max_recv_sge++;
596 } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
597 struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
598 container_of(init_attr,
599 struct mlx4_ib_qp_tunnel_init_attr, init_attr);
600 if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
601 tnl_init->proxy_qp_type != IB_QPT_GSI) ||
602 !mlx4_is_master(dev->dev))
603 return -EINVAL;
604 if (tnl_init->proxy_qp_type == IB_QPT_GSI)
605 qp_type = MLX4_IB_QPT_TUN_GSI;
606 else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
607 qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
608 else
609 qp_type = MLX4_IB_QPT_TUN_SMI;
610 qpn = dev->dev->caps.base_tunnel_sqpn + 8 * tnl_init->slave +
611 tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
612 sqpn = qpn;
613 }
614
615 if (!*caller_qp) {
616 if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
617 (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
618 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
619 sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
620 if (!sqp)
621 return -ENOMEM;
622 qp = &sqp->qp;
623 } else {
624 qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
625 if (!qp)
626 return -ENOMEM;
627 }
628 } else
629 qp = *caller_qp;
630
631 qp->mlx4_ib_qp_type = qp_type;
632
633 if (mlx4_is_mfunc(dev->dev) &&
634 (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI)) {
635 qpn -= 8;
636 sqpn -= 8;
637 }
493 638
494 mutex_init(&qp->mutex); 639 mutex_init(&qp->mutex);
495 spin_lock_init(&qp->sq.lock); 640 spin_lock_init(&qp->sq.lock);
@@ -550,7 +695,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
550 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) 695 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
551 qp->flags |= MLX4_IB_QP_LSO; 696 qp->flags |= MLX4_IB_QP_LSO;
552 697
553 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); 698 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
554 if (err) 699 if (err)
555 goto err; 700 goto err;
556 701
@@ -586,7 +731,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
586 } 731 }
587 732
588 if (sqpn) { 733 if (sqpn) {
589 qpn = sqpn; 734 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
735 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
736 if (alloc_proxy_bufs(pd->device, qp)) {
737 err = -ENOMEM;
738 goto err_wrid;
739 }
740 }
590 } else { 741 } else {
591 /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE 742 /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
592 * BlueFlame setup flow wrongly causes VLAN insertion. */ 743 * BlueFlame setup flow wrongly causes VLAN insertion. */
@@ -595,7 +746,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
595 else 746 else
596 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); 747 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
597 if (err) 748 if (err)
598 goto err_wrid; 749 goto err_proxy;
599 } 750 }
600 751
601 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); 752 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
@@ -613,13 +764,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
613 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); 764 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
614 765
615 qp->mqp.event = mlx4_ib_qp_event; 766 qp->mqp.event = mlx4_ib_qp_event;
616 767 if (!*caller_qp)
768 *caller_qp = qp;
617 return 0; 769 return 0;
618 770
619err_qpn: 771err_qpn:
620 if (!sqpn) 772 if (!sqpn)
621 mlx4_qp_release_range(dev->dev, qpn, 1); 773 mlx4_qp_release_range(dev->dev, qpn, 1);
622 774err_proxy:
775 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
776 free_proxy_bufs(pd->device, qp);
623err_wrid: 777err_wrid:
624 if (pd->uobject) { 778 if (pd->uobject) {
625 if (qp_has_rq(init_attr)) 779 if (qp_has_rq(init_attr))
@@ -643,6 +797,8 @@ err_db:
643 mlx4_db_free(dev->dev, &qp->db); 797 mlx4_db_free(dev->dev, &qp->db);
644 798
645err: 799err:
800 if (!*caller_qp)
801 kfree(qp);
646 return err; 802 return err;
647} 803}
648 804
@@ -755,7 +911,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
755 911
756 mlx4_qp_free(dev->dev, &qp->mqp); 912 mlx4_qp_free(dev->dev, &qp->mqp);
757 913
758 if (!is_sqp(dev, qp)) 914 if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
759 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); 915 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
760 916
761 mlx4_mtt_cleanup(dev->dev, &qp->mtt); 917 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
@@ -768,6 +924,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
768 } else { 924 } else {
769 kfree(qp->sq.wrid); 925 kfree(qp->sq.wrid);
770 kfree(qp->rq.wrid); 926 kfree(qp->rq.wrid);
927 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
928 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
929 free_proxy_bufs(&dev->ib_dev, qp);
771 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); 930 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
772 if (qp->rq.wqe_cnt) 931 if (qp->rq.wqe_cnt)
773 mlx4_db_free(dev->dev, &qp->db); 932 mlx4_db_free(dev->dev, &qp->db);
@@ -780,21 +939,25 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
780 struct ib_qp_init_attr *init_attr, 939 struct ib_qp_init_attr *init_attr,
781 struct ib_udata *udata) 940 struct ib_udata *udata)
782{ 941{
783 struct mlx4_ib_sqp *sqp; 942 struct mlx4_ib_qp *qp = NULL;
784 struct mlx4_ib_qp *qp;
785 int err; 943 int err;
786 u16 xrcdn = 0; 944 u16 xrcdn = 0;
787 945
788 /* 946 /*
789 * We only support LSO and multicast loopback blocking, and 947 * We only support LSO, vendor flag1, and multicast loopback blocking,
790 * only for kernel UD QPs. 948 * and only for kernel UD QPs.
791 */ 949 */
792 if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | 950 if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
793 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) 951 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
952 MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
794 return ERR_PTR(-EINVAL); 953 return ERR_PTR(-EINVAL);
795 954
796 if (init_attr->create_flags && 955 if (init_attr->create_flags &&
797 (udata || init_attr->qp_type != IB_QPT_UD)) 956 (udata ||
957 ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
958 init_attr->qp_type != IB_QPT_UD) ||
959 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
960 init_attr->qp_type > IB_QPT_GSI)))
798 return ERR_PTR(-EINVAL); 961 return ERR_PTR(-EINVAL);
799 962
800 switch (init_attr->qp_type) { 963 switch (init_attr->qp_type) {
@@ -810,18 +973,17 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
810 /* fall through */ 973 /* fall through */
811 case IB_QPT_RC: 974 case IB_QPT_RC:
812 case IB_QPT_UC: 975 case IB_QPT_UC:
813 case IB_QPT_UD:
814 case IB_QPT_RAW_PACKET: 976 case IB_QPT_RAW_PACKET:
815 {
816 qp = kzalloc(sizeof *qp, GFP_KERNEL); 977 qp = kzalloc(sizeof *qp, GFP_KERNEL);
817 if (!qp) 978 if (!qp)
818 return ERR_PTR(-ENOMEM); 979 return ERR_PTR(-ENOMEM);
819 980 /* fall through */
820 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp); 981 case IB_QPT_UD:
821 if (err) { 982 {
822 kfree(qp); 983 err = create_qp_common(to_mdev(pd->device), pd, init_attr,
984 udata, 0, &qp);
985 if (err)
823 return ERR_PTR(err); 986 return ERR_PTR(err);
824 }
825 987
826 qp->ibqp.qp_num = qp->mqp.qpn; 988 qp->ibqp.qp_num = qp->mqp.qpn;
827 qp->xrcdn = xrcdn; 989 qp->xrcdn = xrcdn;
@@ -835,21 +997,13 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
835 if (udata) 997 if (udata)
836 return ERR_PTR(-EINVAL); 998 return ERR_PTR(-EINVAL);
837 999
838 sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
839 if (!sqp)
840 return ERR_PTR(-ENOMEM);
841
842 qp = &sqp->qp;
843
844 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 1000 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
845 to_mdev(pd->device)->dev->caps.sqp_start + 1001 to_mdev(pd->device)->dev->caps.sqp_start +
846 (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + 1002 (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
847 init_attr->port_num - 1, 1003 init_attr->port_num - 1,
848 qp); 1004 &qp);
849 if (err) { 1005 if (err)
850 kfree(sqp);
851 return ERR_PTR(err); 1006 return ERR_PTR(err);
852 }
853 1007
854 qp->port = init_attr->port_num; 1008 qp->port = init_attr->port_num;
855 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; 1009 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@@ -884,18 +1038,27 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
884 return 0; 1038 return 0;
885} 1039}
886 1040
887static int to_mlx4_st(enum ib_qp_type type) 1041static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
888{ 1042{
889 switch (type) { 1043 switch (type) {
890 case IB_QPT_RC: return MLX4_QP_ST_RC; 1044 case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
891 case IB_QPT_UC: return MLX4_QP_ST_UC; 1045 case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
892 case IB_QPT_UD: return MLX4_QP_ST_UD; 1046 case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
893 case IB_QPT_XRC_INI: 1047 case MLX4_IB_QPT_XRC_INI:
894 case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; 1048 case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
895 case IB_QPT_SMI: 1049 case MLX4_IB_QPT_SMI:
896 case IB_QPT_GSI: 1050 case MLX4_IB_QPT_GSI:
897 case IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; 1051 case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
898 default: return -1; 1052
1053 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1054 case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
1055 MLX4_QP_ST_MLX : -1);
1056 case MLX4_IB_QPT_PROXY_SMI:
1057 case MLX4_IB_QPT_TUN_SMI:
1058 case MLX4_IB_QPT_PROXY_GSI:
1059 case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
1060 MLX4_QP_ST_UD : -1);
1061 default: return -1;
899 } 1062 }
900} 1063}
901 1064
@@ -1043,7 +1206,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1043 return -ENOMEM; 1206 return -ENOMEM;
1044 1207
1045 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | 1208 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
1046 (to_mlx4_st(ibqp->qp_type) << 16)); 1209 (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
1047 1210
1048 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) 1211 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
1049 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); 1212 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1121,13 +1284,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1121 } 1284 }
1122 1285
1123 if (attr_mask & IB_QP_PKEY_INDEX) { 1286 if (attr_mask & IB_QP_PKEY_INDEX) {
1287 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1288 context->pri_path.disable_pkey_check = 0x40;
1124 context->pri_path.pkey_index = attr->pkey_index; 1289 context->pri_path.pkey_index = attr->pkey_index;
1125 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; 1290 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
1126 } 1291 }
1127 1292
1128 if (attr_mask & IB_QP_AV) { 1293 if (attr_mask & IB_QP_AV) {
1129 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, 1294 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
1130 attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) 1295 attr_mask & IB_QP_PORT ?
1296 attr->port_num : qp->port))
1131 goto out; 1297 goto out;
1132 1298
1133 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | 1299 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -1210,8 +1376,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1210 if (attr_mask & IB_QP_RQ_PSN) 1376 if (attr_mask & IB_QP_RQ_PSN)
1211 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); 1377 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1212 1378
1379 /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
1213 if (attr_mask & IB_QP_QKEY) { 1380 if (attr_mask & IB_QP_QKEY) {
1214 context->qkey = cpu_to_be32(attr->qkey); 1381 if (qp->mlx4_ib_qp_type &
1382 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
1383 context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
1384 else {
1385 if (mlx4_is_mfunc(dev->dev) &&
1386 !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
1387 (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
1388 MLX4_RESERVED_QKEY_BASE) {
1389 pr_err("Cannot use reserved QKEY"
1390 " 0x%x (range 0xffff0000..0xffffffff"
1391 " is reserved)\n", attr->qkey);
1392 err = -EINVAL;
1393 goto out;
1394 }
1395 context->qkey = cpu_to_be32(attr->qkey);
1396 }
1215 optpar |= MLX4_QP_OPTPAR_Q_KEY; 1397 optpar |= MLX4_QP_OPTPAR_Q_KEY;
1216 } 1398 }
1217 1399
@@ -1227,10 +1409,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1227 ibqp->qp_type == IB_QPT_UD || 1409 ibqp->qp_type == IB_QPT_UD ||
1228 ibqp->qp_type == IB_QPT_RAW_PACKET)) { 1410 ibqp->qp_type == IB_QPT_RAW_PACKET)) {
1229 context->pri_path.sched_queue = (qp->port - 1) << 6; 1411 context->pri_path.sched_queue = (qp->port - 1) << 6;
1230 if (is_qp0(dev, qp)) 1412 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1413 qp->mlx4_ib_qp_type &
1414 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
1231 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; 1415 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
1232 else 1416 if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
1417 context->pri_path.fl = 0x80;
1418 } else {
1419 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1420 context->pri_path.fl = 0x80;
1233 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; 1421 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
1422 }
1234 } 1423 }
1235 1424
1236 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && 1425 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
@@ -1346,7 +1535,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1346 } 1535 }
1347 1536
1348 if ((attr_mask & IB_QP_PORT) && 1537 if ((attr_mask & IB_QP_PORT) &&
1349 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { 1538 (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
1350 pr_debug("qpn 0x%x: invalid port number (%d) specified " 1539 pr_debug("qpn 0x%x: invalid port number (%d) specified "
1351 "for transition %d to %d. qp_type %d\n", 1540 "for transition %d to %d. qp_type %d\n",
1352 ibqp->qp_num, attr->port_num, cur_state, 1541 ibqp->qp_num, attr->port_num, cur_state,
@@ -1400,6 +1589,115 @@ out:
1400 return err; 1589 return err;
1401} 1590}
1402 1591
1592static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
1593 struct ib_send_wr *wr,
1594 void *wqe, unsigned *mlx_seg_len)
1595{
1596 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
1597 struct ib_device *ib_dev = &mdev->ib_dev;
1598 struct mlx4_wqe_mlx_seg *mlx = wqe;
1599 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1600 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1601 u16 pkey;
1602 u32 qkey;
1603 int send_size;
1604 int header_size;
1605 int spc;
1606 int i;
1607
1608 if (wr->opcode != IB_WR_SEND)
1609 return -EINVAL;
1610
1611 send_size = 0;
1612
1613 for (i = 0; i < wr->num_sge; ++i)
1614 send_size += wr->sg_list[i].length;
1615
1616 /* for proxy-qp0 sends, need to add in size of tunnel header */
1617 /* for tunnel-qp0 sends, tunnel header is already in s/g list */
1618 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
1619 send_size += sizeof (struct mlx4_ib_tunnel_header);
1620
1621 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
1622
1623 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
1624 sqp->ud_header.lrh.service_level =
1625 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1626 sqp->ud_header.lrh.destination_lid =
1627 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1628 sqp->ud_header.lrh.source_lid =
1629 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1630 }
1631
1632 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1633
1634 /* force loopback */
1635 mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
1636 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1637
1638 sqp->ud_header.lrh.virtual_lane = 0;
1639 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1640 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
1641 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1642 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
1643 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1644 else
1645 sqp->ud_header.bth.destination_qpn =
1646 cpu_to_be32(mdev->dev->caps.base_tunnel_sqpn +
1647 sqp->qp.port - 1);
1648
1649 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1650 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
1651 return -EINVAL;
1652 sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
1653 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
1654
1655 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1656 sqp->ud_header.immediate_present = 0;
1657
1658 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
1659
1660 /*
1661 * Inline data segments may not cross a 64 byte boundary. If
1662 * our UD header is bigger than the space available up to the
1663 * next 64 byte boundary in the WQE, use two inline data
1664 * segments to hold the UD header.
1665 */
1666 spc = MLX4_INLINE_ALIGN -
1667 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1668 if (header_size <= spc) {
1669 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1670 memcpy(inl + 1, sqp->header_buf, header_size);
1671 i = 1;
1672 } else {
1673 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1674 memcpy(inl + 1, sqp->header_buf, spc);
1675
1676 inl = (void *) (inl + 1) + spc;
1677 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
1678 /*
1679 * Need a barrier here to make sure all the data is
1680 * visible before the byte_count field is set.
1681 * Otherwise the HCA prefetcher could grab the 64-byte
1682 * chunk with this inline segment and get a valid (!=
1683 * 0xffffffff) byte count but stale data, and end up
1684 * generating a packet with bad headers.
1685 *
1686 * The first inline segment's byte_count field doesn't
1687 * need a barrier, because it comes after a
1688 * control/MLX segment and therefore is at an offset
1689 * of 16 mod 64.
1690 */
1691 wmb();
1692 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1693 i = 2;
1694 }
1695
1696 *mlx_seg_len =
1697 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1698 return 0;
1699}
1700
1403static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 1701static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1404 void *wqe, unsigned *mlx_seg_len) 1702 void *wqe, unsigned *mlx_seg_len)
1405{ 1703{
@@ -1418,6 +1716,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1418 int is_vlan = 0; 1716 int is_vlan = 0;
1419 int is_grh; 1717 int is_grh;
1420 u16 vlan; 1718 u16 vlan;
1719 int err = 0;
1421 1720
1422 send_size = 0; 1721 send_size = 0;
1423 for (i = 0; i < wr->num_sge; ++i) 1722 for (i = 0; i < wr->num_sge; ++i)
@@ -1426,8 +1725,24 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1426 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; 1725 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
1427 is_grh = mlx4_ib_ah_grh_present(ah); 1726 is_grh = mlx4_ib_ah_grh_present(ah);
1428 if (is_eth) { 1727 if (is_eth) {
1429 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, 1728 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
1430 ah->av.ib.gid_index, &sgid); 1729 /* When multi-function is enabled, the ib_core gid
1730 * indexes don't necessarily match the hw ones, so
1731 * we must use our own cache */
1732 sgid.global.subnet_prefix =
1733 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1734 subnet_prefix;
1735 sgid.global.interface_id =
1736 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1737 guid_cache[ah->av.ib.gid_index];
1738 } else {
1739 err = ib_get_cached_gid(ib_dev,
1740 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1741 ah->av.ib.gid_index, &sgid);
1742 if (err)
1743 return err;
1744 }
1745
1431 vlan = rdma_get_vlan_id(&sgid); 1746 vlan = rdma_get_vlan_id(&sgid);
1432 is_vlan = vlan < 0x1000; 1747 is_vlan = vlan < 0x1000;
1433 } 1748 }
@@ -1446,8 +1761,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1446 sqp->ud_header.grh.flow_label = 1761 sqp->ud_header.grh.flow_label =
1447 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 1762 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1448 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; 1763 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
1449 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, 1764 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
1450 ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); 1765 /* When multi-function is enabled, the ib_core gid
1766 * indexes don't necessarily match the hw ones, so
1767 * we must use our own cache */
1768 sqp->ud_header.grh.source_gid.global.subnet_prefix =
1769 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1770 subnet_prefix;
1771 sqp->ud_header.grh.source_gid.global.interface_id =
1772 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1773 guid_cache[ah->av.ib.gid_index];
1774 } else
1775 ib_get_cached_gid(ib_dev,
1776 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1777 ah->av.ib.gid_index,
1778 &sqp->ud_header.grh.source_gid);
1451 memcpy(sqp->ud_header.grh.destination_gid.raw, 1779 memcpy(sqp->ud_header.grh.destination_gid.raw,
1452 ah->av.ib.dgid, 16); 1780 ah->av.ib.dgid, 16);
1453 } 1781 }
@@ -1459,6 +1787,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1459 (sqp->ud_header.lrh.destination_lid == 1787 (sqp->ud_header.lrh.destination_lid ==
1460 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | 1788 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
1461 (sqp->ud_header.lrh.service_level << 8)); 1789 (sqp->ud_header.lrh.service_level << 8));
1790 if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
1791 mlx->flags |= cpu_to_be32(0x1); /* force loopback */
1462 mlx->rlid = sqp->ud_header.lrh.destination_lid; 1792 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1463 } 1793 }
1464 1794
@@ -1667,6 +1997,63 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
1667 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); 1997 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
1668} 1998}
1669 1999
2000static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2001 struct mlx4_wqe_datagram_seg *dseg,
2002 struct ib_send_wr *wr, enum ib_qp_type qpt)
2003{
2004 union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
2005 struct mlx4_av sqp_av = {0};
2006 int port = *((u8 *) &av->ib.port_pd) & 0x3;
2007
2008 /* force loopback */
2009 sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
2010 sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
2011 sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
2012 cpu_to_be32(0xf0000000);
2013
2014 memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
2015 dseg->dqpn = cpu_to_be32(dev->dev->caps.base_tunnel_sqpn +
2016 qpt * 2 + port - 1);
2017 /* use well-known qkey from the QPC */
2018 dseg->qkey = cpu_to_be32(0x80000000);
2019}
2020
2021static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
2022{
2023 struct mlx4_wqe_inline_seg *inl = wqe;
2024 struct mlx4_ib_tunnel_header hdr;
2025 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2026 int spc;
2027 int i;
2028
2029 memcpy(&hdr.av, &ah->av, sizeof hdr.av);
2030 hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2031 hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
2032 hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2033
2034 spc = MLX4_INLINE_ALIGN -
2035 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2036 if (sizeof (hdr) <= spc) {
2037 memcpy(inl + 1, &hdr, sizeof (hdr));
2038 wmb();
2039 inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
2040 i = 1;
2041 } else {
2042 memcpy(inl + 1, &hdr, spc);
2043 wmb();
2044 inl->byte_count = cpu_to_be32(1 << 31 | spc);
2045
2046 inl = (void *) (inl + 1) + spc;
2047 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
2048 wmb();
2049 inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
2050 i = 2;
2051 }
2052
2053 *mlx_seg_len =
2054 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
2055}
2056
1670static void set_mlx_icrc_seg(void *dseg) 2057static void set_mlx_icrc_seg(void *dseg)
1671{ 2058{
1672 u32 *t = dseg; 2059 u32 *t = dseg;
@@ -1748,6 +2135,13 @@ static __be32 send_ieth(struct ib_send_wr *wr)
1748 } 2135 }
1749} 2136}
1750 2137
2138static void add_zero_len_inline(void *wqe)
2139{
2140 struct mlx4_wqe_inline_seg *inl = wqe;
2141 memset(wqe, 0, 16);
2142 inl->byte_count = cpu_to_be32(1 << 31);
2143}
2144
1751int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 2145int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1752 struct ib_send_wr **bad_wr) 2146 struct ib_send_wr **bad_wr)
1753{ 2147{
@@ -1806,9 +2200,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1806 wqe += sizeof *ctrl; 2200 wqe += sizeof *ctrl;
1807 size = sizeof *ctrl / 16; 2201 size = sizeof *ctrl / 16;
1808 2202
1809 switch (ibqp->qp_type) { 2203 switch (qp->mlx4_ib_qp_type) {
1810 case IB_QPT_RC: 2204 case MLX4_IB_QPT_RC:
1811 case IB_QPT_UC: 2205 case MLX4_IB_QPT_UC:
1812 switch (wr->opcode) { 2206 switch (wr->opcode) {
1813 case IB_WR_ATOMIC_CMP_AND_SWP: 2207 case IB_WR_ATOMIC_CMP_AND_SWP:
1814 case IB_WR_ATOMIC_FETCH_AND_ADD: 2208 case IB_WR_ATOMIC_FETCH_AND_ADD:
@@ -1869,7 +2263,25 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1869 } 2263 }
1870 break; 2264 break;
1871 2265
1872 case IB_QPT_UD: 2266 case MLX4_IB_QPT_TUN_SMI_OWNER:
2267 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2268 if (unlikely(err)) {
2269 *bad_wr = wr;
2270 goto out;
2271 }
2272 wqe += seglen;
2273 size += seglen / 16;
2274 break;
2275 case MLX4_IB_QPT_TUN_SMI:
2276 case MLX4_IB_QPT_TUN_GSI:
2277 /* this is a UD qp used in MAD responses to slaves. */
2278 set_datagram_seg(wqe, wr);
2279 /* set the forced-loopback bit in the data seg av */
2280 *(__be32 *) wqe |= cpu_to_be32(0x80000000);
2281 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2282 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2283 break;
2284 case MLX4_IB_QPT_UD:
1873 set_datagram_seg(wqe, wr); 2285 set_datagram_seg(wqe, wr);
1874 wqe += sizeof (struct mlx4_wqe_datagram_seg); 2286 wqe += sizeof (struct mlx4_wqe_datagram_seg);
1875 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 2287 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
@@ -1886,8 +2298,47 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1886 } 2298 }
1887 break; 2299 break;
1888 2300
1889 case IB_QPT_SMI: 2301 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1890 case IB_QPT_GSI: 2302 if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
2303 err = -ENOSYS;
2304 *bad_wr = wr;
2305 goto out;
2306 }
2307 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2308 if (unlikely(err)) {
2309 *bad_wr = wr;
2310 goto out;
2311 }
2312 wqe += seglen;
2313 size += seglen / 16;
2314 /* to start tunnel header on a cache-line boundary */
2315 add_zero_len_inline(wqe);
2316 wqe += 16;
2317 size++;
2318 build_tunnel_header(wr, wqe, &seglen);
2319 wqe += seglen;
2320 size += seglen / 16;
2321 break;
2322 case MLX4_IB_QPT_PROXY_SMI:
2323 /* don't allow QP0 sends on guests */
2324 err = -ENOSYS;
2325 *bad_wr = wr;
2326 goto out;
2327 case MLX4_IB_QPT_PROXY_GSI:
2328 /* If we are tunneling special qps, this is a UD qp.
2329 * In this case we first add a UD segment targeting
2330 * the tunnel qp, and then add a header with address
2331 * information */
2332 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
2333 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2334 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2335 build_tunnel_header(wr, wqe, &seglen);
2336 wqe += seglen;
2337 size += seglen / 16;
2338 break;
2339
2340 case MLX4_IB_QPT_SMI:
2341 case MLX4_IB_QPT_GSI:
1891 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); 2342 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
1892 if (unlikely(err)) { 2343 if (unlikely(err)) {
1893 *bad_wr = wr; 2344 *bad_wr = wr;
@@ -1913,8 +2364,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1913 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); 2364 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
1914 2365
1915 /* Add one more inline data segment for ICRC for MLX sends */ 2366 /* Add one more inline data segment for ICRC for MLX sends */
1916 if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || 2367 if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1917 qp->ibqp.qp_type == IB_QPT_GSI)) { 2368 qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
2369 qp->mlx4_ib_qp_type &
2370 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
1918 set_mlx_icrc_seg(dseg + 1); 2371 set_mlx_icrc_seg(dseg + 1);
1919 size += sizeof (struct mlx4_wqe_data_seg) / 16; 2372 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1920 } 2373 }
@@ -2006,8 +2459,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2006 int err = 0; 2459 int err = 0;
2007 int nreq; 2460 int nreq;
2008 int ind; 2461 int ind;
2462 int max_gs;
2009 int i; 2463 int i;
2010 2464
2465 max_gs = qp->rq.max_gs;
2011 spin_lock_irqsave(&qp->rq.lock, flags); 2466 spin_lock_irqsave(&qp->rq.lock, flags);
2012 2467
2013 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 2468 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@@ -2027,10 +2482,25 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2027 2482
2028 scat = get_recv_wqe(qp, ind); 2483 scat = get_recv_wqe(qp, ind);
2029 2484
2485 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
2486 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
2487 ib_dma_sync_single_for_device(ibqp->device,
2488 qp->sqp_proxy_rcv[ind].map,
2489 sizeof (struct mlx4_ib_proxy_sqp_hdr),
2490 DMA_FROM_DEVICE);
2491 scat->byte_count =
2492 cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
2493 /* use dma lkey from upper layer entry */
2494 scat->lkey = cpu_to_be32(wr->sg_list->lkey);
2495 scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
2496 scat++;
2497 max_gs--;
2498 }
2499
2030 for (i = 0; i < wr->num_sge; ++i) 2500 for (i = 0; i < wr->num_sge; ++i)
2031 __set_data_seg(scat + i, wr->sg_list + i); 2501 __set_data_seg(scat + i, wr->sg_list + i);
2032 2502
2033 if (i < qp->rq.max_gs) { 2503 if (i < max_gs) {
2034 scat[i].byte_count = 0; 2504 scat[i].byte_count = 0;
2035 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); 2505 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
2036 scat[i].addr = 0; 2506 scat[i].addr = 0;