author    Steve Wise <swise@opengridcomputing.com>  2008-07-15 02:48:45 -0400
committer Roland Dreier <rolandd@cisco.com>         2008-07-15 02:48:45 -0400
commit    00f7ec36c9324928e4cd23f02e6d8550f30c32ca (patch)
tree      dd9bea0d7589d49b4eb7e264e5f318045fcee1fb /drivers/infiniband
parent    f89271da32bc1a636cf4eb078e615930886cd013 (diff)
RDMA/core: Add memory management extensions support
This patch adds support for the IB "base memory management extensions"
(BMME) and the equivalent iWARP operations (which the iWARP verbs
specification mandates all devices implement).

The new operations are:

- Allocate an ib_mr for use in fast register work requests.

- Allocate/free physical buffer lists for use in fast register work
  requests.  This allows device drivers to allocate this memory as
  needed for use in posting send requests (e.g. via dma_alloc_coherent).

- New send queue work requests:
  * send with remote invalidate
  * fast register memory region
  * local invalidate memory region
  * RDMA read with invalidate local memory region (iWARP only)

Consumer interface details:

- A new device capability flag, IB_DEVICE_MEM_MGT_EXTENSIONS, is added
  to indicate device support for these features.

- New send work request opcodes IB_WR_FAST_REG_MR, IB_WR_LOCAL_INV and
  IB_WR_RDMA_READ_WITH_INV are added.

- A new consumer API function, ib_alloc_fast_reg_mr(), is added to
  allocate fast register memory regions.

- New consumer API functions, ib_alloc_fast_reg_page_list() and
  ib_free_fast_reg_page_list(), are added to allocate and free
  device-specific memory for fast registration page lists.

- A new consumer API function, ib_update_fast_reg_key(), is added to
  allow the key portion of the R_Key and L_Key of a fast registration
  MR to be updated.  Consumers call this if desired before posting
  an IB_WR_FAST_REG_MR work request.

Consumers can use this as follows:

- MR is allocated with ib_alloc_fast_reg_mr().
- Page list memory is allocated with ib_alloc_fast_reg_page_list().
- MR R_Key/L_Key "key" field is updated with ib_update_fast_reg_key().
- MR is made VALID and bound to a specific page list via
  ib_post_send(IB_WR_FAST_REG_MR).
- MR is made INVALID via ib_post_send(IB_WR_LOCAL_INV),
  ib_post_send(IB_WR_RDMA_READ_WITH_INV) or an incoming send with
  invalidate operation.
- MR is deallocated with ib_dereg_mr().
- Page lists are deallocated via ib_free_fast_reg_page_list().

Applications can allocate a fast register MR once and then repeatedly
bind the MR to different physical block lists (PBLs) by posting work
requests to a send queue (SQ).  For each outstanding MR-to-PBL binding
in the SQ pipe, a fast_reg_page_list needs to be allocated (the
fast_reg_page_list is owned by the low-level driver from the time the
consumer posts a work request until the request completes).  Thus
pipelining can be achieved while still allowing device-specific
page_list processing.

The 32-bit fast register memory key/STag is composed of a 24-bit index
and an 8-bit key.  The application can change the key each time it fast
registers, thus allowing more control over the peer's use of the
key/STag (i.e. it can effectively be changed each time the rkey is
rebound to a page list).

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
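[Editor's note] To make the flow above concrete, here is a hedged,
kernel-style C sketch of a consumer (not part of this patch).  The
function name fast_reg_example() and its inputs qp, pd, npages and
dma_addrs are hypothetical; the ib_* calls and work-request fields are
the interface this patch adds.

/*
 * Hedged consumer-side sketch of the fast-register flow; illustrative
 * only.  fast_reg_example(), qp, pd, npages and dma_addrs are
 * hypothetical names, not part of the new API.
 */
static int fast_reg_example(struct ib_qp *qp, struct ib_pd *pd,
                            int npages, u64 *dma_addrs)
{
        struct ib_mr *mr;
        struct ib_fast_reg_page_list *pl;
        struct ib_send_wr wr, *bad_wr;
        static u8 key;          /* 8-bit key part, bumped per rebind */
        int i, ret;

        mr = ib_alloc_fast_reg_mr(pd, npages);
        if (IS_ERR(mr))
                return PTR_ERR(mr);

        pl = ib_alloc_fast_reg_page_list(qp->device, npages);
        if (IS_ERR(pl)) {
                ib_dereg_mr(mr);
                return PTR_ERR(pl);
        }

        /* Fill the device-specific page list with DMA addresses. */
        for (i = 0; i < npages; i++)
                pl->page_list[i] = dma_addrs[i];

        /* Update the key portion of the R_Key/L_Key before rebinding. */
        ib_update_fast_reg_key(mr, ++key);

        /* Make the MR VALID and bind it to the page list. */
        memset(&wr, 0, sizeof wr);
        wr.opcode                    = IB_WR_FAST_REG_MR;
        wr.send_flags                = IB_SEND_SIGNALED;
        wr.wr.fast_reg.iova_start    = dma_addrs[0];
        wr.wr.fast_reg.page_list     = pl;
        wr.wr.fast_reg.page_list_len = npages;
        wr.wr.fast_reg.page_shift    = PAGE_SHIFT;
        wr.wr.fast_reg.length        = npages * PAGE_SIZE;
        wr.wr.fast_reg.access_flags  = IB_ACCESS_LOCAL_WRITE |
                                       IB_ACCESS_REMOTE_WRITE;
        wr.wr.fast_reg.rkey          = mr->rkey;
        ret = ib_post_send(qp, &wr, &bad_wr);
        if (ret)
                return ret;

        /* ... advertise mr->rkey to the peer, do I/O, then invalidate: */
        memset(&wr, 0, sizeof wr);
        wr.opcode             = IB_WR_LOCAL_INV;
        wr.send_flags         = IB_SEND_SIGNALED;
        wr.ex.invalidate_rkey = mr->rkey;
        return ib_post_send(qp, &wr, &bad_wr);
}

The sketch stops before teardown: the page list remains owned by the
low-level driver until the fast-register request completes, so
ib_free_fast_reg_page_list() and ib_dereg_mr() must wait for the
corresponding completions.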
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c       2
-rw-r--r--  drivers/infiniband/core/verbs.c           46
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_reqs.c     2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_cq.c     2
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_rc.c     4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ruc.c    4
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_uc.c     8
-rw-r--r--  drivers/infiniband/hw/ipath/ipath_ud.c     8
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c           12
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cq.c     4
10 files changed, 69 insertions, 23 deletions
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 112b37cd689..56feab6c251 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -917,7 +917,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
                 resp->wc[i].opcode = wc[i].opcode;
                 resp->wc[i].vendor_err = wc[i].vendor_err;
                 resp->wc[i].byte_len = wc[i].byte_len;
-                resp->wc[i].imm_data = (__u32 __force) wc[i].imm_data;
+                resp->wc[i].ex.imm_data = (__u32 __force) wc[i].ex.imm_data;
                 resp->wc[i].qp_num = wc[i].qp->qp_num;
                 resp->wc[i].src_qp = wc[i].src_qp;
                 resp->wc[i].wc_flags = wc[i].wc_flags;
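[Editor's note] The imm_data -> ex.imm_data renames in this and the
driver hunks below exist because imm_data now lives inside a small
union, so the same slot can carry either immediate data or the R_Key
delivered by an incoming send-with-invalidate.  A hedged sketch of
that union's shape (the actual definition is in
include/rdma/ib_verbs.h, outside this diffstat):

/* Sketch only; see include/rdma/ib_verbs.h for the real definition
 * inside struct ib_wc and struct ib_send_wr. */
union ib_ex_sketch {
        __be32 imm_data;        /* valid when IB_WC_WITH_IMM is set */
        u32    invalidate_rkey; /* rkey from a send with invalidate */
};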
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 9f399d3a42b..e0fbe597586 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -753,6 +753,52 @@ int ib_dereg_mr(struct ib_mr *mr)
 }
 EXPORT_SYMBOL(ib_dereg_mr);
 
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
+{
+        struct ib_mr *mr;
+
+        if (!pd->device->alloc_fast_reg_mr)
+                return ERR_PTR(-ENOSYS);
+
+        mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
+
+        if (!IS_ERR(mr)) {
+                mr->device = pd->device;
+                mr->pd = pd;
+                mr->uobject = NULL;
+                atomic_inc(&pd->usecnt);
+                atomic_set(&mr->usecnt, 0);
+        }
+
+        return mr;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
+
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
+                                                          int max_page_list_len)
+{
+        struct ib_fast_reg_page_list *page_list;
+
+        if (!device->alloc_fast_reg_page_list)
+                return ERR_PTR(-ENOSYS);
+
+        page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
+
+        if (!IS_ERR(page_list)) {
+                page_list->device = device;
+                page_list->max_page_list_len = max_page_list_len;
+        }
+
+        return page_list;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
+
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+        page_list->device->free_fast_reg_page_list(page_list);
+}
+EXPORT_SYMBOL(ib_free_fast_reg_page_list);
+
 /* Memory windows */
 
 struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
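[Editor's note] These helpers simply dispatch through new per-device
methods, returning -ENOSYS when a driver has not implemented them.  A
hypothetical low-level driver (the foo_* names are illustrative, not
from this patch) would hook them up along these lines and report
IB_DEVICE_MEM_MGT_EXTENSIONS from its query_device method:

/* Illustrative only; foo_* is a hypothetical driver. */
static struct ib_mr *foo_alloc_fast_reg_mr(struct ib_pd *pd,
                                           int max_page_list_len);
static struct ib_fast_reg_page_list *
foo_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len);
static void foo_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);

static void foo_register_fast_reg_ops(struct ib_device *ibdev)
{
        ibdev->alloc_fast_reg_mr        = foo_alloc_fast_reg_mr;
        ibdev->alloc_fast_reg_page_list = foo_alloc_fast_reg_page_list;
        ibdev->free_fast_reg_page_list  = foo_free_fast_reg_page_list;
        /* ...and set IB_DEVICE_MEM_MGT_EXTENSIONS in the
         * device_cap_flags reported by the driver's query_device. */
}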
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index f093b0033da..b799b271021 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -681,7 +681,7 @@ poll_cq_one_read_cqe:
         wc->dlid_path_bits = cqe->dlid;
         wc->src_qp = cqe->remote_qp_number;
         wc->wc_flags = cqe->w_completion_flags;
-        wc->imm_data = cpu_to_be32(cqe->immediate_data);
+        wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
         wc->sl = cqe->service_level;
 
 poll_cq_one_exit0:
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index a03bd28d9b4..d385e4168c9 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
                 wc->uqueue[head].opcode = entry->opcode;
                 wc->uqueue[head].vendor_err = entry->vendor_err;
                 wc->uqueue[head].byte_len = entry->byte_len;
-                wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data;
+                wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
                 wc->uqueue[head].qp_num = entry->qp->qp_num;
                 wc->uqueue[head].src_qp = entry->src_qp;
                 wc->uqueue[head].wc_flags = entry->wc_flags;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 108df667d2e..97710522624 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1703,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
         case OP(SEND_LAST_WITH_IMMEDIATE):
         send_last_imm:
                 if (header_in_data) {
-                        wc.imm_data = *(__be32 *) data;
+                        wc.ex.imm_data = *(__be32 *) data;
                         data += sizeof(__be32);
                 } else {
                         /* Immediate data comes after BTH */
-                        wc.imm_data = ohdr->u.imm_data;
+                        wc.ex.imm_data = ohdr->u.imm_data;
                 }
                 hdrsize += 4;
                 wc.wc_flags = IB_WC_WITH_IMM;
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index a4b5521567f..af051f75766 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -331,7 +331,7 @@ again:
         switch (wqe->wr.opcode) {
         case IB_WR_SEND_WITH_IMM:
                 wc.wc_flags = IB_WC_WITH_IMM;
-                wc.imm_data = wqe->wr.ex.imm_data;
+                wc.ex.imm_data = wqe->wr.ex.imm_data;
                 /* FALLTHROUGH */
         case IB_WR_SEND:
                 if (!ipath_get_rwqe(qp, 0))
@@ -342,7 +342,7 @@ again:
                 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
                         goto inv_err;
                 wc.wc_flags = IB_WC_WITH_IMM;
-                wc.imm_data = wqe->wr.ex.imm_data;
+                wc.ex.imm_data = wqe->wr.ex.imm_data;
                 if (!ipath_get_rwqe(qp, 1))
                         goto rnr_nak;
                 /* FALLTHROUGH */
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 0596ec16fcb..82cc588b8bf 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -379,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
         case OP(SEND_LAST_WITH_IMMEDIATE):
         send_last_imm:
                 if (header_in_data) {
-                        wc.imm_data = *(__be32 *) data;
+                        wc.ex.imm_data = *(__be32 *) data;
                         data += sizeof(__be32);
                 } else {
                         /* Immediate data comes after BTH */
-                        wc.imm_data = ohdr->u.imm_data;
+                        wc.ex.imm_data = ohdr->u.imm_data;
                 }
                 hdrsize += 4;
                 wc.wc_flags = IB_WC_WITH_IMM;
@@ -483,11 +483,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
         case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
         rdma_last_imm:
                 if (header_in_data) {
-                        wc.imm_data = *(__be32 *) data;
+                        wc.ex.imm_data = *(__be32 *) data;
                         data += sizeof(__be32);
                 } else {
                         /* Immediate data comes after BTH */
-                        wc.imm_data = ohdr->u.imm_data;
+                        wc.ex.imm_data = ohdr->u.imm_data;
                 }
                 hdrsize += 4;
                 wc.wc_flags = IB_WC_WITH_IMM;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 77ca8ca74e7..36aa242c487 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -96,7 +96,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 
         if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
                 wc.wc_flags = IB_WC_WITH_IMM;
-                wc.imm_data = swqe->wr.ex.imm_data;
+                wc.ex.imm_data = swqe->wr.ex.imm_data;
         }
 
         /*
@@ -492,14 +492,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
         if (qp->ibqp.qp_num > 1 &&
             opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
                 if (header_in_data) {
-                        wc.imm_data = *(__be32 *) data;
+                        wc.ex.imm_data = *(__be32 *) data;
                         data += sizeof(__be32);
                 } else
-                        wc.imm_data = ohdr->u.ud.imm_data;
+                        wc.ex.imm_data = ohdr->u.ud.imm_data;
                 wc.wc_flags = IB_WC_WITH_IMM;
                 hdrsize += sizeof(u32);
         } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
-                wc.imm_data = 0;
+                wc.ex.imm_data = 0;
                 wc.wc_flags = 0;
         } else {
                 dev->n_pkt_drops++;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 4521319b140..299f20832ab 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -663,18 +663,18 @@ repoll:
 
         switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
         case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
                 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
                 wc->wc_flags = IB_WC_WITH_IMM;
-                wc->imm_data = cqe->immed_rss_invalid;
+                wc->ex.imm_data = cqe->immed_rss_invalid;
                 break;
         case MLX4_RECV_OPCODE_SEND:
                 wc->opcode = IB_WC_RECV;
                 wc->wc_flags = 0;
                 break;
         case MLX4_RECV_OPCODE_SEND_IMM:
                 wc->opcode = IB_WC_RECV;
                 wc->wc_flags = IB_WC_WITH_IMM;
-                wc->imm_data = cqe->immed_rss_invalid;
+                wc->ex.imm_data = cqe->immed_rss_invalid;
                 break;
         }
 
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index f788fce71ac..d9f4735c2b3 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -620,13 +620,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                 case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
                 case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
                         entry->wc_flags = IB_WC_WITH_IMM;
-                        entry->imm_data = cqe->imm_etype_pkey_eec;
+                        entry->ex.imm_data = cqe->imm_etype_pkey_eec;
                         entry->opcode = IB_WC_RECV;
                         break;
                 case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
                 case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
                         entry->wc_flags = IB_WC_WITH_IMM;
-                        entry->imm_data = cqe->imm_etype_pkey_eec;
+                        entry->ex.imm_data = cqe->imm_etype_pkey_eec;
                         entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
                         break;
                 default: