author    Steve Wise <swise@opengridcomputing.com>  2008-07-15 02:48:45 -0400
committer Roland Dreier <rolandd@cisco.com>         2008-07-15 02:48:45 -0400
commit    00f7ec36c9324928e4cd23f02e6d8550f30c32ca (patch)
tree      dd9bea0d7589d49b4eb7e264e5f318045fcee1fb
parent    f89271da32bc1a636cf4eb078e615930886cd013 (diff)
RDMA/core: Add memory management extensions support
This patch adds support for the IB "base memory management extension"
(BMME) and the equivalent iWARP operations (which the iWARP verbs
specification mandates all devices must implement).  The new operations
are:

 - Allocate an ib_mr for use in fast register work requests.

 - Allocate/free physical buffer lists for use in fast register work
   requests.  This allows device drivers to allocate this memory as
   needed for use in posting send requests (e.g. via dma_alloc_coherent()).

 - New send queue work requests:
   * send with remote invalidate
   * fast register memory region
   * local invalidate memory region
   * RDMA read with invalidate local memory region (iWARP only)

Consumer interface details:

 - A new device capability flag, IB_DEVICE_MEM_MGT_EXTENSIONS, is added
   to indicate device support for these features.

 - New send work request opcodes IB_WR_FAST_REG_MR, IB_WR_LOCAL_INV and
   IB_WR_RDMA_READ_WITH_INV are added.

 - A new consumer API function, ib_alloc_fast_reg_mr(), is added to
   allocate fast register memory regions.

 - New consumer API functions, ib_alloc_fast_reg_page_list() and
   ib_free_fast_reg_page_list(), are added to allocate and free
   device-specific memory for fast registration page lists.

 - A new consumer API function, ib_update_fast_reg_key(), is added to
   allow the key portion of the R_Key and L_Key of a fast registration
   MR to be updated.  Consumers call this if desired before posting an
   IB_WR_FAST_REG_MR work request.

Consumers can use this as follows:

 - The MR is allocated with ib_alloc_fast_reg_mr().

 - Page list memory is allocated with ib_alloc_fast_reg_page_list().

 - The MR R_Key/L_Key "key" field is updated with
   ib_update_fast_reg_key().

 - The MR is made VALID and bound to a specific page list via
   ib_post_send(IB_WR_FAST_REG_MR).

 - The MR is made INVALID via ib_post_send(IB_WR_LOCAL_INV),
   ib_post_send(IB_WR_RDMA_READ_WITH_INV) or an incoming send with
   invalidate operation.

 - The MR is deallocated with ib_dereg_mr().

 - Page lists are deallocated via ib_free_fast_reg_page_list().

Applications can allocate a fast register MR once and then repeatedly
bind it to different physical block lists (PBLs) by posting work
requests to a send queue (SQ).  For each outstanding MR-to-PBL binding
in the SQ pipe, a fast_reg_page_list needs to be allocated (the
fast_reg_page_list is owned by the low-level driver from the moment the
consumer posts the work request until the request completes).  Thus
pipelining can be achieved while still allowing device-specific
page_list processing.

The 32-bit fast register memory key/STag is composed of a 24-bit index
and an 8-bit key.  The application can change the key each time it fast
registers, allowing finer control over the peer's use of the key/STag
(i.e. it can effectively be changed each time the rkey is rebound to a
page list).

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
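For illustration only (this sketch is not part of the patch), the flow
above might look as follows in a kernel consumer.  The function name
fastreg_example and the dma_addrs/npages parameters are hypothetical;
the sketch assumes the pages were already mapped with the ib_dma_*()
routines, that send completions are reaped between steps, and that
ib_send_wr already carries the ex.invalidate_rkey field added by the
earlier send-with-invalidate patch.

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <rdma/ib_verbs.h>

/*
 * Minimal sketch of the fast-register flow; illustrative only.
 * "dma_addrs" holds npages ib_dma_*()-mapped page addresses.
 */
static int fastreg_example(struct ib_pd *pd, struct ib_qp *qp,
			   u64 *dma_addrs, int npages, u8 key)
{
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *pl;
	struct ib_send_wr fr_wr, inv_wr, *bad_wr;
	int i, ret;

	/* Allocate the fast-register MR and a device page list. */
	mr = ib_alloc_fast_reg_mr(pd, npages);
	if (IS_ERR(mr))
		return PTR_ERR(mr);
	pl = ib_alloc_fast_reg_page_list(pd->device, npages);
	if (IS_ERR(pl)) {
		ret = PTR_ERR(pl);
		goto free_mr;
	}

	/* Fill the page list and rotate the 8-bit key portion. */
	for (i = 0; i < npages; i++)
		pl->page_list[i] = dma_addrs[i];
	ib_update_fast_reg_key(mr, key);

	/* Make the MR VALID and bind it to the page list. */
	memset(&fr_wr, 0, sizeof fr_wr);
	fr_wr.opcode                    = IB_WR_FAST_REG_MR;
	fr_wr.send_flags                = IB_SEND_SIGNALED;
	fr_wr.wr.fast_reg.iova_start    = dma_addrs[0];
	fr_wr.wr.fast_reg.page_list     = pl;
	fr_wr.wr.fast_reg.page_shift    = PAGE_SHIFT;
	fr_wr.wr.fast_reg.page_list_len = npages;
	fr_wr.wr.fast_reg.length        = npages * PAGE_SIZE;
	fr_wr.wr.fast_reg.access_flags  = IB_ACCESS_LOCAL_WRITE |
					  IB_ACCESS_REMOTE_READ;
	fr_wr.wr.fast_reg.rkey          = mr->rkey;
	ret = ib_post_send(qp, &fr_wr, &bad_wr);
	if (ret)
		goto free_pl;

	/*
	 * ... advertise mr->rkey to the peer and wait for the I/O and
	 * the IB_WC_FAST_REG_MR completion; the page list is owned by
	 * the driver until the fast-register request completes ...
	 */

	/* Make the MR INVALID again before teardown or rebinding. */
	memset(&inv_wr, 0, sizeof inv_wr);
	inv_wr.opcode             = IB_WR_LOCAL_INV;
	inv_wr.send_flags         = IB_SEND_SIGNALED;
	inv_wr.ex.invalidate_rkey = mr->rkey;
	ret = ib_post_send(qp, &inv_wr, &bad_wr);
	/* A real consumer also waits for the IB_WC_LOCAL_INV completion. */

free_pl:
	ib_free_fast_reg_page_list(pl);
free_mr:
	ib_dereg_mr(mr);
	return ret;
}

Because the low 8 bits of the key/STag stay under consumer control, a
consumer would typically pass a different key each time it rebinds the
MR to a new page list, so a stale rkey still held by the peer no longer
matches.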
-rw-r--r--	drivers/infiniband/core/uverbs_cmd.c	2
-rw-r--r--	drivers/infiniband/core/verbs.c	46
-rw-r--r--	drivers/infiniband/hw/ehca/ehca_reqs.c	2
-rw-r--r--	drivers/infiniband/hw/ipath/ipath_cq.c	2
-rw-r--r--	drivers/infiniband/hw/ipath/ipath_rc.c	4
-rw-r--r--	drivers/infiniband/hw/ipath/ipath_ruc.c	4
-rw-r--r--	drivers/infiniband/hw/ipath/ipath_uc.c	8
-rw-r--r--	drivers/infiniband/hw/ipath/ipath_ud.c	8
-rw-r--r--	drivers/infiniband/hw/mlx4/cq.c	12
-rw-r--r--	drivers/infiniband/hw/mthca/mthca_cq.c	4
-rw-r--r--	include/rdma/ib_user_verbs.h	5
-rw-r--r--	include/rdma/ib_verbs.h	83
12 files changed, 154 insertions, 26 deletions
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 112b37cd6895..56feab6c251e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -917,7 +917,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		resp->wc[i].opcode	= wc[i].opcode;
 		resp->wc[i].vendor_err	= wc[i].vendor_err;
 		resp->wc[i].byte_len	= wc[i].byte_len;
-		resp->wc[i].imm_data	= (__u32 __force) wc[i].imm_data;
+		resp->wc[i].ex.imm_data	= (__u32 __force) wc[i].ex.imm_data;
 		resp->wc[i].qp_num	= wc[i].qp->qp_num;
 		resp->wc[i].src_qp	= wc[i].src_qp;
 		resp->wc[i].wc_flags	= wc[i].wc_flags;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 9f399d3a42b4..e0fbe5975866 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -753,6 +753,52 @@ int ib_dereg_mr(struct ib_mr *mr)
 }
 EXPORT_SYMBOL(ib_dereg_mr);
 
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
+{
+	struct ib_mr *mr;
+
+	if (!pd->device->alloc_fast_reg_mr)
+		return ERR_PTR(-ENOSYS);
+
+	mr = pd->device->alloc_fast_reg_mr(pd, max_page_list_len);
+
+	if (!IS_ERR(mr)) {
+		mr->device  = pd->device;
+		mr->pd      = pd;
+		mr->uobject = NULL;
+		atomic_inc(&pd->usecnt);
+		atomic_set(&mr->usecnt, 0);
+	}
+
+	return mr;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_mr);
+
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
+							  int max_page_list_len)
+{
+	struct ib_fast_reg_page_list *page_list;
+
+	if (!device->alloc_fast_reg_page_list)
+		return ERR_PTR(-ENOSYS);
+
+	page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
+
+	if (!IS_ERR(page_list)) {
+		page_list->device = device;
+		page_list->max_page_list_len = max_page_list_len;
+	}
+
+	return page_list;
+}
+EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
+
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
+{
+	page_list->device->free_fast_reg_page_list(page_list);
+}
+EXPORT_SYMBOL(ib_free_fast_reg_page_list);
+
 /* Memory windows */
 
 struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index f093b0033daf..b799b2710210 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -681,7 +681,7 @@ poll_cq_one_read_cqe:
 	wc->dlid_path_bits = cqe->dlid;
 	wc->src_qp = cqe->remote_qp_number;
 	wc->wc_flags = cqe->w_completion_flags;
-	wc->imm_data = cpu_to_be32(cqe->immediate_data);
+	wc->ex.imm_data = cpu_to_be32(cqe->immediate_data);
 	wc->sl = cqe->service_level;
 
 poll_cq_one_exit0:
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index a03bd28d9b48..d385e4168c97 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -82,7 +82,7 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 		wc->uqueue[head].opcode = entry->opcode;
 		wc->uqueue[head].vendor_err = entry->vendor_err;
 		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].imm_data = (__u32 __force) entry->imm_data;
+		wc->uqueue[head].ex.imm_data = (__u32 __force) entry->ex.imm_data;
 		wc->uqueue[head].qp_num = entry->qp->qp_num;
 		wc->uqueue[head].src_qp = entry->src_qp;
 		wc->uqueue[head].wc_flags = entry->wc_flags;
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 108df667d2ee..97710522624d 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -1703,11 +1703,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	case OP(SEND_LAST_WITH_IMMEDIATE):
 	send_last_imm:
 		if (header_in_data) {
-			wc.imm_data = *(__be32 *) data;
+			wc.ex.imm_data = *(__be32 *) data;
 			data += sizeof(__be32);
 		} else {
 			/* Immediate data comes after BTH */
-			wc.imm_data = ohdr->u.imm_data;
+			wc.ex.imm_data = ohdr->u.imm_data;
 		}
 		hdrsize += 4;
 		wc.wc_flags = IB_WC_WITH_IMM;
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index a4b5521567fe..af051f757663 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -331,7 +331,7 @@ again:
 	switch (wqe->wr.opcode) {
 	case IB_WR_SEND_WITH_IMM:
 		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.imm_data = wqe->wr.ex.imm_data;
+		wc.ex.imm_data = wqe->wr.ex.imm_data;
 		/* FALLTHROUGH */
 	case IB_WR_SEND:
 		if (!ipath_get_rwqe(qp, 0))
@@ -342,7 +342,7 @@ again:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
 			goto inv_err;
 		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.imm_data = wqe->wr.ex.imm_data;
+		wc.ex.imm_data = wqe->wr.ex.imm_data;
 		if (!ipath_get_rwqe(qp, 1))
 			goto rnr_nak;
 		/* FALLTHROUGH */
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 0596ec16fcbd..82cc588b8bf2 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -379,11 +379,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	case OP(SEND_LAST_WITH_IMMEDIATE):
 	send_last_imm:
 		if (header_in_data) {
-			wc.imm_data = *(__be32 *) data;
+			wc.ex.imm_data = *(__be32 *) data;
 			data += sizeof(__be32);
 		} else {
 			/* Immediate data comes after BTH */
-			wc.imm_data = ohdr->u.imm_data;
+			wc.ex.imm_data = ohdr->u.imm_data;
 		}
 		hdrsize += 4;
 		wc.wc_flags = IB_WC_WITH_IMM;
@@ -483,11 +483,11 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
 	rdma_last_imm:
 		if (header_in_data) {
-			wc.imm_data = *(__be32 *) data;
+			wc.ex.imm_data = *(__be32 *) data;
 			data += sizeof(__be32);
 		} else {
 			/* Immediate data comes after BTH */
-			wc.imm_data = ohdr->u.imm_data;
+			wc.ex.imm_data = ohdr->u.imm_data;
 		}
 		hdrsize += 4;
 		wc.wc_flags = IB_WC_WITH_IMM;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 77ca8ca74e78..36aa242c487c 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -96,7 +96,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 
 	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		wc.wc_flags = IB_WC_WITH_IMM;
-		wc.imm_data = swqe->wr.ex.imm_data;
+		wc.ex.imm_data = swqe->wr.ex.imm_data;
 	}
 
 	/*
@@ -492,14 +492,14 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	if (qp->ibqp.qp_num > 1 &&
 	    opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
 		if (header_in_data) {
-			wc.imm_data = *(__be32 *) data;
+			wc.ex.imm_data = *(__be32 *) data;
 			data += sizeof(__be32);
 		} else
-			wc.imm_data = ohdr->u.ud.imm_data;
+			wc.ex.imm_data = ohdr->u.ud.imm_data;
 		wc.wc_flags = IB_WC_WITH_IMM;
 		hdrsize += sizeof(u32);
 	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
-		wc.imm_data = 0;
+		wc.ex.imm_data = 0;
 		wc.wc_flags = 0;
 	} else {
 		dev->n_pkt_drops++;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 4521319b1406..299f20832ab6 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -663,18 +663,18 @@ repoll:
 
 	switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
 	case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
 		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 		wc->wc_flags = IB_WC_WITH_IMM;
-		wc->imm_data = cqe->immed_rss_invalid;
+		wc->ex.imm_data = cqe->immed_rss_invalid;
 		break;
 	case MLX4_RECV_OPCODE_SEND:
 		wc->opcode = IB_WC_RECV;
 		wc->wc_flags = 0;
 		break;
 	case MLX4_RECV_OPCODE_SEND_IMM:
 		wc->opcode = IB_WC_RECV;
 		wc->wc_flags = IB_WC_WITH_IMM;
-		wc->imm_data = cqe->immed_rss_invalid;
+		wc->ex.imm_data = cqe->immed_rss_invalid;
 		break;
 	}
 
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index f788fce71ac7..d9f4735c2b37 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -620,13 +620,13 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
 		case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE:
 		case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE:
 			entry->wc_flags = IB_WC_WITH_IMM;
-			entry->imm_data = cqe->imm_etype_pkey_eec;
+			entry->ex.imm_data = cqe->imm_etype_pkey_eec;
 			entry->opcode = IB_WC_RECV;
 			break;
 		case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
 		case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
 			entry->wc_flags = IB_WC_WITH_IMM;
-			entry->imm_data = cqe->imm_etype_pkey_eec;
+			entry->ex.imm_data = cqe->imm_etype_pkey_eec;
 			entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 			break;
 		default:
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index 885254f20bb3..a17f77106149 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -289,7 +289,10 @@ struct ib_uverbs_wc {
 	__u32 opcode;
 	__u32 vendor_err;
 	__u32 byte_len;
-	__u32 imm_data;
+	union {
+		__u32 imm_data;
+		__u32 invalidate_rkey;
+	} ex;
 	__u32 qp_num;
 	__u32 src_qp;
 	__u32 wc_flags;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5f5621bf70bd..74c24b908908 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -103,6 +103,7 @@ enum ib_device_cap_flags {
 	 */
 	IB_DEVICE_UD_IP_CSUM		= (1<<18),
 	IB_DEVICE_UD_TSO		= (1<<19),
+	IB_DEVICE_MEM_MGT_EXTENSIONS	= (1<<21),
 };
 
 enum ib_atomic_cap {
@@ -148,6 +149,7 @@ struct ib_device_attr {
 	int			max_srq;
 	int			max_srq_wr;
 	int			max_srq_sge;
+	unsigned int		max_fast_reg_page_list_len;
 	u16			max_pkeys;
 	u8			local_ca_ack_delay;
 };
@@ -411,6 +413,8 @@ enum ib_wc_opcode {
 	IB_WC_FETCH_ADD,
 	IB_WC_BIND_MW,
 	IB_WC_LSO,
+	IB_WC_LOCAL_INV,
+	IB_WC_FAST_REG_MR,
 /*
  * Set value of IB_WC_RECV so consumers can test if a completion is a
  * receive by testing (opcode & IB_WC_RECV).
@@ -421,7 +425,8 @@ enum ib_wc_opcode {
 
 enum ib_wc_flags {
 	IB_WC_GRH		= 1,
-	IB_WC_WITH_IMM		= (1<<1)
+	IB_WC_WITH_IMM		= (1<<1),
+	IB_WC_WITH_INVALIDATE	= (1<<2),
 };
 
 struct ib_wc {
@@ -431,7 +436,10 @@ struct ib_wc {
 	u32			vendor_err;
 	u32			byte_len;
 	struct ib_qp	       *qp;
-	__be32			imm_data;
+	union {
+		__be32		imm_data;
+		u32		invalidate_rkey;
+	} ex;
 	u32			src_qp;
 	int			wc_flags;
 	u16			pkey_index;
@@ -625,6 +633,9 @@ enum ib_wr_opcode {
 	IB_WR_ATOMIC_FETCH_AND_ADD,
 	IB_WR_LSO,
 	IB_WR_SEND_WITH_INV,
+	IB_WR_RDMA_READ_WITH_INV,
+	IB_WR_LOCAL_INV,
+	IB_WR_FAST_REG_MR,
 };
 
 enum ib_send_flags {
@@ -641,6 +652,12 @@ struct ib_sge {
 	u32	lkey;
 };
 
+struct ib_fast_reg_page_list {
+	struct ib_device       *device;
+	u64		       *page_list;
+	unsigned int		max_page_list_len;
+};
+
 struct ib_send_wr {
 	struct ib_send_wr      *next;
 	u64			wr_id;
@@ -673,6 +690,15 @@ struct ib_send_wr {
 			u16	pkey_index; /* valid for GSI only */
 			u8	port_num;   /* valid for DR SMPs on switch only */
 		} ud;
+		struct {
+			u64				iova_start;
+			struct ib_fast_reg_page_list   *page_list;
+			unsigned int			page_shift;
+			unsigned int			page_list_len;
+			u32				length;
+			int				access_flags;
+			u32				rkey;
+		} fast_reg;
 	} wr;
 };
 
@@ -1011,6 +1037,11 @@ struct ib_device {
 	int			   (*query_mr)(struct ib_mr *mr,
 					       struct ib_mr_attr *mr_attr);
 	int			   (*dereg_mr)(struct ib_mr *mr);
+	struct ib_mr *		   (*alloc_fast_reg_mr)(struct ib_pd *pd,
+					       int max_page_list_len);
+	struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
+								   int page_list_len);
+	void			   (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
 	int			   (*rereg_phys_mr)(struct ib_mr *mr,
 						    int mr_rereg_mask,
 						    struct ib_pd *pd,
@@ -1805,6 +1836,54 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
 int ib_dereg_mr(struct ib_mr *mr);
 
 /**
+ * ib_alloc_fast_reg_mr - Allocates memory region usable with the
+ *   IB_WR_FAST_REG_MR send work request.
+ * @pd: The protection domain associated with the region.
+ * @max_page_list_len: requested max physical buffer list length to be
+ *   used with fast register work requests for this MR.
+ */
+struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+
+/**
+ * ib_alloc_fast_reg_page_list - Allocates a page list array
+ * @device - ib device pointer.
+ * @page_list_len - size of the page list array to be allocated.
+ *
+ * This allocates and returns a struct ib_fast_reg_page_list * and a
+ * page_list array that is at least page_list_len in size.  The actual
+ * size is returned in max_page_list_len.  The caller is responsible
+ * for initializing the contents of the page_list array before posting
+ * a send work request with the IB_WR_FAST_REG_MR opcode.
+ *
+ * The page_list array entries must be translated using one of the
+ * ib_dma_*() functions just like the addresses passed to
+ * ib_map_phys_fmr().  Once the ib_post_send() is issued, the struct
+ * ib_fast_reg_page_list must not be modified by the caller until the
+ * IB_WC_FAST_REG_MR work request completes.
+ */
+struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
+			struct ib_device *device, int page_list_len);
+
+/**
+ * ib_free_fast_reg_page_list - Deallocates a previously allocated
+ *   page list array.
+ * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
+ */
+void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
+
+/**
+ * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
+ *   R_Key and L_Key.
+ * @mr - struct ib_mr pointer to be updated.
+ * @newkey - new key to be used.
+ */
+static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
+{
+	mr->lkey = (mr->lkey & 0xffffff00) | newkey;
+	mr->rkey = (mr->rkey & 0xffffff00) | newkey;
+}
+
+/**
  * ib_alloc_mw - Allocates a memory window.
  * @pd: The protection domain associated with the memory window.
  */