aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorSteve Wise <swise@opengridcomputing.com>2008-07-15 02:48:45 -0400
committerRoland Dreier <rolandd@cisco.com>2008-07-15 02:48:45 -0400
commit00f7ec36c9324928e4cd23f02e6d8550f30c32ca (patch)
treedd9bea0d7589d49b4eb7e264e5f318045fcee1fb /include
parentf89271da32bc1a636cf4eb078e615930886cd013 (diff)
RDMA/core: Add memory management extensions support
This patch adds support for the IB "base memory management extension" (BMME) and the equivalent iWARP operations (which the iWARP verbs mandate that all devices implement). The new operations are: - Allocate an ib_mr for use in fast register work requests. - Allocate/free physical buffer lists for use in fast register work requests. This allows device drivers to allocate this memory as needed for use in posting send requests (e.g. via dma_alloc_coherent). - New send queue work requests: * send with remote invalidate * fast register memory region * local invalidate memory region * RDMA read with invalidate local memory region (iWARP only) Consumer interface details: - A new device capability flag IB_DEVICE_MEM_MGT_EXTENSIONS is added to indicate device support for these features. - New send work request opcodes IB_WR_FAST_REG_MR, IB_WR_LOCAL_INV, IB_WR_RDMA_READ_WITH_INV are added. - A new consumer API function, ib_alloc_fast_reg_mr(), is added to allocate fast register memory regions. - New consumer API functions, ib_alloc_fast_reg_page_list() and ib_free_fast_reg_page_list(), are added to allocate and free device-specific memory for fast registration page lists. - A new consumer API function, ib_update_fast_reg_key(), is added to allow the key portion of the R_Key and L_Key of a fast registration MR to be updated. Consumers call this if desired before posting an IB_WR_FAST_REG_MR work request. Consumers can use this as follows: - MR is allocated with ib_alloc_fast_reg_mr(). - Page list memory is allocated with ib_alloc_fast_reg_page_list(). - MR R_Key/L_Key "key" field is updated with ib_update_fast_reg_key(). - MR is made VALID and bound to a specific page list via ib_post_send(IB_WR_FAST_REG_MR). - MR is made INVALID via ib_post_send(IB_WR_LOCAL_INV), ib_post_send(IB_WR_RDMA_READ_WITH_INV) or an incoming send with invalidate operation. - MR is deallocated with ib_dereg_mr(). - Page lists are deallocated via ib_free_fast_reg_page_list().
Applications can allocate a fast register MR once, and then can repeatedly bind the MR to different physical block lists (PBLs) via posting work requests to a send queue (SQ). For each outstanding MR-to-PBL binding in the SQ pipe, a fast_reg_page_list needs to be allocated (the fast_reg_page_list is owned by the low-level driver from the consumer posting a work request until the request completes). Thus pipelining can be achieved while still allowing device-specific page_list processing. The 32-bit fast register memory key/STag is composed of a 24-bit index and an 8-bit key. The application can change the key each time it fast registers, thus allowing more control over the peer's use of the key/STag (i.e. it can effectively be changed each time the rkey is rebound to a page list). Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'include')
-rw-r--r--include/rdma/ib_user_verbs.h5
-rw-r--r--include/rdma/ib_verbs.h83
2 files changed, 85 insertions, 3 deletions
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index 885254f20bb3..a17f77106149 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -289,7 +289,10 @@ struct ib_uverbs_wc {
289 __u32 opcode; 289 __u32 opcode;
290 __u32 vendor_err; 290 __u32 vendor_err;
291 __u32 byte_len; 291 __u32 byte_len;
292 __u32 imm_data; 292 union {
293 __u32 imm_data;
294 __u32 invalidate_rkey;
295 } ex;
293 __u32 qp_num; 296 __u32 qp_num;
294 __u32 src_qp; 297 __u32 src_qp;
295 __u32 wc_flags; 298 __u32 wc_flags;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5f5621bf70bd..74c24b908908 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -103,6 +103,7 @@ enum ib_device_cap_flags {
103 */ 103 */
104 IB_DEVICE_UD_IP_CSUM = (1<<18), 104 IB_DEVICE_UD_IP_CSUM = (1<<18),
105 IB_DEVICE_UD_TSO = (1<<19), 105 IB_DEVICE_UD_TSO = (1<<19),
106 IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
106}; 107};
107 108
108enum ib_atomic_cap { 109enum ib_atomic_cap {
@@ -148,6 +149,7 @@ struct ib_device_attr {
148 int max_srq; 149 int max_srq;
149 int max_srq_wr; 150 int max_srq_wr;
150 int max_srq_sge; 151 int max_srq_sge;
152 unsigned int max_fast_reg_page_list_len;
151 u16 max_pkeys; 153 u16 max_pkeys;
152 u8 local_ca_ack_delay; 154 u8 local_ca_ack_delay;
153}; 155};
@@ -411,6 +413,8 @@ enum ib_wc_opcode {
411 IB_WC_FETCH_ADD, 413 IB_WC_FETCH_ADD,
412 IB_WC_BIND_MW, 414 IB_WC_BIND_MW,
413 IB_WC_LSO, 415 IB_WC_LSO,
416 IB_WC_LOCAL_INV,
417 IB_WC_FAST_REG_MR,
414/* 418/*
415 * Set value of IB_WC_RECV so consumers can test if a completion is a 419 * Set value of IB_WC_RECV so consumers can test if a completion is a
416 * receive by testing (opcode & IB_WC_RECV). 420 * receive by testing (opcode & IB_WC_RECV).
@@ -421,7 +425,8 @@ enum ib_wc_opcode {
421 425
422enum ib_wc_flags { 426enum ib_wc_flags {
423 IB_WC_GRH = 1, 427 IB_WC_GRH = 1,
424 IB_WC_WITH_IMM = (1<<1) 428 IB_WC_WITH_IMM = (1<<1),
429 IB_WC_WITH_INVALIDATE = (1<<2),
425}; 430};
426 431
427struct ib_wc { 432struct ib_wc {
@@ -431,7 +436,10 @@ struct ib_wc {
431 u32 vendor_err; 436 u32 vendor_err;
432 u32 byte_len; 437 u32 byte_len;
433 struct ib_qp *qp; 438 struct ib_qp *qp;
434 __be32 imm_data; 439 union {
440 __be32 imm_data;
441 u32 invalidate_rkey;
442 } ex;
435 u32 src_qp; 443 u32 src_qp;
436 int wc_flags; 444 int wc_flags;
437 u16 pkey_index; 445 u16 pkey_index;
@@ -625,6 +633,9 @@ enum ib_wr_opcode {
625 IB_WR_ATOMIC_FETCH_AND_ADD, 633 IB_WR_ATOMIC_FETCH_AND_ADD,
626 IB_WR_LSO, 634 IB_WR_LSO,
627 IB_WR_SEND_WITH_INV, 635 IB_WR_SEND_WITH_INV,
636 IB_WR_RDMA_READ_WITH_INV,
637 IB_WR_LOCAL_INV,
638 IB_WR_FAST_REG_MR,
628}; 639};
629 640
630enum ib_send_flags { 641enum ib_send_flags {
@@ -641,6 +652,12 @@ struct ib_sge {
641 u32 lkey; 652 u32 lkey;
642}; 653};
643 654
655struct ib_fast_reg_page_list {
656 struct ib_device *device;
657 u64 *page_list;
658 unsigned int max_page_list_len;
659};
660
644struct ib_send_wr { 661struct ib_send_wr {
645 struct ib_send_wr *next; 662 struct ib_send_wr *next;
646 u64 wr_id; 663 u64 wr_id;
@@ -673,6 +690,15 @@ struct ib_send_wr {
673 u16 pkey_index; /* valid for GSI only */ 690 u16 pkey_index; /* valid for GSI only */
674 u8 port_num; /* valid for DR SMPs on switch only */ 691 u8 port_num; /* valid for DR SMPs on switch only */
675 } ud; 692 } ud;
693 struct {
694 u64 iova_start;
695 struct ib_fast_reg_page_list *page_list;
696 unsigned int page_shift;
697 unsigned int page_list_len;
698 u32 length;
699 int access_flags;
700 u32 rkey;
701 } fast_reg;
676 } wr; 702 } wr;
677}; 703};
678 704
@@ -1011,6 +1037,11 @@ struct ib_device {
1011 int (*query_mr)(struct ib_mr *mr, 1037 int (*query_mr)(struct ib_mr *mr,
1012 struct ib_mr_attr *mr_attr); 1038 struct ib_mr_attr *mr_attr);
1013 int (*dereg_mr)(struct ib_mr *mr); 1039 int (*dereg_mr)(struct ib_mr *mr);
1040 struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
1041 int max_page_list_len);
1042 struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
1043 int page_list_len);
1044 void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
1014 int (*rereg_phys_mr)(struct ib_mr *mr, 1045 int (*rereg_phys_mr)(struct ib_mr *mr,
1015 int mr_rereg_mask, 1046 int mr_rereg_mask,
1016 struct ib_pd *pd, 1047 struct ib_pd *pd,
@@ -1805,6 +1836,54 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
1805int ib_dereg_mr(struct ib_mr *mr); 1836int ib_dereg_mr(struct ib_mr *mr);
1806 1837
1807/** 1838/**
1839 * ib_alloc_fast_reg_mr - Allocates memory region usable with the
1840 * IB_WR_FAST_REG_MR send work request.
1841 * @pd: The protection domain associated with the region.
1842 * @max_page_list_len: requested max physical buffer list length to be
1843 * used with fast register work requests for this MR.
1844 */
1845struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
1846
1847/**
1848 * ib_alloc_fast_reg_page_list - Allocates a page list array
1849 * @device: ib device pointer.
1850 * @page_list_len: size of the page list array to be allocated.
1851 *
1852 * This allocates and returns a struct ib_fast_reg_page_list * and a
1853 * page_list array that is at least page_list_len in size. The actual
1854 * size is returned in max_page_list_len. The caller is responsible
1855 * for initializing the contents of the page_list array before posting
1856 * a send work request with the IB_WR_FAST_REG_MR opcode.
1857 *
1858 * The page_list array entries must be translated using one of the
1859 * ib_dma_*() functions just like the addresses passed to
1860 * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct
1861 * ib_fast_reg_page_list must not be modified by the caller until the
1862 * IB_WC_FAST_REG_MR work request completes.
1863 */
1864struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
1865 struct ib_device *device, int page_list_len);
1866
1867/**
1868 * ib_free_fast_reg_page_list - Deallocates a previously allocated
1869 * page list array.
1870 * @page_list: struct ib_fast_reg_page_list pointer to be deallocated.
1871 */
1872void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
1873
1874/**
1875 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
1876 * R_Key and L_Key.
1877 * @mr: struct ib_mr pointer to be updated.
1878 * @newkey: new key to be used.
1879 */
1880static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
1881{
1882 mr->lkey = (mr->lkey & 0xffffff00) | newkey;
1883 mr->rkey = (mr->rkey & 0xffffff00) | newkey;
1884}
1885
1886/**
1808 * ib_alloc_mw - Allocates a memory window. 1887 * ib_alloc_mw - Allocates a memory window.
1809 * @pd: The protection domain associated with the memory window. 1888 * @pd: The protection domain associated with the memory window.
1810 */ 1889 */