Diffstat (limited to 'net/sunrpc')
-rw-r--r--   net/sunrpc/xprtrdma/Makefile       |   3
-rw-r--r--   net/sunrpc/xprtrdma/fmr_ops.c      | 208
-rw-r--r--   net/sunrpc/xprtrdma/frwr_ops.c     | 353
-rw-r--r--   net/sunrpc/xprtrdma/physical_ops.c |  94
-rw-r--r--   net/sunrpc/xprtrdma/rpc_rdma.c     |  87
-rw-r--r--   net/sunrpc/xprtrdma/transport.c    |  61
-rw-r--r--   net/sunrpc/xprtrdma/verbs.c        | 699
-rw-r--r--   net/sunrpc/xprtrdma/xprt_rdma.h    |  90
8 files changed, 875 insertions(+), 720 deletions(-)
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index da5136fd5694..579f72bbcf4b 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
 
-xprtrdma-y := transport.o rpc_rdma.o verbs.o
+xprtrdma-y := transport.o rpc_rdma.o verbs.o \
+	fmr_ops.o frwr_ops.o physical_ops.o
 
 obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
 
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
new file mode 100644
index 000000000000..a91ba2c8ef1e
--- /dev/null
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc.  All rights reserved.
+ */
+
+/* Lightweight memory registration using Fast Memory Regions (FMR).
+ * Referred to sometimes as MTHCAFMR mode.
+ *
+ * FMR uses synchronous memory registration and deregistration.
+ * FMR registration is known to be fast, but FMR deregistration
+ * can take tens of usecs to complete.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+/* Maximum scatter/gather per FMR */
+#define RPCRDMA_MAX_FMR_SGES	(64)
+
+static int
+fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+	    struct rpcrdma_create_data_internal *cdata)
+{
+	return 0;
+}
+
+/* FMR mode conveys up to 64 pages of payload per chunk segment.
+ */
+static size_t
+fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+		     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
+}
+
+static int
+fmr_op_init(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
+	struct ib_fmr_attr fmr_attr = {
+		.max_pages	= RPCRDMA_MAX_FMR_SGES,
+		.max_maps	= 1,
+		.page_shift	= PAGE_SHIFT
+	};
+	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+	struct rpcrdma_mw *r;
+	int i, rc;
+
+	INIT_LIST_HEAD(&buf->rb_mws);
+	INIT_LIST_HEAD(&buf->rb_all);
+
+	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);
+
+	while (i--) {
+		r = kzalloc(sizeof(*r), GFP_KERNEL);
+		if (!r)
+			return -ENOMEM;
+
+		r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
+		if (IS_ERR(r->r.fmr))
+			goto out_fmr_err;
+
+		list_add(&r->mw_list, &buf->rb_mws);
+		list_add(&r->mw_all, &buf->rb_all);
+	}
+	return 0;
+
+out_fmr_err:
+	rc = PTR_ERR(r->r.fmr);
+	dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
+	kfree(r);
+	return rc;
+}
+
+/* Use the ib_map_phys_fmr() verb to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+	   int nsegs, bool writing)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct ib_device *device = ia->ri_id->device;
+	enum dma_data_direction direction = rpcrdma_data_dir(writing);
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct rpcrdma_mw *mw = seg1->rl_mw;
+	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+	int len, pageoff, i, rc;
+
+	pageoff = offset_in_page(seg1->mr_offset);
+	seg1->mr_offset -= pageoff;	/* start of page */
+	seg1->mr_len += pageoff;
+	len = -pageoff;
+	if (nsegs > RPCRDMA_MAX_FMR_SGES)
+		nsegs = RPCRDMA_MAX_FMR_SGES;
+	for (i = 0; i < nsegs;) {
+		rpcrdma_map_one(device, seg, direction);
+		physaddrs[i] = seg->mr_dma;
+		len += seg->mr_len;
+		++seg;
+		++i;
+		/* Check for holes */
+		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+			break;
+	}
+
+	rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
+	if (rc)
+		goto out_maperr;
+
+	seg1->mr_rkey = mw->r.fmr->rkey;
+	seg1->mr_base = seg1->mr_dma + pageoff;
+	seg1->mr_nsegs = i;
+	seg1->mr_len = len;
+	return i;
+
+out_maperr:
+	dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
+		__func__, len, (unsigned long long)seg1->mr_dma,
+		pageoff, i, rc);
+	while (i--)
+		rpcrdma_unmap_one(device, --seg);
+	return rc;
+}
+
+/* Use the ib_unmap_fmr() verb to prevent further remote
+ * access via RDMA READ or RDMA WRITE.
+ */
+static int
+fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct ib_device *device;
+	int rc, nsegs = seg->mr_nsegs;
+	LIST_HEAD(l);
+
+	list_add(&seg1->rl_mw->r.fmr->list, &l);
+	rc = ib_unmap_fmr(&l);
+	read_lock(&ia->ri_qplock);
+	device = ia->ri_id->device;
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(device, seg++);
+	read_unlock(&ia->ri_qplock);
+	if (rc)
+		goto out_err;
+	return nsegs;
+
+out_err:
+	dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc);
+	return nsegs;
+}
+
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+fmr_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct rpcrdma_mw *r;
+	LIST_HEAD(list);
+	int rc;
+
+	list_for_each_entry(r, &buf->rb_all, mw_all)
+		list_add(&r->r.fmr->list, &list);
+
+	rc = ib_unmap_fmr(&list);
+	if (rc)
+		dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
+			__func__, rc);
+}
+
+static void
+fmr_op_destroy(struct rpcrdma_buffer *buf)
+{
+	struct rpcrdma_mw *r;
+	int rc;
+
+	while (!list_empty(&buf->rb_all)) {
+		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+		list_del(&r->mw_all);
+		rc = ib_dealloc_fmr(r->r.fmr);
+		if (rc)
+			dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
+				__func__, rc);
+		kfree(r);
+	}
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
+	.ro_map				= fmr_op_map,
+	.ro_unmap			= fmr_op_unmap,
+	.ro_open			= fmr_op_open,
+	.ro_maxpages			= fmr_op_maxpages,
+	.ro_init			= fmr_op_init,
+	.ro_reset			= fmr_op_reset,
+	.ro_destroy			= fmr_op_destroy,
+	.ro_displayname			= "fmr",
+};
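
[Editor's note: a minimal sketch of how callers are expected to consume this ops vtable. The helper name register_chunk is hypothetical; rpcrdma_xprt, rpcrdma_mr_seg, and the ri_ops pointer are as declared in xprt_rdma.h elsewhere in this series.]

/* Hypothetical caller (not part of the patch): dispatch through the
 * per-mode vtable instead of switching on ia->ri_memreg_strategy.
 * "writing" selects the DMA direction and the remote access rights.
 */
static int register_chunk(struct rpcrdma_xprt *r_xprt,
			  struct rpcrdma_mr_seg *seg, int nsegs, bool writing)
{
	const struct rpcrdma_memreg_ops *ops = r_xprt->rx_ia.ri_ops;

	/* Returns the number of segments consumed, or a negative errno. */
	return ops->ro_map(r_xprt, seg, nsegs, writing);
}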
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
new file mode 100644
index 000000000000..0a7b9df70133
--- /dev/null
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc.  All rights reserved.
+ */
+
+/* Lightweight memory registration using Fast Registration Work
+ * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ *
+ * FRWR features ordered asynchronous registration and deregistration
+ * of arbitrarily sized memory regions. This is the fastest and safest
+ * but most complex memory registration mode.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+static int
+__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
+	    unsigned int depth)
+{
+	struct rpcrdma_frmr *f = &r->r.frmr;
+	int rc;
+
+	f->fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+	if (IS_ERR(f->fr_mr))
+		goto out_mr_err;
+	f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
+	if (IS_ERR(f->fr_pgl))
+		goto out_list_err;
+	return 0;
+
+out_mr_err:
+	rc = PTR_ERR(f->fr_mr);
+	dprintk("RPC: %s: ib_alloc_fast_reg_mr status %i\n",
+		__func__, rc);
+	return rc;
+
+out_list_err:
+	rc = PTR_ERR(f->fr_pgl);
+	dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n",
+		__func__, rc);
+	ib_dereg_mr(f->fr_mr);
+	return rc;
+}
+
+static void
+__frwr_release(struct rpcrdma_mw *r)
+{
+	int rc;
+
+	rc = ib_dereg_mr(r->r.frmr.fr_mr);
+	if (rc)
+		dprintk("RPC: %s: ib_dereg_mr status %i\n",
+			__func__, rc);
+	ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+}
+
+static int
+frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+	     struct rpcrdma_create_data_internal *cdata)
+{
+	struct ib_device_attr *devattr = &ia->ri_devattr;
+	int depth, delta;
+
+	ia->ri_max_frmr_depth =
+			min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+			      devattr->max_fast_reg_page_list_len);
+	dprintk("RPC: %s: device's max FR page list len = %u\n",
+		__func__, ia->ri_max_frmr_depth);
+
+	/* Add room for frmr register and invalidate WRs.
+	 * 1. FRMR reg WR for head
+	 * 2. FRMR invalidate WR for head
+	 * 3. N FRMR reg WRs for pagelist
+	 * 4. N FRMR invalidate WRs for pagelist
+	 * 5. FRMR reg WR for tail
+	 * 6. FRMR invalidate WR for tail
+	 * 7. The RDMA_SEND WR
+	 */
+	depth = 7;
+
+	/* Calculate N if the device max FRMR depth is smaller than
+	 * RPCRDMA_MAX_DATA_SEGS.
+	 */
+	if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+		do {
+			depth += 2;	/* FRMR reg + invalidate */
+			delta -= ia->ri_max_frmr_depth;
+		} while (delta > 0);
+	}
+
+	ep->rep_attr.cap.max_send_wr *= depth;
+	if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
+		cdata->max_requests = devattr->max_qp_wr / depth;
+		if (!cdata->max_requests)
+			return -EINVAL;
+		ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+					       depth;
+	}
+
+	return 0;
+}
+
+/* FRWR mode conveys a list of pages per chunk segment. The
+ * maximum length of that list is the FRWR page list depth.
+ */
+static size_t
+frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+		     rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
+}
+
+/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
+static void
+frwr_sendcompletion(struct ib_wc *wc)
+{
+	struct rpcrdma_mw *r;
+
+	if (likely(wc->status == IB_WC_SUCCESS))
+		return;
+
+	/* WARNING: Only wr_id and status are reliable at this point */
+	r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+	dprintk("RPC: %s: frmr %p (stale), status %d\n",
+		__func__, r, wc->status);
+	r->r.frmr.fr_state = FRMR_IS_STALE;
+}
+
+static int
+frwr_op_init(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+	int i;
+
+	INIT_LIST_HEAD(&buf->rb_mws);
+	INIT_LIST_HEAD(&buf->rb_all);
+
+	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);
+
+	while (i--) {
+		struct rpcrdma_mw *r;
+		int rc;
+
+		r = kzalloc(sizeof(*r), GFP_KERNEL);
+		if (!r)
+			return -ENOMEM;
+
+		rc = __frwr_init(r, pd, device, depth);
+		if (rc) {
+			kfree(r);
+			return rc;
+		}
+
+		list_add(&r->mw_list, &buf->rb_mws);
+		list_add(&r->mw_all, &buf->rb_all);
+		r->mw_sendcompletion = frwr_sendcompletion;
+	}
+
+	return 0;
+}
+
+/* Post a FAST_REG Work Request to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+	    int nsegs, bool writing)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct ib_device *device = ia->ri_id->device;
+	enum dma_data_direction direction = rpcrdma_data_dir(writing);
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct rpcrdma_mw *mw = seg1->rl_mw;
+	struct rpcrdma_frmr *frmr = &mw->r.frmr;
+	struct ib_mr *mr = frmr->fr_mr;
+	struct ib_send_wr fastreg_wr, *bad_wr;
+	u8 key;
+	int len, pageoff;
+	int i, rc;
+	int seg_len;
+	u64 pa;
+	int page_no;
+
+	pageoff = offset_in_page(seg1->mr_offset);
+	seg1->mr_offset -= pageoff;	/* start of page */
+	seg1->mr_len += pageoff;
+	len = -pageoff;
+	if (nsegs > ia->ri_max_frmr_depth)
+		nsegs = ia->ri_max_frmr_depth;
+	for (page_no = i = 0; i < nsegs;) {
+		rpcrdma_map_one(device, seg, direction);
+		pa = seg->mr_dma;
+		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
+			frmr->fr_pgl->page_list[page_no++] = pa;
+			pa += PAGE_SIZE;
+		}
+		len += seg->mr_len;
+		++seg;
+		++i;
+		/* Check for holes */
+		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+			break;
+	}
+	dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
+		__func__, mw, i, len);
+
+	frmr->fr_state = FRMR_IS_VALID;
+
+	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+	fastreg_wr.wr_id = (unsigned long)(void *)mw;
+	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
+	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
+	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fastreg_wr.wr.fast_reg.page_list_len = page_no;
+	fastreg_wr.wr.fast_reg.length = len;
+	fastreg_wr.wr.fast_reg.access_flags = writing ?
+				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+				IB_ACCESS_REMOTE_READ;
+	key = (u8)(mr->rkey & 0x000000FF);
+	ib_update_fast_reg_key(mr, ++key);
+	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
+	if (rc)
+		goto out_senderr;
+
+	seg1->mr_rkey = mr->rkey;
+	seg1->mr_base = seg1->mr_dma + pageoff;
+	seg1->mr_nsegs = i;
+	seg1->mr_len = len;
+	return i;
+
+out_senderr:
+	dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
+	ib_update_fast_reg_key(mr, --key);
+	frmr->fr_state = FRMR_IS_INVALID;
+	while (i--)
+		rpcrdma_unmap_one(device, --seg);
+	return rc;
+}
+
+/* Post a LOCAL_INV Work Request to prevent further remote access
+ * via RDMA READ or RDMA WRITE.
+ */
+static int
+frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+	struct rpcrdma_mr_seg *seg1 = seg;
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct ib_send_wr invalidate_wr, *bad_wr;
+	int rc, nsegs = seg->mr_nsegs;
+	struct ib_device *device;
+
+	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
+
+	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
+	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
+	invalidate_wr.opcode = IB_WR_LOCAL_INV;
+	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
+	DECR_CQCOUNT(&r_xprt->rx_ep);
+
+	read_lock(&ia->ri_qplock);
+	device = ia->ri_id->device;
+	while (seg1->mr_nsegs--)
+		rpcrdma_unmap_one(device, seg++);
+	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+	read_unlock(&ia->ri_qplock);
+	if (rc)
+		goto out_err;
+	return nsegs;
+
+out_err:
+	/* Force rpcrdma_buffer_get() to retry */
+	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
+	dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
+	return nsegs;
+}
+
+/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
+ * an unusable state. Find FRMRs in this state and dereg / reg
+ * each. FRMRs that are VALID and attached to an rpcrdma_req are
+ * also torn down.
+ *
+ * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_frmr_external().
+ */
+static void
+frwr_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+	unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+	struct rpcrdma_mw *r;
+	int rc;
+
+	list_for_each_entry(r, &buf->rb_all, mw_all) {
+		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
+			continue;
+
+		__frwr_release(r);
+		rc = __frwr_init(r, pd, device, depth);
+		if (rc) {
+			dprintk("RPC: %s: mw %p left %s\n",
+				__func__, r,
+				(r->r.frmr.fr_state == FRMR_IS_STALE ?
+					"stale" : "valid"));
+			continue;
+		}
+
+		r->r.frmr.fr_state = FRMR_IS_INVALID;
+	}
+}
+
+static void
+frwr_op_destroy(struct rpcrdma_buffer *buf)
+{
+	struct rpcrdma_mw *r;
+
+	while (!list_empty(&buf->rb_all)) {
+		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+		list_del(&r->mw_all);
+		__frwr_release(r);
+		kfree(r);
+	}
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
+	.ro_map				= frwr_op_map,
+	.ro_unmap			= frwr_op_unmap,
+	.ro_open			= frwr_op_open,
+	.ro_maxpages			= frwr_op_maxpages,
+	.ro_init			= frwr_op_init,
+	.ro_reset			= frwr_op_reset,
+	.ro_destroy			= frwr_op_destroy,
+	.ro_displayname			= "frwr",
+};
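
[Editor's note: the send-queue sizing in frwr_op_open above is easiest to see with concrete numbers. A standalone sketch of the same arithmetic, assuming RPCRDMA_MAX_DATA_SEGS is 64 for illustration:]

/* Sketch only: mirrors the depth accounting in frwr_op_open.
 * With max_frmr_depth = 16: delta = 64 - 16 = 48, and depth steps
 * 7 -> 9 -> 11 -> 13.  Each RPC may then consume up to 13 send WRs,
 * so max_send_wr is multiplied by 13 and, if that exceeds the
 * device's max_qp_wr, max_requests is clamped to max_qp_wr / 13.
 */
static unsigned int frwr_wr_depth(unsigned int max_frmr_depth)
{
	unsigned int depth = 7;	/* head/tail reg+inv pairs plus the SEND */
	int delta = 64 - max_frmr_depth;	/* 64: assumed MAX_DATA_SEGS */

	while (delta > 0) {
		depth += 2;	/* one more FRMR reg + invalidate pair */
		delta -= max_frmr_depth;
	}
	return depth;
}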
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
new file mode 100644
index 000000000000..ba518af16787
--- /dev/null
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc.  All rights reserved.
+ */
+
+/* No-op chunk preparation. All client memory is pre-registered.
+ * Sometimes referred to as ALLPHYSICAL mode.
+ *
+ * Physical registration is simple because all client memory is
+ * pre-registered and never deregistered. This mode is good for
+ * adapter bring up, but is considered not safe: the server is
+ * trusted not to abuse its access to client memory not involved
+ * in RDMA I/O.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+static int
+physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+		 struct rpcrdma_create_data_internal *cdata)
+{
+	return 0;
+}
+
+/* PHYSICAL memory registration conveys one page per chunk segment.
+ */
+static size_t
+physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+		     rpcrdma_max_segments(r_xprt));
+}
+
+static int
+physical_op_init(struct rpcrdma_xprt *r_xprt)
+{
+	return 0;
+}
+
+/* The client's physical memory is already exposed for
+ * remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+		int nsegs, bool writing)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	rpcrdma_map_one(ia->ri_id->device, seg,
+			rpcrdma_data_dir(writing));
+	seg->mr_rkey = ia->ri_bind_mem->rkey;
+	seg->mr_base = seg->mr_dma;
+	seg->mr_nsegs = 1;
+	return 1;
+}
+
+/* Unmap a memory region, but leave it registered.
+ */
+static int
+physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+	read_lock(&ia->ri_qplock);
+	rpcrdma_unmap_one(ia->ri_id->device, seg);
+	read_unlock(&ia->ri_qplock);
+
+	return 1;
+}
+
+static void
+physical_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+}
+
+static void
+physical_op_destroy(struct rpcrdma_buffer *buf)
+{
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
+	.ro_map				= physical_op_map,
+	.ro_unmap			= physical_op_unmap,
+	.ro_open			= physical_op_open,
+	.ro_maxpages			= physical_op_maxpages,
+	.ro_init			= physical_op_init,
+	.ro_reset			= physical_op_reset,
+	.ro_destroy			= physical_op_destroy,
+	.ro_displayname			= "physical",
+};
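
[Editor's note: under ALLPHYSICAL neither map nor unmap posts a Work Request, which is what makes the mode trivial (and why it is considered unsafe). A hedged sketch of one map/unmap round trip through the vtable; the wrapper itself is hypothetical:]

/* Sketch only: one segment in, one segment out.  The rkey stays
 * usable afterward because the underlying region is never
 * deregistered.
 */
static int physical_round_trip(struct rpcrdma_xprt *r_xprt,
			       struct rpcrdma_mr_seg *seg, bool writing)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int n;

	n = ia->ri_ops->ro_map(r_xprt, seg, 1, writing);	/* always 1 */
	if (n < 0)
		return n;
	return ia->ri_ops->ro_unmap(r_xprt, seg);	/* DMA unmap only */
}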
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 91ffde82fa0c..2c53ea9e1b83 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -53,6 +53,14 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif
 
+enum rpcrdma_chunktype {
+	rpcrdma_noch = 0,
+	rpcrdma_readch,
+	rpcrdma_areadch,
+	rpcrdma_writech,
+	rpcrdma_replych
+};
+
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static const char transfertypes[][12] = {
 	"pure inline",	/* no chunks */
@@ -179,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	struct rpcrdma_write_array *warray = NULL;
 	struct rpcrdma_write_chunk *cur_wchunk = NULL;
 	__be32 *iptr = headerp->rm_body.rm_chunks;
+	int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool);
 
 	if (type == rpcrdma_readch || type == rpcrdma_areadch) {
 		/* a read chunk - server will RDMA Read our memory */
@@ -201,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	if (nsegs < 0)
 		return nsegs;
 
+	map = r_xprt->rx_ia.ri_ops->ro_map;
 	do {
-		n = rpcrdma_register_external(seg, nsegs,
-				cur_wchunk != NULL, r_xprt);
+		n = map(r_xprt, seg, nsegs, cur_wchunk != NULL);
 		if (n <= 0)
 			goto out;
 		if (cur_rchunk) {	/* read */
@@ -275,34 +284,13 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	return (unsigned char *)iptr - (unsigned char *)headerp;
 
 out:
-	if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
-		for (pos = 0; nchunks--;)
-			pos += rpcrdma_deregister_external(
-					&req->rl_segments[pos], r_xprt);
-	}
-	return n;
-}
-
-/*
- * Marshal chunks. This routine returns the header length
- * consumed by marshaling.
- *
- * Returns positive RPC/RDMA header size, or negative errno.
- */
-
-ssize_t
-rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
-{
-	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
-	struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf);
-
-	if (req->rl_rtype != rpcrdma_noch)
-		result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
-					       headerp, req->rl_rtype);
-	else if (req->rl_wtype != rpcrdma_noch)
-		result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
-					       headerp, req->rl_wtype);
-	return result;
+	if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+		return n;
+
+	for (pos = 0; nchunks--;)
+		pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
+						      &req->rl_segments[pos]);
+	return n;
 }
 
 /*
@@ -397,6 +385,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	char *base;
 	size_t rpclen, padlen;
 	ssize_t hdrlen;
+	enum rpcrdma_chunktype rtype, wtype;
 	struct rpcrdma_msg *headerp;
 
 	/*
@@ -433,13 +422,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * into pages; otherwise use reply chunks.
 	 */
 	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
-		req->rl_wtype = rpcrdma_noch;
+		wtype = rpcrdma_noch;
 	else if (rqst->rq_rcv_buf.page_len == 0)
-		req->rl_wtype = rpcrdma_replych;
+		wtype = rpcrdma_replych;
 	else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
-		req->rl_wtype = rpcrdma_writech;
+		wtype = rpcrdma_writech;
 	else
-		req->rl_wtype = rpcrdma_replych;
+		wtype = rpcrdma_replych;
 
 	/*
 	 * Chunks needed for arguments?
@@ -456,16 +445,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * TBD check NFSv4 setacl
 	 */
 	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
-		req->rl_rtype = rpcrdma_noch;
+		rtype = rpcrdma_noch;
 	else if (rqst->rq_snd_buf.page_len == 0)
-		req->rl_rtype = rpcrdma_areadch;
+		rtype = rpcrdma_areadch;
 	else
-		req->rl_rtype = rpcrdma_readch;
+		rtype = rpcrdma_readch;
 
 	/* The following simplification is not true forever */
-	if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
-		req->rl_wtype = rpcrdma_noch;
-	if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
+	if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
+		wtype = rpcrdma_noch;
+	if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
 		dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
 			__func__);
 		return -EIO;
@@ -479,7 +468,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * When padding is in use and applies to the transfer, insert
 	 * it and change the message type.
 	 */
-	if (req->rl_rtype == rpcrdma_noch) {
+	if (rtype == rpcrdma_noch) {
 
 		padlen = rpcrdma_inline_pullup(rqst,
 						RPCRDMA_INLINE_PAD_VALUE(rqst));
@@ -494,7 +483,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 		headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
 		headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
 		hdrlen += 2 * sizeof(u32);	/* extra words in padhdr */
-		if (req->rl_wtype != rpcrdma_noch) {
+		if (wtype != rpcrdma_noch) {
 			dprintk("RPC: %s: invalid chunk list\n",
 				__func__);
 			return -EIO;
@@ -515,18 +504,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
 	 * on receive. Therefore, we request a reply chunk
 	 * for non-writes wherever feasible and efficient.
 	 */
-	if (req->rl_wtype == rpcrdma_noch)
-		req->rl_wtype = rpcrdma_replych;
+	if (wtype == rpcrdma_noch)
+		wtype = rpcrdma_replych;
 		}
 	}
 
-	hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
+	if (rtype != rpcrdma_noch) {
+		hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
+					       headerp, rtype);
+		wtype = rtype;	/* simplify dprintk */
+
+	} else if (wtype != rpcrdma_noch) {
+		hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
+					       headerp, wtype);
+	}
 	if (hdrlen < 0)
 		return hdrlen;
 
 	dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
 		" headerp 0x%p base 0x%p lkey 0x%x\n",
-		__func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
+		__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
 		headerp, base, rdmab_lkey(req->rl_rdmabuf));
 
 	/*
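
[Editor's note: the rtype/wtype selection in rpcrdma_marshal_req is a small decision tree. Restated as a self-contained sketch, condensed from the hunks above rather than taken verbatim from the kernel:]

/* Sketch only: how the patched rpcrdma_marshal_req picks the chunk
 * type for the receive buffer (wtype).
 */
static enum rpcrdma_chunktype
choose_wtype(struct rpc_rqst *rqst)
{
	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
		return rpcrdma_noch;	/* whole reply fits inline */
	if (rqst->rq_rcv_buf.page_len == 0)
		return rpcrdma_replych;	/* no page data: use a reply chunk */
	if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
		return rpcrdma_writech;	/* bulk data lands in pages */
	return rpcrdma_replych;
}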
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 2e192baa59f3..54f23b1be986 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -157,12 +157,47 @@ static struct ctl_table sunrpc_table[] = {
 static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
 
 static void
+xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+	char buf[20];
+
+	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+	xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA;
+}
+
+static void
+xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
+{
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+	char buf[40];
+
+	snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+	xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
+}
+
+static void
 xprt_rdma_format_addresses(struct rpc_xprt *xprt)
 {
 	struct sockaddr *sap = (struct sockaddr *)
 					&rpcx_to_rdmad(xprt).addr;
-	struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-	char buf[64];
+	char buf[128];
+
+	switch (sap->sa_family) {
+	case AF_INET:
+		xprt_rdma_format_addresses4(xprt, sap);
+		break;
+	case AF_INET6:
+		xprt_rdma_format_addresses6(xprt, sap);
+		break;
+	default:
+		pr_err("rpcrdma: Unrecognized address family\n");
+		return;
+	}
 
 	(void)rpc_ntop(sap, buf, sizeof(buf));
 	xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
@@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
 	snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
 	xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
 
-	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
-
-	snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
-	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
-
 	snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
 	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
 
-	/* netid */
-	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
+	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
 }
 
 static void
@@ -377,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args)
 			xprt_rdma_connect_worker);
 
 	xprt_rdma_format_addresses(xprt);
-	xprt->max_payload = rpcrdma_max_payload(new_xprt);
+	xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
+	if (xprt->max_payload == 0)
+		goto out4;
+	xprt->max_payload <<= PAGE_SHIFT;
 	dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
 		__func__, xprt->max_payload);
 
@@ -552,8 +584,8 @@ xprt_rdma_free(void *buffer)
 
 	for (i = 0; req->rl_nchunks;) {
 		--req->rl_nchunks;
-		i += rpcrdma_deregister_external(
-			&req->rl_segments[i], r_xprt);
+		i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
+						    &req->rl_segments[i]);
 	}
 
 	rpcrdma_buffer_put(req);
@@ -579,10 +611,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 	int rc = 0;
 
-	if (req->rl_niovs == 0)
-		rc = rpcrdma_marshal_req(rqst);
-	else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
-		rc = rpcrdma_marshal_chunks(rqst, 0);
+	rc = rpcrdma_marshal_req(rqst);
 	if (rc < 0)
 		goto failed_marshal;
 
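
[Editor's note: a worked example of the new max_payload derivation in xprt_setup_rdma. ro_maxpages() reports a page count; the setup path shifts it into bytes. Assuming a 4 KB page size (PAGE_SHIFT = 12) and ro_maxpages() returning 64:]

/* Sketch only: 64 << 12 = 262144 bytes, so the transport would
 * advertise a 256 KB maximum payload.
 */
static size_t max_payload_bytes(size_t maxpages)
{
	return maxpages << PAGE_SHIFT;	/* pages -> bytes */
}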
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 124676c13780..4870d272e006 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/interrupt.h> | 50 | #include <linux/interrupt.h> |
51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | #include <linux/prefetch.h> | 52 | #include <linux/prefetch.h> |
53 | #include <linux/sunrpc/addr.h> | ||
53 | #include <asm/bitops.h> | 54 | #include <asm/bitops.h> |
54 | 55 | ||
55 | #include "xprt_rdma.h" | 56 | #include "xprt_rdma.h" |
@@ -62,9 +63,6 @@ | |||
62 | # define RPCDBG_FACILITY RPCDBG_TRANS | 63 | # define RPCDBG_FACILITY RPCDBG_TRANS |
63 | #endif | 64 | #endif |
64 | 65 | ||
65 | static void rpcrdma_reset_frmrs(struct rpcrdma_ia *); | ||
66 | static void rpcrdma_reset_fmrs(struct rpcrdma_ia *); | ||
67 | |||
68 | /* | 66 | /* |
69 | * internal functions | 67 | * internal functions |
70 | */ | 68 | */ |
@@ -188,7 +186,7 @@ static const char * const wc_status[] = { | |||
188 | "remote access error", | 186 | "remote access error", |
189 | "remote operation error", | 187 | "remote operation error", |
190 | "transport retry counter exceeded", | 188 | "transport retry counter exceeded", |
191 | "RNR retrycounter exceeded", | 189 | "RNR retry counter exceeded", |
192 | "local RDD violation error", | 190 | "local RDD violation error", |
193 | "remove invalid RD request", | 191 | "remove invalid RD request", |
194 | "operation aborted", | 192 | "operation aborted", |
@@ -206,21 +204,17 @@ static const char * const wc_status[] = { | |||
206 | static void | 204 | static void |
207 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) | 205 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) |
208 | { | 206 | { |
209 | if (likely(wc->status == IB_WC_SUCCESS)) | ||
210 | return; | ||
211 | |||
212 | /* WARNING: Only wr_id and status are reliable at this point */ | 207 | /* WARNING: Only wr_id and status are reliable at this point */ |
213 | if (wc->wr_id == 0ULL) { | 208 | if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) { |
214 | if (wc->status != IB_WC_WR_FLUSH_ERR) | 209 | if (wc->status != IB_WC_SUCCESS && |
210 | wc->status != IB_WC_WR_FLUSH_ERR) | ||
215 | pr_err("RPC: %s: SEND: %s\n", | 211 | pr_err("RPC: %s: SEND: %s\n", |
216 | __func__, COMPLETION_MSG(wc->status)); | 212 | __func__, COMPLETION_MSG(wc->status)); |
217 | } else { | 213 | } else { |
218 | struct rpcrdma_mw *r; | 214 | struct rpcrdma_mw *r; |
219 | 215 | ||
220 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | 216 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
221 | r->r.frmr.fr_state = FRMR_IS_STALE; | 217 | r->mw_sendcompletion(wc); |
222 | pr_err("RPC: %s: frmr %p (stale): %s\n", | ||
223 | __func__, r, COMPLETION_MSG(wc->status)); | ||
224 | } | 218 | } |
225 | } | 219 | } |
226 | 220 | ||
@@ -424,7 +418,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) | |||
424 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 418 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
425 | struct rpcrdma_ep *ep = &xprt->rx_ep; | 419 | struct rpcrdma_ep *ep = &xprt->rx_ep; |
426 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 420 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
427 | struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; | 421 | struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr; |
428 | #endif | 422 | #endif |
429 | struct ib_qp_attr *attr = &ia->ri_qp_attr; | 423 | struct ib_qp_attr *attr = &ia->ri_qp_attr; |
430 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; | 424 | struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; |
@@ -480,9 +474,8 @@ connected: | |||
480 | wake_up_all(&ep->rep_connect_wait); | 474 | wake_up_all(&ep->rep_connect_wait); |
481 | /*FALLTHROUGH*/ | 475 | /*FALLTHROUGH*/ |
482 | default: | 476 | default: |
483 | dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n", | 477 | dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n", |
484 | __func__, &addr->sin_addr.s_addr, | 478 | __func__, sap, rpc_get_port(sap), ep, |
485 | ntohs(addr->sin_port), ep, | ||
486 | CONNECTION_MSG(event->event)); | 479 | CONNECTION_MSG(event->event)); |
487 | break; | 480 | break; |
488 | } | 481 | } |
@@ -491,19 +484,16 @@ connected: | |||
491 | if (connstate == 1) { | 484 | if (connstate == 1) { |
492 | int ird = attr->max_dest_rd_atomic; | 485 | int ird = attr->max_dest_rd_atomic; |
493 | int tird = ep->rep_remote_cma.responder_resources; | 486 | int tird = ep->rep_remote_cma.responder_resources; |
494 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u " | 487 | |
495 | "on %s, memreg %d slots %d ird %d%s\n", | 488 | pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", |
496 | &addr->sin_addr.s_addr, | 489 | sap, rpc_get_port(sap), |
497 | ntohs(addr->sin_port), | ||
498 | ia->ri_id->device->name, | 490 | ia->ri_id->device->name, |
499 | ia->ri_memreg_strategy, | 491 | ia->ri_ops->ro_displayname, |
500 | xprt->rx_buf.rb_max_requests, | 492 | xprt->rx_buf.rb_max_requests, |
501 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | 493 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); |
502 | } else if (connstate < 0) { | 494 | } else if (connstate < 0) { |
503 | printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n", | 495 | pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n", |
504 | &addr->sin_addr.s_addr, | 496 | sap, rpc_get_port(sap), connstate); |
505 | ntohs(addr->sin_port), | ||
506 | connstate); | ||
507 | } | 497 | } |
508 | #endif | 498 | #endif |
509 | 499 | ||
@@ -621,17 +611,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
621 | 611 | ||
622 | if (memreg == RPCRDMA_FRMR) { | 612 | if (memreg == RPCRDMA_FRMR) { |
623 | /* Requires both frmr reg and local dma lkey */ | 613 | /* Requires both frmr reg and local dma lkey */ |
624 | if ((devattr->device_cap_flags & | 614 | if (((devattr->device_cap_flags & |
625 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | 615 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != |
626 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | 616 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) || |
617 | (devattr->max_fast_reg_page_list_len == 0)) { | ||
627 | dprintk("RPC: %s: FRMR registration " | 618 | dprintk("RPC: %s: FRMR registration " |
628 | "not supported by HCA\n", __func__); | 619 | "not supported by HCA\n", __func__); |
629 | memreg = RPCRDMA_MTHCAFMR; | 620 | memreg = RPCRDMA_MTHCAFMR; |
630 | } else { | ||
631 | /* Mind the ia limit on FRMR page list depth */ | ||
632 | ia->ri_max_frmr_depth = min_t(unsigned int, | ||
633 | RPCRDMA_MAX_DATA_SEGS, | ||
634 | devattr->max_fast_reg_page_list_len); | ||
635 | } | 621 | } |
636 | } | 622 | } |
637 | if (memreg == RPCRDMA_MTHCAFMR) { | 623 | if (memreg == RPCRDMA_MTHCAFMR) { |
@@ -652,13 +638,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
652 | */ | 638 | */ |
653 | switch (memreg) { | 639 | switch (memreg) { |
654 | case RPCRDMA_FRMR: | 640 | case RPCRDMA_FRMR: |
641 | ia->ri_ops = &rpcrdma_frwr_memreg_ops; | ||
655 | break; | 642 | break; |
656 | case RPCRDMA_ALLPHYSICAL: | 643 | case RPCRDMA_ALLPHYSICAL: |
644 | ia->ri_ops = &rpcrdma_physical_memreg_ops; | ||
657 | mem_priv = IB_ACCESS_LOCAL_WRITE | | 645 | mem_priv = IB_ACCESS_LOCAL_WRITE | |
658 | IB_ACCESS_REMOTE_WRITE | | 646 | IB_ACCESS_REMOTE_WRITE | |
659 | IB_ACCESS_REMOTE_READ; | 647 | IB_ACCESS_REMOTE_READ; |
660 | goto register_setup; | 648 | goto register_setup; |
661 | case RPCRDMA_MTHCAFMR: | 649 | case RPCRDMA_MTHCAFMR: |
650 | ia->ri_ops = &rpcrdma_fmr_memreg_ops; | ||
662 | if (ia->ri_have_dma_lkey) | 651 | if (ia->ri_have_dma_lkey) |
663 | break; | 652 | break; |
664 | mem_priv = IB_ACCESS_LOCAL_WRITE; | 653 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
@@ -678,8 +667,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
678 | rc = -ENOMEM; | 667 | rc = -ENOMEM; |
679 | goto out3; | 668 | goto out3; |
680 | } | 669 | } |
681 | dprintk("RPC: %s: memory registration strategy is %d\n", | 670 | dprintk("RPC: %s: memory registration strategy is '%s'\n", |
682 | __func__, memreg); | 671 | __func__, ia->ri_ops->ro_displayname); |
683 | 672 | ||
684 | /* Else will do memory reg/dereg for each chunk */ | 673 | /* Else will do memory reg/dereg for each chunk */ |
685 | ia->ri_memreg_strategy = memreg; | 674 | ia->ri_memreg_strategy = memreg; |
@@ -743,49 +732,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
743 | 732 | ||
744 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; | 733 | ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; |
745 | ep->rep_attr.qp_context = ep; | 734 | ep->rep_attr.qp_context = ep; |
746 | /* send_cq and recv_cq initialized below */ | ||
747 | ep->rep_attr.srq = NULL; | 735 | ep->rep_attr.srq = NULL; |
748 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 736 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
749 | switch (ia->ri_memreg_strategy) { | 737 | rc = ia->ri_ops->ro_open(ia, ep, cdata); |
750 | case RPCRDMA_FRMR: { | 738 | if (rc) |
751 | int depth = 7; | 739 | return rc; |
752 | |||
753 | /* Add room for frmr register and invalidate WRs. | ||
754 | * 1. FRMR reg WR for head | ||
755 | * 2. FRMR invalidate WR for head | ||
756 | * 3. N FRMR reg WRs for pagelist | ||
757 | * 4. N FRMR invalidate WRs for pagelist | ||
758 | * 5. FRMR reg WR for tail | ||
759 | * 6. FRMR invalidate WR for tail | ||
760 | * 7. The RDMA_SEND WR | ||
761 | */ | ||
762 | |||
763 | /* Calculate N if the device max FRMR depth is smaller than | ||
764 | * RPCRDMA_MAX_DATA_SEGS. | ||
765 | */ | ||
766 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
767 | int delta = RPCRDMA_MAX_DATA_SEGS - | ||
768 | ia->ri_max_frmr_depth; | ||
769 | |||
770 | do { | ||
771 | depth += 2; /* FRMR reg + invalidate */ | ||
772 | delta -= ia->ri_max_frmr_depth; | ||
773 | } while (delta > 0); | ||
774 | |||
775 | } | ||
776 | ep->rep_attr.cap.max_send_wr *= depth; | ||
777 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | ||
778 | cdata->max_requests = devattr->max_qp_wr / depth; | ||
779 | if (!cdata->max_requests) | ||
780 | return -EINVAL; | ||
781 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | ||
782 | depth; | ||
783 | } | ||
784 | break; | ||
785 | } | ||
786 | default: | ||
787 | break; | ||
788 | } | ||
789 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; | 740 | ep->rep_attr.cap.max_recv_wr = cdata->max_requests; |
790 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); | 741 | ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2); |
791 | ep->rep_attr.cap.max_recv_sge = 1; | 742 | ep->rep_attr.cap.max_recv_sge = 1; |
@@ -944,21 +895,9 @@ retry: | |||
944 | rpcrdma_ep_disconnect(ep, ia); | 895 | rpcrdma_ep_disconnect(ep, ia); |
945 | rpcrdma_flush_cqs(ep); | 896 | rpcrdma_flush_cqs(ep); |
946 | 897 | ||
947 | switch (ia->ri_memreg_strategy) { | ||
948 | case RPCRDMA_FRMR: | ||
949 | rpcrdma_reset_frmrs(ia); | ||
950 | break; | ||
951 | case RPCRDMA_MTHCAFMR: | ||
952 | rpcrdma_reset_fmrs(ia); | ||
953 | break; | ||
954 | case RPCRDMA_ALLPHYSICAL: | ||
955 | break; | ||
956 | default: | ||
957 | rc = -EIO; | ||
958 | goto out; | ||
959 | } | ||
960 | |||
961 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 898 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
899 | ia->ri_ops->ro_reset(xprt); | ||
900 | |||
962 | id = rpcrdma_create_id(xprt, ia, | 901 | id = rpcrdma_create_id(xprt, ia, |
963 | (struct sockaddr *)&xprt->rx_data.addr); | 902 | (struct sockaddr *)&xprt->rx_data.addr); |
964 | if (IS_ERR(id)) { | 903 | if (IS_ERR(id)) { |
@@ -1123,91 +1062,6 @@ out: | |||
1123 | return ERR_PTR(rc); | 1062 | return ERR_PTR(rc); |
1124 | } | 1063 | } |
1125 | 1064 | ||
1126 | static int | ||
1127 | rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1128 | { | ||
1129 | int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ; | ||
1130 | struct ib_fmr_attr fmr_attr = { | ||
1131 | .max_pages = RPCRDMA_MAX_DATA_SEGS, | ||
1132 | .max_maps = 1, | ||
1133 | .page_shift = PAGE_SHIFT | ||
1134 | }; | ||
1135 | struct rpcrdma_mw *r; | ||
1136 | int i, rc; | ||
1137 | |||
1138 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1139 | dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); | ||
1140 | |||
1141 | while (i--) { | ||
1142 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1143 | if (r == NULL) | ||
1144 | return -ENOMEM; | ||
1145 | |||
1146 | r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr); | ||
1147 | if (IS_ERR(r->r.fmr)) { | ||
1148 | rc = PTR_ERR(r->r.fmr); | ||
1149 | dprintk("RPC: %s: ib_alloc_fmr failed %i\n", | ||
1150 | __func__, rc); | ||
1151 | goto out_free; | ||
1152 | } | ||
1153 | |||
1154 | list_add(&r->mw_list, &buf->rb_mws); | ||
1155 | list_add(&r->mw_all, &buf->rb_all); | ||
1156 | } | ||
1157 | return 0; | ||
1158 | |||
1159 | out_free: | ||
1160 | kfree(r); | ||
1161 | return rc; | ||
1162 | } | ||
1163 | |||
1164 | static int | ||
1165 | rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) | ||
1166 | { | ||
1167 | struct rpcrdma_frmr *f; | ||
1168 | struct rpcrdma_mw *r; | ||
1169 | int i, rc; | ||
1170 | |||
1171 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | ||
1172 | dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); | ||
1173 | |||
1174 | while (i--) { | ||
1175 | r = kzalloc(sizeof(*r), GFP_KERNEL); | ||
1176 | if (r == NULL) | ||
1177 | return -ENOMEM; | ||
1178 | f = &r->r.frmr; | ||
1179 | |||
1180 | f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1181 | ia->ri_max_frmr_depth); | ||
1182 | if (IS_ERR(f->fr_mr)) { | ||
1183 | rc = PTR_ERR(f->fr_mr); | ||
1184 | dprintk("RPC: %s: ib_alloc_fast_reg_mr " | ||
1185 | "failed %i\n", __func__, rc); | ||
1186 | goto out_free; | ||
1187 | } | ||
1188 | |||
1189 | f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device, | ||
1190 | ia->ri_max_frmr_depth); | ||
1191 | if (IS_ERR(f->fr_pgl)) { | ||
1192 | rc = PTR_ERR(f->fr_pgl); | ||
1193 | dprintk("RPC: %s: ib_alloc_fast_reg_page_list " | ||
1194 | "failed %i\n", __func__, rc); | ||
1195 | |||
1196 | ib_dereg_mr(f->fr_mr); | ||
1197 | goto out_free; | ||
1198 | } | ||
1199 | |||
1200 | list_add(&r->mw_list, &buf->rb_mws); | ||
1201 | list_add(&r->mw_all, &buf->rb_all); | ||
1202 | } | ||
1203 | |||
1204 | return 0; | ||
1205 | |||
1206 | out_free: | ||
1207 | kfree(r); | ||
1208 | return rc; | ||
1209 | } | ||
1210 | |||
1211 | int | 1065 | int |
1212 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | 1066 | rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) |
1213 | { | 1067 | { |
@@ -1244,22 +1098,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) | |||
1244 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; | 1098 | buf->rb_recv_bufs = (struct rpcrdma_rep **) p; |
1245 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; | 1099 | p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; |
1246 | 1100 | ||
1247 | INIT_LIST_HEAD(&buf->rb_mws); | 1101 | rc = ia->ri_ops->ro_init(r_xprt); |
1248 | INIT_LIST_HEAD(&buf->rb_all); | 1102 | if (rc) |
1249 | switch (ia->ri_memreg_strategy) { | 1103 | goto out; |
1250 | case RPCRDMA_FRMR: | ||
1251 | rc = rpcrdma_init_frmrs(ia, buf); | ||
1252 | if (rc) | ||
1253 | goto out; | ||
1254 | break; | ||
1255 | case RPCRDMA_MTHCAFMR: | ||
1256 | rc = rpcrdma_init_fmrs(ia, buf); | ||
1257 | if (rc) | ||
1258 | goto out; | ||
1259 | break; | ||
1260 | default: | ||
1261 | break; | ||
1262 | } | ||
1263 | 1104 | ||
1264 | for (i = 0; i < buf->rb_max_requests; i++) { | 1105 | for (i = 0; i < buf->rb_max_requests; i++) { |
1265 | struct rpcrdma_req *req; | 1106 | struct rpcrdma_req *req; |
@@ -1311,47 +1152,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) | |||
1311 | kfree(req); | 1152 | kfree(req); |
1312 | } | 1153 | } |
1313 | 1154 | ||
1314 | static void | ||
1315 | rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) | ||
1316 | { | ||
1317 | struct rpcrdma_mw *r; | ||
1318 | int rc; | ||
1319 | |||
1320 | while (!list_empty(&buf->rb_all)) { | ||
1321 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1322 | list_del(&r->mw_all); | ||
1323 | list_del(&r->mw_list); | ||
1324 | |||
1325 | rc = ib_dealloc_fmr(r->r.fmr); | ||
1326 | if (rc) | ||
1327 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | ||
1328 | __func__, rc); | ||
1329 | |||
1330 | kfree(r); | ||
1331 | } | ||
1332 | } | ||
1333 | |||
1334 | static void | ||
1335 | rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf) | ||
1336 | { | ||
1337 | struct rpcrdma_mw *r; | ||
1338 | int rc; | ||
1339 | |||
1340 | while (!list_empty(&buf->rb_all)) { | ||
1341 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | ||
1342 | list_del(&r->mw_all); | ||
1343 | list_del(&r->mw_list); | ||
1344 | |||
1345 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1346 | if (rc) | ||
1347 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1348 | __func__, rc); | ||
1349 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1350 | |||
1351 | kfree(r); | ||
1352 | } | ||
1353 | } | ||
1354 | |||
1355 | void | 1155 | void |
1356 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | 1156 | rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) |
1357 | { | 1157 | { |
@@ -1372,104 +1172,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1372 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); | 1172 | rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); |
1373 | } | 1173 | } |
1374 | 1174 | ||
1375 | switch (ia->ri_memreg_strategy) { | 1175 | ia->ri_ops->ro_destroy(buf); |
1376 | case RPCRDMA_FRMR: | ||
1377 | rpcrdma_destroy_frmrs(buf); | ||
1378 | break; | ||
1379 | case RPCRDMA_MTHCAFMR: | ||
1380 | rpcrdma_destroy_fmrs(buf); | ||
1381 | break; | ||
1382 | default: | ||
1383 | break; | ||
1384 | } | ||
1385 | 1176 | ||
1386 | kfree(buf->rb_pool); | 1177 | kfree(buf->rb_pool); |
1387 | } | 1178 | } |
1388 | 1179 | ||
1389 | /* After a disconnect, unmap all FMRs. | ||
1390 | * | ||
1391 | * This is invoked only in the transport connect worker in order | ||
1392 | * to serialize with rpcrdma_register_fmr_external(). | ||
1393 | */ | ||
1394 | static void | ||
1395 | rpcrdma_reset_fmrs(struct rpcrdma_ia *ia) | ||
1396 | { | ||
1397 | struct rpcrdma_xprt *r_xprt = | ||
1398 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1399 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1400 | struct list_head *pos; | ||
1401 | struct rpcrdma_mw *r; | ||
1402 | LIST_HEAD(l); | ||
1403 | int rc; | ||
1404 | |||
1405 | list_for_each(pos, &buf->rb_all) { | ||
1406 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
1407 | |||
1408 | INIT_LIST_HEAD(&l); | ||
1409 | list_add(&r->r.fmr->list, &l); | ||
1410 | rc = ib_unmap_fmr(&l); | ||
1411 | if (rc) | ||
1412 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
1413 | __func__, rc); | ||
1414 | } | ||
1415 | } | ||
1416 | |||
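As with teardown, this unmap-everything loop presumably moves behind the new ro_reset method. A sketch of the FMR version, keeping the removed logic but taking the rpcrdma_xprt that the method signature passes (the fmr_op_reset name is an assumption):

	static void
	fmr_op_reset(struct rpcrdma_xprt *r_xprt)
	{
		struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
		struct rpcrdma_mw *r;
		LIST_HEAD(l);
		int rc;

		list_for_each_entry(r, &buf->rb_all, mw_all) {
			INIT_LIST_HEAD(&l);
			list_add(&r->r.fmr->list, &l);
			rc = ib_unmap_fmr(&l);
			if (rc)
				dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
					__func__, rc);
		}
	}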
1417 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
1418 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
1419 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
1420 | * also torn down. | ||
1421 | * | ||
1422 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
1423 | * | ||
1424 | * This is invoked only in the transport connect worker in order | ||
1425 | * to serialize with rpcrdma_register_frmr_external(). | ||
1426 | */ | ||
1427 | static void | ||
1428 | rpcrdma_reset_frmrs(struct rpcrdma_ia *ia) | ||
1429 | { | ||
1430 | struct rpcrdma_xprt *r_xprt = | ||
1431 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1432 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
1433 | struct list_head *pos; | ||
1434 | struct rpcrdma_mw *r; | ||
1435 | int rc; | ||
1436 | |||
1437 | list_for_each(pos, &buf->rb_all) { | ||
1438 | r = list_entry(pos, struct rpcrdma_mw, mw_all); | ||
1439 | |||
1440 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
1441 | continue; | ||
1442 | |||
1443 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
1444 | if (rc) | ||
1445 | dprintk("RPC: %s: ib_dereg_mr failed %i\n", | ||
1446 | __func__, rc); | ||
1447 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
1448 | |||
1449 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | ||
1450 | ia->ri_max_frmr_depth); | ||
1451 | if (IS_ERR(r->r.frmr.fr_mr)) { | ||
1452 | rc = PTR_ERR(r->r.frmr.fr_mr); | ||
1453 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | ||
1454 | " failed %i\n", __func__, rc); | ||
1455 | continue; | ||
1456 | } | ||
1457 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( | ||
1458 | ia->ri_id->device, | ||
1459 | ia->ri_max_frmr_depth); | ||
1460 | if (IS_ERR(r->r.frmr.fr_pgl)) { | ||
1461 | rc = PTR_ERR(r->r.frmr.fr_pgl); | ||
1462 | dprintk("RPC: %s: " | ||
1463 | "ib_alloc_fast_reg_page_list " | ||
1464 | "failed %i\n", __func__, rc); | ||
1465 | |||
1466 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
1467 | continue; | ||
1468 | } | ||
1469 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1470 | } | ||
1471 | } | ||
1472 | |||
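Both reset paths now sit behind one indirect call; the connect worker presumably shrinks to the following (a sketch; that hunk is not shown here):

	/* in rpcrdma_ep_connect(), on reconnect: */
	ia->ri_ops->ro_reset(r_xprt);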
1473 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving | 1180 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving |
1474 | * some req segments uninitialized. | 1181 | * some req segments uninitialized. |
1475 | */ | 1182 | */ |
@@ -1509,7 +1216,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | |||
1509 | } | 1216 | } |
1510 | } | 1217 | } |
1511 | 1218 | ||
1512 | /* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external(). | 1219 | /* rpcrdma_unmap_one() was already done during deregistration. |
1513 | * Redo only the ib_post_send(). | 1220 | * Redo only the ib_post_send(). |
1514 | */ | 1221 | */ |
1515 | static void | 1222 | static void |
@@ -1729,6 +1436,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | |||
1729 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. | 1436 | * Wrappers for internal-use kmalloc memory registration, used by buffer code. |
1730 | */ | 1437 | */ |
1731 | 1438 | ||
1439 | void | ||
1440 | rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg) | ||
1441 | { | ||
1442 | dprintk("RPC: map_one: offset %p iova %llx len %zu\n", | ||
1443 | seg->mr_offset, | ||
1444 | (unsigned long long)seg->mr_dma, seg->mr_dmalen); | ||
1445 | } | ||
1446 | |||
1732 | static int | 1447 | static int |
1733 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | 1448 | rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, |
1734 | struct ib_mr **mrp, struct ib_sge *iov) | 1449 | struct ib_mr **mrp, struct ib_sge *iov) |
@@ -1854,287 +1569,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |||
1854 | } | 1569 | } |
1855 | 1570 | ||
1856 | /* | 1571 | /* |
1857 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
1858 | */ | ||
1859 | |||
1860 | static void | ||
1861 | rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) | ||
1862 | { | ||
1863 | seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
1864 | seg->mr_dmalen = seg->mr_len; | ||
1865 | if (seg->mr_page) | ||
1866 | seg->mr_dma = ib_dma_map_page(ia->ri_id->device, | ||
1867 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
1868 | seg->mr_dmalen, seg->mr_dir); | ||
1869 | else | ||
1870 | seg->mr_dma = ib_dma_map_single(ia->ri_id->device, | ||
1871 | seg->mr_offset, | ||
1872 | seg->mr_dmalen, seg->mr_dir); | ||
1873 | if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) { | ||
1874 | dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n", | ||
1875 | __func__, | ||
1876 | (unsigned long long)seg->mr_dma, | ||
1877 | seg->mr_offset, seg->mr_dmalen); | ||
1878 | } | ||
1879 | } | ||
1880 | |||
1881 | static void | ||
1882 | rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) | ||
1883 | { | ||
1884 | if (seg->mr_page) | ||
1885 | ib_dma_unmap_page(ia->ri_id->device, | ||
1886 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1887 | else | ||
1888 | ib_dma_unmap_single(ia->ri_id->device, | ||
1889 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
1890 | } | ||
1891 | |||
1892 | static int | ||
1893 | rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1894 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
1895 | struct rpcrdma_xprt *r_xprt) | ||
1896 | { | ||
1897 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1898 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
1899 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | ||
1900 | struct ib_mr *mr = frmr->fr_mr; | ||
1901 | struct ib_send_wr fastreg_wr, *bad_wr; | ||
1902 | u8 key; | ||
1903 | int len, pageoff; | ||
1904 | int i, rc; | ||
1905 | int seg_len; | ||
1906 | u64 pa; | ||
1907 | int page_no; | ||
1908 | |||
1909 | pageoff = offset_in_page(seg1->mr_offset); | ||
1910 | seg1->mr_offset -= pageoff; /* start of page */ | ||
1911 | seg1->mr_len += pageoff; | ||
1912 | len = -pageoff; | ||
1913 | if (*nsegs > ia->ri_max_frmr_depth) | ||
1914 | *nsegs = ia->ri_max_frmr_depth; | ||
1915 | for (page_no = i = 0; i < *nsegs;) { | ||
1916 | rpcrdma_map_one(ia, seg, writing); | ||
1917 | pa = seg->mr_dma; | ||
1918 | for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { | ||
1919 | frmr->fr_pgl->page_list[page_no++] = pa; | ||
1920 | pa += PAGE_SIZE; | ||
1921 | } | ||
1922 | len += seg->mr_len; | ||
1923 | ++seg; | ||
1924 | ++i; | ||
1925 | /* Check for holes */ | ||
1926 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
1927 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
1928 | break; | ||
1929 | } | ||
1930 | dprintk("RPC: %s: Using frmr %p to map %d segments\n", | ||
1931 | __func__, mw, i); | ||
1932 | |||
1933 | frmr->fr_state = FRMR_IS_VALID; | ||
1934 | |||
1935 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | ||
1936 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | ||
1937 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | ||
1938 | fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma; | ||
1939 | fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; | ||
1940 | fastreg_wr.wr.fast_reg.page_list_len = page_no; | ||
1941 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | ||
1942 | fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; | ||
1943 | if (fastreg_wr.wr.fast_reg.length < len) { | ||
1944 | rc = -EIO; | ||
1945 | goto out_err; | ||
1946 | } | ||
1947 | |||
1948 | /* Bump the key */ | ||
1949 | key = (u8)(mr->rkey & 0x000000FF); | ||
1950 | ib_update_fast_reg_key(mr, ++key); | ||
1951 | |||
1952 | fastreg_wr.wr.fast_reg.access_flags = (writing ? | ||
1953 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | ||
1954 | IB_ACCESS_REMOTE_READ); | ||
1955 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | ||
1956 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1957 | |||
1958 | rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); | ||
1959 | if (rc) { | ||
1960 | dprintk("RPC: %s: failed ib_post_send for register," | ||
1961 | " status %i\n", __func__, rc); | ||
1962 | ib_update_fast_reg_key(mr, --key); | ||
1963 | goto out_err; | ||
1964 | } else { | ||
1965 | seg1->mr_rkey = mr->rkey; | ||
1966 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
1967 | seg1->mr_nsegs = i; | ||
1968 | seg1->mr_len = len; | ||
1969 | } | ||
1970 | *nsegs = i; | ||
1971 | return 0; | ||
1972 | out_err: | ||
1973 | frmr->fr_state = FRMR_IS_INVALID; | ||
1974 | while (i--) | ||
1975 | rpcrdma_unmap_one(ia, --seg); | ||
1976 | return rc; | ||
1977 | } | ||
1978 | |||
1979 | static int | ||
1980 | rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, | ||
1981 | struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) | ||
1982 | { | ||
1983 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1984 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1985 | int rc; | ||
1986 | |||
1987 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1988 | |||
1989 | memset(&invalidate_wr, 0, sizeof invalidate_wr); | ||
1990 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | ||
1991 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1992 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | ||
1993 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1994 | |||
1995 | read_lock(&ia->ri_qplock); | ||
1996 | while (seg1->mr_nsegs--) | ||
1997 | rpcrdma_unmap_one(ia, seg++); | ||
1998 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1999 | read_unlock(&ia->ri_qplock); | ||
2000 | if (rc) { | ||
2001 | /* Force rpcrdma_buffer_get() to retry */ | ||
2002 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
2003 | dprintk("RPC: %s: failed ib_post_send for invalidate," | ||
2004 | " status %i\n", __func__, rc); | ||
2005 | } | ||
2006 | return rc; | ||
2007 | } | ||
2008 | |||
2009 | static int | ||
2010 | rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, | ||
2011 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
2012 | { | ||
2013 | struct rpcrdma_mr_seg *seg1 = seg; | ||
2014 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
2015 | int len, pageoff, i, rc; | ||
2016 | |||
2017 | pageoff = offset_in_page(seg1->mr_offset); | ||
2018 | seg1->mr_offset -= pageoff; /* start of page */ | ||
2019 | seg1->mr_len += pageoff; | ||
2020 | len = -pageoff; | ||
2021 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
2022 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
2023 | for (i = 0; i < *nsegs;) { | ||
2024 | rpcrdma_map_one(ia, seg, writing); | ||
2025 | physaddrs[i] = seg->mr_dma; | ||
2026 | len += seg->mr_len; | ||
2027 | ++seg; | ||
2028 | ++i; | ||
2029 | /* Check for holes */ | ||
2030 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
2031 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | ||
2032 | break; | ||
2033 | } | ||
2034 | rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); | ||
2035 | if (rc) { | ||
2036 | dprintk("RPC: %s: failed ib_map_phys_fmr " | ||
2037 | "%u@0x%llx+%i (%d)... status %i\n", __func__, | ||
2038 | len, (unsigned long long)seg1->mr_dma, | ||
2039 | pageoff, i, rc); | ||
2040 | while (i--) | ||
2041 | rpcrdma_unmap_one(ia, --seg); | ||
2042 | } else { | ||
2043 | seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; | ||
2044 | seg1->mr_base = seg1->mr_dma + pageoff; | ||
2045 | seg1->mr_nsegs = i; | ||
2046 | seg1->mr_len = len; | ||
2047 | } | ||
2048 | *nsegs = i; | ||
2049 | return rc; | ||
2050 | } | ||
2051 | |||
2052 | static int | ||
2053 | rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | ||
2054 | struct rpcrdma_ia *ia) | ||
2055 | { | ||
2056 | struct rpcrdma_mr_seg *seg1 = seg; | ||
2057 | LIST_HEAD(l); | ||
2058 | int rc; | ||
2059 | |||
2060 | list_add(&seg1->rl_mw->r.fmr->list, &l); | ||
2061 | rc = ib_unmap_fmr(&l); | ||
2062 | read_lock(&ia->ri_qplock); | ||
2063 | while (seg1->mr_nsegs--) | ||
2064 | rpcrdma_unmap_one(ia, seg++); | ||
2065 | read_unlock(&ia->ri_qplock); | ||
2066 | if (rc) | ||
2067 | dprintk("RPC: %s: failed ib_unmap_fmr," | ||
2068 | " status %i\n", __func__, rc); | ||
2069 | return rc; | ||
2070 | } | ||
2071 | |||
2072 | int | ||
2073 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | ||
2074 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | ||
2075 | { | ||
2076 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
2077 | int rc = 0; | ||
2078 | |||
2079 | switch (ia->ri_memreg_strategy) { | ||
2080 | |||
2081 | case RPCRDMA_ALLPHYSICAL: | ||
2082 | rpcrdma_map_one(ia, seg, writing); | ||
2083 | seg->mr_rkey = ia->ri_bind_mem->rkey; | ||
2084 | seg->mr_base = seg->mr_dma; | ||
2085 | seg->mr_nsegs = 1; | ||
2086 | nsegs = 1; | ||
2087 | break; | ||
2088 | |||
2089 | /* Registration using frmr registration */ | ||
2090 | case RPCRDMA_FRMR: | ||
2091 | rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); | ||
2092 | break; | ||
2093 | |||
2094 | /* Registration using fmr memory registration */ | ||
2095 | case RPCRDMA_MTHCAFMR: | ||
2096 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | ||
2097 | break; | ||
2098 | |||
2099 | default: | ||
2100 | return -EIO; | ||
2101 | } | ||
2102 | if (rc) | ||
2103 | return rc; | ||
2104 | |||
2105 | return nsegs; | ||
2106 | } | ||
2107 | |||
2108 | int | ||
2109 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | ||
2110 | struct rpcrdma_xprt *r_xprt) | ||
2111 | { | ||
2112 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | ||
2113 | int nsegs = seg->mr_nsegs, rc; | ||
2114 | |||
2115 | switch (ia->ri_memreg_strategy) { | ||
2116 | |||
2117 | case RPCRDMA_ALLPHYSICAL: | ||
2118 | read_lock(&ia->ri_qplock); | ||
2119 | rpcrdma_unmap_one(ia, seg); | ||
2120 | read_unlock(&ia->ri_qplock); | ||
2121 | break; | ||
2122 | |||
2123 | case RPCRDMA_FRMR: | ||
2124 | rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); | ||
2125 | break; | ||
2126 | |||
2127 | case RPCRDMA_MTHCAFMR: | ||
2128 | rc = rpcrdma_deregister_fmr_external(seg, ia); | ||
2129 | break; | ||
2130 | |||
2131 | default: | ||
2132 | break; | ||
2133 | } | ||
2134 | return nsegs; | ||
2135 | } | ||
2136 | |||
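With the external register/deregister entry points gone, the chunk marshaling code in rpc_rdma.c presumably calls the methods directly, matching the ro_map/ro_unmap signatures declared in xprt_rdma.h below (a sketch; the rpc_rdma.c hunks are not part of this excerpt):

	/* register a chunk: returns segments consumed or -errno */
	n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, writing);
	if (n <= 0)
		return n;

	/* later, invalidate/unmap the same segment group */
	r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, seg);

Note that the "writing" flag becomes a bool rather than an int in the new signatures.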
2137 | /* | ||
2138 | * Prepost any receive buffer, then post send. | 1572 | * Prepost any receive buffer, then post send. |
2139 | * | 1573 | * |
2140 | * Receive buffer is donated to hardware, reclaimed upon recv completion. | 1574 | * Receive buffer is donated to hardware, reclaimed upon recv completion. |
@@ -2156,7 +1590,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
2156 | } | 1590 | } |
2157 | 1591 | ||
2158 | send_wr.next = NULL; | 1592 | send_wr.next = NULL; |
2159 | send_wr.wr_id = 0ULL; /* no send cookie */ | 1593 | send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION; |
2160 | send_wr.sg_list = req->rl_send_iov; | 1594 | send_wr.sg_list = req->rl_send_iov; |
2161 | send_wr.num_sge = req->rl_niovs; | 1595 | send_wr.num_sge = req->rl_niovs; |
2162 | send_wr.opcode = IB_WR_SEND; | 1596 | send_wr.opcode = IB_WR_SEND; |
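A sketch of how the send completion handler presumably consumes RPCRDMA_IGNORE_COMPLETION together with the new mw_sendcompletion callback (handler name follows the existing rpcrdma_sendcq_* convention; that hunk is not shown here):

	static void
	rpcrdma_sendcq_process_wc(struct ib_wc *wc)
	{
		struct rpcrdma_mw *r;

		/* Plain RPC sends carry no per-WR state. */
		if (likely(wc->wr_id == RPCRDMA_IGNORE_COMPLETION))
			return;

		/* Registration WRs stash the rpcrdma_mw in wr_id and
		 * dispatch to the mode-specific completion callback.
		 */
		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->mw_sendcompletion(wc);
	}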
@@ -2215,43 +1649,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
2215 | return rc; | 1649 | return rc; |
2216 | } | 1650 | } |
2217 | 1651 | ||
2218 | /* Physical mapping means one Read/Write list entry per-page. | 1652 | /* How many chunk list items fit within our inline buffers? |
2219 | * All list entries must fit within an inline buffer | ||
2220 | * | ||
2221 | * NB: The server must return a Write list for NFS READ, | ||
2222 | * which has the same constraint. Factor in the inline | ||
2223 | * rsize as well. | ||
2224 | */ | 1653 | */ |
2225 | static size_t | 1654 | unsigned int |
2226 | rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt) | 1655 | rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt) |
2227 | { | 1656 | { |
2228 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; | 1657 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
2229 | unsigned int inline_size, pages; | 1658 | int bytes, segments; |
2230 | 1659 | ||
2231 | inline_size = min_t(unsigned int, | 1660 | bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize); |
2232 | cdata->inline_wsize, cdata->inline_rsize); | 1661 | bytes -= RPCRDMA_HDRLEN_MIN; |
2233 | inline_size -= RPCRDMA_HDRLEN_MIN; | 1662 | if (bytes < sizeof(struct rpcrdma_segment) * 2) { |
2234 | pages = inline_size / sizeof(struct rpcrdma_segment); | 1663 | pr_warn("RPC: %s: inline threshold too small\n", |
2235 | return pages << PAGE_SHIFT; | 1664 | __func__); |
2236 | } | 1665 | return 0; |
2237 | |||
2238 | static size_t | ||
2239 | rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt) | ||
2240 | { | ||
2241 | return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; | ||
2242 | } | ||
2243 | |||
2244 | size_t | ||
2245 | rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt) | ||
2246 | { | ||
2247 | size_t result; | ||
2248 | |||
2249 | switch (r_xprt->rx_ia.ri_memreg_strategy) { | ||
2250 | case RPCRDMA_ALLPHYSICAL: | ||
2251 | result = rpcrdma_physical_max_payload(r_xprt); | ||
2252 | break; | ||
2253 | default: | ||
2254 | result = rpcrdma_mr_max_payload(r_xprt); | ||
2255 | } | 1666 | } |
2256 | return result; | 1667 | |
1668 | segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1); | ||
1669 | dprintk("RPC: %s: max chunk list size = %d segments\n", | ||
1670 | __func__, segments); | ||
1671 | return segments; | ||
2257 | } | 1672 | } |
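A quick worked example, assuming the default 1024-byte inline thresholds, an RPCRDMA_HDRLEN_MIN of 28 bytes, and a 16-byte struct rpcrdma_segment (these values are assumptions; none is defined in this hunk):

	/*
	 *	bytes    = min(1024, 1024) - 28 = 996
	 *	996 / 16 = 62 segments would fit
	 *	1 << (fls(62) - 1) = 1 << 5 = 32
	 *
	 * rpcrdma_max_segments() thus reports 32, rounding the raw
	 * count down to a power of two.
	 */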
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fb6f0885..78e0b8beaa36 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -60,6 +60,7 @@ | |||
60 | * Interface Adapter -- one per transport instance | 60 | * Interface Adapter -- one per transport instance |
61 | */ | 61 | */ |
62 | struct rpcrdma_ia { | 62 | struct rpcrdma_ia { |
63 | const struct rpcrdma_memreg_ops *ri_ops; | ||
63 | rwlock_t ri_qplock; | 64 | rwlock_t ri_qplock; |
64 | struct rdma_cm_id *ri_id; | 65 | struct rdma_cm_id *ri_id; |
65 | struct ib_pd *ri_pd; | 66 | struct ib_pd *ri_pd; |
@@ -105,6 +106,10 @@ struct rpcrdma_ep { | |||
105 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 106 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
106 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) | 107 | #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) |
107 | 108 | ||
109 | /* Force completion handler to ignore the signal | ||
110 | */ | ||
111 | #define RPCRDMA_IGNORE_COMPLETION (0ULL) | ||
112 | |||
108 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV | 113 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
109 | * | 114 | * |
110 | * The below structure appears at the front of a large region of kmalloc'd | 115 | * The below structure appears at the front of a large region of kmalloc'd |
@@ -143,14 +148,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
143 | return (struct rpcrdma_msg *)rb->rg_base; | 148 | return (struct rpcrdma_msg *)rb->rg_base; |
144 | } | 149 | } |
145 | 150 | ||
146 | enum rpcrdma_chunktype { | ||
147 | rpcrdma_noch = 0, | ||
148 | rpcrdma_readch, | ||
149 | rpcrdma_areadch, | ||
150 | rpcrdma_writech, | ||
151 | rpcrdma_replych | ||
152 | }; | ||
153 | |||
154 | /* | 151 | /* |
155 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | 152 | * struct rpcrdma_rep -- this structure encapsulates state required to recv |
156 | * and complete a reply, asynchronously. It needs several pieces of | 153 | * and complete a reply, asynchronously. It needs several pieces of |
@@ -213,6 +210,7 @@ struct rpcrdma_mw { | |||
213 | struct ib_fmr *fmr; | 210 | struct ib_fmr *fmr; |
214 | struct rpcrdma_frmr frmr; | 211 | struct rpcrdma_frmr frmr; |
215 | } r; | 212 | } r; |
213 | void (*mw_sendcompletion)(struct ib_wc *); | ||
216 | struct list_head mw_list; | 214 | struct list_head mw_list; |
217 | struct list_head mw_all; | 215 | struct list_head mw_all; |
218 | }; | 216 | }; |
@@ -258,7 +256,6 @@ struct rpcrdma_req { | |||
258 | unsigned int rl_niovs; /* 0, 2 or 4 */ | 256 | unsigned int rl_niovs; /* 0, 2 or 4 */ |
259 | unsigned int rl_nchunks; /* non-zero if chunks */ | 257 | unsigned int rl_nchunks; /* non-zero if chunks */ |
260 | unsigned int rl_connect_cookie; /* retry detection */ | 258 | unsigned int rl_connect_cookie; /* retry detection */ |
261 | enum rpcrdma_chunktype rl_rtype, rl_wtype; | ||
262 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ | 259 | struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ |
263 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ | 260 | struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ |
264 | struct ib_sge rl_send_iov[4]; /* for active requests */ | 261 | struct ib_sge rl_send_iov[4]; /* for active requests */ |
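The rpcrdma_chunktype enum removed earlier and these two request fields presumably migrate into rpc_rdma.c, where marshaling can track the chunk types in local variables instead of in struct rpcrdma_req (placement is an assumption; the rpc_rdma.c hunks are not shown):

	/* in rpc_rdma.c (sketch): */
	enum rpcrdma_chunktype {
		rpcrdma_noch = 0,
		rpcrdma_readch,
		rpcrdma_areadch,
		rpcrdma_writech,
		rpcrdma_replych
	};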
@@ -340,6 +337,29 @@ struct rpcrdma_stats { | |||
340 | }; | 337 | }; |
341 | 338 | ||
342 | /* | 339 | /* |
340 | * Per-registration mode operations | ||
341 | */ | ||
342 | struct rpcrdma_xprt; | ||
343 | struct rpcrdma_memreg_ops { | ||
344 | int (*ro_map)(struct rpcrdma_xprt *, | ||
345 | struct rpcrdma_mr_seg *, int, bool); | ||
346 | int (*ro_unmap)(struct rpcrdma_xprt *, | ||
347 | struct rpcrdma_mr_seg *); | ||
348 | int (*ro_open)(struct rpcrdma_ia *, | ||
349 | struct rpcrdma_ep *, | ||
350 | struct rpcrdma_create_data_internal *); | ||
351 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | ||
352 | int (*ro_init)(struct rpcrdma_xprt *); | ||
353 | void (*ro_reset)(struct rpcrdma_xprt *); | ||
354 | void (*ro_destroy)(struct rpcrdma_buffer *); | ||
355 | const char *ro_displayname; | ||
356 | }; | ||
357 | |||
358 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | ||
359 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | ||
360 | extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops; | ||
361 | |||
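Each registration mode presumably exports its table by filling in these methods. A sketch for physical_ops.c, with method names assumed by analogy with the fmr_op_* naming used elsewhere in this commit:

	const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
		.ro_map		= physical_op_map,
		.ro_unmap	= physical_op_unmap,
		.ro_open	= physical_op_open,
		.ro_maxpages	= physical_op_maxpages,
		.ro_init	= physical_op_init,
		.ro_reset	= physical_op_reset,
		.ro_destroy	= physical_op_destroy,
		.ro_displayname	= "physical",
	};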
362 | /* | ||
343 | * RPCRDMA transport -- encapsulates the structures above for | 363 | * RPCRDMA transport -- encapsulates the structures above for |
344 | * integration with RPC. | 364 | * integration with RPC. |
345 | * | 365 | * |
@@ -398,16 +418,56 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); | |||
398 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 418 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
399 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | 419 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
400 | 420 | ||
401 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | ||
402 | int, int, struct rpcrdma_xprt *); | ||
403 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | ||
404 | struct rpcrdma_xprt *); | ||
405 | |||
406 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, | 421 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, |
407 | size_t, gfp_t); | 422 | size_t, gfp_t); |
408 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, | 423 | void rpcrdma_free_regbuf(struct rpcrdma_ia *, |
409 | struct rpcrdma_regbuf *); | 424 | struct rpcrdma_regbuf *); |
410 | 425 | ||
426 | unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); | ||
427 | |||
428 | /* | ||
429 | * Wrappers for chunk registration, shared by read/write chunk code. | ||
430 | */ | ||
431 | |||
432 | void rpcrdma_mapping_error(struct rpcrdma_mr_seg *); | ||
433 | |||
434 | static inline enum dma_data_direction | ||
435 | rpcrdma_data_dir(bool writing) | ||
436 | { | ||
437 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | ||
438 | } | ||
439 | |||
440 | static inline void | ||
441 | rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg, | ||
442 | enum dma_data_direction direction) | ||
443 | { | ||
444 | seg->mr_dir = direction; | ||
445 | seg->mr_dmalen = seg->mr_len; | ||
446 | |||
447 | if (seg->mr_page) | ||
448 | seg->mr_dma = ib_dma_map_page(device, | ||
449 | seg->mr_page, offset_in_page(seg->mr_offset), | ||
450 | seg->mr_dmalen, seg->mr_dir); | ||
451 | else | ||
452 | seg->mr_dma = ib_dma_map_single(device, | ||
453 | seg->mr_offset, | ||
454 | seg->mr_dmalen, seg->mr_dir); | ||
455 | |||
456 | if (ib_dma_mapping_error(device, seg->mr_dma)) | ||
457 | rpcrdma_mapping_error(seg); | ||
458 | } | ||
459 | |||
460 | static inline void | ||
461 | rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg) | ||
462 | { | ||
463 | if (seg->mr_page) | ||
464 | ib_dma_unmap_page(device, | ||
465 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
466 | else | ||
467 | ib_dma_unmap_single(device, | ||
468 | seg->mr_dma, seg->mr_dmalen, seg->mr_dir); | ||
469 | } | ||
470 | |||
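A minimal usage sketch of the new DMA-mapping inlines from inside a ro_map method (the surrounding variables are assumptions):

	struct ib_device *device = ia->ri_id->device;
	bool writing = true;	/* e.g. an NFS READ: the server writes */

	rpcrdma_map_one(device, seg, rpcrdma_data_dir(writing));
	/* ... build and post the registration work request ... */
	rpcrdma_unmap_one(device, seg);

A DMA mapping failure is only logged via rpcrdma_mapping_error(); callers observe the bad address through seg->mr_dma.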
411 | /* | 471 | /* |
412 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 472 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
413 | */ | 473 | */ |
@@ -418,9 +478,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); | |||
418 | /* | 478 | /* |
419 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | 479 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c |
420 | */ | 480 | */ |
421 | ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t); | ||
422 | int rpcrdma_marshal_req(struct rpc_rqst *); | 481 | int rpcrdma_marshal_req(struct rpc_rqst *); |
423 | size_t rpcrdma_max_payload(struct rpcrdma_xprt *); | ||
424 | 482 | ||
425 | /* Temporary NFS request map cache. Created in svc_rdma.c */ | 483 | /* Temporary NFS request map cache. Created in svc_rdma.c */ |
426 | extern struct kmem_cache *svc_rdma_map_cachep; | 484 | extern struct kmem_cache *svc_rdma_map_cachep; |