author    Chuck Lever <chuck.lever@oracle.com>  2018-05-07 15:27:21 -0400
committer J. Bruce Fields <bfields@redhat.com>  2018-05-11 15:48:57 -0400
commit    ecf85b2384ea5f7cb0577bf6143bc46d9ecfe4d3 (patch)
tree      6f0ca0a83d39f3c6b82bd0df41896cfb315645f9
parent    bd2abef33394dc16d63580c38c01420db991f0f2 (diff)
svcrdma: Introduce svc_rdma_recv_ctxt
svc_rdma_op_ctxt's are pre-allocated and maintained on a per-xprt free
list. This eliminates the overhead of calling kmalloc / kfree, both of
which grab a globally shared lock that disables interrupts. To reduce
contention further, separate the use of these objects in the Receive
and Send paths in svcrdma.

Subsequent patches will take advantage of this separation by allocating
real resources which are then cached in these objects. The allocations
are freed when the transport is torn down.

I've renamed the structure so that static type checking can be used to
ensure that uses of op_ctxt and recv_ctxt are not confused. As an
additional clean up, structure fields are renamed to conform with
kernel coding conventions. As a final clean up, helpers related to
recv_ctxt are moved closer to the functions that use them.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
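[Editorial note] For readers skimming the patch, here is a minimal userspace sketch of the spin-lock-protected free-list pattern the commit message describes. It is illustrative only: the names recv_ctxt_get / recv_ctxt_put and the struct fields below are hypothetical stand-ins, and the DMA mapping, page handling, and error paths of the real code are omitted.

/* Userspace analogue of the per-transport free-list pattern: contexts
 * are recycled through a lock-protected list rather than allocated and
 * freed for every receive.
 */
#include <pthread.h>
#include <stdlib.h>

struct recv_ctxt {
	struct recv_ctxt *next;
	/* per-receive state would live here */
};

struct transport {
	pthread_mutex_t recv_lock;	/* e.g. PTHREAD_MUTEX_INITIALIZER */
	struct recv_ctxt *recv_free;	/* free list of idle contexts */
};

static struct recv_ctxt *recv_ctxt_get(struct transport *t)
{
	struct recv_ctxt *ctxt;

	pthread_mutex_lock(&t->recv_lock);
	ctxt = t->recv_free;
	if (ctxt)
		t->recv_free = ctxt->next;	/* pop an idle context */
	pthread_mutex_unlock(&t->recv_lock);

	/* Fall back to a fresh allocation only when the list is empty. */
	return ctxt ? ctxt : calloc(1, sizeof(*ctxt));
}

static void recv_ctxt_put(struct transport *t, struct recv_ctxt *ctxt)
{
	pthread_mutex_lock(&t->recv_lock);
	ctxt->next = t->recv_free;	/* push back onto the free list */
	t->recv_free = ctxt;
	pthread_mutex_unlock(&t->recv_lock);
}

In the patch itself, svc_rdma_recv_ctxt_get() and svc_rdma_recv_ctxt_put() in svc_rdma_recvfrom.c play these two roles, using the new sc_recv_lock spinlock and sc_recv_ctxts list added to struct svcxprt_rdma.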
 include/linux/sunrpc/svc_rdma.h          |  24
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 318
 net/sunrpc/xprtrdma/svc_rdma_rw.c        |  84
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    |   2
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 142
 5 files changed, 349 insertions(+), 221 deletions(-)
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 88da0c9bd7b1..37f759d65348 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -128,6 +128,9 @@ struct svcxprt_rdma {
128 unsigned long sc_flags; 128 unsigned long sc_flags;
129 struct list_head sc_read_complete_q; 129 struct list_head sc_read_complete_q;
130 struct work_struct sc_work; 130 struct work_struct sc_work;
131
132 spinlock_t sc_recv_lock;
133 struct list_head sc_recv_ctxts;
131}; 134};
132/* sc_flags */ 135/* sc_flags */
133#define RDMAXPRT_CONN_PENDING 3 136#define RDMAXPRT_CONN_PENDING 3
@@ -142,6 +145,19 @@ struct svcxprt_rdma {
142 145
143#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD 146#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
144 147
148struct svc_rdma_recv_ctxt {
149 struct list_head rc_list;
150 struct ib_recv_wr rc_recv_wr;
151 struct ib_cqe rc_cqe;
152 struct xdr_buf rc_arg;
153 u32 rc_byte_len;
154 unsigned int rc_page_count;
155 unsigned int rc_hdr_count;
156 struct ib_sge rc_sges[1 +
157 RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE];
158 struct page *rc_pages[RPCSVC_MAXPAGES];
159};
160
145/* Track DMA maps for this transport and context */ 161/* Track DMA maps for this transport and context */
146static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, 162static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
147 struct svc_rdma_op_ctxt *ctxt) 163 struct svc_rdma_op_ctxt *ctxt)
@@ -155,13 +171,19 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
155 struct xdr_buf *rcvbuf); 171 struct xdr_buf *rcvbuf);
156 172
157/* svc_rdma_recvfrom.c */ 173/* svc_rdma_recvfrom.c */
174extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma);
175extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma);
176extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
177 struct svc_rdma_recv_ctxt *ctxt,
178 int free_pages);
179extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
158extern int svc_rdma_recvfrom(struct svc_rqst *); 180extern int svc_rdma_recvfrom(struct svc_rqst *);
159 181
160/* svc_rdma_rw.c */ 182/* svc_rdma_rw.c */
161extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); 183extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
162extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, 184extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
163 struct svc_rqst *rqstp, 185 struct svc_rqst *rqstp,
164 struct svc_rdma_op_ctxt *head, __be32 *p); 186 struct svc_rdma_recv_ctxt *head, __be32 *p);
165extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, 187extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
166 __be32 *wr_ch, struct xdr_buf *xdr); 188 __be32 *wr_ch, struct xdr_buf *xdr);
167extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, 189extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 330d542fd96e..b7d9c55ee896 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,6 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2/* 2/*
3 * Copyright (c) 2016, 2017 Oracle. All rights reserved. 3 * Copyright (c) 2016-2018 Oracle. All rights reserved.
4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 5 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
6 * 6 *
@@ -61,7 +61,7 @@
61 * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's 61 * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's
62 * data payload from the client. svc_rdma_recvfrom sets up the 62 * data payload from the client. svc_rdma_recvfrom sets up the
63 * RDMA Reads using pages in svc_rqst::rq_pages, which are 63 * RDMA Reads using pages in svc_rqst::rq_pages, which are
64 * transferred to an svc_rdma_op_ctxt for the duration of the 64 * transferred to an svc_rdma_recv_ctxt for the duration of the
65 * I/O. svc_rdma_recvfrom then returns zero, since the RPC message 65 * I/O. svc_rdma_recvfrom then returns zero, since the RPC message
66 * is still not yet ready. 66 * is still not yet ready.
67 * 67 *
@@ -70,18 +70,18 @@
70 * svc_rdma_recvfrom again. This second call may use a different 70 * svc_rdma_recvfrom again. This second call may use a different
71 * svc_rqst than the first one, thus any information that needs 71 * svc_rqst than the first one, thus any information that needs
72 * to be preserved across these two calls is kept in an 72 * to be preserved across these two calls is kept in an
73 * svc_rdma_op_ctxt. 73 * svc_rdma_recv_ctxt.
74 * 74 *
75 * The second call to svc_rdma_recvfrom performs final assembly 75 * The second call to svc_rdma_recvfrom performs final assembly
76 * of the RPC Call message, using the RDMA Read sink pages kept in 76 * of the RPC Call message, using the RDMA Read sink pages kept in
77 * the svc_rdma_op_ctxt. The xdr_buf is copied from the 77 * the svc_rdma_recv_ctxt. The xdr_buf is copied from the
78 * svc_rdma_op_ctxt to the second svc_rqst. The second call returns 78 * svc_rdma_recv_ctxt to the second svc_rqst. The second call returns
79 * the length of the completed RPC Call message. 79 * the length of the completed RPC Call message.
80 * 80 *
81 * Page Management 81 * Page Management
82 * 82 *
83 * Pages under I/O must be transferred from the first svc_rqst to an 83 * Pages under I/O must be transferred from the first svc_rqst to an
84 * svc_rdma_op_ctxt before the first svc_rdma_recvfrom call returns. 84 * svc_rdma_recv_ctxt before the first svc_rdma_recvfrom call returns.
85 * 85 *
86 * The first svc_rqst supplies pages for RDMA Reads. These are moved 86 * The first svc_rqst supplies pages for RDMA Reads. These are moved
87 * from rqstp::rq_pages into ctxt::pages. The consumed elements of 87 * from rqstp::rq_pages into ctxt::pages. The consumed elements of
@@ -89,7 +89,7 @@
89 * svc_rdma_recvfrom call returns. 89 * svc_rdma_recvfrom call returns.
90 * 90 *
91 * During the second svc_rdma_recvfrom call, RDMA Read sink pages 91 * During the second svc_rdma_recvfrom call, RDMA Read sink pages
92 * are transferred from the svc_rdma_op_ctxt to the second svc_rqst 92 * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst
93 * (see rdma_read_complete() below). 93 * (see rdma_read_complete() below).
94 */ 94 */
95 95
@@ -108,13 +108,247 @@
108 108
109#define RPCDBG_FACILITY RPCDBG_SVCXPRT 109#define RPCDBG_FACILITY RPCDBG_SVCXPRT
110 110
111static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);
112
113static inline struct svc_rdma_recv_ctxt *
114svc_rdma_next_recv_ctxt(struct list_head *list)
115{
116 return list_first_entry_or_null(list, struct svc_rdma_recv_ctxt,
117 rc_list);
118}
119
120/**
121 * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
122 * @rdma: svcxprt_rdma being torn down
123 *
124 */
125void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
126{
127 struct svc_rdma_recv_ctxt *ctxt;
128
129 while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
130 list_del(&ctxt->rc_list);
131 kfree(ctxt);
132 }
133}
134
135static struct svc_rdma_recv_ctxt *
136svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
137{
138 struct svc_rdma_recv_ctxt *ctxt;
139
140 spin_lock(&rdma->sc_recv_lock);
141 ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
142 if (!ctxt)
143 goto out_empty;
144 list_del(&ctxt->rc_list);
145 spin_unlock(&rdma->sc_recv_lock);
146
147out:
148 ctxt->rc_recv_wr.num_sge = 0;
149 ctxt->rc_page_count = 0;
150 return ctxt;
151
152out_empty:
153 spin_unlock(&rdma->sc_recv_lock);
154
155 ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
156 if (!ctxt)
157 return NULL;
158 goto out;
159}
160
161static void svc_rdma_recv_ctxt_unmap(struct svcxprt_rdma *rdma,
162 struct svc_rdma_recv_ctxt *ctxt)
163{
164 struct ib_device *device = rdma->sc_cm_id->device;
165 int i;
166
167 for (i = 0; i < ctxt->rc_recv_wr.num_sge; i++)
168 ib_dma_unmap_page(device,
169 ctxt->rc_sges[i].addr,
170 ctxt->rc_sges[i].length,
171 DMA_FROM_DEVICE);
172}
173
174/**
175 * svc_rdma_recv_ctxt_put - Return recv_ctxt to free list
176 * @rdma: controlling svcxprt_rdma
177 * @ctxt: object to return to the free list
178 * @free_pages: Non-zero if rc_pages should be freed
179 *
180 */
181void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
182 struct svc_rdma_recv_ctxt *ctxt,
183 int free_pages)
184{
185 unsigned int i;
186
187 if (free_pages)
188 for (i = 0; i < ctxt->rc_page_count; i++)
189 put_page(ctxt->rc_pages[i]);
190 spin_lock(&rdma->sc_recv_lock);
191 list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
192 spin_unlock(&rdma->sc_recv_lock);
193}
194
195static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
196{
197 struct ib_device *device = rdma->sc_cm_id->device;
198 struct svc_rdma_recv_ctxt *ctxt;
199 struct ib_recv_wr *bad_recv_wr;
200 int sge_no, buflen, ret;
201 struct page *page;
202 dma_addr_t pa;
203
204 ctxt = svc_rdma_recv_ctxt_get(rdma);
205 if (!ctxt)
206 return -ENOMEM;
207
208 buflen = 0;
209 ctxt->rc_cqe.done = svc_rdma_wc_receive;
210 for (sge_no = 0; buflen < rdma->sc_max_req_size; sge_no++) {
211 if (sge_no >= rdma->sc_max_sge) {
212 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
213 goto err_put_ctxt;
214 }
215
216 page = alloc_page(GFP_KERNEL);
217 if (!page)
218 goto err_put_ctxt;
219 ctxt->rc_pages[sge_no] = page;
220 ctxt->rc_page_count++;
221
222 pa = ib_dma_map_page(device, ctxt->rc_pages[sge_no],
223 0, PAGE_SIZE, DMA_FROM_DEVICE);
224 if (ib_dma_mapping_error(device, pa))
225 goto err_put_ctxt;
226 ctxt->rc_sges[sge_no].addr = pa;
227 ctxt->rc_sges[sge_no].length = PAGE_SIZE;
228 ctxt->rc_sges[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
229 ctxt->rc_recv_wr.num_sge++;
230
231 buflen += PAGE_SIZE;
232 }
233 ctxt->rc_recv_wr.next = NULL;
234 ctxt->rc_recv_wr.sg_list = &ctxt->rc_sges[0];
235 ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
236
237 svc_xprt_get(&rdma->sc_xprt);
238 ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr);
239 trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
240 if (ret)
241 goto err_post;
242 return 0;
243
244err_put_ctxt:
245 svc_rdma_recv_ctxt_unmap(rdma, ctxt);
246 svc_rdma_recv_ctxt_put(rdma, ctxt, 1);
247 return -ENOMEM;
248err_post:
249 svc_rdma_recv_ctxt_unmap(rdma, ctxt);
250 svc_rdma_recv_ctxt_put(rdma, ctxt, 1);
251 svc_xprt_put(&rdma->sc_xprt);
252 return ret;
253}
254
255/**
256 * svc_rdma_post_recvs - Post initial set of Recv WRs
257 * @rdma: fresh svcxprt_rdma
258 *
259 * Returns true if successful, otherwise false.
260 */
261bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
262{
263 unsigned int i;
264 int ret;
265
266 for (i = 0; i < rdma->sc_max_requests; i++) {
267 ret = svc_rdma_post_recv(rdma);
268 if (ret) {
269 pr_err("svcrdma: failure posting recv buffers: %d\n",
270 ret);
271 return false;
272 }
273 }
274 return true;
275}
276
277/**
278 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
279 * @cq: Completion Queue context
280 * @wc: Work Completion object
281 *
282 * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
283 * the Receive completion handler could be running.
284 */
285static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
286{
287 struct svcxprt_rdma *rdma = cq->cq_context;
288 struct ib_cqe *cqe = wc->wr_cqe;
289 struct svc_rdma_recv_ctxt *ctxt;
290
291 trace_svcrdma_wc_receive(wc);
292
293 /* WARNING: Only wc->wr_cqe and wc->status are reliable */
294 ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
295 svc_rdma_recv_ctxt_unmap(rdma, ctxt);
296
297 if (wc->status != IB_WC_SUCCESS)
298 goto flushed;
299
300 if (svc_rdma_post_recv(rdma))
301 goto post_err;
302
303 /* All wc fields are now known to be valid */
304 ctxt->rc_byte_len = wc->byte_len;
305 spin_lock(&rdma->sc_rq_dto_lock);
306 list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
307 spin_unlock(&rdma->sc_rq_dto_lock);
308 set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
309 if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
310 svc_xprt_enqueue(&rdma->sc_xprt);
311 goto out;
312
313flushed:
314 if (wc->status != IB_WC_WR_FLUSH_ERR)
315 pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
316 ib_wc_status_msg(wc->status),
317 wc->status, wc->vendor_err);
318post_err:
319 svc_rdma_recv_ctxt_put(rdma, ctxt, 1);
320 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
321 svc_xprt_enqueue(&rdma->sc_xprt);
322out:
323 svc_xprt_put(&rdma->sc_xprt);
324}
325
326/**
327 * svc_rdma_flush_recv_queues - Drain pending Receive work
328 * @rdma: svcxprt_rdma being shut down
329 *
330 */
331void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
332{
333 struct svc_rdma_recv_ctxt *ctxt;
334
335 while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
336 list_del(&ctxt->rc_list);
337 svc_rdma_recv_ctxt_put(rdma, ctxt, 1);
338 }
339 while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
340 list_del(&ctxt->rc_list);
341 svc_rdma_recv_ctxt_put(rdma, ctxt, 1);
342 }
343}
344
111/* 345/*
112 * Replace the pages in the rq_argpages array with the pages from the SGE in 346 * Replace the pages in the rq_argpages array with the pages from the SGE in
113 * the RDMA_RECV completion. The SGL should contain full pages up until the 347 * the RDMA_RECV completion. The SGL should contain full pages up until the
114 * last one. 348 * last one.
115 */ 349 */
116static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, 350static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
117 struct svc_rdma_op_ctxt *ctxt) 351 struct svc_rdma_recv_ctxt *ctxt)
118{ 352{
119 struct page *page; 353 struct page *page;
120 int sge_no; 354 int sge_no;
@@ -123,30 +357,30 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
123 /* The reply path assumes the Call's transport header resides 357 /* The reply path assumes the Call's transport header resides
124 * in rqstp->rq_pages[0]. 358 * in rqstp->rq_pages[0].
125 */ 359 */
126 page = ctxt->pages[0]; 360 page = ctxt->rc_pages[0];
127 put_page(rqstp->rq_pages[0]); 361 put_page(rqstp->rq_pages[0]);
128 rqstp->rq_pages[0] = page; 362 rqstp->rq_pages[0] = page;
129 363
130 /* Set up the XDR head */ 364 /* Set up the XDR head */
131 rqstp->rq_arg.head[0].iov_base = page_address(page); 365 rqstp->rq_arg.head[0].iov_base = page_address(page);
132 rqstp->rq_arg.head[0].iov_len = 366 rqstp->rq_arg.head[0].iov_len =
133 min_t(size_t, ctxt->byte_len, ctxt->sge[0].length); 367 min_t(size_t, ctxt->rc_byte_len, ctxt->rc_sges[0].length);
134 rqstp->rq_arg.len = ctxt->byte_len; 368 rqstp->rq_arg.len = ctxt->rc_byte_len;
135 rqstp->rq_arg.buflen = ctxt->byte_len; 369 rqstp->rq_arg.buflen = ctxt->rc_byte_len;
136 370
137 /* Compute bytes past head in the SGL */ 371 /* Compute bytes past head in the SGL */
138 len = ctxt->byte_len - rqstp->rq_arg.head[0].iov_len; 372 len = ctxt->rc_byte_len - rqstp->rq_arg.head[0].iov_len;
139 373
140 /* If data remains, store it in the pagelist */ 374 /* If data remains, store it in the pagelist */
141 rqstp->rq_arg.page_len = len; 375 rqstp->rq_arg.page_len = len;
142 rqstp->rq_arg.page_base = 0; 376 rqstp->rq_arg.page_base = 0;
143 377
144 sge_no = 1; 378 sge_no = 1;
145 while (len && sge_no < ctxt->count) { 379 while (len && sge_no < ctxt->rc_recv_wr.num_sge) {
146 page = ctxt->pages[sge_no]; 380 page = ctxt->rc_pages[sge_no];
147 put_page(rqstp->rq_pages[sge_no]); 381 put_page(rqstp->rq_pages[sge_no]);
148 rqstp->rq_pages[sge_no] = page; 382 rqstp->rq_pages[sge_no] = page;
149 len -= min_t(u32, len, ctxt->sge[sge_no].length); 383 len -= min_t(u32, len, ctxt->rc_sges[sge_no].length);
150 sge_no++; 384 sge_no++;
151 } 385 }
152 rqstp->rq_respages = &rqstp->rq_pages[sge_no]; 386 rqstp->rq_respages = &rqstp->rq_pages[sge_no];
@@ -154,11 +388,11 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
154 388
155 /* If not all pages were used from the SGL, free the remaining ones */ 389 /* If not all pages were used from the SGL, free the remaining ones */
156 len = sge_no; 390 len = sge_no;
157 while (sge_no < ctxt->count) { 391 while (sge_no < ctxt->rc_recv_wr.num_sge) {
158 page = ctxt->pages[sge_no++]; 392 page = ctxt->rc_pages[sge_no++];
159 put_page(page); 393 put_page(page);
160 } 394 }
161 ctxt->count = len; 395 ctxt->rc_page_count = len;
162 396
163 /* Set up tail */ 397 /* Set up tail */
164 rqstp->rq_arg.tail[0].iov_base = NULL; 398 rqstp->rq_arg.tail[0].iov_base = NULL;
@@ -364,29 +598,29 @@ out_inval:
364} 598}
365 599
366static void rdma_read_complete(struct svc_rqst *rqstp, 600static void rdma_read_complete(struct svc_rqst *rqstp,
367 struct svc_rdma_op_ctxt *head) 601 struct svc_rdma_recv_ctxt *head)
368{ 602{
369 int page_no; 603 int page_no;
370 604
371 /* Copy RPC pages */ 605 /* Copy RPC pages */
372 for (page_no = 0; page_no < head->count; page_no++) { 606 for (page_no = 0; page_no < head->rc_page_count; page_no++) {
373 put_page(rqstp->rq_pages[page_no]); 607 put_page(rqstp->rq_pages[page_no]);
374 rqstp->rq_pages[page_no] = head->pages[page_no]; 608 rqstp->rq_pages[page_no] = head->rc_pages[page_no];
375 } 609 }
376 610
377 /* Point rq_arg.pages past header */ 611 /* Point rq_arg.pages past header */
378 rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; 612 rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count];
379 rqstp->rq_arg.page_len = head->arg.page_len; 613 rqstp->rq_arg.page_len = head->rc_arg.page_len;
380 614
381 /* rq_respages starts after the last arg page */ 615 /* rq_respages starts after the last arg page */
382 rqstp->rq_respages = &rqstp->rq_pages[page_no]; 616 rqstp->rq_respages = &rqstp->rq_pages[page_no];
383 rqstp->rq_next_page = rqstp->rq_respages + 1; 617 rqstp->rq_next_page = rqstp->rq_respages + 1;
384 618
385 /* Rebuild rq_arg head and tail. */ 619 /* Rebuild rq_arg head and tail. */
386 rqstp->rq_arg.head[0] = head->arg.head[0]; 620 rqstp->rq_arg.head[0] = head->rc_arg.head[0];
387 rqstp->rq_arg.tail[0] = head->arg.tail[0]; 621 rqstp->rq_arg.tail[0] = head->rc_arg.tail[0];
388 rqstp->rq_arg.len = head->arg.len; 622 rqstp->rq_arg.len = head->rc_arg.len;
389 rqstp->rq_arg.buflen = head->arg.buflen; 623 rqstp->rq_arg.buflen = head->rc_arg.buflen;
390} 624}
391 625
392static void svc_rdma_send_error(struct svcxprt_rdma *xprt, 626static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
@@ -506,28 +740,26 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
506 struct svc_xprt *xprt = rqstp->rq_xprt; 740 struct svc_xprt *xprt = rqstp->rq_xprt;
507 struct svcxprt_rdma *rdma_xprt = 741 struct svcxprt_rdma *rdma_xprt =
508 container_of(xprt, struct svcxprt_rdma, sc_xprt); 742 container_of(xprt, struct svcxprt_rdma, sc_xprt);
509 struct svc_rdma_op_ctxt *ctxt; 743 struct svc_rdma_recv_ctxt *ctxt;
510 __be32 *p; 744 __be32 *p;
511 int ret; 745 int ret;
512 746
513 spin_lock(&rdma_xprt->sc_rq_dto_lock); 747 spin_lock(&rdma_xprt->sc_rq_dto_lock);
514 if (!list_empty(&rdma_xprt->sc_read_complete_q)) { 748 ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
515 ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q, 749 if (ctxt) {
516 struct svc_rdma_op_ctxt, list); 750 list_del(&ctxt->rc_list);
517 list_del(&ctxt->list);
518 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 751 spin_unlock(&rdma_xprt->sc_rq_dto_lock);
519 rdma_read_complete(rqstp, ctxt); 752 rdma_read_complete(rqstp, ctxt);
520 goto complete; 753 goto complete;
521 } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { 754 }
522 ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q, 755 ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
523 struct svc_rdma_op_ctxt, list); 756 if (!ctxt) {
524 list_del(&ctxt->list);
525 } else {
526 /* No new incoming requests, terminate the loop */ 757 /* No new incoming requests, terminate the loop */
527 clear_bit(XPT_DATA, &xprt->xpt_flags); 758 clear_bit(XPT_DATA, &xprt->xpt_flags);
528 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 759 spin_unlock(&rdma_xprt->sc_rq_dto_lock);
529 return 0; 760 return 0;
530 } 761 }
762 list_del(&ctxt->rc_list);
531 spin_unlock(&rdma_xprt->sc_rq_dto_lock); 763 spin_unlock(&rdma_xprt->sc_rq_dto_lock);
532 764
533 atomic_inc(&rdma_stat_recv); 765 atomic_inc(&rdma_stat_recv);
@@ -545,7 +777,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
545 if (svc_rdma_is_backchannel_reply(xprt, p)) { 777 if (svc_rdma_is_backchannel_reply(xprt, p)) {
546 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, 778 ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
547 &rqstp->rq_arg); 779 &rqstp->rq_arg);
548 svc_rdma_put_context(ctxt, 0); 780 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt, 0);
549 return ret; 781 return ret;
550 } 782 }
551 783
@@ -554,7 +786,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
554 goto out_readchunk; 786 goto out_readchunk;
555 787
556complete: 788complete:
557 svc_rdma_put_context(ctxt, 0); 789 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt, 0);
558 rqstp->rq_prot = IPPROTO_MAX; 790 rqstp->rq_prot = IPPROTO_MAX;
559 svc_xprt_copy_addrs(rqstp, xprt); 791 svc_xprt_copy_addrs(rqstp, xprt);
560 return rqstp->rq_arg.len; 792 return rqstp->rq_arg.len;
@@ -567,16 +799,16 @@ out_readchunk:
567 799
568out_err: 800out_err:
569 svc_rdma_send_error(rdma_xprt, p, ret); 801 svc_rdma_send_error(rdma_xprt, p, ret);
570 svc_rdma_put_context(ctxt, 0); 802 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt, 0);
571 return 0; 803 return 0;
572 804
573out_postfail: 805out_postfail:
574 if (ret == -EINVAL) 806 if (ret == -EINVAL)
575 svc_rdma_send_error(rdma_xprt, p, ret); 807 svc_rdma_send_error(rdma_xprt, p, ret);
576 svc_rdma_put_context(ctxt, 1); 808 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt, 1);
577 return ret; 809 return ret;
578 810
579out_drop: 811out_drop:
580 svc_rdma_put_context(ctxt, 1); 812 svc_rdma_recv_ctxt_put(rdma_xprt, ctxt, 1);
581 return 0; 813 return 0;
582} 814}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 887ceef125b2..c080ce20ff40 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1,6 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2/* 2/*
3 * Copyright (c) 2016 Oracle. All rights reserved. 3 * Copyright (c) 2016-2018 Oracle. All rights reserved.
4 * 4 *
5 * Use the core R/W API to move RPC-over-RDMA Read and Write chunks. 5 * Use the core R/W API to move RPC-over-RDMA Read and Write chunks.
6 */ 6 */
@@ -227,7 +227,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
227/* State for pulling a Read chunk. 227/* State for pulling a Read chunk.
228 */ 228 */
229struct svc_rdma_read_info { 229struct svc_rdma_read_info {
230 struct svc_rdma_op_ctxt *ri_readctxt; 230 struct svc_rdma_recv_ctxt *ri_readctxt;
231 unsigned int ri_position; 231 unsigned int ri_position;
232 unsigned int ri_pageno; 232 unsigned int ri_pageno;
233 unsigned int ri_pageoff; 233 unsigned int ri_pageoff;
@@ -282,10 +282,10 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
282 pr_err("svcrdma: read ctx: %s (%u/0x%x)\n", 282 pr_err("svcrdma: read ctx: %s (%u/0x%x)\n",
283 ib_wc_status_msg(wc->status), 283 ib_wc_status_msg(wc->status),
284 wc->status, wc->vendor_err); 284 wc->status, wc->vendor_err);
285 svc_rdma_put_context(info->ri_readctxt, 1); 285 svc_rdma_recv_ctxt_put(rdma, info->ri_readctxt, 1);
286 } else { 286 } else {
287 spin_lock(&rdma->sc_rq_dto_lock); 287 spin_lock(&rdma->sc_rq_dto_lock);
288 list_add_tail(&info->ri_readctxt->list, 288 list_add_tail(&info->ri_readctxt->rc_list,
289 &rdma->sc_read_complete_q); 289 &rdma->sc_read_complete_q);
290 spin_unlock(&rdma->sc_rq_dto_lock); 290 spin_unlock(&rdma->sc_rq_dto_lock);
291 291
@@ -607,7 +607,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
607 struct svc_rqst *rqstp, 607 struct svc_rqst *rqstp,
608 u32 rkey, u32 len, u64 offset) 608 u32 rkey, u32 len, u64 offset)
609{ 609{
610 struct svc_rdma_op_ctxt *head = info->ri_readctxt; 610 struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
611 struct svc_rdma_chunk_ctxt *cc = &info->ri_cc; 611 struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
612 struct svc_rdma_rw_ctxt *ctxt; 612 struct svc_rdma_rw_ctxt *ctxt;
613 unsigned int sge_no, seg_len; 613 unsigned int sge_no, seg_len;
@@ -625,10 +625,10 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
625 seg_len = min_t(unsigned int, len, 625 seg_len = min_t(unsigned int, len,
626 PAGE_SIZE - info->ri_pageoff); 626 PAGE_SIZE - info->ri_pageoff);
627 627
628 head->arg.pages[info->ri_pageno] = 628 head->rc_arg.pages[info->ri_pageno] =
629 rqstp->rq_pages[info->ri_pageno]; 629 rqstp->rq_pages[info->ri_pageno];
630 if (!info->ri_pageoff) 630 if (!info->ri_pageoff)
631 head->count++; 631 head->rc_page_count++;
632 632
633 sg_set_page(sg, rqstp->rq_pages[info->ri_pageno], 633 sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
634 seg_len, info->ri_pageoff); 634 seg_len, info->ri_pageoff);
@@ -705,9 +705,9 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
705} 705}
706 706
707/* Construct RDMA Reads to pull over a normal Read chunk. The chunk 707/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
708 * data lands in the page list of head->arg.pages. 708 * data lands in the page list of head->rc_arg.pages.
709 * 709 *
710 * Currently NFSD does not look at the head->arg.tail[0] iovec. 710 * Currently NFSD does not look at the head->rc_arg.tail[0] iovec.
711 * Therefore, XDR round-up of the Read chunk and trailing 711 * Therefore, XDR round-up of the Read chunk and trailing
712 * inline content must both be added at the end of the pagelist. 712 * inline content must both be added at the end of the pagelist.
713 */ 713 */
@@ -715,10 +715,10 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
715 struct svc_rdma_read_info *info, 715 struct svc_rdma_read_info *info,
716 __be32 *p) 716 __be32 *p)
717{ 717{
718 struct svc_rdma_op_ctxt *head = info->ri_readctxt; 718 struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
719 int ret; 719 int ret;
720 720
721 info->ri_pageno = head->hdr_count; 721 info->ri_pageno = head->rc_hdr_count;
722 info->ri_pageoff = 0; 722 info->ri_pageoff = 0;
723 723
724 ret = svc_rdma_build_read_chunk(rqstp, info, p); 724 ret = svc_rdma_build_read_chunk(rqstp, info, p);
@@ -732,11 +732,11 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
732 * chunk is not included in either the pagelist or in 732 * chunk is not included in either the pagelist or in
733 * the tail. 733 * the tail.
734 */ 734 */
735 head->arg.tail[0].iov_base = 735 head->rc_arg.tail[0].iov_base =
736 head->arg.head[0].iov_base + info->ri_position; 736 head->rc_arg.head[0].iov_base + info->ri_position;
737 head->arg.tail[0].iov_len = 737 head->rc_arg.tail[0].iov_len =
738 head->arg.head[0].iov_len - info->ri_position; 738 head->rc_arg.head[0].iov_len - info->ri_position;
739 head->arg.head[0].iov_len = info->ri_position; 739 head->rc_arg.head[0].iov_len = info->ri_position;
740 740
741 /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2). 741 /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
742 * 742 *
@@ -749,9 +749,9 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
749 */ 749 */
750 info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2; 750 info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2;
751 751
752 head->arg.page_len = info->ri_chunklen; 752 head->rc_arg.page_len = info->ri_chunklen;
753 head->arg.len += info->ri_chunklen; 753 head->rc_arg.len += info->ri_chunklen;
754 head->arg.buflen += info->ri_chunklen; 754 head->rc_arg.buflen += info->ri_chunklen;
755 755
756out: 756out:
757 return ret; 757 return ret;
@@ -760,7 +760,7 @@ out:
760/* Construct RDMA Reads to pull over a Position Zero Read chunk. 760/* Construct RDMA Reads to pull over a Position Zero Read chunk.
761 * The start of the data lands in the first page just after 761 * The start of the data lands in the first page just after
762 * the Transport header, and the rest lands in the page list of 762 * the Transport header, and the rest lands in the page list of
763 * head->arg.pages. 763 * head->rc_arg.pages.
764 * 764 *
765 * Assumptions: 765 * Assumptions:
766 * - A PZRC has an XDR-aligned length (no implicit round-up). 766 * - A PZRC has an XDR-aligned length (no implicit round-up).
@@ -772,11 +772,11 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
772 struct svc_rdma_read_info *info, 772 struct svc_rdma_read_info *info,
773 __be32 *p) 773 __be32 *p)
774{ 774{
775 struct svc_rdma_op_ctxt *head = info->ri_readctxt; 775 struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
776 int ret; 776 int ret;
777 777
778 info->ri_pageno = head->hdr_count - 1; 778 info->ri_pageno = head->rc_hdr_count - 1;
779 info->ri_pageoff = offset_in_page(head->byte_len); 779 info->ri_pageoff = offset_in_page(head->rc_byte_len);
780 780
781 ret = svc_rdma_build_read_chunk(rqstp, info, p); 781 ret = svc_rdma_build_read_chunk(rqstp, info, p);
782 if (ret < 0) 782 if (ret < 0)
@@ -784,22 +784,22 @@ static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
784 784
785 trace_svcrdma_encode_pzr(info->ri_chunklen); 785 trace_svcrdma_encode_pzr(info->ri_chunklen);
786 786
787 head->arg.len += info->ri_chunklen; 787 head->rc_arg.len += info->ri_chunklen;
788 head->arg.buflen += info->ri_chunklen; 788 head->rc_arg.buflen += info->ri_chunklen;
789 789
790 if (head->arg.buflen <= head->sge[0].length) { 790 if (head->rc_arg.buflen <= head->rc_sges[0].length) {
791 /* Transport header and RPC message fit entirely 791 /* Transport header and RPC message fit entirely
792 * in page where head iovec resides. 792 * in page where head iovec resides.
793 */ 793 */
794 head->arg.head[0].iov_len = info->ri_chunklen; 794 head->rc_arg.head[0].iov_len = info->ri_chunklen;
795 } else { 795 } else {
796 /* Transport header and part of RPC message reside 796 /* Transport header and part of RPC message reside
797 * in the head iovec's page. 797 * in the head iovec's page.
798 */ 798 */
799 head->arg.head[0].iov_len = 799 head->rc_arg.head[0].iov_len =
800 head->sge[0].length - head->byte_len; 800 head->rc_sges[0].length - head->rc_byte_len;
801 head->arg.page_len = 801 head->rc_arg.page_len =
802 info->ri_chunklen - head->arg.head[0].iov_len; 802 info->ri_chunklen - head->rc_arg.head[0].iov_len;
803 } 803 }
804 804
805out: 805out:
@@ -824,24 +824,24 @@ out:
824 * - All Read segments in @p have the same Position value. 824 * - All Read segments in @p have the same Position value.
825 */ 825 */
826int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp, 826int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
827 struct svc_rdma_op_ctxt *head, __be32 *p) 827 struct svc_rdma_recv_ctxt *head, __be32 *p)
828{ 828{
829 struct svc_rdma_read_info *info; 829 struct svc_rdma_read_info *info;
830 struct page **page; 830 struct page **page;
831 int ret; 831 int ret;
832 832
833 /* The request (with page list) is constructed in 833 /* The request (with page list) is constructed in
834 * head->arg. Pages involved with RDMA Read I/O are 834 * head->rc_arg. Pages involved with RDMA Read I/O are
835 * transferred there. 835 * transferred there.
836 */ 836 */
837 head->hdr_count = head->count; 837 head->rc_hdr_count = head->rc_page_count;
838 head->arg.head[0] = rqstp->rq_arg.head[0]; 838 head->rc_arg.head[0] = rqstp->rq_arg.head[0];
839 head->arg.tail[0] = rqstp->rq_arg.tail[0]; 839 head->rc_arg.tail[0] = rqstp->rq_arg.tail[0];
840 head->arg.pages = head->pages; 840 head->rc_arg.pages = head->rc_pages;
841 head->arg.page_base = 0; 841 head->rc_arg.page_base = 0;
842 head->arg.page_len = 0; 842 head->rc_arg.page_len = 0;
843 head->arg.len = rqstp->rq_arg.len; 843 head->rc_arg.len = rqstp->rq_arg.len;
844 head->arg.buflen = rqstp->rq_arg.buflen; 844 head->rc_arg.buflen = rqstp->rq_arg.buflen;
845 845
846 info = svc_rdma_read_info_alloc(rdma); 846 info = svc_rdma_read_info_alloc(rdma);
847 if (!info) 847 if (!info)
@@ -867,7 +867,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
867 867
868out: 868out:
869 /* Read sink pages have been moved from rqstp->rq_pages to 869 /* Read sink pages have been moved from rqstp->rq_pages to
870 * head->arg.pages. Force svc_recv to refill those slots 870 * head->rc_arg.pages. Force svc_recv to refill those slots
871 * in rq_pages. 871 * in rq_pages.
872 */ 872 */
873 for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++) 873 for (page = rqstp->rq_pages; page < rqstp->rq_respages; page++)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index fed28de78d37..a397d9a3d80e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,6 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 1// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2/* 2/*
3 * Copyright (c) 2016 Oracle. All rights reserved. 3 * Copyright (c) 2016-2018 Oracle. All rights reserved.
4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 4 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
5 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 5 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
6 * 6 *
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 05edb18f8ca3..05544f2f50d4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -63,7 +63,6 @@
63 63
64#define RPCDBG_FACILITY RPCDBG_SVCXPRT 64#define RPCDBG_FACILITY RPCDBG_SVCXPRT
65 65
66static int svc_rdma_post_recv(struct svcxprt_rdma *xprt);
67static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, 66static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
68 struct net *net); 67 struct net *net);
69static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 68static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
@@ -175,11 +174,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
175{ 174{
176 unsigned int i; 175 unsigned int i;
177 176
178 /* Each RPC/RDMA credit can consume one Receive and 177 i = xprt->sc_sq_depth;
179 * one Send WQE at the same time.
180 */
181 i = xprt->sc_sq_depth + xprt->sc_rq_depth;
182
183 while (i--) { 178 while (i--) {
184 struct svc_rdma_op_ctxt *ctxt; 179 struct svc_rdma_op_ctxt *ctxt;
185 180
@@ -298,54 +293,6 @@ static void qp_event_handler(struct ib_event *event, void *context)
298} 293}
299 294
300/** 295/**
301 * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
302 * @cq: completion queue
303 * @wc: completed WR
304 *
305 */
306static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
307{
308 struct svcxprt_rdma *xprt = cq->cq_context;
309 struct ib_cqe *cqe = wc->wr_cqe;
310 struct svc_rdma_op_ctxt *ctxt;
311
312 trace_svcrdma_wc_receive(wc);
313
314 /* WARNING: Only wc->wr_cqe and wc->status are reliable */
315 ctxt = container_of(cqe, struct svc_rdma_op_ctxt, cqe);
316 svc_rdma_unmap_dma(ctxt);
317
318 if (wc->status != IB_WC_SUCCESS)
319 goto flushed;
320
321 /* All wc fields are now known to be valid */
322 ctxt->byte_len = wc->byte_len;
323 spin_lock(&xprt->sc_rq_dto_lock);
324 list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
325 spin_unlock(&xprt->sc_rq_dto_lock);
326
327 svc_rdma_post_recv(xprt);
328
329 set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
330 if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
331 goto out;
332 goto out_enqueue;
333
334flushed:
335 if (wc->status != IB_WC_WR_FLUSH_ERR)
336 pr_err("svcrdma: Recv: %s (%u/0x%x)\n",
337 ib_wc_status_msg(wc->status),
338 wc->status, wc->vendor_err);
339 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
340 svc_rdma_put_context(ctxt, 1);
341
342out_enqueue:
343 svc_xprt_enqueue(&xprt->sc_xprt);
344out:
345 svc_xprt_put(&xprt->sc_xprt);
346}
347
348/**
349 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC 296 * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
350 * @cq: completion queue 297 * @cq: completion queue
351 * @wc: completed WR 298 * @wc: completed WR
@@ -392,12 +339,14 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
392 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 339 INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
393 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); 340 INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
394 INIT_LIST_HEAD(&cma_xprt->sc_ctxts); 341 INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
342 INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
395 INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts); 343 INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
396 init_waitqueue_head(&cma_xprt->sc_send_wait); 344 init_waitqueue_head(&cma_xprt->sc_send_wait);
397 345
398 spin_lock_init(&cma_xprt->sc_lock); 346 spin_lock_init(&cma_xprt->sc_lock);
399 spin_lock_init(&cma_xprt->sc_rq_dto_lock); 347 spin_lock_init(&cma_xprt->sc_rq_dto_lock);
400 spin_lock_init(&cma_xprt->sc_ctxt_lock); 348 spin_lock_init(&cma_xprt->sc_ctxt_lock);
349 spin_lock_init(&cma_xprt->sc_recv_lock);
401 spin_lock_init(&cma_xprt->sc_rw_ctxt_lock); 350 spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
402 351
403 /* 352 /*
@@ -411,63 +360,6 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
411 return cma_xprt; 360 return cma_xprt;
412} 361}
413 362
414static int
415svc_rdma_post_recv(struct svcxprt_rdma *xprt)
416{
417 struct ib_recv_wr recv_wr, *bad_recv_wr;
418 struct svc_rdma_op_ctxt *ctxt;
419 struct page *page;
420 dma_addr_t pa;
421 int sge_no;
422 int buflen;
423 int ret;
424
425 ctxt = svc_rdma_get_context(xprt);
426 buflen = 0;
427 ctxt->direction = DMA_FROM_DEVICE;
428 ctxt->cqe.done = svc_rdma_wc_receive;
429 for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
430 if (sge_no >= xprt->sc_max_sge) {
431 pr_err("svcrdma: Too many sges (%d)\n", sge_no);
432 goto err_put_ctxt;
433 }
434 page = alloc_page(GFP_KERNEL);
435 if (!page)
436 goto err_put_ctxt;
437 ctxt->pages[sge_no] = page;
438 pa = ib_dma_map_page(xprt->sc_cm_id->device,
439 page, 0, PAGE_SIZE,
440 DMA_FROM_DEVICE);
441 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
442 goto err_put_ctxt;
443 svc_rdma_count_mappings(xprt, ctxt);
444 ctxt->sge[sge_no].addr = pa;
445 ctxt->sge[sge_no].length = PAGE_SIZE;
446 ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
447 ctxt->count = sge_no + 1;
448 buflen += PAGE_SIZE;
449 }
450 recv_wr.next = NULL;
451 recv_wr.sg_list = &ctxt->sge[0];
452 recv_wr.num_sge = ctxt->count;
453 recv_wr.wr_cqe = &ctxt->cqe;
454
455 svc_xprt_get(&xprt->sc_xprt);
456 ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
457 trace_svcrdma_post_recv(&recv_wr, ret);
458 if (ret) {
459 svc_rdma_unmap_dma(ctxt);
460 svc_rdma_put_context(ctxt, 1);
461 svc_xprt_put(&xprt->sc_xprt);
462 }
463 return ret;
464
465 err_put_ctxt:
466 svc_rdma_unmap_dma(ctxt);
467 svc_rdma_put_context(ctxt, 1);
468 return -ENOMEM;
469}
470
471static void 363static void
472svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt, 364svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
473 struct rdma_conn_param *param) 365 struct rdma_conn_param *param)
@@ -698,7 +590,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
698 struct ib_qp_init_attr qp_attr; 590 struct ib_qp_init_attr qp_attr;
699 struct ib_device *dev; 591 struct ib_device *dev;
700 struct sockaddr *sap; 592 struct sockaddr *sap;
701 unsigned int i, ctxts; 593 unsigned int ctxts;
702 int ret = 0; 594 int ret = 0;
703 595
704 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); 596 listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
@@ -803,14 +695,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
803 !rdma_ib_or_roce(dev, newxprt->sc_port_num)) 695 !rdma_ib_or_roce(dev, newxprt->sc_port_num))
804 goto errout; 696 goto errout;
805 697
806 /* Post receive buffers */ 698 if (!svc_rdma_post_recvs(newxprt))
807 for (i = 0; i < newxprt->sc_max_requests; i++) { 699 goto errout;
808 ret = svc_rdma_post_recv(newxprt);
809 if (ret) {
810 dprintk("svcrdma: failure posting receive buffers\n");
811 goto errout;
812 }
813 }
814 700
815 /* Swap out the handler */ 701 /* Swap out the handler */
816 newxprt->sc_cm_id->event_handler = rdma_cma_handler; 702 newxprt->sc_cm_id->event_handler = rdma_cma_handler;
@@ -907,20 +793,7 @@ static void __svc_rdma_free(struct work_struct *work)
907 pr_err("svcrdma: sc_xprt still in use? (%d)\n", 793 pr_err("svcrdma: sc_xprt still in use? (%d)\n",
908 kref_read(&xprt->xpt_ref)); 794 kref_read(&xprt->xpt_ref));
909 795
910 while (!list_empty(&rdma->sc_read_complete_q)) { 796 svc_rdma_flush_recv_queues(rdma);
911 struct svc_rdma_op_ctxt *ctxt;
912 ctxt = list_first_entry(&rdma->sc_read_complete_q,
913 struct svc_rdma_op_ctxt, list);
914 list_del(&ctxt->list);
915 svc_rdma_put_context(ctxt, 1);
916 }
917 while (!list_empty(&rdma->sc_rq_dto_q)) {
918 struct svc_rdma_op_ctxt *ctxt;
919 ctxt = list_first_entry(&rdma->sc_rq_dto_q,
920 struct svc_rdma_op_ctxt, list);
921 list_del(&ctxt->list);
922 svc_rdma_put_context(ctxt, 1);
923 }
924 797
925 /* Warn if we leaked a resource or under-referenced */ 798 /* Warn if we leaked a resource or under-referenced */
926 if (rdma->sc_ctxt_used != 0) 799 if (rdma->sc_ctxt_used != 0)
@@ -935,6 +808,7 @@ static void __svc_rdma_free(struct work_struct *work)
935 808
936 svc_rdma_destroy_rw_ctxts(rdma); 809 svc_rdma_destroy_rw_ctxts(rdma);
937 svc_rdma_destroy_ctxts(rdma); 810 svc_rdma_destroy_ctxts(rdma);
811 svc_rdma_recv_ctxts_destroy(rdma);
938 812
939 /* Destroy the QP if present (not a listener) */ 813 /* Destroy the QP if present (not a listener) */
940 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 814 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))