aboutsummaryrefslogtreecommitdiffstats
path: root/net/sunrpc/xprtrdma
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c86
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c11
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c55
-rw-r--r--net/sunrpc/xprtrdma/transport.c25
-rw-r--r--net/sunrpc/xprtrdma/verbs.c55
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h1
8 files changed, 208 insertions, 126 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2ac3f6e8adff..554d0814c875 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -87,6 +87,8 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
87 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) 87 enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
88{ 88{
89 int len, n = 0, p; 89 int len, n = 0, p;
90 int page_base;
91 struct page **ppages;
90 92
91 if (pos == 0 && xdrbuf->head[0].iov_len) { 93 if (pos == 0 && xdrbuf->head[0].iov_len) {
92 seg[n].mr_page = NULL; 94 seg[n].mr_page = NULL;
@@ -95,34 +97,32 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
95 ++n; 97 ++n;
96 } 98 }
97 99
98 if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { 100 len = xdrbuf->page_len;
99 if (n == nsegs) 101 ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
100 return 0; 102 page_base = xdrbuf->page_base & ~PAGE_MASK;
101 seg[n].mr_page = xdrbuf->pages[0]; 103 p = 0;
102 seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; 104 while (len && n < nsegs) {
103 seg[n].mr_len = min_t(u32, 105 seg[n].mr_page = ppages[p];
104 PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); 106 seg[n].mr_offset = (void *)(unsigned long) page_base;
105 len = xdrbuf->page_len - seg[n].mr_len; 107 seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
108 BUG_ON(seg[n].mr_len > PAGE_SIZE);
109 len -= seg[n].mr_len;
106 ++n; 110 ++n;
107 p = 1; 111 ++p;
108 while (len > 0) { 112 page_base = 0; /* page offset only applies to first page */
109 if (n == nsegs)
110 return 0;
111 seg[n].mr_page = xdrbuf->pages[p];
112 seg[n].mr_offset = NULL;
113 seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
114 len -= seg[n].mr_len;
115 ++n;
116 ++p;
117 }
118 } 113 }
119 114
115 /* Message overflows the seg array */
116 if (len && n == nsegs)
117 return 0;
118
120 if (xdrbuf->tail[0].iov_len) { 119 if (xdrbuf->tail[0].iov_len) {
121 /* the rpcrdma protocol allows us to omit any trailing 120 /* the rpcrdma protocol allows us to omit any trailing
122 * xdr pad bytes, saving the server an RDMA operation. */ 121 * xdr pad bytes, saving the server an RDMA operation. */
123 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) 122 if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
124 return n; 123 return n;
125 if (n == nsegs) 124 if (n == nsegs)
125 /* Tail remains, but we're out of segments */
126 return 0; 126 return 0;
127 seg[n].mr_page = NULL; 127 seg[n].mr_page = NULL;
128 seg[n].mr_offset = xdrbuf->tail[0].iov_base; 128 seg[n].mr_offset = xdrbuf->tail[0].iov_base;
@@ -296,6 +296,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
296 int copy_len; 296 int copy_len;
297 unsigned char *srcp, *destp; 297 unsigned char *srcp, *destp;
298 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); 298 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
299 int page_base;
300 struct page **ppages;
299 301
300 destp = rqst->rq_svec[0].iov_base; 302 destp = rqst->rq_svec[0].iov_base;
301 curlen = rqst->rq_svec[0].iov_len; 303 curlen = rqst->rq_svec[0].iov_len;
@@ -324,28 +326,25 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
324 __func__, destp + copy_len, curlen); 326 __func__, destp + copy_len, curlen);
325 rqst->rq_svec[0].iov_len += curlen; 327 rqst->rq_svec[0].iov_len += curlen;
326 } 328 }
327
328 r_xprt->rx_stats.pullup_copy_count += copy_len; 329 r_xprt->rx_stats.pullup_copy_count += copy_len;
329 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; 330
331 page_base = rqst->rq_snd_buf.page_base;
332 ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
333 page_base &= ~PAGE_MASK;
334 npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
330 for (i = 0; copy_len && i < npages; i++) { 335 for (i = 0; copy_len && i < npages; i++) {
331 if (i == 0) 336 curlen = PAGE_SIZE - page_base;
332 curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
333 else
334 curlen = PAGE_SIZE;
335 if (curlen > copy_len) 337 if (curlen > copy_len)
336 curlen = copy_len; 338 curlen = copy_len;
337 dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", 339 dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
338 __func__, i, destp, copy_len, curlen); 340 __func__, i, destp, copy_len, curlen);
339 srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], 341 srcp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
340 KM_SKB_SUNRPC_DATA); 342 memcpy(destp, srcp+page_base, curlen);
341 if (i == 0)
342 memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
343 else
344 memcpy(destp, srcp, curlen);
345 kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); 343 kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
346 rqst->rq_svec[0].iov_len += curlen; 344 rqst->rq_svec[0].iov_len += curlen;
347 destp += curlen; 345 destp += curlen;
348 copy_len -= curlen; 346 copy_len -= curlen;
347 page_base = 0;
349 } 348 }
350 /* header now contains entire send message */ 349 /* header now contains entire send message */
351 return pad; 350 return pad;
@@ -606,6 +605,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
606{ 605{
607 int i, npages, curlen, olen; 606 int i, npages, curlen, olen;
608 char *destp; 607 char *destp;
608 struct page **ppages;
609 int page_base;
609 610
610 curlen = rqst->rq_rcv_buf.head[0].iov_len; 611 curlen = rqst->rq_rcv_buf.head[0].iov_len;
611 if (curlen > copy_len) { /* write chunk header fixup */ 612 if (curlen > copy_len) { /* write chunk header fixup */
@@ -624,32 +625,29 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
624 olen = copy_len; 625 olen = copy_len;
625 i = 0; 626 i = 0;
626 rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; 627 rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
628 page_base = rqst->rq_rcv_buf.page_base;
629 ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
630 page_base &= ~PAGE_MASK;
631
627 if (copy_len && rqst->rq_rcv_buf.page_len) { 632 if (copy_len && rqst->rq_rcv_buf.page_len) {
628 npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + 633 npages = PAGE_ALIGN(page_base +
629 rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; 634 rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
630 for (; i < npages; i++) { 635 for (; i < npages; i++) {
631 if (i == 0) 636 curlen = PAGE_SIZE - page_base;
632 curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
633 else
634 curlen = PAGE_SIZE;
635 if (curlen > copy_len) 637 if (curlen > copy_len)
636 curlen = copy_len; 638 curlen = copy_len;
637 dprintk("RPC: %s: page %d" 639 dprintk("RPC: %s: page %d"
638 " srcp 0x%p len %d curlen %d\n", 640 " srcp 0x%p len %d curlen %d\n",
639 __func__, i, srcp, copy_len, curlen); 641 __func__, i, srcp, copy_len, curlen);
640 destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], 642 destp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
641 KM_SKB_SUNRPC_DATA); 643 memcpy(destp + page_base, srcp, curlen);
642 if (i == 0) 644 flush_dcache_page(ppages[i]);
643 memcpy(destp + rqst->rq_rcv_buf.page_base,
644 srcp, curlen);
645 else
646 memcpy(destp, srcp, curlen);
647 flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
648 kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); 645 kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
649 srcp += curlen; 646 srcp += curlen;
650 copy_len -= curlen; 647 copy_len -= curlen;
651 if (copy_len == 0) 648 if (copy_len == 0)
652 break; 649 break;
650 page_base = 0;
653 } 651 }
654 rqst->rq_rcv_buf.page_len = olen - copy_len; 652 rqst->rq_rcv_buf.page_len = olen - copy_len;
655 } else 653 } else
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index d718b8fa9525..09af4fab1a45 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -43,6 +43,7 @@
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/fs.h> 44#include <linux/fs.h>
45#include <linux/sysctl.h> 45#include <linux/sysctl.h>
46#include <linux/workqueue.h>
46#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
47#include <linux/sunrpc/sched.h> 48#include <linux/sunrpc/sched.h>
48#include <linux/sunrpc/svc_rdma.h> 49#include <linux/sunrpc/svc_rdma.h>
@@ -74,6 +75,8 @@ atomic_t rdma_stat_sq_prod;
74struct kmem_cache *svc_rdma_map_cachep; 75struct kmem_cache *svc_rdma_map_cachep;
75struct kmem_cache *svc_rdma_ctxt_cachep; 76struct kmem_cache *svc_rdma_ctxt_cachep;
76 77
78struct workqueue_struct *svc_rdma_wq;
79
77/* 80/*
78 * This function implements reading and resetting an atomic_t stat 81 * This function implements reading and resetting an atomic_t stat
79 * variable through read/write to a proc file. Any write to the file 82 * variable through read/write to a proc file. Any write to the file
@@ -231,7 +234,7 @@ static ctl_table svcrdma_root_table[] = {
231void svc_rdma_cleanup(void) 234void svc_rdma_cleanup(void)
232{ 235{
233 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); 236 dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
234 flush_scheduled_work(); 237 destroy_workqueue(svc_rdma_wq);
235 if (svcrdma_table_header) { 238 if (svcrdma_table_header) {
236 unregister_sysctl_table(svcrdma_table_header); 239 unregister_sysctl_table(svcrdma_table_header);
237 svcrdma_table_header = NULL; 240 svcrdma_table_header = NULL;
@@ -249,6 +252,11 @@ int svc_rdma_init(void)
249 dprintk("\tsq_depth : %d\n", 252 dprintk("\tsq_depth : %d\n",
250 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); 253 svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
251 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); 254 dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
255
256 svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
257 if (!svc_rdma_wq)
258 return -ENOMEM;
259
252 if (!svcrdma_table_header) 260 if (!svcrdma_table_header)
253 svcrdma_table_header = 261 svcrdma_table_header =
254 register_sysctl_table(svcrdma_root_table); 262 register_sysctl_table(svcrdma_root_table);
@@ -283,6 +291,7 @@ int svc_rdma_init(void)
283 kmem_cache_destroy(svc_rdma_map_cachep); 291 kmem_cache_destroy(svc_rdma_map_cachep);
284 err0: 292 err0:
285 unregister_sysctl_table(svcrdma_table_header); 293 unregister_sysctl_table(svcrdma_table_header);
294 destroy_workqueue(svc_rdma_wq);
286 return -ENOMEM; 295 return -ENOMEM;
287} 296}
288MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 297MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0194de814933..df67211c4baf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; 263 frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) { 264 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
265 frmr->page_list->page_list[page_no] = 265 frmr->page_list->page_list[page_no] =
266 ib_dma_map_single(xprt->sc_cm_id->device, 266 ib_dma_map_page(xprt->sc_cm_id->device,
267 page_address(rqstp->rq_arg.pages[page_no]), 267 rqstp->rq_arg.pages[page_no], 0,
268 PAGE_SIZE, DMA_FROM_DEVICE); 268 PAGE_SIZE, DMA_FROM_DEVICE);
269 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 269 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
270 frmr->page_list->page_list[page_no])) 270 frmr->page_list->page_list[page_no]))
271 goto fatal_err; 271 goto fatal_err;
@@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
309 int count) 309 int count)
310{ 310{
311 int i; 311 int i;
312 unsigned long off;
312 313
313 ctxt->count = count; 314 ctxt->count = count;
314 ctxt->direction = DMA_FROM_DEVICE; 315 ctxt->direction = DMA_FROM_DEVICE;
315 for (i = 0; i < count; i++) { 316 for (i = 0; i < count; i++) {
316 ctxt->sge[i].length = 0; /* in case map fails */ 317 ctxt->sge[i].length = 0; /* in case map fails */
317 if (!frmr) { 318 if (!frmr) {
319 BUG_ON(0 == virt_to_page(vec[i].iov_base));
320 off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
318 ctxt->sge[i].addr = 321 ctxt->sge[i].addr =
319 ib_dma_map_single(xprt->sc_cm_id->device, 322 ib_dma_map_page(xprt->sc_cm_id->device,
320 vec[i].iov_base, 323 virt_to_page(vec[i].iov_base),
321 vec[i].iov_len, 324 off,
322 DMA_FROM_DEVICE); 325 vec[i].iov_len,
326 DMA_FROM_DEVICE);
323 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 327 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
324 ctxt->sge[i].addr)) 328 ctxt->sge[i].addr))
325 return -EINVAL; 329 return -EINVAL;
@@ -491,6 +495,7 @@ next_sge:
491 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", 495 printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
492 err); 496 err);
493 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 497 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
498 svc_rdma_unmap_dma(ctxt);
494 svc_rdma_put_context(ctxt, 0); 499 svc_rdma_put_context(ctxt, 0);
495 goto out; 500 goto out;
496 } 501 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b15e1ebb2bfa..249a835b703f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -70,8 +70,8 @@
70 * on extra page for the RPCRMDA header. 70 * on extra page for the RPCRMDA header.
71 */ 71 */
72static int fast_reg_xdr(struct svcxprt_rdma *xprt, 72static int fast_reg_xdr(struct svcxprt_rdma *xprt,
73 struct xdr_buf *xdr, 73 struct xdr_buf *xdr,
74 struct svc_rdma_req_map *vec) 74 struct svc_rdma_req_map *vec)
75{ 75{
76 int sge_no; 76 int sge_no;
77 u32 sge_bytes; 77 u32 sge_bytes;
@@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
96 vec->count = 2; 96 vec->count = 2;
97 sge_no++; 97 sge_no++;
98 98
99 /* Build the FRMR */ 99 /* Map the XDR head */
100 frmr->kva = frva; 100 frmr->kva = frva;
101 frmr->direction = DMA_TO_DEVICE; 101 frmr->direction = DMA_TO_DEVICE;
102 frmr->access_flags = 0; 102 frmr->access_flags = 0;
103 frmr->map_len = PAGE_SIZE; 103 frmr->map_len = PAGE_SIZE;
104 frmr->page_list_len = 1; 104 frmr->page_list_len = 1;
105 page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
105 frmr->page_list->page_list[page_no] = 106 frmr->page_list->page_list[page_no] =
106 ib_dma_map_single(xprt->sc_cm_id->device, 107 ib_dma_map_page(xprt->sc_cm_id->device,
107 (void *)xdr->head[0].iov_base, 108 virt_to_page(xdr->head[0].iov_base),
108 PAGE_SIZE, DMA_TO_DEVICE); 109 page_off,
110 PAGE_SIZE - page_off,
111 DMA_TO_DEVICE);
109 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 112 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
110 frmr->page_list->page_list[page_no])) 113 frmr->page_list->page_list[page_no]))
111 goto fatal_err; 114 goto fatal_err;
112 atomic_inc(&xprt->sc_dma_used); 115 atomic_inc(&xprt->sc_dma_used);
113 116
117 /* Map the XDR page list */
114 page_off = xdr->page_base; 118 page_off = xdr->page_base;
115 page_bytes = xdr->page_len + page_off; 119 page_bytes = xdr->page_len + page_off;
116 if (!page_bytes) 120 if (!page_bytes)
@@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
128 page_bytes -= sge_bytes; 132 page_bytes -= sge_bytes;
129 133
130 frmr->page_list->page_list[page_no] = 134 frmr->page_list->page_list[page_no] =
131 ib_dma_map_single(xprt->sc_cm_id->device, 135 ib_dma_map_page(xprt->sc_cm_id->device,
132 page_address(page), 136 page, page_off,
133 PAGE_SIZE, DMA_TO_DEVICE); 137 sge_bytes, DMA_TO_DEVICE);
134 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 138 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
135 frmr->page_list->page_list[page_no])) 139 frmr->page_list->page_list[page_no]))
136 goto fatal_err; 140 goto fatal_err;
@@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
166 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; 170 vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
167 171
168 frmr->page_list->page_list[page_no] = 172 frmr->page_list->page_list[page_no] =
169 ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, 173 ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
170 DMA_TO_DEVICE); 174 page_off,
175 PAGE_SIZE,
176 DMA_TO_DEVICE);
171 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 177 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
172 frmr->page_list->page_list[page_no])) 178 frmr->page_list->page_list[page_no]))
173 goto fatal_err; 179 goto fatal_err;
@@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt,
245 return 0; 251 return 0;
246} 252}
247 253
254static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
255 struct xdr_buf *xdr,
256 u32 xdr_off, size_t len, int dir)
257{
258 struct page *page;
259 dma_addr_t dma_addr;
260 if (xdr_off < xdr->head[0].iov_len) {
261 /* This offset is in the head */
262 xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
263 page = virt_to_page(xdr->head[0].iov_base);
264 } else {
265 xdr_off -= xdr->head[0].iov_len;
266 if (xdr_off < xdr->page_len) {
267 /* This offset is in the page list */
268 page = xdr->pages[xdr_off >> PAGE_SHIFT];
269 xdr_off &= ~PAGE_MASK;
270 } else {
271 /* This offset is in the tail */
272 xdr_off -= xdr->page_len;
273 xdr_off += (unsigned long)
274 xdr->tail[0].iov_base & ~PAGE_MASK;
275 page = virt_to_page(xdr->tail[0].iov_base);
276 }
277 }
278 dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
279 min_t(size_t, PAGE_SIZE, len), dir);
280 return dma_addr;
281}
282
248/* Assumptions: 283/* Assumptions:
249 * - We are using FRMR 284 * - We are using FRMR
250 * - or - 285 * - or -
@@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
293 sge[sge_no].length = sge_bytes; 328 sge[sge_no].length = sge_bytes;
294 if (!vec->frmr) { 329 if (!vec->frmr) {
295 sge[sge_no].addr = 330 sge[sge_no].addr =
296 ib_dma_map_single(xprt->sc_cm_id->device, 331 dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
297 (void *) 332 sge_bytes, DMA_TO_DEVICE);
298 vec->sge[xdr_sge_no].iov_base + sge_off, 333 xdr_off += sge_bytes;
299 sge_bytes, DMA_TO_DEVICE);
300 if (ib_dma_mapping_error(xprt->sc_cm_id->device, 334 if (ib_dma_mapping_error(xprt->sc_cm_id->device,
301 sge[sge_no].addr)) 335 sge[sge_no].addr))
302 goto err; 336 goto err;
@@ -333,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
333 goto err; 367 goto err;
334 return 0; 368 return 0;
335 err: 369 err:
370 svc_rdma_unmap_dma(ctxt);
371 svc_rdma_put_frmr(xprt, vec->frmr);
336 svc_rdma_put_context(ctxt, 0); 372 svc_rdma_put_context(ctxt, 0);
337 /* Fatal error, close transport */ 373 /* Fatal error, close transport */
338 return -EIO; 374 return -EIO;
@@ -494,7 +530,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
494 * In all three cases, this function prepares the RPCRDMA header in 530 * In all three cases, this function prepares the RPCRDMA header in
495 * sge[0], the 'type' parameter indicates the type to place in the 531 * sge[0], the 'type' parameter indicates the type to place in the
496 * RPCRDMA header, and the 'byte_count' field indicates how much of 532 * RPCRDMA header, and the 'byte_count' field indicates how much of
497 * the XDR to include in this RDMA_SEND. 533 * the XDR to include in this RDMA_SEND. NB: The offset of the payload
534 * to send is zero in the XDR.
498 */ 535 */
499static int send_reply(struct svcxprt_rdma *rdma, 536static int send_reply(struct svcxprt_rdma *rdma,
500 struct svc_rqst *rqstp, 537 struct svc_rqst *rqstp,
@@ -536,23 +573,24 @@ static int send_reply(struct svcxprt_rdma *rdma,
536 ctxt->sge[0].lkey = rdma->sc_dma_lkey; 573 ctxt->sge[0].lkey = rdma->sc_dma_lkey;
537 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); 574 ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
538 ctxt->sge[0].addr = 575 ctxt->sge[0].addr =
539 ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), 576 ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
540 ctxt->sge[0].length, DMA_TO_DEVICE); 577 ctxt->sge[0].length, DMA_TO_DEVICE);
541 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) 578 if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
542 goto err; 579 goto err;
543 atomic_inc(&rdma->sc_dma_used); 580 atomic_inc(&rdma->sc_dma_used);
544 581
545 ctxt->direction = DMA_TO_DEVICE; 582 ctxt->direction = DMA_TO_DEVICE;
546 583
547 /* Determine how many of our SGE are to be transmitted */ 584 /* Map the payload indicated by 'byte_count' */
548 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { 585 for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
586 int xdr_off = 0;
549 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); 587 sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
550 byte_count -= sge_bytes; 588 byte_count -= sge_bytes;
551 if (!vec->frmr) { 589 if (!vec->frmr) {
552 ctxt->sge[sge_no].addr = 590 ctxt->sge[sge_no].addr =
553 ib_dma_map_single(rdma->sc_cm_id->device, 591 dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
554 vec->sge[sge_no].iov_base, 592 sge_bytes, DMA_TO_DEVICE);
555 sge_bytes, DMA_TO_DEVICE); 593 xdr_off += sge_bytes;
556 if (ib_dma_mapping_error(rdma->sc_cm_id->device, 594 if (ib_dma_mapping_error(rdma->sc_cm_id->device,
557 ctxt->sge[sge_no].addr)) 595 ctxt->sge[sge_no].addr))
558 goto err; 596 goto err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index edea15a54e51..c3c232a88d94 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -45,6 +45,7 @@
45#include <linux/sched.h> 45#include <linux/sched.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <linux/spinlock.h> 47#include <linux/spinlock.h>
48#include <linux/workqueue.h>
48#include <rdma/ib_verbs.h> 49#include <rdma/ib_verbs.h>
49#include <rdma/rdma_cm.h> 50#include <rdma/rdma_cm.h>
50#include <linux/sunrpc/svc_rdma.h> 51#include <linux/sunrpc/svc_rdma.h>
@@ -52,6 +53,7 @@
52#define RPCDBG_FACILITY RPCDBG_SVCXPRT 53#define RPCDBG_FACILITY RPCDBG_SVCXPRT
53 54
54static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 55static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
56 struct net *net,
55 struct sockaddr *sa, int salen, 57 struct sockaddr *sa, int salen,
56 int flags); 58 int flags);
57static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 59static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
@@ -89,6 +91,9 @@ struct svc_xprt_class svc_rdma_class = {
89/* WR context cache. Created in svc_rdma.c */ 91/* WR context cache. Created in svc_rdma.c */
90extern struct kmem_cache *svc_rdma_ctxt_cachep; 92extern struct kmem_cache *svc_rdma_ctxt_cachep;
91 93
94/* Workqueue created in svc_rdma.c */
95extern struct workqueue_struct *svc_rdma_wq;
96
92struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) 97struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
93{ 98{
94 struct svc_rdma_op_ctxt *ctxt; 99 struct svc_rdma_op_ctxt *ctxt;
@@ -120,7 +125,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
120 */ 125 */
121 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { 126 if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
122 atomic_dec(&xprt->sc_dma_used); 127 atomic_dec(&xprt->sc_dma_used);
123 ib_dma_unmap_single(xprt->sc_cm_id->device, 128 ib_dma_unmap_page(xprt->sc_cm_id->device,
124 ctxt->sge[i].addr, 129 ctxt->sge[i].addr,
125 ctxt->sge[i].length, 130 ctxt->sge[i].length,
126 ctxt->direction); 131 ctxt->direction);
@@ -328,7 +333,7 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
328} 333}
329 334
330/* 335/*
331 * Processs a completion context 336 * Process a completion context
332 */ 337 */
333static void process_context(struct svcxprt_rdma *xprt, 338static void process_context(struct svcxprt_rdma *xprt,
334 struct svc_rdma_op_ctxt *ctxt) 339 struct svc_rdma_op_ctxt *ctxt)
@@ -502,8 +507,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
502 BUG_ON(sge_no >= xprt->sc_max_sge); 507 BUG_ON(sge_no >= xprt->sc_max_sge);
503 page = svc_rdma_get_page(); 508 page = svc_rdma_get_page();
504 ctxt->pages[sge_no] = page; 509 ctxt->pages[sge_no] = page;
505 pa = ib_dma_map_single(xprt->sc_cm_id->device, 510 pa = ib_dma_map_page(xprt->sc_cm_id->device,
506 page_address(page), PAGE_SIZE, 511 page, 0, PAGE_SIZE,
507 DMA_FROM_DEVICE); 512 DMA_FROM_DEVICE);
508 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) 513 if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
509 goto err_put_ctxt; 514 goto err_put_ctxt;
@@ -511,9 +516,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
511 ctxt->sge[sge_no].addr = pa; 516 ctxt->sge[sge_no].addr = pa;
512 ctxt->sge[sge_no].length = PAGE_SIZE; 517 ctxt->sge[sge_no].length = PAGE_SIZE;
513 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; 518 ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
519 ctxt->count = sge_no + 1;
514 buflen += PAGE_SIZE; 520 buflen += PAGE_SIZE;
515 } 521 }
516 ctxt->count = sge_no;
517 recv_wr.next = NULL; 522 recv_wr.next = NULL;
518 recv_wr.sg_list = &ctxt->sge[0]; 523 recv_wr.sg_list = &ctxt->sge[0];
519 recv_wr.num_sge = ctxt->count; 524 recv_wr.num_sge = ctxt->count;
@@ -529,6 +534,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
529 return ret; 534 return ret;
530 535
531 err_put_ctxt: 536 err_put_ctxt:
537 svc_rdma_unmap_dma(ctxt);
532 svc_rdma_put_context(ctxt, 1); 538 svc_rdma_put_context(ctxt, 1);
533 return -ENOMEM; 539 return -ENOMEM;
534} 540}
@@ -670,6 +676,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
670 * Create a listening RDMA service endpoint. 676 * Create a listening RDMA service endpoint.
671 */ 677 */
672static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 678static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
679 struct net *net,
673 struct sockaddr *sa, int salen, 680 struct sockaddr *sa, int salen,
674 int flags) 681 int flags)
675{ 682{
@@ -688,7 +695,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
688 return ERR_PTR(-ENOMEM); 695 return ERR_PTR(-ENOMEM);
689 xprt = &cma_xprt->sc_xprt; 696 xprt = &cma_xprt->sc_xprt;
690 697
691 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); 698 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
699 IB_QPT_RC);
692 if (IS_ERR(listen_id)) { 700 if (IS_ERR(listen_id)) {
693 ret = PTR_ERR(listen_id); 701 ret = PTR_ERR(listen_id);
694 dprintk("svcrdma: rdma_create_id failed = %d\n", ret); 702 dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -798,8 +806,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
798 if (ib_dma_mapping_error(frmr->mr->device, addr)) 806 if (ib_dma_mapping_error(frmr->mr->device, addr))
799 continue; 807 continue;
800 atomic_dec(&xprt->sc_dma_used); 808 atomic_dec(&xprt->sc_dma_used);
801 ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, 809 ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
802 frmr->direction); 810 frmr->direction);
803 } 811 }
804} 812}
805 813
@@ -1184,7 +1192,7 @@ static void svc_rdma_free(struct svc_xprt *xprt)
1184 struct svcxprt_rdma *rdma = 1192 struct svcxprt_rdma *rdma =
1185 container_of(xprt, struct svcxprt_rdma, sc_xprt); 1193 container_of(xprt, struct svcxprt_rdma, sc_xprt);
1186 INIT_WORK(&rdma->sc_work, __svc_rdma_free); 1194 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
1187 schedule_work(&rdma->sc_work); 1195 queue_work(svc_rdma_wq, &rdma->sc_work);
1188} 1196}
1189 1197
1190static int svc_rdma_has_wspace(struct svc_xprt *xprt) 1198static int svc_rdma_has_wspace(struct svc_xprt *xprt)
@@ -1274,7 +1282,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1274 atomic_read(&xprt->sc_sq_count) < 1282 atomic_read(&xprt->sc_sq_count) <
1275 xprt->sc_sq_depth); 1283 xprt->sc_sq_depth);
1276 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1284 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1277 return 0; 1285 return -ENOTCONN;
1278 continue; 1286 continue;
1279 } 1287 }
1280 /* Take a transport ref for each WR posted */ 1288 /* Take a transport ref for each WR posted */
@@ -1306,7 +1314,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1306 enum rpcrdma_errcode err) 1314 enum rpcrdma_errcode err)
1307{ 1315{
1308 struct ib_send_wr err_wr; 1316 struct ib_send_wr err_wr;
1309 struct ib_sge sge;
1310 struct page *p; 1317 struct page *p;
1311 struct svc_rdma_op_ctxt *ctxt; 1318 struct svc_rdma_op_ctxt *ctxt;
1312 u32 *va; 1319 u32 *va;
@@ -1319,26 +1326,28 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1319 /* XDR encode error */ 1326 /* XDR encode error */
1320 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); 1327 length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);
1321 1328
1329 ctxt = svc_rdma_get_context(xprt);
1330 ctxt->direction = DMA_FROM_DEVICE;
1331 ctxt->count = 1;
1332 ctxt->pages[0] = p;
1333
1322 /* Prepare SGE for local address */ 1334 /* Prepare SGE for local address */
1323 sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, 1335 ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
1324 page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); 1336 p, 0, length, DMA_FROM_DEVICE);
1325 if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { 1337 if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
1326 put_page(p); 1338 put_page(p);
1339 svc_rdma_put_context(ctxt, 1);
1327 return; 1340 return;
1328 } 1341 }
1329 atomic_inc(&xprt->sc_dma_used); 1342 atomic_inc(&xprt->sc_dma_used);
1330 sge.lkey = xprt->sc_dma_lkey; 1343 ctxt->sge[0].lkey = xprt->sc_dma_lkey;
1331 sge.length = length; 1344 ctxt->sge[0].length = length;
1332
1333 ctxt = svc_rdma_get_context(xprt);
1334 ctxt->count = 1;
1335 ctxt->pages[0] = p;
1336 1345
1337 /* Prepare SEND WR */ 1346 /* Prepare SEND WR */
1338 memset(&err_wr, 0, sizeof err_wr); 1347 memset(&err_wr, 0, sizeof err_wr);
1339 ctxt->wr_op = IB_WR_SEND; 1348 ctxt->wr_op = IB_WR_SEND;
1340 err_wr.wr_id = (unsigned long)ctxt; 1349 err_wr.wr_id = (unsigned long)ctxt;
1341 err_wr.sg_list = &sge; 1350 err_wr.sg_list = ctxt->sge;
1342 err_wr.num_sge = 1; 1351 err_wr.num_sge = 1;
1343 err_wr.opcode = IB_WR_SEND; 1352 err_wr.opcode = IB_WR_SEND;
1344 err_wr.send_flags = IB_SEND_SIGNALED; 1353 err_wr.send_flags = IB_SEND_SIGNALED;
@@ -1348,9 +1357,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1348 if (ret) { 1357 if (ret) {
1349 dprintk("svcrdma: Error %d posting send for protocol error\n", 1358 dprintk("svcrdma: Error %d posting send for protocol error\n",
1350 ret); 1359 ret);
1351 ib_dma_unmap_single(xprt->sc_cm_id->device, 1360 svc_rdma_unmap_dma(ctxt);
1352 sge.addr, PAGE_SIZE,
1353 DMA_FROM_DEVICE);
1354 svc_rdma_put_context(ctxt, 1); 1361 svc_rdma_put_context(ctxt, 1);
1355 } 1362 }
1356} 1363}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a85e866a77f7..0867070bb5ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -237,8 +237,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
237 237
238 dprintk("RPC: %s: called\n", __func__); 238 dprintk("RPC: %s: called\n", __func__);
239 239
240 cancel_delayed_work(&r_xprt->rdma_connect); 240 cancel_delayed_work_sync(&r_xprt->rdma_connect);
241 flush_scheduled_work();
242 241
243 xprt_clear_connected(xprt); 242 xprt_clear_connected(xprt);
244 243
@@ -251,9 +250,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
251 250
252 xprt_rdma_free_addresses(xprt); 251 xprt_rdma_free_addresses(xprt);
253 252
254 kfree(xprt->slot); 253 xprt_free(xprt);
255 xprt->slot = NULL;
256 kfree(xprt);
257 254
258 dprintk("RPC: %s: returning\n", __func__); 255 dprintk("RPC: %s: returning\n", __func__);
259 256
@@ -285,23 +282,14 @@ xprt_setup_rdma(struct xprt_create *args)
285 return ERR_PTR(-EBADF); 282 return ERR_PTR(-EBADF);
286 } 283 }
287 284
288 xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); 285 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
286 xprt_rdma_slot_table_entries);
289 if (xprt == NULL) { 287 if (xprt == NULL) {
290 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", 288 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
291 __func__); 289 __func__);
292 return ERR_PTR(-ENOMEM); 290 return ERR_PTR(-ENOMEM);
293 } 291 }
294 292
295 xprt->max_reqs = xprt_rdma_slot_table_entries;
296 xprt->slot = kcalloc(xprt->max_reqs,
297 sizeof(struct rpc_rqst), GFP_KERNEL);
298 if (xprt->slot == NULL) {
299 dprintk("RPC: %s: couldn't allocate %d slots\n",
300 __func__, xprt->max_reqs);
301 kfree(xprt);
302 return ERR_PTR(-ENOMEM);
303 }
304
305 /* 60 second timeout, no retries */ 293 /* 60 second timeout, no retries */
306 xprt->timeout = &xprt_rdma_default_timeout; 294 xprt->timeout = &xprt_rdma_default_timeout;
307 xprt->bind_timeout = (60U * HZ); 295 xprt->bind_timeout = (60U * HZ);
@@ -410,8 +398,7 @@ out3:
410out2: 398out2:
411 rpcrdma_ia_close(&new_xprt->rx_ia); 399 rpcrdma_ia_close(&new_xprt->rx_ia);
412out1: 400out1:
413 kfree(xprt->slot); 401 xprt_free(xprt);
414 kfree(xprt);
415 return ERR_PTR(rc); 402 return ERR_PTR(rc);
416} 403}
417 404
@@ -460,7 +447,7 @@ xprt_rdma_connect(struct rpc_task *task)
460 } else { 447 } else {
461 schedule_delayed_work(&r_xprt->rdma_connect, 0); 448 schedule_delayed_work(&r_xprt->rdma_connect, 0);
462 if (!RPC_IS_ASYNC(task)) 449 if (!RPC_IS_ASYNC(task))
463 flush_scheduled_work(); 450 flush_delayed_work(&r_xprt->rdma_connect);
464 } 451 }
465} 452}
466 453
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5f4c7b3bc711..80f8da344df5 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -144,6 +144,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
144static inline 144static inline
145void rpcrdma_event_process(struct ib_wc *wc) 145void rpcrdma_event_process(struct ib_wc *wc)
146{ 146{
147 struct rpcrdma_mw *frmr;
147 struct rpcrdma_rep *rep = 148 struct rpcrdma_rep *rep =
148 (struct rpcrdma_rep *)(unsigned long) wc->wr_id; 149 (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
149 150
@@ -154,15 +155,23 @@ void rpcrdma_event_process(struct ib_wc *wc)
154 return; 155 return;
155 156
156 if (IB_WC_SUCCESS != wc->status) { 157 if (IB_WC_SUCCESS != wc->status) {
157 dprintk("RPC: %s: %s WC status %X, connection lost\n", 158 dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
158 __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send", 159 __func__, wc->opcode, wc->status);
159 wc->status);
160 rep->rr_len = ~0U; 160 rep->rr_len = ~0U;
161 rpcrdma_schedule_tasklet(rep); 161 if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
162 rpcrdma_schedule_tasklet(rep);
162 return; 163 return;
163 } 164 }
164 165
165 switch (wc->opcode) { 166 switch (wc->opcode) {
167 case IB_WC_FAST_REG_MR:
168 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
169 frmr->r.frmr.state = FRMR_IS_VALID;
170 break;
171 case IB_WC_LOCAL_INV:
172 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
173 frmr->r.frmr.state = FRMR_IS_INVALID;
174 break;
166 case IB_WC_RECV: 175 case IB_WC_RECV:
167 rep->rr_len = wc->byte_len; 176 rep->rr_len = wc->byte_len;
168 ib_dma_sync_single_for_cpu( 177 ib_dma_sync_single_for_cpu(
@@ -378,7 +387,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
378 387
379 init_completion(&ia->ri_done); 388 init_completion(&ia->ri_done);
380 389
381 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); 390 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
382 if (IS_ERR(id)) { 391 if (IS_ERR(id)) {
383 rc = PTR_ERR(id); 392 rc = PTR_ERR(id);
384 dprintk("RPC: %s: rdma_create_id() failed %i\n", 393 dprintk("RPC: %s: rdma_create_id() failed %i\n",
@@ -1450,6 +1459,12 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1450 seg->mr_dma = ib_dma_map_single(ia->ri_id->device, 1459 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1451 seg->mr_offset, 1460 seg->mr_offset,
1452 seg->mr_dmalen, seg->mr_dir); 1461 seg->mr_dmalen, seg->mr_dir);
1462 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1463 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1464 __func__,
1465 (unsigned long long)seg->mr_dma,
1466 seg->mr_offset, seg->mr_dmalen);
1467 }
1453} 1468}
1454 1469
1455static void 1470static void
@@ -1469,7 +1484,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1469 struct rpcrdma_xprt *r_xprt) 1484 struct rpcrdma_xprt *r_xprt)
1470{ 1485{
1471 struct rpcrdma_mr_seg *seg1 = seg; 1486 struct rpcrdma_mr_seg *seg1 = seg;
1472 struct ib_send_wr frmr_wr, *bad_wr; 1487 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1488
1473 u8 key; 1489 u8 key;
1474 int len, pageoff; 1490 int len, pageoff;
1475 int i, rc; 1491 int i, rc;
@@ -1484,6 +1500,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1484 rpcrdma_map_one(ia, seg, writing); 1500 rpcrdma_map_one(ia, seg, writing);
1485 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; 1501 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
1486 len += seg->mr_len; 1502 len += seg->mr_len;
1503 BUG_ON(seg->mr_len > PAGE_SIZE);
1487 ++seg; 1504 ++seg;
1488 ++i; 1505 ++i;
1489 /* Check for holes */ 1506 /* Check for holes */
@@ -1494,26 +1511,45 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1494 dprintk("RPC: %s: Using frmr %p to map %d segments\n", 1511 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1495 __func__, seg1->mr_chunk.rl_mw, i); 1512 __func__, seg1->mr_chunk.rl_mw, i);
1496 1513
1514 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1515 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1516 __func__,
1517 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1518 /* Invalidate before using. */
1519 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1520 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1521 invalidate_wr.next = &frmr_wr;
1522 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1523 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1524 invalidate_wr.ex.invalidate_rkey =
1525 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1526 DECR_CQCOUNT(&r_xprt->rx_ep);
1527 post_wr = &invalidate_wr;
1528 } else
1529 post_wr = &frmr_wr;
1530
1497 /* Bump the key */ 1531 /* Bump the key */
1498 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); 1532 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1499 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); 1533 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1500 1534
1501 /* Prepare FRMR WR */ 1535 /* Prepare FRMR WR */
1502 memset(&frmr_wr, 0, sizeof frmr_wr); 1536 memset(&frmr_wr, 0, sizeof frmr_wr);
1537 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1503 frmr_wr.opcode = IB_WR_FAST_REG_MR; 1538 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1504 frmr_wr.send_flags = 0; /* unsignaled */ 1539 frmr_wr.send_flags = IB_SEND_SIGNALED;
1505 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; 1540 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1506 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; 1541 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1507 frmr_wr.wr.fast_reg.page_list_len = i; 1542 frmr_wr.wr.fast_reg.page_list_len = i;
1508 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 1543 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1509 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; 1544 frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
1545 BUG_ON(frmr_wr.wr.fast_reg.length < len);
1510 frmr_wr.wr.fast_reg.access_flags = (writing ? 1546 frmr_wr.wr.fast_reg.access_flags = (writing ?
1511 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 1547 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1512 IB_ACCESS_REMOTE_READ); 1548 IB_ACCESS_REMOTE_READ);
1513 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1549 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1514 DECR_CQCOUNT(&r_xprt->rx_ep); 1550 DECR_CQCOUNT(&r_xprt->rx_ep);
1515 1551
1516 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); 1552 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
1517 1553
1518 if (rc) { 1554 if (rc) {
1519 dprintk("RPC: %s: failed ib_post_send for register," 1555 dprintk("RPC: %s: failed ib_post_send for register,"
@@ -1542,8 +1578,9 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1542 rpcrdma_unmap_one(ia, seg++); 1578 rpcrdma_unmap_one(ia, seg++);
1543 1579
1544 memset(&invalidate_wr, 0, sizeof invalidate_wr); 1580 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1581 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1545 invalidate_wr.opcode = IB_WR_LOCAL_INV; 1582 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1546 invalidate_wr.send_flags = 0; /* unsignaled */ 1583 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1547 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; 1584 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1548 DECR_CQCOUNT(&r_xprt->rx_ep); 1585 DECR_CQCOUNT(&r_xprt->rx_ep);
1549 1586
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c7a7eba991bc..cae761a8536c 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -164,6 +164,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
164 struct { 164 struct {
165 struct ib_fast_reg_page_list *fr_pgl; 165 struct ib_fast_reg_page_list *fr_pgl;
166 struct ib_mr *fr_mr; 166 struct ib_mr *fr_mr;
167 enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
167 } frmr; 168 } frmr;
168 } r; 169 } r;
169 struct list_head mw_list; 170 struct list_head mw_list;