author	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-12 13:39:41 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-12 13:39:41 -0500
commit	61845143febe6b88349acad4732adc54894009a3
tree	bed6a23efe05b9867b8b4d1f4d251fc1c562e7e4 /net
parent	a26be149facb22d30cd92cadb26f651d6fe802c9
parent	c23ae6017835b5bc9b9ec9d5d9c2b1523053f503
Merge branch 'for-3.20' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields:
 "The main change is the pNFS block server support from Christoph,
  which allows an NFS client connected to shared disk to do block IO
  to the shared disk in place of NFS reads and writes.  This also
  requires xfs patches, which should arrive soon through the xfs tree,
  barring unexpected problems.  Support for other filesystems is also
  possible if there's interest.

  Thanks also to Chuck Lever for continuing work to get NFS/RDMA into
  shape"

* 'for-3.20' of git://linux-nfs.org/~bfields/linux: (32 commits)
  nfsd: default NFSv4.2 to on
  nfsd: pNFS block layout driver
  exportfs: add methods for block layout exports
  nfsd: add trace events
  nfsd: update documentation for pNFS support
  nfsd: implement pNFS layout recalls
  nfsd: implement pNFS operations
  nfsd: make find_any_file available outside nfs4state.c
  nfsd: make find/get/put file available outside nfs4state.c
  nfsd: make lookup/alloc/unhash_stid available outside nfs4state.c
  nfsd: add fh_fsid_match helper
  nfsd: move nfsd_fh_match to nfsfh.h
  fs: add FL_LAYOUT lease type
  fs: track fl_owner for leases
  nfs: add LAYOUT_TYPE_MAX enum value
  nfsd: factor out a helper to decode nfstime4 values
  sunrpc/lockd: fix references to the BKL
  nfsd: fix year-2038 nfs4 state problem
  svcrdma: Handle additional inline content
  svcrdma: Move read list XDR round-up logic
  ...
Diffstat (limited to 'net')
-rw-r--r--  net/sunrpc/svc.c                            4
-rw-r--r--  net/sunrpc/svc_xprt.c                       3
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_marshal.c     16
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c   244
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c      46
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c   47
6 files changed, 209 insertions, 151 deletions
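
A note on the XDR round-up that several svcrdma hunks below deal with: XDR (RFC 4506) encodes data in 4-byte units, so a read-list payload whose length is not a multiple of 4 is logically followed by up to three pad bytes, and RFC 5666 lets the client omit that pad from the chunk list. The sketch below is illustrative only and is not part of this patch; xdr_roundup_pad() is a hypothetical helper that shows the arithmetic the receive path uses when it accounts for the pad in place instead of issuing a separate RDMA READ for it.

#include <stdio.h>

/* Illustrative sketch only -- not from the patch below. */
static unsigned int xdr_roundup_pad(unsigned int len)
{
	/* bytes needed to bring len up to the next 4-byte boundary */
	return (4 - (len & 3)) & 3;
}

int main(void)
{
	unsigned int len = 1234;	/* arbitrary example length */

	printf("payload %u bytes -> %u pad bytes\n", len, xdr_roundup_pad(len));
	return 0;
}
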
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 91eaef1844c8..78974e4d9ad2 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -768,8 +768,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 EXPORT_SYMBOL_GPL(svc_set_num_threads);
 
 /*
- * Called from a server thread as it's exiting. Caller must hold the BKL or
- * the "service mutex", whichever is appropriate for the service.
+ * Called from a server thread as it's exiting. Caller must hold the "service
+ * mutex" for the service.
  */
 void
 svc_exit_thread(struct svc_rqst *rqstp)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c69358b3cf7f..163ac45c3639 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -42,7 +42,7 @@ static LIST_HEAD(svc_xprt_class_list);
  * svc_pool->sp_lock protects most of the fields of that pool.
  * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
  * when both need to be taken (rare), svc_serv->sv_lock is first.
- * BKL protects svc_serv->sv_nrthread.
+ * The "service mutex" protects svc_serv->sv_nrthread.
  * svc_sock->sk_lock protects the svc_sock->sk_deferred list
  * and the ->sk_info_authunix cache.
  *
@@ -67,7 +67,6 @@ static LIST_HEAD(svc_xprt_class_list);
  * that no other thread will be using the transport or will
  * try to set XPT_DEAD.
  */
-
 int svc_reg_xprt_class(struct svc_xprt_class *xcl)
 {
 	struct svc_xprt_class *cl;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 65b146297f5a..b681855cf970 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -71,22 +71,6 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
 }
 
 /*
- * Determine number of chunks and total bytes in chunk list. The chunk
- * list has already been verified to fit within the RPCRDMA header.
- */
-void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
-			       int *ch_count, int *byte_count)
-{
-	/* compute the number of bytes represented by read chunks */
-	*byte_count = 0;
-	*ch_count = 0;
-	for (; ch->rc_discrim != 0; ch++) {
-		*byte_count = *byte_count + ntohl(ch->rc_target.rs_length);
-		*ch_count = *ch_count + 1;
-	}
-}
-
-/*
  * Decodes a write chunk list. The expected format is as follows:
  *    descrim  : xdr_one
  *    nchunks  : <count>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index e0110270d650..f9f13a32ddb8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -43,7 +43,6 @@
 #include <linux/sunrpc/debug.h>
 #include <linux/sunrpc/rpc_rdma.h>
 #include <linux/spinlock.h>
-#include <linux/highmem.h>
 #include <asm/unaligned.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
@@ -60,6 +59,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 				   struct svc_rdma_op_ctxt *ctxt,
 				   u32 byte_count)
 {
+	struct rpcrdma_msg *rmsgp;
 	struct page *page;
 	u32 bc;
 	int sge_no;
@@ -82,7 +82,14 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	/* If data remains, store it in the pagelist */
 	rqstp->rq_arg.page_len = bc;
 	rqstp->rq_arg.page_base = 0;
-	rqstp->rq_arg.pages = &rqstp->rq_pages[1];
+
+	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
+	if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
+		rqstp->rq_arg.pages = &rqstp->rq_pages[0];
+	else
+		rqstp->rq_arg.pages = &rqstp->rq_pages[1];
+
 	sge_no = 1;
 	while (bc && sge_no < ctxt->count) {
 		page = ctxt->pages[sge_no];
@@ -95,14 +102,6 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_respages = &rqstp->rq_pages[sge_no];
 	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
-	/* We should never run out of SGE because the limit is defined to
-	 * support the max allowed RPC data length
-	 */
-	BUG_ON(bc && (sge_no == ctxt->count));
-	BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
-	       != byte_count);
-	BUG_ON(rqstp->rq_arg.len != byte_count);
-
 	/* If not all pages were used from the SGL, free the remaining ones */
 	bc = sge_no;
 	while (sge_no < ctxt->count) {
@@ -125,26 +124,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
 	return min_t(int, sge_count, xprt->sc_max_sge);
 }
 
-typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
-			      struct svc_rqst *rqstp,
-			      struct svc_rdma_op_ctxt *head,
-			      int *page_no,
-			      u32 *page_offset,
-			      u32 rs_handle,
-			      u32 rs_length,
-			      u64 rs_offset,
-			      int last);
-
 /* Issue an RDMA_READ using the local lkey to map the data sink */
-static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
-			       struct svc_rqst *rqstp,
-			       struct svc_rdma_op_ctxt *head,
-			       int *page_no,
-			       u32 *page_offset,
-			       u32 rs_handle,
-			       u32 rs_length,
-			       u64 rs_offset,
-			       int last)
+int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+			struct svc_rqst *rqstp,
+			struct svc_rdma_op_ctxt *head,
+			int *page_no,
+			u32 *page_offset,
+			u32 rs_handle,
+			u32 rs_length,
+			u64 rs_offset,
+			bool last)
 {
 	struct ib_send_wr read_wr;
 	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
@@ -229,15 +218,15 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
 }
 
 /* Issue an RDMA_READ using an FRMR to map the data sink */
-static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
-				struct svc_rqst *rqstp,
-				struct svc_rdma_op_ctxt *head,
-				int *page_no,
-				u32 *page_offset,
-				u32 rs_handle,
-				u32 rs_length,
-				u64 rs_offset,
-				int last)
+int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
+			 struct svc_rqst *rqstp,
+			 struct svc_rdma_op_ctxt *head,
+			 int *page_no,
+			 u32 *page_offset,
+			 u32 rs_handle,
+			 u32 rs_length,
+			 u64 rs_offset,
+			 bool last)
 {
 	struct ib_send_wr read_wr;
 	struct ib_send_wr inv_wr;
@@ -365,24 +354,84 @@ static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 	return ret;
 }
 
+static unsigned int
+rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch)
+{
+	unsigned int count;
+
+	for (count = 0; ch->rc_discrim != xdr_zero; ch++)
+		count++;
+	return count;
+}
+
+/* If there was additional inline content, append it to the end of arg.pages.
+ * Tail copy has to be done after the reader function has determined how many
+ * pages are needed for RDMA READ.
+ */
+static int
+rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
+	       u32 position, u32 byte_count, u32 page_offset, int page_no)
+{
+	char *srcp, *destp;
+	int ret;
+
+	ret = 0;
+	srcp = head->arg.head[0].iov_base + position;
+	byte_count = head->arg.head[0].iov_len - position;
+	if (byte_count > PAGE_SIZE) {
+		dprintk("svcrdma: large tail unsupported\n");
+		return 0;
+	}
+
+	/* Fit as much of the tail on the current page as possible */
+	if (page_offset != PAGE_SIZE) {
+		destp = page_address(rqstp->rq_arg.pages[page_no]);
+		destp += page_offset;
+		while (byte_count--) {
+			*destp++ = *srcp++;
+			page_offset++;
+			if (page_offset == PAGE_SIZE && byte_count)
+				goto more;
+		}
+		goto done;
+	}
+
+more:
+	/* Fit the rest on the next page */
+	page_no++;
+	destp = page_address(rqstp->rq_arg.pages[page_no]);
+	while (byte_count--)
+		*destp++ = *srcp++;
+
+	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
+	rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+done:
+	byte_count = head->arg.head[0].iov_len - position;
+	head->arg.page_len += byte_count;
+	head->arg.len += byte_count;
+	head->arg.buflen += byte_count;
+	return 1;
+}
+
 static int rdma_read_chunks(struct svcxprt_rdma *xprt,
 			    struct rpcrdma_msg *rmsgp,
 			    struct svc_rqst *rqstp,
 			    struct svc_rdma_op_ctxt *head)
 {
-	int page_no, ch_count, ret;
+	int page_no, ret;
 	struct rpcrdma_read_chunk *ch;
-	u32 page_offset, byte_count;
+	u32 handle, page_offset, byte_count;
+	u32 position;
 	u64 rs_offset;
-	rdma_reader_fn reader;
+	bool last;
 
 	/* If no read list is present, return 0 */
 	ch = svc_rdma_get_read_chunk(rmsgp);
 	if (!ch)
 		return 0;
 
-	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
-	if (ch_count > RPCSVC_MAXPAGES)
+	if (rdma_rcl_chunk_count(ch) > RPCSVC_MAXPAGES)
 		return -EINVAL;
 
 	/* The request is completed when the RDMA_READs complete. The
@@ -391,34 +440,41 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
 	 */
 	head->arg.head[0] = rqstp->rq_arg.head[0];
 	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->arg.pages = &head->pages[head->count];
 	head->hdr_count = head->count;
 	head->arg.page_base = 0;
 	head->arg.page_len = 0;
 	head->arg.len = rqstp->rq_arg.len;
 	head->arg.buflen = rqstp->rq_arg.buflen;
 
-	/* Use FRMR if supported */
-	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
-		reader = rdma_read_chunk_frmr;
-	else
-		reader = rdma_read_chunk_lcl;
+	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
+	position = be32_to_cpu(ch->rc_position);
+
+	/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
+	if (position == 0) {
+		head->arg.pages = &head->pages[0];
+		page_offset = head->byte_len;
+	} else {
+		head->arg.pages = &head->pages[head->count];
+		page_offset = 0;
+	}
 
-	page_no = 0; page_offset = 0;
-	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	     ch->rc_discrim != 0; ch++) {
+	ret = 0;
+	page_no = 0;
+	for (; ch->rc_discrim != xdr_zero; ch++) {
+		if (be32_to_cpu(ch->rc_position) != position)
+			goto err;
 
+		handle = be32_to_cpu(ch->rc_target.rs_handle),
+		byte_count = be32_to_cpu(ch->rc_target.rs_length);
 		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
 				 &rs_offset);
-		byte_count = ntohl(ch->rc_target.rs_length);
 
 		while (byte_count > 0) {
-			ret = reader(xprt, rqstp, head,
-				     &page_no, &page_offset,
-				     ntohl(ch->rc_target.rs_handle),
-				     byte_count, rs_offset,
-				     ((ch+1)->rc_discrim == 0) /* last */
-				     );
+			last = (ch + 1)->rc_discrim == xdr_zero;
+			ret = xprt->sc_reader(xprt, rqstp, head,
+					      &page_no, &page_offset,
+					      handle, byte_count,
+					      rs_offset, last);
 			if (ret < 0)
 				goto err;
 			byte_count -= ret;
@@ -426,7 +482,24 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
 			head->arg.buflen += ret;
 		}
 	}
+
+	/* Read list may need XDR round-up (see RFC 5666, s. 3.7) */
+	if (page_offset & 3) {
+		u32 pad = 4 - (page_offset & 3);
+
+		head->arg.page_len += pad;
+		head->arg.len += pad;
+		head->arg.buflen += pad;
+		page_offset += pad;
+	}
+
 	ret = 1;
+	if (position && position < head->arg.head[0].iov_len)
+		ret = rdma_copy_tail(rqstp, head, position,
+				     byte_count, page_offset, page_no);
+	head->arg.head[0].iov_len = position;
+	head->position = position;
+
  err:
 	/* Detach arg pages. svc_recv will replenish them */
 	for (page_no = 0;
@@ -436,47 +509,33 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
 	return ret;
 }
 
-/*
- * To avoid a separate RDMA READ just for a handful of zero bytes,
- * RFC 5666 section 3.7 allows the client to omit the XDR zero pad
- * in chunk lists.
- */
-static void
-rdma_fix_xdr_pad(struct xdr_buf *buf)
-{
-	unsigned int page_len = buf->page_len;
-	unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len;
-	unsigned int offset, pg_no;
-	char *p;
-
-	if (size == 0)
-		return;
-
-	pg_no = page_len >> PAGE_SHIFT;
-	offset = page_len & ~PAGE_MASK;
-	p = page_address(buf->pages[pg_no]);
-	memset(p + offset, 0, size);
-
-	buf->page_len += size;
-	buf->buflen += size;
-	buf->len += size;
-}
-
 static int rdma_read_complete(struct svc_rqst *rqstp,
 			      struct svc_rdma_op_ctxt *head)
 {
 	int page_no;
 	int ret;
 
-	BUG_ON(!head);
-
 	/* Copy RPC pages */
 	for (page_no = 0; page_no < head->count; page_no++) {
 		put_page(rqstp->rq_pages[page_no]);
 		rqstp->rq_pages[page_no] = head->pages[page_no];
 	}
+
+	/* Adjustments made for RDMA_NOMSG type requests */
+	if (head->position == 0) {
+		if (head->arg.len <= head->sge[0].length) {
+			head->arg.head[0].iov_len = head->arg.len -
+						    head->byte_len;
+			head->arg.page_len = 0;
+		} else {
+			head->arg.head[0].iov_len = head->sge[0].length -
+						    head->byte_len;
+			head->arg.page_len = head->arg.len -
+					     head->sge[0].length;
+		}
+	}
+
 	/* Point rq_arg.pages past header */
-	rdma_fix_xdr_pad(&head->arg);
 	rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count];
 	rqstp->rq_arg.page_len = head->arg.page_len;
 	rqstp->rq_arg.page_base = head->arg.page_base;
@@ -501,8 +560,8 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	ret = rqstp->rq_arg.head[0].iov_len
 		+ rqstp->rq_arg.page_len
 		+ rqstp->rq_arg.tail[0].iov_len;
-	dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
-		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
+	dprintk("svcrdma: deferred read ret=%d, rq_arg.len=%u, "
+		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zu\n",
 		ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
 		rqstp->rq_arg.head[0].iov_len);
 
@@ -558,7 +617,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	}
 	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
 		ctxt, rdma_xprt, rqstp, ctxt->wc_status);
-	BUG_ON(ctxt->wc_status != IB_WC_SUCCESS);
 	atomic_inc(&rdma_stat_recv);
 
 	/* Build up the XDR from the receive buffers. */
@@ -591,8 +649,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		+ rqstp->rq_arg.tail[0].iov_len;
 	svc_rdma_put_context(ctxt, 0);
  out:
-	dprintk("svcrdma: ret = %d, rq_arg.len =%d, "
-		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
+	dprintk("svcrdma: ret=%d, rq_arg.len=%u, "
+		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len=%zd\n",
 		ret, rqstp->rq_arg.len,
 		rqstp->rq_arg.head[0].iov_base,
 		rqstp->rq_arg.head[0].iov_len);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 9f1b50689c0f..7de33d1af9b6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -60,8 +60,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
 	u32 page_off;
 	int page_no;
 
-	BUG_ON(xdr->len !=
-	       (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
+	if (xdr->len !=
+	    (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
+		pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+		return -EIO;
+	}
 
 	/* Skip the first sge, this is for the RPCRDMA header */
 	sge_no = 1;
@@ -150,7 +153,11 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	int bc;
 	struct svc_rdma_op_ctxt *ctxt;
 
-	BUG_ON(vec->count > RPCSVC_MAXPAGES);
+	if (vec->count > RPCSVC_MAXPAGES) {
+		pr_err("svcrdma: Too many pages (%lu)\n", vec->count);
+		return -EIO;
+	}
+
 	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
 		"write_len=%d, vec->sge=%p, vec->count=%lu\n",
 		rmr, (unsigned long long)to, xdr_off,
@@ -190,7 +197,10 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 			sge_off = 0;
 			sge_no++;
 			xdr_sge_no++;
-			BUG_ON(xdr_sge_no > vec->count);
+			if (xdr_sge_no > vec->count) {
+				pr_err("svcrdma: Too many sges (%d)\n", xdr_sge_no);
+				goto err;
+			}
 			bc -= sge_bytes;
 			if (sge_no == xprt->sc_max_sge)
 				break;
@@ -421,7 +431,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
 		ctxt->sge[sge_no].length = sge_bytes;
 	}
-	BUG_ON(byte_count != 0);
+	if (byte_count != 0) {
+		pr_err("svcrdma: Could not map %d bytes\n", byte_count);
+		goto err;
+	}
 
 	/* Save all respages in the ctxt and remove them from the
 	 * respages array. They are our pages until the I/O
@@ -442,7 +455,10 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	}
 	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
-	BUG_ON(sge_no > rdma->sc_max_sge);
+	if (sge_no > rdma->sc_max_sge) {
+		pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+		goto err;
+	}
 	memset(&send_wr, 0, sizeof send_wr);
 	ctxt->wr_op = IB_WR_SEND;
 	send_wr.wr_id = (unsigned long)ctxt;
@@ -467,18 +483,6 @@ void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
 {
 }
 
-/*
- * Return the start of an xdr buffer.
- */
-static void *xdr_start(struct xdr_buf *xdr)
-{
-	return xdr->head[0].iov_base -
-		(xdr->len -
-		 xdr->page_len -
-		 xdr->tail[0].iov_len -
-		 xdr->head[0].iov_len);
-}
-
 int svc_rdma_sendto(struct svc_rqst *rqstp)
 {
 	struct svc_xprt *xprt = rqstp->rq_xprt;
@@ -496,8 +500,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
 
 	dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
 
-	/* Get the RDMA request header. */
-	rdma_argp = xdr_start(&rqstp->rq_arg);
+	/* Get the RDMA request header. The receive logic always
+	 * places this at the start of page 0.
+	 */
+	rdma_argp = page_address(rqstp->rq_pages[0]);
 
 	/* Build an req vec for the XDR */
 	ctxt = svc_rdma_get_context(rdma);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4e618808bc98..f609c1c2d38d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -139,7 +139,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 	struct svcxprt_rdma *xprt;
 	int i;
 
-	BUG_ON(!ctxt);
 	xprt = ctxt->xprt;
 	if (free_pages)
 		for (i = 0; i < ctxt->count; i++)
@@ -339,12 +338,14 @@ static void process_context(struct svcxprt_rdma *xprt,
 
 	switch (ctxt->wr_op) {
 	case IB_WR_SEND:
-		BUG_ON(ctxt->frmr);
+		if (ctxt->frmr)
+			pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
 		svc_rdma_put_context(ctxt, 1);
 		break;
 
 	case IB_WR_RDMA_WRITE:
-		BUG_ON(ctxt->frmr);
+		if (ctxt->frmr)
+			pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
 		svc_rdma_put_context(ctxt, 0);
 		break;
 
@@ -353,19 +354,21 @@ static void process_context(struct svcxprt_rdma *xprt,
 		svc_rdma_put_frmr(xprt, ctxt->frmr);
 		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
 			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
-			BUG_ON(!read_hdr);
-			spin_lock_bh(&xprt->sc_rq_dto_lock);
-			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-			list_add_tail(&read_hdr->dto_q,
-				      &xprt->sc_read_complete_q);
-			spin_unlock_bh(&xprt->sc_rq_dto_lock);
+			if (read_hdr) {
+				spin_lock_bh(&xprt->sc_rq_dto_lock);
+				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+				list_add_tail(&read_hdr->dto_q,
+					      &xprt->sc_read_complete_q);
+				spin_unlock_bh(&xprt->sc_rq_dto_lock);
+			} else {
+				pr_err("svcrdma: ctxt->read_hdr == NULL\n");
+			}
 			svc_xprt_enqueue(&xprt->sc_xprt);
 		}
 		svc_rdma_put_context(ctxt, 0);
 		break;
 
 	default:
-		BUG_ON(1);
 		printk(KERN_ERR "svcrdma: unexpected completion type, "
 		       "opcode=%d\n",
 		       ctxt->wr_op);
@@ -513,7 +516,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 	buflen = 0;
 	ctxt->direction = DMA_FROM_DEVICE;
 	for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) {
-		BUG_ON(sge_no >= xprt->sc_max_sge);
+		if (sge_no >= xprt->sc_max_sge) {
+			pr_err("svcrdma: Too many sges (%d)\n", sge_no);
+			goto err_put_ctxt;
+		}
 		page = svc_rdma_get_page();
 		ctxt->pages[sge_no] = page;
 		pa = ib_dma_map_page(xprt->sc_cm_id->device,
@@ -687,7 +693,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 {
 	struct rdma_cm_id *listen_id;
 	struct svcxprt_rdma *cma_xprt;
-	struct svc_xprt *xprt;
 	int ret;
 
 	dprintk("svcrdma: Creating RDMA socket\n");
@@ -698,7 +703,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	cma_xprt = rdma_create_xprt(serv, 1);
 	if (!cma_xprt)
 		return ERR_PTR(-ENOMEM);
-	xprt = &cma_xprt->sc_xprt;
 
 	listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
 				   IB_QPT_RC);
@@ -822,7 +826,7 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
 	if (frmr) {
 		frmr_unmap_dma(rdma, frmr);
 		spin_lock_bh(&rdma->sc_frmr_q_lock);
-		BUG_ON(!list_empty(&frmr->frmr_list));
+		WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
 		list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
 		spin_unlock_bh(&rdma->sc_frmr_q_lock);
 	}
@@ -970,10 +974,12 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	 * NB: iWARP requires remote write access for the data sink
 	 * of an RDMA_READ. IB does not.
 	 */
+	newxprt->sc_reader = rdma_read_chunk_lcl;
 	if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
 		newxprt->sc_frmr_pg_list_len =
 			devattr.max_fast_reg_page_list_len;
 		newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
+		newxprt->sc_reader = rdma_read_chunk_frmr;
 	}
 
 	/*
@@ -1125,7 +1131,9 @@ static void __svc_rdma_free(struct work_struct *work)
 	dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
 
 	/* We should only be called from kref_put */
-	BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
+	if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+		pr_err("svcrdma: sc_xprt still in use? (%d)\n",
+		       atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
 
 	/*
 	 * Destroy queued, but not processed read completions. Note
@@ -1153,8 +1161,12 @@ static void __svc_rdma_free(struct work_struct *work)
 	}
 
 	/* Warn if we leaked a resource or under-referenced */
-	WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
-	WARN_ON(atomic_read(&rdma->sc_dma_used) != 0);
+	if (atomic_read(&rdma->sc_ctxt_used) != 0)
+		pr_err("svcrdma: ctxt still in use? (%d)\n",
+		       atomic_read(&rdma->sc_ctxt_used));
+	if (atomic_read(&rdma->sc_dma_used) != 0)
+		pr_err("svcrdma: dma still in use? (%d)\n",
+		       atomic_read(&rdma->sc_dma_used));
 
 	/* De-allocate fastreg mr */
 	rdma_dealloc_frmr_q(rdma);
@@ -1254,7 +1266,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 	if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
 		return -ENOTCONN;
 
-	BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
 	wr_count = 1;
 	for (n_wr = wr->next; n_wr; n_wr = n_wr->next)
 		wr_count++;
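
Taken together, the recvfrom and transport changes above replace the per-request choice between rdma_read_chunk_lcl() and rdma_read_chunk_frmr() with a per-transport sc_reader method selected once at accept time. The standalone sketch below illustrates that selection pattern only; the names and types (demo_xprt, demo_accept, and so on) are hypothetical simplifications rather than the kernel structures, and the only input to the choice is assumed to be the device's fast-registration capability.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for the kernel structures. */
struct demo_xprt;
typedef int (*demo_reader_fn)(struct demo_xprt *xprt, unsigned int len);

struct demo_xprt {
	bool has_frmr;		/* like IB_DEVICE_MEM_MGT_EXTENSIONS */
	demo_reader_fn reader;	/* like svcxprt_rdma's sc_reader */
};

static int demo_read_lcl(struct demo_xprt *xprt, unsigned int len)
{
	(void)xprt;
	printf("read %u bytes via local lkey\n", len);
	return (int)len;
}

static int demo_read_frmr(struct demo_xprt *xprt, unsigned int len)
{
	(void)xprt;
	printf("read %u bytes via FRMR\n", len);
	return (int)len;
}

/* Chosen once at "accept" time, then used for every read chunk. */
static void demo_accept(struct demo_xprt *xprt)
{
	xprt->reader = demo_read_lcl;
	if (xprt->has_frmr)
		xprt->reader = demo_read_frmr;
}

int main(void)
{
	struct demo_xprt xprt = { .has_frmr = true };

	demo_accept(&xprt);
	xprt.reader(&xprt, 4096);	/* callers no longer test device caps */
	return 0;
}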