aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2015-01-13 11:03:53 -0500
committerJ. Bruce Fields <bfields@redhat.com>2015-01-15 15:01:49 -0500
commita97c331f9aa9080706a7835225d9d82e832e0bb6 (patch)
treeac442b57bd53c60018c7a808a8f9101a70db3a6b /net
parentfcbeced5b4df5e7f05ed8a18b69acfac733aab11 (diff)
svcrdma: Handle additional inline content
Most NFS RPCs place their large payload argument at the end of the RPC header (eg, NFSv3 WRITE). For NFSv3 WRITE and SYMLINK, RPC/RDMA sends the complete RPC header inline, and the payload argument in the read list. Data in the read list is the last part of the XDR stream. One important case is not like this, however. NFSv4 COMPOUND is a counted array of operations. A WRITE operation, with its large data payload, can appear in the middle of the compound's operations array. Thus NFSv4 WRITE compounds can have header content after the WRITE payload. The Linux client, for example, performs an NFSv4 WRITE like this: { PUTFH, WRITE, GETATTR } Though RFC 5667 is not precise about this, the proper way to convey this compound is to place the GETATTR inline, _after_ the front of the RPC header. The receiver inserts the read list payload into the XDR stream after the initial WRITE arguments, and before the GETATTR operation, thanks to the value of the read list "position" field. The Linux client currently sends the GETATTR at the end of the RPC/RDMA read list, which is incorrect. It will be corrected in the future. The Linux server currently rejects NFSv4 compounds with inline content after the read list. For the above NFSv4 WRITE compound, the NFS compound header indicates there are three operations, but the server finds nonsense when it looks in the XDR stream for the third operation, and the compound fails with OP_ILLEGAL. Move trailing inline content to the end of the XDR buffer's page list. This presents incoming NFSv4 WRITE compounds to NFSD in the same way the socket transport does. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Reviewed-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c55
1 file changed, 55 insertions, 0 deletions
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index a345cadad4dd..f9f13a32ddb8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -364,6 +364,56 @@ rdma_rcl_chunk_count(struct rpcrdma_read_chunk *ch)
364 return count; 364 return count;
365} 365}
366 366
367/* If there was additional inline content, append it to the end of arg.pages.
368 * Tail copy has to be done after the reader function has determined how many
369 * pages are needed for RDMA READ.
370 */
371static int
372rdma_copy_tail(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head,
373 u32 position, u32 byte_count, u32 page_offset, int page_no)
374{
375 char *srcp, *destp;
376 int ret;
377
378 ret = 0;
379 srcp = head->arg.head[0].iov_base + position;
380 byte_count = head->arg.head[0].iov_len - position;
381 if (byte_count > PAGE_SIZE) {
382 dprintk("svcrdma: large tail unsupported\n");
383 return 0;
384 }
385
386 /* Fit as much of the tail on the current page as possible */
387 if (page_offset != PAGE_SIZE) {
388 destp = page_address(rqstp->rq_arg.pages[page_no]);
389 destp += page_offset;
390 while (byte_count--) {
391 *destp++ = *srcp++;
392 page_offset++;
393 if (page_offset == PAGE_SIZE && byte_count)
394 goto more;
395 }
396 goto done;
397 }
398
399more:
400 /* Fit the rest on the next page */
401 page_no++;
402 destp = page_address(rqstp->rq_arg.pages[page_no]);
403 while (byte_count--)
404 *destp++ = *srcp++;
405
406 rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
407 rqstp->rq_next_page = rqstp->rq_respages + 1;
408
409done:
410 byte_count = head->arg.head[0].iov_len - position;
411 head->arg.page_len += byte_count;
412 head->arg.len += byte_count;
413 head->arg.buflen += byte_count;
414 return 1;
415}
416
367static int rdma_read_chunks(struct svcxprt_rdma *xprt, 417static int rdma_read_chunks(struct svcxprt_rdma *xprt,
368 struct rpcrdma_msg *rmsgp, 418 struct rpcrdma_msg *rmsgp,
369 struct svc_rqst *rqstp, 419 struct svc_rqst *rqstp,
@@ -440,9 +490,14 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt,
440 head->arg.page_len += pad; 490 head->arg.page_len += pad;
441 head->arg.len += pad; 491 head->arg.len += pad;
442 head->arg.buflen += pad; 492 head->arg.buflen += pad;
493 page_offset += pad;
443 } 494 }
444 495
445 ret = 1; 496 ret = 1;
497 if (position && position < head->arg.head[0].iov_len)
498 ret = rdma_copy_tail(rqstp, head, position,
499 byte_count, page_offset, page_no);
500 head->arg.head[0].iov_len = position;
446 head->position = position; 501 head->position = position;
447 502
448 err: 503 err: