aboutsummaryrefslogtreecommitdiffstats
path: root/net/9p
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2011-08-16 01:20:10 -0400
committerEric Van Hensbergen <ericvh@gmail.com>2011-10-24 12:13:11 -0400
commitabfa034e4b8ed0046fa589769e9840af645bc4ba (patch)
tree146195e891a2fd63e3fc91f803547abb904b2087 /net/9p
parentc3b92c8787367a8bb53d57d9789b558f1295cc96 (diff)
fs/9p: Update zero-copy implementation in 9p
* remove lot of update to different data structure * add a seperate callback for zero copy request. * above makes non zero copy code path simpler * remove conditionalizing TREAD/TREADDIR/TWRITE in the zero copy path * Fix the dotu p9_check_errors with zero copy. Add sufficient doc around * Add support for both in and output buffers in zero copy callback * pin and unpin pages in the same context * use helpers instead of defining page offset and rest of page ourself * Fix mem leak in p9_check_errors * Remove 'E' and 'F' in p9pdu_vwritef Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Diffstat (limited to 'net/9p')
-rw-r--r--net/9p/client.c391
-rw-r--r--net/9p/protocol.c46
-rw-r--r--net/9p/protocol.h1
-rw-r--r--net/9p/trans_common.c53
-rw-r--r--net/9p/trans_common.h21
-rw-r--r--net/9p/trans_virtio.c319
6 files changed, 500 insertions, 331 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 0505a03c374c..305a4e719b03 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -203,11 +203,12 @@ free_and_return:
203 * 203 *
204 */ 204 */
205 205
206static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) 206static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag, int max_size)
207{ 207{
208 unsigned long flags; 208 unsigned long flags;
209 int row, col; 209 int row, col;
210 struct p9_req_t *req; 210 struct p9_req_t *req;
211 int alloc_msize = min(c->msize, max_size);
211 212
212 /* This looks up the original request by tag so we know which 213 /* This looks up the original request by tag so we know which
213 * buffer to read the data into */ 214 * buffer to read the data into */
@@ -245,23 +246,12 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
245 return ERR_PTR(-ENOMEM); 246 return ERR_PTR(-ENOMEM);
246 } 247 }
247 init_waitqueue_head(req->wq); 248 init_waitqueue_head(req->wq);
248 if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 249 req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
249 P9_TRANS_PREF_PAYLOAD_SEP) { 250 GFP_NOFS);
250 int alloc_msize = min(c->msize, 4096); 251 req->tc->capacity = alloc_msize;
251 req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, 252 req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
252 GFP_NOFS); 253 GFP_NOFS);
253 req->tc->capacity = alloc_msize; 254 req->rc->capacity = alloc_msize;
254 req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
255 GFP_NOFS);
256 req->rc->capacity = alloc_msize;
257 } else {
258 req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
259 GFP_NOFS);
260 req->tc->capacity = c->msize;
261 req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
262 GFP_NOFS);
263 req->rc->capacity = c->msize;
264 }
265 if ((!req->tc) || (!req->rc)) { 255 if ((!req->tc) || (!req->rc)) {
266 printk(KERN_ERR "Couldn't grow tag array\n"); 256 printk(KERN_ERR "Couldn't grow tag array\n");
267 kfree(req->tc); 257 kfree(req->tc);
@@ -485,27 +475,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
485 475
486 if (!p9_is_proto_dotl(c)) { 476 if (!p9_is_proto_dotl(c)) {
487 char *ename; 477 char *ename;
488
489 if (req->tc->pbuf_size) {
490 /* Handle user buffers */
491 size_t len = req->rc->size - req->rc->offset;
492 if (req->tc->pubuf) {
493 /* User Buffer */
494 err = copy_from_user(
495 &req->rc->sdata[req->rc->offset],
496 req->tc->pubuf, len);
497 if (err) {
498 err = -EFAULT;
499 goto out_err;
500 }
501 } else {
502 /* Kernel Buffer */
503 memmove(&req->rc->sdata[req->rc->offset],
504 req->tc->pkbuf, len);
505 }
506 }
507 err = p9pdu_readf(req->rc, c->proto_version, "s?d", 478 err = p9pdu_readf(req->rc, c->proto_version, "s?d",
508 &ename, &ecode); 479 &ename, &ecode);
509 if (err) 480 if (err)
510 goto out_err; 481 goto out_err;
511 482
@@ -515,11 +486,10 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
515 if (!err || !IS_ERR_VALUE(err)) { 486 if (!err || !IS_ERR_VALUE(err)) {
516 err = p9_errstr2errno(ename, strlen(ename)); 487 err = p9_errstr2errno(ename, strlen(ename));
517 488
518 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, 489 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n",
519 ename); 490 -ecode, ename);
520
521 kfree(ename);
522 } 491 }
492 kfree(ename);
523 } else { 493 } else {
524 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); 494 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
525 err = -ecode; 495 err = -ecode;
@@ -527,7 +497,6 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
527 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); 497 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
528 } 498 }
529 499
530
531 return err; 500 return err;
532 501
533out_err: 502out_err:
@@ -536,6 +505,110 @@ out_err:
536 return err; 505 return err;
537} 506}
538 507
508/**
509 * p9_check_zc_errors - check 9p packet for error return and process it
510 * @c: current client instance
511 * @req: request to parse and check for error conditions
512 * @in_hdrlen: Size of response protocol buffer.
513 *
514 * returns error code if one is discovered, otherwise returns 0
515 *
516 * this will have to be more complicated if we have multiple
517 * error packet types
518 */
519
520static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
521 char *uidata, int in_hdrlen, int kern_buf)
522{
523 int err;
524 int ecode;
525 int8_t type;
526 char *ename = NULL;
527
528 err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
529 if (err) {
530 P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);
531 return err;
532 }
533
534 if (type != P9_RERROR && type != P9_RLERROR)
535 return 0;
536
537 if (!p9_is_proto_dotl(c)) {
538 /* Error is reported in string format */
539 uint16_t len;
540 /* 7 = header size for RERROR, 2 is the size of string len; */
541 int inline_len = in_hdrlen - (7 + 2);
542
543 /* Read the size of error string */
544 err = p9pdu_readf(req->rc, c->proto_version, "w", &len);
545 if (err)
546 goto out_err;
547
548 ename = kmalloc(len + 1, GFP_NOFS);
549 if (!ename) {
550 err = -ENOMEM;
551 goto out_err;
552 }
553 if (len <= inline_len) {
554 /* We have error in protocol buffer itself */
555 if (pdu_read(req->rc, ename, len)) {
556 err = -EFAULT;
557 goto out_free;
558
559 }
560 } else {
561 /*
562 * Part of the data is in user space buffer.
563 */
564 if (pdu_read(req->rc, ename, inline_len)) {
565 err = -EFAULT;
566 goto out_free;
567
568 }
569 if (kern_buf) {
570 memcpy(ename + inline_len, uidata,
571 len - inline_len);
572 } else {
573 err = copy_from_user(ename + inline_len,
574 uidata, len - inline_len);
575 if (err) {
576 err = -EFAULT;
577 goto out_free;
578 }
579 }
580 }
581 ename[len] = 0;
582 if (p9_is_proto_dotu(c)) {
583 /* For dotu we also have error code */
584 err = p9pdu_readf(req->rc,
585 c->proto_version, "d", &ecode);
586 if (err)
587 goto out_free;
588 err = -ecode;
589 }
590 if (!err || !IS_ERR_VALUE(err)) {
591 err = p9_errstr2errno(ename, strlen(ename));
592
593 P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n",
594 -ecode, ename);
595 }
596 kfree(ename);
597 } else {
598 err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
599 err = -ecode;
600
601 P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
602 }
603 return err;
604
605out_free:
606 kfree(ename);
607out_err:
608 P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
609 return err;
610}
611
539static struct p9_req_t * 612static struct p9_req_t *
540p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); 613p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
541 614
@@ -579,23 +652,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
579 return 0; 652 return 0;
580} 653}
581 654
582/** 655static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
583 * p9_client_rpc - issue a request and wait for a response 656 int8_t type, int req_size,
584 * @c: client session 657 const char *fmt, va_list ap)
585 * @type: type of request
586 * @fmt: protocol format string (see protocol.c)
587 *
588 * Returns request structure (which client must free using p9_free_req)
589 */
590
591static struct p9_req_t *
592p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
593{ 658{
594 va_list ap;
595 int tag, err; 659 int tag, err;
596 struct p9_req_t *req; 660 struct p9_req_t *req;
597 unsigned long flags;
598 int sigpending;
599 661
600 P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); 662 P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
601 663
@@ -607,12 +669,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
607 if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) 669 if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))
608 return ERR_PTR(-EIO); 670 return ERR_PTR(-EIO);
609 671
610 if (signal_pending(current)) {
611 sigpending = 1;
612 clear_thread_flag(TIF_SIGPENDING);
613 } else
614 sigpending = 0;
615
616 tag = P9_NOTAG; 672 tag = P9_NOTAG;
617 if (type != P9_TVERSION) { 673 if (type != P9_TVERSION) {
618 tag = p9_idpool_get(c->tagpool); 674 tag = p9_idpool_get(c->tagpool);
@@ -620,18 +676,50 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
620 return ERR_PTR(-ENOMEM); 676 return ERR_PTR(-ENOMEM);
621 } 677 }
622 678
623 req = p9_tag_alloc(c, tag); 679 req = p9_tag_alloc(c, tag, req_size);
624 if (IS_ERR(req)) 680 if (IS_ERR(req))
625 return req; 681 return req;
626 682
627 /* marshall the data */ 683 /* marshall the data */
628 p9pdu_prepare(req->tc, tag, type); 684 p9pdu_prepare(req->tc, tag, type);
629 va_start(ap, fmt);
630 err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); 685 err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap);
631 va_end(ap);
632 if (err) 686 if (err)
633 goto reterr; 687 goto reterr;
634 p9pdu_finalize(req->tc); 688 p9pdu_finalize(req->tc);
689 return req;
690reterr:
691 p9_free_req(c, req);
692 return ERR_PTR(err);
693}
694
695/**
696 * p9_client_rpc - issue a request and wait for a response
697 * @c: client session
698 * @type: type of request
699 * @fmt: protocol format string (see protocol.c)
700 *
701 * Returns request structure (which client must free using p9_free_req)
702 */
703
704static struct p9_req_t *
705p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
706{
707 va_list ap;
708 int sigpending, err;
709 unsigned long flags;
710 struct p9_req_t *req;
711
712 va_start(ap, fmt);
713 req = p9_client_prepare_req(c, type, c->msize, fmt, ap);
714 va_end(ap);
715 if (IS_ERR(req))
716 return req;
717
718 if (signal_pending(current)) {
719 sigpending = 1;
720 clear_thread_flag(TIF_SIGPENDING);
721 } else
722 sigpending = 0;
635 723
636 err = c->trans_mod->request(c, req); 724 err = c->trans_mod->request(c, req);
637 if (err < 0) { 725 if (err < 0) {
@@ -639,18 +727,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
639 c->status = Disconnected; 727 c->status = Disconnected;
640 goto reterr; 728 goto reterr;
641 } 729 }
642 730 /* Wait for the response */
643 P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
644 err = wait_event_interruptible(*req->wq, 731 err = wait_event_interruptible(*req->wq,
645 req->status >= REQ_STATUS_RCVD); 732 req->status >= REQ_STATUS_RCVD);
646 P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n",
647 req->wq, tag, err);
648 733
649 if (req->status == REQ_STATUS_ERROR) { 734 if (req->status == REQ_STATUS_ERROR) {
650 P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); 735 P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
651 err = req->t_err; 736 err = req->t_err;
652 } 737 }
653
654 if ((err == -ERESTARTSYS) && (c->status == Connected)) { 738 if ((err == -ERESTARTSYS) && (c->status == Connected)) {
655 P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); 739 P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
656 sigpending = 1; 740 sigpending = 1;
@@ -663,13 +747,11 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
663 if (req->status == REQ_STATUS_RCVD) 747 if (req->status == REQ_STATUS_RCVD)
664 err = 0; 748 err = 0;
665 } 749 }
666
667 if (sigpending) { 750 if (sigpending) {
668 spin_lock_irqsave(&current->sighand->siglock, flags); 751 spin_lock_irqsave(&current->sighand->siglock, flags);
669 recalc_sigpending(); 752 recalc_sigpending();
670 spin_unlock_irqrestore(&current->sighand->siglock, flags); 753 spin_unlock_irqrestore(&current->sighand->siglock, flags);
671 } 754 }
672
673 if (err < 0) 755 if (err < 0)
674 goto reterr; 756 goto reterr;
675 757
@@ -678,7 +760,92 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
678 P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); 760 P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type);
679 return req; 761 return req;
680 } 762 }
763reterr:
764 P9_DPRINTK(P9_DEBUG_MUX,
765 "exit: client %p op %d error: %d\n", c, type, err);
766 p9_free_req(c, req);
767 return ERR_PTR(err);
768}
769
770/**
771 * p9_client_zc_rpc - issue a request and wait for a response
772 * @c: client session
773 * @type: type of request
 774 * @uidata: user buffer that should be used for zero copy read
 775 * @uodata: user buffer that should be used for zero copy write
776 * @inlen: read buffer size
777 * @olen: write buffer size
 778 * @in_hdrlen: read header size. This is the size of response protocol data
779 * @fmt: protocol format string (see protocol.c)
780 *
781 * Returns request structure (which client must free using p9_free_req)
782 */
783static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
784 char *uidata, char *uodata,
785 int inlen, int olen, int in_hdrlen,
786 int kern_buf, const char *fmt, ...)
787{
788 va_list ap;
789 int sigpending, err;
790 unsigned long flags;
791 struct p9_req_t *req;
792
793 va_start(ap, fmt);
794 /*
 795 * We allocate an inline protocol data of only 4k bytes.
796 * The actual content is passed in zero-copy fashion.
797 */
798 req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
799 va_end(ap);
800 if (IS_ERR(req))
801 return req;
802
803 if (signal_pending(current)) {
804 sigpending = 1;
805 clear_thread_flag(TIF_SIGPENDING);
806 } else
807 sigpending = 0;
808
809 /* If we are called with KERNEL_DS force kern_buf */
810 if (segment_eq(get_fs(), KERNEL_DS))
811 kern_buf = 1;
812
813 err = c->trans_mod->zc_request(c, req, uidata, uodata,
814 inlen, olen, in_hdrlen, kern_buf);
815 if (err < 0) {
816 if (err == -EIO)
817 c->status = Disconnected;
818 goto reterr;
819 }
820 if (req->status == REQ_STATUS_ERROR) {
821 P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
822 err = req->t_err;
823 }
824 if ((err == -ERESTARTSYS) && (c->status == Connected)) {
825 P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
826 sigpending = 1;
827 clear_thread_flag(TIF_SIGPENDING);
681 828
829 if (c->trans_mod->cancel(c, req))
830 p9_client_flush(c, req);
831
832 /* if we received the response anyway, don't signal error */
833 if (req->status == REQ_STATUS_RCVD)
834 err = 0;
835 }
836 if (sigpending) {
837 spin_lock_irqsave(&current->sighand->siglock, flags);
838 recalc_sigpending();
839 spin_unlock_irqrestore(&current->sighand->siglock, flags);
840 }
841 if (err < 0)
842 goto reterr;
843
844 err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf);
845 if (!err) {
846 P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type);
847 return req;
848 }
682reterr: 849reterr:
683 P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, 850 P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type,
684 err); 851 err);
@@ -1330,13 +1497,15 @@ int
1330p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, 1497p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1331 u32 count) 1498 u32 count)
1332{ 1499{
1333 int err, rsize;
1334 struct p9_client *clnt;
1335 struct p9_req_t *req;
1336 char *dataptr; 1500 char *dataptr;
1501 int kernel_buf = 0;
1502 struct p9_req_t *req;
1503 struct p9_client *clnt;
1504 int err, rsize, non_zc = 0;
1505
1337 1506
1338 P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, 1507 P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
1339 (long long unsigned) offset, count); 1508 fid->fid, (long long unsigned) offset, count);
1340 err = 0; 1509 err = 0;
1341 clnt = fid->clnt; 1510 clnt = fid->clnt;
1342 1511
@@ -1348,13 +1517,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1348 rsize = count; 1517 rsize = count;
1349 1518
1350 /* Don't bother zerocopy for small IO (< 1024) */ 1519 /* Don't bother zerocopy for small IO (< 1024) */
1351 if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 1520 if (clnt->trans_mod->zc_request && rsize > 1024) {
1352 P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { 1521 char *indata;
1353 req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, 1522 if (data) {
1354 rsize, data, udata); 1523 kernel_buf = 1;
1524 indata = data;
1525 } else
1526 indata = (char *)udata;
1527 /*
1528 * response header len is 11
1529 * PDU Header(7) + IO Size (4)
1530 */
1531 req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0,
1532 11, kernel_buf, "dqd", fid->fid,
1533 offset, rsize);
1355 } else { 1534 } else {
1535 non_zc = 1;
1356 req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, 1536 req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
1357 rsize); 1537 rsize);
1358 } 1538 }
1359 if (IS_ERR(req)) { 1539 if (IS_ERR(req)) {
1360 err = PTR_ERR(req); 1540 err = PTR_ERR(req);
@@ -1370,7 +1550,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
1370 P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); 1550 P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
1371 P9_DUMP_PKT(1, req->rc); 1551 P9_DUMP_PKT(1, req->rc);
1372 1552
1373 if (!req->tc->pbuf_size) { 1553 if (non_zc) {
1374 if (data) { 1554 if (data) {
1375 memmove(data, dataptr, count); 1555 memmove(data, dataptr, count);
1376 } else { 1556 } else {
@@ -1396,6 +1576,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
1396 u64 offset, u32 count) 1576 u64 offset, u32 count)
1397{ 1577{
1398 int err, rsize; 1578 int err, rsize;
1579 int kernel_buf = 0;
1399 struct p9_client *clnt; 1580 struct p9_client *clnt;
1400 struct p9_req_t *req; 1581 struct p9_req_t *req;
1401 1582
@@ -1411,19 +1592,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
1411 if (count < rsize) 1592 if (count < rsize)
1412 rsize = count; 1593 rsize = count;
1413 1594
1414 /* Don't bother zerocopy form small IO (< 1024) */ 1595 /* Don't bother zerocopy for small IO (< 1024) */
1415 if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 1596 if (clnt->trans_mod->zc_request && rsize > 1024) {
1416 P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { 1597 char *odata;
1417 req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, 1598 if (data) {
1418 rsize, data, udata); 1599 kernel_buf = 1;
1600 odata = data;
1601 } else
1602 odata = (char *)udata;
1603 req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize,
1604 P9_ZC_HDR_SZ, kernel_buf, "dqd",
1605 fid->fid, offset, rsize);
1419 } else { 1606 } else {
1420
1421 if (data) 1607 if (data)
1422 req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, 1608 req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
1423 offset, rsize, data); 1609 offset, rsize, data);
1424 else 1610 else
1425 req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, 1611 req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
1426 offset, rsize, udata); 1612 offset, rsize, udata);
1427 } 1613 }
1428 if (IS_ERR(req)) { 1614 if (IS_ERR(req)) {
1429 err = PTR_ERR(req); 1615 err = PTR_ERR(req);
@@ -1824,7 +2010,7 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate);
1824 2010
1825int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) 2011int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1826{ 2012{
1827 int err, rsize; 2013 int err, rsize, non_zc = 0;
1828 struct p9_client *clnt; 2014 struct p9_client *clnt;
1829 struct p9_req_t *req; 2015 struct p9_req_t *req;
1830 char *dataptr; 2016 char *dataptr;
@@ -1842,13 +2028,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1842 if (count < rsize) 2028 if (count < rsize)
1843 rsize = count; 2029 rsize = count;
1844 2030
1845 if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == 2031 /* Don't bother zerocopy for small IO (< 1024) */
1846 P9_TRANS_PREF_PAYLOAD_SEP) { 2032 if (clnt->trans_mod->zc_request && rsize > 1024) {
1847 req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, 2033 /*
1848 offset, rsize, data); 2034 * response header len is 11
2035 * PDU Header(7) + IO Size (4)
2036 */
2037 req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0,
2038 11, 1, "dqd", fid->fid, offset, rsize);
1849 } else { 2039 } else {
2040 non_zc = 1;
1850 req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, 2041 req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
1851 offset, rsize); 2042 offset, rsize);
1852 } 2043 }
1853 if (IS_ERR(req)) { 2044 if (IS_ERR(req)) {
1854 err = PTR_ERR(req); 2045 err = PTR_ERR(req);
@@ -1863,7 +2054,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1863 2054
1864 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); 2055 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
1865 2056
1866 if (!req->tc->pbuf_size && data) 2057 if (non_zc)
1867 memmove(data, dataptr, count); 2058 memmove(data, dataptr, count);
1868 2059
1869 p9_free_req(clnt, req); 2060 p9_free_req(clnt, req);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index df58375ea6b3..b7d4e8aa5383 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -81,7 +81,7 @@ void p9stat_free(struct p9_wstat *stbuf)
81} 81}
82EXPORT_SYMBOL(p9stat_free); 82EXPORT_SYMBOL(p9stat_free);
83 83
84static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) 84size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
85{ 85{
86 size_t len = min(pdu->size - pdu->offset, size); 86 size_t len = min(pdu->size - pdu->offset, size);
87 memcpy(data, &pdu->sdata[pdu->offset], len); 87 memcpy(data, &pdu->sdata[pdu->offset], len);
@@ -108,26 +108,6 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
108 return size - len; 108 return size - len;
109} 109}
110 110
111static size_t
112pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
113 size_t size)
114{
115 BUG_ON(pdu->size > P9_IOHDRSZ);
116 pdu->pubuf = (char __user *)udata;
117 pdu->pkbuf = (char *)kdata;
118 pdu->pbuf_size = size;
119 return 0;
120}
121
122static size_t
123pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
124{
125 BUG_ON(pdu->size > P9_READDIRHDRSZ);
126 pdu->pkbuf = (char *)kdata;
127 pdu->pbuf_size = size;
128 return 0;
129}
130
131/* 111/*
132 b - int8_t 112 b - int8_t
133 w - int16_t 113 w - int16_t
@@ -459,26 +439,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
459 errcode = -EFAULT; 439 errcode = -EFAULT;
460 } 440 }
461 break; 441 break;
462 case 'E':{
463 int32_t cnt = va_arg(ap, int32_t);
464 const char *k = va_arg(ap, const void *);
465 const char __user *u = va_arg(ap,
466 const void __user *);
467 errcode = p9pdu_writef(pdu, proto_version, "d",
468 cnt);
469 if (!errcode && pdu_write_urw(pdu, k, u, cnt))
470 errcode = -EFAULT;
471 }
472 break;
473 case 'F':{
474 int32_t cnt = va_arg(ap, int32_t);
475 const char *k = va_arg(ap, const void *);
476 errcode = p9pdu_writef(pdu, proto_version, "d",
477 cnt);
478 if (!errcode && pdu_write_readdir(pdu, k, cnt))
479 errcode = -EFAULT;
480 }
481 break;
482 case 'U':{ 442 case 'U':{
483 int32_t count = va_arg(ap, int32_t); 443 int32_t count = va_arg(ap, int32_t);
484 const char __user *udata = 444 const char __user *udata =
@@ -637,10 +597,6 @@ void p9pdu_reset(struct p9_fcall *pdu)
637{ 597{
638 pdu->offset = 0; 598 pdu->offset = 0;
639 pdu->size = 0; 599 pdu->size = 0;
640 pdu->private = NULL;
641 pdu->pubuf = NULL;
642 pdu->pkbuf = NULL;
643 pdu->pbuf_size = 0;
644} 600}
645 601
646int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, 602int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
diff --git a/net/9p/protocol.h b/net/9p/protocol.h
index 2431c0f38d56..a0eb8ff11f22 100644
--- a/net/9p/protocol.h
+++ b/net/9p/protocol.h
@@ -32,3 +32,4 @@ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type);
32int p9pdu_finalize(struct p9_fcall *pdu); 32int p9pdu_finalize(struct p9_fcall *pdu);
33void p9pdu_dump(int, struct p9_fcall *); 33void p9pdu_dump(int, struct p9_fcall *);
34void p9pdu_reset(struct p9_fcall *pdu); 34void p9pdu_reset(struct p9_fcall *pdu);
35size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size);
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index 9a70ebdec56e..de8df957867d 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -21,30 +21,25 @@
21 21
22/** 22/**
23 * p9_release_req_pages - Release pages after the transaction. 23 * p9_release_req_pages - Release pages after the transaction.
24 * @*private: PDU's private page of struct trans_rpage_info
25 */ 24 */
26void 25void p9_release_pages(struct page **pages, int nr_pages)
27p9_release_req_pages(struct trans_rpage_info *rpinfo)
28{ 26{
29 int i = 0; 27 int i = 0;
30 28 while (pages[i] && nr_pages--) {
31 while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { 29 put_page(pages[i]);
32 put_page(rpinfo->rp_data[i]);
33 i++; 30 i++;
34 } 31 }
35} 32}
36EXPORT_SYMBOL(p9_release_req_pages); 33EXPORT_SYMBOL(p9_release_pages);
37 34
38/** 35/**
39 * p9_nr_pages - Return number of pages needed to accommodate the payload. 36 * p9_nr_pages - Return number of pages needed to accommodate the payload.
40 */ 37 */
41int 38int p9_nr_pages(char *data, int len)
42p9_nr_pages(struct p9_req_t *req)
43{ 39{
44 unsigned long start_page, end_page; 40 unsigned long start_page, end_page;
45 start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT; 41 start_page = (unsigned long)data >> PAGE_SHIFT;
46 end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size + 42 end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
47 PAGE_SIZE - 1) >> PAGE_SHIFT;
48 return end_page - start_page; 43 return end_page - start_page;
49} 44}
50EXPORT_SYMBOL(p9_nr_pages); 45EXPORT_SYMBOL(p9_nr_pages);
@@ -58,35 +53,17 @@ EXPORT_SYMBOL(p9_nr_pages);
58 * @nr_pages: number of pages to accommodate the payload 53 * @nr_pages: number of pages to accommodate the payload
59 * @rw: Indicates if the pages are for read or write. 54 * @rw: Indicates if the pages are for read or write.
60 */ 55 */
61int
62p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
63 int nr_pages, u8 rw)
64{
65 uint32_t first_page_bytes = 0;
66 int32_t pdata_mapped_pages;
67 struct trans_rpage_info *rpinfo;
68
69 *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1);
70 56
71 if (*pdata_off) 57int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write)
72 first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), 58{
73 req->tc->pbuf_size); 59 int nr_mapped_pages;
74 60
75 rpinfo = req->tc->private; 61 nr_mapped_pages = get_user_pages_fast((unsigned long)data,
76 pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, 62 *nr_pages, write, pages);
77 nr_pages, rw, &rpinfo->rp_data[0]); 63 if (nr_mapped_pages <= 0)
78 if (pdata_mapped_pages <= 0) 64 return nr_mapped_pages;
79 return pdata_mapped_pages;
80 65
81 rpinfo->rp_nr_pages = pdata_mapped_pages; 66 *nr_pages = nr_mapped_pages;
82 if (*pdata_off) {
83 *pdata_len = first_page_bytes;
84 *pdata_len += min((req->tc->pbuf_size - *pdata_len),
85 ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
86 } else {
87 *pdata_len = min(req->tc->pbuf_size,
88 (size_t)pdata_mapped_pages << PAGE_SHIFT);
89 }
90 return 0; 67 return 0;
91} 68}
92EXPORT_SYMBOL(p9_payload_gup); 69EXPORT_SYMBOL(p9_payload_gup);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
index 76309223bb02..173bb550a9eb 100644
--- a/net/9p/trans_common.h
+++ b/net/9p/trans_common.h
@@ -12,21 +12,6 @@
12 * 12 *
13 */ 13 */
14 14
15/* TRUE if it is user context */ 15void p9_release_pages(struct page **, int);
16#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) 16int p9_payload_gup(char *, int *, struct page **, int);
17 17int p9_nr_pages(char *, int);
18/**
19 * struct trans_rpage_info - To store mapped page information in PDU.
20 * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu.
21 * @rp_nr_pages: Number of mapped pages
22 * @rp_data: Array of page pointers
23 */
24struct trans_rpage_info {
25 u8 rp_alloc;
26 int rp_nr_pages;
27 struct page *rp_data[0];
28};
29
30void p9_release_req_pages(struct trans_rpage_info *);
31int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
32int p9_nr_pages(struct p9_req_t *);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e317583fcc73..32aa9834229c 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -150,12 +150,10 @@ static void req_done(struct virtqueue *vq)
150 while (1) { 150 while (1) {
151 spin_lock_irqsave(&chan->lock, flags); 151 spin_lock_irqsave(&chan->lock, flags);
152 rc = virtqueue_get_buf(chan->vq, &len); 152 rc = virtqueue_get_buf(chan->vq, &len);
153
154 if (rc == NULL) { 153 if (rc == NULL) {
155 spin_unlock_irqrestore(&chan->lock, flags); 154 spin_unlock_irqrestore(&chan->lock, flags);
156 break; 155 break;
157 } 156 }
158
159 chan->ring_bufs_avail = 1; 157 chan->ring_bufs_avail = 1;
160 spin_unlock_irqrestore(&chan->lock, flags); 158 spin_unlock_irqrestore(&chan->lock, flags);
161 /* Wakeup if anyone waiting for VirtIO ring space. */ 159 /* Wakeup if anyone waiting for VirtIO ring space. */
@@ -163,17 +161,6 @@ static void req_done(struct virtqueue *vq)
163 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); 161 P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
164 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); 162 P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
165 req = p9_tag_lookup(chan->client, rc->tag); 163 req = p9_tag_lookup(chan->client, rc->tag);
166 if (req->tc->private) {
167 struct trans_rpage_info *rp = req->tc->private;
168 int p = rp->rp_nr_pages;
169 /*Release pages */
170 p9_release_req_pages(rp);
171 atomic_sub(p, &vp_pinned);
172 wake_up(&vp_wq);
173 if (rp->rp_alloc)
174 kfree(rp);
175 req->tc->private = NULL;
176 }
177 req->status = REQ_STATUS_RCVD; 164 req->status = REQ_STATUS_RCVD;
178 p9_client_cb(chan->client, req); 165 p9_client_cb(chan->client, req);
179 } 166 }
@@ -193,9 +180,8 @@ static void req_done(struct virtqueue *vq)
193 * 180 *
194 */ 181 */
195 182
196static int 183static int pack_sg_list(struct scatterlist *sg, int start,
197pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, 184 int limit, char *data, int count)
198 int count)
199{ 185{
200 int s; 186 int s;
201 int index = start; 187 int index = start;
@@ -224,31 +210,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
224 * this takes a list of pages. 210 * this takes a list of pages.
225 * @sg: scatter/gather list to pack into 211 * @sg: scatter/gather list to pack into
226 * @start: which segment of the sg_list to start at 212 * @start: which segment of the sg_list to start at
227 * @pdata_off: Offset into the first page
228 * @**pdata: a list of pages to add into sg. 213 * @**pdata: a list of pages to add into sg.
214 * @nr_pages: number of pages to pack into the scatter/gather list
215 * @data: data to pack into scatter/gather list
229 * @count: amount of data to pack into the scatter/gather list 216 * @count: amount of data to pack into the scatter/gather list
230 */ 217 */
231static int 218static int
232pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, 219pack_sg_list_p(struct scatterlist *sg, int start, int limit,
233 struct page **pdata, int count) 220 struct page **pdata, int nr_pages, char *data, int count)
234{ 221{
235 int s; 222 int i = 0, s;
236 int i = 0; 223 int data_off;
237 int index = start; 224 int index = start;
238 225
239 if (pdata_off) { 226 BUG_ON(nr_pages > (limit - start));
240 s = min((int)(PAGE_SIZE - pdata_off), count); 227 /*
241 sg_set_page(&sg[index++], pdata[i++], s, pdata_off); 228 * if the first page doesn't start at
242 count -= s; 229 * page boundary find the offset
243 } 230 */
244 231 data_off = offset_in_page(data);
245 while (count) { 232 while (nr_pages) {
246 BUG_ON(index > limit); 233 s = rest_of_page(data);
247 s = min((int)PAGE_SIZE, count); 234 if (s > count)
248 sg_set_page(&sg[index++], pdata[i++], s, 0); 235 s = count;
236 sg_set_page(&sg[index++], pdata[i++], s, data_off);
237 data_off = 0;
238 data += s;
249 count -= s; 239 count -= s;
240 nr_pages--;
250 } 241 }
251 return index-start; 242 return index - start;
252} 243}
253 244
254/** 245/**
@@ -261,114 +252,166 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
261static int 252static int
262p9_virtio_request(struct p9_client *client, struct p9_req_t *req) 253p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
263{ 254{
264 int in, out, inp, outp; 255 int err;
265 struct virtio_chan *chan = client->trans; 256 int in, out;
266 unsigned long flags; 257 unsigned long flags;
267 size_t pdata_off = 0; 258 struct virtio_chan *chan = client->trans;
268 struct trans_rpage_info *rpinfo = NULL;
269 int err, pdata_len = 0;
270 259
271 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); 260 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
272 261
273 req->status = REQ_STATUS_SENT; 262 req->status = REQ_STATUS_SENT;
263req_retry:
264 spin_lock_irqsave(&chan->lock, flags);
265
266 /* Handle out VirtIO ring buffers */
267 out = pack_sg_list(chan->sg, 0,
268 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
274 269
275 if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { 270 in = pack_sg_list(chan->sg, out,
276 int nr_pages = p9_nr_pages(req); 271 VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
277 int rpinfo_size = sizeof(struct trans_rpage_info) +
278 sizeof(struct page *) * nr_pages;
279 272
280 if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { 273 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
281 err = wait_event_interruptible(vp_wq, 274 if (err < 0) {
282 atomic_read(&vp_pinned) < chan->p9_max_pages); 275 if (err == -ENOSPC) {
276 chan->ring_bufs_avail = 0;
277 spin_unlock_irqrestore(&chan->lock, flags);
278 err = wait_event_interruptible(*chan->vc_wq,
279 chan->ring_bufs_avail);
283 if (err == -ERESTARTSYS) 280 if (err == -ERESTARTSYS)
284 return err; 281 return err;
285 P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
286 }
287 282
288 if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { 283 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
289 /* We can use sdata */ 284 goto req_retry;
290 req->tc->private = req->tc->sdata + req->tc->size;
291 rpinfo = (struct trans_rpage_info *)req->tc->private;
292 rpinfo->rp_alloc = 0;
293 } else { 285 } else {
294 req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); 286 spin_unlock_irqrestore(&chan->lock, flags);
295 if (!req->tc->private) { 287 P9_DPRINTK(P9_DEBUG_TRANS,
296 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " 288 "9p debug: "
297 "private kmalloc returned NULL"); 289 "virtio rpc add_buf returned failure");
298 return -ENOMEM; 290 return -EIO;
299 }
300 rpinfo = (struct trans_rpage_info *)req->tc->private;
301 rpinfo->rp_alloc = 1;
302 } 291 }
292 }
293 virtqueue_kick(chan->vq);
294 spin_unlock_irqrestore(&chan->lock, flags);
303 295
304 err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, 296 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
305 req->tc->id == P9_TREAD ? 1 : 0); 297 return 0;
306 if (err < 0) { 298}
307 if (rpinfo->rp_alloc) 299
308 kfree(rpinfo); 300static int p9_get_mapped_pages(struct virtio_chan *chan,
301 struct page **pages, char *data,
302 int nr_pages, int write, int kern_buf)
303{
304 int err;
305 if (!kern_buf) {
306 /*
307 * We allow only p9_max_pages pinned. We wait for the
308 * Other zc request to finish here
309 */
310 if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
311 err = wait_event_interruptible(vp_wq,
312 (atomic_read(&vp_pinned) < chan->p9_max_pages));
313 if (err == -ERESTARTSYS)
314 return err;
315 }
316 err = p9_payload_gup(data, &nr_pages, pages, write);
317 if (err < 0)
309 return err; 318 return err;
310 } else { 319 atomic_add(nr_pages, &vp_pinned);
311 atomic_add(rpinfo->rp_nr_pages, &vp_pinned); 320 } else {
321 /* kernel buffer, no need to pin pages */
322 int s, index = 0;
323 int count = nr_pages;
324 while (nr_pages) {
325 s = rest_of_page(data);
326 pages[index++] = virt_to_page(data);
327 data += s;
328 nr_pages--;
312 } 329 }
330 nr_pages = count;
313 } 331 }
332 return nr_pages;
333}
314 334
315req_retry_pinned: 335/**
316 spin_lock_irqsave(&chan->lock, flags); 336 * p9_virtio_zc_request - issue a zero copy request
337 * @client: client instance issuing the request
338 * @req: request to be issued
 339 * @uidata: user buffer that should be used for zero copy read
 340 * @uodata: user buffer that should be used for zero copy write
341 * @inlen: read buffer size
342 * @olen: write buffer size
 343 * @in_hdr_len: read header size. This is the size of response protocol data
344 *
345 */
346static int
347p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
348 char *uidata, char *uodata, int inlen,
349 int outlen, int in_hdr_len, int kern_buf)
350{
351 int in, out, err;
352 unsigned long flags;
353 int in_nr_pages = 0, out_nr_pages = 0;
354 struct page **in_pages = NULL, **out_pages = NULL;
355 struct virtio_chan *chan = client->trans;
317 356
318 /* Handle out VirtIO ring buffers */ 357 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
319 out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, 358
320 req->tc->size); 359 if (uodata) {
321 360 out_nr_pages = p9_nr_pages(uodata, outlen);
322 if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { 361 out_pages = kmalloc(sizeof(struct page *) * out_nr_pages,
323 /* We have additional write payload buffer to take care */ 362 GFP_NOFS);
324 if (req->tc->pubuf && P9_IS_USER_CONTEXT) { 363 if (!out_pages) {
325 outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, 364 err = -ENOMEM;
326 pdata_off, rpinfo->rp_data, pdata_len); 365 goto err_out;
327 } else { 366 }
328 char *pbuf; 367 out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata,
329 if (req->tc->pubuf) 368 out_nr_pages, 0, kern_buf);
330 pbuf = (__force char *) req->tc->pubuf; 369 if (out_nr_pages < 0) {
331 else 370 err = out_nr_pages;
332 pbuf = req->tc->pkbuf; 371 kfree(out_pages);
333 outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, 372 out_pages = NULL;
334 req->tc->pbuf_size); 373 goto err_out;
335 } 374 }
336 out += outp;
337 } 375 }
338 376 if (uidata) {
339 /* Handle in VirtIO ring buffers */ 377 in_nr_pages = p9_nr_pages(uidata, inlen);
340 if (req->tc->pbuf_size && 378 in_pages = kmalloc(sizeof(struct page *) * in_nr_pages,
341 ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { 379 GFP_NOFS);
342 /* 380 if (!in_pages) {
343 * Take care of additional Read payload. 381 err = -ENOMEM;
344 * 11 is the read/write header = PDU Header(7) + IO Size (4). 382 goto err_out;
345 * Arrange in such a way that server places header in the 383 }
346 * alloced memory and payload onto the user buffer. 384 in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata,
347 */ 385 in_nr_pages, 1, kern_buf);
348 inp = pack_sg_list(chan->sg, out, 386 if (in_nr_pages < 0) {
349 VIRTQUEUE_NUM, req->rc->sdata, 11); 387 err = in_nr_pages;
350 /* 388 kfree(in_pages);
351 * Running executables in the filesystem may result in 389 in_pages = NULL;
352 * a read request with kernel buffer as opposed to user buffer. 390 goto err_out;
353 */
354 if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
355 in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
356 pdata_off, rpinfo->rp_data, pdata_len);
357 } else {
358 char *pbuf;
359 if (req->tc->pubuf)
360 pbuf = (__force char *) req->tc->pubuf;
361 else
362 pbuf = req->tc->pkbuf;
363
364 in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
365 pbuf, req->tc->pbuf_size);
366 } 391 }
367 in += inp;
368 } else {
369 in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM,
370 req->rc->sdata, req->rc->capacity);
371 } 392 }
393 req->status = REQ_STATUS_SENT;
394req_retry_pinned:
395 spin_lock_irqsave(&chan->lock, flags);
396 /* out data */
397 out = pack_sg_list(chan->sg, 0,
398 VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
399
400 if (out_pages)
401 out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
402 out_pages, out_nr_pages, uodata, outlen);
403 /*
404 * Take care of in data
 405 * For example TREAD has 11.
406 * 11 is the read/write header = PDU Header(7) + IO Size (4).
407 * Arrange in such a way that server places header in the
408 * alloced memory and payload onto the user buffer.
409 */
410 in = pack_sg_list(chan->sg, out,
411 VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
412 if (in_pages)
413 in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
414 in_pages, in_nr_pages, uidata, inlen);
372 415
373 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); 416 err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
374 if (err < 0) { 417 if (err < 0) {
@@ -376,28 +419,45 @@ req_retry_pinned:
376 chan->ring_bufs_avail = 0; 419 chan->ring_bufs_avail = 0;
377 spin_unlock_irqrestore(&chan->lock, flags); 420 spin_unlock_irqrestore(&chan->lock, flags);
378 err = wait_event_interruptible(*chan->vc_wq, 421 err = wait_event_interruptible(*chan->vc_wq,
379 chan->ring_bufs_avail); 422 chan->ring_bufs_avail);
380 if (err == -ERESTARTSYS) 423 if (err == -ERESTARTSYS)
381 return err; 424 goto err_out;
382 425
383 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); 426 P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
384 goto req_retry_pinned; 427 goto req_retry_pinned;
385 } else { 428 } else {
386 spin_unlock_irqrestore(&chan->lock, flags); 429 spin_unlock_irqrestore(&chan->lock, flags);
387 P9_DPRINTK(P9_DEBUG_TRANS, 430 P9_DPRINTK(P9_DEBUG_TRANS,
388 "9p debug: " 431 "9p debug: "
389 "virtio rpc add_buf returned failure"); 432 "virtio rpc add_buf returned failure");
390 if (rpinfo && rpinfo->rp_alloc) 433 err = -EIO;
391 kfree(rpinfo); 434 goto err_out;
392 return -EIO;
393 } 435 }
394 } 436 }
395
396 virtqueue_kick(chan->vq); 437 virtqueue_kick(chan->vq);
397 spin_unlock_irqrestore(&chan->lock, flags); 438 spin_unlock_irqrestore(&chan->lock, flags);
398
399 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); 439 P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
400 return 0; 440 err = wait_event_interruptible(*req->wq,
441 req->status >= REQ_STATUS_RCVD);
442 /*
443 * Non kernel buffers are pinned, unpin them
444 */
445err_out:
446 if (!kern_buf) {
447 if (in_pages) {
448 p9_release_pages(in_pages, in_nr_pages);
449 atomic_sub(in_nr_pages, &vp_pinned);
450 }
451 if (out_pages) {
452 p9_release_pages(out_pages, out_nr_pages);
453 atomic_sub(out_nr_pages, &vp_pinned);
454 }
455 /* wakeup anybody waiting for slots to pin pages */
456 wake_up(&vp_wq);
457 }
458 kfree(in_pages);
459 kfree(out_pages);
460 return err;
401} 461}
402 462
403static ssize_t p9_mount_tag_show(struct device *dev, 463static ssize_t p9_mount_tag_show(struct device *dev,
@@ -591,8 +651,8 @@ static struct p9_trans_module p9_virtio_trans = {
591 .create = p9_virtio_create, 651 .create = p9_virtio_create,
592 .close = p9_virtio_close, 652 .close = p9_virtio_close,
593 .request = p9_virtio_request, 653 .request = p9_virtio_request,
654 .zc_request = p9_virtio_zc_request,
594 .cancel = p9_virtio_cancel, 655 .cancel = p9_virtio_cancel,
595
596 /* 656 /*
597 * We leave one entry for input and one entry for response 657 * We leave one entry for input and one entry for response
 598 * headers. We also skip one more entry to accommodate, address 658
@@ -600,7 +660,6 @@ static struct p9_trans_module p9_virtio_trans = {
600 * page in zero copy. 660 * page in zero copy.
601 */ 661 */
602 .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), 662 .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
603 .pref = P9_TRANS_PREF_PAYLOAD_SEP,
604 .def = 0, 663 .def = 0,
605 .owner = THIS_MODULE, 664 .owner = THIS_MODULE,
606}; 665};