diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2011-08-16 01:20:10 -0400 |
---|---|---|
committer | Eric Van Hensbergen <ericvh@gmail.com> | 2011-10-24 12:13:11 -0400 |
commit | abfa034e4b8ed0046fa589769e9840af645bc4ba (patch) | |
tree | 146195e891a2fd63e3fc91f803547abb904b2087 /net/9p | |
parent | c3b92c8787367a8bb53d57d9789b558f1295cc96 (diff) |
fs/9p: Update zero-copy implementation in 9p
* remove a lot of updates to different data structures
* add a separate callback for zero copy requests.
* the above makes the non zero copy code path simpler
* remove conditionalizing TREAD/TREADDIR/TWRITE in the zero copy path
* Fix the dotu p9_check_errors with zero copy. Add sufficient documentation around it
* Add support for both input and output buffers in zero copy callback
* pin and unpin pages in the same context
* use helpers instead of defining page offset and rest of page ourselves
* Fix mem leak in p9_check_errors
* Remove 'E' and 'F' in p9pdu_vwritef
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Diffstat (limited to 'net/9p')
-rw-r--r-- | net/9p/client.c | 391 | ||||
-rw-r--r-- | net/9p/protocol.c | 46 | ||||
-rw-r--r-- | net/9p/protocol.h | 1 | ||||
-rw-r--r-- | net/9p/trans_common.c | 53 | ||||
-rw-r--r-- | net/9p/trans_common.h | 21 | ||||
-rw-r--r-- | net/9p/trans_virtio.c | 319 |
6 files changed, 500 insertions, 331 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 0505a03c374c..305a4e719b03 100644 --- a/net/9p/client.c +++ b/net/9p/client.c | |||
@@ -203,11 +203,12 @@ free_and_return: | |||
203 | * | 203 | * |
204 | */ | 204 | */ |
205 | 205 | ||
206 | static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) | 206 | static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag, int max_size) |
207 | { | 207 | { |
208 | unsigned long flags; | 208 | unsigned long flags; |
209 | int row, col; | 209 | int row, col; |
210 | struct p9_req_t *req; | 210 | struct p9_req_t *req; |
211 | int alloc_msize = min(c->msize, max_size); | ||
211 | 212 | ||
212 | /* This looks up the original request by tag so we know which | 213 | /* This looks up the original request by tag so we know which |
213 | * buffer to read the data into */ | 214 | * buffer to read the data into */ |
@@ -245,23 +246,12 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) | |||
245 | return ERR_PTR(-ENOMEM); | 246 | return ERR_PTR(-ENOMEM); |
246 | } | 247 | } |
247 | init_waitqueue_head(req->wq); | 248 | init_waitqueue_head(req->wq); |
248 | if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == | 249 | req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, |
249 | P9_TRANS_PREF_PAYLOAD_SEP) { | 250 | GFP_NOFS); |
250 | int alloc_msize = min(c->msize, 4096); | 251 | req->tc->capacity = alloc_msize; |
251 | req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, | 252 | req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, |
252 | GFP_NOFS); | 253 | GFP_NOFS); |
253 | req->tc->capacity = alloc_msize; | 254 | req->rc->capacity = alloc_msize; |
254 | req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize, | ||
255 | GFP_NOFS); | ||
256 | req->rc->capacity = alloc_msize; | ||
257 | } else { | ||
258 | req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, | ||
259 | GFP_NOFS); | ||
260 | req->tc->capacity = c->msize; | ||
261 | req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, | ||
262 | GFP_NOFS); | ||
263 | req->rc->capacity = c->msize; | ||
264 | } | ||
265 | if ((!req->tc) || (!req->rc)) { | 255 | if ((!req->tc) || (!req->rc)) { |
266 | printk(KERN_ERR "Couldn't grow tag array\n"); | 256 | printk(KERN_ERR "Couldn't grow tag array\n"); |
267 | kfree(req->tc); | 257 | kfree(req->tc); |
@@ -485,27 +475,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) | |||
485 | 475 | ||
486 | if (!p9_is_proto_dotl(c)) { | 476 | if (!p9_is_proto_dotl(c)) { |
487 | char *ename; | 477 | char *ename; |
488 | |||
489 | if (req->tc->pbuf_size) { | ||
490 | /* Handle user buffers */ | ||
491 | size_t len = req->rc->size - req->rc->offset; | ||
492 | if (req->tc->pubuf) { | ||
493 | /* User Buffer */ | ||
494 | err = copy_from_user( | ||
495 | &req->rc->sdata[req->rc->offset], | ||
496 | req->tc->pubuf, len); | ||
497 | if (err) { | ||
498 | err = -EFAULT; | ||
499 | goto out_err; | ||
500 | } | ||
501 | } else { | ||
502 | /* Kernel Buffer */ | ||
503 | memmove(&req->rc->sdata[req->rc->offset], | ||
504 | req->tc->pkbuf, len); | ||
505 | } | ||
506 | } | ||
507 | err = p9pdu_readf(req->rc, c->proto_version, "s?d", | 478 | err = p9pdu_readf(req->rc, c->proto_version, "s?d", |
508 | &ename, &ecode); | 479 | &ename, &ecode); |
509 | if (err) | 480 | if (err) |
510 | goto out_err; | 481 | goto out_err; |
511 | 482 | ||
@@ -515,11 +486,10 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) | |||
515 | if (!err || !IS_ERR_VALUE(err)) { | 486 | if (!err || !IS_ERR_VALUE(err)) { |
516 | err = p9_errstr2errno(ename, strlen(ename)); | 487 | err = p9_errstr2errno(ename, strlen(ename)); |
517 | 488 | ||
518 | P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, | 489 | P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", |
519 | ename); | 490 | -ecode, ename); |
520 | |||
521 | kfree(ename); | ||
522 | } | 491 | } |
492 | kfree(ename); | ||
523 | } else { | 493 | } else { |
524 | err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); | 494 | err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); |
525 | err = -ecode; | 495 | err = -ecode; |
@@ -527,7 +497,6 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) | |||
527 | P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); | 497 | P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); |
528 | } | 498 | } |
529 | 499 | ||
530 | |||
531 | return err; | 500 | return err; |
532 | 501 | ||
533 | out_err: | 502 | out_err: |
@@ -536,6 +505,110 @@ out_err: | |||
536 | return err; | 505 | return err; |
537 | } | 506 | } |
538 | 507 | ||
508 | /** | ||
509 | * p9_check_zc_errors - check 9p packet for error return and process it | ||
510 | * @c: current client instance | ||
511 | * @req: request to parse and check for error conditions | ||
512 | * @in_hdrlen: Size of response protocol buffer. | ||
513 | * | ||
514 | * returns error code if one is discovered, otherwise returns 0 | ||
515 | * | ||
516 | * this will have to be more complicated if we have multiple | ||
517 | * error packet types | ||
518 | */ | ||
519 | |||
520 | static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, | ||
521 | char *uidata, int in_hdrlen, int kern_buf) | ||
522 | { | ||
523 | int err; | ||
524 | int ecode; | ||
525 | int8_t type; | ||
526 | char *ename = NULL; | ||
527 | |||
528 | err = p9_parse_header(req->rc, NULL, &type, NULL, 0); | ||
529 | if (err) { | ||
530 | P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); | ||
531 | return err; | ||
532 | } | ||
533 | |||
534 | if (type != P9_RERROR && type != P9_RLERROR) | ||
535 | return 0; | ||
536 | |||
537 | if (!p9_is_proto_dotl(c)) { | ||
538 | /* Error is reported in string format */ | ||
539 | uint16_t len; | ||
540 | /* 7 = header size for RERROR, 2 is the size of string len; */ | ||
541 | int inline_len = in_hdrlen - (7 + 2); | ||
542 | |||
543 | /* Read the size of error string */ | ||
544 | err = p9pdu_readf(req->rc, c->proto_version, "w", &len); | ||
545 | if (err) | ||
546 | goto out_err; | ||
547 | |||
548 | ename = kmalloc(len + 1, GFP_NOFS); | ||
549 | if (!ename) { | ||
550 | err = -ENOMEM; | ||
551 | goto out_err; | ||
552 | } | ||
553 | if (len <= inline_len) { | ||
554 | /* We have error in protocol buffer itself */ | ||
555 | if (pdu_read(req->rc, ename, len)) { | ||
556 | err = -EFAULT; | ||
557 | goto out_free; | ||
558 | |||
559 | } | ||
560 | } else { | ||
561 | /* | ||
562 | * Part of the data is in user space buffer. | ||
563 | */ | ||
564 | if (pdu_read(req->rc, ename, inline_len)) { | ||
565 | err = -EFAULT; | ||
566 | goto out_free; | ||
567 | |||
568 | } | ||
569 | if (kern_buf) { | ||
570 | memcpy(ename + inline_len, uidata, | ||
571 | len - inline_len); | ||
572 | } else { | ||
573 | err = copy_from_user(ename + inline_len, | ||
574 | uidata, len - inline_len); | ||
575 | if (err) { | ||
576 | err = -EFAULT; | ||
577 | goto out_free; | ||
578 | } | ||
579 | } | ||
580 | } | ||
581 | ename[len] = 0; | ||
582 | if (p9_is_proto_dotu(c)) { | ||
583 | /* For dotu we also have error code */ | ||
584 | err = p9pdu_readf(req->rc, | ||
585 | c->proto_version, "d", &ecode); | ||
586 | if (err) | ||
587 | goto out_free; | ||
588 | err = -ecode; | ||
589 | } | ||
590 | if (!err || !IS_ERR_VALUE(err)) { | ||
591 | err = p9_errstr2errno(ename, strlen(ename)); | ||
592 | |||
593 | P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", | ||
594 | -ecode, ename); | ||
595 | } | ||
596 | kfree(ename); | ||
597 | } else { | ||
598 | err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); | ||
599 | err = -ecode; | ||
600 | |||
601 | P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); | ||
602 | } | ||
603 | return err; | ||
604 | |||
605 | out_free: | ||
606 | kfree(ename); | ||
607 | out_err: | ||
608 | P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); | ||
609 | return err; | ||
610 | } | ||
611 | |||
539 | static struct p9_req_t * | 612 | static struct p9_req_t * |
540 | p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); | 613 | p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); |
541 | 614 | ||
@@ -579,23 +652,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) | |||
579 | return 0; | 652 | return 0; |
580 | } | 653 | } |
581 | 654 | ||
582 | /** | 655 | static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, |
583 | * p9_client_rpc - issue a request and wait for a response | 656 | int8_t type, int req_size, |
584 | * @c: client session | 657 | const char *fmt, va_list ap) |
585 | * @type: type of request | ||
586 | * @fmt: protocol format string (see protocol.c) | ||
587 | * | ||
588 | * Returns request structure (which client must free using p9_free_req) | ||
589 | */ | ||
590 | |||
591 | static struct p9_req_t * | ||
592 | p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) | ||
593 | { | 658 | { |
594 | va_list ap; | ||
595 | int tag, err; | 659 | int tag, err; |
596 | struct p9_req_t *req; | 660 | struct p9_req_t *req; |
597 | unsigned long flags; | ||
598 | int sigpending; | ||
599 | 661 | ||
600 | P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); | 662 | P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); |
601 | 663 | ||
@@ -607,12 +669,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) | |||
607 | if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) | 669 | if ((c->status == BeginDisconnect) && (type != P9_TCLUNK)) |
608 | return ERR_PTR(-EIO); | 670 | return ERR_PTR(-EIO); |
609 | 671 | ||
610 | if (signal_pending(current)) { | ||
611 | sigpending = 1; | ||
612 | clear_thread_flag(TIF_SIGPENDING); | ||
613 | } else | ||
614 | sigpending = 0; | ||
615 | |||
616 | tag = P9_NOTAG; | 672 | tag = P9_NOTAG; |
617 | if (type != P9_TVERSION) { | 673 | if (type != P9_TVERSION) { |
618 | tag = p9_idpool_get(c->tagpool); | 674 | tag = p9_idpool_get(c->tagpool); |
@@ -620,18 +676,50 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) | |||
620 | return ERR_PTR(-ENOMEM); | 676 | return ERR_PTR(-ENOMEM); |
621 | } | 677 | } |
622 | 678 | ||
623 | req = p9_tag_alloc(c, tag); | 679 | req = p9_tag_alloc(c, tag, req_size); |
624 | if (IS_ERR(req)) | 680 | if (IS_ERR(req)) |
625 | return req; | 681 | return req; |
626 | 682 | ||
627 | /* marshall the data */ | 683 | /* marshall the data */ |
628 | p9pdu_prepare(req->tc, tag, type); | 684 | p9pdu_prepare(req->tc, tag, type); |
629 | va_start(ap, fmt); | ||
630 | err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); | 685 | err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); |
631 | va_end(ap); | ||
632 | if (err) | 686 | if (err) |
633 | goto reterr; | 687 | goto reterr; |
634 | p9pdu_finalize(req->tc); | 688 | p9pdu_finalize(req->tc); |
689 | return req; | ||
690 | reterr: | ||
691 | p9_free_req(c, req); | ||
692 | return ERR_PTR(err); | ||
693 | } | ||
694 | |||
695 | /** | ||
696 | * p9_client_rpc - issue a request and wait for a response | ||
697 | * @c: client session | ||
698 | * @type: type of request | ||
699 | * @fmt: protocol format string (see protocol.c) | ||
700 | * | ||
701 | * Returns request structure (which client must free using p9_free_req) | ||
702 | */ | ||
703 | |||
704 | static struct p9_req_t * | ||
705 | p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) | ||
706 | { | ||
707 | va_list ap; | ||
708 | int sigpending, err; | ||
709 | unsigned long flags; | ||
710 | struct p9_req_t *req; | ||
711 | |||
712 | va_start(ap, fmt); | ||
713 | req = p9_client_prepare_req(c, type, c->msize, fmt, ap); | ||
714 | va_end(ap); | ||
715 | if (IS_ERR(req)) | ||
716 | return req; | ||
717 | |||
718 | if (signal_pending(current)) { | ||
719 | sigpending = 1; | ||
720 | clear_thread_flag(TIF_SIGPENDING); | ||
721 | } else | ||
722 | sigpending = 0; | ||
635 | 723 | ||
636 | err = c->trans_mod->request(c, req); | 724 | err = c->trans_mod->request(c, req); |
637 | if (err < 0) { | 725 | if (err < 0) { |
@@ -639,18 +727,14 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) | |||
639 | c->status = Disconnected; | 727 | c->status = Disconnected; |
640 | goto reterr; | 728 | goto reterr; |
641 | } | 729 | } |
642 | 730 | /* Wait for the response */ | |
643 | P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); | ||
644 | err = wait_event_interruptible(*req->wq, | 731 | err = wait_event_interruptible(*req->wq, |
645 | req->status >= REQ_STATUS_RCVD); | 732 | req->status >= REQ_STATUS_RCVD); |
646 | P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", | ||
647 | req->wq, tag, err); | ||
648 | 733 | ||
649 | if (req->status == REQ_STATUS_ERROR) { | 734 | if (req->status == REQ_STATUS_ERROR) { |
650 | P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); | 735 | P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); |
651 | err = req->t_err; | 736 | err = req->t_err; |
652 | } | 737 | } |
653 | |||
654 | if ((err == -ERESTARTSYS) && (c->status == Connected)) { | 738 | if ((err == -ERESTARTSYS) && (c->status == Connected)) { |
655 | P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); | 739 | P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); |
656 | sigpending = 1; | 740 | sigpending = 1; |
@@ -663,13 +747,11 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) | |||
663 | if (req->status == REQ_STATUS_RCVD) | 747 | if (req->status == REQ_STATUS_RCVD) |
664 | err = 0; | 748 | err = 0; |
665 | } | 749 | } |
666 | |||
667 | if (sigpending) { | 750 | if (sigpending) { |
668 | spin_lock_irqsave(&current->sighand->siglock, flags); | 751 | spin_lock_irqsave(&current->sighand->siglock, flags); |
669 | recalc_sigpending(); | 752 | recalc_sigpending(); |
670 | spin_unlock_irqrestore(&current->sighand->siglock, flags); | 753 | spin_unlock_irqrestore(&current->sighand->siglock, flags); |
671 | } | 754 | } |
672 | |||
673 | if (err < 0) | 755 | if (err < 0) |
674 | goto reterr; | 756 | goto reterr; |
675 | 757 | ||
@@ -678,7 +760,92 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) | |||
678 | P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); | 760 | P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); |
679 | return req; | 761 | return req; |
680 | } | 762 | } |
763 | reterr: | ||
764 | P9_DPRINTK(P9_DEBUG_MUX, | ||
765 | "exit: client %p op %d error: %d\n", c, type, err); | ||
766 | p9_free_req(c, req); | ||
767 | return ERR_PTR(err); | ||
768 | } | ||
769 | |||
770 | /** | ||
771 | * p9_client_zc_rpc - issue a request and wait for a response | ||
772 | * @c: client session | ||
773 | * @type: type of request | ||
774 | * @uidata: user buffer that should be used for zero copy read | ||
775 | * @uodata: user buffer that should be used for zero copy write | ||
776 | * @inlen: read buffer size | ||
777 | * @olen: write buffer size | ||
778 | * @in_hdrlen: read header size; this is the size of the response protocol data | ||
779 | * @fmt: protocol format string (see protocol.c) | ||
780 | * | ||
781 | * Returns request structure (which client must free using p9_free_req) | ||
782 | */ | ||
783 | static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, | ||
784 | char *uidata, char *uodata, | ||
785 | int inlen, int olen, int in_hdrlen, | ||
786 | int kern_buf, const char *fmt, ...) | ||
787 | { | ||
788 | va_list ap; | ||
789 | int sigpending, err; | ||
790 | unsigned long flags; | ||
791 | struct p9_req_t *req; | ||
792 | |||
793 | va_start(ap, fmt); | ||
794 | /* | ||
795 | * We allocate a inline protocol data of only 4k bytes. | ||
796 | * The actual content is passed in zero-copy fashion. | ||
797 | */ | ||
798 | req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); | ||
799 | va_end(ap); | ||
800 | if (IS_ERR(req)) | ||
801 | return req; | ||
802 | |||
803 | if (signal_pending(current)) { | ||
804 | sigpending = 1; | ||
805 | clear_thread_flag(TIF_SIGPENDING); | ||
806 | } else | ||
807 | sigpending = 0; | ||
808 | |||
809 | /* If we are called with KERNEL_DS force kern_buf */ | ||
810 | if (segment_eq(get_fs(), KERNEL_DS)) | ||
811 | kern_buf = 1; | ||
812 | |||
813 | err = c->trans_mod->zc_request(c, req, uidata, uodata, | ||
814 | inlen, olen, in_hdrlen, kern_buf); | ||
815 | if (err < 0) { | ||
816 | if (err == -EIO) | ||
817 | c->status = Disconnected; | ||
818 | goto reterr; | ||
819 | } | ||
820 | if (req->status == REQ_STATUS_ERROR) { | ||
821 | P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); | ||
822 | err = req->t_err; | ||
823 | } | ||
824 | if ((err == -ERESTARTSYS) && (c->status == Connected)) { | ||
825 | P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); | ||
826 | sigpending = 1; | ||
827 | clear_thread_flag(TIF_SIGPENDING); | ||
681 | 828 | ||
829 | if (c->trans_mod->cancel(c, req)) | ||
830 | p9_client_flush(c, req); | ||
831 | |||
832 | /* if we received the response anyway, don't signal error */ | ||
833 | if (req->status == REQ_STATUS_RCVD) | ||
834 | err = 0; | ||
835 | } | ||
836 | if (sigpending) { | ||
837 | spin_lock_irqsave(&current->sighand->siglock, flags); | ||
838 | recalc_sigpending(); | ||
839 | spin_unlock_irqrestore(&current->sighand->siglock, flags); | ||
840 | } | ||
841 | if (err < 0) | ||
842 | goto reterr; | ||
843 | |||
844 | err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); | ||
845 | if (!err) { | ||
846 | P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); | ||
847 | return req; | ||
848 | } | ||
682 | reterr: | 849 | reterr: |
683 | P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, | 850 | P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, |
684 | err); | 851 | err); |
@@ -1330,13 +1497,15 @@ int | |||
1330 | p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, | 1497 | p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, |
1331 | u32 count) | 1498 | u32 count) |
1332 | { | 1499 | { |
1333 | int err, rsize; | ||
1334 | struct p9_client *clnt; | ||
1335 | struct p9_req_t *req; | ||
1336 | char *dataptr; | 1500 | char *dataptr; |
1501 | int kernel_buf = 0; | ||
1502 | struct p9_req_t *req; | ||
1503 | struct p9_client *clnt; | ||
1504 | int err, rsize, non_zc = 0; | ||
1505 | |||
1337 | 1506 | ||
1338 | P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, | 1507 | P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", |
1339 | (long long unsigned) offset, count); | 1508 | fid->fid, (long long unsigned) offset, count); |
1340 | err = 0; | 1509 | err = 0; |
1341 | clnt = fid->clnt; | 1510 | clnt = fid->clnt; |
1342 | 1511 | ||
@@ -1348,13 +1517,24 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, | |||
1348 | rsize = count; | 1517 | rsize = count; |
1349 | 1518 | ||
1350 | /* Don't bother zerocopy for small IO (< 1024) */ | 1519 | /* Don't bother zerocopy for small IO (< 1024) */ |
1351 | if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == | 1520 | if (clnt->trans_mod->zc_request && rsize > 1024) { |
1352 | P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { | 1521 | char *indata; |
1353 | req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset, | 1522 | if (data) { |
1354 | rsize, data, udata); | 1523 | kernel_buf = 1; |
1524 | indata = data; | ||
1525 | } else | ||
1526 | indata = (char *)udata; | ||
1527 | /* | ||
1528 | * response header len is 11 | ||
1529 | * PDU Header(7) + IO Size (4) | ||
1530 | */ | ||
1531 | req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, | ||
1532 | 11, kernel_buf, "dqd", fid->fid, | ||
1533 | offset, rsize); | ||
1355 | } else { | 1534 | } else { |
1535 | non_zc = 1; | ||
1356 | req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, | 1536 | req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, |
1357 | rsize); | 1537 | rsize); |
1358 | } | 1538 | } |
1359 | if (IS_ERR(req)) { | 1539 | if (IS_ERR(req)) { |
1360 | err = PTR_ERR(req); | 1540 | err = PTR_ERR(req); |
@@ -1370,7 +1550,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, | |||
1370 | P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); | 1550 | P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); |
1371 | P9_DUMP_PKT(1, req->rc); | 1551 | P9_DUMP_PKT(1, req->rc); |
1372 | 1552 | ||
1373 | if (!req->tc->pbuf_size) { | 1553 | if (non_zc) { |
1374 | if (data) { | 1554 | if (data) { |
1375 | memmove(data, dataptr, count); | 1555 | memmove(data, dataptr, count); |
1376 | } else { | 1556 | } else { |
@@ -1396,6 +1576,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, | |||
1396 | u64 offset, u32 count) | 1576 | u64 offset, u32 count) |
1397 | { | 1577 | { |
1398 | int err, rsize; | 1578 | int err, rsize; |
1579 | int kernel_buf = 0; | ||
1399 | struct p9_client *clnt; | 1580 | struct p9_client *clnt; |
1400 | struct p9_req_t *req; | 1581 | struct p9_req_t *req; |
1401 | 1582 | ||
@@ -1411,19 +1592,24 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, | |||
1411 | if (count < rsize) | 1592 | if (count < rsize) |
1412 | rsize = count; | 1593 | rsize = count; |
1413 | 1594 | ||
1414 | /* Don't bother zerocopy form small IO (< 1024) */ | 1595 | /* Don't bother zerocopy for small IO (< 1024) */ |
1415 | if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == | 1596 | if (clnt->trans_mod->zc_request && rsize > 1024) { |
1416 | P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) { | 1597 | char *odata; |
1417 | req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset, | 1598 | if (data) { |
1418 | rsize, data, udata); | 1599 | kernel_buf = 1; |
1600 | odata = data; | ||
1601 | } else | ||
1602 | odata = (char *)udata; | ||
1603 | req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, | ||
1604 | P9_ZC_HDR_SZ, kernel_buf, "dqd", | ||
1605 | fid->fid, offset, rsize); | ||
1419 | } else { | 1606 | } else { |
1420 | |||
1421 | if (data) | 1607 | if (data) |
1422 | req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, | 1608 | req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, |
1423 | offset, rsize, data); | 1609 | offset, rsize, data); |
1424 | else | 1610 | else |
1425 | req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, | 1611 | req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, |
1426 | offset, rsize, udata); | 1612 | offset, rsize, udata); |
1427 | } | 1613 | } |
1428 | if (IS_ERR(req)) { | 1614 | if (IS_ERR(req)) { |
1429 | err = PTR_ERR(req); | 1615 | err = PTR_ERR(req); |
@@ -1824,7 +2010,7 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate); | |||
1824 | 2010 | ||
1825 | int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) | 2011 | int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) |
1826 | { | 2012 | { |
1827 | int err, rsize; | 2013 | int err, rsize, non_zc = 0; |
1828 | struct p9_client *clnt; | 2014 | struct p9_client *clnt; |
1829 | struct p9_req_t *req; | 2015 | struct p9_req_t *req; |
1830 | char *dataptr; | 2016 | char *dataptr; |
@@ -1842,13 +2028,18 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) | |||
1842 | if (count < rsize) | 2028 | if (count < rsize) |
1843 | rsize = count; | 2029 | rsize = count; |
1844 | 2030 | ||
1845 | if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) == | 2031 | /* Don't bother zerocopy for small IO (< 1024) */ |
1846 | P9_TRANS_PREF_PAYLOAD_SEP) { | 2032 | if (clnt->trans_mod->zc_request && rsize > 1024) { |
1847 | req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid, | 2033 | /* |
1848 | offset, rsize, data); | 2034 | * response header len is 11 |
2035 | * PDU Header(7) + IO Size (4) | ||
2036 | */ | ||
2037 | req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, | ||
2038 | 11, 1, "dqd", fid->fid, offset, rsize); | ||
1849 | } else { | 2039 | } else { |
2040 | non_zc = 1; | ||
1850 | req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, | 2041 | req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, |
1851 | offset, rsize); | 2042 | offset, rsize); |
1852 | } | 2043 | } |
1853 | if (IS_ERR(req)) { | 2044 | if (IS_ERR(req)) { |
1854 | err = PTR_ERR(req); | 2045 | err = PTR_ERR(req); |
@@ -1863,7 +2054,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) | |||
1863 | 2054 | ||
1864 | P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); | 2055 | P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); |
1865 | 2056 | ||
1866 | if (!req->tc->pbuf_size && data) | 2057 | if (non_zc) |
1867 | memmove(data, dataptr, count); | 2058 | memmove(data, dataptr, count); |
1868 | 2059 | ||
1869 | p9_free_req(clnt, req); | 2060 | p9_free_req(clnt, req); |
diff --git a/net/9p/protocol.c b/net/9p/protocol.c index df58375ea6b3..b7d4e8aa5383 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c | |||
@@ -81,7 +81,7 @@ void p9stat_free(struct p9_wstat *stbuf) | |||
81 | } | 81 | } |
82 | EXPORT_SYMBOL(p9stat_free); | 82 | EXPORT_SYMBOL(p9stat_free); |
83 | 83 | ||
84 | static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) | 84 | size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) |
85 | { | 85 | { |
86 | size_t len = min(pdu->size - pdu->offset, size); | 86 | size_t len = min(pdu->size - pdu->offset, size); |
87 | memcpy(data, &pdu->sdata[pdu->offset], len); | 87 | memcpy(data, &pdu->sdata[pdu->offset], len); |
@@ -108,26 +108,6 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) | |||
108 | return size - len; | 108 | return size - len; |
109 | } | 109 | } |
110 | 110 | ||
111 | static size_t | ||
112 | pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata, | ||
113 | size_t size) | ||
114 | { | ||
115 | BUG_ON(pdu->size > P9_IOHDRSZ); | ||
116 | pdu->pubuf = (char __user *)udata; | ||
117 | pdu->pkbuf = (char *)kdata; | ||
118 | pdu->pbuf_size = size; | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static size_t | ||
123 | pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size) | ||
124 | { | ||
125 | BUG_ON(pdu->size > P9_READDIRHDRSZ); | ||
126 | pdu->pkbuf = (char *)kdata; | ||
127 | pdu->pbuf_size = size; | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | /* | 111 | /* |
132 | b - int8_t | 112 | b - int8_t |
133 | w - int16_t | 113 | w - int16_t |
@@ -459,26 +439,6 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, | |||
459 | errcode = -EFAULT; | 439 | errcode = -EFAULT; |
460 | } | 440 | } |
461 | break; | 441 | break; |
462 | case 'E':{ | ||
463 | int32_t cnt = va_arg(ap, int32_t); | ||
464 | const char *k = va_arg(ap, const void *); | ||
465 | const char __user *u = va_arg(ap, | ||
466 | const void __user *); | ||
467 | errcode = p9pdu_writef(pdu, proto_version, "d", | ||
468 | cnt); | ||
469 | if (!errcode && pdu_write_urw(pdu, k, u, cnt)) | ||
470 | errcode = -EFAULT; | ||
471 | } | ||
472 | break; | ||
473 | case 'F':{ | ||
474 | int32_t cnt = va_arg(ap, int32_t); | ||
475 | const char *k = va_arg(ap, const void *); | ||
476 | errcode = p9pdu_writef(pdu, proto_version, "d", | ||
477 | cnt); | ||
478 | if (!errcode && pdu_write_readdir(pdu, k, cnt)) | ||
479 | errcode = -EFAULT; | ||
480 | } | ||
481 | break; | ||
482 | case 'U':{ | 442 | case 'U':{ |
483 | int32_t count = va_arg(ap, int32_t); | 443 | int32_t count = va_arg(ap, int32_t); |
484 | const char __user *udata = | 444 | const char __user *udata = |
@@ -637,10 +597,6 @@ void p9pdu_reset(struct p9_fcall *pdu) | |||
637 | { | 597 | { |
638 | pdu->offset = 0; | 598 | pdu->offset = 0; |
639 | pdu->size = 0; | 599 | pdu->size = 0; |
640 | pdu->private = NULL; | ||
641 | pdu->pubuf = NULL; | ||
642 | pdu->pkbuf = NULL; | ||
643 | pdu->pbuf_size = 0; | ||
644 | } | 600 | } |
645 | 601 | ||
646 | int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, | 602 | int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, |
diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 2431c0f38d56..a0eb8ff11f22 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h | |||
@@ -32,3 +32,4 @@ int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); | |||
32 | int p9pdu_finalize(struct p9_fcall *pdu); | 32 | int p9pdu_finalize(struct p9_fcall *pdu); |
33 | void p9pdu_dump(int, struct p9_fcall *); | 33 | void p9pdu_dump(int, struct p9_fcall *); |
34 | void p9pdu_reset(struct p9_fcall *pdu); | 34 | void p9pdu_reset(struct p9_fcall *pdu); |
35 | size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size); | ||
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 9a70ebdec56e..de8df957867d 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c | |||
@@ -21,30 +21,25 @@ | |||
21 | 21 | ||
22 | /** | 22 | /** |
23 | * p9_release_req_pages - Release pages after the transaction. | 23 | * p9_release_req_pages - Release pages after the transaction. |
24 | * @*private: PDU's private page of struct trans_rpage_info | ||
25 | */ | 24 | */ |
26 | void | 25 | void p9_release_pages(struct page **pages, int nr_pages) |
27 | p9_release_req_pages(struct trans_rpage_info *rpinfo) | ||
28 | { | 26 | { |
29 | int i = 0; | 27 | int i = 0; |
30 | 28 | while (pages[i] && nr_pages--) { | |
31 | while (rpinfo->rp_data[i] && rpinfo->rp_nr_pages--) { | 29 | put_page(pages[i]); |
32 | put_page(rpinfo->rp_data[i]); | ||
33 | i++; | 30 | i++; |
34 | } | 31 | } |
35 | } | 32 | } |
36 | EXPORT_SYMBOL(p9_release_req_pages); | 33 | EXPORT_SYMBOL(p9_release_pages); |
37 | 34 | ||
38 | /** | 35 | /** |
39 | * p9_nr_pages - Return number of pages needed to accommodate the payload. | 36 | * p9_nr_pages - Return number of pages needed to accommodate the payload. |
40 | */ | 37 | */ |
41 | int | 38 | int p9_nr_pages(char *data, int len) |
42 | p9_nr_pages(struct p9_req_t *req) | ||
43 | { | 39 | { |
44 | unsigned long start_page, end_page; | 40 | unsigned long start_page, end_page; |
45 | start_page = (unsigned long)req->tc->pubuf >> PAGE_SHIFT; | 41 | start_page = (unsigned long)data >> PAGE_SHIFT; |
46 | end_page = ((unsigned long)req->tc->pubuf + req->tc->pbuf_size + | 42 | end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
47 | PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
48 | return end_page - start_page; | 43 | return end_page - start_page; |
49 | } | 44 | } |
50 | EXPORT_SYMBOL(p9_nr_pages); | 45 | EXPORT_SYMBOL(p9_nr_pages); |
@@ -58,35 +53,17 @@ EXPORT_SYMBOL(p9_nr_pages); | |||
58 | * @nr_pages: number of pages to accommodate the payload | 53 | * @nr_pages: number of pages to accommodate the payload |
59 | * @rw: Indicates if the pages are for read or write. | 54 | * @rw: Indicates if the pages are for read or write. |
60 | */ | 55 | */ |
61 | int | ||
62 | p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len, | ||
63 | int nr_pages, u8 rw) | ||
64 | { | ||
65 | uint32_t first_page_bytes = 0; | ||
66 | int32_t pdata_mapped_pages; | ||
67 | struct trans_rpage_info *rpinfo; | ||
68 | |||
69 | *pdata_off = (__force size_t)req->tc->pubuf & (PAGE_SIZE-1); | ||
70 | 56 | ||
71 | if (*pdata_off) | 57 | int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) |
72 | first_page_bytes = min(((size_t)PAGE_SIZE - *pdata_off), | 58 | { |
73 | req->tc->pbuf_size); | 59 | int nr_mapped_pages; |
74 | 60 | ||
75 | rpinfo = req->tc->private; | 61 | nr_mapped_pages = get_user_pages_fast((unsigned long)data, |
76 | pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf, | 62 | *nr_pages, write, pages); |
77 | nr_pages, rw, &rpinfo->rp_data[0]); | 63 | if (nr_mapped_pages <= 0) |
78 | if (pdata_mapped_pages <= 0) | 64 | return nr_mapped_pages; |
79 | return pdata_mapped_pages; | ||
80 | 65 | ||
81 | rpinfo->rp_nr_pages = pdata_mapped_pages; | 66 | *nr_pages = nr_mapped_pages; |
82 | if (*pdata_off) { | ||
83 | *pdata_len = first_page_bytes; | ||
84 | *pdata_len += min((req->tc->pbuf_size - *pdata_len), | ||
85 | ((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT); | ||
86 | } else { | ||
87 | *pdata_len = min(req->tc->pbuf_size, | ||
88 | (size_t)pdata_mapped_pages << PAGE_SHIFT); | ||
89 | } | ||
90 | return 0; | 67 | return 0; |
91 | } | 68 | } |
92 | EXPORT_SYMBOL(p9_payload_gup); | 69 | EXPORT_SYMBOL(p9_payload_gup); |
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h index 76309223bb02..173bb550a9eb 100644 --- a/net/9p/trans_common.h +++ b/net/9p/trans_common.h | |||
@@ -12,21 +12,6 @@ | |||
12 | * | 12 | * |
13 | */ | 13 | */ |
14 | 14 | ||
15 | /* TRUE if it is user context */ | 15 | void p9_release_pages(struct page **, int); |
16 | #define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS)) | 16 | int p9_payload_gup(char *, int *, struct page **, int); |
17 | 17 | int p9_nr_pages(char *, int); | |
18 | /** | ||
19 | * struct trans_rpage_info - To store mapped page information in PDU. | ||
20 | * @rp_alloc:Set if this structure is allocd, not a reuse unused space in pdu. | ||
21 | * @rp_nr_pages: Number of mapped pages | ||
22 | * @rp_data: Array of page pointers | ||
23 | */ | ||
24 | struct trans_rpage_info { | ||
25 | u8 rp_alloc; | ||
26 | int rp_nr_pages; | ||
27 | struct page *rp_data[0]; | ||
28 | }; | ||
29 | |||
30 | void p9_release_req_pages(struct trans_rpage_info *); | ||
31 | int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8); | ||
32 | int p9_nr_pages(struct p9_req_t *); | ||
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e317583fcc73..32aa9834229c 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c | |||
@@ -150,12 +150,10 @@ static void req_done(struct virtqueue *vq) | |||
150 | while (1) { | 150 | while (1) { |
151 | spin_lock_irqsave(&chan->lock, flags); | 151 | spin_lock_irqsave(&chan->lock, flags); |
152 | rc = virtqueue_get_buf(chan->vq, &len); | 152 | rc = virtqueue_get_buf(chan->vq, &len); |
153 | |||
154 | if (rc == NULL) { | 153 | if (rc == NULL) { |
155 | spin_unlock_irqrestore(&chan->lock, flags); | 154 | spin_unlock_irqrestore(&chan->lock, flags); |
156 | break; | 155 | break; |
157 | } | 156 | } |
158 | |||
159 | chan->ring_bufs_avail = 1; | 157 | chan->ring_bufs_avail = 1; |
160 | spin_unlock_irqrestore(&chan->lock, flags); | 158 | spin_unlock_irqrestore(&chan->lock, flags); |
161 | /* Wakeup if anyone waiting for VirtIO ring space. */ | 159 | /* Wakeup if anyone waiting for VirtIO ring space. */ |
@@ -163,17 +161,6 @@ static void req_done(struct virtqueue *vq) | |||
163 | P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); | 161 | P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); |
164 | P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); | 162 | P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); |
165 | req = p9_tag_lookup(chan->client, rc->tag); | 163 | req = p9_tag_lookup(chan->client, rc->tag); |
166 | if (req->tc->private) { | ||
167 | struct trans_rpage_info *rp = req->tc->private; | ||
168 | int p = rp->rp_nr_pages; | ||
169 | /*Release pages */ | ||
170 | p9_release_req_pages(rp); | ||
171 | atomic_sub(p, &vp_pinned); | ||
172 | wake_up(&vp_wq); | ||
173 | if (rp->rp_alloc) | ||
174 | kfree(rp); | ||
175 | req->tc->private = NULL; | ||
176 | } | ||
177 | req->status = REQ_STATUS_RCVD; | 164 | req->status = REQ_STATUS_RCVD; |
178 | p9_client_cb(chan->client, req); | 165 | p9_client_cb(chan->client, req); |
179 | } | 166 | } |
@@ -193,9 +180,8 @@ static void req_done(struct virtqueue *vq) | |||
193 | * | 180 | * |
194 | */ | 181 | */ |
195 | 182 | ||
196 | static int | 183 | static int pack_sg_list(struct scatterlist *sg, int start, |
197 | pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, | 184 | int limit, char *data, int count) |
198 | int count) | ||
199 | { | 185 | { |
200 | int s; | 186 | int s; |
201 | int index = start; | 187 | int index = start; |
@@ -224,31 +210,36 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) | |||
224 | * this takes a list of pages. | 210 | * this takes a list of pages. |
225 | * @sg: scatter/gather list to pack into | 211 | * @sg: scatter/gather list to pack into |
226 | * @start: which segment of the sg_list to start at | 212 | * @start: which segment of the sg_list to start at |
227 | * @pdata_off: Offset into the first page | ||
228 | * @**pdata: a list of pages to add into sg. | 213 | * @**pdata: a list of pages to add into sg. |
214 | * @nr_pages: number of pages to pack into the scatter/gather list | ||
215 | * @data: data to pack into scatter/gather list | ||
229 | * @count: amount of data to pack into the scatter/gather list | 216 | * @count: amount of data to pack into the scatter/gather list |
230 | */ | 217 | */ |
231 | static int | 218 | static int |
232 | pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, | 219 | pack_sg_list_p(struct scatterlist *sg, int start, int limit, |
233 | struct page **pdata, int count) | 220 | struct page **pdata, int nr_pages, char *data, int count) |
234 | { | 221 | { |
235 | int s; | 222 | int i = 0, s; |
236 | int i = 0; | 223 | int data_off; |
237 | int index = start; | 224 | int index = start; |
238 | 225 | ||
239 | if (pdata_off) { | 226 | BUG_ON(nr_pages > (limit - start)); |
240 | s = min((int)(PAGE_SIZE - pdata_off), count); | 227 | /* |
241 | sg_set_page(&sg[index++], pdata[i++], s, pdata_off); | 228 | * if the first page doesn't start at |
242 | count -= s; | 229 | * page boundary find the offset |
243 | } | 230 | */ |
244 | 231 | data_off = offset_in_page(data); | |
245 | while (count) { | 232 | while (nr_pages) { |
246 | BUG_ON(index > limit); | 233 | s = rest_of_page(data); |
247 | s = min((int)PAGE_SIZE, count); | 234 | if (s > count) |
248 | sg_set_page(&sg[index++], pdata[i++], s, 0); | 235 | s = count; |
236 | sg_set_page(&sg[index++], pdata[i++], s, data_off); | ||
237 | data_off = 0; | ||
238 | data += s; | ||
249 | count -= s; | 239 | count -= s; |
240 | nr_pages--; | ||
250 | } | 241 | } |
251 | return index-start; | 242 | return index - start; |
252 | } | 243 | } |
253 | 244 | ||
254 | /** | 245 | /** |
@@ -261,114 +252,166 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off, | |||
261 | static int | 252 | static int |
262 | p9_virtio_request(struct p9_client *client, struct p9_req_t *req) | 253 | p9_virtio_request(struct p9_client *client, struct p9_req_t *req) |
263 | { | 254 | { |
264 | int in, out, inp, outp; | 255 | int err; |
265 | struct virtio_chan *chan = client->trans; | 256 | int in, out; |
266 | unsigned long flags; | 257 | unsigned long flags; |
267 | size_t pdata_off = 0; | 258 | struct virtio_chan *chan = client->trans; |
268 | struct trans_rpage_info *rpinfo = NULL; | ||
269 | int err, pdata_len = 0; | ||
270 | 259 | ||
271 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); | 260 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); |
272 | 261 | ||
273 | req->status = REQ_STATUS_SENT; | 262 | req->status = REQ_STATUS_SENT; |
263 | req_retry: | ||
264 | spin_lock_irqsave(&chan->lock, flags); | ||
265 | |||
266 | /* Handle out VirtIO ring buffers */ | ||
267 | out = pack_sg_list(chan->sg, 0, | ||
268 | VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); | ||
274 | 269 | ||
275 | if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) { | 270 | in = pack_sg_list(chan->sg, out, |
276 | int nr_pages = p9_nr_pages(req); | 271 | VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); |
277 | int rpinfo_size = sizeof(struct trans_rpage_info) + | ||
278 | sizeof(struct page *) * nr_pages; | ||
279 | 272 | ||
280 | if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { | 273 | err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); |
281 | err = wait_event_interruptible(vp_wq, | 274 | if (err < 0) { |
282 | atomic_read(&vp_pinned) < chan->p9_max_pages); | 275 | if (err == -ENOSPC) { |
276 | chan->ring_bufs_avail = 0; | ||
277 | spin_unlock_irqrestore(&chan->lock, flags); | ||
278 | err = wait_event_interruptible(*chan->vc_wq, | ||
279 | chan->ring_bufs_avail); | ||
283 | if (err == -ERESTARTSYS) | 280 | if (err == -ERESTARTSYS) |
284 | return err; | 281 | return err; |
285 | P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n"); | ||
286 | } | ||
287 | 282 | ||
288 | if (rpinfo_size <= (req->tc->capacity - req->tc->size)) { | 283 | P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); |
289 | /* We can use sdata */ | 284 | goto req_retry; |
290 | req->tc->private = req->tc->sdata + req->tc->size; | ||
291 | rpinfo = (struct trans_rpage_info *)req->tc->private; | ||
292 | rpinfo->rp_alloc = 0; | ||
293 | } else { | 285 | } else { |
294 | req->tc->private = kmalloc(rpinfo_size, GFP_NOFS); | 286 | spin_unlock_irqrestore(&chan->lock, flags); |
295 | if (!req->tc->private) { | 287 | P9_DPRINTK(P9_DEBUG_TRANS, |
296 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: " | 288 | "9p debug: " |
297 | "private kmalloc returned NULL"); | 289 | "virtio rpc add_buf returned failure"); |
298 | return -ENOMEM; | 290 | return -EIO; |
299 | } | ||
300 | rpinfo = (struct trans_rpage_info *)req->tc->private; | ||
301 | rpinfo->rp_alloc = 1; | ||
302 | } | 291 | } |
292 | } | ||
293 | virtqueue_kick(chan->vq); | ||
294 | spin_unlock_irqrestore(&chan->lock, flags); | ||
303 | 295 | ||
304 | err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages, | 296 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); |
305 | req->tc->id == P9_TREAD ? 1 : 0); | 297 | return 0; |
306 | if (err < 0) { | 298 | } |
307 | if (rpinfo->rp_alloc) | 299 | |
308 | kfree(rpinfo); | 300 | static int p9_get_mapped_pages(struct virtio_chan *chan, |
301 | struct page **pages, char *data, | ||
302 | int nr_pages, int write, int kern_buf) | ||
303 | { | ||
304 | int err; | ||
305 | if (!kern_buf) { | ||
306 | /* | ||
307 | * We allow only p9_max_pages pages to be pinned. We wait for | ||
308 | * other zc requests to finish here | ||
309 | */ | ||
310 | if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { | ||
311 | err = wait_event_interruptible(vp_wq, | ||
312 | (atomic_read(&vp_pinned) < chan->p9_max_pages)); | ||
313 | if (err == -ERESTARTSYS) | ||
314 | return err; | ||
315 | } | ||
316 | err = p9_payload_gup(data, &nr_pages, pages, write); | ||
317 | if (err < 0) | ||
309 | return err; | 318 | return err; |
310 | } else { | 319 | atomic_add(nr_pages, &vp_pinned); |
311 | atomic_add(rpinfo->rp_nr_pages, &vp_pinned); | 320 | } else { |
321 | /* kernel buffer, no need to pin pages */ | ||
322 | int s, index = 0; | ||
323 | int count = nr_pages; | ||
324 | while (nr_pages) { | ||
325 | s = rest_of_page(data); | ||
326 | pages[index++] = virt_to_page(data); | ||
327 | data += s; | ||
328 | nr_pages--; | ||
312 | } | 329 | } |
330 | nr_pages = count; | ||
313 | } | 331 | } |
332 | return nr_pages; | ||
333 | } | ||
314 | 334 | ||
315 | req_retry_pinned: | 335 | /** |
316 | spin_lock_irqsave(&chan->lock, flags); | 336 | * p9_virtio_zc_request - issue a zero copy request |
337 | * @client: client instance issuing the request | ||
338 | * @req: request to be issued | ||
339 | * @uidata: user buffer that should be used for zero copy read | ||
340 | * @uodata: user buffer that should be used for zero copy write | ||
341 | * @inlen: read buffer size | ||
342 | * @outlen: write buffer size | ||
343 | * @in_hdr_len: read header size; this is the size of the response protocol data | ||
344 | * | ||
345 | */ | ||
346 | static int | ||
347 | p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, | ||
348 | char *uidata, char *uodata, int inlen, | ||
349 | int outlen, int in_hdr_len, int kern_buf) | ||
350 | { | ||
351 | int in, out, err; | ||
352 | unsigned long flags; | ||
353 | int in_nr_pages = 0, out_nr_pages = 0; | ||
354 | struct page **in_pages = NULL, **out_pages = NULL; | ||
355 | struct virtio_chan *chan = client->trans; | ||
317 | 356 | ||
318 | /* Handle out VirtIO ring buffers */ | 357 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); |
319 | out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, | 358 | |
320 | req->tc->size); | 359 | if (uodata) { |
321 | 360 | out_nr_pages = p9_nr_pages(uodata, outlen); | |
322 | if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) { | 361 | out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, |
323 | /* We have additional write payload buffer to take care */ | 362 | GFP_NOFS); |
324 | if (req->tc->pubuf && P9_IS_USER_CONTEXT) { | 363 | if (!out_pages) { |
325 | outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, | 364 | err = -ENOMEM; |
326 | pdata_off, rpinfo->rp_data, pdata_len); | 365 | goto err_out; |
327 | } else { | 366 | } |
328 | char *pbuf; | 367 | out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, |
329 | if (req->tc->pubuf) | 368 | out_nr_pages, 0, kern_buf); |
330 | pbuf = (__force char *) req->tc->pubuf; | 369 | if (out_nr_pages < 0) { |
331 | else | 370 | err = out_nr_pages; |
332 | pbuf = req->tc->pkbuf; | 371 | kfree(out_pages); |
333 | outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf, | 372 | out_pages = NULL; |
334 | req->tc->pbuf_size); | 373 | goto err_out; |
335 | } | 374 | } |
336 | out += outp; | ||
337 | } | 375 | } |
338 | 376 | if (uidata) { | |
339 | /* Handle in VirtIO ring buffers */ | 377 | in_nr_pages = p9_nr_pages(uidata, inlen); |
340 | if (req->tc->pbuf_size && | 378 | in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, |
341 | ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) { | 379 | GFP_NOFS); |
342 | /* | 380 | if (!in_pages) { |
343 | * Take care of additional Read payload. | 381 | err = -ENOMEM; |
344 | * 11 is the read/write header = PDU Header(7) + IO Size (4). | 382 | goto err_out; |
345 | * Arrange in such a way that server places header in the | 383 | } |
346 | * alloced memory and payload onto the user buffer. | 384 | in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, |
347 | */ | 385 | in_nr_pages, 1, kern_buf); |
348 | inp = pack_sg_list(chan->sg, out, | 386 | if (in_nr_pages < 0) { |
349 | VIRTQUEUE_NUM, req->rc->sdata, 11); | 387 | err = in_nr_pages; |
350 | /* | 388 | kfree(in_pages); |
351 | * Running executables in the filesystem may result in | 389 | in_pages = NULL; |
352 | * a read request with kernel buffer as opposed to user buffer. | 390 | goto err_out; |
353 | */ | ||
354 | if (req->tc->pubuf && P9_IS_USER_CONTEXT) { | ||
355 | in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM, | ||
356 | pdata_off, rpinfo->rp_data, pdata_len); | ||
357 | } else { | ||
358 | char *pbuf; | ||
359 | if (req->tc->pubuf) | ||
360 | pbuf = (__force char *) req->tc->pubuf; | ||
361 | else | ||
362 | pbuf = req->tc->pkbuf; | ||
363 | |||
364 | in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM, | ||
365 | pbuf, req->tc->pbuf_size); | ||
366 | } | 391 | } |
367 | in += inp; | ||
368 | } else { | ||
369 | in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, | ||
370 | req->rc->sdata, req->rc->capacity); | ||
371 | } | 392 | } |
393 | req->status = REQ_STATUS_SENT; | ||
394 | req_retry_pinned: | ||
395 | spin_lock_irqsave(&chan->lock, flags); | ||
396 | /* out data */ | ||
397 | out = pack_sg_list(chan->sg, 0, | ||
398 | VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); | ||
399 | |||
400 | if (out_pages) | ||
401 | out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, | ||
402 | out_pages, out_nr_pages, uodata, outlen); | ||
403 | /* | ||
404 | * Take care of in data | ||
405 | * For example, TREAD has 11. | ||
406 | * 11 is the read/write header = PDU Header(7) + IO Size (4). | ||
407 | * Arrange in such a way that server places header in the | ||
408 | * alloced memory and payload onto the user buffer. | ||
409 | */ | ||
410 | in = pack_sg_list(chan->sg, out, | ||
411 | VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); | ||
412 | if (in_pages) | ||
413 | in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, | ||
414 | in_pages, in_nr_pages, uidata, inlen); | ||
372 | 415 | ||
373 | err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); | 416 | err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); |
374 | if (err < 0) { | 417 | if (err < 0) { |
@@ -376,28 +419,45 @@ req_retry_pinned: | |||
376 | chan->ring_bufs_avail = 0; | 419 | chan->ring_bufs_avail = 0; |
377 | spin_unlock_irqrestore(&chan->lock, flags); | 420 | spin_unlock_irqrestore(&chan->lock, flags); |
378 | err = wait_event_interruptible(*chan->vc_wq, | 421 | err = wait_event_interruptible(*chan->vc_wq, |
379 | chan->ring_bufs_avail); | 422 | chan->ring_bufs_avail); |
380 | if (err == -ERESTARTSYS) | 423 | if (err == -ERESTARTSYS) |
381 | return err; | 424 | goto err_out; |
382 | 425 | ||
383 | P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); | 426 | P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); |
384 | goto req_retry_pinned; | 427 | goto req_retry_pinned; |
385 | } else { | 428 | } else { |
386 | spin_unlock_irqrestore(&chan->lock, flags); | 429 | spin_unlock_irqrestore(&chan->lock, flags); |
387 | P9_DPRINTK(P9_DEBUG_TRANS, | 430 | P9_DPRINTK(P9_DEBUG_TRANS, |
388 | "9p debug: " | 431 | "9p debug: " |
389 | "virtio rpc add_buf returned failure"); | 432 | "virtio rpc add_buf returned failure"); |
390 | if (rpinfo && rpinfo->rp_alloc) | 433 | err = -EIO; |
391 | kfree(rpinfo); | 434 | goto err_out; |
392 | return -EIO; | ||
393 | } | 435 | } |
394 | } | 436 | } |
395 | |||
396 | virtqueue_kick(chan->vq); | 437 | virtqueue_kick(chan->vq); |
397 | spin_unlock_irqrestore(&chan->lock, flags); | 438 | spin_unlock_irqrestore(&chan->lock, flags); |
398 | |||
399 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); | 439 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); |
400 | return 0; | 440 | err = wait_event_interruptible(*req->wq, |
441 | req->status >= REQ_STATUS_RCVD); | ||
442 | /* | ||
443 | * Non kernel buffers are pinned, unpin them | ||
444 | */ | ||
445 | err_out: | ||
446 | if (!kern_buf) { | ||
447 | if (in_pages) { | ||
448 | p9_release_pages(in_pages, in_nr_pages); | ||
449 | atomic_sub(in_nr_pages, &vp_pinned); | ||
450 | } | ||
451 | if (out_pages) { | ||
452 | p9_release_pages(out_pages, out_nr_pages); | ||
453 | atomic_sub(out_nr_pages, &vp_pinned); | ||
454 | } | ||
455 | /* wakeup anybody waiting for slots to pin pages */ | ||
456 | wake_up(&vp_wq); | ||
457 | } | ||
458 | kfree(in_pages); | ||
459 | kfree(out_pages); | ||
460 | return err; | ||
401 | } | 461 | } |
402 | 462 | ||
403 | static ssize_t p9_mount_tag_show(struct device *dev, | 463 | static ssize_t p9_mount_tag_show(struct device *dev, |
@@ -591,8 +651,8 @@ static struct p9_trans_module p9_virtio_trans = { | |||
591 | .create = p9_virtio_create, | 651 | .create = p9_virtio_create, |
592 | .close = p9_virtio_close, | 652 | .close = p9_virtio_close, |
593 | .request = p9_virtio_request, | 653 | .request = p9_virtio_request, |
654 | .zc_request = p9_virtio_zc_request, | ||
594 | .cancel = p9_virtio_cancel, | 655 | .cancel = p9_virtio_cancel, |
595 | |||
596 | /* | 656 | /* |
597 | * We leave one entry for input and one entry for response | 657 | * We leave one entry for input and one entry for response |
598 | * headers. We also skip one more entry to accommodate, address | 658 | * headers. We also skip one more entry to accommodate, address |
@@ -600,7 +660,6 @@ static struct p9_trans_module p9_virtio_trans = { | |||
600 | * page in zero copy. | 660 | * page in zero copy. |
601 | */ | 661 | */ |
602 | .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), | 662 | .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), |
603 | .pref = P9_TRANS_PREF_PAYLOAD_SEP, | ||
604 | .def = 0, | 663 | .def = 0, |
605 | .owner = THIS_MODULE, | 664 | .owner = THIS_MODULE, |
606 | }; | 665 | }; |