diff options
author | Long Li <longli@microsoft.com> | 2018-10-31 18:13:10 -0400 |
---|---|---|
committer | Steve French <stfrench@microsoft.com> | 2018-11-02 15:09:42 -0400 |
commit | 8c5f9c1ab7cb45f6584ce312b14fb310a8fa28f3 (patch) | |
tree | 04d1492038748e64e95f13c295aaf0f338f6af3e | |
parent | 6e6e2b86c29c6fcfa16ad9fdc7ea32027bea5d73 (diff) |
CIFS: Add support for direct I/O write
With direct I/O write, user-supplied buffers are pinned in memory and data is
transferred directly from the user buffers to the transport layer.
Change in v3: add support for kernel AIO
Changes in v4:
- Refactor common write code into __cifs_writev for direct and non-direct I/O.
- Retry on direct I/O failure.
Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
-rw-r--r-- | fs/cifs/cifsfs.h | 1 | ||||
-rw-r--r-- | fs/cifs/file.c | 204 |
2 files changed, 164 insertions, 41 deletions
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 3abea1a3f20c..4c3b5cfccc49 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -104,6 +104,7 @@ extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to); | |||
104 | extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to); | 104 | extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to); |
105 | extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); | 105 | extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); |
106 | extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); | 106 | extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); |
107 | extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from); | ||
107 | extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); | 108 | extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); |
108 | extern int cifs_lock(struct file *, int, struct file_lock *); | 109 | extern int cifs_lock(struct file *, int, struct file_lock *); |
109 | extern int cifs_fsync(struct file *, loff_t, loff_t, int); | 110 | extern int cifs_fsync(struct file *, loff_t, loff_t, int); |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 1be36076e960..937ffa79066b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -2538,6 +2538,61 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, | |||
2538 | } | 2538 | } |
2539 | 2539 | ||
2540 | static int | 2540 | static int |
2541 | cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, | ||
2542 | struct cifs_aio_ctx *ctx) | ||
2543 | { | ||
2544 | int wait_retry = 0; | ||
2545 | unsigned int wsize, credits; | ||
2546 | int rc; | ||
2547 | struct TCP_Server_Info *server = | ||
2548 | tlink_tcon(wdata->cfile->tlink)->ses->server; | ||
2549 | |||
2550 | /* | ||
2551 | * Try to resend this wdata, waiting for credits up to 3 seconds. | ||
2552 | * Note: we are attempting to resend the whole wdata not in segments | ||
2553 | */ | ||
2554 | do { | ||
2555 | rc = server->ops->wait_mtu_credits( | ||
2556 | server, wdata->bytes, &wsize, &credits); | ||
2557 | |||
2558 | if (rc) | ||
2559 | break; | ||
2560 | |||
2561 | if (wsize < wdata->bytes) { | ||
2562 | add_credits_and_wake_if(server, credits, 0); | ||
2563 | msleep(1000); | ||
2564 | wait_retry++; | ||
2565 | } | ||
2566 | } while (wsize < wdata->bytes && wait_retry < 3); | ||
2567 | |||
2568 | if (wsize < wdata->bytes) { | ||
2569 | rc = -EBUSY; | ||
2570 | goto out; | ||
2571 | } | ||
2572 | |||
2573 | rc = -EAGAIN; | ||
2574 | while (rc == -EAGAIN) { | ||
2575 | rc = 0; | ||
2576 | if (wdata->cfile->invalidHandle) | ||
2577 | rc = cifs_reopen_file(wdata->cfile, false); | ||
2578 | if (!rc) | ||
2579 | rc = server->ops->async_writev(wdata, | ||
2580 | cifs_uncached_writedata_release); | ||
2581 | } | ||
2582 | |||
2583 | if (!rc) { | ||
2584 | list_add_tail(&wdata->list, wdata_list); | ||
2585 | return 0; | ||
2586 | } | ||
2587 | |||
2588 | add_credits_and_wake_if(server, wdata->credits, 0); | ||
2589 | out: | ||
2590 | kref_put(&wdata->refcount, cifs_uncached_writedata_release); | ||
2591 | |||
2592 | return rc; | ||
2593 | } | ||
2594 | |||
2595 | static int | ||
2541 | cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, | 2596 | cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, |
2542 | struct cifsFileInfo *open_file, | 2597 | struct cifsFileInfo *open_file, |
2543 | struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, | 2598 | struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, |
@@ -2551,6 +2606,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, | |||
2551 | loff_t saved_offset = offset; | 2606 | loff_t saved_offset = offset; |
2552 | pid_t pid; | 2607 | pid_t pid; |
2553 | struct TCP_Server_Info *server; | 2608 | struct TCP_Server_Info *server; |
2609 | struct page **pagevec; | ||
2610 | size_t start; | ||
2554 | 2611 | ||
2555 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) | 2612 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) |
2556 | pid = open_file->pid; | 2613 | pid = open_file->pid; |
@@ -2567,38 +2624,76 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, | |||
2567 | if (rc) | 2624 | if (rc) |
2568 | break; | 2625 | break; |
2569 | 2626 | ||
2570 | nr_pages = get_numpages(wsize, len, &cur_len); | 2627 | if (ctx->direct_io) { |
2571 | wdata = cifs_writedata_alloc(nr_pages, | 2628 | cur_len = iov_iter_get_pages_alloc( |
2629 | from, &pagevec, wsize, &start); | ||
2630 | if (cur_len < 0) { | ||
2631 | cifs_dbg(VFS, | ||
2632 | "direct_writev couldn't get user pages " | ||
2633 | "(rc=%zd) iter type %d iov_offset %zd " | ||
2634 | "count %zd\n", | ||
2635 | cur_len, from->type, | ||
2636 | from->iov_offset, from->count); | ||
2637 | dump_stack(); | ||
2638 | break; | ||
2639 | } | ||
2640 | iov_iter_advance(from, cur_len); | ||
2641 | |||
2642 | nr_pages = | ||
2643 | (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE; | ||
2644 | |||
2645 | wdata = cifs_writedata_direct_alloc(pagevec, | ||
2572 | cifs_uncached_writev_complete); | 2646 | cifs_uncached_writev_complete); |
2573 | if (!wdata) { | 2647 | if (!wdata) { |
2574 | rc = -ENOMEM; | 2648 | rc = -ENOMEM; |
2575 | add_credits_and_wake_if(server, credits, 0); | 2649 | add_credits_and_wake_if(server, credits, 0); |
2576 | break; | 2650 | break; |
2577 | } | 2651 | } |
2578 | 2652 | ||
2579 | rc = cifs_write_allocate_pages(wdata->pages, nr_pages); | ||
2580 | if (rc) { | ||
2581 | kfree(wdata); | ||
2582 | add_credits_and_wake_if(server, credits, 0); | ||
2583 | break; | ||
2584 | } | ||
2585 | 2653 | ||
2586 | num_pages = nr_pages; | 2654 | wdata->page_offset = start; |
2587 | rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages); | 2655 | wdata->tailsz = |
2588 | if (rc) { | 2656 | nr_pages > 1 ? |
2589 | for (i = 0; i < nr_pages; i++) | 2657 | cur_len - (PAGE_SIZE - start) - |
2590 | put_page(wdata->pages[i]); | 2658 | (nr_pages - 2) * PAGE_SIZE : |
2591 | kfree(wdata); | 2659 | cur_len; |
2592 | add_credits_and_wake_if(server, credits, 0); | 2660 | } else { |
2593 | break; | 2661 | nr_pages = get_numpages(wsize, len, &cur_len); |
2594 | } | 2662 | wdata = cifs_writedata_alloc(nr_pages, |
2663 | cifs_uncached_writev_complete); | ||
2664 | if (!wdata) { | ||
2665 | rc = -ENOMEM; | ||
2666 | add_credits_and_wake_if(server, credits, 0); | ||
2667 | break; | ||
2668 | } | ||
2595 | 2669 | ||
2596 | /* | 2670 | rc = cifs_write_allocate_pages(wdata->pages, nr_pages); |
2597 | * Bring nr_pages down to the number of pages we actually used, | 2671 | if (rc) { |
2598 | * and free any pages that we didn't use. | 2672 | kfree(wdata); |
2599 | */ | 2673 | add_credits_and_wake_if(server, credits, 0); |
2600 | for ( ; nr_pages > num_pages; nr_pages--) | 2674 | break; |
2601 | put_page(wdata->pages[nr_pages - 1]); | 2675 | } |
2676 | |||
2677 | num_pages = nr_pages; | ||
2678 | rc = wdata_fill_from_iovec( | ||
2679 | wdata, from, &cur_len, &num_pages); | ||
2680 | if (rc) { | ||
2681 | for (i = 0; i < nr_pages; i++) | ||
2682 | put_page(wdata->pages[i]); | ||
2683 | kfree(wdata); | ||
2684 | add_credits_and_wake_if(server, credits, 0); | ||
2685 | break; | ||
2686 | } | ||
2687 | |||
2688 | /* | ||
2689 | * Bring nr_pages down to the number of pages we | ||
2690 | * actually used, and free any pages that we didn't use. | ||
2691 | */ | ||
2692 | for ( ; nr_pages > num_pages; nr_pages--) | ||
2693 | put_page(wdata->pages[nr_pages - 1]); | ||
2694 | |||
2695 | wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); | ||
2696 | } | ||
2602 | 2697 | ||
2603 | wdata->sync_mode = WB_SYNC_ALL; | 2698 | wdata->sync_mode = WB_SYNC_ALL; |
2604 | wdata->nr_pages = nr_pages; | 2699 | wdata->nr_pages = nr_pages; |
@@ -2607,7 +2702,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, | |||
2607 | wdata->pid = pid; | 2702 | wdata->pid = pid; |
2608 | wdata->bytes = cur_len; | 2703 | wdata->bytes = cur_len; |
2609 | wdata->pagesz = PAGE_SIZE; | 2704 | wdata->pagesz = PAGE_SIZE; |
2610 | wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); | ||
2611 | wdata->credits = credits; | 2705 | wdata->credits = credits; |
2612 | wdata->ctx = ctx; | 2706 | wdata->ctx = ctx; |
2613 | kref_get(&ctx->refcount); | 2707 | kref_get(&ctx->refcount); |
@@ -2682,13 +2776,18 @@ restart_loop: | |||
2682 | INIT_LIST_HEAD(&tmp_list); | 2776 | INIT_LIST_HEAD(&tmp_list); |
2683 | list_del_init(&wdata->list); | 2777 | list_del_init(&wdata->list); |
2684 | 2778 | ||
2685 | iov_iter_advance(&tmp_from, | 2779 | if (ctx->direct_io) |
2780 | rc = cifs_resend_wdata( | ||
2781 | wdata, &tmp_list, ctx); | ||
2782 | else { | ||
2783 | iov_iter_advance(&tmp_from, | ||
2686 | wdata->offset - ctx->pos); | 2784 | wdata->offset - ctx->pos); |
2687 | 2785 | ||
2688 | rc = cifs_write_from_iter(wdata->offset, | 2786 | rc = cifs_write_from_iter(wdata->offset, |
2689 | wdata->bytes, &tmp_from, | 2787 | wdata->bytes, &tmp_from, |
2690 | ctx->cfile, cifs_sb, &tmp_list, | 2788 | ctx->cfile, cifs_sb, &tmp_list, |
2691 | ctx); | 2789 | ctx); |
2790 | } | ||
2692 | 2791 | ||
2693 | list_splice(&tmp_list, &ctx->list); | 2792 | list_splice(&tmp_list, &ctx->list); |
2694 | 2793 | ||
@@ -2701,8 +2800,9 @@ restart_loop: | |||
2701 | kref_put(&wdata->refcount, cifs_uncached_writedata_release); | 2800 | kref_put(&wdata->refcount, cifs_uncached_writedata_release); |
2702 | } | 2801 | } |
2703 | 2802 | ||
2704 | for (i = 0; i < ctx->npages; i++) | 2803 | if (!ctx->direct_io) |
2705 | put_page(ctx->bv[i].bv_page); | 2804 | for (i = 0; i < ctx->npages; i++) |
2805 | put_page(ctx->bv[i].bv_page); | ||
2706 | 2806 | ||
2707 | cifs_stats_bytes_written(tcon, ctx->total_len); | 2807 | cifs_stats_bytes_written(tcon, ctx->total_len); |
2708 | set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); | 2808 | set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); |
@@ -2717,7 +2817,8 @@ restart_loop: | |||
2717 | complete(&ctx->done); | 2817 | complete(&ctx->done); |
2718 | } | 2818 | } |
2719 | 2819 | ||
2720 | ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) | 2820 | static ssize_t __cifs_writev( |
2821 | struct kiocb *iocb, struct iov_iter *from, bool direct) | ||
2721 | { | 2822 | { |
2722 | struct file *file = iocb->ki_filp; | 2823 | struct file *file = iocb->ki_filp; |
2723 | ssize_t total_written = 0; | 2824 | ssize_t total_written = 0; |
@@ -2726,13 +2827,18 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) | |||
2726 | struct cifs_sb_info *cifs_sb; | 2827 | struct cifs_sb_info *cifs_sb; |
2727 | struct cifs_aio_ctx *ctx; | 2828 | struct cifs_aio_ctx *ctx; |
2728 | struct iov_iter saved_from = *from; | 2829 | struct iov_iter saved_from = *from; |
2830 | size_t len = iov_iter_count(from); | ||
2729 | int rc; | 2831 | int rc; |
2730 | 2832 | ||
2731 | /* | 2833 | /* |
2732 | * BB - optimize the way when signing is disabled. We can drop this | 2834 | * iov_iter_get_pages_alloc doesn't work with ITER_KVEC. |
2733 | * extra memory-to-memory copying and use iovec buffers for constructing | 2835 | * In this case, fall back to non-direct write function. |
2734 | * write request. | 2836 | * this could be improved by getting pages directly in ITER_KVEC |
2735 | */ | 2837 | */ |
2838 | if (direct && from->type & ITER_KVEC) { | ||
2839 | cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n"); | ||
2840 | direct = false; | ||
2841 | } | ||
2736 | 2842 | ||
2737 | rc = generic_write_checks(iocb, from); | 2843 | rc = generic_write_checks(iocb, from); |
2738 | if (rc <= 0) | 2844 | if (rc <= 0) |
@@ -2756,10 +2862,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) | |||
2756 | 2862 | ||
2757 | ctx->pos = iocb->ki_pos; | 2863 | ctx->pos = iocb->ki_pos; |
2758 | 2864 | ||
2759 | rc = setup_aio_ctx_iter(ctx, from, WRITE); | 2865 | if (direct) { |
2760 | if (rc) { | 2866 | ctx->direct_io = true; |
2761 | kref_put(&ctx->refcount, cifs_aio_ctx_release); | 2867 | ctx->iter = *from; |
2762 | return rc; | 2868 | ctx->len = len; |
2869 | } else { | ||
2870 | rc = setup_aio_ctx_iter(ctx, from, WRITE); | ||
2871 | if (rc) { | ||
2872 | kref_put(&ctx->refcount, cifs_aio_ctx_release); | ||
2873 | return rc; | ||
2874 | } | ||
2763 | } | 2875 | } |
2764 | 2876 | ||
2765 | /* grab a lock here due to read response handlers can access ctx */ | 2877 | /* grab a lock here due to read response handlers can access ctx */ |
@@ -2809,6 +2921,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) | |||
2809 | return total_written; | 2921 | return total_written; |
2810 | } | 2922 | } |
2811 | 2923 | ||
2924 | ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) | ||
2925 | { | ||
2926 | return __cifs_writev(iocb, from, true); | ||
2927 | } | ||
2928 | |||
2929 | ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) | ||
2930 | { | ||
2931 | return __cifs_writev(iocb, from, false); | ||
2932 | } | ||
2933 | |||
2812 | static ssize_t | 2934 | static ssize_t |
2813 | cifs_writev(struct kiocb *iocb, struct iov_iter *from) | 2935 | cifs_writev(struct kiocb *iocb, struct iov_iter *from) |
2814 | { | 2936 | { |