aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Long Li <longli@microsoft.com> 2018-10-31 18:13:10 -0400
committer: Steve French <stfrench@microsoft.com> 2018-11-02 15:09:42 -0400
commit: 8c5f9c1ab7cb45f6584ce312b14fb310a8fa28f3 (patch)
tree: 04d1492038748e64e95f13c295aaf0f338f6af3e
parent: 6e6e2b86c29c6fcfa16ad9fdc7ea32027bea5d73 (diff)
CIFS: Add support for direct I/O write
With direct I/O write, user-supplied buffers are pinned in memory and data is transferred directly from the user buffers to the transport layer.

Change in v3: add support for kernel AIO.

Change in v4: refactor common write code into __cifs_writev for direct and non-direct I/O. Retry on direct I/O failure.

Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
-rw-r--r-- fs/cifs/cifsfs.h | 1
-rw-r--r-- fs/cifs/file.c | 204
2 files changed, 164 insertions, 41 deletions
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 3abea1a3f20c..4c3b5cfccc49 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -104,6 +104,7 @@ extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
104extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to); 104extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
105extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); 105extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
106extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); 106extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
107extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
107extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); 108extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
108extern int cifs_lock(struct file *, int, struct file_lock *); 109extern int cifs_lock(struct file *, int, struct file_lock *);
109extern int cifs_fsync(struct file *, loff_t, loff_t, int); 110extern int cifs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1be36076e960..937ffa79066b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2538,6 +2538,61 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2538} 2538}
2539 2539
2540static int 2540static int
2541cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2542 struct cifs_aio_ctx *ctx)
2543{
2544 int wait_retry = 0;
2545 unsigned int wsize, credits;
2546 int rc;
2547 struct TCP_Server_Info *server =
2548 tlink_tcon(wdata->cfile->tlink)->ses->server;
2549
2550 /*
2551 * Try to resend this wdata, waiting for credits up to 3 seconds.
2552 * Note: we are attempting to resend the whole wdata not in segments
2553 */
2554 do {
2555 rc = server->ops->wait_mtu_credits(
2556 server, wdata->bytes, &wsize, &credits);
2557
2558 if (rc)
2559 break;
2560
2561 if (wsize < wdata->bytes) {
2562 add_credits_and_wake_if(server, credits, 0);
2563 msleep(1000);
2564 wait_retry++;
2565 }
2566 } while (wsize < wdata->bytes && wait_retry < 3);
2567
2568 if (wsize < wdata->bytes) {
2569 rc = -EBUSY;
2570 goto out;
2571 }
2572
2573 rc = -EAGAIN;
2574 while (rc == -EAGAIN) {
2575 rc = 0;
2576 if (wdata->cfile->invalidHandle)
2577 rc = cifs_reopen_file(wdata->cfile, false);
2578 if (!rc)
2579 rc = server->ops->async_writev(wdata,
2580 cifs_uncached_writedata_release);
2581 }
2582
2583 if (!rc) {
2584 list_add_tail(&wdata->list, wdata_list);
2585 return 0;
2586 }
2587
2588 add_credits_and_wake_if(server, wdata->credits, 0);
2589out:
2590 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2591
2592 return rc;
2593}
2594
2595static int
2541cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, 2596cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2542 struct cifsFileInfo *open_file, 2597 struct cifsFileInfo *open_file,
2543 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, 2598 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
@@ -2551,6 +2606,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2551 loff_t saved_offset = offset; 2606 loff_t saved_offset = offset;
2552 pid_t pid; 2607 pid_t pid;
2553 struct TCP_Server_Info *server; 2608 struct TCP_Server_Info *server;
2609 struct page **pagevec;
2610 size_t start;
2554 2611
2555 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) 2612 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2556 pid = open_file->pid; 2613 pid = open_file->pid;
@@ -2567,38 +2624,76 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2567 if (rc) 2624 if (rc)
2568 break; 2625 break;
2569 2626
2570 nr_pages = get_numpages(wsize, len, &cur_len); 2627 if (ctx->direct_io) {
2571 wdata = cifs_writedata_alloc(nr_pages, 2628 cur_len = iov_iter_get_pages_alloc(
2629 from, &pagevec, wsize, &start);
2630 if (cur_len < 0) {
2631 cifs_dbg(VFS,
2632 "direct_writev couldn't get user pages "
2633 "(rc=%zd) iter type %d iov_offset %zd "
2634 "count %zd\n",
2635 cur_len, from->type,
2636 from->iov_offset, from->count);
2637 dump_stack();
2638 break;
2639 }
2640 iov_iter_advance(from, cur_len);
2641
2642 nr_pages =
2643 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2644
2645 wdata = cifs_writedata_direct_alloc(pagevec,
2572 cifs_uncached_writev_complete); 2646 cifs_uncached_writev_complete);
2573 if (!wdata) { 2647 if (!wdata) {
2574 rc = -ENOMEM; 2648 rc = -ENOMEM;
2575 add_credits_and_wake_if(server, credits, 0); 2649 add_credits_and_wake_if(server, credits, 0);
2576 break; 2650 break;
2577 } 2651 }
2578 2652
2579 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2580 if (rc) {
2581 kfree(wdata);
2582 add_credits_and_wake_if(server, credits, 0);
2583 break;
2584 }
2585 2653
2586 num_pages = nr_pages; 2654 wdata->page_offset = start;
2587 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages); 2655 wdata->tailsz =
2588 if (rc) { 2656 nr_pages > 1 ?
2589 for (i = 0; i < nr_pages; i++) 2657 cur_len - (PAGE_SIZE - start) -
2590 put_page(wdata->pages[i]); 2658 (nr_pages - 2) * PAGE_SIZE :
2591 kfree(wdata); 2659 cur_len;
2592 add_credits_and_wake_if(server, credits, 0); 2660 } else {
2593 break; 2661 nr_pages = get_numpages(wsize, len, &cur_len);
2594 } 2662 wdata = cifs_writedata_alloc(nr_pages,
2663 cifs_uncached_writev_complete);
2664 if (!wdata) {
2665 rc = -ENOMEM;
2666 add_credits_and_wake_if(server, credits, 0);
2667 break;
2668 }
2595 2669
2596 /* 2670 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2597 * Bring nr_pages down to the number of pages we actually used, 2671 if (rc) {
2598 * and free any pages that we didn't use. 2672 kfree(wdata);
2599 */ 2673 add_credits_and_wake_if(server, credits, 0);
2600 for ( ; nr_pages > num_pages; nr_pages--) 2674 break;
2601 put_page(wdata->pages[nr_pages - 1]); 2675 }
2676
2677 num_pages = nr_pages;
2678 rc = wdata_fill_from_iovec(
2679 wdata, from, &cur_len, &num_pages);
2680 if (rc) {
2681 for (i = 0; i < nr_pages; i++)
2682 put_page(wdata->pages[i]);
2683 kfree(wdata);
2684 add_credits_and_wake_if(server, credits, 0);
2685 break;
2686 }
2687
2688 /*
2689 * Bring nr_pages down to the number of pages we
2690 * actually used, and free any pages that we didn't use.
2691 */
2692 for ( ; nr_pages > num_pages; nr_pages--)
2693 put_page(wdata->pages[nr_pages - 1]);
2694
2695 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2696 }
2602 2697
2603 wdata->sync_mode = WB_SYNC_ALL; 2698 wdata->sync_mode = WB_SYNC_ALL;
2604 wdata->nr_pages = nr_pages; 2699 wdata->nr_pages = nr_pages;
@@ -2607,7 +2702,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2607 wdata->pid = pid; 2702 wdata->pid = pid;
2608 wdata->bytes = cur_len; 2703 wdata->bytes = cur_len;
2609 wdata->pagesz = PAGE_SIZE; 2704 wdata->pagesz = PAGE_SIZE;
2610 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2611 wdata->credits = credits; 2705 wdata->credits = credits;
2612 wdata->ctx = ctx; 2706 wdata->ctx = ctx;
2613 kref_get(&ctx->refcount); 2707 kref_get(&ctx->refcount);
@@ -2682,13 +2776,18 @@ restart_loop:
2682 INIT_LIST_HEAD(&tmp_list); 2776 INIT_LIST_HEAD(&tmp_list);
2683 list_del_init(&wdata->list); 2777 list_del_init(&wdata->list);
2684 2778
2685 iov_iter_advance(&tmp_from, 2779 if (ctx->direct_io)
2780 rc = cifs_resend_wdata(
2781 wdata, &tmp_list, ctx);
2782 else {
2783 iov_iter_advance(&tmp_from,
2686 wdata->offset - ctx->pos); 2784 wdata->offset - ctx->pos);
2687 2785
2688 rc = cifs_write_from_iter(wdata->offset, 2786 rc = cifs_write_from_iter(wdata->offset,
2689 wdata->bytes, &tmp_from, 2787 wdata->bytes, &tmp_from,
2690 ctx->cfile, cifs_sb, &tmp_list, 2788 ctx->cfile, cifs_sb, &tmp_list,
2691 ctx); 2789 ctx);
2790 }
2692 2791
2693 list_splice(&tmp_list, &ctx->list); 2792 list_splice(&tmp_list, &ctx->list);
2694 2793
@@ -2701,8 +2800,9 @@ restart_loop:
2701 kref_put(&wdata->refcount, cifs_uncached_writedata_release); 2800 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2702 } 2801 }
2703 2802
2704 for (i = 0; i < ctx->npages; i++) 2803 if (!ctx->direct_io)
2705 put_page(ctx->bv[i].bv_page); 2804 for (i = 0; i < ctx->npages; i++)
2805 put_page(ctx->bv[i].bv_page);
2706 2806
2707 cifs_stats_bytes_written(tcon, ctx->total_len); 2807 cifs_stats_bytes_written(tcon, ctx->total_len);
2708 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); 2808 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -2717,7 +2817,8 @@ restart_loop:
2717 complete(&ctx->done); 2817 complete(&ctx->done);
2718} 2818}
2719 2819
2720ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) 2820static ssize_t __cifs_writev(
2821 struct kiocb *iocb, struct iov_iter *from, bool direct)
2721{ 2822{
2722 struct file *file = iocb->ki_filp; 2823 struct file *file = iocb->ki_filp;
2723 ssize_t total_written = 0; 2824 ssize_t total_written = 0;
@@ -2726,13 +2827,18 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2726 struct cifs_sb_info *cifs_sb; 2827 struct cifs_sb_info *cifs_sb;
2727 struct cifs_aio_ctx *ctx; 2828 struct cifs_aio_ctx *ctx;
2728 struct iov_iter saved_from = *from; 2829 struct iov_iter saved_from = *from;
2830 size_t len = iov_iter_count(from);
2729 int rc; 2831 int rc;
2730 2832
2731 /* 2833 /*
2732 * BB - optimize the way when signing is disabled. We can drop this 2834 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
2733 * extra memory-to-memory copying and use iovec buffers for constructing 2835 * In this case, fall back to non-direct write function.
2734 * write request. 2836 * this could be improved by getting pages directly in ITER_KVEC
2735 */ 2837 */
2838 if (direct && from->type & ITER_KVEC) {
2839 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
2840 direct = false;
2841 }
2736 2842
2737 rc = generic_write_checks(iocb, from); 2843 rc = generic_write_checks(iocb, from);
2738 if (rc <= 0) 2844 if (rc <= 0)
@@ -2756,10 +2862,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2756 2862
2757 ctx->pos = iocb->ki_pos; 2863 ctx->pos = iocb->ki_pos;
2758 2864
2759 rc = setup_aio_ctx_iter(ctx, from, WRITE); 2865 if (direct) {
2760 if (rc) { 2866 ctx->direct_io = true;
2761 kref_put(&ctx->refcount, cifs_aio_ctx_release); 2867 ctx->iter = *from;
2762 return rc; 2868 ctx->len = len;
2869 } else {
2870 rc = setup_aio_ctx_iter(ctx, from, WRITE);
2871 if (rc) {
2872 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2873 return rc;
2874 }
2763 } 2875 }
2764 2876
2765 /* grab a lock here due to read response handlers can access ctx */ 2877 /* grab a lock here due to read response handlers can access ctx */
@@ -2809,6 +2921,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2809 return total_written; 2921 return total_written;
2810} 2922}
2811 2923
2924ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
2925{
2926 return __cifs_writev(iocb, from, true);
2927}
2928
2929ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2930{
2931 return __cifs_writev(iocb, from, false);
2932}
2933
2812static ssize_t 2934static ssize_t
2813cifs_writev(struct kiocb *iocb, struct iov_iter *from) 2935cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2814{ 2936{