about | summary | refs | log | tree | commit | diff | stats
path: root/fs/nfs/direct.c
diff options
context:
space:
mode:
author: Fred Isaman <iisaman@netapp.com> 2012-04-20 14:47:57 -0400
committer: Trond Myklebust <Trond.Myklebust@netapp.com> 2012-04-27 14:10:39 -0400
commit: 1763da1234cba663b849476d451bdccac5147859 (patch)
tree: b2b4bf3e1ce8624217c78508054fc620aac79a55 /fs/nfs/direct.c
parent: 56f9cd684d25f1bae901c5a872b8427f8b417c3f (diff)
NFS: rewrite directio write to use async coalesce code
This also has the advantage that it allows directio to use pnfs.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'fs/nfs/direct.c')
-rw-r--r-- fs/nfs/direct.c 527
1 files changed, 230 insertions, 297 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4ba9a2c839bb..d44de2f83952 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@
56 56
57#include "internal.h" 57#include "internal.h"
58#include "iostat.h" 58#include "iostat.h"
59#include "pnfs.h"
59 60
60#define NFSDBG_FACILITY NFSDBG_VFS 61#define NFSDBG_FACILITY NFSDBG_VFS
61 62
@@ -81,16 +82,19 @@ struct nfs_direct_req {
81 struct completion completion; /* wait for i/o completion */ 82 struct completion completion; /* wait for i/o completion */
82 83
83 /* commit state */ 84 /* commit state */
84 struct list_head rewrite_list; /* saved nfs_write_data structs */ 85 struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
85 struct nfs_commit_data *commit_data; /* special write_data for commits */ 86 struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
87 struct work_struct work;
86 int flags; 88 int flags;
87#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ 89#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
88#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ 90#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
89 struct nfs_writeverf verf; /* unstable write verifier */ 91 struct nfs_writeverf verf; /* unstable write verifier */
90}; 92};
91 93
94static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
95static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
92static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); 96static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
93static const struct rpc_call_ops nfs_write_direct_ops; 97static void nfs_direct_write_schedule_work(struct work_struct *work);
94 98
95static inline void get_dreq(struct nfs_direct_req *dreq) 99static inline void get_dreq(struct nfs_direct_req *dreq)
96{ 100{
@@ -131,6 +135,16 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
131 page_cache_release(pages[i]); 135 page_cache_release(pages[i]);
132} 136}
133 137
138void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
139 struct nfs_direct_req *dreq)
140{
141 cinfo->lock = &dreq->lock;
142 cinfo->mds = &dreq->mds_cinfo;
143 cinfo->ds = &dreq->ds_cinfo;
144 cinfo->dreq = dreq;
145 cinfo->completion_ops = &nfs_direct_commit_completion_ops;
146}
147
134static inline struct nfs_direct_req *nfs_direct_req_alloc(void) 148static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
135{ 149{
136 struct nfs_direct_req *dreq; 150 struct nfs_direct_req *dreq;
@@ -142,7 +156,11 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
142 kref_init(&dreq->kref); 156 kref_init(&dreq->kref);
143 kref_get(&dreq->kref); 157 kref_get(&dreq->kref);
144 init_completion(&dreq->completion); 158 init_completion(&dreq->completion);
145 INIT_LIST_HEAD(&dreq->rewrite_list); 159 dreq->mds_cinfo.ncommit = 0;
160 atomic_set(&dreq->mds_cinfo.rpcs_out, 0);
161 INIT_LIST_HEAD(&dreq->mds_cinfo.list);
162 INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
163 memset(&dreq->ds_cinfo, 0, sizeof(dreq->ds_cinfo));
146 dreq->iocb = NULL; 164 dreq->iocb = NULL;
147 dreq->ctx = NULL; 165 dreq->ctx = NULL;
148 dreq->l_ctx = NULL; 166 dreq->l_ctx = NULL;
@@ -457,112 +475,60 @@ out:
457 return result; 475 return result;
458} 476}
459 477
460static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
461{
462 struct nfs_write_data *data = &whdr->rpc_data;
463
464 if (data->pages.pagevec != data->pages.page_array)
465 kfree(data->pages.pagevec);
466 nfs_writehdr_free(&whdr->header);
467}
468
469static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
470{
471 while (!list_empty(&dreq->rewrite_list)) {
472 struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
473 struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
474 struct nfs_page_array *p = &whdr->rpc_data.pages;
475
476 list_del(&hdr->pages);
477 nfs_direct_release_pages(p->pagevec, p->npages);
478 nfs_direct_writehdr_release(whdr);
479 }
480}
481
482#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 478#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
483static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 479static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
484{ 480{
485 struct inode *inode = dreq->inode; 481 struct nfs_pageio_descriptor desc;
486 struct list_head *p; 482 struct nfs_page *req, *tmp;
487 struct nfs_write_data *data; 483 LIST_HEAD(reqs);
488 struct nfs_pgio_header *hdr; 484 struct nfs_commit_info cinfo;
489 struct rpc_task *task; 485 LIST_HEAD(failed);
490 struct rpc_message msg = { 486
491 .rpc_cred = dreq->ctx->cred, 487 nfs_init_cinfo_from_dreq(&cinfo, dreq);
492 }; 488 pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
493 struct rpc_task_setup task_setup_data = { 489 spin_lock(cinfo.lock);
494 .rpc_client = NFS_CLIENT(inode), 490 nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
495 .rpc_message = &msg, 491 spin_unlock(cinfo.lock);
496 .callback_ops = &nfs_write_direct_ops,
497 .workqueue = nfsiod_workqueue,
498 .flags = RPC_TASK_ASYNC,
499 };
500 492
501 dreq->count = 0; 493 dreq->count = 0;
502 get_dreq(dreq); 494 get_dreq(dreq);
503 495
504 list_for_each(p, &dreq->rewrite_list) { 496 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
505 hdr = list_entry(p, struct nfs_pgio_header, pages); 497 &nfs_direct_write_completion_ops);
506 data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data; 498 desc.pg_dreq = dreq;
507
508 get_dreq(dreq);
509
510 /* Use stable writes */
511 data->args.stable = NFS_FILE_SYNC;
512
513 /*
514 * Reset data->res.
515 */
516 nfs_fattr_init(&data->fattr);
517 data->res.count = data->args.count;
518 memset(&data->verf, 0, sizeof(data->verf));
519
520 /*
521 * Reuse data->task; data->args should not have changed
522 * since the original request was sent.
523 */
524 task_setup_data.task = &data->task;
525 task_setup_data.callback_data = data;
526 msg.rpc_argp = &data->args;
527 msg.rpc_resp = &data->res;
528 NFS_PROTO(inode)->write_setup(data, &msg);
529
530 /*
531 * We're called via an RPC callback, so BKL is already held.
532 */
533 task = rpc_run_task(&task_setup_data);
534 if (!IS_ERR(task))
535 rpc_put_task(task);
536
537 dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
538 data->task.tk_pid,
539 inode->i_sb->s_id,
540 (long long)NFS_FILEID(inode),
541 data->args.count,
542 (unsigned long long)data->args.offset);
543 }
544 499
545 if (put_dreq(dreq)) 500 list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
546 nfs_direct_write_complete(dreq, inode); 501 if (!nfs_pageio_add_request(&desc, req)) {
547} 502 nfs_list_add_request(req, &failed);
503 spin_lock(cinfo.lock);
504 dreq->flags = 0;
505 dreq->error = -EIO;
506 spin_unlock(cinfo.lock);
507 }
508 }
509 nfs_pageio_complete(&desc);
548 510
549static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) 511 while (!list_empty(&failed)) {
550{ 512 page_cache_release(req->wb_page);
551 struct nfs_commit_data *data = calldata; 513 nfs_release_request(req);
514 nfs_unlock_request(req);
515 }
552 516
553 /* Call the NFS version-specific code */ 517 if (put_dreq(dreq))
554 NFS_PROTO(data->inode)->commit_done(task, data); 518 nfs_direct_write_complete(dreq, dreq->inode);
555} 519}
556 520
557static void nfs_direct_commit_release(void *calldata) 521static void nfs_direct_commit_complete(struct nfs_commit_data *data)
558{ 522{
559 struct nfs_commit_data *data = calldata;
560 struct nfs_direct_req *dreq = data->dreq; 523 struct nfs_direct_req *dreq = data->dreq;
524 struct nfs_commit_info cinfo;
525 struct nfs_page *req;
561 int status = data->task.tk_status; 526 int status = data->task.tk_status;
562 527
528 nfs_init_cinfo_from_dreq(&cinfo, dreq);
563 if (status < 0) { 529 if (status < 0) {
564 dprintk("NFS: %5u commit failed with error %d.\n", 530 dprintk("NFS: %5u commit failed with error %d.\n",
565 data->task.tk_pid, status); 531 data->task.tk_pid, status);
566 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 532 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
567 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { 533 } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
568 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); 534 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -570,59 +536,49 @@ static void nfs_direct_commit_release(void *calldata)
570 } 536 }
571 537
572 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 538 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
573 nfs_direct_write_complete(dreq, data->inode); 539 while (!list_empty(&data->pages)) {
574 nfs_commit_free(data); 540 req = nfs_list_entry(data->pages.next);
541 nfs_list_remove_request(req);
542 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
543 /* Note the rewrite will go through mds */
544 nfs_mark_request_commit(req, NULL, &cinfo);
545 } else {
546 page_cache_release(req->wb_page);
547 nfs_release_request(req);
548 }
549 nfs_unlock_request(req);
550 }
551
552 if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
553 nfs_direct_write_complete(dreq, data->inode);
575} 554}
576 555
577static const struct rpc_call_ops nfs_commit_direct_ops = { 556static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
578 .rpc_call_prepare = nfs_commit_prepare, 557{
579 .rpc_call_done = nfs_direct_commit_result, 558 /* There is no lock to clear */
580 .rpc_release = nfs_direct_commit_release, 559}
560
561static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
562 .completion = nfs_direct_commit_complete,
563 .error_cleanup = nfs_direct_error_cleanup,
581}; 564};
582 565
583static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 566static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
584{ 567{
585 struct nfs_commit_data *data = dreq->commit_data; 568 int res;
586 struct rpc_task *task; 569 struct nfs_commit_info cinfo;
587 struct rpc_message msg = { 570 LIST_HEAD(mds_list);
588 .rpc_argp = &data->args, 571
589 .rpc_resp = &data->res, 572 nfs_init_cinfo_from_dreq(&cinfo, dreq);
590 .rpc_cred = dreq->ctx->cred, 573 nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
591 }; 574 res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
592 struct rpc_task_setup task_setup_data = { 575 if (res < 0) /* res == -ENOMEM */
593 .task = &data->task, 576 nfs_direct_write_reschedule(dreq);
594 .rpc_client = NFS_CLIENT(dreq->inode),
595 .rpc_message = &msg,
596 .callback_ops = &nfs_commit_direct_ops,
597 .callback_data = data,
598 .workqueue = nfsiod_workqueue,
599 .flags = RPC_TASK_ASYNC,
600 };
601
602 data->inode = dreq->inode;
603 data->cred = msg.rpc_cred;
604
605 data->args.fh = NFS_FH(data->inode);
606 data->args.offset = 0;
607 data->args.count = 0;
608 data->res.fattr = &data->fattr;
609 data->res.verf = &data->verf;
610 nfs_fattr_init(&data->fattr);
611
612 NFS_PROTO(data->inode)->commit_setup(data, &msg);
613
614 /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
615 dreq->commit_data = NULL;
616
617 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
618
619 task = rpc_run_task(&task_setup_data);
620 if (!IS_ERR(task))
621 rpc_put_task(task);
622} 577}
623 578
624static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 579static void nfs_direct_write_schedule_work(struct work_struct *work)
625{ 580{
581 struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
626 int flags = dreq->flags; 582 int flags = dreq->flags;
627 583
628 dreq->flags = 0; 584 dreq->flags = 0;
@@ -634,90 +590,29 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
634 nfs_direct_write_reschedule(dreq); 590 nfs_direct_write_reschedule(dreq);
635 break; 591 break;
636 default: 592 default:
637 if (dreq->commit_data != NULL) 593 nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
638 nfs_commit_free(dreq->commit_data);
639 nfs_direct_free_writedata(dreq);
640 nfs_zap_mapping(inode, inode->i_mapping);
641 nfs_direct_complete(dreq); 594 nfs_direct_complete(dreq);
642 } 595 }
643} 596}
644 597
645static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) 598static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
646{ 599{
647 dreq->commit_data = nfs_commitdata_alloc(); 600 schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
648 if (dreq->commit_data != NULL)
649 dreq->commit_data->dreq = dreq;
650} 601}
602
651#else 603#else
652static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
653{
654 dreq->commit_data = NULL;
655}
656 604
657static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) 605static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
658{ 606{
659 nfs_direct_free_writedata(dreq);
660 nfs_zap_mapping(inode, inode->i_mapping); 607 nfs_zap_mapping(inode, inode->i_mapping);
661 nfs_direct_complete(dreq); 608 nfs_direct_complete(dreq);
662} 609}
663#endif 610#endif
664 611
665static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
666{
667 struct nfs_write_data *data = calldata;
668
669 nfs_writeback_done(task, data);
670}
671
672/* 612/*
673 * NB: Return the value of the first error return code. Subsequent 613 * NB: Return the value of the first error return code. Subsequent
674 * errors after the first one are ignored. 614 * errors after the first one are ignored.
675 */ 615 */
676static void nfs_direct_write_release(void *calldata)
677{
678 struct nfs_write_data *data = calldata;
679 struct nfs_pgio_header *hdr = data->header;
680 struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req;
681 int status = data->task.tk_status;
682
683 spin_lock(&dreq->lock);
684
685 if (unlikely(status < 0)) {
686 /* An error has occurred, so we should not commit */
687 dreq->flags = 0;
688 dreq->error = status;
689 }
690 if (unlikely(dreq->error != 0))
691 goto out_unlock;
692
693 dreq->count += data->res.count;
694
695 if (data->res.verf->committed != NFS_FILE_SYNC) {
696 switch (dreq->flags) {
697 case 0:
698 memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
699 dreq->flags = NFS_ODIRECT_DO_COMMIT;
700 break;
701 case NFS_ODIRECT_DO_COMMIT:
702 if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
703 dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
704 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
705 }
706 }
707 }
708out_unlock:
709 spin_unlock(&dreq->lock);
710
711 if (put_dreq(dreq))
712 nfs_direct_write_complete(dreq, hdr->inode);
713}
714
715static const struct rpc_call_ops nfs_write_direct_ops = {
716 .rpc_call_prepare = nfs_write_prepare,
717 .rpc_call_done = nfs_direct_write_result,
718 .rpc_release = nfs_direct_write_release,
719};
720
721/* 616/*
722 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE 617 * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
723 * operation. If nfs_writedata_alloc() or get_user_pages() fails, 618 * operation. If nfs_writedata_alloc() or get_user_pages() fails,
@@ -725,143 +620,181 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
725 * handled automatically by nfs_direct_write_result(). Otherwise, if 620 * handled automatically by nfs_direct_write_result(). Otherwise, if
726 * no requests have been sent, just return an error. 621 * no requests have been sent, just return an error.
727 */ 622 */
728static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, 623static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
729 const struct iovec *iov, 624 const struct iovec *iov,
730 loff_t pos, int sync) 625 loff_t pos)
731{ 626{
627 struct nfs_direct_req *dreq = desc->pg_dreq;
732 struct nfs_open_context *ctx = dreq->ctx; 628 struct nfs_open_context *ctx = dreq->ctx;
733 struct inode *inode = ctx->dentry->d_inode; 629 struct inode *inode = ctx->dentry->d_inode;
734 unsigned long user_addr = (unsigned long)iov->iov_base; 630 unsigned long user_addr = (unsigned long)iov->iov_base;
735 size_t count = iov->iov_len; 631 size_t count = iov->iov_len;
736 struct rpc_task *task;
737 struct rpc_message msg = {
738 .rpc_cred = ctx->cred,
739 };
740 struct rpc_task_setup task_setup_data = {
741 .rpc_client = NFS_CLIENT(inode),
742 .rpc_message = &msg,
743 .callback_ops = &nfs_write_direct_ops,
744 .workqueue = nfsiod_workqueue,
745 .flags = RPC_TASK_ASYNC,
746 };
747 size_t wsize = NFS_SERVER(inode)->wsize; 632 size_t wsize = NFS_SERVER(inode)->wsize;
748 unsigned int pgbase; 633 unsigned int pgbase;
749 int result; 634 int result;
750 ssize_t started = 0; 635 ssize_t started = 0;
636 struct page **pagevec = NULL;
637 unsigned int npages;
751 638
752 do { 639 do {
753 struct nfs_write_header *whdr;
754 struct nfs_write_data *data;
755 struct nfs_page_array *pages;
756 size_t bytes; 640 size_t bytes;
641 int i;
757 642
758 pgbase = user_addr & ~PAGE_MASK; 643 pgbase = user_addr & ~PAGE_MASK;
759 bytes = min(wsize,count); 644 bytes = min(max(wsize, PAGE_SIZE), count);
760 645
761 result = -ENOMEM; 646 result = -ENOMEM;
762 whdr = nfs_writehdr_alloc(); 647 npages = nfs_page_array_len(pgbase, bytes);
763 if (unlikely(!whdr)) 648 if (!pagevec)
649 pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
650 if (!pagevec)
764 break; 651 break;
765 652
766 data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes));
767 if (!data) {
768 nfs_writehdr_free(&whdr->header);
769 break;
770 }
771 data->header = &whdr->header;
772 atomic_inc(&data->header->refcnt);
773 pages = &data->pages;
774
775 down_read(&current->mm->mmap_sem); 653 down_read(&current->mm->mmap_sem);
776 result = get_user_pages(current, current->mm, user_addr, 654 result = get_user_pages(current, current->mm, user_addr,
777 pages->npages, 0, 0, pages->pagevec, NULL); 655 npages, 0, 0, pagevec, NULL);
778 up_read(&current->mm->mmap_sem); 656 up_read(&current->mm->mmap_sem);
779 if (result < 0) { 657 if (result < 0)
780 nfs_direct_writehdr_release(whdr);
781 break; 658 break;
782 } 659
783 if ((unsigned)result < pages->npages) { 660 if ((unsigned)result < npages) {
784 bytes = result * PAGE_SIZE; 661 bytes = result * PAGE_SIZE;
785 if (bytes <= pgbase) { 662 if (bytes <= pgbase) {
786 nfs_direct_release_pages(pages->pagevec, result); 663 nfs_direct_release_pages(pagevec, result);
787 nfs_direct_writehdr_release(whdr);
788 break; 664 break;
789 } 665 }
790 bytes -= pgbase; 666 bytes -= pgbase;
791 pages->npages = result; 667 npages = result;
792 } 668 }
793 669
794 get_dreq(dreq); 670 for (i = 0; i < npages; i++) {
795 671 struct nfs_page *req;
796 list_move_tail(&whdr->header.pages, &dreq->rewrite_list); 672 unsigned int req_len = min(bytes, PAGE_SIZE - pgbase);
797
798 whdr->header.req = (struct nfs_page *) dreq;
799 whdr->header.inode = inode;
800 whdr->header.cred = msg.rpc_cred;
801 data->args.fh = NFS_FH(inode);
802 data->args.context = ctx;
803 data->args.lock_context = dreq->l_ctx;
804 data->args.offset = pos;
805 data->args.pgbase = pgbase;
806 data->args.pages = pages->pagevec;
807 data->args.count = bytes;
808 data->args.stable = sync;
809 data->res.fattr = &data->fattr;
810 data->res.count = bytes;
811 data->res.verf = &data->verf;
812 nfs_fattr_init(&data->fattr);
813
814 task_setup_data.task = &data->task;
815 task_setup_data.callback_data = data;
816 msg.rpc_argp = &data->args;
817 msg.rpc_resp = &data->res;
818 NFS_PROTO(inode)->write_setup(data, &msg);
819
820 task = rpc_run_task(&task_setup_data);
821 if (IS_ERR(task))
822 break;
823 673
824 dprintk("NFS: %5u initiated direct write call " 674 req = nfs_create_request(dreq->ctx, dreq->inode,
825 "(req %s/%Ld, %zu bytes @ offset %Lu)\n", 675 pagevec[i],
826 task->tk_pid, 676 pgbase, req_len);
827 inode->i_sb->s_id, 677 if (IS_ERR(req)) {
828 (long long)NFS_FILEID(inode), 678 nfs_direct_release_pages(pagevec + i,
829 bytes, 679 npages - i);
830 (unsigned long long)data->args.offset); 680 result = PTR_ERR(req);
831 rpc_put_task(task); 681 break;
832 682 }
833 started += bytes; 683 nfs_lock_request(req);
834 user_addr += bytes; 684 req->wb_index = pos >> PAGE_SHIFT;
835 pos += bytes; 685 req->wb_offset = pos & ~PAGE_MASK;
836 686 if (!nfs_pageio_add_request(desc, req)) {
837 /* FIXME: Remove this useless math from the final patch */ 687 result = desc->pg_error;
838 pgbase += bytes; 688 nfs_unlock_request(req);
839 pgbase &= ~PAGE_MASK; 689 nfs_release_request(req);
840 BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); 690 nfs_direct_release_pages(pagevec + i,
841 691 npages - i);
842 count -= bytes; 692 }
693 pgbase = 0;
694 bytes -= req_len;
695 started += req_len;
696 user_addr += req_len;
697 pos += req_len;
698 count -= req_len;
699 }
843 } while (count != 0); 700 } while (count != 0);
844 701
702 kfree(pagevec);
703
845 if (started) 704 if (started)
846 return started; 705 return started;
847 return result < 0 ? (ssize_t) result : -EFAULT; 706 return result < 0 ? (ssize_t) result : -EFAULT;
848} 707}
849 708
709static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
710{
711 struct nfs_direct_req *dreq = hdr->dreq;
712 struct nfs_commit_info cinfo;
713 int bit = -1;
714 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
715
716 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
717 goto out_put;
718
719 nfs_init_cinfo_from_dreq(&cinfo, dreq);
720
721 spin_lock(&dreq->lock);
722
723 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
724 dreq->flags = 0;
725 dreq->error = hdr->error;
726 }
727 if (dreq->error != 0)
728 bit = NFS_IOHDR_ERROR;
729 else {
730 dreq->count += hdr->good_bytes;
731 if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
732 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
733 bit = NFS_IOHDR_NEED_RESCHED;
734 } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
735 if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
736 bit = NFS_IOHDR_NEED_RESCHED;
737 else if (dreq->flags == 0) {
738 memcpy(&dreq->verf, &req->wb_verf,
739 sizeof(dreq->verf));
740 bit = NFS_IOHDR_NEED_COMMIT;
741 dreq->flags = NFS_ODIRECT_DO_COMMIT;
742 } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
743 if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
744 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
745 bit = NFS_IOHDR_NEED_RESCHED;
746 } else
747 bit = NFS_IOHDR_NEED_COMMIT;
748 }
749 }
750 }
751 spin_unlock(&dreq->lock);
752
753 while (!list_empty(&hdr->pages)) {
754 req = nfs_list_entry(hdr->pages.next);
755 nfs_list_remove_request(req);
756 switch (bit) {
757 case NFS_IOHDR_NEED_RESCHED:
758 case NFS_IOHDR_NEED_COMMIT:
759 nfs_mark_request_commit(req, hdr->lseg, &cinfo);
760 break;
761 default:
762 page_cache_release(req->wb_page);
763 nfs_release_request(req);
764 }
765 nfs_unlock_request(req);
766 }
767
768out_put:
769 if (put_dreq(dreq))
770 nfs_direct_write_complete(dreq, hdr->inode);
771 hdr->release(hdr);
772}
773
774static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
775 .error_cleanup = nfs_sync_pgio_error,
776 .init_hdr = nfs_direct_pgio_init,
777 .completion = nfs_direct_write_completion,
778};
779
850static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 780static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
851 const struct iovec *iov, 781 const struct iovec *iov,
852 unsigned long nr_segs, 782 unsigned long nr_segs,
853 loff_t pos, int sync) 783 loff_t pos)
854{ 784{
785 struct nfs_pageio_descriptor desc;
855 ssize_t result = 0; 786 ssize_t result = 0;
856 size_t requested_bytes = 0; 787 size_t requested_bytes = 0;
857 unsigned long seg; 788 unsigned long seg;
858 789
790 nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
791 &nfs_direct_write_completion_ops);
792 desc.pg_dreq = dreq;
859 get_dreq(dreq); 793 get_dreq(dreq);
860 794
861 for (seg = 0; seg < nr_segs; seg++) { 795 for (seg = 0; seg < nr_segs; seg++) {
862 const struct iovec *vec = &iov[seg]; 796 const struct iovec *vec = &iov[seg];
863 result = nfs_direct_write_schedule_segment(dreq, vec, 797 result = nfs_direct_write_schedule_segment(&desc, vec, pos);
864 pos, sync);
865 if (result < 0) 798 if (result < 0)
866 break; 799 break;
867 requested_bytes += result; 800 requested_bytes += result;
@@ -869,6 +802,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
869 break; 802 break;
870 pos += vec->iov_len; 803 pos += vec->iov_len;
871 } 804 }
805 nfs_pageio_complete(&desc);
872 806
873 /* 807 /*
874 * If no bytes were started, return the error, and let the 808 * If no bytes were started, return the error, and let the
@@ -891,16 +825,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
891 ssize_t result = -ENOMEM; 825 ssize_t result = -ENOMEM;
892 struct inode *inode = iocb->ki_filp->f_mapping->host; 826 struct inode *inode = iocb->ki_filp->f_mapping->host;
893 struct nfs_direct_req *dreq; 827 struct nfs_direct_req *dreq;
894 size_t wsize = NFS_SERVER(inode)->wsize;
895 int sync = NFS_UNSTABLE;
896 828
897 dreq = nfs_direct_req_alloc(); 829 dreq = nfs_direct_req_alloc();
898 if (!dreq) 830 if (!dreq)
899 goto out; 831 goto out;
900 nfs_alloc_commit_data(dreq);
901
902 if (dreq->commit_data == NULL || count <= wsize)
903 sync = NFS_FILE_SYNC;
904 832
905 dreq->inode = inode; 833 dreq->inode = inode;
906 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 834 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -910,7 +838,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
910 if (!is_sync_kiocb(iocb)) 838 if (!is_sync_kiocb(iocb))
911 dreq->iocb = iocb; 839 dreq->iocb = iocb;
912 840
913 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); 841 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
914 if (!result) 842 if (!result)
915 result = nfs_direct_wait(dreq); 843 result = nfs_direct_wait(dreq);
916out_release: 844out_release:
@@ -1030,10 +958,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
1030 task_io_account_write(count); 958 task_io_account_write(count);
1031 959
1032 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 960 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
961 if (retval > 0) {
962 struct inode *inode = mapping->host;
1033 963
1034 if (retval > 0)
1035 iocb->ki_pos = pos + retval; 964 iocb->ki_pos = pos + retval;
1036 965 spin_lock(&inode->i_lock);
966 if (i_size_read(inode) < iocb->ki_pos)
967 i_size_write(inode, iocb->ki_pos);
968 spin_unlock(&inode->i_lock);
969 }
1037out: 970out:
1038 return retval; 971 return retval;
1039} 972}