author		Trond Myklebust <Trond.Myklebust@netapp.com>	2006-03-20 13:44:36 -0500
committer	Trond Myklebust <Trond.Myklebust@netapp.com>	2006-03-20 13:44:36 -0500
commit		fad61490419b3e494f300e9b2579810ef3bcda31 (patch)
tree		1bca68921a83d4dc0219aa8d46c3c8e8313965c2
parent		e17b1fc4b35399935f00a635206e183d9292fe4f (diff)
nfs: Use UNSTABLE + COMMIT for NFS O_DIRECT writes
Currently NFS O_DIRECT writes use FILE_SYNC so that a COMMIT is not
necessary. This simplifies the internal logic, but this could be a
difficult workload for some servers.

Instead, let's send UNSTABLE writes, and after they all complete, send
a COMMIT for the dirty range. After the COMMIT returns successfully,
then do the wake_up or fire off aio_complete().

Test plan:
Async direct I/O tests against Solaris (or any server that requires
committed unstable writes). Reboot server during test.

Based on an earlier patch by Chuck Lever <cel@netapp.com>

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--	fs/nfs/direct.c		| 224
-rw-r--r--	include/linux/nfs_fs.h	|   1
2 files changed, 200 insertions, 25 deletions
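For illustration, here is a minimal user-space sketch of the UNSTABLE-write /
COMMIT / verifier-check state machine the commit message describes. The types
and helpers (nfs_verf_t, struct odirect_req, write_reply(), commit_reply())
are hypothetical names invented for this sketch, not kernel or RPC APIs; the
real logic lives in nfs_direct_write_result(), nfs_direct_commit_result() and
nfs_direct_write_complete() in the diff below.

/*
 * Hypothetical sketch only; not kernel code. Mirrors the UNSTABLE + COMMIT
 * handling added by this patch, using invented names.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint64_t nfs_verf_t;		/* stand-in for an opaque write verifier */

enum odirect_state {
	ODIRECT_DONE,			/* every reply so far was FILE_SYNC */
	ODIRECT_DO_COMMIT,		/* an unstable reply was received */
	ODIRECT_RESCHED_WRITES,		/* verifier changed: resend stably */
};

struct odirect_req {
	enum odirect_state state;
	nfs_verf_t verf;		/* verifier from the first unstable reply */
};

/* Fold one WRITE reply into the request state. */
static void write_reply(struct odirect_req *req, int committed_file_sync,
			nfs_verf_t verf)
{
	if (committed_file_sync)
		return;			/* already stable, no commit needed */

	switch (req->state) {
	case ODIRECT_DONE:
		req->verf = verf;	/* remember the first unstable verifier */
		req->state = ODIRECT_DO_COMMIT;
		break;
	case ODIRECT_DO_COMMIT:
		if (memcmp(&req->verf, &verf, sizeof(verf)))
			req->state = ODIRECT_RESCHED_WRITES;
		break;
	default:
		break;			/* already scheduled for resend */
	}
}

/* Fold the COMMIT reply in: a changed verifier also forces a resend. */
static void commit_reply(struct odirect_req *req, nfs_verf_t verf)
{
	if (memcmp(&req->verf, &verf, sizeof(verf)))
		req->state = ODIRECT_RESCHED_WRITES;
	else
		req->state = ODIRECT_DONE;
}

int main(void)
{
	struct odirect_req req = { .state = ODIRECT_DONE, .verf = 0 };

	write_reply(&req, 0, 0x1111);	/* unstable reply, verifier A */
	write_reply(&req, 0, 0x1111);	/* same verifier: COMMIT is enough */
	commit_reply(&req, 0x2222);	/* verifier changed: server rebooted */

	printf("final state: %d (2 == resend writes with FILE_SYNC)\n",
	       req.state);
	return 0;
}

The flag values play the same role as NFS_ODIRECT_DO_COMMIT and
NFS_ODIRECT_RESCHED_WRITES in the patch: a changed verifier means the server
may have lost the unstable data, so the only safe recovery is to resend the
writes with FILE_SYNC.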
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 737990dd4dfe..f0f2053c7a61 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -69,11 +69,15 @@ struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 
 	/* I/O parameters */
-	struct list_head	list;		/* nfs_read/write_data structs */
+	struct list_head	list,		/* nfs_read/write_data structs */
+				rewrite_list;	/* saved nfs_write_data structs */
 	struct file *		filp;		/* file descriptor */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	wait_queue_head_t	wait;		/* wait for i/o completion */
 	struct inode *		inode;		/* target file of i/o */
+	unsigned long		user_addr;	/* location of user's buffer */
+	size_t			user_count;	/* total bytes to move */
+	loff_t			pos;		/* starting offset in file */
 	struct page **		pages;		/* pages in our buffer */
 	unsigned int		npages;		/* count of pages */
 
@@ -82,8 +86,18 @@ struct nfs_direct_req {
 	int			outstanding;	/* i/os we're waiting for */
 	ssize_t			count,		/* bytes actually processed */
 				error;		/* any reported error */
+
+	/* commit state */
+	struct nfs_write_data *	commit_data;	/* special write_data for commits */
+	int			flags;
+#define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
+	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -160,11 +174,13 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	kref_init(&dreq->kref);
 	init_waitqueue_head(&dreq->wait);
 	INIT_LIST_HEAD(&dreq->list);
+	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
 	spin_lock_init(&dreq->lock);
 	dreq->outstanding = 0;
 	dreq->count = 0;
 	dreq->error = 0;
+	dreq->flags = 0;
 
 	return dreq;
 }
@@ -299,7 +315,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
  * For each nfs_read_data struct that was allocated on the list, dispatch
  * an NFS READ operation
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 {
 	struct file *file = dreq->filp;
 	struct inode *inode = file->f_mapping->host;
@@ -307,11 +323,13 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long
 			file->private_data;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
 	size_t rsize = NFS_SERVER(inode)->rsize;
 	unsigned int curpage, pgbase;
 
 	curpage = 0;
-	pgbase = user_addr & ~PAGE_MASK;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
 		size_t bytes;
@@ -373,6 +391,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 	if (!dreq)
 		return -ENOMEM;
 
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
 	igrab(inode);
@@ -383,13 +404,137 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_read_schedule(dreq, user_addr, count, pos);
+	nfs_direct_read_schedule(dreq);
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
 }
 
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
+{
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	while (!list_empty(&dreq->list)) {
+		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+		list_del(&data->pages);
+		nfs_writedata_release(data);
+	}
+}
+
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+	struct list_head *pos;
+
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	list_for_each(pos, &dreq->list)
+		dreq->outstanding++;
+	dreq->count = 0;
+
+	nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	/* Call the NFS version-specific code */
+	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+		return;
+	if (unlikely(task->tk_status < 0)) {
+		dreq->error = task->tk_status;
+		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+	}
+	if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+		dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+	}
+
+	dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+	nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+	.rpc_call_done = nfs_direct_commit_result,
+	.rpc_release = nfs_commit_release,
+};
+
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+{
+	struct file *file = dreq->filp;
+	struct nfs_open_context *ctx = (struct nfs_open_context *)
+			file->private_data;
+	struct nfs_write_data *data = dreq->commit_data;
+	struct rpc_task *task = &data->task;
+
+	data->inode = dreq->inode;
+	data->cred = ctx->cred;
+
+	data->args.fh = NFS_FH(data->inode);
+	data->args.offset = dreq->pos;
+	data->args.count = dreq->user_count;
+	data->res.count = 0;
+	data->res.fattr = &data->fattr;
+	data->res.verf = &data->verf;
+
+	rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+				&nfs_commit_direct_ops, data);
+	NFS_PROTO(data->inode)->commit_setup(data, 0);
+
+	data->task.tk_priority = RPC_PRIORITY_NORMAL;
+	data->task.tk_cookie = (unsigned long)data->inode;
+	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+	dreq->commit_data = NULL;
+
+	dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
+
+	lock_kernel();
+	rpc_execute(&data->task);
+	unlock_kernel();
+}
+
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+	int flags = dreq->flags;
+
+	dreq->flags = 0;
+	switch (flags) {
+		case NFS_ODIRECT_DO_COMMIT:
+			nfs_direct_commit_schedule(dreq);
+			break;
+		case NFS_ODIRECT_RESCHED_WRITES:
+			nfs_direct_write_reschedule(dreq);
+			break;
+		default:
+			nfs_end_data_update(inode);
+			if (dreq->commit_data != NULL)
+				nfs_commit_free(dreq->commit_data);
+			nfs_direct_free_writedata(dreq);
+			nfs_direct_complete(dreq);
+	}
+}
+
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+	dreq->commit_data = nfs_commit_alloc(0);
+	if (dreq->commit_data != NULL)
+		dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+	dreq->commit_data = NULL;
+}
+
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+	nfs_end_data_update(inode);
+	nfs_direct_free_writedata(dreq);
+	nfs_direct_complete(dreq);
+}
+#endif
+
 static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
 {
 	struct list_head *list;
@@ -424,14 +569,13 @@ static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize
 			break;
 		nbytes -= wsize;
 	}
+
+	nfs_alloc_commit_data(dreq);
+
 	kref_get(&dreq->kref);
 	return dreq;
 }
 
-/*
- * NB: Return the value of the first error return code. Subsequent
- * errors after the first one are ignored.
- */
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -440,41 +584,62 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 
 	if (nfs_writeback_done(task, data) != 0)
 		return;
-	/* If the server fell back to an UNSTABLE write, it's an error. */
-	if (unlikely(data->res.verf->committed != NFS_FILE_SYNC))
-		status = -EIO;
 
 	spin_lock(&dreq->lock);
 
 	if (likely(status >= 0))
 		dreq->count += data->res.count;
 	else
-		dreq->error = status;
+		dreq->error = task->tk_status;
 
+	if (data->res.verf->committed != NFS_FILE_SYNC) {
+		switch (dreq->flags) {
+			case 0:
+				memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+				dreq->flags = NFS_ODIRECT_DO_COMMIT;
+				break;
+			case NFS_ODIRECT_DO_COMMIT:
+				if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+					dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+				}
+		}
+	}
+	/* In case we have to resend */
+	data->args.stable = NFS_FILE_SYNC;
+
+	spin_unlock(&dreq->lock);
+}
+
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
+ */
+static void nfs_direct_write_release(void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	spin_lock(&dreq->lock);
 	if (--dreq->outstanding) {
 		spin_unlock(&dreq->lock);
 		return;
 	}
-
 	spin_unlock(&dreq->lock);
 
-	nfs_end_data_update(data->inode);
-	nfs_direct_complete(dreq);
+	nfs_direct_write_complete(dreq, data->inode);
 }
 
 static const struct rpc_call_ops nfs_write_direct_ops = {
 	.rpc_call_done = nfs_direct_write_result,
-	.rpc_release = nfs_writedata_release,
+	.rpc_release = nfs_direct_write_release,
 };
 
 /*
  * For each nfs_write_data struct that was allocated on the list, dispatch
  * an NFS WRITE operation
- *
- * XXX: For now, support only FILE_SYNC writes. Later we may add
- * support for UNSTABLE + COMMIT.
  */
-static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
 {
 	struct file *file = dreq->filp;
 	struct inode *inode = file->f_mapping->host;
@@ -482,11 +647,13 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 			file->private_data;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	unsigned int curpage, pgbase;
 
 	curpage = 0;
-	pgbase = user_addr & ~PAGE_MASK;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_write_data *data;
 		size_t bytes;
@@ -496,7 +663,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 			bytes = count;
 
 		data = list_entry(list->next, struct nfs_write_data, pages);
-		list_del_init(&data->pages);
+		list_move_tail(&data->pages, &dreq->rewrite_list);
 
 		data->inode = inode;
 		data->cred = ctx->cred;
@@ -512,7 +679,7 @@ static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long
 
 	rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
 			&nfs_write_direct_ops, data);
-	NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
+	NFS_PROTO(inode)->write_setup(data, sync);
 
 	data->task.tk_priority = RPC_PRIORITY_NORMAL;
 	data->task.tk_cookie = (unsigned long) inode;
@@ -544,11 +711,18 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
+	size_t wsize = NFS_SERVER(inode)->wsize;
+	int sync = 0;
 
-	dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize);
+	dreq = nfs_direct_write_alloc(count, wsize);
 	if (!dreq)
 		return -ENOMEM;
+	if (dreq->commit_data == NULL || count < wsize)
+		sync = FLUSH_STABLE;
 
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
 	igrab(inode);
@@ -562,7 +736,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	nfs_begin_data_update(inode);
 
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_write_schedule(dreq, user_addr, count, pos);
+	nfs_direct_write_schedule(dreq, sync);
 	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 423f202b881c..9f84c8a5ea43 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -422,6 +422,7 @@ void nfs_commit_free(struct nfs_write_data *p);
 extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 extern int nfs_commit_inode(struct inode *, int);
+extern void nfs_commit_release(void *wdata);
 #else
 static inline int
 nfs_commit_inode(struct inode *inode, int how)