diff options
-rw-r--r-- | fs/nfs/direct.c | 240 | ||||
-rw-r--r-- | fs/nfs/write.c | 3 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 2 |
3 files changed, 162 insertions, 83 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4df21ce28e17..dea3239cdded 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -384,106 +384,185 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size | |||
384 | return result; | 384 | return result; |
385 | } | 385 | } |
386 | 386 | ||
387 | static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) | 387 | static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize) |
388 | { | 388 | { |
389 | const unsigned int wsize = NFS_SERVER(inode)->wsize; | 389 | struct list_head *list; |
390 | size_t request; | 390 | struct nfs_direct_req *dreq; |
391 | int curpage, need_commit; | 391 | unsigned int writes = 0; |
392 | ssize_t result, tot_bytes; | 392 | unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
393 | struct nfs_writeverf first_verf; | ||
394 | struct nfs_write_data *wdata; | ||
395 | |||
396 | wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); | ||
397 | if (!wdata) | ||
398 | return -ENOMEM; | ||
399 | 393 | ||
400 | wdata->inode = inode; | 394 | dreq = nfs_direct_req_alloc(); |
401 | wdata->cred = ctx->cred; | 395 | if (!dreq) |
402 | wdata->args.fh = NFS_FH(inode); | 396 | return NULL; |
403 | wdata->args.context = ctx; | 397 | |
404 | wdata->args.stable = NFS_UNSTABLE; | 398 | list = &dreq->list; |
405 | if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) | 399 | for(;;) { |
406 | wdata->args.stable = NFS_FILE_SYNC; | 400 | struct nfs_write_data *data = nfs_writedata_alloc(wpages); |
407 | wdata->res.fattr = &wdata->fattr; | 401 | |
408 | wdata->res.verf = &wdata->verf; | 402 | if (unlikely(!data)) { |
403 | while (!list_empty(list)) { | ||
404 | data = list_entry(list->next, | ||
405 | struct nfs_write_data, pages); | ||
406 | list_del(&data->pages); | ||
407 | nfs_writedata_free(data); | ||
408 | } | ||
409 | kref_put(&dreq->kref, nfs_direct_req_release); | ||
410 | return NULL; | ||
411 | } | ||
412 | |||
413 | INIT_LIST_HEAD(&data->pages); | ||
414 | list_add(&data->pages, list); | ||
415 | |||
416 | data->req = (struct nfs_page *) dreq; | ||
417 | writes++; | ||
418 | if (nbytes <= wsize) | ||
419 | break; | ||
420 | nbytes -= wsize; | ||
421 | } | ||
422 | kref_get(&dreq->kref); | ||
423 | atomic_set(&dreq->complete, writes); | ||
424 | return dreq; | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * Collects and returns the final error value/byte-count. | ||
429 | */ | ||
430 | static ssize_t nfs_direct_write_wait(struct nfs_direct_req *dreq, int intr) | ||
431 | { | ||
432 | int result = 0; | ||
433 | |||
434 | if (intr) { | ||
435 | result = wait_event_interruptible(dreq->wait, | ||
436 | (atomic_read(&dreq->complete) == 0)); | ||
437 | } else { | ||
438 | wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0)); | ||
439 | } | ||
440 | |||
441 | if (!result) | ||
442 | result = atomic_read(&dreq->error); | ||
443 | if (!result) | ||
444 | result = atomic_read(&dreq->count); | ||
445 | |||
446 | kref_put(&dreq->kref, nfs_direct_req_release); | ||
447 | return (ssize_t) result; | ||
448 | } | ||
449 | |||
450 | static void nfs_direct_write_result(struct rpc_task *task, void *calldata) | ||
451 | { | ||
452 | struct nfs_write_data *data = calldata; | ||
453 | struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; | ||
454 | int status = task->tk_status; | ||
455 | |||
456 | if (nfs_writeback_done(task, data) != 0) | ||
457 | return; | ||
458 | /* If the server fell back to an UNSTABLE write, it's an error. */ | ||
459 | if (unlikely(data->res.verf->committed != NFS_FILE_SYNC)) | ||
460 | status = -EIO; | ||
461 | |||
462 | if (likely(status >= 0)) | ||
463 | atomic_add(data->res.count, &dreq->count); | ||
464 | else | ||
465 | atomic_set(&dreq->error, status); | ||
466 | |||
467 | if (unlikely(atomic_dec_and_test(&dreq->complete))) | ||
468 | nfs_direct_complete(dreq); | ||
469 | } | ||
470 | |||
471 | static const struct rpc_call_ops nfs_write_direct_ops = { | ||
472 | .rpc_call_done = nfs_direct_write_result, | ||
473 | .rpc_release = nfs_writedata_release, | ||
474 | }; | ||
475 | |||
476 | /* | ||
477 | * For each nfs_write_data struct that was allocated on the list, dispatch | ||
478 | * an NFS WRITE operation | ||
479 | * | ||
480 | * XXX: For now, support only FILE_SYNC writes. Later we may add | ||
481 | * support for UNSTABLE + COMMIT. | ||
482 | */ | ||
483 | static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset) | ||
484 | { | ||
485 | struct list_head *list = &dreq->list; | ||
486 | struct page **pages = dreq->pages; | ||
487 | size_t wsize = NFS_SERVER(inode)->wsize; | ||
488 | unsigned int curpage, pgbase; | ||
409 | 489 | ||
410 | nfs_begin_data_update(inode); | ||
411 | retry: | ||
412 | need_commit = 0; | ||
413 | tot_bytes = 0; | ||
414 | curpage = 0; | 490 | curpage = 0; |
415 | request = count; | 491 | pgbase = user_addr & ~PAGE_MASK; |
416 | wdata->args.pgbase = user_addr & ~PAGE_MASK; | ||
417 | wdata->args.offset = file_offset; | ||
418 | do { | 492 | do { |
419 | wdata->args.count = request; | 493 | struct nfs_write_data *data; |
420 | if (wdata->args.count > wsize) | 494 | size_t bytes; |
421 | wdata->args.count = wsize; | 495 | |
422 | wdata->args.pages = &pages[curpage]; | 496 | bytes = wsize; |
497 | if (count < wsize) | ||
498 | bytes = count; | ||
499 | |||
500 | data = list_entry(list->next, struct nfs_write_data, pages); | ||
501 | list_del_init(&data->pages); | ||
502 | |||
503 | data->inode = inode; | ||
504 | data->cred = ctx->cred; | ||
505 | data->args.fh = NFS_FH(inode); | ||
506 | data->args.context = ctx; | ||
507 | data->args.offset = file_offset; | ||
508 | data->args.pgbase = pgbase; | ||
509 | data->args.pages = &pages[curpage]; | ||
510 | data->args.count = bytes; | ||
511 | data->res.fattr = &data->fattr; | ||
512 | data->res.count = bytes; | ||
513 | |||
514 | rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, | ||
515 | &nfs_write_direct_ops, data); | ||
516 | NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE); | ||
423 | 517 | ||
424 | dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", | 518 | data->task.tk_priority = RPC_PRIORITY_NORMAL; |
425 | wdata->args.count, (long long) wdata->args.offset, | 519 | data->task.tk_cookie = (unsigned long) inode; |
426 | user_addr + tot_bytes, wdata->args.pgbase, curpage); | ||
427 | 520 | ||
428 | lock_kernel(); | 521 | lock_kernel(); |
429 | result = NFS_PROTO(inode)->write(wdata); | 522 | rpc_execute(&data->task); |
430 | unlock_kernel(); | 523 | unlock_kernel(); |
431 | 524 | ||
432 | if (result <= 0) { | 525 | dfprintk(VFS, "NFS: %4d initiated direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", |
433 | if (tot_bytes > 0) | 526 | data->task.tk_pid, |
434 | break; | 527 | inode->i_sb->s_id, |
435 | goto out; | 528 | (long long)NFS_FILEID(inode), |
436 | } | 529 | bytes, |
530 | (unsigned long long)data->args.offset); | ||
437 | 531 | ||
438 | if (tot_bytes == 0) | 532 | file_offset += bytes; |
439 | memcpy(&first_verf.verifier, &wdata->verf.verifier, | 533 | pgbase += bytes; |
440 | sizeof(first_verf.verifier)); | 534 | curpage += pgbase >> PAGE_SHIFT; |
441 | if (wdata->verf.committed != NFS_FILE_SYNC) { | 535 | pgbase &= ~PAGE_MASK; |
442 | need_commit = 1; | ||
443 | if (memcmp(&first_verf.verifier, &wdata->verf.verifier, | ||
444 | sizeof(first_verf.verifier))) | ||
445 | goto sync_retry; | ||
446 | } | ||
447 | 536 | ||
448 | tot_bytes += result; | 537 | count -= bytes; |
538 | } while (count != 0); | ||
539 | } | ||
449 | 540 | ||
450 | /* in case of a short write: stop now, let the app recover */ | 541 | static ssize_t nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) |
451 | if (result < wdata->args.count) | 542 | { |
452 | break; | 543 | ssize_t result; |
544 | sigset_t oldset; | ||
545 | struct rpc_clnt *clnt = NFS_CLIENT(inode); | ||
546 | struct nfs_direct_req *dreq; | ||
453 | 547 | ||
454 | wdata->args.offset += result; | 548 | dreq = nfs_direct_write_alloc(count, NFS_SERVER(inode)->wsize); |
455 | wdata->args.pgbase += result; | 549 | if (!dreq) |
456 | curpage += wdata->args.pgbase >> PAGE_SHIFT; | 550 | return -ENOMEM; |
457 | wdata->args.pgbase &= ~PAGE_MASK; | ||
458 | request -= result; | ||
459 | } while (request != 0); | ||
460 | 551 | ||
461 | /* | 552 | dreq->pages = pages; |
462 | * Commit data written so far, even in the event of an error | 553 | dreq->npages = nr_pages; |
463 | */ | ||
464 | if (need_commit) { | ||
465 | wdata->args.count = tot_bytes; | ||
466 | wdata->args.offset = file_offset; | ||
467 | 554 | ||
468 | lock_kernel(); | 555 | nfs_begin_data_update(inode); |
469 | result = NFS_PROTO(inode)->commit(wdata); | ||
470 | unlock_kernel(); | ||
471 | 556 | ||
472 | if (result < 0 || memcmp(&first_verf.verifier, | 557 | rpc_clnt_sigmask(clnt, &oldset); |
473 | &wdata->verf.verifier, | 558 | nfs_direct_write_schedule(dreq, inode, ctx, user_addr, count, |
474 | sizeof(first_verf.verifier)) != 0) | 559 | file_offset); |
475 | goto sync_retry; | 560 | result = nfs_direct_write_wait(dreq, clnt->cl_intr); |
476 | } | 561 | rpc_clnt_sigunmask(clnt, &oldset); |
477 | result = tot_bytes; | ||
478 | 562 | ||
479 | out: | ||
480 | nfs_end_data_update(inode); | 563 | nfs_end_data_update(inode); |
481 | nfs_writedata_free(wdata); | ||
482 | return result; | ||
483 | 564 | ||
484 | sync_retry: | 565 | return result; |
485 | wdata->args.stable = NFS_FILE_SYNC; | ||
486 | goto retry; | ||
487 | } | 566 | } |
488 | 567 | ||
489 | /* | 568 | /* |
@@ -515,7 +594,6 @@ static ssize_t nfs_direct_write(struct inode *inode, struct nfs_open_context *ct | |||
515 | nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); | 594 | nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size); |
516 | result = nfs_direct_write_seg(inode, ctx, user_addr, size, | 595 | result = nfs_direct_write_seg(inode, ctx, user_addr, size, |
517 | file_offset, pages, page_count); | 596 | file_offset, pages, page_count); |
518 | nfs_free_user_pages(pages, page_count, 0); | ||
519 | 597 | ||
520 | if (result <= 0) { | 598 | if (result <= 0) { |
521 | if (tot_bytes > 0) | 599 | if (tot_bytes > 0) |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5912274ff1a1..875f5b060533 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -77,7 +77,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, | |||
77 | struct inode *, | 77 | struct inode *, |
78 | struct page *, | 78 | struct page *, |
79 | unsigned int, unsigned int); | 79 | unsigned int, unsigned int); |
80 | static int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); | ||
81 | static int nfs_wait_on_write_congestion(struct address_space *, int); | 80 | static int nfs_wait_on_write_congestion(struct address_space *, int); |
82 | static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); | 81 | static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); |
83 | static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, | 82 | static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, |
@@ -1183,7 +1182,7 @@ static const struct rpc_call_ops nfs_write_full_ops = { | |||
1183 | /* | 1182 | /* |
1184 | * This function is called when the WRITE call is complete. | 1183 | * This function is called when the WRITE call is complete. |
1185 | */ | 1184 | */ |
1186 | static int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | 1185 | int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) |
1187 | { | 1186 | { |
1188 | struct nfs_writeargs *argp = &data->args; | 1187 | struct nfs_writeargs *argp = &data->args; |
1189 | struct nfs_writeres *resp = &data->res; | 1188 | struct nfs_writeres *resp = &data->res; |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f55827be4f8e..6c130a6b0f4d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -407,6 +407,8 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); | |||
407 | extern int nfs_writepages(struct address_space *, struct writeback_control *); | 407 | extern int nfs_writepages(struct address_space *, struct writeback_control *); |
408 | extern int nfs_flush_incompatible(struct file *file, struct page *page); | 408 | extern int nfs_flush_incompatible(struct file *file, struct page *page); |
409 | extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); | 409 | extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); |
410 | extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); | ||
411 | extern void nfs_writedata_release(void *); | ||
410 | 412 | ||
411 | /* | 413 | /* |
412 | * Try to write back everything synchronously (but check the | 414 | * Try to write back everything synchronously (but check the |