author		Al Viro <viro@zeniv.linux.org.uk>	2014-03-21 04:58:33 -0400
committer	Al Viro <viro@zeniv.linux.org.uk>	2014-05-06 17:32:53 -0400
commit		91f79c43d1b54d7154b118860d81b39bad07dfff (patch)
tree		a5b142ba57fdabf835476b6dbca24288a78f0c53 /fs/nfs/direct.c
parent		f67da30c1d5fc9e341bc8121708874bfd7b31e45 (diff)

new helper: iov_iter_get_pages_alloc()
Same as iov_iter_get_pages(), except that the pages array is allocated
(kmalloc if possible, vmalloc if that fails) and left for the caller to
free. Lustre and NFS ->direct_IO() switched to it.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
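
[Editor's note] For readers new to the helper: its contract, as relied on by the
converted loops in the diff below, is that it pins the pages backing up to
maxsize bytes of the iterator, allocates the struct page * array itself
(kmalloc, falling back to vmalloc), stores the offset of the data within the
first page through the last argument, and returns the number of bytes covered.
The caller owns both the page references and the array. A minimal caller
sketch (hypothetical function and names, not part of this patch):

	static ssize_t pin_next_chunk(struct iov_iter *iter, size_t maxsize)
	{
		struct page **pages;
		size_t pgbase;		/* offset of the data in pages[0] */
		ssize_t result;
		unsigned npages, i;

		result = iov_iter_get_pages_alloc(iter, &pages, maxsize, &pgbase);
		if (result < 0)
			return result;	/* nothing was allocated on error */

		iov_iter_advance(iter, result);	/* consume what was pinned */
		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;

		/* ... build I/O requests over pages[0..npages-1] ... */

		for (i = 0; i < npages; i++)
			page_cache_release(pages[i]);	/* drop each page ref */
		kvfree(pages);	/* matches the kmalloc-or-vmalloc allocation */
		return result;
	}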
Diffstat (limited to 'fs/nfs/direct.c')
-rw-r--r--	fs/nfs/direct.c	290
1 file changed, 88 insertions(+), 202 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1d34f454989e..b122fe21fea0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -322,60 +322,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
  * handled automatically by nfs_direct_read_result(). Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
-						const struct iovec *iov,
-						loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
 
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+					      struct iov_iter *iter,
+					      loff_t pos)
+{
+	struct nfs_pageio_descriptor desc;
+	struct inode *inode = dreq->inode;
+	ssize_t result = -EINVAL;
+	size_t requested_bytes = 0;
+	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
 
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!pagevec)
-			break;
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-					npages, 1, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 1, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
+	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+			     &nfs_direct_read_completion_ops);
+	get_dreq(dreq);
+	desc.pg_dreq = dreq;
+	atomic_inc(&inode->i_dio_count);
 
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
 
+		result = iov_iter_get_pages_alloc(iter, &pagevec,
+						  rsize, &pgbase);
+		if (result < 0)
+			break;
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
@@ -389,55 +366,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 		}
 		req->wb_index = pos >> PAGE_SHIFT;
 		req->wb_offset = pos & ~PAGE_MASK;
-		if (!nfs_pageio_add_request(desc, req)) {
-			result = desc->pg_error;
+		if (!nfs_pageio_add_request(&desc, req)) {
+			result = desc.pg_error;
 			nfs_release_request(req);
 			break;
 		}
 		pgbase = 0;
 		bytes -= req_len;
-		started += req_len;
-		user_addr += req_len;
+		requested_bytes += req_len;
 		pos += req_len;
-		count -= req_len;
 		dreq->bytes_left -= req_len;
 	}
-	/* The nfs_page now hold references to these pages */
 	nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      struct iov_iter *iter,
-					      loff_t pos, bool uio)
-{
-	struct nfs_pageio_descriptor desc;
-	struct inode *inode = dreq->inode;
-	ssize_t result = -EINVAL;
-	size_t requested_bytes = 0;
-	unsigned long seg;
-
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
-			     &nfs_direct_read_completion_ops);
-	get_dreq(dreq);
-	desc.pg_dreq = dreq;
-	atomic_inc(&inode->i_dio_count);
-
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+		kvfree(pagevec);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
-			break;
-		pos += vec->iov_len;
 	}
 
 	nfs_pageio_complete(&desc);
@@ -521,7 +464,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 		dreq->iocb = iocb;
 
 	NFS_I(inode)->read_io += count;
-	result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -677,109 +620,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-/*
- * NB: Return the value of the first error return code. Subsequent
- * errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation. If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes. Write length accounting is
- * handled automatically by nfs_direct_write_result(). Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
-						 const struct iovec *iov,
-						 loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
-
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
-		if (!pagevec)
-			break;
-
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-						npages, 0, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 0, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
-
-		for (i = 0; i < npages; i++) {
-			struct nfs_page *req;
-			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
-						 pgbase, req_len);
-			if (IS_ERR(req)) {
-				result = PTR_ERR(req);
-				break;
-			}
-			nfs_lock_request(req);
-			req->wb_index = pos >> PAGE_SHIFT;
-			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
-				nfs_unlock_and_release_request(req);
-				break;
-			}
-			pgbase = 0;
-			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
-			pos += req_len;
-			count -= req_len;
-			dreq->bytes_left -= req_len;
-		}
-		/* The nfs_page now hold references to these pages */
-		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
 static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
@@ -859,15 +699,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 	.completion = nfs_direct_write_completion,
 };
 
+
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation. If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes. Write length accounting is
+ * handled automatically by nfs_direct_write_result(). Otherwise, if
+ * no requests have been sent, just return an error.
+ */
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 					       struct iov_iter *iter,
-					       loff_t pos, bool uio)
+					       loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
-	unsigned long seg;
+	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
 	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
 					    &nfs_direct_write_completion_ops);
@@ -875,16 +727,50 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	get_dreq(dreq);
 	atomic_inc(&inode->i_dio_count);
 
-	NFS_I(dreq->inode)->write_io += iov_iter_count(iter);
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+	NFS_I(inode)->write_io += iov_iter_count(iter);
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
+
+		result = iov_iter_get_pages_alloc(iter, &pagevec,
+						  wsize, &pgbase);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+			req = nfs_create_request(dreq->ctx, inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				result = PTR_ERR(req);
+				break;
+			}
+			nfs_lock_request(req);
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
+				nfs_unlock_and_release_request(req);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			requested_bytes += req_len;
+			pos += req_len;
+			dreq->bytes_left -= req_len;
+		}
+		nfs_direct_release_pages(pagevec, npages);
+		kvfree(pagevec);
+		if (result < 0)
 			break;
-		pos += vec->iov_len;
 	}
 	nfs_pageio_complete(&desc);
 
@@ -985,7 +871,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
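
[Editor's note] On the rounding both converted loops use to size the pinned
array: iov_iter_get_pages_alloc() returns a byte count plus the offset of the
data in the first page, so the page count is (result + pgbase) rounded up to a
page boundary. A standalone check of the arithmetic (hypothetical numbers;
4096-byte pages assumed):

	#include <assert.h>
	#include <stddef.h>

	#define PAGE_SIZE 4096	/* assumption: 4K pages */

	int main(void)
	{
		/* 5000 bytes starting 512 bytes into the first page cover
		 * bytes 512..5511, i.e. pages 0 and 1 of the array. */
		size_t result = 5000, pgbase = 512;
		unsigned npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
		assert(npages == 2);
		return 0;
	}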