author     Al Viro <viro@zeniv.linux.org.uk>  2014-03-21 04:58:33 -0400
committer  Al Viro <viro@zeniv.linux.org.uk>  2014-05-06 17:32:53 -0400
commit     91f79c43d1b54d7154b118860d81b39bad07dfff
tree       a5b142ba57fdabf835476b6dbca24288a78f0c53 /fs/nfs/direct.c
parent     f67da30c1d5fc9e341bc8121708874bfd7b31e45
new helper: iov_iter_get_pages_alloc()

same as iov_iter_get_pages(), except that pages array is allocated
(kmalloc if possible, vmalloc if that fails) and left for caller to
free.  Lustre and NFS ->direct_IO() switched to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/nfs/direct.c')
-rw-r--r--  fs/nfs/direct.c | 290
1 file changed, 88 insertions(+), 202 deletions(-)
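The new helper's contract, as the converted loops below use it: it pins up to
maxsize bytes of the iterator, allocates the page array itself (kmalloc if
possible, vmalloc if that fails), reports the starting offset of the data
within the first page, and leaves both the page references and the array for
the caller to clean up.  A minimal sketch of that consumer pattern follows;
it is not part of the commit, and use_page() is a hypothetical stand-in for
whatever per-page work a real caller does:

	/*
	 * Not part of the commit: a minimal consumer of the new helper.
	 */
	#include <linux/mm.h>
	#include <linux/uio.h>

	static void use_page(struct page *page)
	{
		/* placeholder: hand the pinned page to the real I/O path */
	}

	static ssize_t consume_iter(struct iov_iter *iter, size_t chunk)
	{
		ssize_t result = 0;

		while (iov_iter_count(iter)) {
			struct page **pagevec;
			size_t pgbase;		/* offset of data within pagevec[0] */
			unsigned npages, i;

			/* pins up to @chunk bytes; pagevec is allocated for us */
			result = iov_iter_get_pages_alloc(iter, &pagevec,
							  chunk, &pgbase);
			if (result < 0)
				break;		/* report the first error */
			iov_iter_advance(iter, result);

			/* data bytes plus leading offset, rounded up to pages */
			npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
			for (i = 0; i < npages; i++)
				use_page(pagevec[i]);

			for (i = 0; i < npages; i++)
				put_page(pagevec[i]);	/* drop pinned references */
			kvfree(pagevec);	/* may be kmalloc'ed or vmalloc'ed */
		}
		return result < 0 ? result : 0;
	}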
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1d34f454989e..b122fe21fea0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -322,60 +322,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
-						const struct iovec *iov,
-						loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
 
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+					      struct iov_iter *iter,
+					      loff_t pos)
+{
+	struct nfs_pageio_descriptor desc;
+	struct inode *inode = dreq->inode;
+	ssize_t result = -EINVAL;
+	size_t requested_bytes = 0;
+	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
 
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!pagevec)
-			break;
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-					npages, 1, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 1, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
+	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+			     &nfs_direct_read_completion_ops);
+	get_dreq(dreq);
+	desc.pg_dreq = dreq;
+	atomic_inc(&inode->i_dio_count);
 
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
 
+		result = iov_iter_get_pages_alloc(iter, &pagevec,
+						  rsize, &pgbase);
+		if (result < 0)
+			break;
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
@@ -389,55 +366,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			}
 			req->wb_index = pos >> PAGE_SHIFT;
 			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
 				nfs_release_request(req);
 				break;
 			}
 			pgbase = 0;
 			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
+			requested_bytes += req_len;
 			pos += req_len;
-			count -= req_len;
 			dreq->bytes_left -= req_len;
 		}
-		/* The nfs_page now hold references to these pages */
 		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      struct iov_iter *iter,
-					      loff_t pos, bool uio)
-{
-	struct nfs_pageio_descriptor desc;
-	struct inode *inode = dreq->inode;
-	ssize_t result = -EINVAL;
-	size_t requested_bytes = 0;
-	unsigned long seg;
-
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
-			     &nfs_direct_read_completion_ops);
-	get_dreq(dreq);
-	desc.pg_dreq = dreq;
-	atomic_inc(&inode->i_dio_count);
-
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+		kvfree(pagevec);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
-			break;
-		pos += vec->iov_len;
 	}
 
 	nfs_pageio_complete(&desc);
@@ -521,7 +464,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 		dreq->iocb = iocb;
 
 	NFS_I(inode)->read_io += count;
-	result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -677,109 +620,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-/*
- * NB: Return the value of the first error return code.  Subsequent
- * errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes.  Write length accounting is
- * handled automatically by nfs_direct_write_result().  Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
-						 const struct iovec *iov,
-						 loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
-
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
-		if (!pagevec)
-			break;
-
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-						npages, 0, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 0, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
-
-		for (i = 0; i < npages; i++) {
-			struct nfs_page *req;
-			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
-						 pgbase, req_len);
-			if (IS_ERR(req)) {
-				result = PTR_ERR(req);
-				break;
-			}
-			nfs_lock_request(req);
-			req->wb_index = pos >> PAGE_SHIFT;
-			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
-				nfs_unlock_and_release_request(req);
-				break;
-			}
-			pgbase = 0;
-			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
-			pos += req_len;
-			count -= req_len;
-			dreq->bytes_left -= req_len;
-		}
-		/* The nfs_page now hold references to these pages */
-		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
 static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
@@ -859,15 +699,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 	.completion = nfs_direct_write_completion,
 };
 
+
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ * errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
+ */
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 					       struct iov_iter *iter,
-					       loff_t pos, bool uio)
+					       loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
-	unsigned long seg;
+	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
 	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
 					    &nfs_direct_write_completion_ops);
@@ -875,16 +727,50 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	get_dreq(dreq);
 	atomic_inc(&inode->i_dio_count);
 
-	NFS_I(dreq->inode)->write_io += iov_iter_count(iter);
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+	NFS_I(inode)->write_io += iov_iter_count(iter);
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
+
+		result = iov_iter_get_pages_alloc(iter, &pagevec,
+						  wsize, &pgbase);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+			req = nfs_create_request(dreq->ctx, inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				result = PTR_ERR(req);
+				break;
+			}
+			nfs_lock_request(req);
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
+				nfs_unlock_and_release_request(req);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			requested_bytes += req_len;
+			pos += req_len;
+			dreq->bytes_left -= req_len;
+		}
+		nfs_direct_release_pages(pagevec, npages);
+		kvfree(pagevec);
+		if (result < 0)
 			break;
-		pos += vec->iov_len;
 	}
 	nfs_pageio_complete(&desc);
 
@@ -985,7 +871,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
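
Both converted loops size the page array with the same round-up: a transfer
of result bytes that begins pgbase bytes into its first page spans
ceil((pgbase + result) / PAGE_SIZE) pages, which is what
(result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE computes in integer
arithmetic.  With 4096-byte pages, for instance, result = 4096 and
pgbase = 1 straddle two pages: (4096 + 1 + 4095) / 4096 = 2.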