summaryrefslogtreecommitdiffstats
path: root/fs/ceph/file.c
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2016-11-08 08:54:34 -0500
committerIlya Dryomov <idryomov@gmail.com>2016-12-12 17:54:27 -0500
commit7ce469a53e7106acdaca2e25027941d0f7c12a8e (patch)
tree81864fb541f3ba5eb6080823d961addc87053955 /fs/ceph/file.c
parent2b1ac852eb67a6e95595e576371d23519105559f (diff)
ceph: fix splice read for no Fc capability case
When the iov_iter type is ITER_PIPE, copy_page_to_iter() increases the page's reference count and adds the page to a pipe_buffer. It also sets the pipe_buffer's ops to page_cache_pipe_buf_ops. The confirm callback in page_cache_pipe_buf_ops expects the page to be from the page cache and uptodate; otherwise it returns an error. In the ceph_sync_read() case, the pages are not from the page cache, so we can't call copy_page_to_iter() when the iov_iter type is ITER_PIPE. The fix is to use iov_iter_get_pages_alloc() to allocate pages for the pipe (the code is similar to default_file_splice_read). Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r--fs/ceph/file.c120
1 files changed, 66 insertions, 54 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ae3cec5724d6..12ce2b562d14 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -458,71 +458,60 @@ enum {
458 * only return a short read to the caller if we hit EOF. 458 * only return a short read to the caller if we hit EOF.
459 */ 459 */
460static int striped_read(struct inode *inode, 460static int striped_read(struct inode *inode,
461 u64 off, u64 len, 461 u64 pos, u64 len,
462 struct page **pages, int num_pages, 462 struct page **pages, int num_pages,
463 int *checkeof) 463 int page_align, int *checkeof)
464{ 464{
465 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 465 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
466 struct ceph_inode_info *ci = ceph_inode(inode); 466 struct ceph_inode_info *ci = ceph_inode(inode);
467 u64 pos, this_len, left; 467 u64 this_len;
468 loff_t i_size; 468 loff_t i_size;
469 int page_align, pages_left; 469 int page_idx;
470 int read, ret; 470 int ret, read = 0;
471 struct page **page_pos;
472 bool hit_stripe, was_short; 471 bool hit_stripe, was_short;
473 472
474 /* 473 /*
475 * we may need to do multiple reads. not atomic, unfortunately. 474 * we may need to do multiple reads. not atomic, unfortunately.
476 */ 475 */
477 pos = off;
478 left = len;
479 page_pos = pages;
480 pages_left = num_pages;
481 read = 0;
482
483more: 476more:
484 page_align = pos & ~PAGE_MASK; 477 this_len = len;
485 this_len = left; 478 page_idx = (page_align + read) >> PAGE_SHIFT;
486 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), 479 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
487 &ci->i_layout, pos, &this_len, 480 &ci->i_layout, pos, &this_len,
488 ci->i_truncate_seq, 481 ci->i_truncate_seq, ci->i_truncate_size,
489 ci->i_truncate_size, 482 pages + page_idx, num_pages - page_idx,
490 page_pos, pages_left, page_align); 483 ((page_align + read) & ~PAGE_MASK));
491 if (ret == -ENOENT) 484 if (ret == -ENOENT)
492 ret = 0; 485 ret = 0;
493 hit_stripe = this_len < left; 486 hit_stripe = this_len < len;
494 was_short = ret >= 0 && ret < this_len; 487 was_short = ret >= 0 && ret < this_len;
495 dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read, 488 dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, len, read,
496 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); 489 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
497 490
498 i_size = i_size_read(inode); 491 i_size = i_size_read(inode);
499 if (ret >= 0) { 492 if (ret >= 0) {
500 int didpages;
501 if (was_short && (pos + ret < i_size)) { 493 if (was_short && (pos + ret < i_size)) {
502 int zlen = min(this_len - ret, i_size - pos - ret); 494 int zlen = min(this_len - ret, i_size - pos - ret);
503 int zoff = (off & ~PAGE_MASK) + read + ret; 495 int zoff = page_align + read + ret;
504 dout(" zero gap %llu to %llu\n", 496 dout(" zero gap %llu to %llu\n",
505 pos + ret, pos + ret + zlen); 497 pos + ret, pos + ret + zlen);
506 ceph_zero_page_vector_range(zoff, zlen, pages); 498 ceph_zero_page_vector_range(zoff, zlen, pages);
507 ret += zlen; 499 ret += zlen;
508 } 500 }
509 501
510 didpages = (page_align + ret) >> PAGE_SHIFT; 502 read += ret;
511 pos += ret; 503 pos += ret;
512 read = pos - off; 504 len -= ret;
513 left -= ret;
514 page_pos += didpages;
515 pages_left -= didpages;
516 505
517 /* hit stripe and need continue*/ 506 /* hit stripe and need continue*/
518 if (left && hit_stripe && pos < i_size) 507 if (len && hit_stripe && pos < i_size)
519 goto more; 508 goto more;
520 } 509 }
521 510
522 if (read > 0) { 511 if (read > 0) {
523 ret = read; 512 ret = read;
524 /* did we bounce off eof? */ 513 /* did we bounce off eof? */
525 if (pos + left > i_size) 514 if (pos + len > i_size)
526 *checkeof = CHECK_EOF; 515 *checkeof = CHECK_EOF;
527 } 516 }
528 517
@@ -536,15 +525,16 @@ more:
536 * 525 *
537 * If the read spans object boundary, just do multiple reads. 526 * If the read spans object boundary, just do multiple reads.
538 */ 527 */
539static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, 528static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
540 int *checkeof) 529 int *checkeof)
541{ 530{
542 struct file *file = iocb->ki_filp; 531 struct file *file = iocb->ki_filp;
543 struct inode *inode = file_inode(file); 532 struct inode *inode = file_inode(file);
544 struct page **pages; 533 struct page **pages;
545 u64 off = iocb->ki_pos; 534 u64 off = iocb->ki_pos;
546 int num_pages, ret; 535 int num_pages;
547 size_t len = iov_iter_count(i); 536 ssize_t ret;
537 size_t len = iov_iter_count(to);
548 538
549 dout("sync_read on file %p %llu~%u %s\n", file, off, 539 dout("sync_read on file %p %llu~%u %s\n", file, off,
550 (unsigned)len, 540 (unsigned)len,
@@ -563,35 +553,56 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
563 if (ret < 0) 553 if (ret < 0)
564 return ret; 554 return ret;
565 555
566 num_pages = calc_pages_for(off, len); 556 if (unlikely(to->type & ITER_PIPE)) {
567 pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 557 size_t page_off;
568 if (IS_ERR(pages)) 558 ret = iov_iter_get_pages_alloc(to, &pages, len,
569 return PTR_ERR(pages); 559 &page_off);
570 ret = striped_read(inode, off, len, pages, 560 if (ret <= 0)
571 num_pages, checkeof); 561 return -ENOMEM;
572 if (ret > 0) { 562 num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE);
573 int l, k = 0; 563
574 size_t left = ret; 564 ret = striped_read(inode, off, ret, pages, num_pages,
575 565 page_off, checkeof);
576 while (left) { 566 if (ret > 0) {
577 size_t page_off = off & ~PAGE_MASK; 567 iov_iter_advance(to, ret);
578 size_t copy = min_t(size_t, left, 568 off += ret;
579 PAGE_SIZE - page_off); 569 } else {
580 l = copy_page_to_iter(pages[k++], page_off, copy, i); 570 iov_iter_advance(to, 0);
581 off += l; 571 }
582 left -= l; 572 ceph_put_page_vector(pages, num_pages, false);
583 if (l < copy) 573 } else {
584 break; 574 num_pages = calc_pages_for(off, len);
575 pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
576 if (IS_ERR(pages))
577 return PTR_ERR(pages);
578
579 ret = striped_read(inode, off, len, pages, num_pages,
580 (off & ~PAGE_MASK), checkeof);
581 if (ret > 0) {
582 int l, k = 0;
583 size_t left = ret;
584
585 while (left) {
586 size_t page_off = off & ~PAGE_MASK;
587 size_t copy = min_t(size_t, left,
588 PAGE_SIZE - page_off);
589 l = copy_page_to_iter(pages[k++], page_off,
590 copy, to);
591 off += l;
592 left -= l;
593 if (l < copy)
594 break;
595 }
585 } 596 }
597 ceph_release_page_vector(pages, num_pages);
586 } 598 }
587 ceph_release_page_vector(pages, num_pages);
588 599
589 if (off > iocb->ki_pos) { 600 if (off > iocb->ki_pos) {
590 ret = off - iocb->ki_pos; 601 ret = off - iocb->ki_pos;
591 iocb->ki_pos = off; 602 iocb->ki_pos = off;
592 } 603 }
593 604
594 dout("sync_read result %d\n", ret); 605 dout("sync_read result %zd\n", ret);
595 return ret; 606 return ret;
596} 607}
597 608
@@ -1771,6 +1782,7 @@ const struct file_operations ceph_file_fops = {
1771 .fsync = ceph_fsync, 1782 .fsync = ceph_fsync,
1772 .lock = ceph_lock, 1783 .lock = ceph_lock,
1773 .flock = ceph_flock, 1784 .flock = ceph_flock,
1785 .splice_read = generic_file_splice_read,
1774 .splice_write = iter_file_splice_write, 1786 .splice_write = iter_file_splice_write,
1775 .unlocked_ioctl = ceph_ioctl, 1787 .unlocked_ioctl = ceph_ioctl,
1776 .compat_ioctl = ceph_ioctl, 1788 .compat_ioctl = ceph_ioctl,