diff options
author | Yan, Zheng <zyan@redhat.com> | 2016-11-08 08:54:34 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2016-12-12 17:54:27 -0500 |
commit | 7ce469a53e7106acdaca2e25027941d0f7c12a8e (patch) | |
tree | 81864fb541f3ba5eb6080823d961addc87053955 /fs/ceph/file.c | |
parent | 2b1ac852eb67a6e95595e576371d23519105559f (diff) |
ceph: fix splice read for no Fc capability case
When the iov_iter type is ITER_PIPE, copy_page_to_iter() increases
the page's reference count and adds the page to a pipe_buffer. It also
sets the pipe_buffer's ops to page_cache_pipe_buf_ops. The confirm
callback in page_cache_pipe_buf_ops expects the page to be from the page
cache and uptodate; otherwise it returns an error.
In the ceph_sync_read() case, pages are not from the page cache, so we
can't call copy_page_to_iter() when the iov_iter type is ITER_PIPE.
The fix is to use iov_iter_get_pages_alloc() to allocate pages
for the pipe (the code is similar to default_file_splice_read).
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- | fs/ceph/file.c | 120 |
1 files changed, 66 insertions, 54 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ae3cec5724d6..12ce2b562d14 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -458,71 +458,60 @@ enum { | |||
458 | * only return a short read to the caller if we hit EOF. | 458 | * only return a short read to the caller if we hit EOF. |
459 | */ | 459 | */ |
460 | static int striped_read(struct inode *inode, | 460 | static int striped_read(struct inode *inode, |
461 | u64 off, u64 len, | 461 | u64 pos, u64 len, |
462 | struct page **pages, int num_pages, | 462 | struct page **pages, int num_pages, |
463 | int *checkeof) | 463 | int page_align, int *checkeof) |
464 | { | 464 | { |
465 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 465 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
466 | struct ceph_inode_info *ci = ceph_inode(inode); | 466 | struct ceph_inode_info *ci = ceph_inode(inode); |
467 | u64 pos, this_len, left; | 467 | u64 this_len; |
468 | loff_t i_size; | 468 | loff_t i_size; |
469 | int page_align, pages_left; | 469 | int page_idx; |
470 | int read, ret; | 470 | int ret, read = 0; |
471 | struct page **page_pos; | ||
472 | bool hit_stripe, was_short; | 471 | bool hit_stripe, was_short; |
473 | 472 | ||
474 | /* | 473 | /* |
475 | * we may need to do multiple reads. not atomic, unfortunately. | 474 | * we may need to do multiple reads. not atomic, unfortunately. |
476 | */ | 475 | */ |
477 | pos = off; | ||
478 | left = len; | ||
479 | page_pos = pages; | ||
480 | pages_left = num_pages; | ||
481 | read = 0; | ||
482 | |||
483 | more: | 476 | more: |
484 | page_align = pos & ~PAGE_MASK; | 477 | this_len = len; |
485 | this_len = left; | 478 | page_idx = (page_align + read) >> PAGE_SHIFT; |
486 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), | 479 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
487 | &ci->i_layout, pos, &this_len, | 480 | &ci->i_layout, pos, &this_len, |
488 | ci->i_truncate_seq, | 481 | ci->i_truncate_seq, ci->i_truncate_size, |
489 | ci->i_truncate_size, | 482 | pages + page_idx, num_pages - page_idx, |
490 | page_pos, pages_left, page_align); | 483 | ((page_align + read) & ~PAGE_MASK)); |
491 | if (ret == -ENOENT) | 484 | if (ret == -ENOENT) |
492 | ret = 0; | 485 | ret = 0; |
493 | hit_stripe = this_len < left; | 486 | hit_stripe = this_len < len; |
494 | was_short = ret >= 0 && ret < this_len; | 487 | was_short = ret >= 0 && ret < this_len; |
495 | dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read, | 488 | dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, len, read, |
496 | ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); | 489 | ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); |
497 | 490 | ||
498 | i_size = i_size_read(inode); | 491 | i_size = i_size_read(inode); |
499 | if (ret >= 0) { | 492 | if (ret >= 0) { |
500 | int didpages; | ||
501 | if (was_short && (pos + ret < i_size)) { | 493 | if (was_short && (pos + ret < i_size)) { |
502 | int zlen = min(this_len - ret, i_size - pos - ret); | 494 | int zlen = min(this_len - ret, i_size - pos - ret); |
503 | int zoff = (off & ~PAGE_MASK) + read + ret; | 495 | int zoff = page_align + read + ret; |
504 | dout(" zero gap %llu to %llu\n", | 496 | dout(" zero gap %llu to %llu\n", |
505 | pos + ret, pos + ret + zlen); | 497 | pos + ret, pos + ret + zlen); |
506 | ceph_zero_page_vector_range(zoff, zlen, pages); | 498 | ceph_zero_page_vector_range(zoff, zlen, pages); |
507 | ret += zlen; | 499 | ret += zlen; |
508 | } | 500 | } |
509 | 501 | ||
510 | didpages = (page_align + ret) >> PAGE_SHIFT; | 502 | read += ret; |
511 | pos += ret; | 503 | pos += ret; |
512 | read = pos - off; | 504 | len -= ret; |
513 | left -= ret; | ||
514 | page_pos += didpages; | ||
515 | pages_left -= didpages; | ||
516 | 505 | ||
517 | /* hit stripe and need continue*/ | 506 | /* hit stripe and need continue*/ |
518 | if (left && hit_stripe && pos < i_size) | 507 | if (len && hit_stripe && pos < i_size) |
519 | goto more; | 508 | goto more; |
520 | } | 509 | } |
521 | 510 | ||
522 | if (read > 0) { | 511 | if (read > 0) { |
523 | ret = read; | 512 | ret = read; |
524 | /* did we bounce off eof? */ | 513 | /* did we bounce off eof? */ |
525 | if (pos + left > i_size) | 514 | if (pos + len > i_size) |
526 | *checkeof = CHECK_EOF; | 515 | *checkeof = CHECK_EOF; |
527 | } | 516 | } |
528 | 517 | ||
@@ -536,15 +525,16 @@ more: | |||
536 | * | 525 | * |
537 | * If the read spans object boundary, just do multiple reads. | 526 | * If the read spans object boundary, just do multiple reads. |
538 | */ | 527 | */ |
539 | static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, | 528 | static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, |
540 | int *checkeof) | 529 | int *checkeof) |
541 | { | 530 | { |
542 | struct file *file = iocb->ki_filp; | 531 | struct file *file = iocb->ki_filp; |
543 | struct inode *inode = file_inode(file); | 532 | struct inode *inode = file_inode(file); |
544 | struct page **pages; | 533 | struct page **pages; |
545 | u64 off = iocb->ki_pos; | 534 | u64 off = iocb->ki_pos; |
546 | int num_pages, ret; | 535 | int num_pages; |
547 | size_t len = iov_iter_count(i); | 536 | ssize_t ret; |
537 | size_t len = iov_iter_count(to); | ||
548 | 538 | ||
549 | dout("sync_read on file %p %llu~%u %s\n", file, off, | 539 | dout("sync_read on file %p %llu~%u %s\n", file, off, |
550 | (unsigned)len, | 540 | (unsigned)len, |
@@ -563,35 +553,56 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, | |||
563 | if (ret < 0) | 553 | if (ret < 0) |
564 | return ret; | 554 | return ret; |
565 | 555 | ||
566 | num_pages = calc_pages_for(off, len); | 556 | if (unlikely(to->type & ITER_PIPE)) { |
567 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | 557 | size_t page_off; |
568 | if (IS_ERR(pages)) | 558 | ret = iov_iter_get_pages_alloc(to, &pages, len, |
569 | return PTR_ERR(pages); | 559 | &page_off); |
570 | ret = striped_read(inode, off, len, pages, | 560 | if (ret <= 0) |
571 | num_pages, checkeof); | 561 | return -ENOMEM; |
572 | if (ret > 0) { | 562 | num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE); |
573 | int l, k = 0; | 563 | |
574 | size_t left = ret; | 564 | ret = striped_read(inode, off, ret, pages, num_pages, |
575 | 565 | page_off, checkeof); | |
576 | while (left) { | 566 | if (ret > 0) { |
577 | size_t page_off = off & ~PAGE_MASK; | 567 | iov_iter_advance(to, ret); |
578 | size_t copy = min_t(size_t, left, | 568 | off += ret; |
579 | PAGE_SIZE - page_off); | 569 | } else { |
580 | l = copy_page_to_iter(pages[k++], page_off, copy, i); | 570 | iov_iter_advance(to, 0); |
581 | off += l; | 571 | } |
582 | left -= l; | 572 | ceph_put_page_vector(pages, num_pages, false); |
583 | if (l < copy) | 573 | } else { |
584 | break; | 574 | num_pages = calc_pages_for(off, len); |
575 | pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); | ||
576 | if (IS_ERR(pages)) | ||
577 | return PTR_ERR(pages); | ||
578 | |||
579 | ret = striped_read(inode, off, len, pages, num_pages, | ||
580 | (off & ~PAGE_MASK), checkeof); | ||
581 | if (ret > 0) { | ||
582 | int l, k = 0; | ||
583 | size_t left = ret; | ||
584 | |||
585 | while (left) { | ||
586 | size_t page_off = off & ~PAGE_MASK; | ||
587 | size_t copy = min_t(size_t, left, | ||
588 | PAGE_SIZE - page_off); | ||
589 | l = copy_page_to_iter(pages[k++], page_off, | ||
590 | copy, to); | ||
591 | off += l; | ||
592 | left -= l; | ||
593 | if (l < copy) | ||
594 | break; | ||
595 | } | ||
585 | } | 596 | } |
597 | ceph_release_page_vector(pages, num_pages); | ||
586 | } | 598 | } |
587 | ceph_release_page_vector(pages, num_pages); | ||
588 | 599 | ||
589 | if (off > iocb->ki_pos) { | 600 | if (off > iocb->ki_pos) { |
590 | ret = off - iocb->ki_pos; | 601 | ret = off - iocb->ki_pos; |
591 | iocb->ki_pos = off; | 602 | iocb->ki_pos = off; |
592 | } | 603 | } |
593 | 604 | ||
594 | dout("sync_read result %d\n", ret); | 605 | dout("sync_read result %zd\n", ret); |
595 | return ret; | 606 | return ret; |
596 | } | 607 | } |
597 | 608 | ||
@@ -1771,6 +1782,7 @@ const struct file_operations ceph_file_fops = { | |||
1771 | .fsync = ceph_fsync, | 1782 | .fsync = ceph_fsync, |
1772 | .lock = ceph_lock, | 1783 | .lock = ceph_lock, |
1773 | .flock = ceph_flock, | 1784 | .flock = ceph_flock, |
1785 | .splice_read = generic_file_splice_read, | ||
1774 | .splice_write = iter_file_splice_write, | 1786 | .splice_write = iter_file_splice_write, |
1775 | .unlocked_ioctl = ceph_ioctl, | 1787 | .unlocked_ioctl = ceph_ioctl, |
1776 | .compat_ioctl = ceph_ioctl, | 1788 | .compat_ioctl = ceph_ioctl, |