summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph/file.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-12-16 14:23:34 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-12-16 14:23:34 -0500
commit: 59331c215daf600a650e281b6e8ef3e1ed1174c2 (patch)
tree: f62bf3253fe48d7dd8b09b040c8e7fd9b2c02a86 /fs/ceph/file.c
parent: ff0f962ca3c38239b299a70e7eea27abfbb979c3 (diff)
parent: 45ee2c1d66185e5bd27702c60cce3c43fa3370d2 (diff)
Merge tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
 "A varied set of changes:

   - a large rework of cephx auth code to cope with CONFIG_VMAP_STACK
     (myself). Also fixed a deadlock caused by a bogus allocation on the
     writeback path and authorize reply verification.

   - a fix for long stalls during fsync (Jeff Layton). The client now
     has a way to force the MDS log flush, leading to ~100x speedups in
     some synthetic tests.

   - a new [no]require_active_mds mount option (Zheng Yan). On mount, we
     will now check whether any of the MDSes are available and bail
     rather than block if none are. This check can be avoided by
     specifying the "no" option.

   - a couple of MDS cap handling fixes and a few assorted patches
     throughout"

* tag 'ceph-for-4.10-rc1' of git://github.com/ceph/ceph-client: (32 commits)
  libceph: remove now unused finish_request() wrapper
  libceph: always signal completion when done
  ceph: avoid creating orphan object when checking pool permission
  ceph: properly set issue_seq for cap release
  ceph: add flags parameter to send_cap_msg
  ceph: update cap message struct version to 10
  ceph: define new argument structure for send_cap_msg
  ceph: move xattr initialzation before the encoding past the ceph_mds_caps
  ceph: fix minor typo in unsafe_request_wait
  ceph: record truncate size/seq for snap data writeback
  ceph: check availability of mds cluster on mount
  ceph: fix splice read for no Fc capability case
  ceph: try getting buffer capability for readahead/fadvise
  ceph: fix scheduler warning due to nested blocking
  ceph: fix printing wrong return variable in ceph_direct_read_write()
  crush: include mapper.h in mapper.c
  rbd: silence bogus -Wmaybe-uninitialized warning
  libceph: no need to drop con->mutex for ->get_authorizer()
  libceph: drop len argument of *verify_authorizer_reply()
  libceph: verify authorize reply on connect
  ...
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- fs/ceph/file.c | 127
1 files changed, 70 insertions, 57 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 159fc8f1a6a0..045d30d26624 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -454,71 +454,60 @@ enum {
454 * only return a short read to the caller if we hit EOF. 454 * only return a short read to the caller if we hit EOF.
455 */ 455 */
456static int striped_read(struct inode *inode, 456static int striped_read(struct inode *inode,
457 u64 off, u64 len, 457 u64 pos, u64 len,
458 struct page **pages, int num_pages, 458 struct page **pages, int num_pages,
459 int *checkeof) 459 int page_align, int *checkeof)
460{ 460{
461 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 461 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
462 struct ceph_inode_info *ci = ceph_inode(inode); 462 struct ceph_inode_info *ci = ceph_inode(inode);
463 u64 pos, this_len, left; 463 u64 this_len;
464 loff_t i_size; 464 loff_t i_size;
465 int page_align, pages_left; 465 int page_idx;
466 int read, ret; 466 int ret, read = 0;
467 struct page **page_pos;
468 bool hit_stripe, was_short; 467 bool hit_stripe, was_short;
469 468
470 /* 469 /*
471 * we may need to do multiple reads. not atomic, unfortunately. 470 * we may need to do multiple reads. not atomic, unfortunately.
472 */ 471 */
473 pos = off;
474 left = len;
475 page_pos = pages;
476 pages_left = num_pages;
477 read = 0;
478
479more: 472more:
480 page_align = pos & ~PAGE_MASK; 473 this_len = len;
481 this_len = left; 474 page_idx = (page_align + read) >> PAGE_SHIFT;
482 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), 475 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
483 &ci->i_layout, pos, &this_len, 476 &ci->i_layout, pos, &this_len,
484 ci->i_truncate_seq, 477 ci->i_truncate_seq, ci->i_truncate_size,
485 ci->i_truncate_size, 478 pages + page_idx, num_pages - page_idx,
486 page_pos, pages_left, page_align); 479 ((page_align + read) & ~PAGE_MASK));
487 if (ret == -ENOENT) 480 if (ret == -ENOENT)
488 ret = 0; 481 ret = 0;
489 hit_stripe = this_len < left; 482 hit_stripe = this_len < len;
490 was_short = ret >= 0 && ret < this_len; 483 was_short = ret >= 0 && ret < this_len;
491 dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read, 484 dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, len, read,
492 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); 485 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
493 486
494 i_size = i_size_read(inode); 487 i_size = i_size_read(inode);
495 if (ret >= 0) { 488 if (ret >= 0) {
496 int didpages;
497 if (was_short && (pos + ret < i_size)) { 489 if (was_short && (pos + ret < i_size)) {
498 int zlen = min(this_len - ret, i_size - pos - ret); 490 int zlen = min(this_len - ret, i_size - pos - ret);
499 int zoff = (off & ~PAGE_MASK) + read + ret; 491 int zoff = page_align + read + ret;
500 dout(" zero gap %llu to %llu\n", 492 dout(" zero gap %llu to %llu\n",
501 pos + ret, pos + ret + zlen); 493 pos + ret, pos + ret + zlen);
502 ceph_zero_page_vector_range(zoff, zlen, pages); 494 ceph_zero_page_vector_range(zoff, zlen, pages);
503 ret += zlen; 495 ret += zlen;
504 } 496 }
505 497
506 didpages = (page_align + ret) >> PAGE_SHIFT; 498 read += ret;
507 pos += ret; 499 pos += ret;
508 read = pos - off; 500 len -= ret;
509 left -= ret;
510 page_pos += didpages;
511 pages_left -= didpages;
512 501
513 /* hit stripe and need continue*/ 502 /* hit stripe and need continue*/
514 if (left && hit_stripe && pos < i_size) 503 if (len && hit_stripe && pos < i_size)
515 goto more; 504 goto more;
516 } 505 }
517 506
518 if (read > 0) { 507 if (read > 0) {
519 ret = read; 508 ret = read;
520 /* did we bounce off eof? */ 509 /* did we bounce off eof? */
521 if (pos + left > i_size) 510 if (pos + len > i_size)
522 *checkeof = CHECK_EOF; 511 *checkeof = CHECK_EOF;
523 } 512 }
524 513
@@ -532,15 +521,16 @@ more:
532 * 521 *
533 * If the read spans object boundary, just do multiple reads. 522 * If the read spans object boundary, just do multiple reads.
534 */ 523 */
535static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, 524static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
536 int *checkeof) 525 int *checkeof)
537{ 526{
538 struct file *file = iocb->ki_filp; 527 struct file *file = iocb->ki_filp;
539 struct inode *inode = file_inode(file); 528 struct inode *inode = file_inode(file);
540 struct page **pages; 529 struct page **pages;
541 u64 off = iocb->ki_pos; 530 u64 off = iocb->ki_pos;
542 int num_pages, ret; 531 int num_pages;
543 size_t len = iov_iter_count(i); 532 ssize_t ret;
533 size_t len = iov_iter_count(to);
544 534
545 dout("sync_read on file %p %llu~%u %s\n", file, off, 535 dout("sync_read on file %p %llu~%u %s\n", file, off,
546 (unsigned)len, 536 (unsigned)len,
@@ -559,35 +549,56 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
559 if (ret < 0) 549 if (ret < 0)
560 return ret; 550 return ret;
561 551
562 num_pages = calc_pages_for(off, len); 552 if (unlikely(to->type & ITER_PIPE)) {
563 pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 553 size_t page_off;
564 if (IS_ERR(pages)) 554 ret = iov_iter_get_pages_alloc(to, &pages, len,
565 return PTR_ERR(pages); 555 &page_off);
566 ret = striped_read(inode, off, len, pages, 556 if (ret <= 0)
567 num_pages, checkeof); 557 return -ENOMEM;
568 if (ret > 0) { 558 num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE);
569 int l, k = 0; 559
570 size_t left = ret; 560 ret = striped_read(inode, off, ret, pages, num_pages,
571 561 page_off, checkeof);
572 while (left) { 562 if (ret > 0) {
573 size_t page_off = off & ~PAGE_MASK; 563 iov_iter_advance(to, ret);
574 size_t copy = min_t(size_t, left, 564 off += ret;
575 PAGE_SIZE - page_off); 565 } else {
576 l = copy_page_to_iter(pages[k++], page_off, copy, i); 566 iov_iter_advance(to, 0);
577 off += l; 567 }
578 left -= l; 568 ceph_put_page_vector(pages, num_pages, false);
579 if (l < copy) 569 } else {
580 break; 570 num_pages = calc_pages_for(off, len);
571 pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
572 if (IS_ERR(pages))
573 return PTR_ERR(pages);
574
575 ret = striped_read(inode, off, len, pages, num_pages,
576 (off & ~PAGE_MASK), checkeof);
577 if (ret > 0) {
578 int l, k = 0;
579 size_t left = ret;
580
581 while (left) {
582 size_t page_off = off & ~PAGE_MASK;
583 size_t copy = min_t(size_t, left,
584 PAGE_SIZE - page_off);
585 l = copy_page_to_iter(pages[k++], page_off,
586 copy, to);
587 off += l;
588 left -= l;
589 if (l < copy)
590 break;
591 }
581 } 592 }
593 ceph_release_page_vector(pages, num_pages);
582 } 594 }
583 ceph_release_page_vector(pages, num_pages);
584 595
585 if (off > iocb->ki_pos) { 596 if (off > iocb->ki_pos) {
586 ret = off - iocb->ki_pos; 597 ret = off - iocb->ki_pos;
587 iocb->ki_pos = off; 598 iocb->ki_pos = off;
588 } 599 }
589 600
590 dout("sync_read result %d\n", ret); 601 dout("sync_read result %zd\n", ret);
591 return ret; 602 return ret;
592} 603}
593 604
@@ -849,7 +860,7 @@ void ceph_sync_write_wait(struct inode *inode)
849 860
850 dout("sync_write_wait on tid %llu (until %llu)\n", 861 dout("sync_write_wait on tid %llu (until %llu)\n",
851 req->r_tid, last_tid); 862 req->r_tid, last_tid);
852 wait_for_completion(&req->r_safe_completion); 863 wait_for_completion(&req->r_done_completion);
853 ceph_osdc_put_request(req); 864 ceph_osdc_put_request(req);
854 865
855 spin_lock(&ci->i_unsafe_lock); 866 spin_lock(&ci->i_unsafe_lock);
@@ -902,7 +913,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
902 pos >> PAGE_SHIFT, 913 pos >> PAGE_SHIFT,
903 (pos + count) >> PAGE_SHIFT); 914 (pos + count) >> PAGE_SHIFT);
904 if (ret2 < 0) 915 if (ret2 < 0)
905 dout("invalidate_inode_pages2_range returned %d\n", ret); 916 dout("invalidate_inode_pages2_range returned %d\n", ret2);
906 917
907 flags = CEPH_OSD_FLAG_ORDERSNAP | 918 flags = CEPH_OSD_FLAG_ORDERSNAP |
908 CEPH_OSD_FLAG_ONDISK | 919 CEPH_OSD_FLAG_ONDISK |
@@ -1245,8 +1256,9 @@ again:
1245 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", 1256 dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
1246 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, 1257 inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
1247 ceph_cap_string(got)); 1258 ceph_cap_string(got));
1248 1259 current->journal_info = filp;
1249 ret = generic_file_read_iter(iocb, to); 1260 ret = generic_file_read_iter(iocb, to);
1261 current->journal_info = NULL;
1250 } 1262 }
1251 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 1263 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
1252 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); 1264 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
@@ -1766,6 +1778,7 @@ const struct file_operations ceph_file_fops = {
1766 .fsync = ceph_fsync, 1778 .fsync = ceph_fsync,
1767 .lock = ceph_lock, 1779 .lock = ceph_lock,
1768 .flock = ceph_flock, 1780 .flock = ceph_flock,
1781 .splice_read = generic_file_splice_read,
1769 .splice_write = iter_file_splice_write, 1782 .splice_write = iter_file_splice_write,
1770 .unlocked_ioctl = ceph_ioctl, 1783 .unlocked_ioctl = ceph_ioctl,
1771 .compat_ioctl = ceph_ioctl, 1784 .compat_ioctl = ceph_ioctl,