summaryrefslogtreecommitdiffstats
path: root/fs/ceph/addr.c
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2017-08-29 23:36:06 -0400
committerIlya Dryomov <idryomov@gmail.com>2017-09-06 13:56:55 -0400
commit1f934b00e907527cddb83984d0783cc4a029952a (patch)
tree3e1f89e89c990448a8dd84d7d00e743d84902679 /fs/ceph/addr.c
parentb072d774664b690768bdf7e068ee95a161e5f107 (diff)
ceph: properly get capsnap's size in get_oldest_context()
A capsnap's size is set by __ceph_finish_cap_snap(). While a capsnap is still being written (capsnap->writing is set), its size is zero. In that case, get_oldest_context() should read i_size instead. In addition, ceph_writepages_start() should re-check the capsnap's size after the dirty pages have been locked. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r--fs/ceph/addr.c137
1 files changed, 80 insertions, 57 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b6ac3da9ddab..03a1ee27b33c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -463,14 +463,20 @@ out:
463 return rc; 463 return rc;
464} 464}
465 465
466struct ceph_writeback_ctl
467{
468 loff_t i_size;
469 u64 truncate_size;
470 u32 truncate_seq;
471 bool size_stable;
472};
473
466/* 474/*
467 * Get ref for the oldest snapc for an inode with dirty data... that is, the 475 * Get ref for the oldest snapc for an inode with dirty data... that is, the
468 * only snap context we are allowed to write back. 476 * only snap context we are allowed to write back.
469 */ 477 */
470static struct ceph_snap_context *get_oldest_context(struct inode *inode, 478static struct ceph_snap_context *
471 loff_t *snap_size, 479get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl)
472 u64 *truncate_size,
473 u32 *truncate_seq)
474{ 480{
475 struct ceph_inode_info *ci = ceph_inode(inode); 481 struct ceph_inode_info *ci = ceph_inode(inode);
476 struct ceph_snap_context *snapc = NULL; 482 struct ceph_snap_context *snapc = NULL;
@@ -482,12 +488,17 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
482 capsnap->context, capsnap->dirty_pages); 488 capsnap->context, capsnap->dirty_pages);
483 if (capsnap->dirty_pages) { 489 if (capsnap->dirty_pages) {
484 snapc = ceph_get_snap_context(capsnap->context); 490 snapc = ceph_get_snap_context(capsnap->context);
485 if (snap_size) 491 if (ctl) {
486 *snap_size = capsnap->size; 492 if (capsnap->writing) {
487 if (truncate_size) 493 ctl->i_size = i_size_read(inode);
488 *truncate_size = capsnap->truncate_size; 494 ctl->size_stable = false;
489 if (truncate_seq) 495 } else {
490 *truncate_seq = capsnap->truncate_seq; 496 ctl->i_size = capsnap->size;
497 ctl->size_stable = true;
498 }
499 ctl->truncate_size = capsnap->truncate_size;
500 ctl->truncate_seq = capsnap->truncate_seq;
501 }
491 break; 502 break;
492 } 503 }
493 } 504 }
@@ -495,15 +506,44 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
495 snapc = ceph_get_snap_context(ci->i_head_snapc); 506 snapc = ceph_get_snap_context(ci->i_head_snapc);
496 dout(" head snapc %p has %d dirty pages\n", 507 dout(" head snapc %p has %d dirty pages\n",
497 snapc, ci->i_wrbuffer_ref_head); 508 snapc, ci->i_wrbuffer_ref_head);
498 if (truncate_size) 509 if (ctl) {
499 *truncate_size = ci->i_truncate_size; 510 ctl->i_size = i_size_read(inode);
500 if (truncate_seq) 511 ctl->truncate_size = ci->i_truncate_size;
501 *truncate_seq = ci->i_truncate_seq; 512 ctl->truncate_seq = ci->i_truncate_seq;
513 ctl->size_stable = false;
514 }
502 } 515 }
503 spin_unlock(&ci->i_ceph_lock); 516 spin_unlock(&ci->i_ceph_lock);
504 return snapc; 517 return snapc;
505} 518}
506 519
520static u64 get_writepages_data_length(struct inode *inode,
521 struct page *page, u64 start)
522{
523 struct ceph_inode_info *ci = ceph_inode(inode);
524 struct ceph_snap_context *snapc = page_snap_context(page);
525 struct ceph_cap_snap *capsnap = NULL;
526 u64 end = i_size_read(inode);
527
528 if (snapc != ci->i_head_snapc) {
529 bool found = false;
530 spin_lock(&ci->i_ceph_lock);
531 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
532 if (capsnap->context == snapc) {
533 if (!capsnap->writing)
534 end = capsnap->size;
535 found = true;
536 break;
537 }
538 }
539 spin_unlock(&ci->i_ceph_lock);
540 WARN_ON(!found);
541 }
542 if (end > page_offset(page) + PAGE_SIZE)
543 end = page_offset(page) + PAGE_SIZE;
544 return end > start ? end - start : 0;
545}
546
507/* 547/*
508 * Write a single page, but leave the page locked. 548 * Write a single page, but leave the page locked.
509 * 549 *
@@ -515,21 +555,17 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
515 struct inode *inode; 555 struct inode *inode;
516 struct ceph_inode_info *ci; 556 struct ceph_inode_info *ci;
517 struct ceph_fs_client *fsc; 557 struct ceph_fs_client *fsc;
518 struct ceph_osd_client *osdc;
519 struct ceph_snap_context *snapc, *oldest; 558 struct ceph_snap_context *snapc, *oldest;
520 loff_t page_off = page_offset(page); 559 loff_t page_off = page_offset(page);
521 loff_t snap_size = -1;
522 long writeback_stat; 560 long writeback_stat;
523 u64 truncate_size;
524 u32 truncate_seq;
525 int err, len = PAGE_SIZE; 561 int err, len = PAGE_SIZE;
562 struct ceph_writeback_ctl ceph_wbc;
526 563
527 dout("writepage %p idx %lu\n", page, page->index); 564 dout("writepage %p idx %lu\n", page, page->index);
528 565
529 inode = page->mapping->host; 566 inode = page->mapping->host;
530 ci = ceph_inode(inode); 567 ci = ceph_inode(inode);
531 fsc = ceph_inode_to_client(inode); 568 fsc = ceph_inode_to_client(inode);
532 osdc = &fsc->client->osdc;
533 569
534 /* verify this is a writeable snap context */ 570 /* verify this is a writeable snap context */
535 snapc = page_snap_context(page); 571 snapc = page_snap_context(page);
@@ -537,8 +573,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
537 dout("writepage %p page %p not dirty?\n", inode, page); 573 dout("writepage %p page %p not dirty?\n", inode, page);
538 return 0; 574 return 0;
539 } 575 }
540 oldest = get_oldest_context(inode, &snap_size, 576 oldest = get_oldest_context(inode, &ceph_wbc);
541 &truncate_size, &truncate_seq);
542 if (snapc->seq > oldest->seq) { 577 if (snapc->seq > oldest->seq) {
543 dout("writepage %p page %p snapc %p not writeable - noop\n", 578 dout("writepage %p page %p snapc %p not writeable - noop\n",
544 inode, page, snapc); 579 inode, page, snapc);
@@ -550,17 +585,14 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
550 } 585 }
551 ceph_put_snap_context(oldest); 586 ceph_put_snap_context(oldest);
552 587
553 if (snap_size == -1)
554 snap_size = i_size_read(inode);
555
556 /* is this a partial page at end of file? */ 588 /* is this a partial page at end of file? */
557 if (page_off >= snap_size) { 589 if (page_off >= ceph_wbc.i_size) {
558 dout("%p page eof %llu\n", page, snap_size); 590 dout("%p page eof %llu\n", page, ceph_wbc.i_size);
559 return 0; 591 return 0;
560 } 592 }
561 593
562 if (snap_size < page_off + len) 594 if (ceph_wbc.i_size < page_off + len)
563 len = snap_size - page_off; 595 len = ceph_wbc.i_size - page_off;
564 596
565 dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n", 597 dout("writepage %p page %p index %lu on %llu~%u snapc %p seq %lld\n",
566 inode, page, page->index, page_off, len, snapc, snapc->seq); 598 inode, page, page->index, page_off, len, snapc, snapc->seq);
@@ -571,10 +603,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
571 set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); 603 set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
572 604
573 set_page_writeback(page); 605 set_page_writeback(page);
574 err = ceph_osdc_writepages(osdc, ceph_vino(inode), 606 err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
575 &ci->i_layout, snapc, 607 &ci->i_layout, snapc, page_off, len,
576 page_off, len, 608 ceph_wbc.truncate_seq,
577 truncate_seq, truncate_size, 609 ceph_wbc.truncate_size,
578 &inode->i_mtime, &page, 1); 610 &inode->i_mtime, &page, 1);
579 if (err < 0) { 611 if (err < 0) {
580 struct writeback_control tmp_wbc; 612 struct writeback_control tmp_wbc;
@@ -745,9 +777,7 @@ static int ceph_writepages_start(struct address_space *mapping,
745 int rc = 0; 777 int rc = 0;
746 unsigned int wsize = i_blocksize(inode); 778 unsigned int wsize = i_blocksize(inode);
747 struct ceph_osd_request *req = NULL; 779 struct ceph_osd_request *req = NULL;
748 loff_t snap_size, i_size; 780 struct ceph_writeback_ctl ceph_wbc;
749 u64 truncate_size;
750 u32 truncate_seq;
751 781
752 dout("writepages_start %p (mode=%s)\n", inode, 782 dout("writepages_start %p (mode=%s)\n", inode,
753 wbc->sync_mode == WB_SYNC_NONE ? "NONE" : 783 wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
@@ -786,9 +816,7 @@ static int ceph_writepages_start(struct address_space *mapping,
786retry: 816retry:
787 /* find oldest snap context with dirty data */ 817 /* find oldest snap context with dirty data */
788 ceph_put_snap_context(snapc); 818 ceph_put_snap_context(snapc);
789 snap_size = -1; 819 snapc = get_oldest_context(inode, &ceph_wbc);
790 snapc = get_oldest_context(inode, &snap_size,
791 &truncate_size, &truncate_seq);
792 if (!snapc) { 820 if (!snapc) {
793 /* hmm, why does writepages get called when there 821 /* hmm, why does writepages get called when there
794 is no dirty data? */ 822 is no dirty data? */
@@ -798,8 +826,6 @@ retry:
798 dout(" oldest snapc is %p seq %lld (%d snaps)\n", 826 dout(" oldest snapc is %p seq %lld (%d snaps)\n",
799 snapc, snapc->seq, snapc->num_snaps); 827 snapc, snapc->seq, snapc->num_snaps);
800 828
801 i_size = i_size_read(inode);
802
803 if (last_snapc && snapc != last_snapc) { 829 if (last_snapc && snapc != last_snapc) {
804 /* if we switched to a newer snapc, restart our scan at the 830 /* if we switched to a newer snapc, restart our scan at the
805 * start of the original file range. */ 831 * start of the original file range. */
@@ -865,10 +891,9 @@ get_more_pages:
865 dout("waiting on writeback %p\n", page); 891 dout("waiting on writeback %p\n", page);
866 wait_on_page_writeback(page); 892 wait_on_page_writeback(page);
867 } 893 }
868 if (page_offset(page) >= 894 if (page_offset(page) >= ceph_wbc.i_size) {
869 (snap_size == -1 ? i_size : snap_size)) { 895 dout("%p page eof %llu\n",
870 dout("%p page eof %llu\n", page, 896 page, ceph_wbc.i_size);
871 (snap_size == -1 ? i_size : snap_size));
872 done = 1; 897 done = 1;
873 unlock_page(page); 898 unlock_page(page);
874 break; 899 break;
@@ -996,10 +1021,9 @@ new_request:
996 req = ceph_osdc_new_request(&fsc->client->osdc, 1021 req = ceph_osdc_new_request(&fsc->client->osdc,
997 &ci->i_layout, vino, 1022 &ci->i_layout, vino,
998 offset, &len, 0, num_ops, 1023 offset, &len, 0, num_ops,
999 CEPH_OSD_OP_WRITE, 1024 CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
1000 CEPH_OSD_FLAG_WRITE, 1025 snapc, ceph_wbc.truncate_seq,
1001 snapc, truncate_seq, 1026 ceph_wbc.truncate_size, false);
1002 truncate_size, false);
1003 if (IS_ERR(req)) { 1027 if (IS_ERR(req)) {
1004 req = ceph_osdc_new_request(&fsc->client->osdc, 1028 req = ceph_osdc_new_request(&fsc->client->osdc,
1005 &ci->i_layout, vino, 1029 &ci->i_layout, vino,
@@ -1008,8 +1032,8 @@ new_request:
1008 CEPH_OSD_SLAB_OPS), 1032 CEPH_OSD_SLAB_OPS),
1009 CEPH_OSD_OP_WRITE, 1033 CEPH_OSD_OP_WRITE,
1010 CEPH_OSD_FLAG_WRITE, 1034 CEPH_OSD_FLAG_WRITE,
1011 snapc, truncate_seq, 1035 snapc, ceph_wbc.truncate_seq,
1012 truncate_size, true); 1036 ceph_wbc.truncate_size, true);
1013 BUG_ON(IS_ERR(req)); 1037 BUG_ON(IS_ERR(req));
1014 } 1038 }
1015 BUG_ON(len < page_offset(pages[locked_pages - 1]) + 1039 BUG_ON(len < page_offset(pages[locked_pages - 1]) +
@@ -1046,14 +1070,15 @@ new_request:
1046 len += PAGE_SIZE; 1070 len += PAGE_SIZE;
1047 } 1071 }
1048 1072
1049 if (snap_size != -1) { 1073 if (ceph_wbc.size_stable) {
1050 len = min(len, snap_size - offset); 1074 len = min(len, ceph_wbc.i_size - offset);
1051 } else if (i == locked_pages) { 1075 } else if (i == locked_pages) {
1052 /* writepages_finish() clears writeback pages 1076 /* writepages_finish() clears writeback pages
1053 * according to the data length, so make sure 1077 * according to the data length, so make sure
1054 * data length covers all locked pages */ 1078 * data length covers all locked pages */
1055 u64 min_len = len + 1 - PAGE_SIZE; 1079 u64 min_len = len + 1 - PAGE_SIZE;
1056 len = min(len, (u64)i_size_read(inode) - offset); 1080 len = get_writepages_data_length(inode, pages[i - 1],
1081 offset);
1057 len = max(len, min_len); 1082 len = max(len, min_len);
1058 } 1083 }
1059 dout("writepages got pages at %llu~%llu\n", offset, len); 1084 dout("writepages got pages at %llu~%llu\n", offset, len);
@@ -1137,8 +1162,7 @@ out:
1137static int context_is_writeable_or_written(struct inode *inode, 1162static int context_is_writeable_or_written(struct inode *inode,
1138 struct ceph_snap_context *snapc) 1163 struct ceph_snap_context *snapc)
1139{ 1164{
1140 struct ceph_snap_context *oldest = get_oldest_context(inode, NULL, 1165 struct ceph_snap_context *oldest = get_oldest_context(inode, NULL);
1141 NULL, NULL);
1142 int ret = !oldest || snapc->seq <= oldest->seq; 1166 int ret = !oldest || snapc->seq <= oldest->seq;
1143 1167
1144 ceph_put_snap_context(oldest); 1168 ceph_put_snap_context(oldest);
@@ -1183,8 +1207,7 @@ retry_locked:
1183 * this page is already dirty in another (older) snap 1207 * this page is already dirty in another (older) snap
1184 * context! is it writeable now? 1208 * context! is it writeable now?
1185 */ 1209 */
1186 oldest = get_oldest_context(inode, NULL, NULL, NULL); 1210 oldest = get_oldest_context(inode, NULL);
1187
1188 if (snapc->seq > oldest->seq) { 1211 if (snapc->seq > oldest->seq) {
1189 ceph_put_snap_context(oldest); 1212 ceph_put_snap_context(oldest);
1190 dout(" page %p snapc %p not current or oldest\n", 1213 dout(" page %p snapc %p not current or oldest\n",