diff options
author | Yan, Zheng <zheng.z.yan@intel.com> | 2013-02-28 21:55:39 -0500 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-05-02 00:16:10 -0400 |
commit | 6070e0c1e2b515ad5edc2f8224031b051bd08109 (patch) | |
tree | 2a9bb6aea63854927ce03a2ecee8ccaa6b914008 | |
parent | 7971bd92baf729fcebe04d7330ac22dc668d0261 (diff) |
ceph: don't early drop Fw cap
ceph_aio_write() has an optimization that marks the CEPH_CAP_FILE_WR
cap dirty before data is copied to the page cache and the inode size is
updated. The optimization avoids slow cap revocation caused by
balance_dirty_pages(), but introduces an inode size update race: if
ceph_check_caps() flushes the dirty cap before the inode size is
updated, the MDS can miss the new inode size. So just remove the
optimization.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
-rw-r--r-- | fs/ceph/file.c | 42 |
1 files changed, 17 insertions, 25 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b86d2a0eb145..3d1aefeee948 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -724,9 +724,12 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
724 | if (ceph_snap(inode) != CEPH_NOSNAP) | 724 | if (ceph_snap(inode) != CEPH_NOSNAP) |
725 | return -EROFS; | 725 | return -EROFS; |
726 | 726 | ||
727 | sb_start_write(inode->i_sb); | ||
727 | retry_snap: | 728 | retry_snap: |
728 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) | 729 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { |
729 | return -ENOSPC; | 730 | ret = -ENOSPC; |
731 | goto out; | ||
732 | } | ||
730 | __ceph_do_pending_vmtruncate(inode); | 733 | __ceph_do_pending_vmtruncate(inode); |
731 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 734 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", |
732 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 735 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
@@ -750,29 +753,10 @@ retry_snap: | |||
750 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 753 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
751 | &iocb->ki_pos); | 754 | &iocb->ki_pos); |
752 | } else { | 755 | } else { |
753 | /* | 756 | mutex_lock(&inode->i_mutex); |
754 | * buffered write; drop Fw early to avoid slow | 757 | ret = __generic_file_aio_write(iocb, iov, nr_segs, |
755 | * revocation if we get stuck on balance_dirty_pages | 758 | &iocb->ki_pos); |
756 | */ | 759 | mutex_unlock(&inode->i_mutex); |
757 | int dirty; | ||
758 | |||
759 | spin_lock(&ci->i_ceph_lock); | ||
760 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | ||
761 | spin_unlock(&ci->i_ceph_lock); | ||
762 | ceph_put_cap_refs(ci, got); | ||
763 | |||
764 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
765 | if ((ret >= 0 || ret == -EIOCBQUEUED) && | ||
766 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) | ||
767 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | ||
768 | err = vfs_fsync_range(file, pos, pos + ret - 1, 1); | ||
769 | if (err < 0) | ||
770 | ret = err; | ||
771 | } | ||
772 | |||
773 | if (dirty) | ||
774 | __mark_inode_dirty(inode, dirty); | ||
775 | goto out; | ||
776 | } | 760 | } |
777 | 761 | ||
778 | if (ret >= 0) { | 762 | if (ret >= 0) { |
@@ -790,12 +774,20 @@ out_put: | |||
790 | ceph_cap_string(got)); | 774 | ceph_cap_string(got)); |
791 | ceph_put_cap_refs(ci, got); | 775 | ceph_put_cap_refs(ci, got); |
792 | 776 | ||
777 | if (ret >= 0 && | ||
778 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || | ||
779 | ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | ||
780 | err = vfs_fsync_range(file, pos, pos + ret - 1, 1); | ||
781 | if (err < 0) | ||
782 | ret = err; | ||
783 | } | ||
793 | out: | 784 | out: |
794 | if (ret == -EOLDSNAPC) { | 785 | if (ret == -EOLDSNAPC) { |
795 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", | 786 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", |
796 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); | 787 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); |
797 | goto retry_snap; | 788 | goto retry_snap; |
798 | } | 789 | } |
790 | sb_end_write(inode->i_sb); | ||
799 | 791 | ||
800 | return ret; | 792 | return ret; |
801 | } | 793 | } |