diff options
| author | Yan, Zheng <zheng.z.yan@intel.com> | 2013-02-28 21:55:39 -0500 |
|---|---|---|
| committer | Sage Weil <sage@inktank.com> | 2013-05-02 00:16:10 -0400 |
| commit | 6070e0c1e2b515ad5edc2f8224031b051bd08109 (patch) | |
| tree | 2a9bb6aea63854927ce03a2ecee8ccaa6b914008 /fs/ceph/file.c | |
| parent | 7971bd92baf729fcebe04d7330ac22dc668d0261 (diff) | |
ceph: don't early drop Fw cap
ceph_aio_write() has an optimization that marks the CEPH_CAP_FILE_WR
cap dirty before the data is copied to the page cache and the inode size
is updated. The optimization avoids slow cap revocation caused by
balance_dirty_pages(), but it introduces an inode size update race: if
ceph_check_caps() flushes the dirty cap before the inode size is
updated, the MDS can miss the new inode size. So just remove the
optimization.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
Diffstat (limited to 'fs/ceph/file.c')
| -rw-r--r-- | fs/ceph/file.c | 42 |
1 files changed, 17 insertions, 25 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b86d2a0eb145..3d1aefeee948 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -724,9 +724,12 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 724 | if (ceph_snap(inode) != CEPH_NOSNAP) | 724 | if (ceph_snap(inode) != CEPH_NOSNAP) |
| 725 | return -EROFS; | 725 | return -EROFS; |
| 726 | 726 | ||
| 727 | sb_start_write(inode->i_sb); | ||
| 727 | retry_snap: | 728 | retry_snap: |
| 728 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) | 729 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { |
| 729 | return -ENOSPC; | 730 | ret = -ENOSPC; |
| 731 | goto out; | ||
| 732 | } | ||
| 730 | __ceph_do_pending_vmtruncate(inode); | 733 | __ceph_do_pending_vmtruncate(inode); |
| 731 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 734 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", |
| 732 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 735 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
| @@ -750,29 +753,10 @@ retry_snap: | |||
| 750 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 753 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
| 751 | &iocb->ki_pos); | 754 | &iocb->ki_pos); |
| 752 | } else { | 755 | } else { |
| 753 | /* | 756 | mutex_lock(&inode->i_mutex); |
| 754 | * buffered write; drop Fw early to avoid slow | 757 | ret = __generic_file_aio_write(iocb, iov, nr_segs, |
| 755 | * revocation if we get stuck on balance_dirty_pages | 758 | &iocb->ki_pos); |
| 756 | */ | 759 | mutex_unlock(&inode->i_mutex); |
| 757 | int dirty; | ||
| 758 | |||
| 759 | spin_lock(&ci->i_ceph_lock); | ||
| 760 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | ||
| 761 | spin_unlock(&ci->i_ceph_lock); | ||
| 762 | ceph_put_cap_refs(ci, got); | ||
| 763 | |||
| 764 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
| 765 | if ((ret >= 0 || ret == -EIOCBQUEUED) && | ||
| 766 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) | ||
| 767 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | ||
| 768 | err = vfs_fsync_range(file, pos, pos + ret - 1, 1); | ||
| 769 | if (err < 0) | ||
| 770 | ret = err; | ||
| 771 | } | ||
| 772 | |||
| 773 | if (dirty) | ||
| 774 | __mark_inode_dirty(inode, dirty); | ||
| 775 | goto out; | ||
| 776 | } | 760 | } |
| 777 | 761 | ||
| 778 | if (ret >= 0) { | 762 | if (ret >= 0) { |
| @@ -790,12 +774,20 @@ out_put: | |||
| 790 | ceph_cap_string(got)); | 774 | ceph_cap_string(got)); |
| 791 | ceph_put_cap_refs(ci, got); | 775 | ceph_put_cap_refs(ci, got); |
| 792 | 776 | ||
| 777 | if (ret >= 0 && | ||
| 778 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || | ||
| 779 | ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | ||
| 780 | err = vfs_fsync_range(file, pos, pos + ret - 1, 1); | ||
| 781 | if (err < 0) | ||
| 782 | ret = err; | ||
| 783 | } | ||
| 793 | out: | 784 | out: |
| 794 | if (ret == -EOLDSNAPC) { | 785 | if (ret == -EOLDSNAPC) { |
| 795 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", | 786 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", |
| 796 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); | 787 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); |
| 797 | goto retry_snap; | 788 | goto retry_snap; |
| 798 | } | 789 | } |
| 790 | sb_end_write(inode->i_sb); | ||
| 799 | 791 | ||
| 800 | return ret; | 792 | return ret; |
| 801 | } | 793 | } |
