diff options
author | Sage Weil <sage@inktank.com> | 2013-05-02 00:15:58 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-05-02 00:15:58 -0400 |
commit | 7971bd92baf729fcebe04d7330ac22dc668d0261 (patch) | |
tree | 78a8ea3b4e072e52840dac968dfacfff737765bd /fs/ceph/file.c | |
parent | a8673d61ad77ddf2118599507bd40cc345e95368 (diff) |
ceph: revert commit 22cddde104
Commit 22cddde104 breaks the atomicity of write operations; it also
introduces a deadlock between write and truncate.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>
Conflicts:
fs/ceph/addr.c
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- | fs/ceph/file.c | 73 |
1 files changed, 43 insertions, 30 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index bf338d9b67e3..b86d2a0eb145 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -718,53 +718,63 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
718 | struct ceph_osd_client *osdc = | 718 | struct ceph_osd_client *osdc = |
719 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | 719 | &ceph_sb_to_client(inode->i_sb)->client->osdc; |
720 | loff_t endoff = pos + iov->iov_len; | 720 | loff_t endoff = pos + iov->iov_len; |
721 | int got = 0; | 721 | int want, got = 0; |
722 | int ret, err, written; | 722 | int ret, err; |
723 | 723 | ||
724 | if (ceph_snap(inode) != CEPH_NOSNAP) | 724 | if (ceph_snap(inode) != CEPH_NOSNAP) |
725 | return -EROFS; | 725 | return -EROFS; |
726 | 726 | ||
727 | retry_snap: | 727 | retry_snap: |
728 | written = 0; | ||
729 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) | 728 | if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) |
730 | return -ENOSPC; | 729 | return -ENOSPC; |
731 | __ceph_do_pending_vmtruncate(inode); | 730 | __ceph_do_pending_vmtruncate(inode); |
731 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | ||
732 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | ||
733 | inode->i_size); | ||
734 | if (fi->fmode & CEPH_FILE_MODE_LAZY) | ||
735 | want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; | ||
736 | else | ||
737 | want = CEPH_CAP_FILE_BUFFER; | ||
738 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | ||
739 | if (ret < 0) | ||
740 | goto out_put; | ||
732 | 741 | ||
733 | /* | 742 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", |
734 | * try to do a buffered write. if we don't have sufficient | 743 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
735 | * caps, we'll get -EAGAIN from generic_file_aio_write, or a | 744 | ceph_cap_string(got)); |
736 | * short write if we only get caps for some pages. | 745 | |
737 | */ | 746 | if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || |
738 | if (!(iocb->ki_filp->f_flags & O_DIRECT) && | 747 | (iocb->ki_filp->f_flags & O_DIRECT) || |
739 | !(inode->i_sb->s_flags & MS_SYNCHRONOUS) && | 748 | (inode->i_sb->s_flags & MS_SYNCHRONOUS) || |
740 | !(fi->flags & CEPH_F_SYNC)) { | 749 | (fi->flags & CEPH_F_SYNC)) { |
741 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | 750 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
742 | if (ret >= 0) | 751 | &iocb->ki_pos); |
743 | written = ret; | 752 | } else { |
753 | /* | ||
754 | * buffered write; drop Fw early to avoid slow | ||
755 | * revocation if we get stuck on balance_dirty_pages | ||
756 | */ | ||
757 | int dirty; | ||
744 | 758 | ||
759 | spin_lock(&ci->i_ceph_lock); | ||
760 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | ||
761 | spin_unlock(&ci->i_ceph_lock); | ||
762 | ceph_put_cap_refs(ci, got); | ||
763 | |||
764 | ret = generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
745 | if ((ret >= 0 || ret == -EIOCBQUEUED) && | 765 | if ((ret >= 0 || ret == -EIOCBQUEUED) && |
746 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) | 766 | ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) |
747 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { | 767 | || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { |
748 | err = vfs_fsync_range(file, pos, pos + written - 1, 1); | 768 | err = vfs_fsync_range(file, pos, pos + ret - 1, 1); |
749 | if (err < 0) | 769 | if (err < 0) |
750 | ret = err; | 770 | ret = err; |
751 | } | 771 | } |
752 | if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff) | ||
753 | goto out; | ||
754 | } | ||
755 | 772 | ||
756 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 773 | if (dirty) |
757 | inode, ceph_vinop(inode), pos + written, | 774 | __mark_inode_dirty(inode, dirty); |
758 | (unsigned)iov->iov_len - written, inode->i_size); | ||
759 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff); | ||
760 | if (ret < 0) | ||
761 | goto out; | 775 | goto out; |
776 | } | ||
762 | 777 | ||
763 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", | ||
764 | inode, ceph_vinop(inode), pos + written, | ||
765 | (unsigned)iov->iov_len - written, ceph_cap_string(got)); | ||
766 | ret = ceph_sync_write(file, iov->iov_base + written, | ||
767 | iov->iov_len - written, &iocb->ki_pos); | ||
768 | if (ret >= 0) { | 778 | if (ret >= 0) { |
769 | int dirty; | 779 | int dirty; |
770 | spin_lock(&ci->i_ceph_lock); | 780 | spin_lock(&ci->i_ceph_lock); |
@@ -773,10 +783,13 @@ retry_snap: | |||
773 | if (dirty) | 783 | if (dirty) |
774 | __mark_inode_dirty(inode, dirty); | 784 | __mark_inode_dirty(inode, dirty); |
775 | } | 785 | } |
786 | |||
787 | out_put: | ||
776 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", | 788 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", |
777 | inode, ceph_vinop(inode), pos + written, | 789 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
778 | (unsigned)iov->iov_len - written, ceph_cap_string(got)); | 790 | ceph_cap_string(got)); |
779 | ceph_put_cap_refs(ci, got); | 791 | ceph_put_cap_refs(ci, got); |
792 | |||
780 | out: | 793 | out: |
781 | if (ret == -EOLDSNAPC) { | 794 | if (ret == -EOLDSNAPC) { |
782 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", | 795 | dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", |