aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/file.c
diff options
context:
space:
mode:
author: Sage Weil <sage@inktank.com> 2013-05-02 00:15:58 -0400
committer: Sage Weil <sage@inktank.com> 2013-05-02 00:15:58 -0400
commit: 7971bd92baf729fcebe04d7330ac22dc668d0261 (patch)
tree: 78a8ea3b4e072e52840dac968dfacfff737765bd /fs/ceph/file.c
parent: a8673d61ad77ddf2118599507bd40cc345e95368 (diff)
ceph: revert commit 22cddde104
Commit 22cddde104 breaks the atomicity of write operations; it also introduces a deadlock between write and truncate.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Greg Farnum <greg@inktank.com>

Conflicts:
	fs/ceph/addr.c
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- fs/ceph/file.c 73
1 files changed, 43 insertions, 30 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index bf338d9b67e3..b86d2a0eb145 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -718,53 +718,63 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
718 struct ceph_osd_client *osdc = 718 struct ceph_osd_client *osdc =
719 &ceph_sb_to_client(inode->i_sb)->client->osdc; 719 &ceph_sb_to_client(inode->i_sb)->client->osdc;
720 loff_t endoff = pos + iov->iov_len; 720 loff_t endoff = pos + iov->iov_len;
721 int got = 0; 721 int want, got = 0;
722 int ret, err, written; 722 int ret, err;
723 723
724 if (ceph_snap(inode) != CEPH_NOSNAP) 724 if (ceph_snap(inode) != CEPH_NOSNAP)
725 return -EROFS; 725 return -EROFS;
726 726
727retry_snap: 727retry_snap:
728 written = 0;
729 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) 728 if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
730 return -ENOSPC; 729 return -ENOSPC;
731 __ceph_do_pending_vmtruncate(inode); 730 __ceph_do_pending_vmtruncate(inode);
731 dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
732 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
733 inode->i_size);
734 if (fi->fmode & CEPH_FILE_MODE_LAZY)
735 want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
736 else
737 want = CEPH_CAP_FILE_BUFFER;
738 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
739 if (ret < 0)
740 goto out_put;
732 741
733 /* 742 dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
734 * try to do a buffered write. if we don't have sufficient 743 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
735 * caps, we'll get -EAGAIN from generic_file_aio_write, or a 744 ceph_cap_string(got));
736 * short write if we only get caps for some pages. 745
737 */ 746 if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
738 if (!(iocb->ki_filp->f_flags & O_DIRECT) && 747 (iocb->ki_filp->f_flags & O_DIRECT) ||
739 !(inode->i_sb->s_flags & MS_SYNCHRONOUS) && 748 (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
740 !(fi->flags & CEPH_F_SYNC)) { 749 (fi->flags & CEPH_F_SYNC)) {
741 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 750 ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
742 if (ret >= 0) 751 &iocb->ki_pos);
743 written = ret; 752 } else {
753 /*
754 * buffered write; drop Fw early to avoid slow
755 * revocation if we get stuck on balance_dirty_pages
756 */
757 int dirty;
744 758
759 spin_lock(&ci->i_ceph_lock);
760 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
761 spin_unlock(&ci->i_ceph_lock);
762 ceph_put_cap_refs(ci, got);
763
764 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
745 if ((ret >= 0 || ret == -EIOCBQUEUED) && 765 if ((ret >= 0 || ret == -EIOCBQUEUED) &&
746 ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) 766 ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
747 || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { 767 || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
748 err = vfs_fsync_range(file, pos, pos + written - 1, 1); 768 err = vfs_fsync_range(file, pos, pos + ret - 1, 1);
749 if (err < 0) 769 if (err < 0)
750 ret = err; 770 ret = err;
751 } 771 }
752 if ((ret < 0 && ret != -EAGAIN) || pos + written >= endoff)
753 goto out;
754 }
755 772
756 dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", 773 if (dirty)
757 inode, ceph_vinop(inode), pos + written, 774 __mark_inode_dirty(inode, dirty);
758 (unsigned)iov->iov_len - written, inode->i_size);
759 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, 0, &got, endoff);
760 if (ret < 0)
761 goto out; 775 goto out;
776 }
762 777
763 dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
764 inode, ceph_vinop(inode), pos + written,
765 (unsigned)iov->iov_len - written, ceph_cap_string(got));
766 ret = ceph_sync_write(file, iov->iov_base + written,
767 iov->iov_len - written, &iocb->ki_pos);
768 if (ret >= 0) { 778 if (ret >= 0) {
769 int dirty; 779 int dirty;
770 spin_lock(&ci->i_ceph_lock); 780 spin_lock(&ci->i_ceph_lock);
@@ -773,10 +783,13 @@ retry_snap:
773 if (dirty) 783 if (dirty)
774 __mark_inode_dirty(inode, dirty); 784 __mark_inode_dirty(inode, dirty);
775 } 785 }
786
787out_put:
776 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", 788 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
777 inode, ceph_vinop(inode), pos + written, 789 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
778 (unsigned)iov->iov_len - written, ceph_cap_string(got)); 790 ceph_cap_string(got));
779 ceph_put_cap_refs(ci, got); 791 ceph_put_cap_refs(ci, got);
792
780out: 793out:
781 if (ret == -EOLDSNAPC) { 794 if (ret == -EOLDSNAPC) {
782 dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", 795 dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",