aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author Yan, Zheng <zheng.z.yan@intel.com> 2013-04-12 04:11:10 -0400
committer Sage Weil <sage@inktank.com> 2013-05-02 00:18:53 -0400
commit 37505d5768b9389a5ef7bc1efa465d4484a5462e (patch)
tree 208daa8785fe4847061a1dafee58ff32b5b861e6 /fs/ceph
parent 26be88087ae8a04a5b576aa2f490597b649fc132 (diff)
ceph: take i_mutex before getting Fw cap
There is a deadlock, as illustrated below. The fix is to take i_mutex before getting the Fw cap reference.

        write                    truncate                  MDS
---------------------     --------------------      --------------
get Fw cap
                          lock i_mutex
lock i_mutex (blocked)
                          request setattr.size  ->
                                                    <- revoke Fw cap

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/caps.c13
-rw-r--r--fs/ceph/file.c12
2 files changed, 13 insertions, 12 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f9563108d189..da0f9b8a3bcb 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2052,6 +2052,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2052 goto out; 2052 goto out;
2053 } 2053 }
2054 2054
2055 /* finish pending truncate */
2056 while (ci->i_truncate_pending) {
2057 spin_unlock(&ci->i_ceph_lock);
2058 __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR));
2059 spin_lock(&ci->i_ceph_lock);
2060 }
2061
2055 if (need & CEPH_CAP_FILE_WR) { 2062 if (need & CEPH_CAP_FILE_WR) {
2056 if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { 2063 if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
2057 dout("get_cap_refs %p endoff %llu > maxsize %llu\n", 2064 dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
@@ -2073,12 +2080,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2073 } 2080 }
2074 have = __ceph_caps_issued(ci, &implemented); 2081 have = __ceph_caps_issued(ci, &implemented);
2075 2082
2076 /*
2077 * disallow writes while a truncate is pending
2078 */
2079 if (ci->i_truncate_pending)
2080 have &= ~CEPH_CAP_FILE_WR;
2081
2082 if ((have & need) == need) { 2083 if ((have & need) == need) {
2083 /* 2084 /*
2084 * Look at (implemented & ~have & not) so that we keep waiting 2085 * Look at (implemented & ~have & not) so that we keep waiting
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index a65acf355384..dd44f3593992 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -651,7 +651,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
651 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", 651 dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
652 inode, ceph_vinop(inode), pos, (unsigned)len, inode); 652 inode, ceph_vinop(inode), pos, (unsigned)len, inode);
653again: 653again:
654 __ceph_do_pending_vmtruncate(inode, true);
655 if (fi->fmode & CEPH_FILE_MODE_LAZY) 654 if (fi->fmode & CEPH_FILE_MODE_LAZY)
656 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; 655 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
657 else 656 else
@@ -728,7 +727,7 @@ retry_snap:
728 ret = -ENOSPC; 727 ret = -ENOSPC;
729 goto out; 728 goto out;
730 } 729 }
731 __ceph_do_pending_vmtruncate(inode, true); 730 mutex_lock(&inode->i_mutex);
732 dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", 731 dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
733 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, 732 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
734 inode->i_size); 733 inode->i_size);
@@ -737,8 +736,10 @@ retry_snap:
737 else 736 else
738 want = CEPH_CAP_FILE_BUFFER; 737 want = CEPH_CAP_FILE_BUFFER;
739 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); 738 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
740 if (ret < 0) 739 if (ret < 0) {
741 goto out_put; 740 mutex_unlock(&inode->i_mutex);
741 goto out;
742 }
742 743
743 dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", 744 dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
744 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, 745 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
@@ -748,10 +749,10 @@ retry_snap:
748 (iocb->ki_filp->f_flags & O_DIRECT) || 749 (iocb->ki_filp->f_flags & O_DIRECT) ||
749 (inode->i_sb->s_flags & MS_SYNCHRONOUS) || 750 (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
750 (fi->flags & CEPH_F_SYNC)) { 751 (fi->flags & CEPH_F_SYNC)) {
752 mutex_unlock(&inode->i_mutex);
751 ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, 753 ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
752 &iocb->ki_pos); 754 &iocb->ki_pos);
753 } else { 755 } else {
754 mutex_lock(&inode->i_mutex);
755 ret = __generic_file_aio_write(iocb, iov, nr_segs, 756 ret = __generic_file_aio_write(iocb, iov, nr_segs,
756 &iocb->ki_pos); 757 &iocb->ki_pos);
757 mutex_unlock(&inode->i_mutex); 758 mutex_unlock(&inode->i_mutex);
@@ -766,7 +767,6 @@ retry_snap:
766 __mark_inode_dirty(inode, dirty); 767 __mark_inode_dirty(inode, dirty);
767 } 768 }
768 769
769out_put:
770 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", 770 dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
771 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, 771 inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
772 ceph_cap_string(got)); 772 ceph_cap_string(got));