diff options
| author | Yan, Zheng <zheng.z.yan@intel.com> | 2013-04-12 04:11:10 -0400 |
|---|---|---|
| committer | Sage Weil <sage@inktank.com> | 2013-05-02 00:18:53 -0400 |
| commit | 37505d5768b9389a5ef7bc1efa465d4484a5462e (patch) | |
| tree | 208daa8785fe4847061a1dafee58ff32b5b861e6 /fs/ceph | |
| parent | 26be88087ae8a04a5b576aa2f490597b649fc132 (diff) | |
ceph: take i_mutex before getting Fw cap
There is a deadlock, as illustrated below. The fix is to take i_mutex
before getting the Fw cap reference.
        write                    truncate                  MDS
---------------------    --------------------    --------------
get Fw cap
                         lock i_mutex
lock i_mutex (blocked)
                         request setattr.size ->
                                                 <- revoke Fw cap
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs/ceph')
| -rw-r--r-- | fs/ceph/caps.c | 13 | ||||
| -rw-r--r-- | fs/ceph/file.c | 12 |
2 files changed, 13 insertions, 12 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index f9563108d189..da0f9b8a3bcb 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -2052,6 +2052,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | |||
| 2052 | goto out; | 2052 | goto out; |
| 2053 | } | 2053 | } |
| 2054 | 2054 | ||
| 2055 | /* finish pending truncate */ | ||
| 2056 | while (ci->i_truncate_pending) { | ||
| 2057 | spin_unlock(&ci->i_ceph_lock); | ||
| 2058 | __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); | ||
| 2059 | spin_lock(&ci->i_ceph_lock); | ||
| 2060 | } | ||
| 2061 | |||
| 2055 | if (need & CEPH_CAP_FILE_WR) { | 2062 | if (need & CEPH_CAP_FILE_WR) { |
| 2056 | if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { | 2063 | if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { |
| 2057 | dout("get_cap_refs %p endoff %llu > maxsize %llu\n", | 2064 | dout("get_cap_refs %p endoff %llu > maxsize %llu\n", |
| @@ -2073,12 +2080,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | |||
| 2073 | } | 2080 | } |
| 2074 | have = __ceph_caps_issued(ci, &implemented); | 2081 | have = __ceph_caps_issued(ci, &implemented); |
| 2075 | 2082 | ||
| 2076 | /* | ||
| 2077 | * disallow writes while a truncate is pending | ||
| 2078 | */ | ||
| 2079 | if (ci->i_truncate_pending) | ||
| 2080 | have &= ~CEPH_CAP_FILE_WR; | ||
| 2081 | |||
| 2082 | if ((have & need) == need) { | 2083 | if ((have & need) == need) { |
| 2083 | /* | 2084 | /* |
| 2084 | * Look at (implemented & ~have & not) so that we keep waiting | 2085 | * Look at (implemented & ~have & not) so that we keep waiting |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a65acf355384..dd44f3593992 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -651,7 +651,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 651 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", | 651 | dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", |
| 652 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); | 652 | inode, ceph_vinop(inode), pos, (unsigned)len, inode); |
| 653 | again: | 653 | again: |
| 654 | __ceph_do_pending_vmtruncate(inode, true); | ||
| 655 | if (fi->fmode & CEPH_FILE_MODE_LAZY) | 654 | if (fi->fmode & CEPH_FILE_MODE_LAZY) |
| 656 | want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; | 655 | want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; |
| 657 | else | 656 | else |
| @@ -728,7 +727,7 @@ retry_snap: | |||
| 728 | ret = -ENOSPC; | 727 | ret = -ENOSPC; |
| 729 | goto out; | 728 | goto out; |
| 730 | } | 729 | } |
| 731 | __ceph_do_pending_vmtruncate(inode, true); | 730 | mutex_lock(&inode->i_mutex); |
| 732 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", | 731 | dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", |
| 733 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 732 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
| 734 | inode->i_size); | 733 | inode->i_size); |
| @@ -737,8 +736,10 @@ retry_snap: | |||
| 737 | else | 736 | else |
| 738 | want = CEPH_CAP_FILE_BUFFER; | 737 | want = CEPH_CAP_FILE_BUFFER; |
| 739 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); | 738 | ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); |
| 740 | if (ret < 0) | 739 | if (ret < 0) { |
| 741 | goto out_put; | 740 | mutex_unlock(&inode->i_mutex); |
| 741 | goto out; | ||
| 742 | } | ||
| 742 | 743 | ||
| 743 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", | 744 | dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", |
| 744 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 745 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
| @@ -748,10 +749,10 @@ retry_snap: | |||
| 748 | (iocb->ki_filp->f_flags & O_DIRECT) || | 749 | (iocb->ki_filp->f_flags & O_DIRECT) || |
| 749 | (inode->i_sb->s_flags & MS_SYNCHRONOUS) || | 750 | (inode->i_sb->s_flags & MS_SYNCHRONOUS) || |
| 750 | (fi->flags & CEPH_F_SYNC)) { | 751 | (fi->flags & CEPH_F_SYNC)) { |
| 752 | mutex_unlock(&inode->i_mutex); | ||
| 751 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, | 753 | ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, |
| 752 | &iocb->ki_pos); | 754 | &iocb->ki_pos); |
| 753 | } else { | 755 | } else { |
| 754 | mutex_lock(&inode->i_mutex); | ||
| 755 | ret = __generic_file_aio_write(iocb, iov, nr_segs, | 756 | ret = __generic_file_aio_write(iocb, iov, nr_segs, |
| 756 | &iocb->ki_pos); | 757 | &iocb->ki_pos); |
| 757 | mutex_unlock(&inode->i_mutex); | 758 | mutex_unlock(&inode->i_mutex); |
| @@ -766,7 +767,6 @@ retry_snap: | |||
| 766 | __mark_inode_dirty(inode, dirty); | 767 | __mark_inode_dirty(inode, dirty); |
| 767 | } | 768 | } |
| 768 | 769 | ||
| 769 | out_put: | ||
| 770 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", | 770 | dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", |
| 771 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, | 771 | inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, |
| 772 | ceph_cap_string(got)); | 772 | ceph_cap_string(got)); |
