diff options
author | Yan, Zheng <zheng.z.yan@intel.com> | 2013-07-02 00:40:19 -0400 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2013-07-03 18:32:56 -0400 |
commit | b415bf4f9fe25f39934f5c464125e4a2dffb6d08 (patch) | |
tree | f38df9c1746a1419e7942afb4534625175a5d353 /fs | |
parent | 5446429630257f4723829409337a26c076907d5d (diff) |
ceph: fix pending vmtruncate race
The locking order for pending vmtruncate is wrong; it can lead to the
following race:
write                       vmtruncate work
------------------------    ----------------------
lock i_mutex
check i_truncate_pending    check i_truncate_pending
truncate_inode_pages()      lock i_mutex (blocked)
copy data to page cache
unlock i_mutex
                            truncate_inode_pages()
The fix is to take i_mutex before calling __ceph_do_pending_vmtruncate().
Fixes: http://tracker.ceph.com/issues/5453
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/caps.c | 6 | ||||
-rw-r--r-- | fs/ceph/file.c | 2 | ||||
-rw-r--r-- | fs/ceph/inode.c | 14 | ||||
-rw-r--r-- | fs/ceph/super.h | 2 |
4 files changed, 13 insertions, 11 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 8ec27b130cc9..16266f3e9a33 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -2057,7 +2057,11 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, | |||
2057 | /* finish pending truncate */ | 2057 | /* finish pending truncate */ |
2058 | while (ci->i_truncate_pending) { | 2058 | while (ci->i_truncate_pending) { |
2059 | spin_unlock(&ci->i_ceph_lock); | 2059 | spin_unlock(&ci->i_ceph_lock); |
2060 | __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); | 2060 | if (!(need & CEPH_CAP_FILE_WR)) |
2061 | mutex_lock(&inode->i_mutex); | ||
2062 | __ceph_do_pending_vmtruncate(inode); | ||
2063 | if (!(need & CEPH_CAP_FILE_WR)) | ||
2064 | mutex_unlock(&inode->i_mutex); | ||
2061 | spin_lock(&ci->i_ceph_lock); | 2065 | spin_lock(&ci->i_ceph_lock); |
2062 | } | 2066 | } |
2063 | 2067 | ||
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7c69f4f0dee6..a44d5153179b 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -822,7 +822,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) | |||
822 | int ret; | 822 | int ret; |
823 | 823 | ||
824 | mutex_lock(&inode->i_mutex); | 824 | mutex_lock(&inode->i_mutex); |
825 | __ceph_do_pending_vmtruncate(inode, false); | 825 | __ceph_do_pending_vmtruncate(inode); |
826 | 826 | ||
827 | if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { | 827 | if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { |
828 | ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); | 828 | ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index be0f7e20d62e..4906ada4a97c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -1465,7 +1465,9 @@ static void ceph_vmtruncate_work(struct work_struct *work) | |||
1465 | struct inode *inode = &ci->vfs_inode; | 1465 | struct inode *inode = &ci->vfs_inode; |
1466 | 1466 | ||
1467 | dout("vmtruncate_work %p\n", inode); | 1467 | dout("vmtruncate_work %p\n", inode); |
1468 | __ceph_do_pending_vmtruncate(inode, true); | 1468 | mutex_lock(&inode->i_mutex); |
1469 | __ceph_do_pending_vmtruncate(inode); | ||
1470 | mutex_unlock(&inode->i_mutex); | ||
1469 | iput(inode); | 1471 | iput(inode); |
1470 | } | 1472 | } |
1471 | 1473 | ||
@@ -1492,7 +1494,7 @@ void ceph_queue_vmtruncate(struct inode *inode) | |||
1492 | * Make sure any pending truncation is applied before doing anything | 1494 | * Make sure any pending truncation is applied before doing anything |
1493 | * that may depend on it. | 1495 | * that may depend on it. |
1494 | */ | 1496 | */ |
1495 | void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock) | 1497 | void __ceph_do_pending_vmtruncate(struct inode *inode) |
1496 | { | 1498 | { |
1497 | struct ceph_inode_info *ci = ceph_inode(inode); | 1499 | struct ceph_inode_info *ci = ceph_inode(inode); |
1498 | u64 to; | 1500 | u64 to; |
@@ -1525,11 +1527,7 @@ retry: | |||
1525 | ci->i_truncate_pending, to); | 1527 | ci->i_truncate_pending, to); |
1526 | spin_unlock(&ci->i_ceph_lock); | 1528 | spin_unlock(&ci->i_ceph_lock); |
1527 | 1529 | ||
1528 | if (needlock) | ||
1529 | mutex_lock(&inode->i_mutex); | ||
1530 | truncate_inode_pages(inode->i_mapping, to); | 1530 | truncate_inode_pages(inode->i_mapping, to); |
1531 | if (needlock) | ||
1532 | mutex_unlock(&inode->i_mutex); | ||
1533 | 1531 | ||
1534 | spin_lock(&ci->i_ceph_lock); | 1532 | spin_lock(&ci->i_ceph_lock); |
1535 | if (to == ci->i_truncate_size) { | 1533 | if (to == ci->i_truncate_size) { |
@@ -1588,7 +1586,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1588 | if (ceph_snap(inode) != CEPH_NOSNAP) | 1586 | if (ceph_snap(inode) != CEPH_NOSNAP) |
1589 | return -EROFS; | 1587 | return -EROFS; |
1590 | 1588 | ||
1591 | __ceph_do_pending_vmtruncate(inode, false); | 1589 | __ceph_do_pending_vmtruncate(inode); |
1592 | 1590 | ||
1593 | err = inode_change_ok(inode, attr); | 1591 | err = inode_change_ok(inode, attr); |
1594 | if (err != 0) | 1592 | if (err != 0) |
@@ -1770,7 +1768,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1770 | ceph_cap_string(dirtied), mask); | 1768 | ceph_cap_string(dirtied), mask); |
1771 | 1769 | ||
1772 | ceph_mdsc_put_request(req); | 1770 | ceph_mdsc_put_request(req); |
1773 | __ceph_do_pending_vmtruncate(inode, false); | 1771 | __ceph_do_pending_vmtruncate(inode); |
1774 | return err; | 1772 | return err; |
1775 | out: | 1773 | out: |
1776 | spin_unlock(&ci->i_ceph_lock); | 1774 | spin_unlock(&ci->i_ceph_lock); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index dfbb729b3130..cbded572345e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -692,7 +692,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req, | |||
692 | extern int ceph_inode_holds_cap(struct inode *inode, int mask); | 692 | extern int ceph_inode_holds_cap(struct inode *inode, int mask); |
693 | 693 | ||
694 | extern int ceph_inode_set_size(struct inode *inode, loff_t size); | 694 | extern int ceph_inode_set_size(struct inode *inode, loff_t size); |
695 | extern void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock); | 695 | extern void __ceph_do_pending_vmtruncate(struct inode *inode); |
696 | extern void ceph_queue_vmtruncate(struct inode *inode); | 696 | extern void ceph_queue_vmtruncate(struct inode *inode); |
697 | 697 | ||
698 | extern void ceph_queue_invalidate(struct inode *inode); | 698 | extern void ceph_queue_invalidate(struct inode *inode); |