aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.z.yan@intel.com> 2013-07-02 00:40:19 -0400
committer: Sage Weil <sage@inktank.com> 2013-07-03 18:32:56 -0400
commit: b415bf4f9fe25f39934f5c464125e4a2dffb6d08 (patch)
tree: f38df9c1746a1419e7942afb4534625175a5d353
parent: 5446429630257f4723829409337a26c076907d5d (diff)
ceph: fix pending vmtruncate race
The locking order for pending vmtruncate is wrong; it can lead to the
following race:

          write                    vmtruncate work
  ------------------------     ----------------------
  lock i_mutex
  check i_truncate_pending     check i_truncate_pending
  truncate_inode_pages()       lock i_mutex (blocked)
  copy data to page cache
  unlock i_mutex
                               truncate_inode_pages()

The fix is to take i_mutex before calling __ceph_do_pending_vmtruncate().

Fixes: http://tracker.ceph.com/issues/5453
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r-- fs/ceph/caps.c 6
-rw-r--r-- fs/ceph/file.c 2
-rw-r--r-- fs/ceph/inode.c 14
-rw-r--r-- fs/ceph/super.h 2
4 files changed, 13 insertions, 11 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8ec27b130cc9..16266f3e9a33 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2057,7 +2057,11 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2057 /* finish pending truncate */ 2057 /* finish pending truncate */
2058 while (ci->i_truncate_pending) { 2058 while (ci->i_truncate_pending) {
2059 spin_unlock(&ci->i_ceph_lock); 2059 spin_unlock(&ci->i_ceph_lock);
2060 __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR)); 2060 if (!(need & CEPH_CAP_FILE_WR))
2061 mutex_lock(&inode->i_mutex);
2062 __ceph_do_pending_vmtruncate(inode);
2063 if (!(need & CEPH_CAP_FILE_WR))
2064 mutex_unlock(&inode->i_mutex);
2061 spin_lock(&ci->i_ceph_lock); 2065 spin_lock(&ci->i_ceph_lock);
2062 } 2066 }
2063 2067
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7c69f4f0dee6..a44d5153179b 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -822,7 +822,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
822 int ret; 822 int ret;
823 823
824 mutex_lock(&inode->i_mutex); 824 mutex_lock(&inode->i_mutex);
825 __ceph_do_pending_vmtruncate(inode, false); 825 __ceph_do_pending_vmtruncate(inode);
826 826
827 if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { 827 if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
828 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 828 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index be0f7e20d62e..4906ada4a97c 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1465,7 +1465,9 @@ static void ceph_vmtruncate_work(struct work_struct *work)
1465 struct inode *inode = &ci->vfs_inode; 1465 struct inode *inode = &ci->vfs_inode;
1466 1466
1467 dout("vmtruncate_work %p\n", inode); 1467 dout("vmtruncate_work %p\n", inode);
1468 __ceph_do_pending_vmtruncate(inode, true); 1468 mutex_lock(&inode->i_mutex);
1469 __ceph_do_pending_vmtruncate(inode);
1470 mutex_unlock(&inode->i_mutex);
1469 iput(inode); 1471 iput(inode);
1470} 1472}
1471 1473
@@ -1492,7 +1494,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
1492 * Make sure any pending truncation is applied before doing anything 1494 * Make sure any pending truncation is applied before doing anything
1493 * that may depend on it. 1495 * that may depend on it.
1494 */ 1496 */
1495void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock) 1497void __ceph_do_pending_vmtruncate(struct inode *inode)
1496{ 1498{
1497 struct ceph_inode_info *ci = ceph_inode(inode); 1499 struct ceph_inode_info *ci = ceph_inode(inode);
1498 u64 to; 1500 u64 to;
@@ -1525,11 +1527,7 @@ retry:
1525 ci->i_truncate_pending, to); 1527 ci->i_truncate_pending, to);
1526 spin_unlock(&ci->i_ceph_lock); 1528 spin_unlock(&ci->i_ceph_lock);
1527 1529
1528 if (needlock)
1529 mutex_lock(&inode->i_mutex);
1530 truncate_inode_pages(inode->i_mapping, to); 1530 truncate_inode_pages(inode->i_mapping, to);
1531 if (needlock)
1532 mutex_unlock(&inode->i_mutex);
1533 1531
1534 spin_lock(&ci->i_ceph_lock); 1532 spin_lock(&ci->i_ceph_lock);
1535 if (to == ci->i_truncate_size) { 1533 if (to == ci->i_truncate_size) {
@@ -1588,7 +1586,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1588 if (ceph_snap(inode) != CEPH_NOSNAP) 1586 if (ceph_snap(inode) != CEPH_NOSNAP)
1589 return -EROFS; 1587 return -EROFS;
1590 1588
1591 __ceph_do_pending_vmtruncate(inode, false); 1589 __ceph_do_pending_vmtruncate(inode);
1592 1590
1593 err = inode_change_ok(inode, attr); 1591 err = inode_change_ok(inode, attr);
1594 if (err != 0) 1592 if (err != 0)
@@ -1770,7 +1768,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1770 ceph_cap_string(dirtied), mask); 1768 ceph_cap_string(dirtied), mask);
1771 1769
1772 ceph_mdsc_put_request(req); 1770 ceph_mdsc_put_request(req);
1773 __ceph_do_pending_vmtruncate(inode, false); 1771 __ceph_do_pending_vmtruncate(inode);
1774 return err; 1772 return err;
1775out: 1773out:
1776 spin_unlock(&ci->i_ceph_lock); 1774 spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index dfbb729b3130..cbded572345e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -692,7 +692,7 @@ extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
692extern int ceph_inode_holds_cap(struct inode *inode, int mask); 692extern int ceph_inode_holds_cap(struct inode *inode, int mask);
693 693
694extern int ceph_inode_set_size(struct inode *inode, loff_t size); 694extern int ceph_inode_set_size(struct inode *inode, loff_t size);
695extern void __ceph_do_pending_vmtruncate(struct inode *inode, bool needlock); 695extern void __ceph_do_pending_vmtruncate(struct inode *inode);
696extern void ceph_queue_vmtruncate(struct inode *inode); 696extern void ceph_queue_vmtruncate(struct inode *inode);
697 697
698extern void ceph_queue_invalidate(struct inode *inode); 698extern void ceph_queue_invalidate(struct inode *inode);