aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMaxim Patlasov <MPatlasov@parallels.com>2013-08-30 09:06:04 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-09-26 20:18:30 -0400
commit4e208303119d234347b1f4337b84d47306e73811 (patch)
tree0f288cc210992ddcdf6812a33db2c2664f78457f /fs
parenteb97a45d12c9517b7846ebe3c97ee554b777ad34 (diff)
fuse: hotfix truncate_pagecache() issue
commit 06a7c3c2781409af95000c60a5df743fd4e2f8b4 upstream. The way how fuse calls truncate_pagecache() from fuse_change_attributes() is completely wrong. Because, w/o i_mutex held, we never sure whether 'oldsize' and 'attr->size' are valid by the time of execution of truncate_pagecache(inode, oldsize, attr->size). In fact, as soon as we released fc->lock in the middle of fuse_change_attributes(), we completely loose control of actions which may happen with given inode until we reach truncate_pagecache. The list of potentially dangerous actions includes mmap-ed reads and writes, ftruncate(2) and write(2) extending file size. The typical outcome of doing truncate_pagecache() with outdated arguments is data corruption from user point of view. This is (in some sense) acceptable in cases when the issue is triggered by a change of the file on the server (i.e. externally wrt fuse operation), but it is absolutely intolerable in scenarios when a single fuse client modifies a file without any external intervention. A real life case I discovered by fsx-linux looked like this: 1. Shrinking ftruncate(2) comes to fuse_do_setattr(). The latter sends FUSE_SETATTR to the server synchronously, but before getting fc->lock ... 2. fuse_dentry_revalidate() is asynchronously called. It sends FUSE_LOOKUP to the server synchronously, then calls fuse_change_attributes(). The latter updates i_size, releases fc->lock, but before comparing oldsize vs attr->size.. 3. fuse_do_setattr() from the first step proceeds by acquiring fc->lock and updating attributes and i_size, but now oldsize is equal to outarg.attr.size because i_size has just been updated (step 2). Hence, fuse_do_setattr() returns w/o calling truncate_pagecache(). 4. As soon as ftruncate(2) completes, the user extends file size by write(2) making a hole in the middle of file, then reads data from the hole either by read(2) or mmap-ed read. The user expects to get zero data from the hole, but gets stale data because truncate_pagecache() is not executed yet. The scenario above illustrates one side of the problem: not truncating the page cache even though we should. Another side corresponds to truncating page cache too late, when the state of inode changed significantly. Theoretically, the following is possible: 1. As in the previous scenario fuse_dentry_revalidate() discovered that i_size changed (due to our own fuse_do_setattr()) and is going to call truncate_pagecache() for some 'new_size' it believes valid right now. But by the time that particular truncate_pagecache() is called ... 2. fuse_do_setattr() returns (either having called truncate_pagecache() or not -- it doesn't matter). 3. The file is extended either by write(2) or ftruncate(2) or fallocate(2). 4. mmap-ed write makes a page in the extended region dirty. The result will be the lost of data user wrote on the fourth step. The patch is a hotfix resolving the issue in a simplistic way: let's skip dangerous i_size update and truncate_pagecache if an operation changing file size is in progress. This simplistic approach looks correct for the cases w/o external changes. And to handle them properly, more sophisticated and intrusive techniques (e.g. NFS-like one) would be required. I'd like to postpone it until the issue is well discussed on the mailing list(s). Changed in v2: - improved patch description to cover both sides of the issue. Signed-off-by: Maxim Patlasov <mpatlasov@parallels.com> Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/fuse/dir.c7
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/fuse/fuse_i.h2
-rw-r--r--fs/fuse/inode.c3
4 files changed, 17 insertions, 3 deletions
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e1536f9ae809..a30c60d5ce4f 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1594,6 +1594,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1594 struct file *file) 1594 struct file *file)
1595{ 1595{
1596 struct fuse_conn *fc = get_fuse_conn(inode); 1596 struct fuse_conn *fc = get_fuse_conn(inode);
1597 struct fuse_inode *fi = get_fuse_inode(inode);
1597 struct fuse_req *req; 1598 struct fuse_req *req;
1598 struct fuse_setattr_in inarg; 1599 struct fuse_setattr_in inarg;
1599 struct fuse_attr_out outarg; 1600 struct fuse_attr_out outarg;
@@ -1621,8 +1622,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1621 if (IS_ERR(req)) 1622 if (IS_ERR(req))
1622 return PTR_ERR(req); 1623 return PTR_ERR(req);
1623 1624
1624 if (is_truncate) 1625 if (is_truncate) {
1625 fuse_set_nowrite(inode); 1626 fuse_set_nowrite(inode);
1627 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1628 }
1626 1629
1627 memset(&inarg, 0, sizeof(inarg)); 1630 memset(&inarg, 0, sizeof(inarg));
1628 memset(&outarg, 0, sizeof(outarg)); 1631 memset(&outarg, 0, sizeof(outarg));
@@ -1684,12 +1687,14 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
1684 invalidate_inode_pages2(inode->i_mapping); 1687 invalidate_inode_pages2(inode->i_mapping);
1685 } 1688 }
1686 1689
1690 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1687 return 0; 1691 return 0;
1688 1692
1689error: 1693error:
1690 if (is_truncate) 1694 if (is_truncate)
1691 fuse_release_nowrite(inode); 1695 fuse_release_nowrite(inode);
1692 1696
1697 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1693 return err; 1698 return err;
1694} 1699}
1695 1700
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4943f2c9943d..473e8453a7df 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -630,7 +630,8 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
630 struct fuse_inode *fi = get_fuse_inode(inode); 630 struct fuse_inode *fi = get_fuse_inode(inode);
631 631
632 spin_lock(&fc->lock); 632 spin_lock(&fc->lock);
633 if (attr_ver == fi->attr_version && size < inode->i_size) { 633 if (attr_ver == fi->attr_version && size < inode->i_size &&
634 !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
634 fi->attr_version = ++fc->attr_version; 635 fi->attr_version = ++fc->attr_version;
635 i_size_write(inode, size); 636 i_size_write(inode, size);
636 } 637 }
@@ -1033,12 +1034,16 @@ static ssize_t fuse_perform_write(struct file *file,
1033{ 1034{
1034 struct inode *inode = mapping->host; 1035 struct inode *inode = mapping->host;
1035 struct fuse_conn *fc = get_fuse_conn(inode); 1036 struct fuse_conn *fc = get_fuse_conn(inode);
1037 struct fuse_inode *fi = get_fuse_inode(inode);
1036 int err = 0; 1038 int err = 0;
1037 ssize_t res = 0; 1039 ssize_t res = 0;
1038 1040
1039 if (is_bad_inode(inode)) 1041 if (is_bad_inode(inode))
1040 return -EIO; 1042 return -EIO;
1041 1043
1044 if (inode->i_size < pos + iov_iter_count(ii))
1045 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1046
1042 do { 1047 do {
1043 struct fuse_req *req; 1048 struct fuse_req *req;
1044 ssize_t count; 1049 ssize_t count;
@@ -1074,6 +1079,7 @@ static ssize_t fuse_perform_write(struct file *file,
1074 if (res > 0) 1079 if (res > 0)
1075 fuse_write_update_size(inode, pos); 1080 fuse_write_update_size(inode, pos);
1076 1081
1082 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1077 fuse_invalidate_attr(inode); 1083 fuse_invalidate_attr(inode);
1078 1084
1079 return res > 0 ? res : err; 1085 return res > 0 ? res : err;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fde7249a3a96..5ced199b50bb 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -115,6 +115,8 @@ struct fuse_inode {
115enum { 115enum {
116 /** Advise readdirplus */ 116 /** Advise readdirplus */
117 FUSE_I_ADVISE_RDPLUS, 117 FUSE_I_ADVISE_RDPLUS,
118 /** An operation changing file size is in progress */
119 FUSE_I_SIZE_UNSTABLE,
118}; 120};
119 121
120struct fuse_conn; 122struct fuse_conn;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9a0cdde14a08..b5718516825b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -201,7 +201,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
201 struct timespec old_mtime; 201 struct timespec old_mtime;
202 202
203 spin_lock(&fc->lock); 203 spin_lock(&fc->lock);
204 if (attr_version != 0 && fi->attr_version > attr_version) { 204 if ((attr_version != 0 && fi->attr_version > attr_version) ||
205 test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
205 spin_unlock(&fc->lock); 206 spin_unlock(&fc->lock);
206 return; 207 return;
207 } 208 }