diff options
author | Maxim Patlasov <MPatlasov@parallels.com> | 2013-08-30 09:06:04 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2013-09-26 20:18:30 -0400 |
commit | 4e208303119d234347b1f4337b84d47306e73811 (patch) | |
tree | 0f288cc210992ddcdf6812a33db2c2664f78457f /fs | |
parent | eb97a45d12c9517b7846ebe3c97ee554b777ad34 (diff) |
fuse: hotfix truncate_pagecache() issue
commit 06a7c3c2781409af95000c60a5df743fd4e2f8b4 upstream.
The way how fuse calls truncate_pagecache() from fuse_change_attributes()
is completely wrong. Because, w/o i_mutex held, we never sure whether
'oldsize' and 'attr->size' are valid by the time of execution of
truncate_pagecache(inode, oldsize, attr->size). In fact, as soon as we
released fc->lock in the middle of fuse_change_attributes(), we completely
loose control of actions which may happen with given inode until we reach
truncate_pagecache. The list of potentially dangerous actions includes
mmap-ed reads and writes, ftruncate(2) and write(2) extending file size.
The typical outcome of doing truncate_pagecache() with outdated arguments
is data corruption from user point of view. This is (in some sense)
acceptable in cases when the issue is triggered by a change of the file on
the server (i.e. externally wrt fuse operation), but it is absolutely
intolerable in scenarios when a single fuse client modifies a file without
any external intervention. A real life case I discovered by fsx-linux
looked like this:
1. Shrinking ftruncate(2) comes to fuse_do_setattr(). The latter sends
FUSE_SETATTR to the server synchronously, but before getting fc->lock ...
2. fuse_dentry_revalidate() is asynchronously called. It sends FUSE_LOOKUP
to the server synchronously, then calls fuse_change_attributes(). The
latter updates i_size, releases fc->lock, but before comparing oldsize vs
attr->size..
3. fuse_do_setattr() from the first step proceeds by acquiring fc->lock and
updating attributes and i_size, but now oldsize is equal to
outarg.attr.size because i_size has just been updated (step 2). Hence,
fuse_do_setattr() returns w/o calling truncate_pagecache().
4. As soon as ftruncate(2) completes, the user extends file size by
write(2) making a hole in the middle of file, then reads data from the hole
either by read(2) or mmap-ed read. The user expects to get zero data from
the hole, but gets stale data because truncate_pagecache() is not executed
yet.
The scenario above illustrates one side of the problem: not truncating the
page cache even though we should. Another side corresponds to truncating
page cache too late, when the state of inode changed significantly.
Theoretically, the following is possible:
1. As in the previous scenario fuse_dentry_revalidate() discovered that
i_size changed (due to our own fuse_do_setattr()) and is going to call
truncate_pagecache() for some 'new_size' it believes valid right now. But
by the time that particular truncate_pagecache() is called ...
2. fuse_do_setattr() returns (either having called truncate_pagecache() or
not -- it doesn't matter).
3. The file is extended either by write(2) or ftruncate(2) or fallocate(2).
4. mmap-ed write makes a page in the extended region dirty.
The result will be the lost of data user wrote on the fourth step.
The patch is a hotfix resolving the issue in a simplistic way: let's skip
dangerous i_size update and truncate_pagecache if an operation changing
file size is in progress. This simplistic approach looks correct for the
cases w/o external changes. And to handle them properly, more sophisticated
and intrusive techniques (e.g. NFS-like one) would be required. I'd like to
postpone it until the issue is well discussed on the mailing list(s).
Changed in v2:
- improved patch description to cover both sides of the issue.
Signed-off-by: Maxim Patlasov <mpatlasov@parallels.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/fuse/dir.c | 7 | ||||
-rw-r--r-- | fs/fuse/file.c | 8 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 2 | ||||
-rw-r--r-- | fs/fuse/inode.c | 3 |
4 files changed, 17 insertions, 3 deletions
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e1536f9ae809..a30c60d5ce4f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -1594,6 +1594,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
1594 | struct file *file) | 1594 | struct file *file) |
1595 | { | 1595 | { |
1596 | struct fuse_conn *fc = get_fuse_conn(inode); | 1596 | struct fuse_conn *fc = get_fuse_conn(inode); |
1597 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
1597 | struct fuse_req *req; | 1598 | struct fuse_req *req; |
1598 | struct fuse_setattr_in inarg; | 1599 | struct fuse_setattr_in inarg; |
1599 | struct fuse_attr_out outarg; | 1600 | struct fuse_attr_out outarg; |
@@ -1621,8 +1622,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
1621 | if (IS_ERR(req)) | 1622 | if (IS_ERR(req)) |
1622 | return PTR_ERR(req); | 1623 | return PTR_ERR(req); |
1623 | 1624 | ||
1624 | if (is_truncate) | 1625 | if (is_truncate) { |
1625 | fuse_set_nowrite(inode); | 1626 | fuse_set_nowrite(inode); |
1627 | set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); | ||
1628 | } | ||
1626 | 1629 | ||
1627 | memset(&inarg, 0, sizeof(inarg)); | 1630 | memset(&inarg, 0, sizeof(inarg)); |
1628 | memset(&outarg, 0, sizeof(outarg)); | 1631 | memset(&outarg, 0, sizeof(outarg)); |
@@ -1684,12 +1687,14 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
1684 | invalidate_inode_pages2(inode->i_mapping); | 1687 | invalidate_inode_pages2(inode->i_mapping); |
1685 | } | 1688 | } |
1686 | 1689 | ||
1690 | clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); | ||
1687 | return 0; | 1691 | return 0; |
1688 | 1692 | ||
1689 | error: | 1693 | error: |
1690 | if (is_truncate) | 1694 | if (is_truncate) |
1691 | fuse_release_nowrite(inode); | 1695 | fuse_release_nowrite(inode); |
1692 | 1696 | ||
1697 | clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); | ||
1693 | return err; | 1698 | return err; |
1694 | } | 1699 | } |
1695 | 1700 | ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4943f2c9943d..473e8453a7df 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -630,7 +630,8 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, | |||
630 | struct fuse_inode *fi = get_fuse_inode(inode); | 630 | struct fuse_inode *fi = get_fuse_inode(inode); |
631 | 631 | ||
632 | spin_lock(&fc->lock); | 632 | spin_lock(&fc->lock); |
633 | if (attr_ver == fi->attr_version && size < inode->i_size) { | 633 | if (attr_ver == fi->attr_version && size < inode->i_size && |
634 | !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { | ||
634 | fi->attr_version = ++fc->attr_version; | 635 | fi->attr_version = ++fc->attr_version; |
635 | i_size_write(inode, size); | 636 | i_size_write(inode, size); |
636 | } | 637 | } |
@@ -1033,12 +1034,16 @@ static ssize_t fuse_perform_write(struct file *file, | |||
1033 | { | 1034 | { |
1034 | struct inode *inode = mapping->host; | 1035 | struct inode *inode = mapping->host; |
1035 | struct fuse_conn *fc = get_fuse_conn(inode); | 1036 | struct fuse_conn *fc = get_fuse_conn(inode); |
1037 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
1036 | int err = 0; | 1038 | int err = 0; |
1037 | ssize_t res = 0; | 1039 | ssize_t res = 0; |
1038 | 1040 | ||
1039 | if (is_bad_inode(inode)) | 1041 | if (is_bad_inode(inode)) |
1040 | return -EIO; | 1042 | return -EIO; |
1041 | 1043 | ||
1044 | if (inode->i_size < pos + iov_iter_count(ii)) | ||
1045 | set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); | ||
1046 | |||
1042 | do { | 1047 | do { |
1043 | struct fuse_req *req; | 1048 | struct fuse_req *req; |
1044 | ssize_t count; | 1049 | ssize_t count; |
@@ -1074,6 +1079,7 @@ static ssize_t fuse_perform_write(struct file *file, | |||
1074 | if (res > 0) | 1079 | if (res > 0) |
1075 | fuse_write_update_size(inode, pos); | 1080 | fuse_write_update_size(inode, pos); |
1076 | 1081 | ||
1082 | clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); | ||
1077 | fuse_invalidate_attr(inode); | 1083 | fuse_invalidate_attr(inode); |
1078 | 1084 | ||
1079 | return res > 0 ? res : err; | 1085 | return res > 0 ? res : err; |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fde7249a3a96..5ced199b50bb 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -115,6 +115,8 @@ struct fuse_inode { | |||
115 | enum { | 115 | enum { |
116 | /** Advise readdirplus */ | 116 | /** Advise readdirplus */ |
117 | FUSE_I_ADVISE_RDPLUS, | 117 | FUSE_I_ADVISE_RDPLUS, |
118 | /** An operation changing file size is in progress */ | ||
119 | FUSE_I_SIZE_UNSTABLE, | ||
118 | }; | 120 | }; |
119 | 121 | ||
120 | struct fuse_conn; | 122 | struct fuse_conn; |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde14a08..b5718516825b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -201,7 +201,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | |||
201 | struct timespec old_mtime; | 201 | struct timespec old_mtime; |
202 | 202 | ||
203 | spin_lock(&fc->lock); | 203 | spin_lock(&fc->lock); |
204 | if (attr_version != 0 && fi->attr_version > attr_version) { | 204 | if ((attr_version != 0 && fi->attr_version > attr_version) || |
205 | test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { | ||
205 | spin_unlock(&fc->lock); | 206 | spin_unlock(&fc->lock); |
206 | return; | 207 | return; |
207 | } | 208 | } |