aboutsummaryrefslogtreecommitdiffstats
path: root/fs/cifs/inode.c
diff options
context:
space:
mode:
authorJeff Layton <jlayton@redhat.com>2007-11-20 18:19:03 -0500
committerSteve French <sfrench@us.ibm.com>2007-11-20 18:19:03 -0500
commitcea218054ad277d6c126890213afde07b4eb1602 (patch)
tree8bfbd3c7d8ab94d35ec749ed4e0d66b1f6b69101 /fs/cifs/inode.c
parent2a97468024fb5b6eccee2a67a7796485c829343a (diff)
[CIFS] Fix potential data corruption when writing out cached dirty pages
Fix RedHat bug 329431 The idea here is separate "conscious" from "unconscious" flushes. Conscious flushes are those due to a fsync() or close(). Unconscious ones are flushes that occur as a side effect of some other operation or due to memory pressure. Currently, when an error occurs during an unconscious flush (ENOSPC or EIO), we toss out the page and don't preserve that error to report to the user when a conscious flush occurs. If after the unconscious flush, there are no more dirty pages for the inode, the conscious flush will simply return success even though there were previous errors when writing out pages. This can lead to data corruption. The easiest way to reproduce this is to mount up a CIFS share that's very close to being full or where the user is very close to quota. mv a file to the share that's slightly larger than the quota allows. The writes will all succeed (since they go to pagecache). The mv will do a setattr to set the new file's attributes. This calls filemap_write_and_wait, which will return an error since all of the pages can't be written out. Then later, when the flush and release ops occur, there are no more dirty pages in pagecache for the file and those operations return 0. mv then assumes that the file was written out correctly and deletes the original. CIFS already has a write_behind_rc variable where it stores the results from earlier flushes, but that value is only reported in cifs_close. Since the VFS ignores the return value from the release operation, this isn't helpful. We should be reporting this error during the flush operation. This patch does the following: 1) changes cifs_fsync to use filemap_write_and_wait and cifs_flush and also sync to check its return code. If it returns successful, they then check the value of write_behind_rc to see if an earlier flush had reported any errors. If so, they return that error and clear write_behind_rc. 2) sets write_behind_rc in a few other places where pages are written out as a side effect of other operations and the code waits on them. 3) changes cifs_setattr to only call filemap_write_and_wait for ATTR_SIZE changes. 4) makes cifs_writepages accurately distinguish between EIO and ENOSPC errors when writing out pages. Some simple testing indicates that the patch works as expected and that it fixes the reproduceable known problem. Acked-by: Dave Kleikamp <shaggy@austin.rr.com> Signed-off-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Steve French <sfrench@us.ibm.com>
Diffstat (limited to 'fs/cifs/inode.c')
-rw-r--r--fs/cifs/inode.c26
1 files changed, 20 insertions, 6 deletions
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7d907e84e032..e915eb1d2e66 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1233,7 +1233,7 @@ cifs_rename_exit:
1233int cifs_revalidate(struct dentry *direntry) 1233int cifs_revalidate(struct dentry *direntry)
1234{ 1234{
1235 int xid; 1235 int xid;
1236 int rc = 0; 1236 int rc = 0, wbrc = 0;
1237 char *full_path; 1237 char *full_path;
1238 struct cifs_sb_info *cifs_sb; 1238 struct cifs_sb_info *cifs_sb;
1239 struct cifsInodeInfo *cifsInode; 1239 struct cifsInodeInfo *cifsInode;
@@ -1333,7 +1333,9 @@ int cifs_revalidate(struct dentry *direntry)
1333 if (direntry->d_inode->i_mapping) { 1333 if (direntry->d_inode->i_mapping) {
1334 /* do we need to lock inode until after invalidate completes 1334 /* do we need to lock inode until after invalidate completes
1335 below? */ 1335 below? */
1336 filemap_fdatawrite(direntry->d_inode->i_mapping); 1336 wbrc = filemap_fdatawrite(direntry->d_inode->i_mapping);
1337 if (wbrc)
1338 CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
1337 } 1339 }
1338 if (invalidate_inode) { 1340 if (invalidate_inode) {
1339 /* shrink_dcache not necessary now that cifs dentry ops 1341 /* shrink_dcache not necessary now that cifs dentry ops
@@ -1342,7 +1344,9 @@ int cifs_revalidate(struct dentry *direntry)
1342 shrink_dcache_parent(direntry); */ 1344 shrink_dcache_parent(direntry); */
1343 if (S_ISREG(direntry->d_inode->i_mode)) { 1345 if (S_ISREG(direntry->d_inode->i_mode)) {
1344 if (direntry->d_inode->i_mapping) 1346 if (direntry->d_inode->i_mapping)
1345 filemap_fdatawait(direntry->d_inode->i_mapping); 1347 wbrc = filemap_fdatawait(direntry->d_inode->i_mapping);
1348 if (wbrc)
1349 CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
1346 /* may eventually have to do this for open files too */ 1350 /* may eventually have to do this for open files too */
1347 if (list_empty(&(cifsInode->openFileList))) { 1351 if (list_empty(&(cifsInode->openFileList))) {
1348 /* changed on server - flush read ahead pages */ 1352 /* changed on server - flush read ahead pages */
@@ -1485,10 +1489,20 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1485 1489
1486 /* BB check if we need to refresh inode from server now ? BB */ 1490 /* BB check if we need to refresh inode from server now ? BB */
1487 1491
1488 /* need to flush data before changing file size on server */
1489 filemap_write_and_wait(direntry->d_inode->i_mapping);
1490
1491 if (attrs->ia_valid & ATTR_SIZE) { 1492 if (attrs->ia_valid & ATTR_SIZE) {
1493 /*
1494 Flush data before changing file size on server. If the
1495 flush returns error, store it to report later and continue.
1496 BB: This should be smarter. Why bother flushing pages that
1497 will be truncated anyway? Also, should we error out here if
1498 the flush returns error?
1499 */
1500 rc = filemap_write_and_wait(direntry->d_inode->i_mapping);
1501 if (rc != 0) {
1502 CIFS_I(direntry->d_inode)->write_behind_rc = rc;
1503 rc = 0;
1504 }
1505
1492 /* To avoid spurious oplock breaks from server, in the case of 1506 /* To avoid spurious oplock breaks from server, in the case of
1493 inodes that we already have open, avoid doing path based 1507 inodes that we already have open, avoid doing path based
1494 setting of file size if we can do it by handle. 1508 setting of file size if we can do it by handle.