aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Nick Piggin <npiggin@suse.de> 2009-03-31 18:23:23 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-04-01 11:59:14 -0400
commit: 56a76f8275c379ed73c8a43cfa1dfa2f5e9cfa19 (patch)
tree: cbeaa82516c4818f72535b6ebe48a607cef88af4
parent: c2ec175c39f62949438354f603f4aa170846aabb (diff)
fs: fix page_mkwrite error cases in core code and btrfs
page_mkwrite is called with neither the page lock nor the ptl held. This means a page can be concurrently truncated or invalidated out from underneath it. Callers are supposed to prevent truncate races themselves; however, previously the only thing they could do if they hit one was to raise a SIGBUS. A SIGBUS is wrong for the case where the page has been invalidated or truncated within i_size (e.g. a hole was punched). Callers may also have to perform memory allocations in this path, where again SIGBUS would be wrong.

The previous patch ("mm: page_mkwrite change prototype to match fault") made it possible to properly specify errors. Convert the generic buffer.c code and btrfs to return sane error values (in the case of a page removed from the pagecache, VM_FAULT_NOPAGE will cause the fault handler to exit without doing anything, and the fault will be retried properly).

This fixes core code, and converts btrfs as a template/example. All other filesystems defining their own page_mkwrite should be fixed in a similar manner.

Acked-by: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  fs/btrfs/inode.c  11
-rw-r--r--  fs/buffer.c  12
2 files changed, 15 insertions, 8 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ec5423790bbb..17e608c4dc70 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4307,10 +4307,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4307 u64 page_end; 4307 u64 page_end;
4308 4308
4309 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 4309 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
4310 if (ret) 4310 if (ret) {
4311 if (ret == -ENOMEM)
4312 ret = VM_FAULT_OOM;
4313 else /* -ENOSPC, -EIO, etc */
4314 ret = VM_FAULT_SIGBUS;
4311 goto out; 4315 goto out;
4316 }
4312 4317
4313 ret = -EINVAL; 4318 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
4314again: 4319again:
4315 lock_page(page); 4320 lock_page(page);
4316 size = i_size_read(inode); 4321 size = i_size_read(inode);
@@ -4363,8 +4368,6 @@ again:
4363out_unlock: 4368out_unlock:
4364 unlock_page(page); 4369 unlock_page(page);
4365out: 4370out:
4366 if (ret)
4367 ret = VM_FAULT_SIGBUS;
4368 return ret; 4371 return ret;
4369} 4372}
4370 4373
diff --git a/fs/buffer.c b/fs/buffer.c
index 6d51a3da362c..0c14f8d52ee5 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2320,7 +2320,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2320 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 2320 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
2321 unsigned long end; 2321 unsigned long end;
2322 loff_t size; 2322 loff_t size;
2323 int ret = -EINVAL; 2323 int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
2324 2324
2325 lock_page(page); 2325 lock_page(page);
2326 size = i_size_read(inode); 2326 size = i_size_read(inode);
@@ -2340,10 +2340,14 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2340 if (!ret) 2340 if (!ret)
2341 ret = block_commit_write(page, 0, end); 2341 ret = block_commit_write(page, 0, end);
2342 2342
2343out_unlock: 2343 if (unlikely(ret)) {
2344 if (ret) 2344 if (ret == -ENOMEM)
2345 ret = VM_FAULT_SIGBUS; 2345 ret = VM_FAULT_OOM;
2346 else /* -ENOSPC, -EIO, etc */
2347 ret = VM_FAULT_SIGBUS;
2348 }
2346 2349
2350out_unlock:
2347 unlock_page(page); 2351 unlock_page(page);
2348 return ret; 2352 return ret;
2349} 2353}