author     Jan Kara <jack@suse.cz>             2012-06-12 10:20:37 -0400
committer  Al Viro <viro@zeniv.linux.org.uk>   2012-07-31 01:45:47 -0400
commit     14da9200140f8d722ad1767dfabadebd8b34f2ad
tree       ea5d88b091999f7a64af0b9d335d7cad4c79edfb /fs
parent     5d37e9e6dec65cd21be68ee92de99686213e916b
fs: Protect write paths by sb_start_write - sb_end_write
There are several entry points which dirty pages in a filesystem: mmap (handled by block_page_mkwrite()), buffered write (handled by __generic_file_aio_write()), splice write (generic_file_splice_write()), truncate, and fallocate (these can dirty the last partial page - handled inside each filesystem separately). Protect these places with sb_start_write() and sb_end_write().

->page_mkwrite() calls are particularly complex since they are called with mmap_sem held and thus we cannot use the standard sb_start_write() due to lock ordering constraints. We solve the problem by using a special freeze protection, sb_start_pagefault(), which ranks below mmap_sem.

BugLink: https://bugs.launchpad.net/bugs/897421
Tested-by: Kamal Mostafa <kamal@canonical.com>
Tested-by: Peter M. Petrakis <peter.petrakis@canonical.com>
Tested-by: Dann Frazier <dann.frazier@canonical.com>
Tested-by: Massimo Morana <massimo.morana@canonical.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
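For illustration, a minimal sketch (not part of this patch) of the bracketing that the patch adds around each such write entry point; do_sync_write() here merely stands in for any call that dirties page cache pages:

/* Minimal sketch, not from this patch: bracket anything that may dirty
 * pages with the superblock freeze counters, so a freeze can wait for
 * writers already in flight and block new ones until the fs is thawed. */
#include <linux/fs.h>

static ssize_t example_write_path(struct file *file, const char __user *buf,
				  size_t len, loff_t *ppos)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	ssize_t ret;

	sb_start_write(inode->i_sb);	/* blocks while the fs is frozen */
	ret = do_sync_write(file, buf, len, ppos);
	sb_end_write(inode->i_sb);
	return ret;
}

Page faults cannot take sb_start_write() under mmap_sem; they use sb_start_pagefault()/sb_end_pagefault() instead, as the fs/buffer.c hunk below shows.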
Diffstat (limited to 'fs')
-rw-r--r--   fs/buffer.c   22
-rw-r--r--   fs/open.c      7
-rw-r--r--   fs/splice.c    3
3 files changed, 13 insertions(+), 19 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index d5ec360e332d..9f6d2e41281d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write);
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
  *
- * Direct callers of this function should call vfs_check_frozen() so that page
- * fault does not busyloop until the fs is thawed.
+ * Direct callers of this function should protect against filesystem freezing
+ * using sb_start_write() - sb_end_write() functions.
  */
 int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 			 get_block_t get_block)
@@ -2345,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	if (unlikely(ret < 0))
 		goto out_unlock;
-	/*
-	 * Freezing in progress? We check after the page is marked dirty and
-	 * with page lock held so if the test here fails, we are sure freezing
-	 * code will wait during syncing until the page fault is done - at that
-	 * point page will be dirty and unlocked so freezing code will write it
-	 * and writeprotect it again.
-	 */
 	set_page_dirty(page);
-	if (inode->i_sb->s_frozen != SB_UNFROZEN) {
-		ret = -EAGAIN;
-		goto out_unlock;
-	}
 	wait_on_page_writeback(page);
 	return 0;
 out_unlock:
@@ -2371,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	int ret;
 	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
 
-	/*
-	 * This check is racy but catches the common case. The check in
-	 * __block_page_mkwrite() is reliable.
-	 */
-	vfs_check_frozen(sb, SB_FREEZE_WRITE);
+	sb_start_pagefault(sb);
 	ret = __block_page_mkwrite(vma, vmf, get_block);
+	sb_end_pagefault(sb);
 	return block_page_mkwrite_return(ret);
 }
 EXPORT_SYMBOL(block_page_mkwrite);
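As the updated comment in __block_page_mkwrite() notes, a filesystem that calls it directly must now take the freeze protection itself. A hypothetical ->page_mkwrite handler (the myfs_* names are assumptions, not from this patch) would mirror what block_page_mkwrite() does above:

/* Hypothetical sketch of a direct __block_page_mkwrite() caller. */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/buffer_head.h>

/* Assumed to be defined elsewhere in the filesystem. */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

static int myfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
	int ret;

	sb_start_pagefault(sb);		/* freeze protection that ranks below mmap_sem */
	ret = __block_page_mkwrite(vma, vmf, myfs_get_block);
	sb_end_pagefault(sb);
	return block_page_mkwrite_return(ret);
}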
diff --git a/fs/open.c b/fs/open.c
index 9ddc18565503..f3d96e7e7b19 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 	if (IS_APPEND(inode))
 		goto out_putf;
 
+	sb_start_write(inode->i_sb);
 	error = locks_verify_truncate(inode, file, length);
 	if (!error)
 		error = security_path_truncate(&file->f_path);
 	if (!error)
 		error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+	sb_end_write(inode->i_sb);
 out_putf:
 	fput(file);
 out:
@@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (!file->f_op->fallocate)
 		return -EOPNOTSUPP;
 
-	return file->f_op->fallocate(file, mode, offset, len);
+	sb_start_write(inode->i_sb);
+	ret = file->f_op->fallocate(file, mode, offset, len);
+	sb_end_write(inode->i_sb);
+	return ret;
 }
 
 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
diff --git a/fs/splice.c b/fs/splice.c
index 7bf08fa22ec9..41514dd89462 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 	};
 	ssize_t ret;
 
+	sb_start_write(inode->i_sb);
+
 	pipe_lock(pipe);
 
 	splice_from_pipe_begin(&sd);
@@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 		*ppos += ret;
 		balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
 	}
+	sb_end_write(inode->i_sb);
 
 	return ret;
 }