aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDmitry Monakhov <dmonakhov@openvz.org>2012-10-05 11:31:55 -0400
committerTheodore Ts'o <tytso@mit.edu>2012-10-05 11:31:55 -0400
commitc278531d39f3158bfee93dc67da0b77e09776de2 (patch)
treeb83341e04d54b3f1cd8171f43ec77bbfba06e571 /fs
parent041bbb6d369811e948ae01f3d00414264076be35 (diff)
ext4: fix ext4_flush_completed_IO wait semantics
BUG #1) All places where we call ext4_flush_completed_IO are broken because buffered io and DIO/AIO goes through three stages 1) submitted io, 2) completed io (in i_completed_io_list) conversion pended 3) finished io (conversion done) And by calling ext4_flush_completed_IO we will flush only requests which were in (2) stage, which is wrong because: 1) punch_hole and truncate _must_ wait for all outstanding unwritten io regardless to it's state. 2) fsync and nolock_dio_read should also wait because there is a time window between end_page_writeback() and ext4_add_complete_io() As result integrity fsync is broken in case of buffered write to fallocated region: fsync blkdev_completion ->filemap_write_and_wait_range ->ext4_end_bio ->end_page_writeback <-- filemap_write_and_wait_range return ->ext4_flush_completed_IO sees empty i_completed_io_list but pended conversion still exist ->ext4_add_complete_io BUG #2) Race window becomes wider due to the 'ext4: completed_io locking cleanup V4' patch series This patch make following changes: 1) ext4_flush_completed_io() now first try to flush completed io and when wait for any outstanding unwritten io via ext4_unwritten_wait() 2) Rename function to more appropriate name. 3) Assert that all callers of ext4_flush_unwritten_io should hold i_mutex to prevent endless wait Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Reviewed-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/extents.c6
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/fsync.c2
-rw-r--r--fs/ext4/indirect.c8
-rw-r--r--fs/ext4/page-io.c11
6 files changed, 19 insertions, 13 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1be2b4472a83..3ab2539b7b2e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1947,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1947 1947
1948/* fsync.c */ 1948/* fsync.c */
1949extern int ext4_sync_file(struct file *, loff_t, loff_t, int); 1949extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
1950extern int ext4_flush_completed_IO(struct inode *); 1950extern int ext4_flush_unwritten_io(struct inode *);
1951 1951
1952/* hash.c */ 1952/* hash.c */
1953extern int ext4fs_dirhash(const char *name, int len, struct 1953extern int ext4fs_dirhash(const char *name, int len, struct
@@ -2371,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations;
2371extern const struct inode_operations ext4_file_inode_operations; 2371extern const struct inode_operations ext4_file_inode_operations;
2372extern const struct file_operations ext4_file_operations; 2372extern const struct file_operations ext4_file_operations;
2373extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin); 2373extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2374extern void ext4_unwritten_wait(struct inode *inode);
2374 2375
2375/* namei.c */ 2376/* namei.c */
2376extern const struct inode_operations ext4_dir_inode_operations; 2377extern const struct inode_operations ext4_dir_inode_operations;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c1fcf489e056..1c94cca35ed1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4268,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
4268 * finish any pending end_io work so we won't run the risk of 4268 * finish any pending end_io work so we won't run the risk of
4269 * converting any truncated blocks to initialized later 4269 * converting any truncated blocks to initialized later
4270 */ 4270 */
4271 ext4_flush_completed_IO(inode); 4271 ext4_flush_unwritten_io(inode);
4272 4272
4273 /* 4273 /*
4274 * probably first extent we're gonna free will be last in block 4274 * probably first extent we're gonna free will be last in block
@@ -4847,10 +4847,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
4847 4847
4848 /* Wait all existing dio workers, newcomers will block on i_mutex */ 4848 /* Wait all existing dio workers, newcomers will block on i_mutex */
4849 ext4_inode_block_unlocked_dio(inode); 4849 ext4_inode_block_unlocked_dio(inode);
4850 inode_dio_wait(inode); 4850 err = ext4_flush_unwritten_io(inode);
4851 err = ext4_flush_completed_IO(inode);
4852 if (err) 4851 if (err)
4853 goto out_dio; 4852 goto out_dio;
4853 inode_dio_wait(inode);
4854 4854
4855 credits = ext4_writepage_trans_blocks(inode); 4855 credits = ext4_writepage_trans_blocks(inode);
4856 handle = ext4_journal_start(inode, credits); 4856 handle = ext4_journal_start(inode, credits);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 39335bda404b..ca6f07afe601 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,7 +55,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
55 return 0; 55 return 0;
56} 56}
57 57
58static void ext4_unwritten_wait(struct inode *inode) 58void ext4_unwritten_wait(struct inode *inode)
59{ 59{
60 wait_queue_head_t *wq = ext4_ioend_wq(inode); 60 wait_queue_head_t *wq = ext4_ioend_wq(inode);
61 61
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 460000868b8e..be1d89f385b4 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -138,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
138 if (inode->i_sb->s_flags & MS_RDONLY) 138 if (inode->i_sb->s_flags & MS_RDONLY)
139 goto out; 139 goto out;
140 140
141 ret = ext4_flush_completed_IO(inode); 141 ret = ext4_flush_unwritten_io(inode);
142 if (ret < 0) 142 if (ret < 0)
143 goto out; 143 goto out;
144 144
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 8d849dae8428..792e388e7b44 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -807,9 +807,11 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
807 807
808retry: 808retry:
809 if (rw == READ && ext4_should_dioread_nolock(inode)) { 809 if (rw == READ && ext4_should_dioread_nolock(inode)) {
810 if (unlikely(!list_empty(&ei->i_completed_io_list))) 810 if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) {
811 ext4_flush_completed_IO(inode); 811 mutex_lock(&inode->i_mutex);
812 812 ext4_flush_unwritten_io(inode);
813 mutex_unlock(&inode->i_mutex);
814 }
813 /* 815 /*
814 * Nolock dioread optimization may be dynamically disabled 816 * Nolock dioread optimization may be dynamically disabled
815 * via ext4_inode_block_unlocked_dio(). Check inode's state 817 * via ext4_inode_block_unlocked_dio(). Check inode's state
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 5b24c407701b..68e896e12a67 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -189,8 +189,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
189 189
190 list_add_tail(&io->list, &complete); 190 list_add_tail(&io->list, &complete);
191 } 191 }
192 /* It is important to update all flags for all end_io in one shot w/o
193 * dropping the lock.*/
194 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 192 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
195 while (!list_empty(&complete)) { 193 while (!list_empty(&complete)) {
196 io = list_entry(complete.next, ext4_io_end_t, list); 194 io = list_entry(complete.next, ext4_io_end_t, list);
@@ -228,9 +226,14 @@ static void ext4_end_io_work(struct work_struct *work)
228 ext4_do_flush_completed_IO(io->inode, io); 226 ext4_do_flush_completed_IO(io->inode, io);
229} 227}
230 228
231int ext4_flush_completed_IO(struct inode *inode) 229int ext4_flush_unwritten_io(struct inode *inode)
232{ 230{
233 return ext4_do_flush_completed_IO(inode, NULL); 231 int ret;
232 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
233 !(inode->i_state & I_FREEING));
234 ret = ext4_do_flush_completed_IO(inode, NULL);
235 ext4_unwritten_wait(inode);
236 return ret;
234} 237}
235 238
236ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) 239ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)