aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext3
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-09-07 18:22:14 -0400
committerJan Kara <jack@suse.cz>2009-09-16 11:44:11 -0400
commit4f003fd32bc54ec438b8691795279844df27ce38 (patch)
tree2fe0a97307f9b3fe481f07713a271630951eb51b /fs/ext3
parent00171d3c7e3b738ba582c7a9b37408e796f49046 (diff)
ext3: Add locking to ext3_do_update_inode
I've been struggling with this off and on while I've been testing the data=guarded work. The symptom is corrupted orphan lists and inodes with the wrong i_size stored on disk. I was convinced the data=guarded code was just missing a call to ext3_mark_inode_dirty, but tracing showed the i_disksize I was sending to ext3_mark_inode_dirty wasn't actually making it to the drive. ext3_mark_inode_dirty can be called without locks held (atime updates and a few others), so the data=guarded code uses locks while updating the in-memory inode, and then calls ext3_mark_inode_dirty without any locks held. But, ext3_mark_inode_dirty has no internal locking to make sure that only one CPU is updating the buffer head at a time. Generally this works out ok because everyone that changes the inode then calls ext3_mark_inode_dirty themselves. Even though it races, eventually someone updates the buffer heads and things move on. But there is still a risk of the wrong values getting in, and the data=guarded code seems to hit the race very often. Since everyone that changes the inode also logs it, it should be possible to fix this with some memory barriers. I'll leave that as an exercise to the reader and lock the buffer head instead. It it probably a good idea to have a different patch series for lockless bit flipping on the ext3 i_state field. ext3_do_update_inode &= clears EXT3_STATE_NEW without any locks held. Signed-off-by: Chris Mason <chris.mason@oracle.com> Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/ext3')
-rw-r--r--fs/ext3/inode.c9
1 files changed, 9 insertions, 0 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index eb0b4e0d517b..cd098a7b77fc 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2903,6 +2903,10 @@ static int ext3_do_update_inode(handle_t *handle,
2903 struct buffer_head *bh = iloc->bh; 2903 struct buffer_head *bh = iloc->bh;
2904 int err = 0, rc, block; 2904 int err = 0, rc, block;
2905 2905
2906again:
2907 /* we can't allow multiple procs in here at once, its a bit racey */
2908 lock_buffer(bh);
2909
2906 /* For fields not not tracking in the in-memory inode, 2910 /* For fields not not tracking in the in-memory inode,
2907 * initialise them to zero for new inodes. */ 2911 * initialise them to zero for new inodes. */
2908 if (ei->i_state & EXT3_STATE_NEW) 2912 if (ei->i_state & EXT3_STATE_NEW)
@@ -2962,16 +2966,20 @@ static int ext3_do_update_inode(handle_t *handle,
2962 /* If this is the first large file 2966 /* If this is the first large file
2963 * created, add a flag to the superblock. 2967 * created, add a flag to the superblock.
2964 */ 2968 */
2969 unlock_buffer(bh);
2965 err = ext3_journal_get_write_access(handle, 2970 err = ext3_journal_get_write_access(handle,
2966 EXT3_SB(sb)->s_sbh); 2971 EXT3_SB(sb)->s_sbh);
2967 if (err) 2972 if (err)
2968 goto out_brelse; 2973 goto out_brelse;
2974
2969 ext3_update_dynamic_rev(sb); 2975 ext3_update_dynamic_rev(sb);
2970 EXT3_SET_RO_COMPAT_FEATURE(sb, 2976 EXT3_SET_RO_COMPAT_FEATURE(sb,
2971 EXT3_FEATURE_RO_COMPAT_LARGE_FILE); 2977 EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
2972 handle->h_sync = 1; 2978 handle->h_sync = 1;
2973 err = ext3_journal_dirty_metadata(handle, 2979 err = ext3_journal_dirty_metadata(handle,
2974 EXT3_SB(sb)->s_sbh); 2980 EXT3_SB(sb)->s_sbh);
2981 /* get our lock and start over */
2982 goto again;
2975 } 2983 }
2976 } 2984 }
2977 } 2985 }
@@ -2994,6 +3002,7 @@ static int ext3_do_update_inode(handle_t *handle,
2994 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 3002 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
2995 3003
2996 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); 3004 BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
3005 unlock_buffer(bh);
2997 rc = ext3_journal_dirty_metadata(handle, bh); 3006 rc = ext3_journal_dirty_metadata(handle, bh);
2998 if (!err) 3007 if (!err)
2999 err = rc; 3008 err = rc;