about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4/ext4.h
diff options
context:
space:
mode:
author: Aditya Kali <adityakali@google.com> 2011-09-09 19:20:51 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2011-09-09 19:20:51 -0400
commit: 5356f2615cd558c57a1f7d7528d1ad4de3640d96 (patch)
tree: e3590bf14d9a21c4eb365105886382bfb1131b95 /fs/ext4/ext4.h
parent: d8990240d8c911064447f8aa5a440f9345a6d692 (diff)
ext4: attempt to fix race in bigalloc code path
Currently, there exists a race between delayed allocated writes and the writeback when the bigalloc feature is in use. The race arose because we wanted to determine which blocks in a cluster are under delayed allocation and we were using the buffer_delay(bh) check for it. But the writeback codepath clears this bit without any synchronization, which resulted in a race and an ext4 warning similar to: EXT4-fs (ram1): ext4_da_update_reserve_space: ino 13, used 1 with only 0 reserved data blocks The race existed in two places: (1) between ext4_find_delalloc_range() and ext4_map_blocks() when called from the writeback code path, and (2) between ext4_find_delalloc_range() and ext4_da_get_block_prep() (where buffer_delay(bh) is set). To fix (1), this patch introduces a new buffer_head state bit - BH_Da_Mapped. This bit is set under the protection of EXT4_I(inode)->i_data_sem when we have actually mapped the delayed allocated blocks during writeout. We can now reliably check for this bit inside ext4_find_delalloc_range() to determine whether the reservation for the blocks has already been claimed or not. To fix (2), it was necessary to set buffer_delay(bh) under the protection of i_data_sem. So, I extracted the very beginning of ext4_map_blocks into a new function - ext4_da_map_blocks() - and performed the required setting of the BH_Delay bit and the quota reservation under the protection of i_data_sem. These two fixes make the checking of buffer_delay(bh) and buffer_da_mapped(bh) consistent, thus removing the race. Tested: I was able to reproduce the problem by running 'dd' and 'fsync' in parallel. Also, xfstests sometimes used to reproduce this race. After the fix, both my test and xfstests were successful and no race (warning message) was observed. Google-Bug-Id: 4997027 Signed-off-by: Aditya Kali <adityakali@google.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/ext4.h')
-rw-r--r-- fs/ext4/ext4.h 5
1 files changed, 4 insertions, 1 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 751277a4890c..1bbd2caebe7f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1893,7 +1893,6 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1893extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1893extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1894extern void ext4_da_update_reserve_space(struct inode *inode, 1894extern void ext4_da_update_reserve_space(struct inode *inode,
1895 int used, int quota_claim); 1895 int used, int quota_claim);
1896extern int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock);
1897 1896
1898/* indirect.c */ 1897/* indirect.c */
1899extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, 1898extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -2300,10 +2299,14 @@ enum ext4_state_bits {
2300 * never, ever appear in a buffer_head's state 2299 * never, ever appear in a buffer_head's state
2301 * flag. See EXT4_MAP_FROM_CLUSTER to see where 2300 * flag. See EXT4_MAP_FROM_CLUSTER to see where
2302 * this is used. */ 2301 * this is used. */
2302 BH_Da_Mapped, /* Delayed allocated block that now has a mapping. This
2303 * flag is set when ext4_map_blocks is called on a
2304 * delayed allocated block to get its real mapping. */
2303}; 2305};
2304 2306
2305BUFFER_FNS(Uninit, uninit) 2307BUFFER_FNS(Uninit, uninit)
2306TAS_BUFFER_FNS(Uninit, uninit) 2308TAS_BUFFER_FNS(Uninit, uninit)
2309BUFFER_FNS(Da_Mapped, da_mapped)
2307 2310
2308/* 2311/*
2309 * Add new method to test whether block and inode bitmaps are properly 2312 * Add new method to test whether block and inode bitmaps are properly