diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2009-01-05 22:19:52 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2009-01-05 22:19:52 -0500 |
commit | 5d1b1b3f492f8696ea18950a454a141381b0f926 (patch) | |
tree | e6277cd3e01c074403b9da7390de1daa6b9f248f | |
parent | b7be019e80da4db96d283734d55366014509911c (diff) |
ext4: fix BUG when calling ext4_error with locked block group
The mballoc code likes to call ext4_error while it is holding locked
block groups. This can cause a "scheduling in atomic context" BUG. We
can't just unlock the block group and relock it after/if ext4_error
returns since that might result in race conditions in the case where
the filesystem is set to continue after finding errors.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/ext4.h | 47 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 30 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 47 | ||||
-rw-r--r-- | fs/ext4/super.c | 45 |
4 files changed, 105 insertions, 64 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8152b5603f0a..f0b1db6acf85 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1126,6 +1126,9 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...) | |||
1126 | __attribute__ ((format (printf, 3, 4))); | 1126 | __attribute__ ((format (printf, 3, 4))); |
1127 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) | 1127 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) |
1128 | __attribute__ ((format (printf, 3, 4))); | 1128 | __attribute__ ((format (printf, 3, 4))); |
1129 | extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, | ||
1130 | const char *, const char *, ...) | ||
1131 | __attribute__ ((format (printf, 4, 5))); | ||
1129 | extern void ext4_update_dynamic_rev(struct super_block *sb); | 1132 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
1130 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 1133 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
1131 | __u32 compat); | 1134 | __u32 compat); |
@@ -1249,6 +1252,50 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | |||
1249 | return ; | 1252 | return ; |
1250 | } | 1253 | } |
1251 | 1254 | ||
1255 | struct ext4_group_info { | ||
1256 | unsigned long bb_state; | ||
1257 | struct rb_root bb_free_root; | ||
1258 | unsigned short bb_first_free; | ||
1259 | unsigned short bb_free; | ||
1260 | unsigned short bb_fragments; | ||
1261 | struct list_head bb_prealloc_list; | ||
1262 | #ifdef DOUBLE_CHECK | ||
1263 | void *bb_bitmap; | ||
1264 | #endif | ||
1265 | struct rw_semaphore alloc_sem; | ||
1266 | unsigned short bb_counters[]; | ||
1267 | }; | ||
1268 | |||
1269 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | ||
1270 | #define EXT4_GROUP_INFO_LOCKED_BIT 1 | ||
1271 | |||
1272 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | ||
1273 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | ||
1274 | |||
1275 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | ||
1276 | { | ||
1277 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
1278 | |||
1279 | bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
1280 | } | ||
1281 | |||
1282 | static inline void ext4_unlock_group(struct super_block *sb, | ||
1283 | ext4_group_t group) | ||
1284 | { | ||
1285 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
1286 | |||
1287 | bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
1288 | } | ||
1289 | |||
1290 | static inline int ext4_is_group_locked(struct super_block *sb, | ||
1291 | ext4_group_t group) | ||
1292 | { | ||
1293 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
1294 | |||
1295 | return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, | ||
1296 | &(grinfo->bb_state)); | ||
1297 | } | ||
1298 | |||
1252 | /* | 1299 | /* |
1253 | * Inodes and files operations | 1300 | * Inodes and files operations |
1254 | */ | 1301 | */ |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0bf4c4c06b19..cda69632eea3 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -457,8 +457,8 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
457 | blocknr += first + i; | 457 | blocknr += first + i; |
458 | blocknr += | 458 | blocknr += |
459 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 459 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
460 | 460 | ext4_grp_locked_error(sb, e4b->bd_group, | |
461 | ext4_error(sb, __func__, "double-free of inode" | 461 | __func__, "double-free of inode" |
462 | " %lu's block %llu(bit %u in group %u)", | 462 | " %lu's block %llu(bit %u in group %u)", |
463 | inode ? inode->i_ino : 0, blocknr, | 463 | inode ? inode->i_ino : 0, blocknr, |
464 | first + i, e4b->bd_group); | 464 | first + i, e4b->bd_group); |
@@ -702,7 +702,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, | |||
702 | grp->bb_fragments = fragments; | 702 | grp->bb_fragments = fragments; |
703 | 703 | ||
704 | if (free != grp->bb_free) { | 704 | if (free != grp->bb_free) { |
705 | ext4_error(sb, __func__, | 705 | ext4_grp_locked_error(sb, group, __func__, |
706 | "EXT4-fs: group %u: %u blocks in bitmap, %u in gd", | 706 | "EXT4-fs: group %u: %u blocks in bitmap, %u in gd", |
707 | group, free, grp->bb_free); | 707 | group, free, grp->bb_free); |
708 | /* | 708 | /* |
@@ -1095,8 +1095,6 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
1095 | 1095 | ||
1096 | static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | 1096 | static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, |
1097 | int first, int count) | 1097 | int first, int count) |
1098 | __releases(bitlock) | ||
1099 | __acquires(bitlock) | ||
1100 | { | 1098 | { |
1101 | int block = 0; | 1099 | int block = 0; |
1102 | int max = 0; | 1100 | int max = 0; |
@@ -1135,12 +1133,11 @@ __acquires(bitlock) | |||
1135 | blocknr += block; | 1133 | blocknr += block; |
1136 | blocknr += | 1134 | blocknr += |
1137 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 1135 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
1138 | ext4_unlock_group(sb, e4b->bd_group); | 1136 | ext4_grp_locked_error(sb, e4b->bd_group, |
1139 | ext4_error(sb, __func__, "double-free of inode" | 1137 | __func__, "double-free of inode" |
1140 | " %lu's block %llu(bit %u in group %u)", | 1138 | " %lu's block %llu(bit %u in group %u)", |
1141 | inode ? inode->i_ino : 0, blocknr, block, | 1139 | inode ? inode->i_ino : 0, blocknr, block, |
1142 | e4b->bd_group); | 1140 | e4b->bd_group); |
1143 | ext4_lock_group(sb, e4b->bd_group); | ||
1144 | } | 1141 | } |
1145 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | 1142 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); |
1146 | e4b->bd_info->bb_counters[order]++; | 1143 | e4b->bd_info->bb_counters[order]++; |
@@ -1623,7 +1620,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1623 | * free blocks even though group info says we | 1620 | * free blocks even though group info says we |
1624 | * we have free blocks | 1621 | * we have free blocks |
1625 | */ | 1622 | */ |
1626 | ext4_error(sb, __func__, "%d free blocks as per " | 1623 | ext4_grp_locked_error(sb, e4b->bd_group, |
1624 | __func__, "%d free blocks as per " | ||
1627 | "group info. But bitmap says 0", | 1625 | "group info. But bitmap says 0", |
1628 | free); | 1626 | free); |
1629 | break; | 1627 | break; |
@@ -1632,7 +1630,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1632 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); | 1630 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); |
1633 | BUG_ON(ex.fe_len <= 0); | 1631 | BUG_ON(ex.fe_len <= 0); |
1634 | if (free < ex.fe_len) { | 1632 | if (free < ex.fe_len) { |
1635 | ext4_error(sb, __func__, "%d free blocks as per " | 1633 | ext4_grp_locked_error(sb, e4b->bd_group, |
1634 | __func__, "%d free blocks as per " | ||
1636 | "group info. But got %d blocks", | 1635 | "group info. But got %d blocks", |
1637 | free, ex.fe_len); | 1636 | free, ex.fe_len); |
1638 | /* | 1637 | /* |
@@ -3822,8 +3821,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3822 | pa, (unsigned long) pa->pa_lstart, | 3821 | pa, (unsigned long) pa->pa_lstart, |
3823 | (unsigned long) pa->pa_pstart, | 3822 | (unsigned long) pa->pa_pstart, |
3824 | (unsigned long) pa->pa_len); | 3823 | (unsigned long) pa->pa_len); |
3825 | ext4_error(sb, __func__, "free %u, pa_free %u", | 3824 | ext4_grp_locked_error(sb, group, |
3826 | free, pa->pa_free); | 3825 | __func__, "free %u, pa_free %u", |
3826 | free, pa->pa_free); | ||
3827 | /* | 3827 | /* |
3828 | * pa is already deleted so we use the value obtained | 3828 | * pa is already deleted so we use the value obtained |
3829 | * from the bitmap and continue. | 3829 | * from the bitmap and continue. |
@@ -4633,9 +4633,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4633 | else if (block >= (entry->start_blk + entry->count)) | 4633 | else if (block >= (entry->start_blk + entry->count)) |
4634 | n = &(*n)->rb_right; | 4634 | n = &(*n)->rb_right; |
4635 | else { | 4635 | else { |
4636 | ext4_error(sb, __func__, | 4636 | ext4_grp_locked_error(sb, e4b->bd_group, __func__, |
4637 | "Double free of blocks %d (%d %d)", | 4637 | "Double free of blocks %d (%d %d)", |
4638 | block, entry->start_blk, entry->count); | 4638 | block, entry->start_blk, entry->count); |
4639 | return 0; | 4639 | return 0; |
4640 | } | 4640 | } |
4641 | } | 4641 | } |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 997f78fff129..95d4c7f29a8a 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -118,27 +118,6 @@ struct ext4_free_data { | |||
118 | tid_t t_tid; | 118 | tid_t t_tid; |
119 | }; | 119 | }; |
120 | 120 | ||
121 | struct ext4_group_info { | ||
122 | unsigned long bb_state; | ||
123 | struct rb_root bb_free_root; | ||
124 | unsigned short bb_first_free; | ||
125 | unsigned short bb_free; | ||
126 | unsigned short bb_fragments; | ||
127 | struct list_head bb_prealloc_list; | ||
128 | #ifdef DOUBLE_CHECK | ||
129 | void *bb_bitmap; | ||
130 | #endif | ||
131 | struct rw_semaphore alloc_sem; | ||
132 | unsigned short bb_counters[]; | ||
133 | }; | ||
134 | |||
135 | #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 | ||
136 | #define EXT4_GROUP_INFO_LOCKED_BIT 1 | ||
137 | |||
138 | #define EXT4_MB_GRP_NEED_INIT(grp) \ | ||
139 | (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) | ||
140 | |||
141 | |||
142 | struct ext4_prealloc_space { | 121 | struct ext4_prealloc_space { |
143 | struct list_head pa_inode_list; | 122 | struct list_head pa_inode_list; |
144 | struct list_head pa_group_list; | 123 | struct list_head pa_group_list; |
@@ -264,32 +243,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) | |||
264 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 243 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
265 | 244 | ||
266 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | 245 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); |
267 | |||
268 | |||
269 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | ||
270 | { | ||
271 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
272 | |||
273 | bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
274 | } | ||
275 | |||
276 | static inline void ext4_unlock_group(struct super_block *sb, | ||
277 | ext4_group_t group) | ||
278 | { | ||
279 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
280 | |||
281 | bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); | ||
282 | } | ||
283 | |||
284 | static inline int ext4_is_group_locked(struct super_block *sb, | ||
285 | ext4_group_t group) | ||
286 | { | ||
287 | struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); | ||
288 | |||
289 | return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, | ||
290 | &(grinfo->bb_state)); | ||
291 | } | ||
292 | |||
293 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | 246 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
294 | struct ext4_free_extent *fex) | 247 | struct ext4_free_extent *fex) |
295 | { | 248 | { |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a9dd1170bfea..2415e2b09707 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -366,6 +366,44 @@ void ext4_warning(struct super_block *sb, const char *function, | |||
366 | va_end(args); | 366 | va_end(args); |
367 | } | 367 | } |
368 | 368 | ||
369 | void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, | ||
370 | const char *function, const char *fmt, ...) | ||
371 | __releases(bitlock) | ||
372 | __acquires(bitlock) | ||
373 | { | ||
374 | va_list args; | ||
375 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
376 | |||
377 | va_start(args, fmt); | ||
378 | printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); | ||
379 | vprintk(fmt, args); | ||
380 | printk("\n"); | ||
381 | va_end(args); | ||
382 | |||
383 | if (test_opt(sb, ERRORS_CONT)) { | ||
384 | EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; | ||
385 | es->s_state |= cpu_to_le16(EXT4_ERROR_FS); | ||
386 | ext4_commit_super(sb, es, 0); | ||
387 | return; | ||
388 | } | ||
389 | ext4_unlock_group(sb, grp); | ||
390 | ext4_handle_error(sb); | ||
391 | /* | ||
392 | * We only get here in the ERRORS_RO case; relocking the group | ||
393 | * may be dangerous, but nothing bad will happen since the | ||
394 | * filesystem will have already been marked read/only and the | ||
395 | * journal has been aborted. We return 1 as a hint to callers | ||
396 | * who might want to use the return value from | ||
397 | * ext4_grp_locked_error() to distinguish between the | ||
398 | * ERRORS_CONT and ERRORS_RO case, and perhaps return more | ||
399 | * aggressively from the ext4 function in question, with a | ||
400 | * more appropriate error code. | ||
401 | */ | ||
402 | ext4_lock_group(sb, grp); | ||
403 | return; | ||
404 | } | ||
405 | |||
406 | |||
369 | void ext4_update_dynamic_rev(struct super_block *sb) | 407 | void ext4_update_dynamic_rev(struct super_block *sb) |
370 | { | 408 | { |
371 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | 409 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; |
@@ -2868,8 +2906,11 @@ static void ext4_commit_super(struct super_block *sb, | |||
2868 | set_buffer_uptodate(sbh); | 2906 | set_buffer_uptodate(sbh); |
2869 | } | 2907 | } |
2870 | es->s_wtime = cpu_to_le32(get_seconds()); | 2908 | es->s_wtime = cpu_to_le32(get_seconds()); |
2871 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); | 2909 | ext4_free_blocks_count_set(es, percpu_counter_sum_positive( |
2872 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); | 2910 | &EXT4_SB(sb)->s_freeblocks_counter)); |
2911 | es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( | ||
2912 | &EXT4_SB(sb)->s_freeinodes_counter)); | ||
2913 | |||
2873 | BUFFER_TRACE(sbh, "marking dirty"); | 2914 | BUFFER_TRACE(sbh, "marking dirty"); |
2874 | mark_buffer_dirty(sbh); | 2915 | mark_buffer_dirty(sbh); |
2875 | if (sync) { | 2916 | if (sync) { |