aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2009-01-05 22:19:52 -0500
committer Theodore Ts'o <tytso@mit.edu> 2009-01-05 22:19:52 -0500
commit 5d1b1b3f492f8696ea18950a454a141381b0f926 (patch)
tree e6277cd3e01c074403b9da7390de1daa6b9f248f
parent b7be019e80da4db96d283734d55366014509911c (diff)
ext4: fix BUG when calling ext4_error with locked block group
The mballoc code likes to call ext4_error while it is holding locked block groups. This can cause a "scheduling in atomic context" BUG. We can't just unlock the block group and relock it after/if ext4_error returns, since that might result in race conditions in the case where the filesystem is set to continue after finding errors. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- fs/ext4/ext4.h 47
-rw-r--r-- fs/ext4/mballoc.c 30
-rw-r--r-- fs/ext4/mballoc.h 47
-rw-r--r-- fs/ext4/super.c 45
4 files changed, 105 insertions, 64 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8152b5603f0a..f0b1db6acf85 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1126,6 +1126,9 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
1126 __attribute__ ((format (printf, 3, 4))); 1126 __attribute__ ((format (printf, 3, 4)));
1127extern void ext4_warning(struct super_block *, const char *, const char *, ...) 1127extern void ext4_warning(struct super_block *, const char *, const char *, ...)
1128 __attribute__ ((format (printf, 3, 4))); 1128 __attribute__ ((format (printf, 3, 4)));
1129extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
1130 const char *, const char *, ...)
1131 __attribute__ ((format (printf, 4, 5)));
1129extern void ext4_update_dynamic_rev(struct super_block *sb); 1132extern void ext4_update_dynamic_rev(struct super_block *sb);
1130extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, 1133extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
1131 __u32 compat); 1134 __u32 compat);
@@ -1249,6 +1252,50 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
1249 return ; 1252 return ;
1250} 1253}
1251 1254
1255struct ext4_group_info {
1256 unsigned long bb_state;
1257 struct rb_root bb_free_root;
1258 unsigned short bb_first_free;
1259 unsigned short bb_free;
1260 unsigned short bb_fragments;
1261 struct list_head bb_prealloc_list;
1262#ifdef DOUBLE_CHECK
1263 void *bb_bitmap;
1264#endif
1265 struct rw_semaphore alloc_sem;
1266 unsigned short bb_counters[];
1267};
1268
1269#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
1270#define EXT4_GROUP_INFO_LOCKED_BIT 1
1271
1272#define EXT4_MB_GRP_NEED_INIT(grp) \
1273 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
1274
1275static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
1276{
1277 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
1278
1279 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1280}
1281
1282static inline void ext4_unlock_group(struct super_block *sb,
1283 ext4_group_t group)
1284{
1285 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
1286
1287 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
1288}
1289
1290static inline int ext4_is_group_locked(struct super_block *sb,
1291 ext4_group_t group)
1292{
1293 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
1294
1295 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
1296 &(grinfo->bb_state));
1297}
1298
1252/* 1299/*
1253 * Inodes and files operations 1300 * Inodes and files operations
1254 */ 1301 */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0bf4c4c06b19..cda69632eea3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -457,8 +457,8 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
457 blocknr += first + i; 457 blocknr += first + i;
458 blocknr += 458 blocknr +=
459 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 459 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
460 460 ext4_grp_locked_error(sb, e4b->bd_group,
461 ext4_error(sb, __func__, "double-free of inode" 461 __func__, "double-free of inode"
462 " %lu's block %llu(bit %u in group %u)", 462 " %lu's block %llu(bit %u in group %u)",
463 inode ? inode->i_ino : 0, blocknr, 463 inode ? inode->i_ino : 0, blocknr,
464 first + i, e4b->bd_group); 464 first + i, e4b->bd_group);
@@ -702,7 +702,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
702 grp->bb_fragments = fragments; 702 grp->bb_fragments = fragments;
703 703
704 if (free != grp->bb_free) { 704 if (free != grp->bb_free) {
705 ext4_error(sb, __func__, 705 ext4_grp_locked_error(sb, group, __func__,
706 "EXT4-fs: group %u: %u blocks in bitmap, %u in gd", 706 "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
707 group, free, grp->bb_free); 707 group, free, grp->bb_free);
708 /* 708 /*
@@ -1095,8 +1095,6 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
1095 1095
1096static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, 1096static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1097 int first, int count) 1097 int first, int count)
1098__releases(bitlock)
1099__acquires(bitlock)
1100{ 1098{
1101 int block = 0; 1099 int block = 0;
1102 int max = 0; 1100 int max = 0;
@@ -1135,12 +1133,11 @@ __acquires(bitlock)
1135 blocknr += block; 1133 blocknr += block;
1136 blocknr += 1134 blocknr +=
1137 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 1135 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1138 ext4_unlock_group(sb, e4b->bd_group); 1136 ext4_grp_locked_error(sb, e4b->bd_group,
1139 ext4_error(sb, __func__, "double-free of inode" 1137 __func__, "double-free of inode"
1140 " %lu's block %llu(bit %u in group %u)", 1138 " %lu's block %llu(bit %u in group %u)",
1141 inode ? inode->i_ino : 0, blocknr, block, 1139 inode ? inode->i_ino : 0, blocknr, block,
1142 e4b->bd_group); 1140 e4b->bd_group);
1143 ext4_lock_group(sb, e4b->bd_group);
1144 } 1141 }
1145 mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); 1142 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
1146 e4b->bd_info->bb_counters[order]++; 1143 e4b->bd_info->bb_counters[order]++;
@@ -1623,7 +1620,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1623 * free blocks even though group info says we 1620 * free blocks even though group info says we
1624 * we have free blocks 1621 * we have free blocks
1625 */ 1622 */
1626 ext4_error(sb, __func__, "%d free blocks as per " 1623 ext4_grp_locked_error(sb, e4b->bd_group,
1624 __func__, "%d free blocks as per "
1627 "group info. But bitmap says 0", 1625 "group info. But bitmap says 0",
1628 free); 1626 free);
1629 break; 1627 break;
@@ -1632,7 +1630,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1632 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1630 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1633 BUG_ON(ex.fe_len <= 0); 1631 BUG_ON(ex.fe_len <= 0);
1634 if (free < ex.fe_len) { 1632 if (free < ex.fe_len) {
1635 ext4_error(sb, __func__, "%d free blocks as per " 1633 ext4_grp_locked_error(sb, e4b->bd_group,
1634 __func__, "%d free blocks as per "
1636 "group info. But got %d blocks", 1635 "group info. But got %d blocks",
1637 free, ex.fe_len); 1636 free, ex.fe_len);
1638 /* 1637 /*
@@ -3822,8 +3821,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3822 pa, (unsigned long) pa->pa_lstart, 3821 pa, (unsigned long) pa->pa_lstart,
3823 (unsigned long) pa->pa_pstart, 3822 (unsigned long) pa->pa_pstart,
3824 (unsigned long) pa->pa_len); 3823 (unsigned long) pa->pa_len);
3825 ext4_error(sb, __func__, "free %u, pa_free %u", 3824 ext4_grp_locked_error(sb, group,
3826 free, pa->pa_free); 3825 __func__, "free %u, pa_free %u",
3826 free, pa->pa_free);
3827 /* 3827 /*
3828 * pa is already deleted so we use the value obtained 3828 * pa is already deleted so we use the value obtained
3829 * from the bitmap and continue. 3829 * from the bitmap and continue.
@@ -4633,9 +4633,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4633 else if (block >= (entry->start_blk + entry->count)) 4633 else if (block >= (entry->start_blk + entry->count))
4634 n = &(*n)->rb_right; 4634 n = &(*n)->rb_right;
4635 else { 4635 else {
4636 ext4_error(sb, __func__, 4636 ext4_grp_locked_error(sb, e4b->bd_group, __func__,
4637 "Double free of blocks %d (%d %d)", 4637 "Double free of blocks %d (%d %d)",
4638 block, entry->start_blk, entry->count); 4638 block, entry->start_blk, entry->count);
4639 return 0; 4639 return 0;
4640 } 4640 }
4641 } 4641 }
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 997f78fff129..95d4c7f29a8a 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -118,27 +118,6 @@ struct ext4_free_data {
118 tid_t t_tid; 118 tid_t t_tid;
119}; 119};
120 120
121struct ext4_group_info {
122 unsigned long bb_state;
123 struct rb_root bb_free_root;
124 unsigned short bb_first_free;
125 unsigned short bb_free;
126 unsigned short bb_fragments;
127 struct list_head bb_prealloc_list;
128#ifdef DOUBLE_CHECK
129 void *bb_bitmap;
130#endif
131 struct rw_semaphore alloc_sem;
132 unsigned short bb_counters[];
133};
134
135#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
136#define EXT4_GROUP_INFO_LOCKED_BIT 1
137
138#define EXT4_MB_GRP_NEED_INIT(grp) \
139 (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
140
141
142struct ext4_prealloc_space { 121struct ext4_prealloc_space {
143 struct list_head pa_inode_list; 122 struct list_head pa_inode_list;
144 struct list_head pa_group_list; 123 struct list_head pa_group_list;
@@ -264,32 +243,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
264#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 243#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
265 244
266struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); 245struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
267
268
269static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
270{
271 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
272
273 bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
274}
275
276static inline void ext4_unlock_group(struct super_block *sb,
277 ext4_group_t group)
278{
279 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
280
281 bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
282}
283
284static inline int ext4_is_group_locked(struct super_block *sb,
285 ext4_group_t group)
286{
287 struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
288
289 return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
290 &(grinfo->bb_state));
291}
292
293static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 246static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
294 struct ext4_free_extent *fex) 247 struct ext4_free_extent *fex)
295{ 248{
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a9dd1170bfea..2415e2b09707 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -366,6 +366,44 @@ void ext4_warning(struct super_block *sb, const char *function,
366 va_end(args); 366 va_end(args);
367} 367}
368 368
369void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
370 const char *function, const char *fmt, ...)
371__releases(bitlock)
372__acquires(bitlock)
373{
374 va_list args;
375 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
376
377 va_start(args, fmt);
378 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
379 vprintk(fmt, args);
380 printk("\n");
381 va_end(args);
382
383 if (test_opt(sb, ERRORS_CONT)) {
384 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
385 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
386 ext4_commit_super(sb, es, 0);
387 return;
388 }
389 ext4_unlock_group(sb, grp);
390 ext4_handle_error(sb);
391 /*
392 * We only get here in the ERRORS_RO case; relocking the group
393 * may be dangerous, but nothing bad will happen since the
394 * filesystem will have already been marked read/only and the
395 * journal has been aborted. We return 1 as a hint to callers
396 * who might want to use the return value from
397 * ext4_grp_locked_error() to distinguish between the
398 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
399 * aggressively from the ext4 function in question, with a
400 * more appropriate error code.
401 */
402 ext4_lock_group(sb, grp);
403 return;
404}
405
406
369void ext4_update_dynamic_rev(struct super_block *sb) 407void ext4_update_dynamic_rev(struct super_block *sb)
370{ 408{
371 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 409 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -2868,8 +2906,11 @@ static void ext4_commit_super(struct super_block *sb,
2868 set_buffer_uptodate(sbh); 2906 set_buffer_uptodate(sbh);
2869 } 2907 }
2870 es->s_wtime = cpu_to_le32(get_seconds()); 2908 es->s_wtime = cpu_to_le32(get_seconds());
2871 ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); 2909 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
2872 es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); 2910 &EXT4_SB(sb)->s_freeblocks_counter));
2911 es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
2912 &EXT4_SB(sb)->s_freeinodes_counter));
2913
2873 BUFFER_TRACE(sbh, "marking dirty"); 2914 BUFFER_TRACE(sbh, "marking dirty");
2874 mark_buffer_dirty(sbh); 2915 mark_buffer_dirty(sbh);
2875 if (sync) { 2916 if (sync) {