aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2014-02-15 21:33:13 -0500
committerTheodore Ts'o <tytso@mit.edu>2014-02-15 21:33:13 -0500
commitb93c95353413041a8cebad915a8109619f66bcc6 (patch)
tree50b4813da3bdf8eb0963435bbc910fe231d913a3 /fs/ext4
parent23301410972330c0ae9a8afc379ba2005e249cc6 (diff)
ext4: fix online resize with very large inode tables
If a file system has a large number of inodes per block group, all of the metadata blocks in a flex_bg may be larger than what can fit in a single block group. Unfortunately, ext4_alloc_group_tables() in resize.c was never tested to see if it would handle this case correctly, and there were a large number of bugs which caused the following sequence to result in a BUG_ON: kernel bug at fs/ext4/resize.c:409! ... call trace: [<ffffffff81256768>] ext4_flex_group_add+0x1448/0x1830 [<ffffffff81257de2>] ext4_resize_fs+0x7b2/0xe80 [<ffffffff8123ac50>] ext4_ioctl+0xbf0/0xf00 [<ffffffff811c111d>] do_vfs_ioctl+0x2dd/0x4b0 [<ffffffff811b9df2>] ? final_putname+0x22/0x50 [<ffffffff811c1371>] sys_ioctl+0x81/0xa0 [<ffffffff81676aa9>] system_call_fastpath+0x16/0x1b code: c8 4c 89 df e8 41 96 f8 ff 44 89 e8 49 01 c4 44 29 6d d4 0 rip [<ffffffff81254fa1>] set_flexbg_block_bitmap+0x171/0x180 This can be reproduced with the following command sequence: mke2fs -t ext4 -i 4096 /dev/vdd 1G mount -t ext4 /dev/vdd /vdd resize2fs /dev/vdd 8G To fix this, we need to make sure the right thing happens when a block group's inode table straddles two block groups, which means the following bugs had to be fixed: 1) Not clearing the BLOCK_UNINIT flag in the second block group in ext4_alloc_group_tables --- the was proximate cause of the BUG_ON. 2) Incorrectly determining how many block groups contained contiguous free blocks in ext4_alloc_group_tables(). 3) Incorrectly setting the start of the next block range to be marked in use after a discontinuity in setup_new_flex_group_blocks(). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@vger.kernel.org
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/resize.c32
1 files changed, 20 insertions, 12 deletions
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c5adbb318a90..69a62610a6ca 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -243,6 +243,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
243 ext4_group_t group; 243 ext4_group_t group;
244 ext4_group_t last_group; 244 ext4_group_t last_group;
245 unsigned overhead; 245 unsigned overhead;
246 __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
246 247
247 BUG_ON(flex_gd->count == 0 || group_data == NULL); 248 BUG_ON(flex_gd->count == 0 || group_data == NULL);
248 249
@@ -266,7 +267,7 @@ next_group:
266 src_group++; 267 src_group++;
267 for (; src_group <= last_group; src_group++) { 268 for (; src_group <= last_group; src_group++) {
268 overhead = ext4_group_overhead_blocks(sb, src_group); 269 overhead = ext4_group_overhead_blocks(sb, src_group);
269 if (overhead != 0) 270 if (overhead == 0)
270 last_blk += group_data[src_group - group].blocks_count; 271 last_blk += group_data[src_group - group].blocks_count;
271 else 272 else
272 break; 273 break;
@@ -280,8 +281,7 @@ next_group:
280 group = ext4_get_group_number(sb, start_blk - 1); 281 group = ext4_get_group_number(sb, start_blk - 1);
281 group -= group_data[0].group; 282 group -= group_data[0].group;
282 group_data[group].free_blocks_count--; 283 group_data[group].free_blocks_count--;
283 if (flexbg_size > 1) 284 flex_gd->bg_flags[group] &= uninit_mask;
284 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
285 } 285 }
286 286
287 /* Allocate inode bitmaps */ 287 /* Allocate inode bitmaps */
@@ -292,22 +292,30 @@ next_group:
292 group = ext4_get_group_number(sb, start_blk - 1); 292 group = ext4_get_group_number(sb, start_blk - 1);
293 group -= group_data[0].group; 293 group -= group_data[0].group;
294 group_data[group].free_blocks_count--; 294 group_data[group].free_blocks_count--;
295 if (flexbg_size > 1) 295 flex_gd->bg_flags[group] &= uninit_mask;
296 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
297 } 296 }
298 297
299 /* Allocate inode tables */ 298 /* Allocate inode tables */
300 for (; it_index < flex_gd->count; it_index++) { 299 for (; it_index < flex_gd->count; it_index++) {
301 if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) 300 unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
301 ext4_fsblk_t next_group_start;
302
303 if (start_blk + itb > last_blk)
302 goto next_group; 304 goto next_group;
303 group_data[it_index].inode_table = start_blk; 305 group_data[it_index].inode_table = start_blk;
304 group = ext4_get_group_number(sb, start_blk - 1); 306 group = ext4_get_group_number(sb, start_blk);
307 next_group_start = ext4_group_first_block_no(sb, group + 1);
305 group -= group_data[0].group; 308 group -= group_data[0].group;
306 group_data[group].free_blocks_count -=
307 EXT4_SB(sb)->s_itb_per_group;
308 if (flexbg_size > 1)
309 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
310 309
310 if (start_blk + itb > next_group_start) {
311 flex_gd->bg_flags[group + 1] &= uninit_mask;
312 overhead = start_blk + itb - next_group_start;
313 group_data[group + 1].free_blocks_count -= overhead;
314 itb -= overhead;
315 }
316
317 group_data[group].free_blocks_count -= itb;
318 flex_gd->bg_flags[group] &= uninit_mask;
311 start_blk += EXT4_SB(sb)->s_itb_per_group; 319 start_blk += EXT4_SB(sb)->s_itb_per_group;
312 } 320 }
313 321
@@ -620,7 +628,7 @@ handle_ib:
620 if (err) 628 if (err)
621 goto out; 629 goto out;
622 count = group_table_count[j]; 630 count = group_table_count[j];
623 start = group_data[i].block_bitmap; 631 start = (&group_data[i].block_bitmap)[j];
624 block = start; 632 block = start;
625 } 633 }
626 634