diff options
author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2009-01-05 21:36:02 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2009-01-05 21:36:02 -0500 |
commit | e21675d4b63975d09eb75c443c48ebe663d23e18 (patch) | |
tree | 73ace586265c977c2f0b41bbe2ec0b462809aa58 | |
parent | 3a06d778dfeda7eaeeb79bfa49cf97f2aae132b4 (diff) |
ext4: Add blocks added during resize to bitmap
With this change new blocks added during resize
are marked as free in the block bitmap and the
group is flagged with EXT4_GROUP_INFO_NEED_INIT_BIT
flag. This makes sure when mballoc tries to allocate
blocks from the new group we would reload the
buddy information using the bitmap present in the disk.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
-rw-r--r-- | fs/ext4/balloc.c | 136 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 5 | ||||
-rw-r--r-- | fs/ext4/resize.c | 11 |
3 files changed, 34 insertions, 118 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index a0c23b03a264..c54192e2384e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include "ext4.h" | 20 | #include "ext4.h" |
21 | #include "ext4_jbd2.h" | 21 | #include "ext4_jbd2.h" |
22 | #include "group.h" | 22 | #include "group.h" |
23 | #include "mballoc.h" | ||
23 | 24 | ||
24 | /* | 25 | /* |
25 | * balloc.c contains the blocks allocation and deallocation routines | 26 | * balloc.c contains the blocks allocation and deallocation routines |
@@ -350,62 +351,43 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
350 | } | 351 | } |
351 | 352 | ||
352 | /** | 353 | /** |
353 | * ext4_free_blocks_sb() -- Free given blocks and update quota | 354 | * ext4_add_groupblocks() -- Add given blocks to an existing group |
354 | * @handle: handle to this transaction | 355 | * @handle: handle to this transaction |
355 | * @sb: super block | 356 | * @sb: super block |
356 | * @block: start physical block to free | 357 | * @block: start physical block to add to the block group |
357 | * @count: number of blocks to free | 358 | * @count: number of blocks to add |
358 | * @pdquot_freed_blocks: pointer to quota | ||
359 | * | 359 | * |
360 | * XXX This function is only used by the on-line resizing code, which | 360 | * This marks the blocks as free in the bitmap. We ask the |
361 | * should probably be fixed up to call the mballoc variant. There | 361 | * mballoc to reload the buddy after this by setting group |
362 | * this needs to be cleaned up later; in fact, I'm not convinced this | 362 | * EXT4_GROUP_INFO_NEED_INIT_BIT flag |
363 | * is 100% correct in the face of the mballoc code. The online resizing | ||
364 | * code needs to be fixed up to more tightly (and correctly) interlock | ||
365 | * with the mballoc code. | ||
366 | */ | 363 | */ |
367 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | 364 | void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, |
368 | ext4_fsblk_t block, unsigned long count, | 365 | ext4_fsblk_t block, unsigned long count) |
369 | unsigned long *pdquot_freed_blocks) | ||
370 | { | 366 | { |
371 | struct buffer_head *bitmap_bh = NULL; | 367 | struct buffer_head *bitmap_bh = NULL; |
372 | struct buffer_head *gd_bh; | 368 | struct buffer_head *gd_bh; |
373 | ext4_group_t block_group; | 369 | ext4_group_t block_group; |
374 | ext4_grpblk_t bit; | 370 | ext4_grpblk_t bit; |
375 | unsigned int i; | 371 | unsigned int i; |
376 | unsigned int overflow; | ||
377 | struct ext4_group_desc *desc; | 372 | struct ext4_group_desc *desc; |
378 | struct ext4_super_block *es; | 373 | struct ext4_super_block *es; |
379 | struct ext4_sb_info *sbi; | 374 | struct ext4_sb_info *sbi; |
380 | int err = 0, ret; | 375 | int err = 0, ret; |
381 | ext4_grpblk_t group_freed; | 376 | ext4_grpblk_t blocks_freed; |
377 | struct ext4_group_info *grp; | ||
382 | 378 | ||
383 | *pdquot_freed_blocks = 0; | ||
384 | sbi = EXT4_SB(sb); | 379 | sbi = EXT4_SB(sb); |
385 | es = sbi->s_es; | 380 | es = sbi->s_es; |
386 | if (block < le32_to_cpu(es->s_first_data_block) || | 381 | ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); |
387 | block + count < block || | ||
388 | block + count > ext4_blocks_count(es)) { | ||
389 | ext4_error(sb, "ext4_free_blocks", | ||
390 | "Freeing blocks not in datazone - " | ||
391 | "block = %llu, count = %lu", block, count); | ||
392 | goto error_return; | ||
393 | } | ||
394 | |||
395 | ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1); | ||
396 | 382 | ||
397 | do_more: | ||
398 | overflow = 0; | ||
399 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 383 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
400 | /* | 384 | /* |
401 | * Check to see if we are freeing blocks across a group | 385 | * Check to see if we are freeing blocks across a group |
402 | * boundary. | 386 | * boundary. |
403 | */ | 387 | */ |
404 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { | 388 | if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { |
405 | overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); | 389 | goto error_return; |
406 | count -= overflow; | ||
407 | } | 390 | } |
408 | brelse(bitmap_bh); | ||
409 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 391 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
410 | if (!bitmap_bh) | 392 | if (!bitmap_bh) |
411 | goto error_return; | 393 | goto error_return; |
@@ -418,18 +400,17 @@ do_more: | |||
418 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | 400 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
419 | in_range(block + count - 1, ext4_inode_table(sb, desc), | 401 | in_range(block + count - 1, ext4_inode_table(sb, desc), |
420 | sbi->s_itb_per_group)) { | 402 | sbi->s_itb_per_group)) { |
421 | ext4_error(sb, "ext4_free_blocks", | 403 | ext4_error(sb, __func__, |
422 | "Freeing blocks in system zones - " | 404 | "Adding blocks in system zones - " |
423 | "Block = %llu, count = %lu", | 405 | "Block = %llu, count = %lu", |
424 | block, count); | 406 | block, count); |
425 | goto error_return; | 407 | goto error_return; |
426 | } | 408 | } |
427 | 409 | ||
428 | /* | 410 | /* |
429 | * We are about to start releasing blocks in the bitmap, | 411 | * We are about to add blocks to the bitmap, |
430 | * so we need undo access. | 412 | * so we need undo access. |
431 | */ | 413 | */ |
432 | /* @@@ check errors */ | ||
433 | BUFFER_TRACE(bitmap_bh, "getting undo access"); | 414 | BUFFER_TRACE(bitmap_bh, "getting undo access"); |
434 | err = ext4_journal_get_undo_access(handle, bitmap_bh); | 415 | err = ext4_journal_get_undo_access(handle, bitmap_bh); |
435 | if (err) | 416 | if (err) |
@@ -445,87 +426,28 @@ do_more: | |||
445 | if (err) | 426 | if (err) |
446 | goto error_return; | 427 | goto error_return; |
447 | 428 | ||
448 | jbd_lock_bh_state(bitmap_bh); | 429 | for (i = 0, blocks_freed = 0; i < count; i++) { |
449 | |||
450 | for (i = 0, group_freed = 0; i < count; i++) { | ||
451 | /* | ||
452 | * An HJ special. This is expensive... | ||
453 | */ | ||
454 | #ifdef CONFIG_JBD2_DEBUG | ||
455 | jbd_unlock_bh_state(bitmap_bh); | ||
456 | { | ||
457 | struct buffer_head *debug_bh; | ||
458 | debug_bh = sb_find_get_block(sb, block + i); | ||
459 | if (debug_bh) { | ||
460 | BUFFER_TRACE(debug_bh, "Deleted!"); | ||
461 | if (!bh2jh(bitmap_bh)->b_committed_data) | ||
462 | BUFFER_TRACE(debug_bh, | ||
463 | "No commited data in bitmap"); | ||
464 | BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); | ||
465 | __brelse(debug_bh); | ||
466 | } | ||
467 | } | ||
468 | jbd_lock_bh_state(bitmap_bh); | ||
469 | #endif | ||
470 | if (need_resched()) { | ||
471 | jbd_unlock_bh_state(bitmap_bh); | ||
472 | cond_resched(); | ||
473 | jbd_lock_bh_state(bitmap_bh); | ||
474 | } | ||
475 | /* @@@ This prevents newly-allocated data from being | ||
476 | * freed and then reallocated within the same | ||
477 | * transaction. | ||
478 | * | ||
479 | * Ideally we would want to allow that to happen, but to | ||
480 | * do so requires making jbd2_journal_forget() capable of | ||
481 | * revoking the queued write of a data block, which | ||
482 | * implies blocking on the journal lock. *forget() | ||
483 | * cannot block due to truncate races. | ||
484 | * | ||
485 | * Eventually we can fix this by making jbd2_journal_forget() | ||
486 | * return a status indicating whether or not it was able | ||
487 | * to revoke the buffer. On successful revoke, it is | ||
488 | * safe not to set the allocation bit in the committed | ||
489 | * bitmap, because we know that there is no outstanding | ||
490 | * activity on the buffer any more and so it is safe to | ||
491 | * reallocate it. | ||
492 | */ | ||
493 | BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); | ||
494 | J_ASSERT_BH(bitmap_bh, | ||
495 | bh2jh(bitmap_bh)->b_committed_data != NULL); | ||
496 | ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, | ||
497 | bh2jh(bitmap_bh)->b_committed_data); | ||
498 | |||
499 | /* | ||
500 | * We clear the bit in the bitmap after setting the committed | ||
501 | * data bit, because this is the reverse order to that which | ||
502 | * the allocator uses. | ||
503 | */ | ||
504 | BUFFER_TRACE(bitmap_bh, "clear bit"); | 430 | BUFFER_TRACE(bitmap_bh, "clear bit"); |
505 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), | 431 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), |
506 | bit + i, bitmap_bh->b_data)) { | 432 | bit + i, bitmap_bh->b_data)) { |
507 | jbd_unlock_bh_state(bitmap_bh); | ||
508 | ext4_error(sb, __func__, | 433 | ext4_error(sb, __func__, |
509 | "bit already cleared for block %llu", | 434 | "bit already cleared for block %llu", |
510 | (ext4_fsblk_t)(block + i)); | 435 | (ext4_fsblk_t)(block + i)); |
511 | jbd_lock_bh_state(bitmap_bh); | ||
512 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); | 436 | BUFFER_TRACE(bitmap_bh, "bit already cleared"); |
513 | } else { | 437 | } else { |
514 | group_freed++; | 438 | blocks_freed++; |
515 | } | 439 | } |
516 | } | 440 | } |
517 | jbd_unlock_bh_state(bitmap_bh); | ||
518 | |||
519 | spin_lock(sb_bgl_lock(sbi, block_group)); | 441 | spin_lock(sb_bgl_lock(sbi, block_group)); |
520 | le16_add_cpu(&desc->bg_free_blocks_count, group_freed); | 442 | le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed); |
521 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); | 443 | desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); |
522 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 444 | spin_unlock(sb_bgl_lock(sbi, block_group)); |
523 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 445 | percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); |
524 | 446 | ||
525 | if (sbi->s_log_groups_per_flex) { | 447 | if (sbi->s_log_groups_per_flex) { |
526 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); | 448 | ext4_group_t flex_group = ext4_flex_group(sbi, block_group); |
527 | spin_lock(sb_bgl_lock(sbi, flex_group)); | 449 | spin_lock(sb_bgl_lock(sbi, flex_group)); |
528 | sbi->s_flex_groups[flex_group].free_blocks += count; | 450 | sbi->s_flex_groups[flex_group].free_blocks += blocks_freed; |
529 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | 451 | spin_unlock(sb_bgl_lock(sbi, flex_group)); |
530 | } | 452 | } |
531 | 453 | ||
@@ -536,15 +458,17 @@ do_more: | |||
536 | /* And the group descriptor block */ | 458 | /* And the group descriptor block */ |
537 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | 459 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); |
538 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); | 460 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); |
539 | if (!err) err = ret; | 461 | if (!err) |
540 | *pdquot_freed_blocks += group_freed; | 462 | err = ret; |
541 | |||
542 | if (overflow && !err) { | ||
543 | block += count; | ||
544 | count = overflow; | ||
545 | goto do_more; | ||
546 | } | ||
547 | sb->s_dirt = 1; | 463 | sb->s_dirt = 1; |
464 | /* | ||
465 | * request to reload the buddy with the | ||
466 | * new bitmap information | ||
467 | */ | ||
468 | grp = ext4_get_group_info(sb, block_group); | ||
469 | set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | ||
470 | ext4_mb_update_group_info(grp, blocks_freed); | ||
471 | |||
548 | error_return: | 472 | error_return: |
549 | brelse(bitmap_bh); | 473 | brelse(bitmap_bh); |
550 | ext4_std_error(sb, err); | 474 | ext4_std_error(sb, err); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5125c1f6e7ec..8021bf558d1e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1014,9 +1014,8 @@ extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | |||
1014 | extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); | 1014 | extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
1015 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, | 1015 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
1016 | ext4_fsblk_t block, unsigned long count, int metadata); | 1016 | ext4_fsblk_t block, unsigned long count, int metadata); |
1017 | extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | 1017 | extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, |
1018 | ext4_fsblk_t block, unsigned long count, | 1018 | ext4_fsblk_t block, unsigned long count); |
1019 | unsigned long *pdquot_freed_blocks); | ||
1020 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); | 1019 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
1021 | extern void ext4_check_blocks_bitmap(struct super_block *); | 1020 | extern void ext4_check_blocks_bitmap(struct super_block *); |
1022 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 1021 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 1865d6a53de3..526db73701b4 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -977,9 +977,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
977 | struct buffer_head *bh; | 977 | struct buffer_head *bh; |
978 | handle_t *handle; | 978 | handle_t *handle; |
979 | int err; | 979 | int err; |
980 | unsigned long freed_blocks; | ||
981 | ext4_group_t group; | 980 | ext4_group_t group; |
982 | struct ext4_group_info *grp; | ||
983 | 981 | ||
984 | /* We don't need to worry about locking wrt other resizers just | 982 | /* We don't need to worry about locking wrt other resizers just |
985 | * yet: we're going to revalidate es->s_blocks_count after | 983 | * yet: we're going to revalidate es->s_blocks_count after |
@@ -1077,7 +1075,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1077 | unlock_super(sb); | 1075 | unlock_super(sb); |
1078 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | 1076 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
1079 | o_blocks_count + add); | 1077 | o_blocks_count + add); |
1080 | ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); | 1078 | /* We add the blocks to the bitmap and set the group need init bit */ |
1079 | ext4_add_groupblocks(handle, sb, o_blocks_count, add); | ||
1081 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | 1080 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, |
1082 | o_blocks_count + add); | 1081 | o_blocks_count + add); |
1083 | if ((err = ext4_journal_stop(handle))) | 1082 | if ((err = ext4_journal_stop(handle))) |
@@ -1120,12 +1119,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1120 | ClearPageUptodate(page); | 1119 | ClearPageUptodate(page); |
1121 | page_cache_release(page); | 1120 | page_cache_release(page); |
1122 | } | 1121 | } |
1123 | |||
1124 | /* Get the info on the last group */ | ||
1125 | grp = ext4_get_group_info(sb, group); | ||
1126 | |||
1127 | /* Update free blocks in group info */ | ||
1128 | ext4_mb_update_group_info(grp, add); | ||
1129 | } | 1122 | } |
1130 | 1123 | ||
1131 | if (test_opt(sb, DEBUG)) | 1124 | if (test_opt(sb, DEBUG)) |