aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>, 2009-01-05 21:36:02 -0500
committer: Theodore Ts'o <tytso@mit.edu>, 2009-01-05 21:36:02 -0500
commit: e21675d4b63975d09eb75c443c48ebe663d23e18 (patch)
tree: 73ace586265c977c2f0b41bbe2ec0b462809aa58 /fs
parent: 3a06d778dfeda7eaeeb79bfa49cf97f2aae132b4 (diff)
ext4: Add blocks added during resize to bitmap
With this change, new blocks added during resize are marked as free in the block bitmap, and the group is flagged with EXT4_GROUP_INFO_NEED_INIT_BIT. This ensures that when mballoc tries to allocate blocks from the new group, it reloads the buddy information from the bitmap present on disk.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
Diffstat (limited to 'fs')
-rw-r--r--  fs/ext4/balloc.c  136
-rw-r--r--  fs/ext4/ext4.h      5
-rw-r--r--  fs/ext4/resize.c   11
3 files changed, 34 insertions, 118 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index a0c23b03a264..c54192e2384e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -20,6 +20,7 @@
20#include "ext4.h" 20#include "ext4.h"
21#include "ext4_jbd2.h" 21#include "ext4_jbd2.h"
22#include "group.h" 22#include "group.h"
23#include "mballoc.h"
23 24
24/* 25/*
25 * balloc.c contains the blocks allocation and deallocation routines 26 * balloc.c contains the blocks allocation and deallocation routines
@@ -350,62 +351,43 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
350} 351}
351 352
352/** 353/**
353 * ext4_free_blocks_sb() -- Free given blocks and update quota 354 * ext4_add_groupblocks() -- Add given blocks to an existing group
354 * @handle: handle to this transaction 355 * @handle: handle to this transaction
355 * @sb: super block 356 * @sb: super block
356 * @block: start physcial block to free 357 * @block: start physcial block to add to the block group
357 * @count: number of blocks to free 358 * @count: number of blocks to free
358 * @pdquot_freed_blocks: pointer to quota
359 * 359 *
360 * XXX This function is only used by the on-line resizing code, which 360 * This marks the blocks as free in the bitmap. We ask the
361 * should probably be fixed up to call the mballoc variant. There 361 * mballoc to reload the buddy after this by setting group
362 * this needs to be cleaned up later; in fact, I'm not convinced this 362 * EXT4_GROUP_INFO_NEED_INIT_BIT flag
363 * is 100% correct in the face of the mballoc code. The online resizing
364 * code needs to be fixed up to more tightly (and correctly) interlock
365 * with the mballoc code.
366 */ 363 */
367void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, 364void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
368 ext4_fsblk_t block, unsigned long count, 365 ext4_fsblk_t block, unsigned long count)
369 unsigned long *pdquot_freed_blocks)
370{ 366{
371 struct buffer_head *bitmap_bh = NULL; 367 struct buffer_head *bitmap_bh = NULL;
372 struct buffer_head *gd_bh; 368 struct buffer_head *gd_bh;
373 ext4_group_t block_group; 369 ext4_group_t block_group;
374 ext4_grpblk_t bit; 370 ext4_grpblk_t bit;
375 unsigned int i; 371 unsigned int i;
376 unsigned int overflow;
377 struct ext4_group_desc *desc; 372 struct ext4_group_desc *desc;
378 struct ext4_super_block *es; 373 struct ext4_super_block *es;
379 struct ext4_sb_info *sbi; 374 struct ext4_sb_info *sbi;
380 int err = 0, ret; 375 int err = 0, ret;
381 ext4_grpblk_t group_freed; 376 ext4_grpblk_t blocks_freed;
377 struct ext4_group_info *grp;
382 378
383 *pdquot_freed_blocks = 0;
384 sbi = EXT4_SB(sb); 379 sbi = EXT4_SB(sb);
385 es = sbi->s_es; 380 es = sbi->s_es;
386 if (block < le32_to_cpu(es->s_first_data_block) || 381 ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
387 block + count < block ||
388 block + count > ext4_blocks_count(es)) {
389 ext4_error(sb, "ext4_free_blocks",
390 "Freeing blocks not in datazone - "
391 "block = %llu, count = %lu", block, count);
392 goto error_return;
393 }
394
395 ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);
396 382
397do_more:
398 overflow = 0;
399 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 383 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
400 /* 384 /*
401 * Check to see if we are freeing blocks across a group 385 * Check to see if we are freeing blocks across a group
402 * boundary. 386 * boundary.
403 */ 387 */
404 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { 388 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
405 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); 389 goto error_return;
406 count -= overflow;
407 } 390 }
408 brelse(bitmap_bh);
409 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 391 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
410 if (!bitmap_bh) 392 if (!bitmap_bh)
411 goto error_return; 393 goto error_return;
@@ -418,18 +400,17 @@ do_more:
418 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || 400 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
419 in_range(block + count - 1, ext4_inode_table(sb, desc), 401 in_range(block + count - 1, ext4_inode_table(sb, desc),
420 sbi->s_itb_per_group)) { 402 sbi->s_itb_per_group)) {
421 ext4_error(sb, "ext4_free_blocks", 403 ext4_error(sb, __func__,
422 "Freeing blocks in system zones - " 404 "Adding blocks in system zones - "
423 "Block = %llu, count = %lu", 405 "Block = %llu, count = %lu",
424 block, count); 406 block, count);
425 goto error_return; 407 goto error_return;
426 } 408 }
427 409
428 /* 410 /*
429 * We are about to start releasing blocks in the bitmap, 411 * We are about to add blocks to the bitmap,
430 * so we need undo access. 412 * so we need undo access.
431 */ 413 */
432 /* @@@ check errors */
433 BUFFER_TRACE(bitmap_bh, "getting undo access"); 414 BUFFER_TRACE(bitmap_bh, "getting undo access");
434 err = ext4_journal_get_undo_access(handle, bitmap_bh); 415 err = ext4_journal_get_undo_access(handle, bitmap_bh);
435 if (err) 416 if (err)
@@ -445,87 +426,28 @@ do_more:
445 if (err) 426 if (err)
446 goto error_return; 427 goto error_return;
447 428
448 jbd_lock_bh_state(bitmap_bh); 429 for (i = 0, blocks_freed = 0; i < count; i++) {
449
450 for (i = 0, group_freed = 0; i < count; i++) {
451 /*
452 * An HJ special. This is expensive...
453 */
454#ifdef CONFIG_JBD2_DEBUG
455 jbd_unlock_bh_state(bitmap_bh);
456 {
457 struct buffer_head *debug_bh;
458 debug_bh = sb_find_get_block(sb, block + i);
459 if (debug_bh) {
460 BUFFER_TRACE(debug_bh, "Deleted!");
461 if (!bh2jh(bitmap_bh)->b_committed_data)
462 BUFFER_TRACE(debug_bh,
463 "No commited data in bitmap");
464 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
465 __brelse(debug_bh);
466 }
467 }
468 jbd_lock_bh_state(bitmap_bh);
469#endif
470 if (need_resched()) {
471 jbd_unlock_bh_state(bitmap_bh);
472 cond_resched();
473 jbd_lock_bh_state(bitmap_bh);
474 }
475 /* @@@ This prevents newly-allocated data from being
476 * freed and then reallocated within the same
477 * transaction.
478 *
479 * Ideally we would want to allow that to happen, but to
480 * do so requires making jbd2_journal_forget() capable of
481 * revoking the queued write of a data block, which
482 * implies blocking on the journal lock. *forget()
483 * cannot block due to truncate races.
484 *
485 * Eventually we can fix this by making jbd2_journal_forget()
486 * return a status indicating whether or not it was able
487 * to revoke the buffer. On successful revoke, it is
488 * safe not to set the allocation bit in the committed
489 * bitmap, because we know that there is no outstanding
490 * activity on the buffer any more and so it is safe to
491 * reallocate it.
492 */
493 BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
494 J_ASSERT_BH(bitmap_bh,
495 bh2jh(bitmap_bh)->b_committed_data != NULL);
496 ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
497 bh2jh(bitmap_bh)->b_committed_data);
498
499 /*
500 * We clear the bit in the bitmap after setting the committed
501 * data bit, because this is the reverse order to that which
502 * the allocator uses.
503 */
504 BUFFER_TRACE(bitmap_bh, "clear bit"); 430 BUFFER_TRACE(bitmap_bh, "clear bit");
505 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 431 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
506 bit + i, bitmap_bh->b_data)) { 432 bit + i, bitmap_bh->b_data)) {
507 jbd_unlock_bh_state(bitmap_bh);
508 ext4_error(sb, __func__, 433 ext4_error(sb, __func__,
509 "bit already cleared for block %llu", 434 "bit already cleared for block %llu",
510 (ext4_fsblk_t)(block + i)); 435 (ext4_fsblk_t)(block + i));
511 jbd_lock_bh_state(bitmap_bh);
512 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 436 BUFFER_TRACE(bitmap_bh, "bit already cleared");
513 } else { 437 } else {
514 group_freed++; 438 blocks_freed++;
515 } 439 }
516 } 440 }
517 jbd_unlock_bh_state(bitmap_bh);
518
519 spin_lock(sb_bgl_lock(sbi, block_group)); 441 spin_lock(sb_bgl_lock(sbi, block_group));
520 le16_add_cpu(&desc->bg_free_blocks_count, group_freed); 442 le16_add_cpu(&desc->bg_free_blocks_count, blocks_freed);
521 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 443 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
522 spin_unlock(sb_bgl_lock(sbi, block_group)); 444 spin_unlock(sb_bgl_lock(sbi, block_group));
523 percpu_counter_add(&sbi->s_freeblocks_counter, count); 445 percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
524 446
525 if (sbi->s_log_groups_per_flex) { 447 if (sbi->s_log_groups_per_flex) {
526 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 448 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
527 spin_lock(sb_bgl_lock(sbi, flex_group)); 449 spin_lock(sb_bgl_lock(sbi, flex_group));
528 sbi->s_flex_groups[flex_group].free_blocks += count; 450 sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
529 spin_unlock(sb_bgl_lock(sbi, flex_group)); 451 spin_unlock(sb_bgl_lock(sbi, flex_group));
530 } 452 }
531 453
@@ -536,15 +458,17 @@ do_more:
536 /* And the group descriptor block */ 458 /* And the group descriptor block */
537 BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); 459 BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
538 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); 460 ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
539 if (!err) err = ret; 461 if (!err)
540 *pdquot_freed_blocks += group_freed; 462 err = ret;
541
542 if (overflow && !err) {
543 block += count;
544 count = overflow;
545 goto do_more;
546 }
547 sb->s_dirt = 1; 463 sb->s_dirt = 1;
464 /*
465 * request to reload the buddy with the
466 * new bitmap information
467 */
468 grp = ext4_get_group_info(sb, block_group);
469 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
470 ext4_mb_update_group_info(grp, blocks_freed);
471
548error_return: 472error_return:
549 brelse(bitmap_bh); 473 brelse(bitmap_bh);
550 ext4_std_error(sb, err); 474 ext4_std_error(sb, err);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5125c1f6e7ec..8021bf558d1e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1014,9 +1014,8 @@ extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1014extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1014extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1015extern void ext4_free_blocks(handle_t *handle, struct inode *inode, 1015extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1016 ext4_fsblk_t block, unsigned long count, int metadata); 1016 ext4_fsblk_t block, unsigned long count, int metadata);
1017extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, 1017extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1018 ext4_fsblk_t block, unsigned long count, 1018 ext4_fsblk_t block, unsigned long count);
1019 unsigned long *pdquot_freed_blocks);
1020extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1019extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
1021extern void ext4_check_blocks_bitmap(struct super_block *); 1020extern void ext4_check_blocks_bitmap(struct super_block *);
1022extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 1021extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 1865d6a53de3..526db73701b4 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -977,9 +977,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
977 struct buffer_head *bh; 977 struct buffer_head *bh;
978 handle_t *handle; 978 handle_t *handle;
979 int err; 979 int err;
980 unsigned long freed_blocks;
981 ext4_group_t group; 980 ext4_group_t group;
982 struct ext4_group_info *grp;
983 981
984 /* We don't need to worry about locking wrt other resizers just 982 /* We don't need to worry about locking wrt other resizers just
985 * yet: we're going to revalidate es->s_blocks_count after 983 * yet: we're going to revalidate es->s_blocks_count after
@@ -1077,7 +1075,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1077 unlock_super(sb); 1075 unlock_super(sb);
1078 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1076 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1079 o_blocks_count + add); 1077 o_blocks_count + add);
1080 ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); 1078 /* We add the blocks to the bitmap and set the group need init bit */
1079 ext4_add_groupblocks(handle, sb, o_blocks_count, add);
1081 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 1080 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1082 o_blocks_count + add); 1081 o_blocks_count + add);
1083 if ((err = ext4_journal_stop(handle))) 1082 if ((err = ext4_journal_stop(handle)))
@@ -1120,12 +1119,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1120 ClearPageUptodate(page); 1119 ClearPageUptodate(page);
1121 page_cache_release(page); 1120 page_cache_release(page);
1122 } 1121 }
1123
1124 /* Get the info on the last group */
1125 grp = ext4_get_group_info(sb, group);
1126
1127 /* Update free blocks in group info */
1128 ext4_mb_update_group_info(grp, add);
1129 } 1122 }
1130 1123
1131 if (test_opt(sb, DEBUG)) 1124 if (test_opt(sb, DEBUG))