diff options
author | Theodore Ts'o <tytso@mit.edu> | 2011-09-09 18:50:51 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2011-09-09 18:50:51 -0400 |
commit | 84130193e0e6568dfdfb823f0e1e19aec80aff6e (patch) | |
tree | 799534ea6cd551ba346e32da2ee2f363851a3257 /fs/ext4/mballoc.c | |
parent | 53accfa9f819c80056db6f03f9c5cfa4bcba1ed8 (diff) |
ext4: teach ext4_free_blocks() about bigalloc and clusters
The ext4_free_blocks() function now has two new flags that indicate
whether a partial cluster at the beginning or the end of the block
extents should be freed or not. That will be up to the caller (i.e.,
truncate), who can figure out whether partial clusters at the
beginning or the end of a block range can be freed.
We also have to update the ext4_mb_free_metadata() and
release_blocks_on_commit() machinery to be cluster-based, since it is
used by ext4_free_blocks().
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 86 |
1 files changed, 62 insertions, 24 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8765f2512f13..57ce6960e940 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2602,11 +2602,13 @@ int ext4_mb_release(struct super_block *sb) | |||
2602 | } | 2602 | } |
2603 | 2603 | ||
2604 | static inline int ext4_issue_discard(struct super_block *sb, | 2604 | static inline int ext4_issue_discard(struct super_block *sb, |
2605 | ext4_group_t block_group, ext4_grpblk_t block, int count) | 2605 | ext4_group_t block_group, ext4_grpblk_t cluster, int count) |
2606 | { | 2606 | { |
2607 | ext4_fsblk_t discard_block; | 2607 | ext4_fsblk_t discard_block; |
2608 | 2608 | ||
2609 | discard_block = block + ext4_group_first_block_no(sb, block_group); | 2609 | discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) + |
2610 | ext4_group_first_block_no(sb, block_group)); | ||
2611 | count = EXT4_C2B(EXT4_SB(sb), count); | ||
2610 | trace_ext4_discard_blocks(sb, | 2612 | trace_ext4_discard_blocks(sb, |
2611 | (unsigned long long) discard_block, count); | 2613 | (unsigned long long) discard_block, count); |
2612 | return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); | 2614 | return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
@@ -2633,7 +2635,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2633 | 2635 | ||
2634 | if (test_opt(sb, DISCARD)) | 2636 | if (test_opt(sb, DISCARD)) |
2635 | ext4_issue_discard(sb, entry->group, | 2637 | ext4_issue_discard(sb, entry->group, |
2636 | entry->start_blk, entry->count); | 2638 | entry->start_cluster, entry->count); |
2637 | 2639 | ||
2638 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2640 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2639 | /* we expect to find existing buddy because it's pinned */ | 2641 | /* we expect to find existing buddy because it's pinned */ |
@@ -2646,7 +2648,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2646 | ext4_lock_group(sb, entry->group); | 2648 | ext4_lock_group(sb, entry->group); |
2647 | /* Take it out of per group rb tree */ | 2649 | /* Take it out of per group rb tree */ |
2648 | rb_erase(&entry->node, &(db->bb_free_root)); | 2650 | rb_erase(&entry->node, &(db->bb_free_root)); |
2649 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); | 2651 | mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); |
2650 | 2652 | ||
2651 | /* | 2653 | /* |
2652 | * Clear the trimmed flag for the group so that the next | 2654 | * Clear the trimmed flag for the group so that the next |
@@ -3300,7 +3302,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3300 | 3302 | ||
3301 | while (n) { | 3303 | while (n) { |
3302 | entry = rb_entry(n, struct ext4_free_data, node); | 3304 | entry = rb_entry(n, struct ext4_free_data, node); |
3303 | ext4_set_bits(bitmap, entry->start_blk, entry->count); | 3305 | ext4_set_bits(bitmap, entry->start_cluster, entry->count); |
3304 | n = rb_next(n); | 3306 | n = rb_next(n); |
3305 | } | 3307 | } |
3306 | return; | 3308 | return; |
@@ -4401,7 +4403,7 @@ static int can_merge(struct ext4_free_data *entry1, | |||
4401 | { | 4403 | { |
4402 | if ((entry1->t_tid == entry2->t_tid) && | 4404 | if ((entry1->t_tid == entry2->t_tid) && |
4403 | (entry1->group == entry2->group) && | 4405 | (entry1->group == entry2->group) && |
4404 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) | 4406 | ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) |
4405 | return 1; | 4407 | return 1; |
4406 | return 0; | 4408 | return 0; |
4407 | } | 4409 | } |
@@ -4411,7 +4413,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4411 | struct ext4_free_data *new_entry) | 4413 | struct ext4_free_data *new_entry) |
4412 | { | 4414 | { |
4413 | ext4_group_t group = e4b->bd_group; | 4415 | ext4_group_t group = e4b->bd_group; |
4414 | ext4_grpblk_t block; | 4416 | ext4_grpblk_t cluster; |
4415 | struct ext4_free_data *entry; | 4417 | struct ext4_free_data *entry; |
4416 | struct ext4_group_info *db = e4b->bd_info; | 4418 | struct ext4_group_info *db = e4b->bd_info; |
4417 | struct super_block *sb = e4b->bd_sb; | 4419 | struct super_block *sb = e4b->bd_sb; |
@@ -4424,7 +4426,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4424 | BUG_ON(e4b->bd_buddy_page == NULL); | 4426 | BUG_ON(e4b->bd_buddy_page == NULL); |
4425 | 4427 | ||
4426 | new_node = &new_entry->node; | 4428 | new_node = &new_entry->node; |
4427 | block = new_entry->start_blk; | 4429 | cluster = new_entry->start_cluster; |
4428 | 4430 | ||
4429 | if (!*n) { | 4431 | if (!*n) { |
4430 | /* first free block exent. We need to | 4432 | /* first free block exent. We need to |
@@ -4438,13 +4440,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4438 | while (*n) { | 4440 | while (*n) { |
4439 | parent = *n; | 4441 | parent = *n; |
4440 | entry = rb_entry(parent, struct ext4_free_data, node); | 4442 | entry = rb_entry(parent, struct ext4_free_data, node); |
4441 | if (block < entry->start_blk) | 4443 | if (cluster < entry->start_cluster) |
4442 | n = &(*n)->rb_left; | 4444 | n = &(*n)->rb_left; |
4443 | else if (block >= (entry->start_blk + entry->count)) | 4445 | else if (cluster >= (entry->start_cluster + entry->count)) |
4444 | n = &(*n)->rb_right; | 4446 | n = &(*n)->rb_right; |
4445 | else { | 4447 | else { |
4446 | ext4_grp_locked_error(sb, group, 0, | 4448 | ext4_grp_locked_error(sb, group, 0, |
4447 | ext4_group_first_block_no(sb, group) + block, | 4449 | ext4_group_first_block_no(sb, group) + |
4450 | EXT4_C2B(sbi, cluster), | ||
4448 | "Block already on to-be-freed list"); | 4451 | "Block already on to-be-freed list"); |
4449 | return 0; | 4452 | return 0; |
4450 | } | 4453 | } |
@@ -4458,7 +4461,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4458 | if (node) { | 4461 | if (node) { |
4459 | entry = rb_entry(node, struct ext4_free_data, node); | 4462 | entry = rb_entry(node, struct ext4_free_data, node); |
4460 | if (can_merge(entry, new_entry)) { | 4463 | if (can_merge(entry, new_entry)) { |
4461 | new_entry->start_blk = entry->start_blk; | 4464 | new_entry->start_cluster = entry->start_cluster; |
4462 | new_entry->count += entry->count; | 4465 | new_entry->count += entry->count; |
4463 | rb_erase(node, &(db->bb_free_root)); | 4466 | rb_erase(node, &(db->bb_free_root)); |
4464 | spin_lock(&sbi->s_md_lock); | 4467 | spin_lock(&sbi->s_md_lock); |
@@ -4509,6 +4512,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4509 | ext4_group_t block_group; | 4512 | ext4_group_t block_group; |
4510 | struct ext4_sb_info *sbi; | 4513 | struct ext4_sb_info *sbi; |
4511 | struct ext4_buddy e4b; | 4514 | struct ext4_buddy e4b; |
4515 | unsigned int count_clusters; | ||
4512 | int err = 0; | 4516 | int err = 0; |
4513 | int ret; | 4517 | int ret; |
4514 | 4518 | ||
@@ -4557,6 +4561,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
4557 | if (!ext4_should_writeback_data(inode)) | 4561 | if (!ext4_should_writeback_data(inode)) |
4558 | flags |= EXT4_FREE_BLOCKS_METADATA; | 4562 | flags |= EXT4_FREE_BLOCKS_METADATA; |
4559 | 4563 | ||
4564 | /* | ||
4565 | * If the extent to be freed does not begin on a cluster | ||
4566 | * boundary, we need to deal with partial clusters at the | ||
4567 | * beginning and end of the extent. Normally we will free | ||
4568 | * blocks at the beginning or the end unless we are explicitly | ||
4569 | * requested to avoid doing so. | ||
4570 | */ | ||
4571 | overflow = block & (sbi->s_cluster_ratio - 1); | ||
4572 | if (overflow) { | ||
4573 | if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { | ||
4574 | overflow = sbi->s_cluster_ratio - overflow; | ||
4575 | block += overflow; | ||
4576 | if (count > overflow) | ||
4577 | count -= overflow; | ||
4578 | else | ||
4579 | return; | ||
4580 | } else { | ||
4581 | block -= overflow; | ||
4582 | count += overflow; | ||
4583 | } | ||
4584 | } | ||
4585 | overflow = count & (sbi->s_cluster_ratio - 1); | ||
4586 | if (overflow) { | ||
4587 | if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { | ||
4588 | if (count > overflow) | ||
4589 | count -= overflow; | ||
4590 | else | ||
4591 | return; | ||
4592 | } else | ||
4593 | count += sbi->s_cluster_ratio - overflow; | ||
4594 | } | ||
4595 | |||
4560 | do_more: | 4596 | do_more: |
4561 | overflow = 0; | 4597 | overflow = 0; |
4562 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); | 4598 | ext4_get_group_no_and_offset(sb, block, &block_group, &bit); |
@@ -4565,10 +4601,12 @@ do_more: | |||
4565 | * Check to see if we are freeing blocks across a group | 4601 | * Check to see if we are freeing blocks across a group |
4566 | * boundary. | 4602 | * boundary. |
4567 | */ | 4603 | */ |
4568 | if (bit + count > EXT4_CLUSTERS_PER_GROUP(sb)) { | 4604 | if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) { |
4569 | overflow = bit + count - EXT4_CLUSTERS_PER_GROUP(sb); | 4605 | overflow = EXT4_C2B(sbi, bit) + count - |
4606 | EXT4_BLOCKS_PER_GROUP(sb); | ||
4570 | count -= overflow; | 4607 | count -= overflow; |
4571 | } | 4608 | } |
4609 | count_clusters = EXT4_B2C(sbi, count); | ||
4572 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 4610 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
4573 | if (!bitmap_bh) { | 4611 | if (!bitmap_bh) { |
4574 | err = -EIO; | 4612 | err = -EIO; |
@@ -4583,9 +4621,9 @@ do_more: | |||
4583 | if (in_range(ext4_block_bitmap(sb, gdp), block, count) || | 4621 | if (in_range(ext4_block_bitmap(sb, gdp), block, count) || |
4584 | in_range(ext4_inode_bitmap(sb, gdp), block, count) || | 4622 | in_range(ext4_inode_bitmap(sb, gdp), block, count) || |
4585 | in_range(block, ext4_inode_table(sb, gdp), | 4623 | in_range(block, ext4_inode_table(sb, gdp), |
4586 | EXT4_SB(sb)->s_itb_per_group) || | 4624 | EXT4_SB(sb)->s_itb_per_group) || |
4587 | in_range(block + count - 1, ext4_inode_table(sb, gdp), | 4625 | in_range(block + count - 1, ext4_inode_table(sb, gdp), |
4588 | EXT4_SB(sb)->s_itb_per_group)) { | 4626 | EXT4_SB(sb)->s_itb_per_group)) { |
4589 | 4627 | ||
4590 | ext4_error(sb, "Freeing blocks in system zone - " | 4628 | ext4_error(sb, "Freeing blocks in system zone - " |
4591 | "Block = %llu, count = %lu", block, count); | 4629 | "Block = %llu, count = %lu", block, count); |
@@ -4610,11 +4648,11 @@ do_more: | |||
4610 | #ifdef AGGRESSIVE_CHECK | 4648 | #ifdef AGGRESSIVE_CHECK |
4611 | { | 4649 | { |
4612 | int i; | 4650 | int i; |
4613 | for (i = 0; i < count; i++) | 4651 | for (i = 0; i < count_clusters; i++) |
4614 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4652 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4615 | } | 4653 | } |
4616 | #endif | 4654 | #endif |
4617 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count); | 4655 | trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters); |
4618 | 4656 | ||
4619 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4657 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4620 | if (err) | 4658 | if (err) |
@@ -4631,13 +4669,13 @@ do_more: | |||
4631 | err = -ENOMEM; | 4669 | err = -ENOMEM; |
4632 | goto error_return; | 4670 | goto error_return; |
4633 | } | 4671 | } |
4634 | new_entry->start_blk = bit; | 4672 | new_entry->start_cluster = bit; |
4635 | new_entry->group = block_group; | 4673 | new_entry->group = block_group; |
4636 | new_entry->count = count; | 4674 | new_entry->count = count_clusters; |
4637 | new_entry->t_tid = handle->h_transaction->t_tid; | 4675 | new_entry->t_tid = handle->h_transaction->t_tid; |
4638 | 4676 | ||
4639 | ext4_lock_group(sb, block_group); | 4677 | ext4_lock_group(sb, block_group); |
4640 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4678 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
4641 | ext4_mb_free_metadata(handle, &e4b, new_entry); | 4679 | ext4_mb_free_metadata(handle, &e4b, new_entry); |
4642 | } else { | 4680 | } else { |
4643 | /* need to update group_info->bb_free and bitmap | 4681 | /* need to update group_info->bb_free and bitmap |
@@ -4645,11 +4683,11 @@ do_more: | |||
4645 | * them with group lock_held | 4683 | * them with group lock_held |
4646 | */ | 4684 | */ |
4647 | ext4_lock_group(sb, block_group); | 4685 | ext4_lock_group(sb, block_group); |
4648 | mb_clear_bits(bitmap_bh->b_data, bit, count); | 4686 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |
4649 | mb_free_blocks(inode, &e4b, bit, count); | 4687 | mb_free_blocks(inode, &e4b, bit, count_clusters); |
4650 | } | 4688 | } |
4651 | 4689 | ||
4652 | ret = ext4_free_blks_count(sb, gdp) + count; | 4690 | ret = ext4_free_blks_count(sb, gdp) + count_clusters; |
4653 | ext4_free_blks_set(sb, gdp, ret); | 4691 | ext4_free_blks_set(sb, gdp, ret); |
4654 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | 4692 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); |
4655 | ext4_unlock_group(sb, block_group); | 4693 | ext4_unlock_group(sb, block_group); |