aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Theodore Ts'o <tytso@mit.edu> 2011-09-09 18:50:51 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2011-09-09 18:50:51 -0400
commit: 84130193e0e6568dfdfb823f0e1e19aec80aff6e (patch)
tree: 799534ea6cd551ba346e32da2ee2f363851a3257 /fs
parent: 53accfa9f819c80056db6f03f9c5cfa4bcba1ed8 (diff)
ext4: teach ext4_free_blocks() about bigalloc and clusters
The ext4_free_blocks() function now has two new flags that indicate whether a partial cluster at the beginning or the end of the block extents should be freed or not. That will be up to the caller (i.e., truncate), which can figure out whether partial clusters at the beginning or the end of a block range can be freed. We also have to update the ext4_mb_free_metadata() and release_blocks_on_commit() machinery to be cluster-based, since it is used by ext4_free_blocks().

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ext4/ext4.h 2
-rw-r--r-- fs/ext4/mballoc.c 86
-rw-r--r-- fs/ext4/mballoc.h 2
3 files changed, 65 insertions, 25 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 803cfa42e1e8..030bfc1cb59d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -539,6 +539,8 @@ struct ext4_new_group_data {
539#define EXT4_FREE_BLOCKS_FORGET 0x0002 539#define EXT4_FREE_BLOCKS_FORGET 0x0002
540#define EXT4_FREE_BLOCKS_VALIDATED 0x0004 540#define EXT4_FREE_BLOCKS_VALIDATED 0x0004
541#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008 541#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
542#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
543#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
542 544
543/* 545/*
544 * Flags used by ext4_discard_partial_page_buffers 546 * Flags used by ext4_discard_partial_page_buffers
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8765f2512f13..57ce6960e940 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2602,11 +2602,13 @@ int ext4_mb_release(struct super_block *sb)
2602} 2602}
2603 2603
2604static inline int ext4_issue_discard(struct super_block *sb, 2604static inline int ext4_issue_discard(struct super_block *sb,
2605 ext4_group_t block_group, ext4_grpblk_t block, int count) 2605 ext4_group_t block_group, ext4_grpblk_t cluster, int count)
2606{ 2606{
2607 ext4_fsblk_t discard_block; 2607 ext4_fsblk_t discard_block;
2608 2608
2609 discard_block = block + ext4_group_first_block_no(sb, block_group); 2609 discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
2610 ext4_group_first_block_no(sb, block_group));
2611 count = EXT4_C2B(EXT4_SB(sb), count);
2610 trace_ext4_discard_blocks(sb, 2612 trace_ext4_discard_blocks(sb,
2611 (unsigned long long) discard_block, count); 2613 (unsigned long long) discard_block, count);
2612 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); 2614 return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
@@ -2633,7 +2635,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2633 2635
2634 if (test_opt(sb, DISCARD)) 2636 if (test_opt(sb, DISCARD))
2635 ext4_issue_discard(sb, entry->group, 2637 ext4_issue_discard(sb, entry->group,
2636 entry->start_blk, entry->count); 2638 entry->start_cluster, entry->count);
2637 2639
2638 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2640 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2639 /* we expect to find existing buddy because it's pinned */ 2641 /* we expect to find existing buddy because it's pinned */
@@ -2646,7 +2648,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2646 ext4_lock_group(sb, entry->group); 2648 ext4_lock_group(sb, entry->group);
2647 /* Take it out of per group rb tree */ 2649 /* Take it out of per group rb tree */
2648 rb_erase(&entry->node, &(db->bb_free_root)); 2650 rb_erase(&entry->node, &(db->bb_free_root));
2649 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); 2651 mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
2650 2652
2651 /* 2653 /*
2652 * Clear the trimmed flag for the group so that the next 2654 * Clear the trimmed flag for the group so that the next
@@ -3300,7 +3302,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3300 3302
3301 while (n) { 3303 while (n) {
3302 entry = rb_entry(n, struct ext4_free_data, node); 3304 entry = rb_entry(n, struct ext4_free_data, node);
3303 ext4_set_bits(bitmap, entry->start_blk, entry->count); 3305 ext4_set_bits(bitmap, entry->start_cluster, entry->count);
3304 n = rb_next(n); 3306 n = rb_next(n);
3305 } 3307 }
3306 return; 3308 return;
@@ -4401,7 +4403,7 @@ static int can_merge(struct ext4_free_data *entry1,
4401{ 4403{
4402 if ((entry1->t_tid == entry2->t_tid) && 4404 if ((entry1->t_tid == entry2->t_tid) &&
4403 (entry1->group == entry2->group) && 4405 (entry1->group == entry2->group) &&
4404 ((entry1->start_blk + entry1->count) == entry2->start_blk)) 4406 ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
4405 return 1; 4407 return 1;
4406 return 0; 4408 return 0;
4407} 4409}
@@ -4411,7 +4413,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4411 struct ext4_free_data *new_entry) 4413 struct ext4_free_data *new_entry)
4412{ 4414{
4413 ext4_group_t group = e4b->bd_group; 4415 ext4_group_t group = e4b->bd_group;
4414 ext4_grpblk_t block; 4416 ext4_grpblk_t cluster;
4415 struct ext4_free_data *entry; 4417 struct ext4_free_data *entry;
4416 struct ext4_group_info *db = e4b->bd_info; 4418 struct ext4_group_info *db = e4b->bd_info;
4417 struct super_block *sb = e4b->bd_sb; 4419 struct super_block *sb = e4b->bd_sb;
@@ -4424,7 +4426,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4424 BUG_ON(e4b->bd_buddy_page == NULL); 4426 BUG_ON(e4b->bd_buddy_page == NULL);
4425 4427
4426 new_node = &new_entry->node; 4428 new_node = &new_entry->node;
4427 block = new_entry->start_blk; 4429 cluster = new_entry->start_cluster;
4428 4430
4429 if (!*n) { 4431 if (!*n) {
4430 /* first free block exent. We need to 4432 /* first free block exent. We need to
@@ -4438,13 +4440,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4438 while (*n) { 4440 while (*n) {
4439 parent = *n; 4441 parent = *n;
4440 entry = rb_entry(parent, struct ext4_free_data, node); 4442 entry = rb_entry(parent, struct ext4_free_data, node);
4441 if (block < entry->start_blk) 4443 if (cluster < entry->start_cluster)
4442 n = &(*n)->rb_left; 4444 n = &(*n)->rb_left;
4443 else if (block >= (entry->start_blk + entry->count)) 4445 else if (cluster >= (entry->start_cluster + entry->count))
4444 n = &(*n)->rb_right; 4446 n = &(*n)->rb_right;
4445 else { 4447 else {
4446 ext4_grp_locked_error(sb, group, 0, 4448 ext4_grp_locked_error(sb, group, 0,
4447 ext4_group_first_block_no(sb, group) + block, 4449 ext4_group_first_block_no(sb, group) +
4450 EXT4_C2B(sbi, cluster),
4448 "Block already on to-be-freed list"); 4451 "Block already on to-be-freed list");
4449 return 0; 4452 return 0;
4450 } 4453 }
@@ -4458,7 +4461,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4458 if (node) { 4461 if (node) {
4459 entry = rb_entry(node, struct ext4_free_data, node); 4462 entry = rb_entry(node, struct ext4_free_data, node);
4460 if (can_merge(entry, new_entry)) { 4463 if (can_merge(entry, new_entry)) {
4461 new_entry->start_blk = entry->start_blk; 4464 new_entry->start_cluster = entry->start_cluster;
4462 new_entry->count += entry->count; 4465 new_entry->count += entry->count;
4463 rb_erase(node, &(db->bb_free_root)); 4466 rb_erase(node, &(db->bb_free_root));
4464 spin_lock(&sbi->s_md_lock); 4467 spin_lock(&sbi->s_md_lock);
@@ -4509,6 +4512,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4509 ext4_group_t block_group; 4512 ext4_group_t block_group;
4510 struct ext4_sb_info *sbi; 4513 struct ext4_sb_info *sbi;
4511 struct ext4_buddy e4b; 4514 struct ext4_buddy e4b;
4515 unsigned int count_clusters;
4512 int err = 0; 4516 int err = 0;
4513 int ret; 4517 int ret;
4514 4518
@@ -4557,6 +4561,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4557 if (!ext4_should_writeback_data(inode)) 4561 if (!ext4_should_writeback_data(inode))
4558 flags |= EXT4_FREE_BLOCKS_METADATA; 4562 flags |= EXT4_FREE_BLOCKS_METADATA;
4559 4563
4564 /*
4565 * If the extent to be freed does not begin on a cluster
4566 * boundary, we need to deal with partial clusters at the
4567 * beginning and end of the extent. Normally we will free
4568 * blocks at the beginning or the end unless we are explicitly
4569 * requested to avoid doing so.
4570 */
4571 overflow = block & (sbi->s_cluster_ratio - 1);
4572 if (overflow) {
4573 if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
4574 overflow = sbi->s_cluster_ratio - overflow;
4575 block += overflow;
4576 if (count > overflow)
4577 count -= overflow;
4578 else
4579 return;
4580 } else {
4581 block -= overflow;
4582 count += overflow;
4583 }
4584 }
4585 overflow = count & (sbi->s_cluster_ratio - 1);
4586 if (overflow) {
4587 if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
4588 if (count > overflow)
4589 count -= overflow;
4590 else
4591 return;
4592 } else
4593 count += sbi->s_cluster_ratio - overflow;
4594 }
4595
4560do_more: 4596do_more:
4561 overflow = 0; 4597 overflow = 0;
4562 ext4_get_group_no_and_offset(sb, block, &block_group, &bit); 4598 ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4565,10 +4601,12 @@ do_more:
4565 * Check to see if we are freeing blocks across a group 4601 * Check to see if we are freeing blocks across a group
4566 * boundary. 4602 * boundary.
4567 */ 4603 */
4568 if (bit + count > EXT4_CLUSTERS_PER_GROUP(sb)) { 4604 if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
4569 overflow = bit + count - EXT4_CLUSTERS_PER_GROUP(sb); 4605 overflow = EXT4_C2B(sbi, bit) + count -
4606 EXT4_BLOCKS_PER_GROUP(sb);
4570 count -= overflow; 4607 count -= overflow;
4571 } 4608 }
4609 count_clusters = EXT4_B2C(sbi, count);
4572 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4610 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4573 if (!bitmap_bh) { 4611 if (!bitmap_bh) {
4574 err = -EIO; 4612 err = -EIO;
@@ -4583,9 +4621,9 @@ do_more:
4583 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4621 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4584 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4622 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
4585 in_range(block, ext4_inode_table(sb, gdp), 4623 in_range(block, ext4_inode_table(sb, gdp),
4586 EXT4_SB(sb)->s_itb_per_group) || 4624 EXT4_SB(sb)->s_itb_per_group) ||
4587 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4625 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4588 EXT4_SB(sb)->s_itb_per_group)) { 4626 EXT4_SB(sb)->s_itb_per_group)) {
4589 4627
4590 ext4_error(sb, "Freeing blocks in system zone - " 4628 ext4_error(sb, "Freeing blocks in system zone - "
4591 "Block = %llu, count = %lu", block, count); 4629 "Block = %llu, count = %lu", block, count);
@@ -4610,11 +4648,11 @@ do_more:
4610#ifdef AGGRESSIVE_CHECK 4648#ifdef AGGRESSIVE_CHECK
4611 { 4649 {
4612 int i; 4650 int i;
4613 for (i = 0; i < count; i++) 4651 for (i = 0; i < count_clusters; i++)
4614 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); 4652 BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
4615 } 4653 }
4616#endif 4654#endif
4617 trace_ext4_mballoc_free(sb, inode, block_group, bit, count); 4655 trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
4618 4656
4619 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4657 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4620 if (err) 4658 if (err)
@@ -4631,13 +4669,13 @@ do_more:
4631 err = -ENOMEM; 4669 err = -ENOMEM;
4632 goto error_return; 4670 goto error_return;
4633 } 4671 }
4634 new_entry->start_blk = bit; 4672 new_entry->start_cluster = bit;
4635 new_entry->group = block_group; 4673 new_entry->group = block_group;
4636 new_entry->count = count; 4674 new_entry->count = count_clusters;
4637 new_entry->t_tid = handle->h_transaction->t_tid; 4675 new_entry->t_tid = handle->h_transaction->t_tid;
4638 4676
4639 ext4_lock_group(sb, block_group); 4677 ext4_lock_group(sb, block_group);
4640 mb_clear_bits(bitmap_bh->b_data, bit, count); 4678 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4641 ext4_mb_free_metadata(handle, &e4b, new_entry); 4679 ext4_mb_free_metadata(handle, &e4b, new_entry);
4642 } else { 4680 } else {
4643 /* need to update group_info->bb_free and bitmap 4681 /* need to update group_info->bb_free and bitmap
@@ -4645,11 +4683,11 @@ do_more:
4645 * them with group lock_held 4683 * them with group lock_held
4646 */ 4684 */
4647 ext4_lock_group(sb, block_group); 4685 ext4_lock_group(sb, block_group);
4648 mb_clear_bits(bitmap_bh->b_data, bit, count); 4686 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4649 mb_free_blocks(inode, &e4b, bit, count); 4687 mb_free_blocks(inode, &e4b, bit, count_clusters);
4650 } 4688 }
4651 4689
4652 ret = ext4_free_blks_count(sb, gdp) + count; 4690 ret = ext4_free_blks_count(sb, gdp) + count_clusters;
4653 ext4_free_blks_set(sb, gdp, ret); 4691 ext4_free_blks_set(sb, gdp, ret);
4654 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); 4692 gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
4655 ext4_unlock_group(sb, block_group); 4693 ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 1641f4b57439..dc99930d4cb5 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -106,7 +106,7 @@ struct ext4_free_data {
106 ext4_group_t group; 106 ext4_group_t group;
107 107
108 /* free block extent */ 108 /* free block extent */
109 ext4_grpblk_t start_blk; 109 ext4_grpblk_t start_cluster;
110 ext4_grpblk_t count; 110 ext4_grpblk_t count;
111 111
112 /* transaction which freed this extent */ 112 /* transaction which freed this extent */