aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2008-10-16 10:14:27 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-10-16 10:14:27 -0400
commitc894058d66637c7720569fbe12957f4de64d9991 (patch)
treef13472d7fd76155f1365550515997a24aff611c9
parentc2774d84fd6cab2bfa2a2fae0b1ca8d8ebde48a2 (diff)
ext4: Use an rbtree for tracking blocks freed during transaction.
With this patch we track the block freed during a transaction using red-black tree. We also make sure contiguous blocks freed are collected in one node in the tree. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
-rw-r--r--fs/ext4/mballoc.c184
-rw-r--r--fs/ext4/mballoc.h26
2 files changed, 133 insertions, 77 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 154f8dec97e..bd9b011941a 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2300 } 2300 }
2301 2301
2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2303 meta_group_info[i]->bb_free_root.rb_node = NULL;;
2303 2304
2304#ifdef DOUBLE_CHECK 2305#ifdef DOUBLE_CHECK
2305 { 2306 {
@@ -2647,13 +2648,11 @@ int ext4_mb_release(struct super_block *sb)
2647static noinline_for_stack void 2648static noinline_for_stack void
2648ext4_mb_free_committed_blocks(struct super_block *sb) 2649ext4_mb_free_committed_blocks(struct super_block *sb)
2649{ 2650{
2650 struct ext4_sb_info *sbi = EXT4_SB(sb);
2651 int err;
2652 int i;
2653 int count = 0;
2654 int count2 = 0;
2655 struct ext4_free_metadata *md;
2656 struct ext4_buddy e4b; 2651 struct ext4_buddy e4b;
2652 struct ext4_group_info *db;
2653 struct ext4_sb_info *sbi = EXT4_SB(sb);
2654 int err, count = 0, count2 = 0;
2655 struct ext4_free_data *entry;
2657 2656
2658 if (list_empty(&sbi->s_committed_transaction)) 2657 if (list_empty(&sbi->s_committed_transaction))
2659 return; 2658 return;
@@ -2661,44 +2660,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
2661 /* there is committed blocks to be freed yet */ 2660 /* there is committed blocks to be freed yet */
2662 do { 2661 do {
2663 /* get next array of blocks */ 2662 /* get next array of blocks */
2664 md = NULL; 2663 entry = NULL;
2665 spin_lock(&sbi->s_md_lock); 2664 spin_lock(&sbi->s_md_lock);
2666 if (!list_empty(&sbi->s_committed_transaction)) { 2665 if (!list_empty(&sbi->s_committed_transaction)) {
2667 md = list_entry(sbi->s_committed_transaction.next, 2666 entry = list_entry(sbi->s_committed_transaction.next,
2668 struct ext4_free_metadata, list); 2667 struct ext4_free_data, list);
2669 list_del(&md->list); 2668 list_del(&entry->list);
2670 } 2669 }
2671 spin_unlock(&sbi->s_md_lock); 2670 spin_unlock(&sbi->s_md_lock);
2672 2671
2673 if (md == NULL) 2672 if (entry == NULL)
2674 break; 2673 break;
2675 2674
2676 mb_debug("gonna free %u blocks in group %lu (0x%p):", 2675 mb_debug("gonna free %u blocks in group %lu (0x%p):",
2677 md->num, md->group, md); 2676 entry->count, entry->group, entry);
2678 2677
2679 err = ext4_mb_load_buddy(sb, md->group, &e4b); 2678 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2680 /* we expect to find existing buddy because it's pinned */ 2679 /* we expect to find existing buddy because it's pinned */
2681 BUG_ON(err != 0); 2680 BUG_ON(err != 0);
2682 2681
2682 db = e4b.bd_info;
2683 /* there are blocks to put in buddy to make them really free */ 2683 /* there are blocks to put in buddy to make them really free */
2684 count += md->num; 2684 count += entry->count;
2685 count2++; 2685 count2++;
2686 ext4_lock_group(sb, md->group); 2686 ext4_lock_group(sb, entry->group);
2687 for (i = 0; i < md->num; i++) { 2687 /* Take it out of per group rb tree */
2688 mb_debug(" %u", md->blocks[i]); 2688 rb_erase(&entry->node, &(db->bb_free_root));
2689 mb_free_blocks(NULL, &e4b, md->blocks[i], 1); 2689 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
2690
2691 if (!db->bb_free_root.rb_node) {
2692 /* No more items in the per group rb tree
2693 * balance refcounts from ext4_mb_free_metadata()
2694 */
2695 page_cache_release(e4b.bd_buddy_page);
2696 page_cache_release(e4b.bd_bitmap_page);
2690 } 2697 }
2691 mb_debug("\n"); 2698 ext4_unlock_group(sb, entry->group);
2692 ext4_unlock_group(sb, md->group);
2693
2694 /* balance refcounts from ext4_mb_free_metadata() */
2695 page_cache_release(e4b.bd_buddy_page);
2696 page_cache_release(e4b.bd_bitmap_page);
2697 2699
2698 kfree(md); 2700 kmem_cache_free(ext4_free_ext_cachep, entry);
2699 ext4_mb_release_desc(&e4b); 2701 ext4_mb_release_desc(&e4b);
2700 2702 } while (1);
2701 } while (md);
2702 2703
2703 mb_debug("freed %u blocks in %u structures\n", count, count2); 2704 mb_debug("freed %u blocks in %u structures\n", count, count2);
2704} 2705}
@@ -2771,6 +2772,16 @@ int __init init_ext4_mballoc(void)
2771 kmem_cache_destroy(ext4_pspace_cachep); 2772 kmem_cache_destroy(ext4_pspace_cachep);
2772 return -ENOMEM; 2773 return -ENOMEM;
2773 } 2774 }
2775
2776 ext4_free_ext_cachep =
2777 kmem_cache_create("ext4_free_block_extents",
2778 sizeof(struct ext4_free_data),
2779 0, SLAB_RECLAIM_ACCOUNT, NULL);
2780 if (ext4_free_ext_cachep == NULL) {
2781 kmem_cache_destroy(ext4_pspace_cachep);
2782 kmem_cache_destroy(ext4_ac_cachep);
2783 return -ENOMEM;
2784 }
2774 return 0; 2785 return 0;
2775} 2786}
2776 2787
@@ -2779,6 +2790,7 @@ void exit_ext4_mballoc(void)
2779 /* XXX: synchronize_rcu(); */ 2790 /* XXX: synchronize_rcu(); */
2780 kmem_cache_destroy(ext4_pspace_cachep); 2791 kmem_cache_destroy(ext4_pspace_cachep);
2781 kmem_cache_destroy(ext4_ac_cachep); 2792 kmem_cache_destroy(ext4_ac_cachep);
2793 kmem_cache_destroy(ext4_free_ext_cachep);
2782} 2794}
2783 2795
2784 2796
@@ -4415,6 +4427,21 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
4415 ext4_mb_free_committed_blocks(sb); 4427 ext4_mb_free_committed_blocks(sb);
4416} 4428}
4417 4429
4430/*
4431 * We can merge two free data extents only if the physical blocks
4432 * are contiguous, AND the extents were freed by the same transaction,
4433 * AND the blocks are associated with the same group.
4434 */
4435static int can_merge(struct ext4_free_data *entry1,
4436 struct ext4_free_data *entry2)
4437{
4438 if ((entry1->t_tid == entry2->t_tid) &&
4439 (entry1->group == entry2->group) &&
4440 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4441 return 1;
4442 return 0;
4443}
4444
4418static noinline_for_stack int 4445static noinline_for_stack int
4419ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, 4446ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4420 ext4_group_t group, ext4_grpblk_t block, int count) 4447 ext4_group_t group, ext4_grpblk_t block, int count)
@@ -4422,57 +4449,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 struct ext4_group_info *db = e4b->bd_info; 4449 struct ext4_group_info *db = e4b->bd_info;
4423 struct super_block *sb = e4b->bd_sb; 4450 struct super_block *sb = e4b->bd_sb;
4424 struct ext4_sb_info *sbi = EXT4_SB(sb); 4451 struct ext4_sb_info *sbi = EXT4_SB(sb);
4425 struct ext4_free_metadata *md; 4452 struct ext4_free_data *entry, *new_entry;
4426 int i; 4453 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4454 struct rb_node *parent = NULL, *new_node;
4455
4427 4456
4428 BUG_ON(e4b->bd_bitmap_page == NULL); 4457 BUG_ON(e4b->bd_bitmap_page == NULL);
4429 BUG_ON(e4b->bd_buddy_page == NULL); 4458 BUG_ON(e4b->bd_buddy_page == NULL);
4430 4459
4460 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4461 new_entry->start_blk = block;
4462 new_entry->group = group;
4463 new_entry->count = count;
4464 new_entry->t_tid = handle->h_transaction->t_tid;
4465 new_node = &new_entry->node;
4466
4431 ext4_lock_group(sb, group); 4467 ext4_lock_group(sb, group);
4432 for (i = 0; i < count; i++) { 4468 if (!*n) {
4433 md = db->bb_md_cur; 4469 /* first free block exent. We need to
4434 if (md && db->bb_tid != handle->h_transaction->t_tid) { 4470 protect buddy cache from being freed,
4435 db->bb_md_cur = NULL; 4471 * otherwise we'll refresh it from
4436 md = NULL; 4472 * on-disk bitmap and lose not-yet-available
4473 * blocks */
4474 page_cache_get(e4b->bd_buddy_page);
4475 page_cache_get(e4b->bd_bitmap_page);
4476 }
4477 while (*n) {
4478 parent = *n;
4479 entry = rb_entry(parent, struct ext4_free_data, node);
4480 if (block < entry->start_blk)
4481 n = &(*n)->rb_left;
4482 else if (block >= (entry->start_blk + entry->count))
4483 n = &(*n)->rb_right;
4484 else {
4485 ext4_error(sb, __func__,
4486 "Double free of blocks %d (%d %d)\n",
4487 block, entry->start_blk, entry->count);
4488 return 0;
4437 } 4489 }
4490 }
4438 4491
4439 if (md == NULL) { 4492 rb_link_node(new_node, parent, n);
4440 ext4_unlock_group(sb, group); 4493 rb_insert_color(new_node, &db->bb_free_root);
4441 md = kmalloc(sizeof(*md), GFP_NOFS); 4494
4442 if (md == NULL) 4495 /* Now try to see the extent can be merged to left and right */
4443 return -ENOMEM; 4496 node = rb_prev(new_node);
4444 md->num = 0; 4497 if (node) {
4445 md->group = group; 4498 entry = rb_entry(node, struct ext4_free_data, node);
4446 4499 if (can_merge(entry, new_entry)) {
4447 ext4_lock_group(sb, group); 4500 new_entry->start_blk = entry->start_blk;
4448 if (db->bb_md_cur == NULL) { 4501 new_entry->count += entry->count;
4449 spin_lock(&sbi->s_md_lock); 4502 rb_erase(node, &(db->bb_free_root));
4450 list_add(&md->list, &sbi->s_active_transaction); 4503 spin_lock(&sbi->s_md_lock);
4451 spin_unlock(&sbi->s_md_lock); 4504 list_del(&entry->list);
4452 /* protect buddy cache from being freed, 4505 spin_unlock(&sbi->s_md_lock);
4453 * otherwise we'll refresh it from 4506 kmem_cache_free(ext4_free_ext_cachep, entry);
4454 * on-disk bitmap and lose not-yet-available
4455 * blocks */
4456 page_cache_get(e4b->bd_buddy_page);
4457 page_cache_get(e4b->bd_bitmap_page);
4458 db->bb_md_cur = md;
4459 db->bb_tid = handle->h_transaction->t_tid;
4460 mb_debug("new md 0x%p for group %lu\n",
4461 md, md->group);
4462 } else {
4463 kfree(md);
4464 md = db->bb_md_cur;
4465 }
4466 } 4507 }
4508 }
4467 4509
4468 BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); 4510 node = rb_next(new_node);
4469 md->blocks[md->num] = block + i; 4511 if (node) {
4470 md->num++; 4512 entry = rb_entry(node, struct ext4_free_data, node);
4471 if (md->num == EXT4_BB_MAX_BLOCKS) { 4513 if (can_merge(new_entry, entry)) {
4472 /* no more space, put full container on a sb's list */ 4514 new_entry->count += entry->count;
4473 db->bb_md_cur = NULL; 4515 rb_erase(node, &(db->bb_free_root));
4516 spin_lock(&sbi->s_md_lock);
4517 list_del(&entry->list);
4518 spin_unlock(&sbi->s_md_lock);
4519 kmem_cache_free(ext4_free_ext_cachep, entry);
4474 } 4520 }
4475 } 4521 }
4522 /* Add the extent to active_transaction list */
4523 spin_lock(&sbi->s_md_lock);
4524 list_add(&new_entry->list, &sbi->s_active_transaction);
4525 spin_unlock(&sbi->s_md_lock);
4476 ext4_unlock_group(sb, group); 4526 ext4_unlock_group(sb, group);
4477 return 0; 4527 return 0;
4478} 4528}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b3b4828f8b8..9e815c4e37d 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -98,23 +98,29 @@
98 98
99static struct kmem_cache *ext4_pspace_cachep; 99static struct kmem_cache *ext4_pspace_cachep;
100static struct kmem_cache *ext4_ac_cachep; 100static struct kmem_cache *ext4_ac_cachep;
101static struct kmem_cache *ext4_free_ext_cachep;
101 102
102#ifdef EXT4_BB_MAX_BLOCKS 103struct ext4_free_data {
103#undef EXT4_BB_MAX_BLOCKS 104 /* this links the free block information from group_info */
104#endif 105 struct rb_node node;
105#define EXT4_BB_MAX_BLOCKS 30
106 106
107struct ext4_free_metadata { 107 /* this links the free block information from ext4_sb_info */
108 ext4_group_t group;
109 unsigned short num;
110 ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
111 struct list_head list; 108 struct list_head list;
109
110 /* group which free block extent belongs */
111 ext4_group_t group;
112
113 /* free block extent */
114 ext4_grpblk_t start_blk;
115 ext4_grpblk_t count;
116
117 /* transaction which freed this extent */
118 tid_t t_tid;
112}; 119};
113 120
114struct ext4_group_info { 121struct ext4_group_info {
115 unsigned long bb_state; 122 unsigned long bb_state;
116 unsigned long bb_tid; 123 struct rb_root bb_free_root;
117 struct ext4_free_metadata *bb_md_cur;
118 unsigned short bb_first_free; 124 unsigned short bb_first_free;
119 unsigned short bb_free; 125 unsigned short bb_free;
120 unsigned short bb_fragments; 126 unsigned short bb_fragments;