diff options
| author | Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | 2008-10-16 10:14:27 -0400 |
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2008-10-16 10:14:27 -0400 |
| commit | c894058d66637c7720569fbe12957f4de64d9991 (patch) | |
| tree | f13472d7fd76155f1365550515997a24aff611c9 | |
| parent | c2774d84fd6cab2bfa2a2fae0b1ca8d8ebde48a2 (diff) | |
ext4: Use an rbtree for tracking blocks freed during transaction.
With this patch we track the block freed during a transaction using
red-black tree. We also make sure contiguous blocks freed are collected
in one node in the tree.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
| -rw-r--r-- | fs/ext4/mballoc.c | 184 | ||||
| -rw-r--r-- | fs/ext4/mballoc.h | 26 |
2 files changed, 133 insertions, 77 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 154f8dec97ea..bd9b011941a2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
| 2300 | } | 2300 | } |
| 2301 | 2301 | ||
| 2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
| 2303 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | ||
| 2303 | 2304 | ||
| 2304 | #ifdef DOUBLE_CHECK | 2305 | #ifdef DOUBLE_CHECK |
| 2305 | { | 2306 | { |
| @@ -2647,13 +2648,11 @@ int ext4_mb_release(struct super_block *sb) | |||
| 2647 | static noinline_for_stack void | 2648 | static noinline_for_stack void |
| 2648 | ext4_mb_free_committed_blocks(struct super_block *sb) | 2649 | ext4_mb_free_committed_blocks(struct super_block *sb) |
| 2649 | { | 2650 | { |
| 2650 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 2651 | int err; | ||
| 2652 | int i; | ||
| 2653 | int count = 0; | ||
| 2654 | int count2 = 0; | ||
| 2655 | struct ext4_free_metadata *md; | ||
| 2656 | struct ext4_buddy e4b; | 2651 | struct ext4_buddy e4b; |
| 2652 | struct ext4_group_info *db; | ||
| 2653 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 2654 | int err, count = 0, count2 = 0; | ||
| 2655 | struct ext4_free_data *entry; | ||
| 2657 | 2656 | ||
| 2658 | if (list_empty(&sbi->s_committed_transaction)) | 2657 | if (list_empty(&sbi->s_committed_transaction)) |
| 2659 | return; | 2658 | return; |
| @@ -2661,44 +2660,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
| 2661 | /* there is committed blocks to be freed yet */ | 2660 | /* there is committed blocks to be freed yet */ |
| 2662 | do { | 2661 | do { |
| 2663 | /* get next array of blocks */ | 2662 | /* get next array of blocks */ |
| 2664 | md = NULL; | 2663 | entry = NULL; |
| 2665 | spin_lock(&sbi->s_md_lock); | 2664 | spin_lock(&sbi->s_md_lock); |
| 2666 | if (!list_empty(&sbi->s_committed_transaction)) { | 2665 | if (!list_empty(&sbi->s_committed_transaction)) { |
| 2667 | md = list_entry(sbi->s_committed_transaction.next, | 2666 | entry = list_entry(sbi->s_committed_transaction.next, |
| 2668 | struct ext4_free_metadata, list); | 2667 | struct ext4_free_data, list); |
| 2669 | list_del(&md->list); | 2668 | list_del(&entry->list); |
| 2670 | } | 2669 | } |
| 2671 | spin_unlock(&sbi->s_md_lock); | 2670 | spin_unlock(&sbi->s_md_lock); |
| 2672 | 2671 | ||
| 2673 | if (md == NULL) | 2672 | if (entry == NULL) |
| 2674 | break; | 2673 | break; |
| 2675 | 2674 | ||
| 2676 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | 2675 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
| 2677 | md->num, md->group, md); | 2676 | entry->count, entry->group, entry); |
| 2678 | 2677 | ||
| 2679 | err = ext4_mb_load_buddy(sb, md->group, &e4b); | 2678 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
| 2680 | /* we expect to find existing buddy because it's pinned */ | 2679 | /* we expect to find existing buddy because it's pinned */ |
| 2681 | BUG_ON(err != 0); | 2680 | BUG_ON(err != 0); |
| 2682 | 2681 | ||
| 2682 | db = e4b.bd_info; | ||
| 2683 | /* there are blocks to put in buddy to make them really free */ | 2683 | /* there are blocks to put in buddy to make them really free */ |
| 2684 | count += md->num; | 2684 | count += entry->count; |
| 2685 | count2++; | 2685 | count2++; |
| 2686 | ext4_lock_group(sb, md->group); | 2686 | ext4_lock_group(sb, entry->group); |
| 2687 | for (i = 0; i < md->num; i++) { | 2687 | /* Take it out of per group rb tree */ |
| 2688 | mb_debug(" %u", md->blocks[i]); | 2688 | rb_erase(&entry->node, &(db->bb_free_root)); |
| 2689 | mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | 2689 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
| 2690 | |||
| 2691 | if (!db->bb_free_root.rb_node) { | ||
| 2692 | /* No more items in the per group rb tree | ||
| 2693 | * balance refcounts from ext4_mb_free_metadata() | ||
| 2694 | */ | ||
| 2695 | page_cache_release(e4b.bd_buddy_page); | ||
| 2696 | page_cache_release(e4b.bd_bitmap_page); | ||
| 2690 | } | 2697 | } |
| 2691 | mb_debug("\n"); | 2698 | ext4_unlock_group(sb, entry->group); |
| 2692 | ext4_unlock_group(sb, md->group); | ||
| 2693 | |||
| 2694 | /* balance refcounts from ext4_mb_free_metadata() */ | ||
| 2695 | page_cache_release(e4b.bd_buddy_page); | ||
| 2696 | page_cache_release(e4b.bd_bitmap_page); | ||
| 2697 | 2699 | ||
| 2698 | kfree(md); | 2700 | kmem_cache_free(ext4_free_ext_cachep, entry); |
| 2699 | ext4_mb_release_desc(&e4b); | 2701 | ext4_mb_release_desc(&e4b); |
| 2700 | 2702 | } while (1); | |
| 2701 | } while (md); | ||
| 2702 | 2703 | ||
| 2703 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2704 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
| 2704 | } | 2705 | } |
| @@ -2771,6 +2772,16 @@ int __init init_ext4_mballoc(void) | |||
| 2771 | kmem_cache_destroy(ext4_pspace_cachep); | 2772 | kmem_cache_destroy(ext4_pspace_cachep); |
| 2772 | return -ENOMEM; | 2773 | return -ENOMEM; |
| 2773 | } | 2774 | } |
| 2775 | |||
| 2776 | ext4_free_ext_cachep = | ||
| 2777 | kmem_cache_create("ext4_free_block_extents", | ||
| 2778 | sizeof(struct ext4_free_data), | ||
| 2779 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
| 2780 | if (ext4_free_ext_cachep == NULL) { | ||
| 2781 | kmem_cache_destroy(ext4_pspace_cachep); | ||
| 2782 | kmem_cache_destroy(ext4_ac_cachep); | ||
| 2783 | return -ENOMEM; | ||
| 2784 | } | ||
| 2774 | return 0; | 2785 | return 0; |
| 2775 | } | 2786 | } |
| 2776 | 2787 | ||
| @@ -2779,6 +2790,7 @@ void exit_ext4_mballoc(void) | |||
| 2779 | /* XXX: synchronize_rcu(); */ | 2790 | /* XXX: synchronize_rcu(); */ |
| 2780 | kmem_cache_destroy(ext4_pspace_cachep); | 2791 | kmem_cache_destroy(ext4_pspace_cachep); |
| 2781 | kmem_cache_destroy(ext4_ac_cachep); | 2792 | kmem_cache_destroy(ext4_ac_cachep); |
| 2793 | kmem_cache_destroy(ext4_free_ext_cachep); | ||
| 2782 | } | 2794 | } |
| 2783 | 2795 | ||
| 2784 | 2796 | ||
| @@ -4415,6 +4427,21 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb, | |||
| 4415 | ext4_mb_free_committed_blocks(sb); | 4427 | ext4_mb_free_committed_blocks(sb); |
| 4416 | } | 4428 | } |
| 4417 | 4429 | ||
| 4430 | /* | ||
| 4431 | * We can merge two free data extents only if the physical blocks | ||
| 4432 | * are contiguous, AND the extents were freed by the same transaction, | ||
| 4433 | * AND the blocks are associated with the same group. | ||
| 4434 | */ | ||
| 4435 | static int can_merge(struct ext4_free_data *entry1, | ||
| 4436 | struct ext4_free_data *entry2) | ||
| 4437 | { | ||
| 4438 | if ((entry1->t_tid == entry2->t_tid) && | ||
| 4439 | (entry1->group == entry2->group) && | ||
| 4440 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) | ||
| 4441 | return 1; | ||
| 4442 | return 0; | ||
| 4443 | } | ||
| 4444 | |||
| 4418 | static noinline_for_stack int | 4445 | static noinline_for_stack int |
| 4419 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | 4446 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, |
| 4420 | ext4_group_t group, ext4_grpblk_t block, int count) | 4447 | ext4_group_t group, ext4_grpblk_t block, int count) |
| @@ -4422,57 +4449,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
| 4422 | struct ext4_group_info *db = e4b->bd_info; | 4449 | struct ext4_group_info *db = e4b->bd_info; |
| 4423 | struct super_block *sb = e4b->bd_sb; | 4450 | struct super_block *sb = e4b->bd_sb; |
| 4424 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4451 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 4425 | struct ext4_free_metadata *md; | 4452 | struct ext4_free_data *entry, *new_entry; |
| 4426 | int i; | 4453 | struct rb_node **n = &db->bb_free_root.rb_node, *node; |
| 4454 | struct rb_node *parent = NULL, *new_node; | ||
| 4455 | |||
| 4427 | 4456 | ||
| 4428 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4457 | BUG_ON(e4b->bd_bitmap_page == NULL); |
| 4429 | BUG_ON(e4b->bd_buddy_page == NULL); | 4458 | BUG_ON(e4b->bd_buddy_page == NULL); |
| 4430 | 4459 | ||
| 4460 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | ||
| 4461 | new_entry->start_blk = block; | ||
| 4462 | new_entry->group = group; | ||
| 4463 | new_entry->count = count; | ||
| 4464 | new_entry->t_tid = handle->h_transaction->t_tid; | ||
| 4465 | new_node = &new_entry->node; | ||
| 4466 | |||
| 4431 | ext4_lock_group(sb, group); | 4467 | ext4_lock_group(sb, group); |
| 4432 | for (i = 0; i < count; i++) { | 4468 | if (!*n) { |
| 4433 | md = db->bb_md_cur; | 4469 | /* first free block exent. We need to |
| 4434 | if (md && db->bb_tid != handle->h_transaction->t_tid) { | 4470 | protect buddy cache from being freed, |
| 4435 | db->bb_md_cur = NULL; | 4471 | * otherwise we'll refresh it from |
| 4436 | md = NULL; | 4472 | * on-disk bitmap and lose not-yet-available |
| 4473 | * blocks */ | ||
| 4474 | page_cache_get(e4b->bd_buddy_page); | ||
| 4475 | page_cache_get(e4b->bd_bitmap_page); | ||
| 4476 | } | ||
| 4477 | while (*n) { | ||
| 4478 | parent = *n; | ||
| 4479 | entry = rb_entry(parent, struct ext4_free_data, node); | ||
| 4480 | if (block < entry->start_blk) | ||
| 4481 | n = &(*n)->rb_left; | ||
| 4482 | else if (block >= (entry->start_blk + entry->count)) | ||
| 4483 | n = &(*n)->rb_right; | ||
| 4484 | else { | ||
| 4485 | ext4_error(sb, __func__, | ||
| 4486 | "Double free of blocks %d (%d %d)\n", | ||
| 4487 | block, entry->start_blk, entry->count); | ||
| 4488 | return 0; | ||
| 4437 | } | 4489 | } |
| 4490 | } | ||
| 4438 | 4491 | ||
| 4439 | if (md == NULL) { | 4492 | rb_link_node(new_node, parent, n); |
| 4440 | ext4_unlock_group(sb, group); | 4493 | rb_insert_color(new_node, &db->bb_free_root); |
| 4441 | md = kmalloc(sizeof(*md), GFP_NOFS); | 4494 | |
| 4442 | if (md == NULL) | 4495 | /* Now try to see the extent can be merged to left and right */ |
| 4443 | return -ENOMEM; | 4496 | node = rb_prev(new_node); |
| 4444 | md->num = 0; | 4497 | if (node) { |
| 4445 | md->group = group; | 4498 | entry = rb_entry(node, struct ext4_free_data, node); |
| 4446 | 4499 | if (can_merge(entry, new_entry)) { | |
| 4447 | ext4_lock_group(sb, group); | 4500 | new_entry->start_blk = entry->start_blk; |
| 4448 | if (db->bb_md_cur == NULL) { | 4501 | new_entry->count += entry->count; |
| 4449 | spin_lock(&sbi->s_md_lock); | 4502 | rb_erase(node, &(db->bb_free_root)); |
| 4450 | list_add(&md->list, &sbi->s_active_transaction); | 4503 | spin_lock(&sbi->s_md_lock); |
| 4451 | spin_unlock(&sbi->s_md_lock); | 4504 | list_del(&entry->list); |
| 4452 | /* protect buddy cache from being freed, | 4505 | spin_unlock(&sbi->s_md_lock); |
| 4453 | * otherwise we'll refresh it from | 4506 | kmem_cache_free(ext4_free_ext_cachep, entry); |
| 4454 | * on-disk bitmap and lose not-yet-available | ||
| 4455 | * blocks */ | ||
| 4456 | page_cache_get(e4b->bd_buddy_page); | ||
| 4457 | page_cache_get(e4b->bd_bitmap_page); | ||
| 4458 | db->bb_md_cur = md; | ||
| 4459 | db->bb_tid = handle->h_transaction->t_tid; | ||
| 4460 | mb_debug("new md 0x%p for group %lu\n", | ||
| 4461 | md, md->group); | ||
| 4462 | } else { | ||
| 4463 | kfree(md); | ||
| 4464 | md = db->bb_md_cur; | ||
| 4465 | } | ||
| 4466 | } | 4507 | } |
| 4508 | } | ||
| 4467 | 4509 | ||
| 4468 | BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); | 4510 | node = rb_next(new_node); |
| 4469 | md->blocks[md->num] = block + i; | 4511 | if (node) { |
| 4470 | md->num++; | 4512 | entry = rb_entry(node, struct ext4_free_data, node); |
| 4471 | if (md->num == EXT4_BB_MAX_BLOCKS) { | 4513 | if (can_merge(new_entry, entry)) { |
| 4472 | /* no more space, put full container on a sb's list */ | 4514 | new_entry->count += entry->count; |
| 4473 | db->bb_md_cur = NULL; | 4515 | rb_erase(node, &(db->bb_free_root)); |
| 4516 | spin_lock(&sbi->s_md_lock); | ||
| 4517 | list_del(&entry->list); | ||
| 4518 | spin_unlock(&sbi->s_md_lock); | ||
| 4519 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
| 4474 | } | 4520 | } |
| 4475 | } | 4521 | } |
| 4522 | /* Add the extent to active_transaction list */ | ||
| 4523 | spin_lock(&sbi->s_md_lock); | ||
| 4524 | list_add(&new_entry->list, &sbi->s_active_transaction); | ||
| 4525 | spin_unlock(&sbi->s_md_lock); | ||
| 4476 | ext4_unlock_group(sb, group); | 4526 | ext4_unlock_group(sb, group); |
| 4477 | return 0; | 4527 | return 0; |
| 4478 | } | 4528 | } |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index b3b4828f8b89..9e815c4e37df 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
| @@ -98,23 +98,29 @@ | |||
| 98 | 98 | ||
| 99 | static struct kmem_cache *ext4_pspace_cachep; | 99 | static struct kmem_cache *ext4_pspace_cachep; |
| 100 | static struct kmem_cache *ext4_ac_cachep; | 100 | static struct kmem_cache *ext4_ac_cachep; |
| 101 | static struct kmem_cache *ext4_free_ext_cachep; | ||
| 101 | 102 | ||
| 102 | #ifdef EXT4_BB_MAX_BLOCKS | 103 | struct ext4_free_data { |
| 103 | #undef EXT4_BB_MAX_BLOCKS | 104 | /* this links the free block information from group_info */ |
| 104 | #endif | 105 | struct rb_node node; |
| 105 | #define EXT4_BB_MAX_BLOCKS 30 | ||
| 106 | 106 | ||
| 107 | struct ext4_free_metadata { | 107 | /* this links the free block information from ext4_sb_info */ |
| 108 | ext4_group_t group; | ||
| 109 | unsigned short num; | ||
| 110 | ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; | ||
| 111 | struct list_head list; | 108 | struct list_head list; |
| 109 | |||
| 110 | /* group which free block extent belongs */ | ||
| 111 | ext4_group_t group; | ||
| 112 | |||
| 113 | /* free block extent */ | ||
| 114 | ext4_grpblk_t start_blk; | ||
| 115 | ext4_grpblk_t count; | ||
| 116 | |||
| 117 | /* transaction which freed this extent */ | ||
| 118 | tid_t t_tid; | ||
| 112 | }; | 119 | }; |
| 113 | 120 | ||
| 114 | struct ext4_group_info { | 121 | struct ext4_group_info { |
| 115 | unsigned long bb_state; | 122 | unsigned long bb_state; |
| 116 | unsigned long bb_tid; | 123 | struct rb_root bb_free_root; |
| 117 | struct ext4_free_metadata *bb_md_cur; | ||
| 118 | unsigned short bb_first_free; | 124 | unsigned short bb_first_free; |
| 119 | unsigned short bb_free; | 125 | unsigned short bb_free; |
| 120 | unsigned short bb_fragments; | 126 | unsigned short bb_fragments; |
