diff options
author | Bobi Jam <bobijam@whamcloud.com> | 2012-02-20 17:53:02 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-02-20 17:53:02 -0500 |
commit | 18aadd47f88464928b5ce57791c2e8f9f2aaece0 (patch) | |
tree | bb4f32855484a6dd6081c33fae1564359a88af74 /fs/ext4/mballoc.c | |
parent | 15291164b22a357cb211b618adfef4fa82fc0de3 (diff) |
ext4: expand commit callback and use it for mballoc
The per-commit callback was used by mballoc code to manage free space
bitmaps after deleted blocks have been released. This patch expands
it to support multiple different callbacks, to allow other things to
be done after the commit has been completed.
Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 151 |
1 files changed, 71 insertions, 80 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 545fa0256606..2e467718d413 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -21,6 +21,7 @@ | |||
21 | * mballoc.c contains the multiblocks allocation routines | 21 | * mballoc.c contains the multiblocks allocation routines |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include "ext4_jbd2.h" | ||
24 | #include "mballoc.h" | 25 | #include "mballoc.h" |
25 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
26 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
@@ -339,7 +340,7 @@ | |||
339 | */ | 340 | */ |
340 | static struct kmem_cache *ext4_pspace_cachep; | 341 | static struct kmem_cache *ext4_pspace_cachep; |
341 | static struct kmem_cache *ext4_ac_cachep; | 342 | static struct kmem_cache *ext4_ac_cachep; |
342 | static struct kmem_cache *ext4_free_ext_cachep; | 343 | static struct kmem_cache *ext4_free_data_cachep; |
343 | 344 | ||
344 | /* We create slab caches for groupinfo data structures based on the | 345 | /* We create slab caches for groupinfo data structures based on the |
345 | * superblock block size. There will be one per mounted filesystem for | 346 | * superblock block size. There will be one per mounted filesystem for |
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
357 | ext4_group_t group); | 358 | ext4_group_t group); |
358 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | 359 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, |
359 | ext4_group_t group); | 360 | ext4_group_t group); |
360 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | 361 | static void ext4_free_data_callback(struct super_block *sb, |
362 | struct ext4_journal_cb_entry *jce, int rc); | ||
361 | 363 | ||
362 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) | 364 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) |
363 | { | 365 | { |
@@ -2473,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2473 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | 2475 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2474 | &ext4_mb_seq_groups_fops, sb); | 2476 | &ext4_mb_seq_groups_fops, sb); |
2475 | 2477 | ||
2476 | if (sbi->s_journal) | ||
2477 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | ||
2478 | |||
2479 | return 0; | 2478 | return 0; |
2480 | 2479 | ||
2481 | out_free_locality_groups: | 2480 | out_free_locality_groups: |
@@ -2588,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb, | |||
2588 | * This function is called by the jbd2 layer once the commit has finished, | 2587 | * This function is called by the jbd2 layer once the commit has finished, |
2589 | * so we know we can free the blocks that were released with that commit. | 2588 | * so we know we can free the blocks that were released with that commit. |
2590 | */ | 2589 | */ |
2591 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | 2590 | static void ext4_free_data_callback(struct super_block *sb, |
2591 | struct ext4_journal_cb_entry *jce, | ||
2592 | int rc) | ||
2592 | { | 2593 | { |
2593 | struct super_block *sb = journal->j_private; | 2594 | struct ext4_free_data *entry = (struct ext4_free_data *)jce; |
2594 | struct ext4_buddy e4b; | 2595 | struct ext4_buddy e4b; |
2595 | struct ext4_group_info *db; | 2596 | struct ext4_group_info *db; |
2596 | int err, count = 0, count2 = 0; | 2597 | int err, count = 0, count2 = 0; |
2597 | struct ext4_free_data *entry; | ||
2598 | struct list_head *l, *ltmp; | ||
2599 | 2598 | ||
2600 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2599 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", |
2601 | entry = list_entry(l, struct ext4_free_data, list); | 2600 | entry->efd_count, entry->efd_group, entry); |
2602 | 2601 | ||
2603 | mb_debug(1, "gonna free %u blocks in group %u (0x%p):", | 2602 | if (test_opt(sb, DISCARD)) |
2604 | entry->count, entry->group, entry); | 2603 | ext4_issue_discard(sb, entry->efd_group, |
2604 | entry->efd_start_cluster, entry->efd_count); | ||
2605 | 2605 | ||
2606 | if (test_opt(sb, DISCARD)) | 2606 | err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b); |
2607 | ext4_issue_discard(sb, entry->group, | 2607 | /* we expect to find existing buddy because it's pinned */ |
2608 | entry->start_cluster, entry->count); | 2608 | BUG_ON(err != 0); |
2609 | 2609 | ||
2610 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | ||
2611 | /* we expect to find existing buddy because it's pinned */ | ||
2612 | BUG_ON(err != 0); | ||
2613 | 2610 | ||
2614 | db = e4b.bd_info; | 2611 | db = e4b.bd_info; |
2615 | /* there are blocks to put in buddy to make them really free */ | 2612 | /* there are blocks to put in buddy to make them really free */ |
2616 | count += entry->count; | 2613 | count += entry->efd_count; |
2617 | count2++; | 2614 | count2++; |
2618 | ext4_lock_group(sb, entry->group); | 2615 | ext4_lock_group(sb, entry->efd_group); |
2619 | /* Take it out of per group rb tree */ | 2616 | /* Take it out of per group rb tree */ |
2620 | rb_erase(&entry->node, &(db->bb_free_root)); | 2617 | rb_erase(&entry->efd_node, &(db->bb_free_root)); |
2621 | mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); | 2618 | mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count); |
2622 | 2619 | ||
2623 | /* | 2620 | /* |
2624 | * Clear the trimmed flag for the group so that the next | 2621 | * Clear the trimmed flag for the group so that the next |
2625 | * ext4_trim_fs can trim it. | 2622 | * ext4_trim_fs can trim it. |
2626 | * If the volume is mounted with -o discard, online discard | 2623 | * If the volume is mounted with -o discard, online discard |
2627 | * is supported and the free blocks will be trimmed online. | 2624 | * is supported and the free blocks will be trimmed online. |
2628 | */ | 2625 | */ |
2629 | if (!test_opt(sb, DISCARD)) | 2626 | if (!test_opt(sb, DISCARD)) |
2630 | EXT4_MB_GRP_CLEAR_TRIMMED(db); | 2627 | EXT4_MB_GRP_CLEAR_TRIMMED(db); |
2631 | 2628 | ||
2632 | if (!db->bb_free_root.rb_node) { | 2629 | if (!db->bb_free_root.rb_node) { |
2633 | /* No more items in the per group rb tree | 2630 | /* No more items in the per group rb tree |
2634 | * balance refcounts from ext4_mb_free_metadata() | 2631 | * balance refcounts from ext4_mb_free_metadata() |
2635 | */ | 2632 | */ |
2636 | page_cache_release(e4b.bd_buddy_page); | 2633 | page_cache_release(e4b.bd_buddy_page); |
2637 | page_cache_release(e4b.bd_bitmap_page); | 2634 | page_cache_release(e4b.bd_bitmap_page); |
2638 | } | ||
2639 | ext4_unlock_group(sb, entry->group); | ||
2640 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
2641 | ext4_mb_unload_buddy(&e4b); | ||
2642 | } | 2635 | } |
2636 | ext4_unlock_group(sb, entry->efd_group); | ||
2637 | kmem_cache_free(ext4_free_data_cachep, entry); | ||
2638 | ext4_mb_unload_buddy(&e4b); | ||
2643 | 2639 | ||
2644 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); | 2640 | mb_debug(1, "freed %u blocks in %u structures\n", count, count2); |
2645 | } | 2641 | } |
@@ -2692,9 +2688,9 @@ int __init ext4_init_mballoc(void) | |||
2692 | return -ENOMEM; | 2688 | return -ENOMEM; |
2693 | } | 2689 | } |
2694 | 2690 | ||
2695 | ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, | 2691 | ext4_free_data_cachep = KMEM_CACHE(ext4_free_data, |
2696 | SLAB_RECLAIM_ACCOUNT); | 2692 | SLAB_RECLAIM_ACCOUNT); |
2697 | if (ext4_free_ext_cachep == NULL) { | 2693 | if (ext4_free_data_cachep == NULL) { |
2698 | kmem_cache_destroy(ext4_pspace_cachep); | 2694 | kmem_cache_destroy(ext4_pspace_cachep); |
2699 | kmem_cache_destroy(ext4_ac_cachep); | 2695 | kmem_cache_destroy(ext4_ac_cachep); |
2700 | return -ENOMEM; | 2696 | return -ENOMEM; |
@@ -2712,7 +2708,7 @@ void ext4_exit_mballoc(void) | |||
2712 | rcu_barrier(); | 2708 | rcu_barrier(); |
2713 | kmem_cache_destroy(ext4_pspace_cachep); | 2709 | kmem_cache_destroy(ext4_pspace_cachep); |
2714 | kmem_cache_destroy(ext4_ac_cachep); | 2710 | kmem_cache_destroy(ext4_ac_cachep); |
2715 | kmem_cache_destroy(ext4_free_ext_cachep); | 2711 | kmem_cache_destroy(ext4_free_data_cachep); |
2716 | ext4_groupinfo_destroy_slabs(); | 2712 | ext4_groupinfo_destroy_slabs(); |
2717 | ext4_remove_debugfs_entry(); | 2713 | ext4_remove_debugfs_entry(); |
2718 | } | 2714 | } |
@@ -3272,8 +3268,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | |||
3272 | n = rb_first(&(grp->bb_free_root)); | 3268 | n = rb_first(&(grp->bb_free_root)); |
3273 | 3269 | ||
3274 | while (n) { | 3270 | while (n) { |
3275 | entry = rb_entry(n, struct ext4_free_data, node); | 3271 | entry = rb_entry(n, struct ext4_free_data, efd_node); |
3276 | ext4_set_bits(bitmap, entry->start_cluster, entry->count); | 3272 | ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count); |
3277 | n = rb_next(n); | 3273 | n = rb_next(n); |
3278 | } | 3274 | } |
3279 | return; | 3275 | return; |
@@ -4379,9 +4375,9 @@ out: | |||
4379 | static int can_merge(struct ext4_free_data *entry1, | 4375 | static int can_merge(struct ext4_free_data *entry1, |
4380 | struct ext4_free_data *entry2) | 4376 | struct ext4_free_data *entry2) |
4381 | { | 4377 | { |
4382 | if ((entry1->t_tid == entry2->t_tid) && | 4378 | if ((entry1->efd_tid == entry2->efd_tid) && |
4383 | (entry1->group == entry2->group) && | 4379 | (entry1->efd_group == entry2->efd_group) && |
4384 | ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) | 4380 | ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster)) |
4385 | return 1; | 4381 | return 1; |
4386 | return 0; | 4382 | return 0; |
4387 | } | 4383 | } |
@@ -4403,8 +4399,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4403 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4399 | BUG_ON(e4b->bd_bitmap_page == NULL); |
4404 | BUG_ON(e4b->bd_buddy_page == NULL); | 4400 | BUG_ON(e4b->bd_buddy_page == NULL); |
4405 | 4401 | ||
4406 | new_node = &new_entry->node; | 4402 | new_node = &new_entry->efd_node; |
4407 | cluster = new_entry->start_cluster; | 4403 | cluster = new_entry->efd_start_cluster; |
4408 | 4404 | ||
4409 | if (!*n) { | 4405 | if (!*n) { |
4410 | /* first free block exent. We need to | 4406 | /* first free block exent. We need to |
@@ -4417,10 +4413,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4417 | } | 4413 | } |
4418 | while (*n) { | 4414 | while (*n) { |
4419 | parent = *n; | 4415 | parent = *n; |
4420 | entry = rb_entry(parent, struct ext4_free_data, node); | 4416 | entry = rb_entry(parent, struct ext4_free_data, efd_node); |
4421 | if (cluster < entry->start_cluster) | 4417 | if (cluster < entry->efd_start_cluster) |
4422 | n = &(*n)->rb_left; | 4418 | n = &(*n)->rb_left; |
4423 | else if (cluster >= (entry->start_cluster + entry->count)) | 4419 | else if (cluster >= (entry->efd_start_cluster + entry->efd_count)) |
4424 | n = &(*n)->rb_right; | 4420 | n = &(*n)->rb_right; |
4425 | else { | 4421 | else { |
4426 | ext4_grp_locked_error(sb, group, 0, | 4422 | ext4_grp_locked_error(sb, group, 0, |
@@ -4437,34 +4433,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4437 | /* Now try to see the extent can be merged to left and right */ | 4433 | /* Now try to see the extent can be merged to left and right */ |
4438 | node = rb_prev(new_node); | 4434 | node = rb_prev(new_node); |
4439 | if (node) { | 4435 | if (node) { |
4440 | entry = rb_entry(node, struct ext4_free_data, node); | 4436 | entry = rb_entry(node, struct ext4_free_data, efd_node); |
4441 | if (can_merge(entry, new_entry)) { | 4437 | if (can_merge(entry, new_entry)) { |
4442 | new_entry->start_cluster = entry->start_cluster; | 4438 | new_entry->efd_start_cluster = entry->efd_start_cluster; |
4443 | new_entry->count += entry->count; | 4439 | new_entry->efd_count += entry->efd_count; |
4444 | rb_erase(node, &(db->bb_free_root)); | 4440 | rb_erase(node, &(db->bb_free_root)); |
4445 | spin_lock(&sbi->s_md_lock); | 4441 | ext4_journal_callback_del(handle, &entry->efd_jce); |
4446 | list_del(&entry->list); | 4442 | kmem_cache_free(ext4_free_data_cachep, entry); |
4447 | spin_unlock(&sbi->s_md_lock); | ||
4448 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4449 | } | 4443 | } |
4450 | } | 4444 | } |
4451 | 4445 | ||
4452 | node = rb_next(new_node); | 4446 | node = rb_next(new_node); |
4453 | if (node) { | 4447 | if (node) { |
4454 | entry = rb_entry(node, struct ext4_free_data, node); | 4448 | entry = rb_entry(node, struct ext4_free_data, efd_node); |
4455 | if (can_merge(new_entry, entry)) { | 4449 | if (can_merge(new_entry, entry)) { |
4456 | new_entry->count += entry->count; | 4450 | new_entry->efd_count += entry->efd_count; |
4457 | rb_erase(node, &(db->bb_free_root)); | 4451 | rb_erase(node, &(db->bb_free_root)); |
4458 | spin_lock(&sbi->s_md_lock); | 4452 | ext4_journal_callback_del(handle, &entry->efd_jce); |
4459 | list_del(&entry->list); | 4453 | kmem_cache_free(ext4_free_data_cachep, entry); |
4460 | spin_unlock(&sbi->s_md_lock); | ||
4461 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4462 | } | 4454 | } |
4463 | } | 4455 | } |
4464 | /* Add the extent to transaction's private list */ | 4456 | /* Add the extent to transaction's private list */ |
4465 | spin_lock(&sbi->s_md_lock); | 4457 | ext4_journal_callback_add(handle, ext4_free_data_callback, |
4466 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | 4458 | &new_entry->efd_jce); |
4467 | spin_unlock(&sbi->s_md_lock); | ||
4468 | return 0; | 4459 | return 0; |
4469 | } | 4460 | } |
4470 | 4461 | ||
@@ -4642,15 +4633,15 @@ do_more: | |||
4642 | * blocks being freed are metadata. these blocks shouldn't | 4633 | * blocks being freed are metadata. these blocks shouldn't |
4643 | * be used until this transaction is committed | 4634 | * be used until this transaction is committed |
4644 | */ | 4635 | */ |
4645 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | 4636 | new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); |
4646 | if (!new_entry) { | 4637 | if (!new_entry) { |
4647 | err = -ENOMEM; | 4638 | err = -ENOMEM; |
4648 | goto error_return; | 4639 | goto error_return; |
4649 | } | 4640 | } |
4650 | new_entry->start_cluster = bit; | 4641 | new_entry->efd_start_cluster = bit; |
4651 | new_entry->group = block_group; | 4642 | new_entry->efd_group = block_group; |
4652 | new_entry->count = count_clusters; | 4643 | new_entry->efd_count = count_clusters; |
4653 | new_entry->t_tid = handle->h_transaction->t_tid; | 4644 | new_entry->efd_tid = handle->h_transaction->t_tid; |
4654 | 4645 | ||
4655 | ext4_lock_group(sb, block_group); | 4646 | ext4_lock_group(sb, block_group); |
4656 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); | 4647 | mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); |