aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
author: Bobi Jam <bobijam@whamcloud.com> 2012-02-20 17:53:02 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2012-02-20 17:53:02 -0500
commit: 18aadd47f88464928b5ce57791c2e8f9f2aaece0 (patch)
tree: bb4f32855484a6dd6081c33fae1564359a88af74 /fs/ext4/mballoc.c
parent: 15291164b22a357cb211b618adfef4fa82fc0de3 (diff)
ext4: expand commit callback and use it for mballoc
The per-commit callback was used by mballoc code to manage free space bitmaps after deleted blocks have been released. This patch expands it to support multiple different callbacks, to allow other things to be done after the commit has been completed.

Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- fs/ext4/mballoc.c 151
1 files changed, 71 insertions, 80 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 545fa0256606..2e467718d413 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,6 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include "ext4_jbd2.h"
24#include "mballoc.h" 25#include "mballoc.h"
25#include <linux/debugfs.h> 26#include <linux/debugfs.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
@@ -339,7 +340,7 @@
339 */ 340 */
340static struct kmem_cache *ext4_pspace_cachep; 341static struct kmem_cache *ext4_pspace_cachep;
341static struct kmem_cache *ext4_ac_cachep; 342static struct kmem_cache *ext4_ac_cachep;
342static struct kmem_cache *ext4_free_ext_cachep; 343static struct kmem_cache *ext4_free_data_cachep;
343 344
344/* We create slab caches for groupinfo data structures based on the 345/* We create slab caches for groupinfo data structures based on the
345 * superblock block size. There will be one per mounted filesystem for 346 * superblock block size. There will be one per mounted filesystem for
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
357 ext4_group_t group); 358 ext4_group_t group);
358static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 359static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
359 ext4_group_t group); 360 ext4_group_t group);
360static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 361static void ext4_free_data_callback(struct super_block *sb,
362 struct ext4_journal_cb_entry *jce, int rc);
361 363
362static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 364static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
363{ 365{
@@ -2473,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2473 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2475 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2474 &ext4_mb_seq_groups_fops, sb); 2476 &ext4_mb_seq_groups_fops, sb);
2475 2477
2476 if (sbi->s_journal)
2477 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2478
2479 return 0; 2478 return 0;
2480 2479
2481out_free_locality_groups: 2480out_free_locality_groups:
@@ -2588,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb,
2588 * This function is called by the jbd2 layer once the commit has finished, 2587 * This function is called by the jbd2 layer once the commit has finished,
2589 * so we know we can free the blocks that were released with that commit. 2588 * so we know we can free the blocks that were released with that commit.
2590 */ 2589 */
2591static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) 2590static void ext4_free_data_callback(struct super_block *sb,
2591 struct ext4_journal_cb_entry *jce,
2592 int rc)
2592{ 2593{
2593 struct super_block *sb = journal->j_private; 2594 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2594 struct ext4_buddy e4b; 2595 struct ext4_buddy e4b;
2595 struct ext4_group_info *db; 2596 struct ext4_group_info *db;
2596 int err, count = 0, count2 = 0; 2597 int err, count = 0, count2 = 0;
2597 struct ext4_free_data *entry;
2598 struct list_head *l, *ltmp;
2599 2598
2600 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2599 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2601 entry = list_entry(l, struct ext4_free_data, list); 2600 entry->efd_count, entry->efd_group, entry);
2602 2601
2603 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2602 if (test_opt(sb, DISCARD))
2604 entry->count, entry->group, entry); 2603 ext4_issue_discard(sb, entry->efd_group,
2604 entry->efd_start_cluster, entry->efd_count);
2605 2605
2606 if (test_opt(sb, DISCARD)) 2606 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2607 ext4_issue_discard(sb, entry->group, 2607 /* we expect to find existing buddy because it's pinned */
2608 entry->start_cluster, entry->count); 2608 BUG_ON(err != 0);
2609 2609
2610 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2611 /* we expect to find existing buddy because it's pinned */
2612 BUG_ON(err != 0);
2613 2610
2614 db = e4b.bd_info; 2611 db = e4b.bd_info;
2615 /* there are blocks to put in buddy to make them really free */ 2612 /* there are blocks to put in buddy to make them really free */
2616 count += entry->count; 2613 count += entry->efd_count;
2617 count2++; 2614 count2++;
2618 ext4_lock_group(sb, entry->group); 2615 ext4_lock_group(sb, entry->efd_group);
2619 /* Take it out of per group rb tree */ 2616 /* Take it out of per group rb tree */
2620 rb_erase(&entry->node, &(db->bb_free_root)); 2617 rb_erase(&entry->efd_node, &(db->bb_free_root));
2621 mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); 2618 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2622 2619
2623 /* 2620 /*
2624 * Clear the trimmed flag for the group so that the next 2621 * Clear the trimmed flag for the group so that the next
2625 * ext4_trim_fs can trim it. 2622 * ext4_trim_fs can trim it.
2626 * If the volume is mounted with -o discard, online discard 2623 * If the volume is mounted with -o discard, online discard
2627 * is supported and the free blocks will be trimmed online. 2624 * is supported and the free blocks will be trimmed online.
2628 */ 2625 */
2629 if (!test_opt(sb, DISCARD)) 2626 if (!test_opt(sb, DISCARD))
2630 EXT4_MB_GRP_CLEAR_TRIMMED(db); 2627 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2631 2628
2632 if (!db->bb_free_root.rb_node) { 2629 if (!db->bb_free_root.rb_node) {
2633 /* No more items in the per group rb tree 2630 /* No more items in the per group rb tree
2634 * balance refcounts from ext4_mb_free_metadata() 2631 * balance refcounts from ext4_mb_free_metadata()
2635 */ 2632 */
2636 page_cache_release(e4b.bd_buddy_page); 2633 page_cache_release(e4b.bd_buddy_page);
2637 page_cache_release(e4b.bd_bitmap_page); 2634 page_cache_release(e4b.bd_bitmap_page);
2638 }
2639 ext4_unlock_group(sb, entry->group);
2640 kmem_cache_free(ext4_free_ext_cachep, entry);
2641 ext4_mb_unload_buddy(&e4b);
2642 } 2635 }
2636 ext4_unlock_group(sb, entry->efd_group);
2637 kmem_cache_free(ext4_free_data_cachep, entry);
2638 ext4_mb_unload_buddy(&e4b);
2643 2639
2644 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2640 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2645} 2641}
@@ -2692,9 +2688,9 @@ int __init ext4_init_mballoc(void)
2692 return -ENOMEM; 2688 return -ENOMEM;
2693 } 2689 }
2694 2690
2695 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, 2691 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2696 SLAB_RECLAIM_ACCOUNT); 2692 SLAB_RECLAIM_ACCOUNT);
2697 if (ext4_free_ext_cachep == NULL) { 2693 if (ext4_free_data_cachep == NULL) {
2698 kmem_cache_destroy(ext4_pspace_cachep); 2694 kmem_cache_destroy(ext4_pspace_cachep);
2699 kmem_cache_destroy(ext4_ac_cachep); 2695 kmem_cache_destroy(ext4_ac_cachep);
2700 return -ENOMEM; 2696 return -ENOMEM;
@@ -2712,7 +2708,7 @@ void ext4_exit_mballoc(void)
2712 rcu_barrier(); 2708 rcu_barrier();
2713 kmem_cache_destroy(ext4_pspace_cachep); 2709 kmem_cache_destroy(ext4_pspace_cachep);
2714 kmem_cache_destroy(ext4_ac_cachep); 2710 kmem_cache_destroy(ext4_ac_cachep);
2715 kmem_cache_destroy(ext4_free_ext_cachep); 2711 kmem_cache_destroy(ext4_free_data_cachep);
2716 ext4_groupinfo_destroy_slabs(); 2712 ext4_groupinfo_destroy_slabs();
2717 ext4_remove_debugfs_entry(); 2713 ext4_remove_debugfs_entry();
2718} 2714}
@@ -3272,8 +3268,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3272 n = rb_first(&(grp->bb_free_root)); 3268 n = rb_first(&(grp->bb_free_root));
3273 3269
3274 while (n) { 3270 while (n) {
3275 entry = rb_entry(n, struct ext4_free_data, node); 3271 entry = rb_entry(n, struct ext4_free_data, efd_node);
3276 ext4_set_bits(bitmap, entry->start_cluster, entry->count); 3272 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3277 n = rb_next(n); 3273 n = rb_next(n);
3278 } 3274 }
3279 return; 3275 return;
@@ -4379,9 +4375,9 @@ out:
4379static int can_merge(struct ext4_free_data *entry1, 4375static int can_merge(struct ext4_free_data *entry1,
4380 struct ext4_free_data *entry2) 4376 struct ext4_free_data *entry2)
4381{ 4377{
4382 if ((entry1->t_tid == entry2->t_tid) && 4378 if ((entry1->efd_tid == entry2->efd_tid) &&
4383 (entry1->group == entry2->group) && 4379 (entry1->efd_group == entry2->efd_group) &&
4384 ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) 4380 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
4385 return 1; 4381 return 1;
4386 return 0; 4382 return 0;
4387} 4383}
@@ -4403,8 +4399,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4403 BUG_ON(e4b->bd_bitmap_page == NULL); 4399 BUG_ON(e4b->bd_bitmap_page == NULL);
4404 BUG_ON(e4b->bd_buddy_page == NULL); 4400 BUG_ON(e4b->bd_buddy_page == NULL);
4405 4401
4406 new_node = &new_entry->node; 4402 new_node = &new_entry->efd_node;
4407 cluster = new_entry->start_cluster; 4403 cluster = new_entry->efd_start_cluster;
4408 4404
4409 if (!*n) { 4405 if (!*n) {
4410 /* first free block exent. We need to 4406 /* first free block exent. We need to
@@ -4417,10 +4413,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4417 } 4413 }
4418 while (*n) { 4414 while (*n) {
4419 parent = *n; 4415 parent = *n;
4420 entry = rb_entry(parent, struct ext4_free_data, node); 4416 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4421 if (cluster < entry->start_cluster) 4417 if (cluster < entry->efd_start_cluster)
4422 n = &(*n)->rb_left; 4418 n = &(*n)->rb_left;
4423 else if (cluster >= (entry->start_cluster + entry->count)) 4419 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4424 n = &(*n)->rb_right; 4420 n = &(*n)->rb_right;
4425 else { 4421 else {
4426 ext4_grp_locked_error(sb, group, 0, 4422 ext4_grp_locked_error(sb, group, 0,
@@ -4437,34 +4433,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4437 /* Now try to see the extent can be merged to left and right */ 4433 /* Now try to see the extent can be merged to left and right */
4438 node = rb_prev(new_node); 4434 node = rb_prev(new_node);
4439 if (node) { 4435 if (node) {
4440 entry = rb_entry(node, struct ext4_free_data, node); 4436 entry = rb_entry(node, struct ext4_free_data, efd_node);
4441 if (can_merge(entry, new_entry)) { 4437 if (can_merge(entry, new_entry)) {
4442 new_entry->start_cluster = entry->start_cluster; 4438 new_entry->efd_start_cluster = entry->efd_start_cluster;
4443 new_entry->count += entry->count; 4439 new_entry->efd_count += entry->efd_count;
4444 rb_erase(node, &(db->bb_free_root)); 4440 rb_erase(node, &(db->bb_free_root));
4445 spin_lock(&sbi->s_md_lock); 4441 ext4_journal_callback_del(handle, &entry->efd_jce);
4446 list_del(&entry->list); 4442 kmem_cache_free(ext4_free_data_cachep, entry);
4447 spin_unlock(&sbi->s_md_lock);
4448 kmem_cache_free(ext4_free_ext_cachep, entry);
4449 } 4443 }
4450 } 4444 }
4451 4445
4452 node = rb_next(new_node); 4446 node = rb_next(new_node);
4453 if (node) { 4447 if (node) {
4454 entry = rb_entry(node, struct ext4_free_data, node); 4448 entry = rb_entry(node, struct ext4_free_data, efd_node);
4455 if (can_merge(new_entry, entry)) { 4449 if (can_merge(new_entry, entry)) {
4456 new_entry->count += entry->count; 4450 new_entry->efd_count += entry->efd_count;
4457 rb_erase(node, &(db->bb_free_root)); 4451 rb_erase(node, &(db->bb_free_root));
4458 spin_lock(&sbi->s_md_lock); 4452 ext4_journal_callback_del(handle, &entry->efd_jce);
4459 list_del(&entry->list); 4453 kmem_cache_free(ext4_free_data_cachep, entry);
4460 spin_unlock(&sbi->s_md_lock);
4461 kmem_cache_free(ext4_free_ext_cachep, entry);
4462 } 4454 }
4463 } 4455 }
4464 /* Add the extent to transaction's private list */ 4456 /* Add the extent to transaction's private list */
4465 spin_lock(&sbi->s_md_lock); 4457 ext4_journal_callback_add(handle, ext4_free_data_callback,
4466 list_add(&new_entry->list, &handle->h_transaction->t_private_list); 4458 &new_entry->efd_jce);
4467 spin_unlock(&sbi->s_md_lock);
4468 return 0; 4459 return 0;
4469} 4460}
4470 4461
@@ -4642,15 +4633,15 @@ do_more:
4642 * blocks being freed are metadata. these blocks shouldn't 4633 * blocks being freed are metadata. these blocks shouldn't
4643 * be used until this transaction is committed 4634 * be used until this transaction is committed
4644 */ 4635 */
4645 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); 4636 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
4646 if (!new_entry) { 4637 if (!new_entry) {
4647 err = -ENOMEM; 4638 err = -ENOMEM;
4648 goto error_return; 4639 goto error_return;
4649 } 4640 }
4650 new_entry->start_cluster = bit; 4641 new_entry->efd_start_cluster = bit;
4651 new_entry->group = block_group; 4642 new_entry->efd_group = block_group;
4652 new_entry->count = count_clusters; 4643 new_entry->efd_count = count_clusters;
4653 new_entry->t_tid = handle->h_transaction->t_tid; 4644 new_entry->efd_tid = handle->h_transaction->t_tid;
4654 4645
4655 ext4_lock_group(sb, block_group); 4646 ext4_lock_group(sb, block_group);
4656 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); 4647 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);