From 688f05a01983711a4e715b1d6e15a89a89c96a66 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 13 Oct 2008 12:14:14 -0400 Subject: ext4: Free ext4_prealloc_space using kmem_cache_free We should use kmem_cache_free to free memory allocated via kmem_cache_alloc Signed-off-by: Aneesh Kumar K.V Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b580714f0d85..154f8dec97ea 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); list_del(&pa->pa_group_list); count++; - kfree(pa); + kmem_cache_free(ext4_pspace_cachep, pa); } if (count) mb_debug("mballoc: %u PAs left\n", count); -- cgit v1.2.2 From c894058d66637c7720569fbe12957f4de64d9991 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 16 Oct 2008 10:14:27 -0400 Subject: ext4: Use an rbtree for tracking blocks freed during transaction. With this patch we track the block freed during a transaction using red-black tree. We also make sure contiguous blocks freed are collected in one node in the tree. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 184 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 117 insertions(+), 67 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 154f8dec97ea..bd9b011941a2 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, } INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); + meta_group_info[i]->bb_free_root.rb_node = NULL;; #ifdef DOUBLE_CHECK { @@ -2647,13 +2648,11 @@ int ext4_mb_release(struct super_block *sb) static noinline_for_stack void ext4_mb_free_committed_blocks(struct super_block *sb) { - struct ext4_sb_info *sbi = EXT4_SB(sb); - int err; - int i; - int count = 0; - int count2 = 0; - struct ext4_free_metadata *md; struct ext4_buddy e4b; + struct ext4_group_info *db; + struct ext4_sb_info *sbi = EXT4_SB(sb); + int err, count = 0, count2 = 0; + struct ext4_free_data *entry; if (list_empty(&sbi->s_committed_transaction)) return; @@ -2661,44 +2660,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb) /* there is committed blocks to be freed yet */ do { /* get next array of blocks */ - md = NULL; + entry = NULL; spin_lock(&sbi->s_md_lock); if (!list_empty(&sbi->s_committed_transaction)) { - md = list_entry(sbi->s_committed_transaction.next, - struct ext4_free_metadata, list); - list_del(&md->list); + entry = list_entry(sbi->s_committed_transaction.next, + struct ext4_free_data, list); + list_del(&entry->list); } spin_unlock(&sbi->s_md_lock); - if (md == NULL) + if (entry == NULL) break; mb_debug("gonna free %u blocks in group %lu (0x%p):", - md->num, md->group, md); + entry->count, entry->group, entry); - err = ext4_mb_load_buddy(sb, md->group, &e4b); + err = ext4_mb_load_buddy(sb, entry->group, &e4b); /* we expect to find existing buddy because it's pinned */ BUG_ON(err != 0); + db = e4b.bd_info; /* there are blocks to put in buddy to make them really free */ - count += md->num; + count += entry->count; count2++; - ext4_lock_group(sb, md->group); - for (i = 0; i < md->num; i++) { - mb_debug(" %u", md->blocks[i]); - mb_free_blocks(NULL, &e4b, md->blocks[i], 1); + ext4_lock_group(sb, entry->group); + /* Take it out of per group rb tree */ + rb_erase(&entry->node, &(db->bb_free_root)); + mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); + + if (!db->bb_free_root.rb_node) { + /* No more items in the per group rb tree + * balance refcounts from ext4_mb_free_metadata() + */ + page_cache_release(e4b.bd_buddy_page); + page_cache_release(e4b.bd_bitmap_page); } - mb_debug("\n"); - ext4_unlock_group(sb, md->group); - - /* balance refcounts from ext4_mb_free_metadata() */ - page_cache_release(e4b.bd_buddy_page); - page_cache_release(e4b.bd_bitmap_page); + ext4_unlock_group(sb, entry->group); - kfree(md); + kmem_cache_free(ext4_free_ext_cachep, entry); ext4_mb_release_desc(&e4b); - - } while (md); + } while (1); mb_debug("freed %u blocks in %u structures\n", count, count2); } @@ -2771,6 +2772,16 @@ int __init init_ext4_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); return -ENOMEM; } + + ext4_free_ext_cachep = + kmem_cache_create("ext4_free_block_extents", + sizeof(struct ext4_free_data), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + if (ext4_free_ext_cachep == NULL) { + kmem_cache_destroy(ext4_pspace_cachep); + kmem_cache_destroy(ext4_ac_cachep); + return -ENOMEM; + } return 0; } @@ -2779,6 +2790,7 @@ void exit_ext4_mballoc(void) /* XXX: synchronize_rcu(); */ kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); + kmem_cache_destroy(ext4_free_ext_cachep); } @@ -4415,6 +4427,21 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb, ext4_mb_free_committed_blocks(sb); } +/* + * We can merge two free data extents only if the physical blocks + * are contiguous, AND the extents were freed by the same transaction, + * AND the blocks are associated with the same group. + */ +static int can_merge(struct ext4_free_data *entry1, + struct ext4_free_data *entry2) +{ + if ((entry1->t_tid == entry2->t_tid) && + (entry1->group == entry2->group) && + ((entry1->start_blk + entry1->count) == entry2->start_blk)) + return 1; + return 0; +} + static noinline_for_stack int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_group_t group, ext4_grpblk_t block, int count) @@ -4422,57 +4449,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, struct ext4_group_info *db = e4b->bd_info; struct super_block *sb = e4b->bd_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_free_metadata *md; - int i; + struct ext4_free_data *entry, *new_entry; + struct rb_node **n = &db->bb_free_root.rb_node, *node; + struct rb_node *parent = NULL, *new_node; + BUG_ON(e4b->bd_bitmap_page == NULL); BUG_ON(e4b->bd_buddy_page == NULL); + new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); + new_entry->start_blk = block; + new_entry->group = group; + new_entry->count = count; + new_entry->t_tid = handle->h_transaction->t_tid; + new_node = &new_entry->node; + ext4_lock_group(sb, group); - for (i = 0; i < count; i++) { - md = db->bb_md_cur; - if (md && db->bb_tid != handle->h_transaction->t_tid) { - db->bb_md_cur = NULL; - md = NULL; + if (!*n) { + /* first free block exent. We need to + protect buddy cache from being freed, + * otherwise we'll refresh it from + * on-disk bitmap and lose not-yet-available + * blocks */ + page_cache_get(e4b->bd_buddy_page); + page_cache_get(e4b->bd_bitmap_page); + } + while (*n) { + parent = *n; + entry = rb_entry(parent, struct ext4_free_data, node); + if (block < entry->start_blk) + n = &(*n)->rb_left; + else if (block >= (entry->start_blk + entry->count)) + n = &(*n)->rb_right; + else { + ext4_error(sb, __func__, + "Double free of blocks %d (%d %d)\n", + block, entry->start_blk, entry->count); + return 0; } + } - if (md == NULL) { - ext4_unlock_group(sb, group); - md = kmalloc(sizeof(*md), GFP_NOFS); - if (md == NULL) - return -ENOMEM; - md->num = 0; - md->group = group; - - ext4_lock_group(sb, group); - if (db->bb_md_cur == NULL) { - spin_lock(&sbi->s_md_lock); - list_add(&md->list, &sbi->s_active_transaction); - spin_unlock(&sbi->s_md_lock); - /* protect buddy cache from being freed, - * otherwise we'll refresh it from - * on-disk bitmap and lose not-yet-available - * blocks */ - page_cache_get(e4b->bd_buddy_page); - page_cache_get(e4b->bd_bitmap_page); - db->bb_md_cur = md; - db->bb_tid = handle->h_transaction->t_tid; - mb_debug("new md 0x%p for group %lu\n", - md, md->group); - } else { - kfree(md); - md = db->bb_md_cur; - } + rb_link_node(new_node, parent, n); + rb_insert_color(new_node, &db->bb_free_root); + + /* Now try to see the extent can be merged to left and right */ + node = rb_prev(new_node); + if (node) { + entry = rb_entry(node, struct ext4_free_data, node); + if (can_merge(entry, new_entry)) { + new_entry->start_blk = entry->start_blk; + new_entry->count += entry->count; + rb_erase(node, &(db->bb_free_root)); + spin_lock(&sbi->s_md_lock); + list_del(&entry->list); + spin_unlock(&sbi->s_md_lock); + kmem_cache_free(ext4_free_ext_cachep, entry); } + } - BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); - md->blocks[md->num] = block + i; - md->num++; - if (md->num == EXT4_BB_MAX_BLOCKS) { - /* no more space, put full container on a sb's list */ - db->bb_md_cur = NULL; + node = rb_next(new_node); + if (node) { + entry = rb_entry(node, struct ext4_free_data, node); + if (can_merge(new_entry, entry)) { + new_entry->count += entry->count; + rb_erase(node, &(db->bb_free_root)); + spin_lock(&sbi->s_md_lock); + list_del(&entry->list); + spin_unlock(&sbi->s_md_lock); + kmem_cache_free(ext4_free_ext_cachep, entry); } } + /* Add the extent to active_transaction list */ + spin_lock(&sbi->s_md_lock); + list_add(&new_entry->list, &sbi->s_active_transaction); + spin_unlock(&sbi->s_md_lock); ext4_unlock_group(sb, group); return 0; } -- cgit v1.2.2 From 8a0aba733db1adb5e1f0e828889a18f4c1c512de Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 16 Oct 2008 10:06:27 -0400 Subject: ext4: let the block device know when unused blocks can be discarded Let the block device know when unused blocks can be discarded, using the new sb_issue_discard() interface. Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index bd9b011941a2..815a22ea6078 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2653,6 +2653,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int err, count = 0, count2 = 0; struct ext4_free_data *entry; + ext4_fsblk_t discard_block; if (list_empty(&sbi->s_committed_transaction)) return; @@ -2696,6 +2697,12 @@ ext4_mb_free_committed_blocks(struct super_block *sb) page_cache_release(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->group); + discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) + + entry->start_blk + + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); + trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, + (unsigned long long) discard_block, entry->count); + sb_issue_discard(sb, discard_block, entry->count); kmem_cache_free(ext4_free_ext_cachep, entry); ext4_mb_release_desc(&e4b); -- cgit v1.2.2 From 0b09923eabd92f11a8b272dd3fd0347332d0e1e2 Mon Sep 17 00:00:00 2001 From: Manish Katiyar Date: Fri, 17 Oct 2008 14:58:45 -0400 Subject: ext4: Remove compile warnings when building w/o CONFIG_PROC_FS Signed-off-by: Manish Katiyar Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 815a22ea6078..da1da1fe2b14 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2720,6 +2720,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) static int ext4_mb_init_per_dev_proc(struct super_block *sb) { +#ifdef CONFIG_PROC_FS mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; struct ext4_sb_info *sbi = EXT4_SB(sb); struct proc_dir_entry *proc; @@ -2743,10 +2744,14 @@ err_out: remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); return -ENOMEM; +#else + return 0; +#endif } static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) { +#ifdef CONFIG_PROC_FS struct ext4_sb_info *sbi = EXT4_SB(sb); if (sbi->s_proc == NULL) @@ -2758,7 +2763,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); - +#endif return 0; } -- cgit v1.2.2 From 3e624fc72fba09b6f999a9fbb87b64efccd38036 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 16 Oct 2008 20:00:24 -0400 Subject: ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback The multiblock allocator needs to be able to release blocks (and issue a blkdev discard request) when the transaction which freed those blocks is committed. Previously this was done via a polling mechanism when blocks are allocated or freed. A much better way of doing things is to create a jbd2 callback function and attaching the list of blocks to be freed directly to the transaction structure. Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 85 ++++++++++--------------------------------------------- 1 file changed, 15 insertions(+), 70 deletions(-) (limited to 'fs/ext4/mballoc.c') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index da1da1fe2b14..dfe17a134052 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2523,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) } spin_lock_init(&sbi->s_md_lock); - INIT_LIST_HEAD(&sbi->s_active_transaction); - INIT_LIST_HEAD(&sbi->s_closed_transaction); - INIT_LIST_HEAD(&sbi->s_committed_transaction); spin_lock_init(&sbi->s_bal_lock); sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; @@ -2554,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ext4_mb_init_per_dev_proc(sb); ext4_mb_history_init(sb); + sbi->s_journal->j_commit_callback = release_blocks_on_commit; + printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); return 0; } @@ -2583,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb) struct ext4_group_info *grinfo; struct ext4_sb_info *sbi = EXT4_SB(sb); - /* release freed, non-committed blocks */ - spin_lock(&sbi->s_md_lock); - list_splice_init(&sbi->s_closed_transaction, - &sbi->s_committed_transaction); - list_splice_init(&sbi->s_active_transaction, - &sbi->s_committed_transaction); - spin_unlock(&sbi->s_md_lock); - ext4_mb_free_committed_blocks(sb); - if (sbi->s_group_info) { for (i = 0; i < sbi->s_groups_count; i++) { grinfo = ext4_get_group_info(sb, i); @@ -2645,36 +2635,25 @@ int ext4_mb_release(struct super_block *sb) return 0; } -static noinline_for_stack void -ext4_mb_free_committed_blocks(struct super_block *sb) +/* + * This function is called by the jbd2 layer once the commit has finished, + * so we know we can free the blocks that were released with that commit. + */ +static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) { + struct super_block *sb = journal->j_private; struct ext4_buddy e4b; struct ext4_group_info *db; - struct ext4_sb_info *sbi = EXT4_SB(sb); int err, count = 0, count2 = 0; struct ext4_free_data *entry; ext4_fsblk_t discard_block; + struct list_head *l, *ltmp; - if (list_empty(&sbi->s_committed_transaction)) - return; - - /* there is committed blocks to be freed yet */ - do { - /* get next array of blocks */ - entry = NULL; - spin_lock(&sbi->s_md_lock); - if (!list_empty(&sbi->s_committed_transaction)) { - entry = list_entry(sbi->s_committed_transaction.next, - struct ext4_free_data, list); - list_del(&entry->list); - } - spin_unlock(&sbi->s_md_lock); - - if (entry == NULL) - break; + list_for_each_safe(l, ltmp, &txn->t_private_list) { + entry = list_entry(l, struct ext4_free_data, list); mb_debug("gonna free %u blocks in group %lu (0x%p):", - entry->count, entry->group, entry); + entry->count, entry->group, entry); err = ext4_mb_load_buddy(sb, entry->group, &e4b); /* we expect to find existing buddy because it's pinned */ @@ -2706,7 +2685,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) kmem_cache_free(ext4_free_ext_cachep, entry); ext4_mb_release_desc(&e4b); - } while (1); + } mb_debug("freed %u blocks in %u structures\n", count, count2); } @@ -4348,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, goto out1; } - ext4_mb_poll_new_transaction(sb, handle); - *errp = ext4_mb_initialize_context(ac, ar); if (*errp) { ar->len = 0; @@ -4408,36 +4385,6 @@ out1: return block; } -static void ext4_mb_poll_new_transaction(struct super_block *sb, - handle_t *handle) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (sbi->s_last_transaction == handle->h_transaction->t_tid) - return; - - /* new transaction! time to close last one and free blocks for - * committed transaction. we know that only transaction can be - * active, so previos transaction can be being logged and we - * know that transaction before previous is known to be already - * logged. this means that now we may free blocks freed in all - * transactions before previous one. hope I'm clear enough ... */ - - spin_lock(&sbi->s_md_lock); - if (sbi->s_last_transaction != handle->h_transaction->t_tid) { - mb_debug("new transaction %lu, old %lu\n", - (unsigned long) handle->h_transaction->t_tid, - (unsigned long) sbi->s_last_transaction); - list_splice_init(&sbi->s_closed_transaction, - &sbi->s_committed_transaction); - list_splice_init(&sbi->s_active_transaction, - &sbi->s_closed_transaction); - sbi->s_last_transaction = handle->h_transaction->t_tid; - } - spin_unlock(&sbi->s_md_lock); - - ext4_mb_free_committed_blocks(sb); -} /* * We can merge two free data extents only if the physical blocks @@ -4531,9 +4478,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, kmem_cache_free(ext4_free_ext_cachep, entry); } } - /* Add the extent to active_transaction list */ + /* Add the extent to transaction's private list */ spin_lock(&sbi->s_md_lock); - list_add(&new_entry->list, &sbi->s_active_transaction); + list_add(&new_entry->list, &handle->h_transaction->t_private_list); spin_unlock(&sbi->s_md_lock); ext4_unlock_group(sb, group); return 0; @@ -4562,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, *freed = 0; - ext4_mb_poll_new_transaction(sb, handle); - sbi = EXT4_SB(sb); es = EXT4_SB(sb)->s_es; if (block < le32_to_cpu(es->s_first_data_block) || -- cgit v1.2.2