about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- fs/ext4/mballoc.c 263
1 files changed, 135 insertions, 128 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b580714f0d85..dfe17a134052 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2300 } 2300 }
2301 2301
2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2303 meta_group_info[i]->bb_free_root.rb_node = NULL;;
2303 2304
2304#ifdef DOUBLE_CHECK 2305#ifdef DOUBLE_CHECK
2305 { 2306 {
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2522 } 2523 }
2523 2524
2524 spin_lock_init(&sbi->s_md_lock); 2525 spin_lock_init(&sbi->s_md_lock);
2525 INIT_LIST_HEAD(&sbi->s_active_transaction);
2526 INIT_LIST_HEAD(&sbi->s_closed_transaction);
2527 INIT_LIST_HEAD(&sbi->s_committed_transaction);
2528 spin_lock_init(&sbi->s_bal_lock); 2526 spin_lock_init(&sbi->s_bal_lock);
2529 2527
2530 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; 2528 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2553 ext4_mb_init_per_dev_proc(sb); 2551 ext4_mb_init_per_dev_proc(sb);
2554 ext4_mb_history_init(sb); 2552 ext4_mb_history_init(sb);
2555 2553
2554 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2555
2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); 2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2557 return 0; 2557 return 0;
2558} 2558}
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2568 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); 2568 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2569 list_del(&pa->pa_group_list); 2569 list_del(&pa->pa_group_list);
2570 count++; 2570 count++;
2571 kfree(pa); 2571 kmem_cache_free(ext4_pspace_cachep, pa);
2572 } 2572 }
2573 if (count) 2573 if (count)
2574 mb_debug("mballoc: %u PAs left\n", count); 2574 mb_debug("mballoc: %u PAs left\n", count);
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
2582 struct ext4_group_info *grinfo; 2582 struct ext4_group_info *grinfo;
2583 struct ext4_sb_info *sbi = EXT4_SB(sb); 2583 struct ext4_sb_info *sbi = EXT4_SB(sb);
2584 2584
2585 /* release freed, non-committed blocks */
2586 spin_lock(&sbi->s_md_lock);
2587 list_splice_init(&sbi->s_closed_transaction,
2588 &sbi->s_committed_transaction);
2589 list_splice_init(&sbi->s_active_transaction,
2590 &sbi->s_committed_transaction);
2591 spin_unlock(&sbi->s_md_lock);
2592 ext4_mb_free_committed_blocks(sb);
2593
2594 if (sbi->s_group_info) { 2585 if (sbi->s_group_info) {
2595 for (i = 0; i < sbi->s_groups_count; i++) { 2586 for (i = 0; i < sbi->s_groups_count; i++) {
2596 grinfo = ext4_get_group_info(sb, i); 2587 grinfo = ext4_get_group_info(sb, i);
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb)
2644 return 0; 2635 return 0;
2645} 2636}
2646 2637
2647static noinline_for_stack void 2638/*
2648ext4_mb_free_committed_blocks(struct super_block *sb) 2639 * This function is called by the jbd2 layer once the commit has finished,
2640 * so we know we can free the blocks that were released with that commit.
2641 */
2642static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2649{ 2643{
2650 struct ext4_sb_info *sbi = EXT4_SB(sb); 2644 struct super_block *sb = journal->j_private;
2651 int err;
2652 int i;
2653 int count = 0;
2654 int count2 = 0;
2655 struct ext4_free_metadata *md;
2656 struct ext4_buddy e4b; 2645 struct ext4_buddy e4b;
2646 struct ext4_group_info *db;
2647 int err, count = 0, count2 = 0;
2648 struct ext4_free_data *entry;
2649 ext4_fsblk_t discard_block;
2650 struct list_head *l, *ltmp;
2657 2651
2658 if (list_empty(&sbi->s_committed_transaction)) 2652 list_for_each_safe(l, ltmp, &txn->t_private_list) {
2659 return; 2653 entry = list_entry(l, struct ext4_free_data, list);
2660
2661 /* there is committed blocks to be freed yet */
2662 do {
2663 /* get next array of blocks */
2664 md = NULL;
2665 spin_lock(&sbi->s_md_lock);
2666 if (!list_empty(&sbi->s_committed_transaction)) {
2667 md = list_entry(sbi->s_committed_transaction.next,
2668 struct ext4_free_metadata, list);
2669 list_del(&md->list);
2670 }
2671 spin_unlock(&sbi->s_md_lock);
2672
2673 if (md == NULL)
2674 break;
2675 2654
2676 mb_debug("gonna free %u blocks in group %lu (0x%p):", 2655 mb_debug("gonna free %u blocks in group %lu (0x%p):",
2677 md->num, md->group, md); 2656 entry->count, entry->group, entry);
2678 2657
2679 err = ext4_mb_load_buddy(sb, md->group, &e4b); 2658 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2680 /* we expect to find existing buddy because it's pinned */ 2659 /* we expect to find existing buddy because it's pinned */
2681 BUG_ON(err != 0); 2660 BUG_ON(err != 0);
2682 2661
2662 db = e4b.bd_info;
2683 /* there are blocks to put in buddy to make them really free */ 2663 /* there are blocks to put in buddy to make them really free */
2684 count += md->num; 2664 count += entry->count;
2685 count2++; 2665 count2++;
2686 ext4_lock_group(sb, md->group); 2666 ext4_lock_group(sb, entry->group);
2687 for (i = 0; i < md->num; i++) { 2667 /* Take it out of per group rb tree */
2688 mb_debug(" %u", md->blocks[i]); 2668 rb_erase(&entry->node, &(db->bb_free_root));
2689 mb_free_blocks(NULL, &e4b, md->blocks[i], 1); 2669 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
2670
2671 if (!db->bb_free_root.rb_node) {
2672 /* No more items in the per group rb tree
2673 * balance refcounts from ext4_mb_free_metadata()
2674 */
2675 page_cache_release(e4b.bd_buddy_page);
2676 page_cache_release(e4b.bd_bitmap_page);
2690 } 2677 }
2691 mb_debug("\n"); 2678 ext4_unlock_group(sb, entry->group);
2692 ext4_unlock_group(sb, md->group); 2679 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2693 2680 + entry->start_blk
2694 /* balance refcounts from ext4_mb_free_metadata() */ 2681 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2695 page_cache_release(e4b.bd_buddy_page); 2682 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
2696 page_cache_release(e4b.bd_bitmap_page); 2683 (unsigned long long) discard_block, entry->count);
2697 2684 sb_issue_discard(sb, discard_block, entry->count);
2698 kfree(md); 2685
2686 kmem_cache_free(ext4_free_ext_cachep, entry);
2699 ext4_mb_release_desc(&e4b); 2687 ext4_mb_release_desc(&e4b);
2700 2688 }
2701 } while (md);
2702 2689
2703 mb_debug("freed %u blocks in %u structures\n", count, count2); 2690 mb_debug("freed %u blocks in %u structures\n", count, count2);
2704} 2691}
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
2712 2699
2713static int ext4_mb_init_per_dev_proc(struct super_block *sb) 2700static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2714{ 2701{
2702#ifdef CONFIG_PROC_FS
2715 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; 2703 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
2716 struct ext4_sb_info *sbi = EXT4_SB(sb); 2704 struct ext4_sb_info *sbi = EXT4_SB(sb);
2717 struct proc_dir_entry *proc; 2705 struct proc_dir_entry *proc;
@@ -2735,10 +2723,14 @@ err_out:
2735 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); 2723 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2736 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); 2724 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2737 return -ENOMEM; 2725 return -ENOMEM;
2726#else
2727 return 0;
2728#endif
2738} 2729}
2739 2730
2740static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) 2731static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2741{ 2732{
2733#ifdef CONFIG_PROC_FS
2742 struct ext4_sb_info *sbi = EXT4_SB(sb); 2734 struct ext4_sb_info *sbi = EXT4_SB(sb);
2743 2735
2744 if (sbi->s_proc == NULL) 2736 if (sbi->s_proc == NULL)
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2750 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); 2742 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2751 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); 2743 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2752 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); 2744 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2753 2745#endif
2754 return 0; 2746 return 0;
2755} 2747}
2756 2748
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void)
2771 kmem_cache_destroy(ext4_pspace_cachep); 2763 kmem_cache_destroy(ext4_pspace_cachep);
2772 return -ENOMEM; 2764 return -ENOMEM;
2773 } 2765 }
2766
2767 ext4_free_ext_cachep =
2768 kmem_cache_create("ext4_free_block_extents",
2769 sizeof(struct ext4_free_data),
2770 0, SLAB_RECLAIM_ACCOUNT, NULL);
2771 if (ext4_free_ext_cachep == NULL) {
2772 kmem_cache_destroy(ext4_pspace_cachep);
2773 kmem_cache_destroy(ext4_ac_cachep);
2774 return -ENOMEM;
2775 }
2774 return 0; 2776 return 0;
2775} 2777}
2776 2778
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void)
2779 /* XXX: synchronize_rcu(); */ 2781 /* XXX: synchronize_rcu(); */
2780 kmem_cache_destroy(ext4_pspace_cachep); 2782 kmem_cache_destroy(ext4_pspace_cachep);
2781 kmem_cache_destroy(ext4_ac_cachep); 2783 kmem_cache_destroy(ext4_ac_cachep);
2784 kmem_cache_destroy(ext4_free_ext_cachep);
2782} 2785}
2783 2786
2784 2787
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4324 goto out1; 4327 goto out1;
4325 } 4328 }
4326 4329
4327 ext4_mb_poll_new_transaction(sb, handle);
4328
4329 *errp = ext4_mb_initialize_context(ac, ar); 4330 *errp = ext4_mb_initialize_context(ac, ar);
4330 if (*errp) { 4331 if (*errp) {
4331 ar->len = 0; 4332 ar->len = 0;
@@ -4384,35 +4385,20 @@ out1:
4384 4385
4385 return block; 4386 return block;
4386} 4387}
4387static void ext4_mb_poll_new_transaction(struct super_block *sb,
4388 handle_t *handle)
4389{
4390 struct ext4_sb_info *sbi = EXT4_SB(sb);
4391
4392 if (sbi->s_last_transaction == handle->h_transaction->t_tid)
4393 return;
4394
4395 /* new transaction! time to close last one and free blocks for
4396 * committed transaction. we know that only transaction can be
4397 * active, so previos transaction can be being logged and we
4398 * know that transaction before previous is known to be already
4399 * logged. this means that now we may free blocks freed in all
4400 * transactions before previous one. hope I'm clear enough ... */
4401 4388
4402 spin_lock(&sbi->s_md_lock); 4389/*
4403 if (sbi->s_last_transaction != handle->h_transaction->t_tid) { 4390 * We can merge two free data extents only if the physical blocks
4404 mb_debug("new transaction %lu, old %lu\n", 4391 * are contiguous, AND the extents were freed by the same transaction,
4405 (unsigned long) handle->h_transaction->t_tid, 4392 * AND the blocks are associated with the same group.
4406 (unsigned long) sbi->s_last_transaction); 4393 */
4407 list_splice_init(&sbi->s_closed_transaction, 4394static int can_merge(struct ext4_free_data *entry1,
4408 &sbi->s_committed_transaction); 4395 struct ext4_free_data *entry2)
4409 list_splice_init(&sbi->s_active_transaction, 4396{
4410 &sbi->s_closed_transaction); 4397 if ((entry1->t_tid == entry2->t_tid) &&
4411 sbi->s_last_transaction = handle->h_transaction->t_tid; 4398 (entry1->group == entry2->group) &&
4412 } 4399 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4413 spin_unlock(&sbi->s_md_lock); 4400 return 1;
4414 4401 return 0;
4415 ext4_mb_free_committed_blocks(sb);
4416} 4402}
4417 4403
4418static noinline_for_stack int 4404static noinline_for_stack int
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 struct ext4_group_info *db = e4b->bd_info; 4408 struct ext4_group_info *db = e4b->bd_info;
4423 struct super_block *sb = e4b->bd_sb; 4409 struct super_block *sb = e4b->bd_sb;
4424 struct ext4_sb_info *sbi = EXT4_SB(sb); 4410 struct ext4_sb_info *sbi = EXT4_SB(sb);
4425 struct ext4_free_metadata *md; 4411 struct ext4_free_data *entry, *new_entry;
4426 int i; 4412 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4413 struct rb_node *parent = NULL, *new_node;
4414
4427 4415
4428 BUG_ON(e4b->bd_bitmap_page == NULL); 4416 BUG_ON(e4b->bd_bitmap_page == NULL);
4429 BUG_ON(e4b->bd_buddy_page == NULL); 4417 BUG_ON(e4b->bd_buddy_page == NULL);
4430 4418
4419 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4420 new_entry->start_blk = block;
4421 new_entry->group = group;
4422 new_entry->count = count;
4423 new_entry->t_tid = handle->h_transaction->t_tid;
4424 new_node = &new_entry->node;
4425
4431 ext4_lock_group(sb, group); 4426 ext4_lock_group(sb, group);
4432 for (i = 0; i < count; i++) { 4427 if (!*n) {
4433 md = db->bb_md_cur; 4428 /* first free block exent. We need to
4434 if (md && db->bb_tid != handle->h_transaction->t_tid) { 4429 protect buddy cache from being freed,
4435 db->bb_md_cur = NULL; 4430 * otherwise we'll refresh it from
4436 md = NULL; 4431 * on-disk bitmap and lose not-yet-available
4432 * blocks */
4433 page_cache_get(e4b->bd_buddy_page);
4434 page_cache_get(e4b->bd_bitmap_page);
4435 }
4436 while (*n) {
4437 parent = *n;
4438 entry = rb_entry(parent, struct ext4_free_data, node);
4439 if (block < entry->start_blk)
4440 n = &(*n)->rb_left;
4441 else if (block >= (entry->start_blk + entry->count))
4442 n = &(*n)->rb_right;
4443 else {
4444 ext4_error(sb, __func__,
4445 "Double free of blocks %d (%d %d)\n",
4446 block, entry->start_blk, entry->count);
4447 return 0;
4437 } 4448 }
4449 }
4438 4450
4439 if (md == NULL) { 4451 rb_link_node(new_node, parent, n);
4440 ext4_unlock_group(sb, group); 4452 rb_insert_color(new_node, &db->bb_free_root);
4441 md = kmalloc(sizeof(*md), GFP_NOFS); 4453
4442 if (md == NULL) 4454 /* Now try to see the extent can be merged to left and right */
4443 return -ENOMEM; 4455 node = rb_prev(new_node);
4444 md->num = 0; 4456 if (node) {
4445 md->group = group; 4457 entry = rb_entry(node, struct ext4_free_data, node);
4446 4458 if (can_merge(entry, new_entry)) {
4447 ext4_lock_group(sb, group); 4459 new_entry->start_blk = entry->start_blk;
4448 if (db->bb_md_cur == NULL) { 4460 new_entry->count += entry->count;
4449 spin_lock(&sbi->s_md_lock); 4461 rb_erase(node, &(db->bb_free_root));
4450 list_add(&md->list, &sbi->s_active_transaction); 4462 spin_lock(&sbi->s_md_lock);
4451 spin_unlock(&sbi->s_md_lock); 4463 list_del(&entry->list);
4452 /* protect buddy cache from being freed, 4464 spin_unlock(&sbi->s_md_lock);
4453 * otherwise we'll refresh it from 4465 kmem_cache_free(ext4_free_ext_cachep, entry);
4454 * on-disk bitmap and lose not-yet-available
4455 * blocks */
4456 page_cache_get(e4b->bd_buddy_page);
4457 page_cache_get(e4b->bd_bitmap_page);
4458 db->bb_md_cur = md;
4459 db->bb_tid = handle->h_transaction->t_tid;
4460 mb_debug("new md 0x%p for group %lu\n",
4461 md, md->group);
4462 } else {
4463 kfree(md);
4464 md = db->bb_md_cur;
4465 }
4466 } 4466 }
4467 }
4467 4468
4468 BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); 4469 node = rb_next(new_node);
4469 md->blocks[md->num] = block + i; 4470 if (node) {
4470 md->num++; 4471 entry = rb_entry(node, struct ext4_free_data, node);
4471 if (md->num == EXT4_BB_MAX_BLOCKS) { 4472 if (can_merge(new_entry, entry)) {
4472 /* no more space, put full container on a sb's list */ 4473 new_entry->count += entry->count;
4473 db->bb_md_cur = NULL; 4474 rb_erase(node, &(db->bb_free_root));
4475 spin_lock(&sbi->s_md_lock);
4476 list_del(&entry->list);
4477 spin_unlock(&sbi->s_md_lock);
4478 kmem_cache_free(ext4_free_ext_cachep, entry);
4474 } 4479 }
4475 } 4480 }
4481 /* Add the extent to transaction's private list */
4482 spin_lock(&sbi->s_md_lock);
4483 list_add(&new_entry->list, &handle->h_transaction->t_private_list);
4484 spin_unlock(&sbi->s_md_lock);
4476 ext4_unlock_group(sb, group); 4485 ext4_unlock_group(sb, group);
4477 return 0; 4486 return 0;
4478} 4487}
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4500 4509
4501 *freed = 0; 4510 *freed = 0;
4502 4511
4503 ext4_mb_poll_new_transaction(sb, handle);
4504
4505 sbi = EXT4_SB(sb); 4512 sbi = EXT4_SB(sb);
4506 es = EXT4_SB(sb)->s_es; 4513 es = EXT4_SB(sb)->s_es;
4507 if (block < le32_to_cpu(es->s_first_data_block) || 4514 if (block < le32_to_cpu(es->s_first_data_block) ||