diff options
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 536 |
1 files changed, 245 insertions, 291 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 865e9ddb44d4..dfe17a134052 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) | |||
477 | b2 = (unsigned char *) bitmap; | 477 | b2 = (unsigned char *) bitmap; |
478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { | 478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { |
479 | if (b1[i] != b2[i]) { | 479 | if (b1[i] != b2[i]) { |
480 | printk("corruption in group %lu at byte %u(%u):" | 480 | printk(KERN_ERR "corruption in group %lu " |
481 | " %x in copy != %x on disk/prealloc\n", | 481 | "at byte %u(%u): %x in copy != %x " |
482 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | 482 | "on disk/prealloc\n", |
483 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | ||
483 | BUG(); | 484 | BUG(); |
484 | } | 485 | } |
485 | } | 486 | } |
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
533 | void *buddy; | 534 | void *buddy; |
534 | void *buddy2; | 535 | void *buddy2; |
535 | 536 | ||
536 | if (!test_opt(sb, MBALLOC)) | ||
537 | return 0; | ||
538 | |||
539 | { | 537 | { |
540 | static int mb_check_counter; | 538 | static int mb_check_counter; |
541 | if (mb_check_counter++ % 100 != 0) | 539 | if (mb_check_counter++ % 100 != 0) |
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
784 | if (bh[i] == NULL) | 782 | if (bh[i] == NULL) |
785 | goto out; | 783 | goto out; |
786 | 784 | ||
787 | if (bh_uptodate_or_lock(bh[i])) | 785 | if (buffer_uptodate(bh[i]) && |
786 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
788 | continue; | 787 | continue; |
789 | 788 | ||
789 | lock_buffer(bh[i]); | ||
790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
792 | ext4_init_block_bitmap(sb, bh[i], | 792 | ext4_init_block_bitmap(sb, bh[i], |
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb) | |||
2169 | { | 2169 | { |
2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2171 | 2171 | ||
2172 | remove_proc_entry("mb_groups", sbi->s_mb_proc); | 2172 | if (sbi->s_proc != NULL) { |
2173 | remove_proc_entry("mb_history", sbi->s_mb_proc); | 2173 | remove_proc_entry("mb_groups", sbi->s_proc); |
2174 | 2174 | remove_proc_entry("mb_history", sbi->s_proc); | |
2175 | } | ||
2175 | kfree(sbi->s_mb_history); | 2176 | kfree(sbi->s_mb_history); |
2176 | } | 2177 | } |
2177 | 2178 | ||
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
2180 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2181 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2181 | int i; | 2182 | int i; |
2182 | 2183 | ||
2183 | if (sbi->s_mb_proc != NULL) { | 2184 | if (sbi->s_proc != NULL) { |
2184 | proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, | 2185 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, |
2185 | &ext4_mb_seq_history_fops, sb); | 2186 | &ext4_mb_seq_history_fops, sb); |
2186 | proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, | 2187 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
2187 | &ext4_mb_seq_groups_fops, sb); | 2188 | &ext4_mb_seq_groups_fops, sb); |
2188 | } | 2189 | } |
2189 | 2190 | ||
@@ -2299,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2299 | } | 2300 | } |
2300 | 2301 | ||
2301 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2303 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | ||
2302 | 2304 | ||
2303 | #ifdef DOUBLE_CHECK | 2305 | #ifdef DOUBLE_CHECK |
2304 | { | 2306 | { |
@@ -2485,19 +2487,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2485 | unsigned max; | 2487 | unsigned max; |
2486 | int ret; | 2488 | int ret; |
2487 | 2489 | ||
2488 | if (!test_opt(sb, MBALLOC)) | ||
2489 | return 0; | ||
2490 | |||
2491 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2490 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); |
2492 | 2491 | ||
2493 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2492 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
2494 | if (sbi->s_mb_offsets == NULL) { | 2493 | if (sbi->s_mb_offsets == NULL) { |
2495 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2496 | return -ENOMEM; | 2494 | return -ENOMEM; |
2497 | } | 2495 | } |
2498 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2496 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2499 | if (sbi->s_mb_maxs == NULL) { | 2497 | if (sbi->s_mb_maxs == NULL) { |
2500 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2501 | kfree(sbi->s_mb_maxs); | 2498 | kfree(sbi->s_mb_maxs); |
2502 | return -ENOMEM; | 2499 | return -ENOMEM; |
2503 | } | 2500 | } |
@@ -2520,16 +2517,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2520 | /* init file for buddy data */ | 2517 | /* init file for buddy data */ |
2521 | ret = ext4_mb_init_backend(sb); | 2518 | ret = ext4_mb_init_backend(sb); |
2522 | if (ret != 0) { | 2519 | if (ret != 0) { |
2523 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2524 | kfree(sbi->s_mb_offsets); | 2520 | kfree(sbi->s_mb_offsets); |
2525 | kfree(sbi->s_mb_maxs); | 2521 | kfree(sbi->s_mb_maxs); |
2526 | return ret; | 2522 | return ret; |
2527 | } | 2523 | } |
2528 | 2524 | ||
2529 | spin_lock_init(&sbi->s_md_lock); | 2525 | spin_lock_init(&sbi->s_md_lock); |
2530 | INIT_LIST_HEAD(&sbi->s_active_transaction); | ||
2531 | INIT_LIST_HEAD(&sbi->s_closed_transaction); | ||
2532 | INIT_LIST_HEAD(&sbi->s_committed_transaction); | ||
2533 | spin_lock_init(&sbi->s_bal_lock); | 2526 | spin_lock_init(&sbi->s_bal_lock); |
2534 | 2527 | ||
2535 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; | 2528 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; |
@@ -2540,17 +2533,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2540 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; | 2533 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; |
2541 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2534 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
2542 | 2535 | ||
2543 | i = sizeof(struct ext4_locality_group) * nr_cpu_ids; | 2536 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
2544 | sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); | ||
2545 | if (sbi->s_locality_groups == NULL) { | 2537 | if (sbi->s_locality_groups == NULL) { |
2546 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
2547 | kfree(sbi->s_mb_offsets); | 2538 | kfree(sbi->s_mb_offsets); |
2548 | kfree(sbi->s_mb_maxs); | 2539 | kfree(sbi->s_mb_maxs); |
2549 | return -ENOMEM; | 2540 | return -ENOMEM; |
2550 | } | 2541 | } |
2551 | for (i = 0; i < nr_cpu_ids; i++) { | 2542 | for_each_possible_cpu(i) { |
2552 | struct ext4_locality_group *lg; | 2543 | struct ext4_locality_group *lg; |
2553 | lg = &sbi->s_locality_groups[i]; | 2544 | lg = per_cpu_ptr(sbi->s_locality_groups, i); |
2554 | mutex_init(&lg->lg_mutex); | 2545 | mutex_init(&lg->lg_mutex); |
2555 | for (j = 0; j < PREALLOC_TB_SIZE; j++) | 2546 | for (j = 0; j < PREALLOC_TB_SIZE; j++) |
2556 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); | 2547 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); |
@@ -2560,7 +2551,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2560 | ext4_mb_init_per_dev_proc(sb); | 2551 | ext4_mb_init_per_dev_proc(sb); |
2561 | ext4_mb_history_init(sb); | 2552 | ext4_mb_history_init(sb); |
2562 | 2553 | ||
2563 | printk("EXT4-fs: mballoc enabled\n"); | 2554 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
2555 | |||
2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); | ||
2564 | return 0; | 2557 | return 0; |
2565 | } | 2558 | } |
2566 | 2559 | ||
@@ -2575,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2575 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); | 2568 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); |
2576 | list_del(&pa->pa_group_list); | 2569 | list_del(&pa->pa_group_list); |
2577 | count++; | 2570 | count++; |
2578 | kfree(pa); | 2571 | kmem_cache_free(ext4_pspace_cachep, pa); |
2579 | } | 2572 | } |
2580 | if (count) | 2573 | if (count) |
2581 | mb_debug("mballoc: %u PAs left\n", count); | 2574 | mb_debug("mballoc: %u PAs left\n", count); |
@@ -2589,18 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2589 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
2590 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2591 | 2584 | ||
2592 | if (!test_opt(sb, MBALLOC)) | ||
2593 | return 0; | ||
2594 | |||
2595 | /* release freed, non-committed blocks */ | ||
2596 | spin_lock(&sbi->s_md_lock); | ||
2597 | list_splice_init(&sbi->s_closed_transaction, | ||
2598 | &sbi->s_committed_transaction); | ||
2599 | list_splice_init(&sbi->s_active_transaction, | ||
2600 | &sbi->s_committed_transaction); | ||
2601 | spin_unlock(&sbi->s_md_lock); | ||
2602 | ext4_mb_free_committed_blocks(sb); | ||
2603 | |||
2604 | if (sbi->s_group_info) { | 2585 | if (sbi->s_group_info) { |
2605 | for (i = 0; i < sbi->s_groups_count; i++) { | 2586 | for (i = 0; i < sbi->s_groups_count; i++) { |
2606 | grinfo = ext4_get_group_info(sb, i); | 2587 | grinfo = ext4_get_group_info(sb, i); |
@@ -2647,69 +2628,64 @@ int ext4_mb_release(struct super_block *sb) | |||
2647 | atomic_read(&sbi->s_mb_discarded)); | 2628 | atomic_read(&sbi->s_mb_discarded)); |
2648 | } | 2629 | } |
2649 | 2630 | ||
2650 | kfree(sbi->s_locality_groups); | 2631 | free_percpu(sbi->s_locality_groups); |
2651 | |||
2652 | ext4_mb_history_release(sb); | 2632 | ext4_mb_history_release(sb); |
2653 | ext4_mb_destroy_per_dev_proc(sb); | 2633 | ext4_mb_destroy_per_dev_proc(sb); |
2654 | 2634 | ||
2655 | return 0; | 2635 | return 0; |
2656 | } | 2636 | } |
2657 | 2637 | ||
2658 | static noinline_for_stack void | 2638 | /* |
2659 | ext4_mb_free_committed_blocks(struct super_block *sb) | 2639 | * This function is called by the jbd2 layer once the commit has finished, |
2640 | * so we know we can free the blocks that were released with that commit. | ||
2641 | */ | ||
2642 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | ||
2660 | { | 2643 | { |
2661 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2644 | struct super_block *sb = journal->j_private; |
2662 | int err; | ||
2663 | int i; | ||
2664 | int count = 0; | ||
2665 | int count2 = 0; | ||
2666 | struct ext4_free_metadata *md; | ||
2667 | struct ext4_buddy e4b; | 2645 | struct ext4_buddy e4b; |
2646 | struct ext4_group_info *db; | ||
2647 | int err, count = 0, count2 = 0; | ||
2648 | struct ext4_free_data *entry; | ||
2649 | ext4_fsblk_t discard_block; | ||
2650 | struct list_head *l, *ltmp; | ||
2668 | 2651 | ||
2669 | if (list_empty(&sbi->s_committed_transaction)) | 2652 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2670 | return; | 2653 | entry = list_entry(l, struct ext4_free_data, list); |
2671 | |||
2672 | /* there is committed blocks to be freed yet */ | ||
2673 | do { | ||
2674 | /* get next array of blocks */ | ||
2675 | md = NULL; | ||
2676 | spin_lock(&sbi->s_md_lock); | ||
2677 | if (!list_empty(&sbi->s_committed_transaction)) { | ||
2678 | md = list_entry(sbi->s_committed_transaction.next, | ||
2679 | struct ext4_free_metadata, list); | ||
2680 | list_del(&md->list); | ||
2681 | } | ||
2682 | spin_unlock(&sbi->s_md_lock); | ||
2683 | |||
2684 | if (md == NULL) | ||
2685 | break; | ||
2686 | 2654 | ||
2687 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | 2655 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
2688 | md->num, md->group, md); | 2656 | entry->count, entry->group, entry); |
2689 | 2657 | ||
2690 | err = ext4_mb_load_buddy(sb, md->group, &e4b); | 2658 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2691 | /* we expect to find existing buddy because it's pinned */ | 2659 | /* we expect to find existing buddy because it's pinned */ |
2692 | BUG_ON(err != 0); | 2660 | BUG_ON(err != 0); |
2693 | 2661 | ||
2662 | db = e4b.bd_info; | ||
2694 | /* there are blocks to put in buddy to make them really free */ | 2663 | /* there are blocks to put in buddy to make them really free */ |
2695 | count += md->num; | 2664 | count += entry->count; |
2696 | count2++; | 2665 | count2++; |
2697 | ext4_lock_group(sb, md->group); | 2666 | ext4_lock_group(sb, entry->group); |
2698 | for (i = 0; i < md->num; i++) { | 2667 | /* Take it out of per group rb tree */ |
2699 | mb_debug(" %u", md->blocks[i]); | 2668 | rb_erase(&entry->node, &(db->bb_free_root)); |
2700 | mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | 2669 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
2670 | |||
2671 | if (!db->bb_free_root.rb_node) { | ||
2672 | /* No more items in the per group rb tree | ||
2673 | * balance refcounts from ext4_mb_free_metadata() | ||
2674 | */ | ||
2675 | page_cache_release(e4b.bd_buddy_page); | ||
2676 | page_cache_release(e4b.bd_bitmap_page); | ||
2701 | } | 2677 | } |
2702 | mb_debug("\n"); | 2678 | ext4_unlock_group(sb, entry->group); |
2703 | ext4_unlock_group(sb, md->group); | 2679 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) |
2704 | 2680 | + entry->start_blk | |
2705 | /* balance refcounts from ext4_mb_free_metadata() */ | 2681 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
2706 | page_cache_release(e4b.bd_buddy_page); | 2682 | trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, |
2707 | page_cache_release(e4b.bd_bitmap_page); | 2683 | (unsigned long long) discard_block, entry->count); |
2708 | 2684 | sb_issue_discard(sb, discard_block, entry->count); | |
2709 | kfree(md); | 2685 | |
2686 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
2710 | ext4_mb_release_desc(&e4b); | 2687 | ext4_mb_release_desc(&e4b); |
2711 | 2688 | } | |
2712 | } while (md); | ||
2713 | 2689 | ||
2714 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2690 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
2715 | } | 2691 | } |
@@ -2721,119 +2697,52 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
2721 | #define EXT4_MB_STREAM_REQ "stream_req" | 2697 | #define EXT4_MB_STREAM_REQ "stream_req" |
2722 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" | 2698 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" |
2723 | 2699 | ||
2724 | |||
2725 | |||
2726 | #define MB_PROC_FOPS(name) \ | ||
2727 | static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ | ||
2728 | { \ | ||
2729 | struct ext4_sb_info *sbi = m->private; \ | ||
2730 | \ | ||
2731 | seq_printf(m, "%ld\n", sbi->s_mb_##name); \ | ||
2732 | return 0; \ | ||
2733 | } \ | ||
2734 | \ | ||
2735 | static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ | ||
2736 | { \ | ||
2737 | return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ | ||
2738 | } \ | ||
2739 | \ | ||
2740 | static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ | ||
2741 | const char __user *buf, size_t cnt, loff_t *ppos) \ | ||
2742 | { \ | ||
2743 | struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ | ||
2744 | char str[32]; \ | ||
2745 | long value; \ | ||
2746 | if (cnt >= sizeof(str)) \ | ||
2747 | return -EINVAL; \ | ||
2748 | if (copy_from_user(str, buf, cnt)) \ | ||
2749 | return -EFAULT; \ | ||
2750 | value = simple_strtol(str, NULL, 0); \ | ||
2751 | if (value <= 0) \ | ||
2752 | return -ERANGE; \ | ||
2753 | sbi->s_mb_##name = value; \ | ||
2754 | return cnt; \ | ||
2755 | } \ | ||
2756 | \ | ||
2757 | static const struct file_operations ext4_mb_##name##_proc_fops = { \ | ||
2758 | .owner = THIS_MODULE, \ | ||
2759 | .open = ext4_mb_##name##_proc_open, \ | ||
2760 | .read = seq_read, \ | ||
2761 | .llseek = seq_lseek, \ | ||
2762 | .release = single_release, \ | ||
2763 | .write = ext4_mb_##name##_proc_write, \ | ||
2764 | }; | ||
2765 | |||
2766 | MB_PROC_FOPS(stats); | ||
2767 | MB_PROC_FOPS(max_to_scan); | ||
2768 | MB_PROC_FOPS(min_to_scan); | ||
2769 | MB_PROC_FOPS(order2_reqs); | ||
2770 | MB_PROC_FOPS(stream_request); | ||
2771 | MB_PROC_FOPS(group_prealloc); | ||
2772 | |||
2773 | #define MB_PROC_HANDLER(name, var) \ | ||
2774 | do { \ | ||
2775 | proc = proc_create_data(name, mode, sbi->s_mb_proc, \ | ||
2776 | &ext4_mb_##var##_proc_fops, sbi); \ | ||
2777 | if (proc == NULL) { \ | ||
2778 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ | ||
2779 | goto err_out; \ | ||
2780 | } \ | ||
2781 | } while (0) | ||
2782 | |||
2783 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2700 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
2784 | { | 2701 | { |
2702 | #ifdef CONFIG_PROC_FS | ||
2785 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | 2703 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; |
2786 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2704 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2787 | struct proc_dir_entry *proc; | 2705 | struct proc_dir_entry *proc; |
2788 | char devname[64]; | ||
2789 | 2706 | ||
2790 | if (proc_root_ext4 == NULL) { | 2707 | if (sbi->s_proc == NULL) |
2791 | sbi->s_mb_proc = NULL; | ||
2792 | return -EINVAL; | 2708 | return -EINVAL; |
2793 | } | ||
2794 | bdevname(sb->s_bdev, devname); | ||
2795 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | ||
2796 | |||
2797 | MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); | ||
2798 | MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); | ||
2799 | MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); | ||
2800 | MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); | ||
2801 | MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); | ||
2802 | MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); | ||
2803 | 2709 | ||
2710 | EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); | ||
2711 | EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); | ||
2712 | EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); | ||
2713 | EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); | ||
2714 | EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); | ||
2715 | EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); | ||
2804 | return 0; | 2716 | return 0; |
2805 | 2717 | ||
2806 | err_out: | 2718 | err_out: |
2807 | printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); | 2719 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2808 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2720 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2809 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2721 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2810 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2722 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2811 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2723 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2812 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2724 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2813 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
2814 | remove_proc_entry(devname, proc_root_ext4); | ||
2815 | sbi->s_mb_proc = NULL; | ||
2816 | |||
2817 | return -ENOMEM; | 2725 | return -ENOMEM; |
2726 | #else | ||
2727 | return 0; | ||
2728 | #endif | ||
2818 | } | 2729 | } |
2819 | 2730 | ||
2820 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | 2731 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) |
2821 | { | 2732 | { |
2733 | #ifdef CONFIG_PROC_FS | ||
2822 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2734 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2823 | char devname[64]; | ||
2824 | 2735 | ||
2825 | if (sbi->s_mb_proc == NULL) | 2736 | if (sbi->s_proc == NULL) |
2826 | return -EINVAL; | 2737 | return -EINVAL; |
2827 | 2738 | ||
2828 | bdevname(sb->s_bdev, devname); | 2739 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
2829 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2740 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
2830 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2741 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
2831 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2742 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2832 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2743 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2833 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2744 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2834 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | 2745 | #endif |
2835 | remove_proc_entry(devname, proc_root_ext4); | ||
2836 | |||
2837 | return 0; | 2746 | return 0; |
2838 | } | 2747 | } |
2839 | 2748 | ||
@@ -2854,11 +2763,16 @@ int __init init_ext4_mballoc(void) | |||
2854 | kmem_cache_destroy(ext4_pspace_cachep); | 2763 | kmem_cache_destroy(ext4_pspace_cachep); |
2855 | return -ENOMEM; | 2764 | return -ENOMEM; |
2856 | } | 2765 | } |
2857 | #ifdef CONFIG_PROC_FS | 2766 | |
2858 | proc_root_ext4 = proc_mkdir("fs/ext4", NULL); | 2767 | ext4_free_ext_cachep = |
2859 | if (proc_root_ext4 == NULL) | 2768 | kmem_cache_create("ext4_free_block_extents", |
2860 | printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); | 2769 | sizeof(struct ext4_free_data), |
2861 | #endif | 2770 | 0, SLAB_RECLAIM_ACCOUNT, NULL); |
2771 | if (ext4_free_ext_cachep == NULL) { | ||
2772 | kmem_cache_destroy(ext4_pspace_cachep); | ||
2773 | kmem_cache_destroy(ext4_ac_cachep); | ||
2774 | return -ENOMEM; | ||
2775 | } | ||
2862 | return 0; | 2776 | return 0; |
2863 | } | 2777 | } |
2864 | 2778 | ||
@@ -2867,9 +2781,7 @@ void exit_ext4_mballoc(void) | |||
2867 | /* XXX: synchronize_rcu(); */ | 2781 | /* XXX: synchronize_rcu(); */ |
2868 | kmem_cache_destroy(ext4_pspace_cachep); | 2782 | kmem_cache_destroy(ext4_pspace_cachep); |
2869 | kmem_cache_destroy(ext4_ac_cachep); | 2783 | kmem_cache_destroy(ext4_ac_cachep); |
2870 | #ifdef CONFIG_PROC_FS | 2784 | kmem_cache_destroy(ext4_free_ext_cachep); |
2871 | remove_proc_entry("fs/ext4", NULL); | ||
2872 | #endif | ||
2873 | } | 2785 | } |
2874 | 2786 | ||
2875 | 2787 | ||
@@ -2879,7 +2791,7 @@ void exit_ext4_mballoc(void) | |||
2879 | */ | 2791 | */ |
2880 | static noinline_for_stack int | 2792 | static noinline_for_stack int |
2881 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2793 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
2882 | handle_t *handle) | 2794 | handle_t *handle, unsigned long reserv_blks) |
2883 | { | 2795 | { |
2884 | struct buffer_head *bitmap_bh = NULL; | 2796 | struct buffer_head *bitmap_bh = NULL; |
2885 | struct ext4_super_block *es; | 2797 | struct ext4_super_block *es; |
@@ -2968,15 +2880,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2968 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 2880 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); |
2969 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
2970 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2882 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
2971 | 2883 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | |
2972 | /* | 2884 | /* |
2973 | * free blocks account has already be reduced/reserved | 2885 | * Now reduce the dirty block count also. Should not go negative |
2974 | * at write_begin() time for delayed allocation | ||
2975 | * do not double accounting | ||
2976 | */ | 2886 | */ |
2977 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2887 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
2978 | percpu_counter_sub(&sbi->s_freeblocks_counter, | 2888 | /* release all the reserved blocks if non delalloc */ |
2979 | ac->ac_b_ex.fe_len); | 2889 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); |
2890 | else | ||
2891 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | ||
2892 | ac->ac_b_ex.fe_len); | ||
2980 | 2893 | ||
2981 | if (sbi->s_log_groups_per_flex) { | 2894 | if (sbi->s_log_groups_per_flex) { |
2982 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2895 | ext4_group_t flex_group = ext4_flex_group(sbi, |
@@ -3282,6 +3195,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3282 | } | 3195 | } |
3283 | 3196 | ||
3284 | /* | 3197 | /* |
3198 | * Return the prealloc space that have minimal distance | ||
3199 | * from the goal block. @cpa is the prealloc | ||
3200 | * space that is having currently known minimal distance | ||
3201 | * from the goal block. | ||
3202 | */ | ||
3203 | static struct ext4_prealloc_space * | ||
3204 | ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | ||
3205 | struct ext4_prealloc_space *pa, | ||
3206 | struct ext4_prealloc_space *cpa) | ||
3207 | { | ||
3208 | ext4_fsblk_t cur_distance, new_distance; | ||
3209 | |||
3210 | if (cpa == NULL) { | ||
3211 | atomic_inc(&pa->pa_count); | ||
3212 | return pa; | ||
3213 | } | ||
3214 | cur_distance = abs(goal_block - cpa->pa_pstart); | ||
3215 | new_distance = abs(goal_block - pa->pa_pstart); | ||
3216 | |||
3217 | if (cur_distance < new_distance) | ||
3218 | return cpa; | ||
3219 | |||
3220 | /* drop the previous reference */ | ||
3221 | atomic_dec(&cpa->pa_count); | ||
3222 | atomic_inc(&pa->pa_count); | ||
3223 | return pa; | ||
3224 | } | ||
3225 | |||
3226 | /* | ||
3285 | * search goal blocks in preallocated space | 3227 | * search goal blocks in preallocated space |
3286 | */ | 3228 | */ |
3287 | static noinline_for_stack int | 3229 | static noinline_for_stack int |
@@ -3290,7 +3232,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3290 | int order, i; | 3232 | int order, i; |
3291 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3233 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
3292 | struct ext4_locality_group *lg; | 3234 | struct ext4_locality_group *lg; |
3293 | struct ext4_prealloc_space *pa; | 3235 | struct ext4_prealloc_space *pa, *cpa = NULL; |
3236 | ext4_fsblk_t goal_block; | ||
3294 | 3237 | ||
3295 | /* only data can be preallocated */ | 3238 | /* only data can be preallocated */ |
3296 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 3239 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
@@ -3333,6 +3276,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3333 | /* The max size of hash table is PREALLOC_TB_SIZE */ | 3276 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
3334 | order = PREALLOC_TB_SIZE - 1; | 3277 | order = PREALLOC_TB_SIZE - 1; |
3335 | 3278 | ||
3279 | goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + | ||
3280 | ac->ac_g_ex.fe_start + | ||
3281 | le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); | ||
3282 | /* | ||
3283 | * search for the prealloc space that is having | ||
3284 | * minimal distance from the goal block. | ||
3285 | */ | ||
3336 | for (i = order; i < PREALLOC_TB_SIZE; i++) { | 3286 | for (i = order; i < PREALLOC_TB_SIZE; i++) { |
3337 | rcu_read_lock(); | 3287 | rcu_read_lock(); |
3338 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], | 3288 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], |
@@ -3340,17 +3290,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3340 | spin_lock(&pa->pa_lock); | 3290 | spin_lock(&pa->pa_lock); |
3341 | if (pa->pa_deleted == 0 && | 3291 | if (pa->pa_deleted == 0 && |
3342 | pa->pa_free >= ac->ac_o_ex.fe_len) { | 3292 | pa->pa_free >= ac->ac_o_ex.fe_len) { |
3343 | atomic_inc(&pa->pa_count); | 3293 | |
3344 | ext4_mb_use_group_pa(ac, pa); | 3294 | cpa = ext4_mb_check_group_pa(goal_block, |
3345 | spin_unlock(&pa->pa_lock); | 3295 | pa, cpa); |
3346 | ac->ac_criteria = 20; | ||
3347 | rcu_read_unlock(); | ||
3348 | return 1; | ||
3349 | } | 3296 | } |
3350 | spin_unlock(&pa->pa_lock); | 3297 | spin_unlock(&pa->pa_lock); |
3351 | } | 3298 | } |
3352 | rcu_read_unlock(); | 3299 | rcu_read_unlock(); |
3353 | } | 3300 | } |
3301 | if (cpa) { | ||
3302 | ext4_mb_use_group_pa(ac, cpa); | ||
3303 | ac->ac_criteria = 20; | ||
3304 | return 1; | ||
3305 | } | ||
3354 | return 0; | 3306 | return 0; |
3355 | } | 3307 | } |
3356 | 3308 | ||
@@ -3845,7 +3797,7 @@ out: | |||
3845 | * | 3797 | * |
3846 | * FIXME!! Make sure it is valid at all the call sites | 3798 | * FIXME!! Make sure it is valid at all the call sites |
3847 | */ | 3799 | */ |
3848 | void ext4_mb_discard_inode_preallocations(struct inode *inode) | 3800 | void ext4_discard_preallocations(struct inode *inode) |
3849 | { | 3801 | { |
3850 | struct ext4_inode_info *ei = EXT4_I(inode); | 3802 | struct ext4_inode_info *ei = EXT4_I(inode); |
3851 | struct super_block *sb = inode->i_sb; | 3803 | struct super_block *sb = inode->i_sb; |
@@ -3857,7 +3809,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) | |||
3857 | struct ext4_buddy e4b; | 3809 | struct ext4_buddy e4b; |
3858 | int err; | 3810 | int err; |
3859 | 3811 | ||
3860 | if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { | 3812 | if (!S_ISREG(inode->i_mode)) { |
3861 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ | 3813 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ |
3862 | return; | 3814 | return; |
3863 | } | 3815 | } |
@@ -4055,8 +4007,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
4055 | * per cpu locality group is to reduce the contention between block | 4007 | * per cpu locality group is to reduce the contention between block |
4056 | * request from multiple CPUs. | 4008 | * request from multiple CPUs. |
4057 | */ | 4009 | */ |
4058 | ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; | 4010 | ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); |
4059 | put_cpu(); | ||
4060 | 4011 | ||
4061 | /* we're going to use group allocation */ | 4012 | /* we're going to use group allocation */ |
4062 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; | 4013 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; |
@@ -4330,33 +4281,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4330 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | 4281 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
4331 | struct ext4_allocation_request *ar, int *errp) | 4282 | struct ext4_allocation_request *ar, int *errp) |
4332 | { | 4283 | { |
4284 | int freed; | ||
4333 | struct ext4_allocation_context *ac = NULL; | 4285 | struct ext4_allocation_context *ac = NULL; |
4334 | struct ext4_sb_info *sbi; | 4286 | struct ext4_sb_info *sbi; |
4335 | struct super_block *sb; | 4287 | struct super_block *sb; |
4336 | ext4_fsblk_t block = 0; | 4288 | ext4_fsblk_t block = 0; |
4337 | int freed; | 4289 | unsigned long inquota; |
4338 | int inquota; | 4290 | unsigned long reserv_blks = 0; |
4339 | 4291 | ||
4340 | sb = ar->inode->i_sb; | 4292 | sb = ar->inode->i_sb; |
4341 | sbi = EXT4_SB(sb); | 4293 | sbi = EXT4_SB(sb); |
4342 | 4294 | ||
4343 | if (!test_opt(sb, MBALLOC)) { | ||
4344 | block = ext4_old_new_blocks(handle, ar->inode, ar->goal, | ||
4345 | &(ar->len), errp); | ||
4346 | return block; | ||
4347 | } | ||
4348 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | 4295 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { |
4349 | /* | 4296 | /* |
4350 | * With delalloc we already reserved the blocks | 4297 | * With delalloc we already reserved the blocks |
4351 | */ | 4298 | */ |
4352 | ar->len = ext4_has_free_blocks(sbi, ar->len); | 4299 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { |
4353 | } | 4300 | /* let others to free the space */ |
4354 | 4301 | yield(); | |
4355 | if (ar->len == 0) { | 4302 | ar->len = ar->len >> 1; |
4356 | *errp = -ENOSPC; | 4303 | } |
4357 | return 0; | 4304 | if (!ar->len) { |
4305 | *errp = -ENOSPC; | ||
4306 | return 0; | ||
4307 | } | ||
4308 | reserv_blks = ar->len; | ||
4358 | } | 4309 | } |
4359 | |||
4360 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | 4310 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { |
4361 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4311 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
4362 | ar->len--; | 4312 | ar->len--; |
@@ -4377,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4377 | goto out1; | 4327 | goto out1; |
4378 | } | 4328 | } |
4379 | 4329 | ||
4380 | ext4_mb_poll_new_transaction(sb, handle); | ||
4381 | |||
4382 | *errp = ext4_mb_initialize_context(ac, ar); | 4330 | *errp = ext4_mb_initialize_context(ac, ar); |
4383 | if (*errp) { | 4331 | if (*errp) { |
4384 | ar->len = 0; | 4332 | ar->len = 0; |
@@ -4402,7 +4350,7 @@ repeat: | |||
4402 | } | 4350 | } |
4403 | 4351 | ||
4404 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4352 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4405 | *errp = ext4_mb_mark_diskspace_used(ac, handle); | 4353 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
4406 | if (*errp == -EAGAIN) { | 4354 | if (*errp == -EAGAIN) { |
4407 | ac->ac_b_ex.fe_group = 0; | 4355 | ac->ac_b_ex.fe_group = 0; |
4408 | ac->ac_b_ex.fe_start = 0; | 4356 | ac->ac_b_ex.fe_start = 0; |
@@ -4437,35 +4385,20 @@ out1: | |||
4437 | 4385 | ||
4438 | return block; | 4386 | return block; |
4439 | } | 4387 | } |
4440 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | ||
4441 | handle_t *handle) | ||
4442 | { | ||
4443 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4444 | 4388 | ||
4445 | if (sbi->s_last_transaction == handle->h_transaction->t_tid) | 4389 | /* |
4446 | return; | 4390 | * We can merge two free data extents only if the physical blocks |
4447 | 4391 | * are contiguous, AND the extents were freed by the same transaction, | |
4448 | /* new transaction! time to close last one and free blocks for | 4392 | * AND the blocks are associated with the same group. |
4449 | * committed transaction. we know that only transaction can be | 4393 | */ |
4450 | * active, so previos transaction can be being logged and we | 4394 | static int can_merge(struct ext4_free_data *entry1, |
4451 | * know that transaction before previous is known to be already | 4395 | struct ext4_free_data *entry2) |
4452 | * logged. this means that now we may free blocks freed in all | 4396 | { |
4453 | * transactions before previous one. hope I'm clear enough ... */ | 4397 | if ((entry1->t_tid == entry2->t_tid) && |
4454 | 4398 | (entry1->group == entry2->group) && | |
4455 | spin_lock(&sbi->s_md_lock); | 4399 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) |
4456 | if (sbi->s_last_transaction != handle->h_transaction->t_tid) { | 4400 | return 1; |
4457 | mb_debug("new transaction %lu, old %lu\n", | 4401 | return 0; |
4458 | (unsigned long) handle->h_transaction->t_tid, | ||
4459 | (unsigned long) sbi->s_last_transaction); | ||
4460 | list_splice_init(&sbi->s_closed_transaction, | ||
4461 | &sbi->s_committed_transaction); | ||
4462 | list_splice_init(&sbi->s_active_transaction, | ||
4463 | &sbi->s_closed_transaction); | ||
4464 | sbi->s_last_transaction = handle->h_transaction->t_tid; | ||
4465 | } | ||
4466 | spin_unlock(&sbi->s_md_lock); | ||
4467 | |||
4468 | ext4_mb_free_committed_blocks(sb); | ||
4469 | } | 4402 | } |
4470 | 4403 | ||
4471 | static noinline_for_stack int | 4404 | static noinline_for_stack int |
@@ -4475,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4475 | struct ext4_group_info *db = e4b->bd_info; | 4408 | struct ext4_group_info *db = e4b->bd_info; |
4476 | struct super_block *sb = e4b->bd_sb; | 4409 | struct super_block *sb = e4b->bd_sb; |
4477 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4410 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4478 | struct ext4_free_metadata *md; | 4411 | struct ext4_free_data *entry, *new_entry; |
4479 | int i; | 4412 | struct rb_node **n = &db->bb_free_root.rb_node, *node; |
4413 | struct rb_node *parent = NULL, *new_node; | ||
4414 | |||
4480 | 4415 | ||
4481 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4416 | BUG_ON(e4b->bd_bitmap_page == NULL); |
4482 | BUG_ON(e4b->bd_buddy_page == NULL); | 4417 | BUG_ON(e4b->bd_buddy_page == NULL); |
4483 | 4418 | ||
4419 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | ||
4420 | new_entry->start_blk = block; | ||
4421 | new_entry->group = group; | ||
4422 | new_entry->count = count; | ||
4423 | new_entry->t_tid = handle->h_transaction->t_tid; | ||
4424 | new_node = &new_entry->node; | ||
4425 | |||
4484 | ext4_lock_group(sb, group); | 4426 | ext4_lock_group(sb, group); |
4485 | for (i = 0; i < count; i++) { | 4427 | if (!*n) { |
4486 | md = db->bb_md_cur; | 4428 | /* first free block exent. We need to |
4487 | if (md && db->bb_tid != handle->h_transaction->t_tid) { | 4429 | protect buddy cache from being freed, |
4488 | db->bb_md_cur = NULL; | 4430 | * otherwise we'll refresh it from |
4489 | md = NULL; | 4431 | * on-disk bitmap and lose not-yet-available |
4432 | * blocks */ | ||
4433 | page_cache_get(e4b->bd_buddy_page); | ||
4434 | page_cache_get(e4b->bd_bitmap_page); | ||
4435 | } | ||
4436 | while (*n) { | ||
4437 | parent = *n; | ||
4438 | entry = rb_entry(parent, struct ext4_free_data, node); | ||
4439 | if (block < entry->start_blk) | ||
4440 | n = &(*n)->rb_left; | ||
4441 | else if (block >= (entry->start_blk + entry->count)) | ||
4442 | n = &(*n)->rb_right; | ||
4443 | else { | ||
4444 | ext4_error(sb, __func__, | ||
4445 | "Double free of blocks %d (%d %d)\n", | ||
4446 | block, entry->start_blk, entry->count); | ||
4447 | return 0; | ||
4490 | } | 4448 | } |
4449 | } | ||
4491 | 4450 | ||
4492 | if (md == NULL) { | 4451 | rb_link_node(new_node, parent, n); |
4493 | ext4_unlock_group(sb, group); | 4452 | rb_insert_color(new_node, &db->bb_free_root); |
4494 | md = kmalloc(sizeof(*md), GFP_NOFS); | 4453 | |
4495 | if (md == NULL) | 4454 | /* Now try to see the extent can be merged to left and right */ |
4496 | return -ENOMEM; | 4455 | node = rb_prev(new_node); |
4497 | md->num = 0; | 4456 | if (node) { |
4498 | md->group = group; | 4457 | entry = rb_entry(node, struct ext4_free_data, node); |
4499 | 4458 | if (can_merge(entry, new_entry)) { | |
4500 | ext4_lock_group(sb, group); | 4459 | new_entry->start_blk = entry->start_blk; |
4501 | if (db->bb_md_cur == NULL) { | 4460 | new_entry->count += entry->count; |
4502 | spin_lock(&sbi->s_md_lock); | 4461 | rb_erase(node, &(db->bb_free_root)); |
4503 | list_add(&md->list, &sbi->s_active_transaction); | 4462 | spin_lock(&sbi->s_md_lock); |
4504 | spin_unlock(&sbi->s_md_lock); | 4463 | list_del(&entry->list); |
4505 | /* protect buddy cache from being freed, | 4464 | spin_unlock(&sbi->s_md_lock); |
4506 | * otherwise we'll refresh it from | 4465 | kmem_cache_free(ext4_free_ext_cachep, entry); |
4507 | * on-disk bitmap and lose not-yet-available | ||
4508 | * blocks */ | ||
4509 | page_cache_get(e4b->bd_buddy_page); | ||
4510 | page_cache_get(e4b->bd_bitmap_page); | ||
4511 | db->bb_md_cur = md; | ||
4512 | db->bb_tid = handle->h_transaction->t_tid; | ||
4513 | mb_debug("new md 0x%p for group %lu\n", | ||
4514 | md, md->group); | ||
4515 | } else { | ||
4516 | kfree(md); | ||
4517 | md = db->bb_md_cur; | ||
4518 | } | ||
4519 | } | 4466 | } |
4467 | } | ||
4520 | 4468 | ||
4521 | BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); | 4469 | node = rb_next(new_node); |
4522 | md->blocks[md->num] = block + i; | 4470 | if (node) { |
4523 | md->num++; | 4471 | entry = rb_entry(node, struct ext4_free_data, node); |
4524 | if (md->num == EXT4_BB_MAX_BLOCKS) { | 4472 | if (can_merge(new_entry, entry)) { |
4525 | /* no more space, put full container on a sb's list */ | 4473 | new_entry->count += entry->count; |
4526 | db->bb_md_cur = NULL; | 4474 | rb_erase(node, &(db->bb_free_root)); |
4475 | spin_lock(&sbi->s_md_lock); | ||
4476 | list_del(&entry->list); | ||
4477 | spin_unlock(&sbi->s_md_lock); | ||
4478 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4527 | } | 4479 | } |
4528 | } | 4480 | } |
4481 | /* Add the extent to transaction's private list */ | ||
4482 | spin_lock(&sbi->s_md_lock); | ||
4483 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | ||
4484 | spin_unlock(&sbi->s_md_lock); | ||
4529 | ext4_unlock_group(sb, group); | 4485 | ext4_unlock_group(sb, group); |
4530 | return 0; | 4486 | return 0; |
4531 | } | 4487 | } |
@@ -4553,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4553 | 4509 | ||
4554 | *freed = 0; | 4510 | *freed = 0; |
4555 | 4511 | ||
4556 | ext4_mb_poll_new_transaction(sb, handle); | ||
4557 | |||
4558 | sbi = EXT4_SB(sb); | 4512 | sbi = EXT4_SB(sb); |
4559 | es = EXT4_SB(sb)->s_es; | 4513 | es = EXT4_SB(sb)->s_es; |
4560 | if (block < le32_to_cpu(es->s_first_data_block) || | 4514 | if (block < le32_to_cpu(es->s_first_data_block) || |