aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c483
1 files changed, 199 insertions, 284 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e0e3a5eb1ddb..dfe17a134052 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
477 b2 = (unsigned char *) bitmap; 477 b2 = (unsigned char *) bitmap;
478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { 478 for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
479 if (b1[i] != b2[i]) { 479 if (b1[i] != b2[i]) {
480 printk("corruption in group %lu at byte %u(%u):" 480 printk(KERN_ERR "corruption in group %lu "
481 " %x in copy != %x on disk/prealloc\n", 481 "at byte %u(%u): %x in copy != %x "
482 e4b->bd_group, i, i * 8, b1[i], b2[i]); 482 "on disk/prealloc\n",
483 e4b->bd_group, i, i * 8, b1[i], b2[i]);
483 BUG(); 484 BUG();
484 } 485 }
485 } 486 }
@@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
533 void *buddy; 534 void *buddy;
534 void *buddy2; 535 void *buddy2;
535 536
536 if (!test_opt(sb, MBALLOC))
537 return 0;
538
539 { 537 {
540 static int mb_check_counter; 538 static int mb_check_counter;
541 if (mb_check_counter++ % 100 != 0) 539 if (mb_check_counter++ % 100 != 0)
@@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
784 if (bh[i] == NULL) 782 if (bh[i] == NULL)
785 goto out; 783 goto out;
786 784
787 if (bh_uptodate_or_lock(bh[i])) 785 if (buffer_uptodate(bh[i]) &&
786 !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
788 continue; 787 continue;
789 788
789 lock_buffer(bh[i]);
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); 790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
792 ext4_init_block_bitmap(sb, bh[i], 792 ext4_init_block_bitmap(sb, bh[i],
@@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb)
2169{ 2169{
2170 struct ext4_sb_info *sbi = EXT4_SB(sb); 2170 struct ext4_sb_info *sbi = EXT4_SB(sb);
2171 2171
2172 remove_proc_entry("mb_groups", sbi->s_mb_proc); 2172 if (sbi->s_proc != NULL) {
2173 remove_proc_entry("mb_history", sbi->s_mb_proc); 2173 remove_proc_entry("mb_groups", sbi->s_proc);
2174 2174 remove_proc_entry("mb_history", sbi->s_proc);
2175 }
2175 kfree(sbi->s_mb_history); 2176 kfree(sbi->s_mb_history);
2176} 2177}
2177 2178
@@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb)
2180 struct ext4_sb_info *sbi = EXT4_SB(sb); 2181 struct ext4_sb_info *sbi = EXT4_SB(sb);
2181 int i; 2182 int i;
2182 2183
2183 if (sbi->s_mb_proc != NULL) { 2184 if (sbi->s_proc != NULL) {
2184 proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, 2185 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2185 &ext4_mb_seq_history_fops, sb); 2186 &ext4_mb_seq_history_fops, sb);
2186 proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, 2187 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2187 &ext4_mb_seq_groups_fops, sb); 2188 &ext4_mb_seq_groups_fops, sb);
2188 } 2189 }
2189 2190
@@ -2299,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2299 } 2300 }
2300 2301
2301 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2302 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2303 meta_group_info[i]->bb_free_root.rb_node = NULL;;
2302 2304
2303#ifdef DOUBLE_CHECK 2305#ifdef DOUBLE_CHECK
2304 { 2306 {
@@ -2485,19 +2487,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2485 unsigned max; 2487 unsigned max;
2486 int ret; 2488 int ret;
2487 2489
2488 if (!test_opt(sb, MBALLOC))
2489 return 0;
2490
2491 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); 2490 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short);
2492 2491
2493 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); 2492 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2494 if (sbi->s_mb_offsets == NULL) { 2493 if (sbi->s_mb_offsets == NULL) {
2495 clear_opt(sbi->s_mount_opt, MBALLOC);
2496 return -ENOMEM; 2494 return -ENOMEM;
2497 } 2495 }
2498 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2496 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2499 if (sbi->s_mb_maxs == NULL) { 2497 if (sbi->s_mb_maxs == NULL) {
2500 clear_opt(sbi->s_mount_opt, MBALLOC);
2501 kfree(sbi->s_mb_maxs); 2498 kfree(sbi->s_mb_maxs);
2502 return -ENOMEM; 2499 return -ENOMEM;
2503 } 2500 }
@@ -2520,16 +2517,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2520 /* init file for buddy data */ 2517 /* init file for buddy data */
2521 ret = ext4_mb_init_backend(sb); 2518 ret = ext4_mb_init_backend(sb);
2522 if (ret != 0) { 2519 if (ret != 0) {
2523 clear_opt(sbi->s_mount_opt, MBALLOC);
2524 kfree(sbi->s_mb_offsets); 2520 kfree(sbi->s_mb_offsets);
2525 kfree(sbi->s_mb_maxs); 2521 kfree(sbi->s_mb_maxs);
2526 return ret; 2522 return ret;
2527 } 2523 }
2528 2524
2529 spin_lock_init(&sbi->s_md_lock); 2525 spin_lock_init(&sbi->s_md_lock);
2530 INIT_LIST_HEAD(&sbi->s_active_transaction);
2531 INIT_LIST_HEAD(&sbi->s_closed_transaction);
2532 INIT_LIST_HEAD(&sbi->s_committed_transaction);
2533 spin_lock_init(&sbi->s_bal_lock); 2526 spin_lock_init(&sbi->s_bal_lock);
2534 2527
2535 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; 2528 sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
@@ -2540,17 +2533,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2540 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; 2533 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2541 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2534 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2542 2535
2543 i = sizeof(struct ext4_locality_group) * nr_cpu_ids; 2536 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2544 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
2545 if (sbi->s_locality_groups == NULL) { 2537 if (sbi->s_locality_groups == NULL) {
2546 clear_opt(sbi->s_mount_opt, MBALLOC);
2547 kfree(sbi->s_mb_offsets); 2538 kfree(sbi->s_mb_offsets);
2548 kfree(sbi->s_mb_maxs); 2539 kfree(sbi->s_mb_maxs);
2549 return -ENOMEM; 2540 return -ENOMEM;
2550 } 2541 }
2551 for (i = 0; i < nr_cpu_ids; i++) { 2542 for_each_possible_cpu(i) {
2552 struct ext4_locality_group *lg; 2543 struct ext4_locality_group *lg;
2553 lg = &sbi->s_locality_groups[i]; 2544 lg = per_cpu_ptr(sbi->s_locality_groups, i);
2554 mutex_init(&lg->lg_mutex); 2545 mutex_init(&lg->lg_mutex);
2555 for (j = 0; j < PREALLOC_TB_SIZE; j++) 2546 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2556 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); 2547 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
@@ -2560,7 +2551,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2560 ext4_mb_init_per_dev_proc(sb); 2551 ext4_mb_init_per_dev_proc(sb);
2561 ext4_mb_history_init(sb); 2552 ext4_mb_history_init(sb);
2562 2553
2563 printk("EXT4-fs: mballoc enabled\n"); 2554 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2555
2556 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2564 return 0; 2557 return 0;
2565} 2558}
2566 2559
@@ -2575,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2575 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); 2568 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
2576 list_del(&pa->pa_group_list); 2569 list_del(&pa->pa_group_list);
2577 count++; 2570 count++;
2578 kfree(pa); 2571 kmem_cache_free(ext4_pspace_cachep, pa);
2579 } 2572 }
2580 if (count) 2573 if (count)
2581 mb_debug("mballoc: %u PAs left\n", count); 2574 mb_debug("mballoc: %u PAs left\n", count);
@@ -2589,18 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
2589 struct ext4_group_info *grinfo; 2582 struct ext4_group_info *grinfo;
2590 struct ext4_sb_info *sbi = EXT4_SB(sb); 2583 struct ext4_sb_info *sbi = EXT4_SB(sb);
2591 2584
2592 if (!test_opt(sb, MBALLOC))
2593 return 0;
2594
2595 /* release freed, non-committed blocks */
2596 spin_lock(&sbi->s_md_lock);
2597 list_splice_init(&sbi->s_closed_transaction,
2598 &sbi->s_committed_transaction);
2599 list_splice_init(&sbi->s_active_transaction,
2600 &sbi->s_committed_transaction);
2601 spin_unlock(&sbi->s_md_lock);
2602 ext4_mb_free_committed_blocks(sb);
2603
2604 if (sbi->s_group_info) { 2585 if (sbi->s_group_info) {
2605 for (i = 0; i < sbi->s_groups_count; i++) { 2586 for (i = 0; i < sbi->s_groups_count; i++) {
2606 grinfo = ext4_get_group_info(sb, i); 2587 grinfo = ext4_get_group_info(sb, i);
@@ -2647,69 +2628,64 @@ int ext4_mb_release(struct super_block *sb)
2647 atomic_read(&sbi->s_mb_discarded)); 2628 atomic_read(&sbi->s_mb_discarded));
2648 } 2629 }
2649 2630
2650 kfree(sbi->s_locality_groups); 2631 free_percpu(sbi->s_locality_groups);
2651
2652 ext4_mb_history_release(sb); 2632 ext4_mb_history_release(sb);
2653 ext4_mb_destroy_per_dev_proc(sb); 2633 ext4_mb_destroy_per_dev_proc(sb);
2654 2634
2655 return 0; 2635 return 0;
2656} 2636}
2657 2637
2658static noinline_for_stack void 2638/*
2659ext4_mb_free_committed_blocks(struct super_block *sb) 2639 * This function is called by the jbd2 layer once the commit has finished,
2640 * so we know we can free the blocks that were released with that commit.
2641 */
2642static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2660{ 2643{
2661 struct ext4_sb_info *sbi = EXT4_SB(sb); 2644 struct super_block *sb = journal->j_private;
2662 int err;
2663 int i;
2664 int count = 0;
2665 int count2 = 0;
2666 struct ext4_free_metadata *md;
2667 struct ext4_buddy e4b; 2645 struct ext4_buddy e4b;
2646 struct ext4_group_info *db;
2647 int err, count = 0, count2 = 0;
2648 struct ext4_free_data *entry;
2649 ext4_fsblk_t discard_block;
2650 struct list_head *l, *ltmp;
2668 2651
2669 if (list_empty(&sbi->s_committed_transaction)) 2652 list_for_each_safe(l, ltmp, &txn->t_private_list) {
2670 return; 2653 entry = list_entry(l, struct ext4_free_data, list);
2671
2672 /* there is committed blocks to be freed yet */
2673 do {
2674 /* get next array of blocks */
2675 md = NULL;
2676 spin_lock(&sbi->s_md_lock);
2677 if (!list_empty(&sbi->s_committed_transaction)) {
2678 md = list_entry(sbi->s_committed_transaction.next,
2679 struct ext4_free_metadata, list);
2680 list_del(&md->list);
2681 }
2682 spin_unlock(&sbi->s_md_lock);
2683
2684 if (md == NULL)
2685 break;
2686 2654
2687 mb_debug("gonna free %u blocks in group %lu (0x%p):", 2655 mb_debug("gonna free %u blocks in group %lu (0x%p):",
2688 md->num, md->group, md); 2656 entry->count, entry->group, entry);
2689 2657
2690 err = ext4_mb_load_buddy(sb, md->group, &e4b); 2658 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2691 /* we expect to find existing buddy because it's pinned */ 2659 /* we expect to find existing buddy because it's pinned */
2692 BUG_ON(err != 0); 2660 BUG_ON(err != 0);
2693 2661
2662 db = e4b.bd_info;
2694 /* there are blocks to put in buddy to make them really free */ 2663 /* there are blocks to put in buddy to make them really free */
2695 count += md->num; 2664 count += entry->count;
2696 count2++; 2665 count2++;
2697 ext4_lock_group(sb, md->group); 2666 ext4_lock_group(sb, entry->group);
2698 for (i = 0; i < md->num; i++) { 2667 /* Take it out of per group rb tree */
2699 mb_debug(" %u", md->blocks[i]); 2668 rb_erase(&entry->node, &(db->bb_free_root));
2700 mb_free_blocks(NULL, &e4b, md->blocks[i], 1); 2669 mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
2670
2671 if (!db->bb_free_root.rb_node) {
2672 /* No more items in the per group rb tree
2673 * balance refcounts from ext4_mb_free_metadata()
2674 */
2675 page_cache_release(e4b.bd_buddy_page);
2676 page_cache_release(e4b.bd_bitmap_page);
2701 } 2677 }
2702 mb_debug("\n"); 2678 ext4_unlock_group(sb, entry->group);
2703 ext4_unlock_group(sb, md->group); 2679 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2704 2680 + entry->start_blk
2705 /* balance refcounts from ext4_mb_free_metadata() */ 2681 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2706 page_cache_release(e4b.bd_buddy_page); 2682 trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
2707 page_cache_release(e4b.bd_bitmap_page); 2683 (unsigned long long) discard_block, entry->count);
2708 2684 sb_issue_discard(sb, discard_block, entry->count);
2709 kfree(md); 2685
2686 kmem_cache_free(ext4_free_ext_cachep, entry);
2710 ext4_mb_release_desc(&e4b); 2687 ext4_mb_release_desc(&e4b);
2711 2688 }
2712 } while (md);
2713 2689
2714 mb_debug("freed %u blocks in %u structures\n", count, count2); 2690 mb_debug("freed %u blocks in %u structures\n", count, count2);
2715} 2691}
@@ -2721,119 +2697,52 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
2721#define EXT4_MB_STREAM_REQ "stream_req" 2697#define EXT4_MB_STREAM_REQ "stream_req"
2722#define EXT4_MB_GROUP_PREALLOC "group_prealloc" 2698#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
2723 2699
2724
2725
2726#define MB_PROC_FOPS(name) \
2727static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \
2728{ \
2729 struct ext4_sb_info *sbi = m->private; \
2730 \
2731 seq_printf(m, "%ld\n", sbi->s_mb_##name); \
2732 return 0; \
2733} \
2734 \
2735static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
2736{ \
2737 return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
2738} \
2739 \
2740static ssize_t ext4_mb_##name##_proc_write(struct file *file, \
2741 const char __user *buf, size_t cnt, loff_t *ppos) \
2742{ \
2743 struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
2744 char str[32]; \
2745 long value; \
2746 if (cnt >= sizeof(str)) \
2747 return -EINVAL; \
2748 if (copy_from_user(str, buf, cnt)) \
2749 return -EFAULT; \
2750 value = simple_strtol(str, NULL, 0); \
2751 if (value <= 0) \
2752 return -ERANGE; \
2753 sbi->s_mb_##name = value; \
2754 return cnt; \
2755} \
2756 \
2757static const struct file_operations ext4_mb_##name##_proc_fops = { \
2758 .owner = THIS_MODULE, \
2759 .open = ext4_mb_##name##_proc_open, \
2760 .read = seq_read, \
2761 .llseek = seq_lseek, \
2762 .release = single_release, \
2763 .write = ext4_mb_##name##_proc_write, \
2764};
2765
2766MB_PROC_FOPS(stats);
2767MB_PROC_FOPS(max_to_scan);
2768MB_PROC_FOPS(min_to_scan);
2769MB_PROC_FOPS(order2_reqs);
2770MB_PROC_FOPS(stream_request);
2771MB_PROC_FOPS(group_prealloc);
2772
2773#define MB_PROC_HANDLER(name, var) \
2774do { \
2775 proc = proc_create_data(name, mode, sbi->s_mb_proc, \
2776 &ext4_mb_##var##_proc_fops, sbi); \
2777 if (proc == NULL) { \
2778 printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
2779 goto err_out; \
2780 } \
2781} while (0)
2782
2783static int ext4_mb_init_per_dev_proc(struct super_block *sb) 2700static int ext4_mb_init_per_dev_proc(struct super_block *sb)
2784{ 2701{
2702#ifdef CONFIG_PROC_FS
2785 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; 2703 mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
2786 struct ext4_sb_info *sbi = EXT4_SB(sb); 2704 struct ext4_sb_info *sbi = EXT4_SB(sb);
2787 struct proc_dir_entry *proc; 2705 struct proc_dir_entry *proc;
2788 char devname[64];
2789 2706
2790 if (proc_root_ext4 == NULL) { 2707 if (sbi->s_proc == NULL)
2791 sbi->s_mb_proc = NULL;
2792 return -EINVAL; 2708 return -EINVAL;
2793 }
2794 bdevname(sb->s_bdev, devname);
2795 sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
2796
2797 MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
2798 MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
2799 MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan);
2800 MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs);
2801 MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request);
2802 MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc);
2803 2709
2710 EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
2711 EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
2712 EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
2713 EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
2714 EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
2715 EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
2804 return 0; 2716 return 0;
2805 2717
2806err_out: 2718err_out:
2807 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); 2719 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2808 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); 2720 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2809 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); 2721 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2810 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); 2722 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2811 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); 2723 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2812 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); 2724 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2813 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
2814 remove_proc_entry(devname, proc_root_ext4);
2815 sbi->s_mb_proc = NULL;
2816
2817 return -ENOMEM; 2725 return -ENOMEM;
2726#else
2727 return 0;
2728#endif
2818} 2729}
2819 2730
2820static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) 2731static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
2821{ 2732{
2733#ifdef CONFIG_PROC_FS
2822 struct ext4_sb_info *sbi = EXT4_SB(sb); 2734 struct ext4_sb_info *sbi = EXT4_SB(sb);
2823 char devname[64];
2824 2735
2825 if (sbi->s_mb_proc == NULL) 2736 if (sbi->s_proc == NULL)
2826 return -EINVAL; 2737 return -EINVAL;
2827 2738
2828 bdevname(sb->s_bdev, devname); 2739 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
2829 remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); 2740 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
2830 remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); 2741 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
2831 remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); 2742 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
2832 remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); 2743 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
2833 remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); 2744 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
2834 remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); 2745#endif
2835 remove_proc_entry(devname, proc_root_ext4);
2836
2837 return 0; 2746 return 0;
2838} 2747}
2839 2748
@@ -2854,11 +2763,16 @@ int __init init_ext4_mballoc(void)
2854 kmem_cache_destroy(ext4_pspace_cachep); 2763 kmem_cache_destroy(ext4_pspace_cachep);
2855 return -ENOMEM; 2764 return -ENOMEM;
2856 } 2765 }
2857#ifdef CONFIG_PROC_FS 2766
2858 proc_root_ext4 = proc_mkdir("fs/ext4", NULL); 2767 ext4_free_ext_cachep =
2859 if (proc_root_ext4 == NULL) 2768 kmem_cache_create("ext4_free_block_extents",
2860 printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); 2769 sizeof(struct ext4_free_data),
2861#endif 2770 0, SLAB_RECLAIM_ACCOUNT, NULL);
2771 if (ext4_free_ext_cachep == NULL) {
2772 kmem_cache_destroy(ext4_pspace_cachep);
2773 kmem_cache_destroy(ext4_ac_cachep);
2774 return -ENOMEM;
2775 }
2862 return 0; 2776 return 0;
2863} 2777}
2864 2778
@@ -2867,9 +2781,7 @@ void exit_ext4_mballoc(void)
2867 /* XXX: synchronize_rcu(); */ 2781 /* XXX: synchronize_rcu(); */
2868 kmem_cache_destroy(ext4_pspace_cachep); 2782 kmem_cache_destroy(ext4_pspace_cachep);
2869 kmem_cache_destroy(ext4_ac_cachep); 2783 kmem_cache_destroy(ext4_ac_cachep);
2870#ifdef CONFIG_PROC_FS 2784 kmem_cache_destroy(ext4_free_ext_cachep);
2871 remove_proc_entry("fs/ext4", NULL);
2872#endif
2873} 2785}
2874 2786
2875 2787
@@ -2879,7 +2791,7 @@ void exit_ext4_mballoc(void)
2879 */ 2791 */
2880static noinline_for_stack int 2792static noinline_for_stack int
2881ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, 2793ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2882 handle_t *handle) 2794 handle_t *handle, unsigned long reserv_blks)
2883{ 2795{
2884 struct buffer_head *bitmap_bh = NULL; 2796 struct buffer_head *bitmap_bh = NULL;
2885 struct ext4_super_block *es; 2797 struct ext4_super_block *es;
@@ -2968,15 +2880,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2968 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); 2880 le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
2969 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); 2881 gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
2970 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); 2882 spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
2971 2883 percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
2972 /* 2884 /*
2973 * free blocks account has already be reduced/reserved 2885 * Now reduce the dirty block count also. Should not go negative
2974 * at write_begin() time for delayed allocation
2975 * do not double accounting
2976 */ 2886 */
2977 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2887 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2978 percpu_counter_sub(&sbi->s_freeblocks_counter, 2888 /* release all the reserved blocks if non delalloc */
2979 ac->ac_b_ex.fe_len); 2889 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2890 else
2891 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2892 ac->ac_b_ex.fe_len);
2980 2893
2981 if (sbi->s_log_groups_per_flex) { 2894 if (sbi->s_log_groups_per_flex) {
2982 ext4_group_t flex_group = ext4_flex_group(sbi, 2895 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3884,7 +3797,7 @@ out:
3884 * 3797 *
3885 * FIXME!! Make sure it is valid at all the call sites 3798 * FIXME!! Make sure it is valid at all the call sites
3886 */ 3799 */
3887void ext4_mb_discard_inode_preallocations(struct inode *inode) 3800void ext4_discard_preallocations(struct inode *inode)
3888{ 3801{
3889 struct ext4_inode_info *ei = EXT4_I(inode); 3802 struct ext4_inode_info *ei = EXT4_I(inode);
3890 struct super_block *sb = inode->i_sb; 3803 struct super_block *sb = inode->i_sb;
@@ -3896,7 +3809,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
3896 struct ext4_buddy e4b; 3809 struct ext4_buddy e4b;
3897 int err; 3810 int err;
3898 3811
3899 if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { 3812 if (!S_ISREG(inode->i_mode)) {
3900 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ 3813 /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/
3901 return; 3814 return;
3902 } 3815 }
@@ -4094,8 +4007,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4094 * per cpu locality group is to reduce the contention between block 4007 * per cpu locality group is to reduce the contention between block
4095 * request from multiple CPUs. 4008 * request from multiple CPUs.
4096 */ 4009 */
4097 ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; 4010 ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
4098 put_cpu();
4099 4011
4100 /* we're going to use group allocation */ 4012 /* we're going to use group allocation */
4101 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; 4013 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4369,33 +4281,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4369ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, 4281ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4370 struct ext4_allocation_request *ar, int *errp) 4282 struct ext4_allocation_request *ar, int *errp)
4371{ 4283{
4284 int freed;
4372 struct ext4_allocation_context *ac = NULL; 4285 struct ext4_allocation_context *ac = NULL;
4373 struct ext4_sb_info *sbi; 4286 struct ext4_sb_info *sbi;
4374 struct super_block *sb; 4287 struct super_block *sb;
4375 ext4_fsblk_t block = 0; 4288 ext4_fsblk_t block = 0;
4376 int freed; 4289 unsigned long inquota;
4377 int inquota; 4290 unsigned long reserv_blks = 0;
4378 4291
4379 sb = ar->inode->i_sb; 4292 sb = ar->inode->i_sb;
4380 sbi = EXT4_SB(sb); 4293 sbi = EXT4_SB(sb);
4381 4294
4382 if (!test_opt(sb, MBALLOC)) {
4383 block = ext4_old_new_blocks(handle, ar->inode, ar->goal,
4384 &(ar->len), errp);
4385 return block;
4386 }
4387 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { 4295 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
4388 /* 4296 /*
4389 * With delalloc we already reserved the blocks 4297 * With delalloc we already reserved the blocks
4390 */ 4298 */
4391 ar->len = ext4_has_free_blocks(sbi, ar->len); 4299 while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
4392 } 4300 /* let others to free the space */
4393 4301 yield();
4394 if (ar->len == 0) { 4302 ar->len = ar->len >> 1;
4395 *errp = -ENOSPC; 4303 }
4396 return 0; 4304 if (!ar->len) {
4305 *errp = -ENOSPC;
4306 return 0;
4307 }
4308 reserv_blks = ar->len;
4397 } 4309 }
4398
4399 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { 4310 while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
4400 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4311 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4401 ar->len--; 4312 ar->len--;
@@ -4416,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4416 goto out1; 4327 goto out1;
4417 } 4328 }
4418 4329
4419 ext4_mb_poll_new_transaction(sb, handle);
4420
4421 *errp = ext4_mb_initialize_context(ac, ar); 4330 *errp = ext4_mb_initialize_context(ac, ar);
4422 if (*errp) { 4331 if (*errp) {
4423 ar->len = 0; 4332 ar->len = 0;
@@ -4441,7 +4350,7 @@ repeat:
4441 } 4350 }
4442 4351
4443 if (likely(ac->ac_status == AC_STATUS_FOUND)) { 4352 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4444 *errp = ext4_mb_mark_diskspace_used(ac, handle); 4353 *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
4445 if (*errp == -EAGAIN) { 4354 if (*errp == -EAGAIN) {
4446 ac->ac_b_ex.fe_group = 0; 4355 ac->ac_b_ex.fe_group = 0;
4447 ac->ac_b_ex.fe_start = 0; 4356 ac->ac_b_ex.fe_start = 0;
@@ -4476,35 +4385,20 @@ out1:
4476 4385
4477 return block; 4386 return block;
4478} 4387}
4479static void ext4_mb_poll_new_transaction(struct super_block *sb,
4480 handle_t *handle)
4481{
4482 struct ext4_sb_info *sbi = EXT4_SB(sb);
4483 4388
4484 if (sbi->s_last_transaction == handle->h_transaction->t_tid) 4389/*
4485 return; 4390 * We can merge two free data extents only if the physical blocks
4486 4391 * are contiguous, AND the extents were freed by the same transaction,
4487 /* new transaction! time to close last one and free blocks for 4392 * AND the blocks are associated with the same group.
4488 * committed transaction. we know that only transaction can be 4393 */
4489 * active, so previos transaction can be being logged and we 4394static int can_merge(struct ext4_free_data *entry1,
4490 * know that transaction before previous is known to be already 4395 struct ext4_free_data *entry2)
4491 * logged. this means that now we may free blocks freed in all 4396{
4492 * transactions before previous one. hope I'm clear enough ... */ 4397 if ((entry1->t_tid == entry2->t_tid) &&
4493 4398 (entry1->group == entry2->group) &&
4494 spin_lock(&sbi->s_md_lock); 4399 ((entry1->start_blk + entry1->count) == entry2->start_blk))
4495 if (sbi->s_last_transaction != handle->h_transaction->t_tid) { 4400 return 1;
4496 mb_debug("new transaction %lu, old %lu\n", 4401 return 0;
4497 (unsigned long) handle->h_transaction->t_tid,
4498 (unsigned long) sbi->s_last_transaction);
4499 list_splice_init(&sbi->s_closed_transaction,
4500 &sbi->s_committed_transaction);
4501 list_splice_init(&sbi->s_active_transaction,
4502 &sbi->s_closed_transaction);
4503 sbi->s_last_transaction = handle->h_transaction->t_tid;
4504 }
4505 spin_unlock(&sbi->s_md_lock);
4506
4507 ext4_mb_free_committed_blocks(sb);
4508} 4402}
4509 4403
4510static noinline_for_stack int 4404static noinline_for_stack int
@@ -4514,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4514 struct ext4_group_info *db = e4b->bd_info; 4408 struct ext4_group_info *db = e4b->bd_info;
4515 struct super_block *sb = e4b->bd_sb; 4409 struct super_block *sb = e4b->bd_sb;
4516 struct ext4_sb_info *sbi = EXT4_SB(sb); 4410 struct ext4_sb_info *sbi = EXT4_SB(sb);
4517 struct ext4_free_metadata *md; 4411 struct ext4_free_data *entry, *new_entry;
4518 int i; 4412 struct rb_node **n = &db->bb_free_root.rb_node, *node;
4413 struct rb_node *parent = NULL, *new_node;
4414
4519 4415
4520 BUG_ON(e4b->bd_bitmap_page == NULL); 4416 BUG_ON(e4b->bd_bitmap_page == NULL);
4521 BUG_ON(e4b->bd_buddy_page == NULL); 4417 BUG_ON(e4b->bd_buddy_page == NULL);
4522 4418
4419 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
4420 new_entry->start_blk = block;
4421 new_entry->group = group;
4422 new_entry->count = count;
4423 new_entry->t_tid = handle->h_transaction->t_tid;
4424 new_node = &new_entry->node;
4425
4523 ext4_lock_group(sb, group); 4426 ext4_lock_group(sb, group);
4524 for (i = 0; i < count; i++) { 4427 if (!*n) {
4525 md = db->bb_md_cur; 4428 /* first free block exent. We need to
4526 if (md && db->bb_tid != handle->h_transaction->t_tid) { 4429 protect buddy cache from being freed,
4527 db->bb_md_cur = NULL; 4430 * otherwise we'll refresh it from
4528 md = NULL; 4431 * on-disk bitmap and lose not-yet-available
4432 * blocks */
4433 page_cache_get(e4b->bd_buddy_page);
4434 page_cache_get(e4b->bd_bitmap_page);
4435 }
4436 while (*n) {
4437 parent = *n;
4438 entry = rb_entry(parent, struct ext4_free_data, node);
4439 if (block < entry->start_blk)
4440 n = &(*n)->rb_left;
4441 else if (block >= (entry->start_blk + entry->count))
4442 n = &(*n)->rb_right;
4443 else {
4444 ext4_error(sb, __func__,
4445 "Double free of blocks %d (%d %d)\n",
4446 block, entry->start_blk, entry->count);
4447 return 0;
4529 } 4448 }
4449 }
4530 4450
4531 if (md == NULL) { 4451 rb_link_node(new_node, parent, n);
4532 ext4_unlock_group(sb, group); 4452 rb_insert_color(new_node, &db->bb_free_root);
4533 md = kmalloc(sizeof(*md), GFP_NOFS); 4453
4534 if (md == NULL) 4454 /* Now try to see the extent can be merged to left and right */
4535 return -ENOMEM; 4455 node = rb_prev(new_node);
4536 md->num = 0; 4456 if (node) {
4537 md->group = group; 4457 entry = rb_entry(node, struct ext4_free_data, node);
4538 4458 if (can_merge(entry, new_entry)) {
4539 ext4_lock_group(sb, group); 4459 new_entry->start_blk = entry->start_blk;
4540 if (db->bb_md_cur == NULL) { 4460 new_entry->count += entry->count;
4541 spin_lock(&sbi->s_md_lock); 4461 rb_erase(node, &(db->bb_free_root));
4542 list_add(&md->list, &sbi->s_active_transaction); 4462 spin_lock(&sbi->s_md_lock);
4543 spin_unlock(&sbi->s_md_lock); 4463 list_del(&entry->list);
4544 /* protect buddy cache from being freed, 4464 spin_unlock(&sbi->s_md_lock);
4545 * otherwise we'll refresh it from 4465 kmem_cache_free(ext4_free_ext_cachep, entry);
4546 * on-disk bitmap and lose not-yet-available
4547 * blocks */
4548 page_cache_get(e4b->bd_buddy_page);
4549 page_cache_get(e4b->bd_bitmap_page);
4550 db->bb_md_cur = md;
4551 db->bb_tid = handle->h_transaction->t_tid;
4552 mb_debug("new md 0x%p for group %lu\n",
4553 md, md->group);
4554 } else {
4555 kfree(md);
4556 md = db->bb_md_cur;
4557 }
4558 } 4466 }
4467 }
4559 4468
4560 BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); 4469 node = rb_next(new_node);
4561 md->blocks[md->num] = block + i; 4470 if (node) {
4562 md->num++; 4471 entry = rb_entry(node, struct ext4_free_data, node);
4563 if (md->num == EXT4_BB_MAX_BLOCKS) { 4472 if (can_merge(new_entry, entry)) {
4564 /* no more space, put full container on a sb's list */ 4473 new_entry->count += entry->count;
4565 db->bb_md_cur = NULL; 4474 rb_erase(node, &(db->bb_free_root));
4475 spin_lock(&sbi->s_md_lock);
4476 list_del(&entry->list);
4477 spin_unlock(&sbi->s_md_lock);
4478 kmem_cache_free(ext4_free_ext_cachep, entry);
4566 } 4479 }
4567 } 4480 }
4481 /* Add the extent to transaction's private list */
4482 spin_lock(&sbi->s_md_lock);
4483 list_add(&new_entry->list, &handle->h_transaction->t_private_list);
4484 spin_unlock(&sbi->s_md_lock);
4568 ext4_unlock_group(sb, group); 4485 ext4_unlock_group(sb, group);
4569 return 0; 4486 return 0;
4570} 4487}
@@ -4592,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4592 4509
4593 *freed = 0; 4510 *freed = 0;
4594 4511
4595 ext4_mb_poll_new_transaction(sb, handle);
4596
4597 sbi = EXT4_SB(sb); 4512 sbi = EXT4_SB(sb);
4598 es = EXT4_SB(sb)->s_es; 4513 es = EXT4_SB(sb)->s_es;
4599 if (block < le32_to_cpu(es->s_first_data_block) || 4514 if (block < le32_to_cpu(es->s_first_data_block) ||