Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--  fs/ext4/mballoc.c | 180
1 file changed, 111 insertions, 69 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bba12824defa..b423a364dca3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -23,6 +23,7 @@
 
 #include "mballoc.h"
 #include <linux/debugfs.h>
+#include <linux/slab.h>
 #include <trace/events/ext4.h>
 
 /*
@@ -69,7 +70,7 @@
  *
  * pa_lstart -> the logical start block for this prealloc space
  * pa_pstart -> the physical start block for this prealloc space
- * pa_len -> lenght for this prealloc space
+ * pa_len -> length for this prealloc space
  * pa_free -> free space available in this prealloc space
  *
  * The inode preallocation space is used looking at the _logical_ start
@@ -142,7 +143,7 @@
  * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
  * value of s_mb_order2_reqs can be tuned via
  * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
- * stripe size (sbi->s_stripe), we try to search for contigous block in
+ * stripe size (sbi->s_stripe), we try to search for contiguous block in
 * stripe size. This should result in better allocation on RAID setups. If
 * not, we search in the specific group using bitmap for best extents. The
 * tunable min_to_scan and max_to_scan control the behaviour here.
@@ -441,10 +442,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
        for (i = 0; i < count; i++) {
                if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
                        ext4_fsblk_t blocknr;
-                       blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
+
+                       blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
                        blocknr += first + i;
-                       blocknr +=
-                               le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
                        ext4_grp_locked_error(sb, e4b->bd_group,
                                   __func__, "double-free of inode"
                                   " %lu's block %llu(bit %u in group %u)",
@@ -1255,10 +1255,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 
        if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
                ext4_fsblk_t blocknr;
-               blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
+
+               blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
                blocknr += block;
-               blocknr +=
-                       le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
                ext4_grp_locked_error(sb, e4b->bd_group,
                          __func__, "double-free of inode"
                          " %lu's block %llu(bit %u in group %u)",
@@ -1631,7 +1630,6 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
        int max;
        int err;
        struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
-       struct ext4_super_block *es = sbi->s_es;
        struct ext4_free_extent ex;
 
        if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
@@ -1648,8 +1646,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
        if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
                ext4_fsblk_t start;
 
-               start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) +
-                       ex.fe_start + le32_to_cpu(es->s_first_data_block);
+               start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
+                       ex.fe_start;
                /* use do_div to get remainder (would be 64-bit modulo) */
                if (do_div(start, sbi->s_stripe) == 0) {
                        ac->ac_found++;
@@ -1803,8 +1801,8 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
        BUG_ON(sbi->s_stripe == 0);
 
        /* find first stripe-aligned block in group */
-       first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb)
-               + le32_to_cpu(sbi->s_es->s_first_data_block);
+       first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
+
        a = first_group_block + sbi->s_stripe - 1;
        do_div(a, sbi->s_stripe);
        i = (a * sbi->s_stripe) - first_group_block;
@@ -2256,7 +2254,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
        init_rwsem(&meta_group_info[i]->alloc_sem);
-       meta_group_info[i]->bb_free_root.rb_node = NULL;
+       meta_group_info[i]->bb_free_root = RB_ROOT;
 
 #ifdef DOUBLE_CHECK
        {
@@ -2529,7 +2527,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
        struct ext4_group_info *db;
        int err, count = 0, count2 = 0;
        struct ext4_free_data *entry;
-       ext4_fsblk_t discard_block;
        struct list_head *l, *ltmp;
 
        list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2538,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
                         entry->count, entry->group, entry);
 
+               if (test_opt(sb, DISCARD)) {
+                       ext4_fsblk_t discard_block;
+
+                       discard_block = entry->start_blk +
+                               ext4_group_first_block_no(sb, entry->group);
+                       trace_ext4_discard_blocks(sb,
+                                       (unsigned long long)discard_block,
+                                       entry->count);
+                       sb_issue_discard(sb, discard_block, entry->count);
+               }
+
                err = ext4_mb_load_buddy(sb, entry->group, &e4b);
                /* we expect to find existing buddy because it's pinned */
                BUG_ON(err != 0);
@@ -2559,13 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                        page_cache_release(e4b.bd_bitmap_page);
                }
                ext4_unlock_group(sb, entry->group);
-               discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
-                       + entry->start_blk
-                       + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-               trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
-                                         entry->count);
-               sb_issue_discard(sb, discard_block, entry->count);
-
                kmem_cache_free(ext4_free_ext_cachep, entry);
                ext4_mb_release_desc(&e4b);
        }
@@ -2698,14 +2699,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        if (err)
                goto out_err;
 
-       block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb)
-               + ac->ac_b_ex.fe_start
-               + le32_to_cpu(es->s_first_data_block);
+       block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
 
        len = ac->ac_b_ex.fe_len;
        if (!ext4_data_block_valid(sbi, block, len)) {
-               ext4_error(sb, __func__,
-                          "Allocating blocks %llu-%llu which overlap "
+               ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
                           "fs metadata\n", block, block+len);
                /* File system mounted not to panic on error
                 * Fix the bitmap and repeat the block allocation
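ext4_grp_offs_to_block() plays the same role for a (group, offset) pair such as ac->ac_b_ex: judging from the three-line sum it replaces here and in a later hunk, it is roughly the sketch below (again, the actual helper is defined outside this diff and the sketch_ name is illustrative):

static inline ext4_fsblk_t sketch_grp_offs_to_block(struct super_block *sb,
                                                    struct ext4_free_extent *fex)
{
        /* absolute block = first block of the extent's group + in-group offset */
        return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start;
}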
@@ -2750,12 +2748,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
                /* release all the reserved blocks if non delalloc */
                percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
-       else {
-               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
-                                               ac->ac_b_ex.fe_len);
-               /* convert reserved quota blocks to real quota blocks */
-               vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
-       }
 
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3006,6 +2998,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
 }
 
 /*
+ * Called on failure; free up any blocks from the inode PA for this
+ * context. We don't need this for MB_GROUP_PA because we only change
+ * pa_free in ext4_mb_release_context(), but on failure, we've already
+ * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
+ */
+static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
+{
+       struct ext4_prealloc_space *pa = ac->ac_pa;
+       int len;
+
+       if (pa && pa->pa_type == MB_INODE_PA) {
+               len = ac->ac_b_ex.fe_len;
+               pa->pa_free += len;
+       }
+
+}
+
+/*
  * use blocks preallocated to inode
  */
 static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
@@ -3144,9 +3154,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
        /* The max size of hash table is PREALLOC_TB_SIZE */
        order = PREALLOC_TB_SIZE - 1;
 
-       goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
-                    ac->ac_g_ex.fe_start +
-                    le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
+       goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
        /*
         * search for the prealloc space that is having
         * minimal distance from the goal block.
@@ -3509,8 +3517,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                if (bit >= end)
                        break;
                next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
-               start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
-                       le32_to_cpu(sbi->s_es->s_first_data_block);
+               start = ext4_group_first_block_no(sb, group) + bit;
                mb_debug(1, " free preallocated %u/%u in group %u\n",
                         (unsigned) start, (unsigned) next - bit,
                         (unsigned) group);
@@ -3606,15 +3613,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 
        bitmap_bh = ext4_read_block_bitmap(sb, group);
        if (bitmap_bh == NULL) {
-               ext4_error(sb, __func__, "Error in reading block "
-                               "bitmap for %u", group);
+               ext4_error(sb, "Error reading block bitmap for %u", group);
                return 0;
        }
 
        err = ext4_mb_load_buddy(sb, group, &e4b);
        if (err) {
-               ext4_error(sb, __func__, "Error in loading buddy "
-                               "information for %u", group);
+               ext4_error(sb, "Error loading buddy information for %u", group);
                put_bh(bitmap_bh);
                return 0;
        }
@@ -3787,15 +3792,15 @@ repeat:
 
                err = ext4_mb_load_buddy(sb, group, &e4b);
                if (err) {
-                       ext4_error(sb, __func__, "Error in loading buddy "
-                                       "information for %u", group);
+                       ext4_error(sb, "Error loading buddy information for %u",
+                                       group);
                        continue;
                }
 
                bitmap_bh = ext4_read_block_bitmap(sb, group);
                if (bitmap_bh == NULL) {
-                       ext4_error(sb, __func__, "Error in reading block "
-                                       "bitmap for %u", group);
+                       ext4_error(sb, "Error reading block bitmap for %u",
+                                       group);
                        ext4_mb_release_desc(&e4b);
                        continue;
                }
@@ -3921,7 +3926,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 
        /* don't use group allocation for large files */
        size = max(size, isize);
-       if (size >= sbi->s_mb_stream_request) {
+       if (size > sbi->s_mb_stream_request) {
                ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
                return;
        }
@@ -3932,7 +3937,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
         * per cpu locality group is to reduce the contention between block
         * request from multiple CPUs.
         */
-       ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id());
+       ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
 
        /* we're going to use group allocation */
        ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4060,8 +4065,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 
                ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
                if (ext4_mb_load_buddy(sb, group, &e4b)) {
-                       ext4_error(sb, __func__, "Error in loading buddy "
-                                       "information for %u", group);
+                       ext4_error(sb, "Error loading buddy information for %u",
+                                       group);
                        continue;
                }
                ext4_lock_group(sb, group);
@@ -4237,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                return 0;
        }
        reserv_blks = ar->len;
-       while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) {
+       while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
                ar->flags |= EXT4_MB_HINT_NOPREALLOC;
                ar->len--;
        }
@@ -4290,6 +4295,7 @@ repeat:
                        ac->ac_status = AC_STATUS_CONTINUE;
                        goto repeat;
                } else if (*errp) {
+                       ext4_discard_allocated_blocks(ac);
                        ac->ac_b_ex.fe_len = 0;
                        ar->len = 0;
                        ext4_mb_show_ac(ac);
@@ -4313,7 +4319,7 @@ out2:
        kmem_cache_free(ext4_ac_cachep, ac);
 out1:
        if (inquota && ar->len < inquota)
-               vfs_dq_free_block(ar->inode, inquota - ar->len);
+               dquot_free_block(ar->inode, inquota - ar->len);
 out3:
        if (!ar->len) {
                if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
@@ -4422,18 +4428,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
        return 0;
 }
 
-/*
- * Main entry point into mballoc to free blocks
+/**
+ * ext4_free_blocks() -- Free given blocks and update quota
+ * @handle:    handle for this transaction
+ * @inode:     inode
+ * @block:     start physical block to free
+ * @count:     number of blocks to count
+ * @metadata:  Are these metadata blocks
  */
-void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t block, unsigned long count,
-                       int metadata, unsigned long *freed)
+void ext4_free_blocks(handle_t *handle, struct inode *inode,
+                     struct buffer_head *bh, ext4_fsblk_t block,
+                     unsigned long count, int flags)
 {
        struct buffer_head *bitmap_bh = NULL;
        struct super_block *sb = inode->i_sb;
        struct ext4_allocation_context *ac = NULL;
        struct ext4_group_desc *gdp;
        struct ext4_super_block *es;
+       unsigned long freed = 0;
        unsigned int overflow;
        ext4_grpblk_t bit;
        struct buffer_head *gd_bh;
@@ -4443,21 +4455,49 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        int err = 0;
        int ret;
 
-       *freed = 0;
+       if (bh) {
+               if (block)
+                       BUG_ON(block != bh->b_blocknr);
+               else
+                       block = bh->b_blocknr;
+       }
 
        sbi = EXT4_SB(sb);
        es = EXT4_SB(sb)->s_es;
-       if (block < le32_to_cpu(es->s_first_data_block) ||
-           block + count < block ||
-           block + count > ext4_blocks_count(es)) {
-               ext4_error(sb, __func__,
-                          "Freeing blocks not in datazone - "
-                          "block = %llu, count = %lu", block, count);
+       if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
+           !ext4_data_block_valid(sbi, block, count)) {
+               ext4_error(sb, "Freeing blocks not in datazone - "
+                          "block = %llu, count = %lu", block, count);
                goto error_return;
        }
 
        ext4_debug("freeing block %llu\n", block);
-       trace_ext4_free_blocks(inode, block, count, metadata);
+       trace_ext4_free_blocks(inode, block, count, flags);
+
+       if (flags & EXT4_FREE_BLOCKS_FORGET) {
+               struct buffer_head *tbh = bh;
+               int i;
+
+               BUG_ON(bh && (count > 1));
+
+               for (i = 0; i < count; i++) {
+                       if (!bh)
+                               tbh = sb_find_get_block(inode->i_sb,
+                                                       block + i);
+                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
+                                   inode, tbh, block + i);
+               }
+       }
+
+       /*
+        * We need to make sure we don't reuse the freed block until
+        * after the transaction is committed, which we can do by
+        * treating the block as metadata, below. We make an
+        * exception if the inode is to be written in writeback mode
+        * since writeback mode has weak data consistency guarantees.
+        */
+       if (!ext4_should_writeback_data(inode))
+               flags |= EXT4_FREE_BLOCKS_METADATA;
 
        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
        if (ac) {
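The renamed ext4_free_blocks() now takes an optional buffer_head plus a flags word in place of the old metadata flag and *freed out-parameter, and it releases quota itself via dquot_free_block() (see the error_return hunk below). A hypothetical call site, sketched only from the signature and flag names visible in this diff:

        /* free one metadata block and have mballoc forget its buffer in the
         * journal; quota for the freed block is handled inside the call */
        ext4_free_blocks(handle, inode, bh, bh->b_blocknr, 1,
                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);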
@@ -4495,8 +4535,7 @@ do_more:
            in_range(block + count - 1, ext4_inode_table(sb, gdp),
                     EXT4_SB(sb)->s_itb_per_group)) {
 
-               ext4_error(sb, __func__,
-                          "Freeing blocks in system zone - "
+               ext4_error(sb, "Freeing blocks in system zone - "
                           "Block = %llu, count = %lu", block, count);
                /* err = 0. ext4_std_error should be a no op */
                goto error_return;
@@ -4533,7 +4572,8 @@ do_more:
        err = ext4_mb_load_buddy(sb, block_group, &e4b);
        if (err)
                goto error_return;
-       if (metadata && ext4_handle_valid(handle)) {
+
+       if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
                struct ext4_free_data *new_entry;
                /*
                 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4612,7 @@ do_more:
 
        ext4_mb_release_desc(&e4b);
 
-       *freed += count;
+       freed += count;
 
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4632,8 @@ do_more:
        }
        sb->s_dirt = 1;
 error_return:
+       if (freed)
+               dquot_free_block(inode, freed);
        brelse(bitmap_bh);
        ext4_std_error(sb, err);
        if (ac)
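The quota calls in this file follow the tree-wide vfs_dq_* to dquot_* rename; the conversions that actually appear in this diff are summarised below (comment only, same arguments on both sides):

/*
 * vfs_dq_alloc_block(inode, n)  ->  dquot_alloc_block(inode, n)
 * vfs_dq_free_block(inode, n)   ->  dquot_free_block(inode, n)
 * The vfs_dq_claim_block() call in ext4_mb_mark_diskspace_used() is
 * removed along with the delalloc branch that contained it.
 */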