aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c180
1 files changed, 111 insertions, 69 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bba12824defa..b423a364dca3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -23,6 +23,7 @@
23 23
24#include "mballoc.h" 24#include "mballoc.h"
25#include <linux/debugfs.h> 25#include <linux/debugfs.h>
26#include <linux/slab.h>
26#include <trace/events/ext4.h> 27#include <trace/events/ext4.h>
27 28
28/* 29/*
@@ -69,7 +70,7 @@
69 * 70 *
70 * pa_lstart -> the logical start block for this prealloc space 71 * pa_lstart -> the logical start block for this prealloc space
71 * pa_pstart -> the physical start block for this prealloc space 72 * pa_pstart -> the physical start block for this prealloc space
72 * pa_len -> lenght for this prealloc space 73 * pa_len -> length for this prealloc space
73 * pa_free -> free space available in this prealloc space 74 * pa_free -> free space available in this prealloc space
74 * 75 *
75 * The inode preallocation space is used looking at the _logical_ start 76 * The inode preallocation space is used looking at the _logical_ start
@@ -142,7 +143,7 @@
142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 143 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
143 * value of s_mb_order2_reqs can be tuned via 144 * value of s_mb_order2_reqs can be tuned via
144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to 145 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
145 * stripe size (sbi->s_stripe), we try to search for contigous block in 146 * stripe size (sbi->s_stripe), we try to search for contiguous block in
146 * stripe size. This should result in better allocation on RAID setups. If 147 * stripe size. This should result in better allocation on RAID setups. If
147 * not, we search in the specific group using bitmap for best extents. The 148 * not, we search in the specific group using bitmap for best extents. The
148 * tunable min_to_scan and max_to_scan control the behaviour here. 149 * tunable min_to_scan and max_to_scan control the behaviour here.
@@ -441,10 +442,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
441 for (i = 0; i < count; i++) { 442 for (i = 0; i < count; i++) {
442 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { 443 if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
443 ext4_fsblk_t blocknr; 444 ext4_fsblk_t blocknr;
444 blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); 445
446 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
445 blocknr += first + i; 447 blocknr += first + i;
446 blocknr +=
447 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
448 ext4_grp_locked_error(sb, e4b->bd_group, 448 ext4_grp_locked_error(sb, e4b->bd_group,
449 __func__, "double-free of inode" 449 __func__, "double-free of inode"
450 " %lu's block %llu(bit %u in group %u)", 450 " %lu's block %llu(bit %u in group %u)",
@@ -1255,10 +1255,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1255 1255
1256 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { 1256 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
1257 ext4_fsblk_t blocknr; 1257 ext4_fsblk_t blocknr;
1258 blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb); 1258
1259 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
1259 blocknr += block; 1260 blocknr += block;
1260 blocknr +=
1261 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
1262 ext4_grp_locked_error(sb, e4b->bd_group, 1261 ext4_grp_locked_error(sb, e4b->bd_group,
1263 __func__, "double-free of inode" 1262 __func__, "double-free of inode"
1264 " %lu's block %llu(bit %u in group %u)", 1263 " %lu's block %llu(bit %u in group %u)",
@@ -1631,7 +1630,6 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1631 int max; 1630 int max;
1632 int err; 1631 int err;
1633 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 1632 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1634 struct ext4_super_block *es = sbi->s_es;
1635 struct ext4_free_extent ex; 1633 struct ext4_free_extent ex;
1636 1634
1637 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) 1635 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
@@ -1648,8 +1646,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1648 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { 1646 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
1649 ext4_fsblk_t start; 1647 ext4_fsblk_t start;
1650 1648
1651 start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + 1649 start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
1652 ex.fe_start + le32_to_cpu(es->s_first_data_block); 1650 ex.fe_start;
1653 /* use do_div to get remainder (would be 64-bit modulo) */ 1651 /* use do_div to get remainder (would be 64-bit modulo) */
1654 if (do_div(start, sbi->s_stripe) == 0) { 1652 if (do_div(start, sbi->s_stripe) == 0) {
1655 ac->ac_found++; 1653 ac->ac_found++;
@@ -1803,8 +1801,8 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1803 BUG_ON(sbi->s_stripe == 0); 1801 BUG_ON(sbi->s_stripe == 0);
1804 1802
1805 /* find first stripe-aligned block in group */ 1803 /* find first stripe-aligned block in group */
1806 first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) 1804 first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
1807 + le32_to_cpu(sbi->s_es->s_first_data_block); 1805
1808 a = first_group_block + sbi->s_stripe - 1; 1806 a = first_group_block + sbi->s_stripe - 1;
1809 do_div(a, sbi->s_stripe); 1807 do_div(a, sbi->s_stripe);
1810 i = (a * sbi->s_stripe) - first_group_block; 1808 i = (a * sbi->s_stripe) - first_group_block;
@@ -2256,7 +2254,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2256 2254
2257 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2255 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2258 init_rwsem(&meta_group_info[i]->alloc_sem); 2256 init_rwsem(&meta_group_info[i]->alloc_sem);
2259 meta_group_info[i]->bb_free_root.rb_node = NULL; 2257 meta_group_info[i]->bb_free_root = RB_ROOT;
2260 2258
2261#ifdef DOUBLE_CHECK 2259#ifdef DOUBLE_CHECK
2262 { 2260 {
@@ -2529,7 +2527,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2529 struct ext4_group_info *db; 2527 struct ext4_group_info *db;
2530 int err, count = 0, count2 = 0; 2528 int err, count = 0, count2 = 0;
2531 struct ext4_free_data *entry; 2529 struct ext4_free_data *entry;
2532 ext4_fsblk_t discard_block;
2533 struct list_head *l, *ltmp; 2530 struct list_head *l, *ltmp;
2534 2531
2535 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2532 list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2538,6 +2535,17 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2538 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2535 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2539 entry->count, entry->group, entry); 2536 entry->count, entry->group, entry);
2540 2537
2538 if (test_opt(sb, DISCARD)) {
2539 ext4_fsblk_t discard_block;
2540
2541 discard_block = entry->start_blk +
2542 ext4_group_first_block_no(sb, entry->group);
2543 trace_ext4_discard_blocks(sb,
2544 (unsigned long long)discard_block,
2545 entry->count);
2546 sb_issue_discard(sb, discard_block, entry->count);
2547 }
2548
2541 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2549 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2542 /* we expect to find existing buddy because it's pinned */ 2550 /* we expect to find existing buddy because it's pinned */
2543 BUG_ON(err != 0); 2551 BUG_ON(err != 0);
@@ -2559,13 +2567,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2559 page_cache_release(e4b.bd_bitmap_page); 2567 page_cache_release(e4b.bd_bitmap_page);
2560 } 2568 }
2561 ext4_unlock_group(sb, entry->group); 2569 ext4_unlock_group(sb, entry->group);
2562 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
2563 + entry->start_blk
2564 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
2565 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
2566 entry->count);
2567 sb_issue_discard(sb, discard_block, entry->count);
2568
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2570 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2571 ext4_mb_release_desc(&e4b);
2571 } 2572 }
@@ -2698,14 +2699,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2698 if (err) 2699 if (err)
2699 goto out_err; 2700 goto out_err;
2700 2701
2701 block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) 2702 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
2702 + ac->ac_b_ex.fe_start
2703 + le32_to_cpu(es->s_first_data_block);
2704 2703
2705 len = ac->ac_b_ex.fe_len; 2704 len = ac->ac_b_ex.fe_len;
2706 if (!ext4_data_block_valid(sbi, block, len)) { 2705 if (!ext4_data_block_valid(sbi, block, len)) {
2707 ext4_error(sb, __func__, 2706 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2708 "Allocating blocks %llu-%llu which overlap "
2709 "fs metadata\n", block, block+len); 2707 "fs metadata\n", block, block+len);
2710 /* File system mounted not to panic on error 2708 /* File system mounted not to panic on error
2711 * Fix the bitmap and repeat the block allocation 2709 * Fix the bitmap and repeat the block allocation
@@ -2750,12 +2748,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2750 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2748 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2751 /* release all the reserved blocks if non delalloc */ 2749 /* release all the reserved blocks if non delalloc */
2752 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2750 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2753 else {
2754 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2755 ac->ac_b_ex.fe_len);
2756 /* convert reserved quota blocks to real quota blocks */
2757 vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
2758 }
2759 2751
2760 if (sbi->s_log_groups_per_flex) { 2752 if (sbi->s_log_groups_per_flex) {
2761 ext4_group_t flex_group = ext4_flex_group(sbi, 2753 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3006,6 +2998,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3006} 2998}
3007 2999
3008/* 3000/*
3001 * Called on failure; free up any blocks from the inode PA for this
3002 * context. We don't need this for MB_GROUP_PA because we only change
3003 * pa_free in ext4_mb_release_context(), but on failure, we've already
3004 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
3005 */
3006static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3007{
3008 struct ext4_prealloc_space *pa = ac->ac_pa;
3009 int len;
3010
3011 if (pa && pa->pa_type == MB_INODE_PA) {
3012 len = ac->ac_b_ex.fe_len;
3013 pa->pa_free += len;
3014 }
3015
3016}
3017
3018/*
3009 * use blocks preallocated to inode 3019 * use blocks preallocated to inode
3010 */ 3020 */
3011static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3021static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
@@ -3144,9 +3154,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3144 /* The max size of hash table is PREALLOC_TB_SIZE */ 3154 /* The max size of hash table is PREALLOC_TB_SIZE */
3145 order = PREALLOC_TB_SIZE - 1; 3155 order = PREALLOC_TB_SIZE - 1;
3146 3156
3147 goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + 3157 goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
3148 ac->ac_g_ex.fe_start +
3149 le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
3150 /* 3158 /*
3151 * search for the prealloc space that is having 3159 * search for the prealloc space that is having
3152 * minimal distance from the goal block. 3160 * minimal distance from the goal block.
@@ -3509,8 +3517,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3509 if (bit >= end) 3517 if (bit >= end)
3510 break; 3518 break;
3511 next = mb_find_next_bit(bitmap_bh->b_data, end, bit); 3519 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3512 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + 3520 start = ext4_group_first_block_no(sb, group) + bit;
3513 le32_to_cpu(sbi->s_es->s_first_data_block);
3514 mb_debug(1, " free preallocated %u/%u in group %u\n", 3521 mb_debug(1, " free preallocated %u/%u in group %u\n",
3515 (unsigned) start, (unsigned) next - bit, 3522 (unsigned) start, (unsigned) next - bit,
3516 (unsigned) group); 3523 (unsigned) group);
@@ -3606,15 +3613,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3606 3613
3607 bitmap_bh = ext4_read_block_bitmap(sb, group); 3614 bitmap_bh = ext4_read_block_bitmap(sb, group);
3608 if (bitmap_bh == NULL) { 3615 if (bitmap_bh == NULL) {
3609 ext4_error(sb, __func__, "Error in reading block " 3616 ext4_error(sb, "Error reading block bitmap for %u", group);
3610 "bitmap for %u", group);
3611 return 0; 3617 return 0;
3612 } 3618 }
3613 3619
3614 err = ext4_mb_load_buddy(sb, group, &e4b); 3620 err = ext4_mb_load_buddy(sb, group, &e4b);
3615 if (err) { 3621 if (err) {
3616 ext4_error(sb, __func__, "Error in loading buddy " 3622 ext4_error(sb, "Error loading buddy information for %u", group);
3617 "information for %u", group);
3618 put_bh(bitmap_bh); 3623 put_bh(bitmap_bh);
3619 return 0; 3624 return 0;
3620 } 3625 }
@@ -3787,15 +3792,15 @@ repeat:
3787 3792
3788 err = ext4_mb_load_buddy(sb, group, &e4b); 3793 err = ext4_mb_load_buddy(sb, group, &e4b);
3789 if (err) { 3794 if (err) {
3790 ext4_error(sb, __func__, "Error in loading buddy " 3795 ext4_error(sb, "Error loading buddy information for %u",
3791 "information for %u", group); 3796 group);
3792 continue; 3797 continue;
3793 } 3798 }
3794 3799
3795 bitmap_bh = ext4_read_block_bitmap(sb, group); 3800 bitmap_bh = ext4_read_block_bitmap(sb, group);
3796 if (bitmap_bh == NULL) { 3801 if (bitmap_bh == NULL) {
3797 ext4_error(sb, __func__, "Error in reading block " 3802 ext4_error(sb, "Error reading block bitmap for %u",
3798 "bitmap for %u", group); 3803 group);
3799 ext4_mb_release_desc(&e4b); 3804 ext4_mb_release_desc(&e4b);
3800 continue; 3805 continue;
3801 } 3806 }
@@ -3921,7 +3926,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3921 3926
3922 /* don't use group allocation for large files */ 3927 /* don't use group allocation for large files */
3923 size = max(size, isize); 3928 size = max(size, isize);
3924 if (size >= sbi->s_mb_stream_request) { 3929 if (size > sbi->s_mb_stream_request) {
3925 ac->ac_flags |= EXT4_MB_STREAM_ALLOC; 3930 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
3926 return; 3931 return;
3927 } 3932 }
@@ -3932,7 +3937,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3932 * per cpu locality group is to reduce the contention between block 3937 * per cpu locality group is to reduce the contention between block
3933 * request from multiple CPUs. 3938 * request from multiple CPUs.
3934 */ 3939 */
3935 ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); 3940 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
3936 3941
3937 /* we're going to use group allocation */ 3942 /* we're going to use group allocation */
3938 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; 3943 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4060,8 +4065,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4060 4065
4061 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 4066 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4062 if (ext4_mb_load_buddy(sb, group, &e4b)) { 4067 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4063 ext4_error(sb, __func__, "Error in loading buddy " 4068 ext4_error(sb, "Error loading buddy information for %u",
4064 "information for %u", group); 4069 group);
4065 continue; 4070 continue;
4066 } 4071 }
4067 ext4_lock_group(sb, group); 4072 ext4_lock_group(sb, group);
@@ -4237,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4237 return 0; 4242 return 0;
4238 } 4243 }
4239 reserv_blks = ar->len; 4244 reserv_blks = ar->len;
4240 while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) { 4245 while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
4241 ar->flags |= EXT4_MB_HINT_NOPREALLOC; 4246 ar->flags |= EXT4_MB_HINT_NOPREALLOC;
4242 ar->len--; 4247 ar->len--;
4243 } 4248 }
@@ -4290,6 +4295,7 @@ repeat:
4290 ac->ac_status = AC_STATUS_CONTINUE; 4295 ac->ac_status = AC_STATUS_CONTINUE;
4291 goto repeat; 4296 goto repeat;
4292 } else if (*errp) { 4297 } else if (*errp) {
4298 ext4_discard_allocated_blocks(ac);
4293 ac->ac_b_ex.fe_len = 0; 4299 ac->ac_b_ex.fe_len = 0;
4294 ar->len = 0; 4300 ar->len = 0;
4295 ext4_mb_show_ac(ac); 4301 ext4_mb_show_ac(ac);
@@ -4313,7 +4319,7 @@ out2:
4313 kmem_cache_free(ext4_ac_cachep, ac); 4319 kmem_cache_free(ext4_ac_cachep, ac);
4314out1: 4320out1:
4315 if (inquota && ar->len < inquota) 4321 if (inquota && ar->len < inquota)
4316 vfs_dq_free_block(ar->inode, inquota - ar->len); 4322 dquot_free_block(ar->inode, inquota - ar->len);
4317out3: 4323out3:
4318 if (!ar->len) { 4324 if (!ar->len) {
4319 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) 4325 if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
@@ -4422,18 +4428,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 return 0; 4428 return 0;
4423} 4429}
4424 4430
4425/* 4431/**
4426 * Main entry point into mballoc to free blocks 4432 * ext4_free_blocks() -- Free given blocks and update quota
4433 * @handle: handle for this transaction
4434 * @inode: inode
4435 * @block: start physical block to free
4436 * @count: number of blocks to free
4437 * @flags: EXT4_FREE_BLOCKS_* flags (VALIDATED, FORGET, METADATA)
4427 */ 4438 */
4428void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4439void ext4_free_blocks(handle_t *handle, struct inode *inode,
4429 ext4_fsblk_t block, unsigned long count, 4440 struct buffer_head *bh, ext4_fsblk_t block,
4430 int metadata, unsigned long *freed) 4441 unsigned long count, int flags)
4431{ 4442{
4432 struct buffer_head *bitmap_bh = NULL; 4443 struct buffer_head *bitmap_bh = NULL;
4433 struct super_block *sb = inode->i_sb; 4444 struct super_block *sb = inode->i_sb;
4434 struct ext4_allocation_context *ac = NULL; 4445 struct ext4_allocation_context *ac = NULL;
4435 struct ext4_group_desc *gdp; 4446 struct ext4_group_desc *gdp;
4436 struct ext4_super_block *es; 4447 struct ext4_super_block *es;
4448 unsigned long freed = 0;
4437 unsigned int overflow; 4449 unsigned int overflow;
4438 ext4_grpblk_t bit; 4450 ext4_grpblk_t bit;
4439 struct buffer_head *gd_bh; 4451 struct buffer_head *gd_bh;
@@ -4443,21 +4455,49 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4443 int err = 0; 4455 int err = 0;
4444 int ret; 4456 int ret;
4445 4457
4446 *freed = 0; 4458 if (bh) {
4459 if (block)
4460 BUG_ON(block != bh->b_blocknr);
4461 else
4462 block = bh->b_blocknr;
4463 }
4447 4464
4448 sbi = EXT4_SB(sb); 4465 sbi = EXT4_SB(sb);
4449 es = EXT4_SB(sb)->s_es; 4466 es = EXT4_SB(sb)->s_es;
4450 if (block < le32_to_cpu(es->s_first_data_block) || 4467 if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
4451 block + count < block || 4468 !ext4_data_block_valid(sbi, block, count)) {
4452 block + count > ext4_blocks_count(es)) { 4469 ext4_error(sb, "Freeing blocks not in datazone - "
4453 ext4_error(sb, __func__, 4470 "block = %llu, count = %lu", block, count);
4454 "Freeing blocks not in datazone - "
4455 "block = %llu, count = %lu", block, count);
4456 goto error_return; 4471 goto error_return;
4457 } 4472 }
4458 4473
4459 ext4_debug("freeing block %llu\n", block); 4474 ext4_debug("freeing block %llu\n", block);
4460 trace_ext4_free_blocks(inode, block, count, metadata); 4475 trace_ext4_free_blocks(inode, block, count, flags);
4476
4477 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4478 struct buffer_head *tbh = bh;
4479 int i;
4480
4481 BUG_ON(bh && (count > 1));
4482
4483 for (i = 0; i < count; i++) {
4484 if (!bh)
4485 tbh = sb_find_get_block(inode->i_sb,
4486 block + i);
4487 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4488 inode, tbh, block + i);
4489 }
4490 }
4491
4492 /*
4493 * We need to make sure we don't reuse the freed block until
4494 * after the transaction is committed, which we can do by
4495 * treating the block as metadata, below. We make an
4496 * exception if the inode is to be written in writeback mode
4497 * since writeback mode has weak data consistency guarantees.
4498 */
4499 if (!ext4_should_writeback_data(inode))
4500 flags |= EXT4_FREE_BLOCKS_METADATA;
4461 4501
4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4502 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4463 if (ac) { 4503 if (ac) {
@@ -4495,8 +4535,7 @@ do_more:
4495 in_range(block + count - 1, ext4_inode_table(sb, gdp), 4535 in_range(block + count - 1, ext4_inode_table(sb, gdp),
4496 EXT4_SB(sb)->s_itb_per_group)) { 4536 EXT4_SB(sb)->s_itb_per_group)) {
4497 4537
4498 ext4_error(sb, __func__, 4538 ext4_error(sb, "Freeing blocks in system zone - "
4499 "Freeing blocks in system zone - "
4500 "Block = %llu, count = %lu", block, count); 4539 "Block = %llu, count = %lu", block, count);
4501 /* err = 0. ext4_std_error should be a no op */ 4540 /* err = 0. ext4_std_error should be a no op */
4502 goto error_return; 4541 goto error_return;
@@ -4533,7 +4572,8 @@ do_more:
4533 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4572 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4534 if (err) 4573 if (err)
4535 goto error_return; 4574 goto error_return;
4536 if (metadata && ext4_handle_valid(handle)) { 4575
4576 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4537 struct ext4_free_data *new_entry; 4577 struct ext4_free_data *new_entry;
4538 /* 4578 /*
4539 * blocks being freed are metadata. these blocks shouldn't 4579 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4612,7 @@ do_more:
4572 4612
4573 ext4_mb_release_desc(&e4b); 4613 ext4_mb_release_desc(&e4b);
4574 4614
4575 *freed += count; 4615 freed += count;
4576 4616
4577 /* We dirtied the bitmap block */ 4617 /* We dirtied the bitmap block */
4578 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4618 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4632,8 @@ do_more:
4592 } 4632 }
4593 sb->s_dirt = 1; 4633 sb->s_dirt = 1;
4594error_return: 4634error_return:
4635 if (freed)
4636 dquot_free_block(inode, freed);
4595 brelse(bitmap_bh); 4637 brelse(bitmap_bh);
4596 ext4_std_error(sb, err); 4638 ext4_std_error(sb, err);
4597 if (ac) 4639 if (ac)