aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c111
1 files changed, 83 insertions, 28 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index bba12824def..d34afad3e13 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -142,7 +142,7 @@
142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
143 * value of s_mb_order2_reqs can be tuned via 143 * value of s_mb_order2_reqs can be tuned via
144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to 144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
145 * stripe size (sbi->s_stripe), we try to search for contigous block in 145 * stripe size (sbi->s_stripe), we try to search for contiguous block in
146 * stripe size. This should result in better allocation on RAID setups. If 146 * stripe size. This should result in better allocation on RAID setups. If
147 * not, we search in the specific group using bitmap for best extents. The 147 * not, we search in the specific group using bitmap for best extents. The
148 * tunable min_to_scan and max_to_scan control the behaviour here. 148 * tunable min_to_scan and max_to_scan control the behaviour here.
@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2529 struct ext4_group_info *db; 2529 struct ext4_group_info *db;
2530 int err, count = 0, count2 = 0; 2530 int err, count = 0, count2 = 0;
2531 struct ext4_free_data *entry; 2531 struct ext4_free_data *entry;
2532 ext4_fsblk_t discard_block;
2533 struct list_head *l, *ltmp; 2532 struct list_head *l, *ltmp;
2534 2533
2535 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2534 list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2559 page_cache_release(e4b.bd_bitmap_page); 2558 page_cache_release(e4b.bd_bitmap_page);
2560 } 2559 }
2561 ext4_unlock_group(sb, entry->group); 2560 ext4_unlock_group(sb, entry->group);
2562 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2561 if (test_opt(sb, DISCARD)) {
2563 + entry->start_blk 2562 ext4_fsblk_t discard_block;
2564 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2563 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
2565 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, 2564
2566 entry->count); 2565 discard_block = (ext4_fsblk_t)entry->group *
2567 sb_issue_discard(sb, discard_block, entry->count); 2566 EXT4_BLOCKS_PER_GROUP(sb)
2568 2567 + entry->start_blk
2568 + le32_to_cpu(es->s_first_data_block);
2569 trace_ext4_discard_blocks(sb,
2570 (unsigned long long)discard_block,
2571 entry->count);
2572 sb_issue_discard(sb, discard_block, entry->count);
2573 }
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2574 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2575 ext4_mb_release_desc(&e4b);
2571 } 2576 }
@@ -2750,12 +2755,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2750 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) 2755 if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
2751 /* release all the reserved blocks if non delalloc */ 2756 /* release all the reserved blocks if non delalloc */
2752 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); 2757 percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
2753 else {
2754 percpu_counter_sub(&sbi->s_dirtyblocks_counter,
2755 ac->ac_b_ex.fe_len);
2756 /* convert reserved quota blocks to real quota blocks */
2757 vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
2758 }
2759 2758
2760 if (sbi->s_log_groups_per_flex) { 2759 if (sbi->s_log_groups_per_flex) {
2761 ext4_group_t flex_group = ext4_flex_group(sbi, 2760 ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -3006,6 +3005,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3006} 3005}
3007 3006
3008/* 3007/*
3008 * Called on failure; free up any blocks from the inode PA for this
3009 * context. We don't need this for MB_GROUP_PA because we only change
3010 * pa_free in ext4_mb_release_context(), but on failure, we've already
3011 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
3012 */
3013static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3014{
3015 struct ext4_prealloc_space *pa = ac->ac_pa;
3016 int len;
3017
3018 if (pa && pa->pa_type == MB_INODE_PA) {
3019 len = ac->ac_b_ex.fe_len;
3020 pa->pa_free += len;
3021 }
3022
3023}
3024
3025/*
3009 * use blocks preallocated to inode 3026 * use blocks preallocated to inode
3010 */ 3027 */
3011static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3028static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
@@ -3932,7 +3949,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
3932 * per cpu locality group is to reduce the contention between block 3949 * per cpu locality group is to reduce the contention between block
3933 * request from multiple CPUs. 3950 * request from multiple CPUs.
3934 */ 3951 */
3935 ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); 3952 ac->ac_lg = __this_cpu_ptr(sbi->s_locality_groups);
3936 3953
3937 /* we're going to use group allocation */ 3954 /* we're going to use group allocation */
3938 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; 3955 ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC;
@@ -4290,6 +4307,7 @@ repeat:
4290 ac->ac_status = AC_STATUS_CONTINUE; 4307 ac->ac_status = AC_STATUS_CONTINUE;
4291 goto repeat; 4308 goto repeat;
4292 } else if (*errp) { 4309 } else if (*errp) {
4310 ext4_discard_allocated_blocks(ac);
4293 ac->ac_b_ex.fe_len = 0; 4311 ac->ac_b_ex.fe_len = 0;
4294 ar->len = 0; 4312 ar->len = 0;
4295 ext4_mb_show_ac(ac); 4313 ext4_mb_show_ac(ac);
@@ -4422,18 +4440,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 return 0; 4440 return 0;
4423} 4441}
4424 4442
4425/* 4443/**
4426 * Main entry point into mballoc to free blocks 4444 * ext4_free_blocks() -- Free given blocks and update quota
4445 * @handle: handle for this transaction
4446 * @inode: inode
4447 * @block: start physical block to free (if 0 and @bh is given, taken from bh->b_blocknr)
4448 * @count: number of blocks to free
4449 * @flags: EXT4_FREE_BLOCKS_* flags (e.g. EXT4_FREE_BLOCKS_METADATA, EXT4_FREE_BLOCKS_FORGET)
4427 */ 4450 */
4428void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4451void ext4_free_blocks(handle_t *handle, struct inode *inode,
4429 ext4_fsblk_t block, unsigned long count, 4452 struct buffer_head *bh, ext4_fsblk_t block,
4430 int metadata, unsigned long *freed) 4453 unsigned long count, int flags)
4431{ 4454{
4432 struct buffer_head *bitmap_bh = NULL; 4455 struct buffer_head *bitmap_bh = NULL;
4433 struct super_block *sb = inode->i_sb; 4456 struct super_block *sb = inode->i_sb;
4434 struct ext4_allocation_context *ac = NULL; 4457 struct ext4_allocation_context *ac = NULL;
4435 struct ext4_group_desc *gdp; 4458 struct ext4_group_desc *gdp;
4436 struct ext4_super_block *es; 4459 struct ext4_super_block *es;
4460 unsigned long freed = 0;
4437 unsigned int overflow; 4461 unsigned int overflow;
4438 ext4_grpblk_t bit; 4462 ext4_grpblk_t bit;
4439 struct buffer_head *gd_bh; 4463 struct buffer_head *gd_bh;
@@ -4443,13 +4467,16 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4443 int err = 0; 4467 int err = 0;
4444 int ret; 4468 int ret;
4445 4469
4446 *freed = 0; 4470 if (bh) {
4471 if (block)
4472 BUG_ON(block != bh->b_blocknr);
4473 else
4474 block = bh->b_blocknr;
4475 }
4447 4476
4448 sbi = EXT4_SB(sb); 4477 sbi = EXT4_SB(sb);
4449 es = EXT4_SB(sb)->s_es; 4478 es = EXT4_SB(sb)->s_es;
4450 if (block < le32_to_cpu(es->s_first_data_block) || 4479 if (!ext4_data_block_valid(sbi, block, count)) {
4451 block + count < block ||
4452 block + count > ext4_blocks_count(es)) {
4453 ext4_error(sb, __func__, 4480 ext4_error(sb, __func__,
4454 "Freeing blocks not in datazone - " 4481 "Freeing blocks not in datazone - "
4455 "block = %llu, count = %lu", block, count); 4482 "block = %llu, count = %lu", block, count);
@@ -4457,7 +4484,32 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4457 } 4484 }
4458 4485
4459 ext4_debug("freeing block %llu\n", block); 4486 ext4_debug("freeing block %llu\n", block);
4460 trace_ext4_free_blocks(inode, block, count, metadata); 4487 trace_ext4_free_blocks(inode, block, count, flags);
4488
4489 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4490 struct buffer_head *tbh = bh;
4491 int i;
4492
4493 BUG_ON(bh && (count > 1));
4494
4495 for (i = 0; i < count; i++) {
4496 if (!bh)
4497 tbh = sb_find_get_block(inode->i_sb,
4498 block + i);
4499 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4500 inode, tbh, block + i);
4501 }
4502 }
4503
4504 /*
4505 * We need to make sure we don't reuse the freed block until
4506 * after the transaction is committed, which we can do by
4507 * treating the block as metadata, below. We make an
4508 * exception if the inode is to be written in writeback mode
4509 * since writeback mode has weak data consistency guarantees.
4510 */
4511 if (!ext4_should_writeback_data(inode))
4512 flags |= EXT4_FREE_BLOCKS_METADATA;
4461 4513
4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4514 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4463 if (ac) { 4515 if (ac) {
@@ -4533,7 +4585,8 @@ do_more:
4533 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4585 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4534 if (err) 4586 if (err)
4535 goto error_return; 4587 goto error_return;
4536 if (metadata && ext4_handle_valid(handle)) { 4588
4589 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4537 struct ext4_free_data *new_entry; 4590 struct ext4_free_data *new_entry;
4538 /* 4591 /*
4539 * blocks being freed are metadata. these blocks shouldn't 4592 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4625,7 @@ do_more:
4572 4625
4573 ext4_mb_release_desc(&e4b); 4626 ext4_mb_release_desc(&e4b);
4574 4627
4575 *freed += count; 4628 freed += count;
4576 4629
4577 /* We dirtied the bitmap block */ 4630 /* We dirtied the bitmap block */
4578 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4631 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4645,8 @@ do_more:
4592 } 4645 }
4593 sb->s_dirt = 1; 4646 sb->s_dirt = 1;
4594error_return: 4647error_return:
4648 if (freed)
4649 vfs_dq_free_block(inode, freed);
4595 brelse(bitmap_bh); 4650 brelse(bitmap_bh);
4596 ext4_std_error(sb, err); 4651 ext4_std_error(sb, err);
4597 if (ac) 4652 if (ac)