aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c164
1 files changed, 104 insertions, 60 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 76e5fedc0a0b..dd0fcfcb35ce 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -420,6 +420,7 @@
420#define MB_DEFAULT_GROUP_PREALLOC 512 420#define MB_DEFAULT_GROUP_PREALLOC 512
421 421
422static struct kmem_cache *ext4_pspace_cachep; 422static struct kmem_cache *ext4_pspace_cachep;
423static struct kmem_cache *ext4_ac_cachep;
423 424
424#ifdef EXT4_BB_MAX_BLOCKS 425#ifdef EXT4_BB_MAX_BLOCKS
425#undef EXT4_BB_MAX_BLOCKS 426#undef EXT4_BB_MAX_BLOCKS
@@ -680,7 +681,6 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
680{ 681{
681 char *bb; 682 char *bb;
682 683
683 /* FIXME!! is this needed */
684 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 684 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b));
685 BUG_ON(max == NULL); 685 BUG_ON(max == NULL);
686 686
@@ -964,7 +964,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
964 grp->bb_fragments = fragments; 964 grp->bb_fragments = fragments;
965 965
966 if (free != grp->bb_free) { 966 if (free != grp->bb_free) {
967 printk(KERN_DEBUG 967 ext4_error(sb, __FUNCTION__,
968 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", 968 "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
969 group, free, grp->bb_free); 969 group, free, grp->bb_free);
970 grp->bb_free = free; 970 grp->bb_free = free;
@@ -1821,13 +1821,24 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1821 i = ext4_find_next_zero_bit(bitmap, 1821 i = ext4_find_next_zero_bit(bitmap,
1822 EXT4_BLOCKS_PER_GROUP(sb), i); 1822 EXT4_BLOCKS_PER_GROUP(sb), i);
1823 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) { 1823 if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
1824 BUG_ON(free != 0); 1824 /*
1825 * IF we corrupt the bitmap we won't find any
1826 * free blocks even though group info says we
1827 * we have free blocks
1828 */
1829 ext4_error(sb, __FUNCTION__, "%d free blocks as per "
1830 "group info. But bitmap says 0\n",
1831 free);
1825 break; 1832 break;
1826 } 1833 }
1827 1834
1828 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1835 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
1829 BUG_ON(ex.fe_len <= 0); 1836 BUG_ON(ex.fe_len <= 0);
1830 BUG_ON(free < ex.fe_len); 1837 if (free < ex.fe_len) {
1838 ext4_error(sb, __FUNCTION__, "%d free blocks as per "
1839 "group info. But got %d blocks\n",
1840 free, ex.fe_len);
1841 }
1831 1842
1832 ext4_mb_measure_extent(ac, &ex, e4b); 1843 ext4_mb_measure_extent(ac, &ex, e4b);
1833 1844
@@ -2959,12 +2970,19 @@ int __init init_ext4_mballoc(void)
2959 if (ext4_pspace_cachep == NULL) 2970 if (ext4_pspace_cachep == NULL)
2960 return -ENOMEM; 2971 return -ENOMEM;
2961 2972
2973 ext4_ac_cachep =
2974 kmem_cache_create("ext4_alloc_context",
2975 sizeof(struct ext4_allocation_context),
2976 0, SLAB_RECLAIM_ACCOUNT, NULL);
2977 if (ext4_ac_cachep == NULL) {
2978 kmem_cache_destroy(ext4_pspace_cachep);
2979 return -ENOMEM;
2980 }
2962#ifdef CONFIG_PROC_FS 2981#ifdef CONFIG_PROC_FS
2963 proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); 2982 proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs);
2964 if (proc_root_ext4 == NULL) 2983 if (proc_root_ext4 == NULL)
2965 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); 2984 printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT);
2966#endif 2985#endif
2967
2968 return 0; 2986 return 0;
2969} 2987}
2970 2988
@@ -2972,6 +2990,7 @@ void exit_ext4_mballoc(void)
2972{ 2990{
2973 /* XXX: synchronize_rcu(); */ 2991 /* XXX: synchronize_rcu(); */
2974 kmem_cache_destroy(ext4_pspace_cachep); 2992 kmem_cache_destroy(ext4_pspace_cachep);
2993 kmem_cache_destroy(ext4_ac_cachep);
2975#ifdef CONFIG_PROC_FS 2994#ifdef CONFIG_PROC_FS
2976 remove_proc_entry(EXT4_ROOT, proc_root_fs); 2995 remove_proc_entry(EXT4_ROOT, proc_root_fs);
2977#endif 2996#endif
@@ -3069,7 +3088,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
3069 3088
3070out_err: 3089out_err:
3071 sb->s_dirt = 1; 3090 sb->s_dirt = 1;
3072 put_bh(bitmap_bh); 3091 brelse(bitmap_bh);
3073 return err; 3092 return err;
3074} 3093}
3075 3094
@@ -3354,13 +3373,10 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3354 ac->ac_pa = pa; 3373 ac->ac_pa = pa;
3355 3374
3356 /* we don't correct pa_pstart or pa_plen here to avoid 3375 /* we don't correct pa_pstart or pa_plen here to avoid
3357 * possible race when tte group is being loaded concurrently 3376 * possible race when the group is being loaded concurrently
3358 * instead we correct pa later, after blocks are marked 3377 * instead we correct pa later, after blocks are marked
3359 * in on-disk bitmap -- see ext4_mb_release_context() */ 3378 * in on-disk bitmap -- see ext4_mb_release_context()
3360 /* 3379 * Other CPUs are prevented from allocating from this pa by lg_mutex
3361 * FIXME!! but the other CPUs can look at this particular
3362 * pa and think that it have enought free blocks if we
3363 * don't update pa_free here right ?
3364 */ 3380 */
3365 mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); 3381 mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3366} 3382}
@@ -3699,7 +3715,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3699 struct buffer_head *bitmap_bh, 3715 struct buffer_head *bitmap_bh,
3700 struct ext4_prealloc_space *pa) 3716 struct ext4_prealloc_space *pa)
3701{ 3717{
3702 struct ext4_allocation_context ac; 3718 struct ext4_allocation_context *ac;
3703 struct super_block *sb = e4b->bd_sb; 3719 struct super_block *sb = e4b->bd_sb;
3704 struct ext4_sb_info *sbi = EXT4_SB(sb); 3720 struct ext4_sb_info *sbi = EXT4_SB(sb);
3705 unsigned long end; 3721 unsigned long end;
@@ -3715,9 +3731,13 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3715 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3731 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
3716 end = bit + pa->pa_len; 3732 end = bit + pa->pa_len;
3717 3733
3718 ac.ac_sb = sb; 3734 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3719 ac.ac_inode = pa->pa_inode; 3735
3720 ac.ac_op = EXT4_MB_HISTORY_DISCARD; 3736 if (ac) {
3737 ac->ac_sb = sb;
3738 ac->ac_inode = pa->pa_inode;
3739 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3740 }
3721 3741
3722 while (bit < end) { 3742 while (bit < end) {
3723 bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit); 3743 bit = ext4_find_next_zero_bit(bitmap_bh->b_data, end, bit);
@@ -3733,24 +3753,28 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3733 (unsigned) group); 3753 (unsigned) group);
3734 free += next - bit; 3754 free += next - bit;
3735 3755
3736 ac.ac_b_ex.fe_group = group; 3756 if (ac) {
3737 ac.ac_b_ex.fe_start = bit; 3757 ac->ac_b_ex.fe_group = group;
3738 ac.ac_b_ex.fe_len = next - bit; 3758 ac->ac_b_ex.fe_start = bit;
3739 ac.ac_b_ex.fe_logical = 0; 3759 ac->ac_b_ex.fe_len = next - bit;
3740 ext4_mb_store_history(&ac); 3760 ac->ac_b_ex.fe_logical = 0;
3761 ext4_mb_store_history(ac);
3762 }
3741 3763
3742 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); 3764 mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
3743 bit = next + 1; 3765 bit = next + 1;
3744 } 3766 }
3745 if (free != pa->pa_free) { 3767 if (free != pa->pa_free) {
3746 printk(KERN_ERR "pa %p: logic %lu, phys. %lu, len %lu\n", 3768 printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
3747 pa, (unsigned long) pa->pa_lstart, 3769 pa, (unsigned long) pa->pa_lstart,
3748 (unsigned long) pa->pa_pstart, 3770 (unsigned long) pa->pa_pstart,
3749 (unsigned long) pa->pa_len); 3771 (unsigned long) pa->pa_len);
3750 printk(KERN_ERR "free %u, pa_free %u\n", free, pa->pa_free); 3772 ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
3773 free, pa->pa_free);
3751 } 3774 }
3752 BUG_ON(free != pa->pa_free);
3753 atomic_add(free, &sbi->s_mb_discarded); 3775 atomic_add(free, &sbi->s_mb_discarded);
3776 if (ac)
3777 kmem_cache_free(ext4_ac_cachep, ac);
3754 3778
3755 return err; 3779 return err;
3756} 3780}
@@ -3758,12 +3782,15 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
3758static int ext4_mb_release_group_pa(struct ext4_buddy *e4b, 3782static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3759 struct ext4_prealloc_space *pa) 3783 struct ext4_prealloc_space *pa)
3760{ 3784{
3761 struct ext4_allocation_context ac; 3785 struct ext4_allocation_context *ac;
3762 struct super_block *sb = e4b->bd_sb; 3786 struct super_block *sb = e4b->bd_sb;
3763 ext4_group_t group; 3787 ext4_group_t group;
3764 ext4_grpblk_t bit; 3788 ext4_grpblk_t bit;
3765 3789
3766 ac.ac_op = EXT4_MB_HISTORY_DISCARD; 3790 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3791
3792 if (ac)
3793 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3767 3794
3768 BUG_ON(pa->pa_deleted == 0); 3795 BUG_ON(pa->pa_deleted == 0);
3769 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3796 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
@@ -3771,13 +3798,16 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3771 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); 3798 mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
3772 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); 3799 atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
3773 3800
3774 ac.ac_sb = sb; 3801 if (ac) {
3775 ac.ac_inode = NULL; 3802 ac->ac_sb = sb;
3776 ac.ac_b_ex.fe_group = group; 3803 ac->ac_inode = NULL;
3777 ac.ac_b_ex.fe_start = bit; 3804 ac->ac_b_ex.fe_group = group;
3778 ac.ac_b_ex.fe_len = pa->pa_len; 3805 ac->ac_b_ex.fe_start = bit;
3779 ac.ac_b_ex.fe_logical = 0; 3806 ac->ac_b_ex.fe_len = pa->pa_len;
3780 ext4_mb_store_history(&ac); 3807 ac->ac_b_ex.fe_logical = 0;
3808 ext4_mb_store_history(ac);
3809 kmem_cache_free(ext4_ac_cachep, ac);
3810 }
3781 3811
3782 return 0; 3812 return 0;
3783} 3813}
@@ -4231,7 +4261,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
4231ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, 4261ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4232 struct ext4_allocation_request *ar, int *errp) 4262 struct ext4_allocation_request *ar, int *errp)
4233{ 4263{
4234 struct ext4_allocation_context ac; 4264 struct ext4_allocation_context *ac = NULL;
4235 struct ext4_sb_info *sbi; 4265 struct ext4_sb_info *sbi;
4236 struct super_block *sb; 4266 struct super_block *sb;
4237 ext4_fsblk_t block = 0; 4267 ext4_fsblk_t block = 0;
@@ -4257,53 +4287,60 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4257 } 4287 }
4258 inquota = ar->len; 4288 inquota = ar->len;
4259 4289
4290 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4291 if (!ac) {
4292 *errp = -ENOMEM;
4293 return 0;
4294 }
4295
4260 ext4_mb_poll_new_transaction(sb, handle); 4296 ext4_mb_poll_new_transaction(sb, handle);
4261 4297
4262 *errp = ext4_mb_initialize_context(&ac, ar); 4298 *errp = ext4_mb_initialize_context(ac, ar);
4263 if (*errp) { 4299 if (*errp) {
4264 ar->len = 0; 4300 ar->len = 0;
4265 goto out; 4301 goto out;
4266 } 4302 }
4267 4303
4268 ac.ac_op = EXT4_MB_HISTORY_PREALLOC; 4304 ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
4269 if (!ext4_mb_use_preallocated(&ac)) { 4305 if (!ext4_mb_use_preallocated(ac)) {
4270 4306
4271 ac.ac_op = EXT4_MB_HISTORY_ALLOC; 4307 ac->ac_op = EXT4_MB_HISTORY_ALLOC;
4272 ext4_mb_normalize_request(&ac, ar); 4308 ext4_mb_normalize_request(ac, ar);
4273 4309
4274repeat: 4310repeat:
4275 /* allocate space in core */ 4311 /* allocate space in core */
4276 ext4_mb_regular_allocator(&ac); 4312 ext4_mb_regular_allocator(ac);
4277 4313
4278 /* as we've just preallocated more space than 4314 /* as we've just preallocated more space than
4279 * user requested orinally, we store allocated 4315 * user requested orinally, we store allocated
4280 * space in a special descriptor */ 4316 * space in a special descriptor */
4281 if (ac.ac_status == AC_STATUS_FOUND && 4317 if (ac->ac_status == AC_STATUS_FOUND &&
4282 ac.ac_o_ex.fe_len < ac.ac_b_ex.fe_len) 4318 ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
4283 ext4_mb_new_preallocation(&ac); 4319 ext4_mb_new_preallocation(ac);
4284 } 4320 }
4285 4321
4286 if (likely(ac.ac_status == AC_STATUS_FOUND)) { 4322 if (likely(ac->ac_status == AC_STATUS_FOUND)) {
4287 ext4_mb_mark_diskspace_used(&ac, handle); 4323 ext4_mb_mark_diskspace_used(ac, handle);
4288 *errp = 0; 4324 *errp = 0;
4289 block = ext4_grp_offs_to_block(sb, &ac.ac_b_ex); 4325 block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
4290 ar->len = ac.ac_b_ex.fe_len; 4326 ar->len = ac->ac_b_ex.fe_len;
4291 } else { 4327 } else {
4292 freed = ext4_mb_discard_preallocations(sb, ac.ac_o_ex.fe_len); 4328 freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
4293 if (freed) 4329 if (freed)
4294 goto repeat; 4330 goto repeat;
4295 *errp = -ENOSPC; 4331 *errp = -ENOSPC;
4296 ac.ac_b_ex.fe_len = 0; 4332 ac->ac_b_ex.fe_len = 0;
4297 ar->len = 0; 4333 ar->len = 0;
4298 ext4_mb_show_ac(&ac); 4334 ext4_mb_show_ac(ac);
4299 } 4335 }
4300 4336
4301 ext4_mb_release_context(&ac); 4337 ext4_mb_release_context(ac);
4302 4338
4303out: 4339out:
4304 if (ar->len < inquota) 4340 if (ar->len < inquota)
4305 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); 4341 DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
4306 4342
4343 kmem_cache_free(ext4_ac_cachep, ac);
4307 return block; 4344 return block;
4308} 4345}
4309static void ext4_mb_poll_new_transaction(struct super_block *sb, 4346static void ext4_mb_poll_new_transaction(struct super_block *sb,
@@ -4405,9 +4442,9 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4405 unsigned long block, unsigned long count, 4442 unsigned long block, unsigned long count,
4406 int metadata, unsigned long *freed) 4443 int metadata, unsigned long *freed)
4407{ 4444{
4408 struct buffer_head *bitmap_bh = 0; 4445 struct buffer_head *bitmap_bh = NULL;
4409 struct super_block *sb = inode->i_sb; 4446 struct super_block *sb = inode->i_sb;
4410 struct ext4_allocation_context ac; 4447 struct ext4_allocation_context *ac = NULL;
4411 struct ext4_group_desc *gdp; 4448 struct ext4_group_desc *gdp;
4412 struct ext4_super_block *es; 4449 struct ext4_super_block *es;
4413 unsigned long overflow; 4450 unsigned long overflow;
@@ -4436,9 +4473,12 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4436 4473
4437 ext4_debug("freeing block %lu\n", block); 4474 ext4_debug("freeing block %lu\n", block);
4438 4475
4439 ac.ac_op = EXT4_MB_HISTORY_FREE; 4476 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4440 ac.ac_inode = inode; 4477 if (ac) {
4441 ac.ac_sb = sb; 4478 ac->ac_op = EXT4_MB_HISTORY_FREE;
4479 ac->ac_inode = inode;
4480 ac->ac_sb = sb;
4481 }
4442 4482
4443do_more: 4483do_more:
4444 overflow = 0; 4484 overflow = 0;
@@ -4504,10 +4544,12 @@ do_more:
4504 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4544 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4505 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 4545 err = ext4_journal_dirty_metadata(handle, bitmap_bh);
4506 4546
4507 ac.ac_b_ex.fe_group = block_group; 4547 if (ac) {
4508 ac.ac_b_ex.fe_start = bit; 4548 ac->ac_b_ex.fe_group = block_group;
4509 ac.ac_b_ex.fe_len = count; 4549 ac->ac_b_ex.fe_start = bit;
4510 ext4_mb_store_history(&ac); 4550 ac->ac_b_ex.fe_len = count;
4551 ext4_mb_store_history(ac);
4552 }
4511 4553
4512 if (metadata) { 4554 if (metadata) {
4513 /* blocks being freed are metadata. these blocks shouldn't 4555 /* blocks being freed are metadata. these blocks shouldn't
@@ -4548,5 +4590,7 @@ do_more:
4548error_return: 4590error_return:
4549 brelse(bitmap_bh); 4591 brelse(bitmap_bh);
4550 ext4_std_error(sb, err); 4592 ext4_std_error(sb, err);
4593 if (ac)
4594 kmem_cache_free(ext4_ac_cachep, ac);
4551 return; 4595 return;
4552} 4596}