Diffstat (limited to 'fs/ext4/mballoc.c')

 fs/ext4/mballoc.c | 204 ++++++++++++++++-------------------------
 1 file changed, 80 insertions(+), 124 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9f61e62f435f..f871677a7984 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -46,22 +46,23 @@
  * The allocation request involve request for multiple number of blocks
  * near to the goal(block) value specified.
  *
- * During initialization phase of the allocator we decide to use the group
- * preallocation or inode preallocation depending on the size file. The
- * size of the file could be the resulting file size we would have after
- * allocation or the current file size which ever is larger. If the size is
- * less that sbi->s_mb_stream_request we select the group
- * preallocation. The default value of s_mb_stream_request is 16
- * blocks. This can also be tuned via
- * /proc/fs/ext4/<partition>/stream_req. The value is represented in terms
- * of number of blocks.
+ * During initialization phase of the allocator we decide to use the
+ * group preallocation or inode preallocation depending on the size of
+ * the file. The size of the file could be the resulting file size we
+ * would have after allocation, or the current file size, which ever
+ * is larger. If the size is less than sbi->s_mb_stream_request we
+ * select to use the group preallocation. The default value of
+ * s_mb_stream_request is 16 blocks. This can also be tuned via
+ * /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
+ * terms of number of blocks.
  *
  * The main motivation for having small file use group preallocation is to
- * ensure that we have small file closer in the disk.
+ * ensure that we have small files closer together on the disk.
  *
- * First stage the allocator looks at the inode prealloc list
- * ext4_inode_info->i_prealloc_list contain list of prealloc spaces for
- * this particular inode. The inode prealloc space is represented as:
+ * First stage the allocator looks at the inode prealloc list,
+ * ext4_inode_info->i_prealloc_list, which contains list of prealloc
+ * spaces for this particular inode. The inode prealloc space is
+ * represented as:
  *
  * pa_lstart -> the logical start block for this prealloc space
  * pa_pstart -> the physical start block for this prealloc space
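[Note: the rewritten comment above pins down the size cutoff that steers an allocation toward group vs. inode preallocation. As a minimal sketch of that decision, assuming both sizes are already expressed in blocks; wants_group_prealloc is a hypothetical name, not the kernel's ext4_mb_group_or_file():]

    #include <stdbool.h>

    /* Sketch: files below s_mb_stream_request (default 16 blocks,
     * tunable via /sys/fs/ext4/<partition>/mb_stream_req) take the
     * per-CPU group preallocation; larger files take inode
     * preallocation. */
    static bool wants_group_prealloc(unsigned long long cur_blocks,
                                     unsigned long long after_alloc_blocks,
                                     unsigned long stream_request)
    {
        unsigned long long size = cur_blocks > after_alloc_blocks ?
                                  cur_blocks : after_alloc_blocks;

        return size < stream_request;
    }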
@@ -121,29 +122,29 @@
  * list. In case of inode preallocation we follow a list of heuristics
  * based on file size. This can be found in ext4_mb_normalize_request. If
  * we are doing a group prealloc we try to normalize the request to
- * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to
+ * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
  * 512 blocks. This can be tuned via
- * /proc/fs/ext4/<partition/group_prealloc. The value is represented in
+ * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in
  * terms of number of blocks. If we have mounted the file system with -O
  * stripe=<value> option the group prealloc request is normalized to the
  * stripe value (sbi->s_stripe)
  *
- * The regular allocator(using the buddy cache) support few tunables.
+ * The regular allocator(using the buddy cache) supports few tunables.
  *
- * /proc/fs/ext4/<partition>/min_to_scan
- * /proc/fs/ext4/<partition>/max_to_scan
- * /proc/fs/ext4/<partition>/order2_req
+ * /sys/fs/ext4/<partition>/mb_min_to_scan
+ * /sys/fs/ext4/<partition>/mb_max_to_scan
+ * /sys/fs/ext4/<partition>/mb_order2_req
  *
- * The regular allocator use buddy scan only if the request len is power of
+ * The regular allocator uses buddy scan only if the request len is power of
  * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
  * value of s_mb_order2_reqs can be tuned via
- * /proc/fs/ext4/<partition>/order2_req. If the request len is equal to
+ * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
  * stripe size (sbi->s_stripe), we try to search for contigous block in
- * stripe size. This should result in better allocation on RAID setup. If
- * not we search in the specific group using bitmap for best extents. The
- * tunable min_to_scan and max_to_scan controll the behaviour here.
+ * stripe size. This should result in better allocation on RAID setups. If
+ * not, we search in the specific group using bitmap for best extents. The
+ * tunable min_to_scan and max_to_scan control the behaviour here.
  * min_to_scan indicate how long the mballoc __must__ look for a best
- * extent and max_to_scanindicate how long the mballoc __can__ look for a
+ * extent and max_to_scan indicates how long the mballoc __can__ look for a
  * best extent in the found extents. Searching for the blocks starts with
  * the group specified as the goal value in allocation context via
  * ac_g_ex. Each group is first checked based on the criteria whether it
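[Note: the paragraph above also spells out the power-of-two gate on the buddy scan. A self-contained sketch of that check; request_uses_buddy_scan is a hypothetical name, not a kernel function:]

    /* Sketch: the buddy (order-2) scan is attempted only for
     * power-of-two request lengths whose order is at least
     * s_mb_order2_reqs, tunable via
     * /sys/fs/ext4/<partition>/mb_order2_req. */
    static int request_uses_buddy_scan(unsigned int len,
                                       unsigned int order2_reqs)
    {
        unsigned int order = 0;

        if (len == 0 || (len & (len - 1)))
            return 0;               /* not a power of two */
        while (len >>= 1)
            order++;                /* order = log2(request length) */
        return order >= order2_reqs;
    }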
@@ -337,8 +338,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 						ext4_group_t group);
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 						ext4_group_t group);
-static int ext4_mb_init_per_dev_proc(struct super_block *sb);
-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
 static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
 
 
@@ -1726,6 +1725,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 {
 	unsigned free, fragments;
 	unsigned i, bits;
+	int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
 	struct ext4_group_desc *desc;
 	struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
@@ -1747,6 +1747,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
 		return 0;
 
+	/* Avoid using the first bg of a flexgroup for data files */
+	if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
+	    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
+	    ((group % flex_size) == 0))
+		return 0;
+
 	bits = ac->ac_sb->s_blocksize_bits + 1;
 	for (i = ac->ac_2order; i <= bits; i++)
 		if (grp->bb_counters[i] > 0)
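[Note: the new hunk above keeps data allocations out of the first block group of each flex group, where the flex group's bitmaps and inode tables tend to be clustered. The arithmetic it relies on is simple; a sketch, assuming 2^log_groups_per_flex block groups are packed into one flex group (these helpers are illustrative stand-ins for ext4_flex_bg_size() and ext4_flex_group()):]

    static unsigned int flex_bg_size(unsigned int log_groups_per_flex)
    {
        return 1U << log_groups_per_flex;
    }

    /* Block group g sits in flex group g >> log_groups_per_flex, so
     * (g % flex_bg_size()) == 0 matches exactly the first block group
     * of every flex group. */
    static unsigned int flex_group_of(unsigned int g,
                                      unsigned int log_groups_per_flex)
    {
        return g >> log_groups_per_flex;
    }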
@@ -1971,7 +1977,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 		/*
 		 * We search using buddy data only if the order of the request
 		 * is greater than equal to the sbi_s_mb_order2_reqs
-		 * You can tune it via /proc/fs/ext4/<partition>/order2_req
+		 * You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
 		 */
 		if (i >= sbi->s_mb_order2_reqs) {
 			/*
@@ -2693,7 +2699,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);
 	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
 	if (sbi->s_mb_maxs == NULL) {
-		kfree(sbi->s_mb_maxs);
+		kfree(sbi->s_mb_offsets);
 		return -ENOMEM;
 	}
 
@@ -2746,7 +2752,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 		spin_lock_init(&lg->lg_prealloc_lock);
 	}
 
-	ext4_mb_init_per_dev_proc(sb);
 	ext4_mb_history_init(sb);
 
 	if (sbi->s_journal)
@@ -2829,7 +2834,6 @@ int ext4_mb_release(struct super_block *sb)
 
 	free_percpu(sbi->s_locality_groups);
 	ext4_mb_history_release(sb);
-	ext4_mb_destroy_per_dev_proc(sb);
 
 	return 0;
 }
@@ -2890,62 +2894,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 	mb_debug("freed %u blocks in %u structures\n", count, count2);
 }
 
-#define EXT4_MB_STATS_NAME		"stats"
-#define EXT4_MB_MAX_TO_SCAN_NAME	"max_to_scan"
-#define EXT4_MB_MIN_TO_SCAN_NAME	"min_to_scan"
-#define EXT4_MB_ORDER2_REQ		"order2_req"
-#define EXT4_MB_STREAM_REQ		"stream_req"
-#define EXT4_MB_GROUP_PREALLOC		"group_prealloc"
-
-static int ext4_mb_init_per_dev_proc(struct super_block *sb)
-{
-#ifdef CONFIG_PROC_FS
-	mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct proc_dir_entry *proc;
-
-	if (sbi->s_proc == NULL)
-		return -EINVAL;
-
-	EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
-	EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
-	EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
-	EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
-	EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
-	EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
-	return 0;
-
-err_out:
-	remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
-	return -ENOMEM;
-#else
-	return 0;
-#endif
-}
-
-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
-{
-#ifdef CONFIG_PROC_FS
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-	if (sbi->s_proc == NULL)
-		return -EINVAL;
-
-	remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
-	remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
-#endif
-	return 0;
-}
-
 int __init init_ext4_mballoc(void)
 {
 	ext4_pspace_cachep =
@@ -3086,16 +3034,18 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
 		/* release all the reserved blocks if non delalloc */
 		percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
-	else
+	else {
 		percpu_counter_sub(&sbi->s_dirtyblocks_counter,
 						ac->ac_b_ex.fe_len);
+		/* convert reserved quota blocks to real quota blocks */
+		vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
+	}
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi,
 							  ac->ac_b_ex.fe_group);
-		spin_lock(sb_bgl_lock(sbi, flex_group));
-		sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
-		spin_unlock(sb_bgl_lock(sbi, flex_group));
+		atomic_sub(ac->ac_b_ex.fe_len,
+			   &sbi->s_flex_groups[flex_group].free_blocks);
 	}
 
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
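[Note: this hunk, like the matching one in the free path at the end of the diff, replaces sb_bgl_lock-protected arithmetic on the flex-group free-block counter with an atomic operation. That only works if the counter itself became an atomic_t, so the companion change to struct flex_groups in ext4.h presumably looks something like this sketch (not quoted from this diff):]

    /* Assumed ext4.h counterpart: per-flex-group counters held as
     * atomic_t, so updates no longer need a spinlock. */
    struct flex_groups {
        atomic_t free_inodes;
        atomic_t free_blocks;
        atomic_t used_dirs;
    };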
@@ -3113,7 +3063,7 @@ out_err:
  * here we normalize request for locality group
  * Group request are normalized to s_strip size if we set the same via mount
  * option. If not we set it to s_mb_group_prealloc which can be configured via
- * /proc/fs/ext4/<partition>/group_prealloc
+ * /sys/fs/ext4/<partition>/mb_group_prealloc
  *
  * XXX: should we try to preallocate more than the group has now?
  */
@@ -3605,8 +3555,11 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 	spin_unlock(&pa->pa_lock);
 
 	grp_blk = pa->pa_pstart;
-	/* If linear, pa_pstart may be in the next group when pa is used up */
-	if (pa->pa_linear)
+	/*
+	 * If doing group-based preallocation, pa_pstart may be in the
+	 * next group when pa is used up
+	 */
+	if (pa->pa_type == MB_GROUP_PA)
 		grp_blk--;
 
 	ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
@@ -3701,7 +3654,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 	INIT_LIST_HEAD(&pa->pa_inode_list);
 	INIT_LIST_HEAD(&pa->pa_group_list);
 	pa->pa_deleted = 0;
-	pa->pa_linear = 0;
+	pa->pa_type = MB_INODE_PA;
 
 	mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
 			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
@@ -3764,7 +3717,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 	INIT_LIST_HEAD(&pa->pa_inode_list);
 	INIT_LIST_HEAD(&pa->pa_group_list);
 	pa->pa_deleted = 0;
-	pa->pa_linear = 1;
+	pa->pa_type = MB_GROUP_PA;
 
 	mb_debug("new group pa %p: %llu/%u for %u\n", pa,
 			pa->pa_pstart, pa->pa_len, pa->pa_lstart);
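[Note: these two hunks, together with the pa_linear tests replaced below, convert the boolean pa_linear flag into an explicit pa_type field with two values. Judging from the names used in this diff, the declarations added to mballoc.h are presumably along these lines (a sketch, not quoted from the diff):]

    /* Sketch: the two preallocation flavours referenced in this diff. */
    enum {
        MB_INODE_PA = 0,    /* preallocation tied to a single inode */
        MB_GROUP_PA = 1     /* per-CPU locality-group preallocation */
    };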
@@ -4018,7 +3971,7 @@ repeat:
 		list_del_rcu(&pa->pa_inode_list);
 		spin_unlock(pa->pa_obj_lock);
 
-		if (pa->pa_linear)
+		if (pa->pa_type == MB_GROUP_PA)
 			ext4_mb_release_group_pa(&e4b, pa, ac);
 		else
 			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
@@ -4118,7 +4071,7 @@ repeat:
 	spin_unlock(&ei->i_prealloc_lock);
 
 	list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
-		BUG_ON(pa->pa_linear != 0);
+		BUG_ON(pa->pa_type != MB_INODE_PA);
 		ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
 
 		err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -4229,7 +4182,7 @@ static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
  * file is determined by the current size or the resulting size after
  * allocation which ever is larger
  *
- * One can tune this size via /proc/fs/ext4/<partition>/stream_req
+ * One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
  */
 static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 {
@@ -4370,7 +4323,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 			continue;
 		}
 		/* only lg prealloc space */
-		BUG_ON(!pa->pa_linear);
+		BUG_ON(pa->pa_type != MB_GROUP_PA);
 
 		/* seems this one can be freed ... */
 		pa->pa_deleted = 1;
@@ -4439,7 +4392,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
 						pa_inode_list) {
 		spin_lock(&tmp_pa->pa_lock);
 		if (tmp_pa->pa_deleted) {
-			spin_unlock(&pa->pa_lock);
+			spin_unlock(&tmp_pa->pa_lock);
 			continue;
 		}
 		if (!added && pa->pa_free < tmp_pa->pa_free) {
@@ -4476,7 +4429,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 {
 	struct ext4_prealloc_space *pa = ac->ac_pa;
 	if (pa) {
-		if (pa->pa_linear) {
+		if (pa->pa_type == MB_GROUP_PA) {
 			/* see comment in ext4_mb_use_group_pa() */
 			spin_lock(&pa->pa_lock);
 			pa->pa_pstart += ac->ac_b_ex.fe_len;
@@ -4496,7 +4449,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 			 * doesn't grow big. We need to release
 			 * alloc_semp before calling ext4_mb_add_n_trim()
 			 */
-			if (pa->pa_linear && likely(pa->pa_free)) {
+			if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
 				spin_lock(pa->pa_obj_lock);
 				list_del_rcu(&pa->pa_inode_list);
 				spin_unlock(pa->pa_obj_lock);
@@ -4544,7 +4497,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	struct ext4_sb_info *sbi;
 	struct super_block *sb;
 	ext4_fsblk_t block = 0;
-	unsigned int inquota;
+	unsigned int inquota = 0;
 	unsigned int reserv_blks = 0;
 
 	sb = ar->inode->i_sb;
@@ -4562,9 +4515,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 		(unsigned long long) ar->pleft,
 		(unsigned long long) ar->pright);
 
-	if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
-		/*
-		 * With delalloc we already reserved the blocks
-		 */
+	/*
+	 * For delayed allocation, we could skip the ENOSPC and
+	 * EDQUOT check, as blocks and quotas have been already
+	 * reserved when data being copied into pagecache.
+	 */
+	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+	else {
+		/* Without delayed allocation we need to verify
+		 * there is enough free blocks to do block allocation
+		 * and verify allocation doesn't exceed the quota limits.
+		 */
 		while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
 			/* let others to free the space */
@@ -4576,19 +4537,16 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 			return 0;
 		}
 		reserv_blks = ar->len;
+		while (ar->len && vfs_dq_alloc_block(ar->inode, ar->len)) {
+			ar->flags |= EXT4_MB_HINT_NOPREALLOC;
+			ar->len--;
+		}
+		inquota = ar->len;
+		if (ar->len == 0) {
+			*errp = -EDQUOT;
+			goto out3;
+		}
 	}
-	while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
-		ar->flags |= EXT4_MB_HINT_NOPREALLOC;
-		ar->len--;
-	}
-	if (ar->len == 0) {
-		*errp = -EDQUOT;
-		goto out3;
-	}
-	inquota = ar->len;
-
-	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
-		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
 
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 	if (!ac) {
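[Note: taken together, the last two hunks move the quota charge inside the non-delalloc branch (delalloc already reserved blocks and quota when the data entered the page cache) and switch from the DQUOT_* macros to the vfs_dq_* interface; the next hunk then makes the rollback path free only what was actually charged, guarded by inquota. A compilable sketch of the reordered checks; every identifier here is a hypothetical stand-in for the real calls above:]

    /* Sketch: ENOSPC then EDQUOT checks happen only on the
     * non-delalloc path; both may shrink the request, and a length of
     * zero means failure. */
    static int sketch_new_blocks_checks(int delalloc_reserved,
                                        unsigned int *len,
                                        unsigned int free_blocks,
                                        unsigned int quota_left,
                                        unsigned int *inquota)
    {
        *inquota = 0;                   /* nothing charged to quota yet */
        if (delalloc_reserved)
            return 0;                   /* reserved at write time */

        while (*len && *len > free_blocks)
            (*len)--;                   /* shrink toward what fits */
        if (*len == 0)
            return -1;                  /* stands in for -ENOSPC */

        while (*len && *len > quota_left)
            (*len)--;                   /* then fit within the quota */
        *inquota = *len;                /* amount actually charged */
        if (*len == 0)
            return -2;                  /* stands in for -EDQUOT */
        return 0;
    }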
@@ -4654,8 +4612,8 @@ repeat:
 out2:
 	kmem_cache_free(ext4_ac_cachep, ac);
 out1:
-	if (ar->len < inquota)
-		DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
+	if (inquota && ar->len < inquota)
+		vfs_dq_free_block(ar->inode, inquota - ar->len);
 out3:
 	if (!ar->len) {
 		if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
@@ -4928,9 +4886,7 @@ do_more:
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		spin_lock(sb_bgl_lock(sbi, flex_group));
-		sbi->s_flex_groups[flex_group].free_blocks += count;
-		spin_unlock(sb_bgl_lock(sbi, flex_group));
+		atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
 	}
 
 	ext4_mb_release_desc(&e4b);