author     Yan, Zheng <zheng.yan@oracle.com>       2010-05-16 10:48:47 -0400
committer  Chris Mason <chris.mason@oracle.com>    2010-05-25 10:34:51 -0400
commit     0ca1f7ceb1991099ed5273885ebcf4323948c72e (patch)
tree       10758d6a55c529aced177da3f6bf45cf26361913
parent     a22285a6a32390195235171b89d157ed1a1fe932 (diff)
Btrfs: Update metadata reservation for delayed allocation
Introduce a metadata reservation context for delayed allocation and update the various related functions.

This patch also introduces an EXTENT_FIRST_DELALLOC control bit for set/clear_extent_bit. It tells set/clear_bit_hook whether they are processing the first extent_state with the EXTENT_DELALLOC bit set. This distinction matters when a single set/clear_extent_bit call involves multiple extent_states.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
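The mechanism is easiest to see in isolation. Below is a minimal, self-contained C sketch (not the in-tree btrfs code; the counter and helpers are simplified stand-ins) of how passing the bit mask by pointer lets the hook consume EXTENT_FIRST_DELALLOC on the first extent_state it visits and bump the outstanding extent count only for the additional states covered by the same call:

/*
 * Simplified model of the EXTENT_FIRST_DELALLOC control bit.  The caller
 * reserves metadata for one new delalloc extent, then sets EXTENT_DELALLOC
 * over a range that may already be split into several extent_state records.
 */
#include <stdio.h>

#define EXTENT_DELALLOC        (1 << 0)
#define EXTENT_FIRST_DELALLOC  (1 << 1)
#define EXTENT_CTLBITS         (EXTENT_FIRST_DELALLOC)

static int outstanding_extents;  /* stands in for BTRFS_I(inode)->outstanding_extents */

/* stands in for set_bit_hook(); called once per extent_state touched */
static void set_bit_hook(int state_bits, int *bits)
{
	if (!(state_bits & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		if (*bits & EXTENT_FIRST_DELALLOC)
			*bits &= ~EXTENT_FIRST_DELALLOC; /* first state: caller already accounted it */
		else
			outstanding_extents++;           /* every further state is a new extent */
	}
}

/* stands in for set_extent_bit(): one logical call hitting nr_states states */
static void set_extent_bit(int nr_states, int bits)
{
	bits |= EXTENT_FIRST_DELALLOC;           /* control bit, never stored in state->state */
	while (nr_states--) {
		set_bit_hook(0 /* no DELALLOC yet */, &bits);
		/* real code: state->state |= bits & ~EXTENT_CTLBITS; */
	}
}

int main(void)
{
	outstanding_extents = 1;                 /* btrfs_delalloc_reserve_metadata() counted one */
	set_extent_bit(3, EXTENT_DELALLOC);      /* range spans three extent_states */
	printf("outstanding_extents = %d\n", outstanding_extents);  /* prints 3 */
	return 0;
}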
-rw-r--r--  fs/btrfs/btrfs_inode.h   |   2
-rw-r--r--  fs/btrfs/ctree.h         |  19
-rw-r--r--  fs/btrfs/extent-tree.c   | 361
-rw-r--r--  fs/btrfs/extent_io.c     |  63
-rw-r--r--  fs/btrfs/extent_io.h     |   8
-rw-r--r--  fs/btrfs/file.c          |  27
-rw-r--r--  fs/btrfs/inode.c         | 131
-rw-r--r--  fs/btrfs/ioctl.c         |  29
-rw-r--r--  fs/btrfs/ordered-data.c  |   7

9 files changed, 232 insertions, 415 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 7a4dee19983..40510d9351f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -137,8 +137,8 @@ struct btrfs_inode {
137 * of extent items we've reserved metadata for. 137 * of extent items we've reserved metadata for.
138 */ 138 */
139 spinlock_t accounting_lock; 139 spinlock_t accounting_lock;
140 atomic_t outstanding_extents;
140 int reserved_extents; 141 int reserved_extents;
141 int outstanding_extents;
142 142
143 /* 143 /*
144 * ordered_data_close is set by truncate when a file that used 144 * ordered_data_close is set by truncate when a file that used
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e0aa9fb563e..d4744192ead 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2079,19 +2079,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
2079u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 2079u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
2080void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 2080void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
2081void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 2081void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
2082 2082int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
2083int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, 2083void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
2084 struct inode *inode, int num_items);
2085int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
2086 struct inode *inode, int num_items);
2087int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2088 u64 bytes);
2089void btrfs_free_reserved_data_space(struct btrfs_root *root,
2090 struct inode *inode, u64 bytes);
2091void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
2092 u64 bytes);
2093void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
2094 u64 bytes);
2095int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 2084int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
2096 struct btrfs_root *root, 2085 struct btrfs_root *root,
2097 int num_items, int *retries); 2086 int num_items, int *retries);
@@ -2099,6 +2088,10 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
2099 struct btrfs_root *root); 2088 struct btrfs_root *root);
2100int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, 2089int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
2101 struct btrfs_pending_snapshot *pending); 2090 struct btrfs_pending_snapshot *pending);
2091int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
2092void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
2093int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
2094void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
2102void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); 2095void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
2103struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); 2096struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
2104void btrfs_free_block_rsv(struct btrfs_root *root, 2097void btrfs_free_block_rsv(struct btrfs_root *root,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 657df6e002d..b1822e752b4 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -64,12 +64,6 @@ static int find_next_key(struct btrfs_path *path, int level,
64 struct btrfs_key *key); 64 struct btrfs_key *key);
65static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 65static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
66 int dump_block_groups); 66 int dump_block_groups);
67static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
68 struct btrfs_root *root,
69 struct btrfs_space_info *sinfo, u64 num_bytes);
70static int shrink_delalloc(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root,
72 struct btrfs_space_info *sinfo, u64 to_reclaim);
73 67
74static noinline int 68static noinline int
75block_group_cache_done(struct btrfs_block_group_cache *cache) 69block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2880,189 +2874,14 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2880 BTRFS_BLOCK_GROUP_DATA); 2874 BTRFS_BLOCK_GROUP_DATA);
2881} 2875}
2882 2876
2883static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
2884{
2885 u64 num_bytes;
2886 int level;
2887
2888 level = BTRFS_MAX_LEVEL - 2;
2889 /*
2890 * NOTE: these calculations are absolutely the worst possible case.
2891 * This assumes that _every_ item we insert will require a new leaf, and
2892 * that the tree has grown to its maximum level size.
2893 */
2894
2895 /*
2896 * for every item we insert we could insert both an extent item and a
2897 * extent ref item. Then for ever item we insert, we will need to cow
2898 * both the original leaf, plus the leaf to the left and right of it.
2899 *
2900 * Unless we are talking about the extent root, then we just want the
2901 * number of items * 2, since we just need the extent item plus its ref.
2902 */
2903 if (root == root->fs_info->extent_root)
2904 num_bytes = num_items * 2;
2905 else
2906 num_bytes = (num_items + (2 * num_items)) * 3;
2907
2908 /*
2909 * num_bytes is total number of leaves we could need times the leaf
2910 * size, and then for every leaf we could end up cow'ing 2 nodes per
2911 * level, down to the leaf level.
2912 */
2913 num_bytes = (num_bytes * root->leafsize) +
2914 (num_bytes * (level * 2)) * root->nodesize;
2915
2916 return num_bytes;
2917}
2918
2919/*
2920 * Unreserve metadata space for delalloc. If we have less reserved credits than
2921 * we have extents, this function does nothing.
2922 */
2923int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2924 struct inode *inode, int num_items)
2925{
2926 struct btrfs_fs_info *info = root->fs_info;
2927 struct btrfs_space_info *meta_sinfo;
2928 u64 num_bytes;
2929 u64 alloc_target;
2930 bool bug = false;
2931
2932 /* get the space info for where the metadata will live */
2933 alloc_target = btrfs_get_alloc_profile(root, 0);
2934 meta_sinfo = __find_space_info(info, alloc_target);
2935
2936 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2937 num_items);
2938
2939 spin_lock(&meta_sinfo->lock);
2940 spin_lock(&BTRFS_I(inode)->accounting_lock);
2941 if (BTRFS_I(inode)->reserved_extents <=
2942 BTRFS_I(inode)->outstanding_extents) {
2943 spin_unlock(&BTRFS_I(inode)->accounting_lock);
2944 spin_unlock(&meta_sinfo->lock);
2945 return 0;
2946 }
2947 spin_unlock(&BTRFS_I(inode)->accounting_lock);
2948
2949 BTRFS_I(inode)->reserved_extents -= num_items;
2950 BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
2951
2952 if (meta_sinfo->bytes_delalloc < num_bytes) {
2953 bug = true;
2954 meta_sinfo->bytes_delalloc = 0;
2955 } else {
2956 meta_sinfo->bytes_delalloc -= num_bytes;
2957 }
2958 spin_unlock(&meta_sinfo->lock);
2959
2960 BUG_ON(bug);
2961
2962 return 0;
2963}
2964
2965static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2966{
2967 u64 thresh;
2968
2969 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2970 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2971 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2972 meta_sinfo->bytes_may_use;
2973
2974 thresh = meta_sinfo->total_bytes - thresh;
2975 thresh *= 80;
2976 do_div(thresh, 100);
2977 if (thresh <= meta_sinfo->bytes_delalloc)
2978 meta_sinfo->force_delalloc = 1;
2979 else
2980 meta_sinfo->force_delalloc = 0;
2981}
2982
2983/*
2984 * Reserve metadata space for delalloc.
2985 */
2986int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
2987 struct inode *inode, int num_items)
2988{
2989 struct btrfs_fs_info *info = root->fs_info;
2990 struct btrfs_space_info *meta_sinfo;
2991 u64 num_bytes;
2992 u64 used;
2993 u64 alloc_target;
2994 int flushed = 0;
2995 int force_delalloc;
2996
2997 /* get the space info for where the metadata will live */
2998 alloc_target = btrfs_get_alloc_profile(root, 0);
2999 meta_sinfo = __find_space_info(info, alloc_target);
3000
3001 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
3002 num_items);
3003again:
3004 spin_lock(&meta_sinfo->lock);
3005
3006 force_delalloc = meta_sinfo->force_delalloc;
3007
3008 if (unlikely(!meta_sinfo->bytes_root))
3009 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
3010
3011 if (!flushed)
3012 meta_sinfo->bytes_delalloc += num_bytes;
3013
3014 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
3015 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
3016 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
3017 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
3018
3019 if (used > meta_sinfo->total_bytes) {
3020 flushed++;
3021
3022 if (flushed == 1) {
3023 if (maybe_allocate_chunk(NULL, root, meta_sinfo,
3024 num_bytes))
3025 goto again;
3026 flushed++;
3027 } else {
3028 spin_unlock(&meta_sinfo->lock);
3029 }
3030
3031 if (flushed == 2) {
3032 filemap_flush(inode->i_mapping);
3033 goto again;
3034 } else if (flushed == 3) {
3035 shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
3036 goto again;
3037 }
3038 spin_lock(&meta_sinfo->lock);
3039 meta_sinfo->bytes_delalloc -= num_bytes;
3040 spin_unlock(&meta_sinfo->lock);
3041 printk(KERN_ERR "enospc, has %d, reserved %d\n",
3042 BTRFS_I(inode)->outstanding_extents,
3043 BTRFS_I(inode)->reserved_extents);
3044 dump_space_info(meta_sinfo, 0, 0);
3045 return -ENOSPC;
3046 }
3047
3048 BTRFS_I(inode)->reserved_extents += num_items;
3049 check_force_delalloc(meta_sinfo);
3050 spin_unlock(&meta_sinfo->lock);
3051
3052 if (!flushed && force_delalloc)
3053 filemap_flush(inode->i_mapping);
3054
3055 return 0;
3056}
3057
3058/* 2877/*
3059 * This will check the space that the inode allocates from to make sure we have 2878 * This will check the space that the inode allocates from to make sure we have
3060 * enough space for bytes. 2879 * enough space for bytes.
3061 */ 2880 */
3062int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 2881int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3063 u64 bytes)
3064{ 2882{
3065 struct btrfs_space_info *data_sinfo; 2883 struct btrfs_space_info *data_sinfo;
2884 struct btrfs_root *root = BTRFS_I(inode)->root;
3066 u64 used; 2885 u64 used;
3067 int ret = 0, committed = 0; 2886 int ret = 0, committed = 0;
3068 2887
@@ -3147,12 +2966,13 @@ alloc:
3147} 2966}
3148 2967
3149/* 2968/*
3150 * if there was an error for whatever reason after calling 2969 * called when we are clearing an delalloc extent from the
3151 * btrfs_check_data_free_space, call this so we can cleanup the counters. 2970 * inode's io_tree or there was an error for whatever reason
2971 * after calling btrfs_check_data_free_space
3152 */ 2972 */
3153void btrfs_free_reserved_data_space(struct btrfs_root *root, 2973void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3154 struct inode *inode, u64 bytes)
3155{ 2974{
2975 struct btrfs_root *root = BTRFS_I(inode)->root;
3156 struct btrfs_space_info *data_sinfo; 2976 struct btrfs_space_info *data_sinfo;
3157 2977
3158 /* make sure bytes are sectorsize aligned */ 2978 /* make sure bytes are sectorsize aligned */
@@ -3165,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root,
3165 spin_unlock(&data_sinfo->lock); 2985 spin_unlock(&data_sinfo->lock);
3166} 2986}
3167 2987
3168/* called when we are adding a delalloc extent to the inode's io_tree */
3169void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
3170 u64 bytes)
3171{
3172 struct btrfs_space_info *data_sinfo;
3173
3174 /* get the space info for where this inode will be storing its data */
3175 data_sinfo = BTRFS_I(inode)->space_info;
3176
3177 /* make sure we have enough space to handle the data first */
3178 spin_lock(&data_sinfo->lock);
3179 data_sinfo->bytes_delalloc += bytes;
3180
3181 /*
3182 * we are adding a delalloc extent without calling
3183 * btrfs_check_data_free_space first. This happens on a weird
3184 * writepage condition, but shouldn't hurt our accounting
3185 */
3186 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
3187 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
3188 BTRFS_I(inode)->reserved_bytes = 0;
3189 } else {
3190 data_sinfo->bytes_may_use -= bytes;
3191 BTRFS_I(inode)->reserved_bytes -= bytes;
3192 }
3193
3194 spin_unlock(&data_sinfo->lock);
3195}
3196
3197/* called when we are clearing an delalloc extent from the inode's io_tree */
3198void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
3199 u64 bytes)
3200{
3201 struct btrfs_space_info *info;
3202
3203 info = BTRFS_I(inode)->space_info;
3204
3205 spin_lock(&info->lock);
3206 info->bytes_delalloc -= bytes;
3207 spin_unlock(&info->lock);
3208}
3209
3210static void force_metadata_allocation(struct btrfs_fs_info *info) 2988static void force_metadata_allocation(struct btrfs_fs_info *info)
3211{ 2989{
3212 struct list_head *head = &info->space_info; 2990 struct list_head *head = &info->space_info;
@@ -3331,18 +3109,19 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
3331 * shrink metadata reservation for delalloc 3109 * shrink metadata reservation for delalloc
3332 */ 3110 */
3333static int shrink_delalloc(struct btrfs_trans_handle *trans, 3111static int shrink_delalloc(struct btrfs_trans_handle *trans,
3334 struct btrfs_root *root, 3112 struct btrfs_root *root, u64 to_reclaim)
3335 struct btrfs_space_info *sinfo, u64 to_reclaim)
3336{ 3113{
3114 struct btrfs_block_rsv *block_rsv;
3337 u64 reserved; 3115 u64 reserved;
3338 u64 max_reclaim; 3116 u64 max_reclaim;
3339 u64 reclaimed = 0; 3117 u64 reclaimed = 0;
3340 int pause = 1; 3118 int pause = 1;
3341 int ret; 3119 int ret;
3342 3120
3343 spin_lock(&sinfo->lock); 3121 block_rsv = &root->fs_info->delalloc_block_rsv;
3344 reserved = sinfo->bytes_delalloc; 3122 spin_lock(&block_rsv->lock);
3345 spin_unlock(&sinfo->lock); 3123 reserved = block_rsv->reserved;
3124 spin_unlock(&block_rsv->lock);
3346 3125
3347 if (reserved == 0) 3126 if (reserved == 0)
3348 return 0; 3127 return 0;
@@ -3361,11 +3140,11 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3361 pause = 1; 3140 pause = 1;
3362 } 3141 }
3363 3142
3364 spin_lock(&sinfo->lock); 3143 spin_lock(&block_rsv->lock);
3365 if (reserved > sinfo->bytes_delalloc) 3144 if (reserved > block_rsv->reserved)
3366 reclaimed = reserved - sinfo->bytes_delalloc; 3145 reclaimed = reserved - block_rsv->reserved;
3367 reserved = sinfo->bytes_delalloc; 3146 reserved = block_rsv->reserved;
3368 spin_unlock(&sinfo->lock); 3147 spin_unlock(&block_rsv->lock);
3369 3148
3370 if (reserved == 0 || reclaimed >= max_reclaim) 3149 if (reserved == 0 || reclaimed >= max_reclaim)
3371 break; 3150 break;
@@ -3394,7 +3173,7 @@ static int should_retry_reserve(struct btrfs_trans_handle *trans,
3394 if (trans && trans->transaction->in_commit) 3173 if (trans && trans->transaction->in_commit)
3395 return -ENOSPC; 3174 return -ENOSPC;
3396 3175
3397 ret = shrink_delalloc(trans, root, space_info, num_bytes); 3176 ret = shrink_delalloc(trans, root, num_bytes);
3398 if (ret) 3177 if (ret)
3399 return ret; 3178 return ret;
3400 3179
@@ -3754,6 +3533,108 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3754 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3533 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3755} 3534}
3756 3535
3536static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
3537{
3538 return num_bytes >>= 3;
3539}
3540
3541int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3542{
3543 struct btrfs_root *root = BTRFS_I(inode)->root;
3544 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3545 u64 to_reserve;
3546 int nr_extents;
3547 int retries = 0;
3548 int ret;
3549
3550 if (btrfs_transaction_in_commit(root->fs_info))
3551 schedule_timeout(1);
3552
3553 num_bytes = ALIGN(num_bytes, root->sectorsize);
3554again:
3555 spin_lock(&BTRFS_I(inode)->accounting_lock);
3556 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
3557 if (nr_extents > BTRFS_I(inode)->reserved_extents) {
3558 nr_extents -= BTRFS_I(inode)->reserved_extents;
3559 to_reserve = calc_trans_metadata_size(root, nr_extents);
3560 } else {
3561 nr_extents = 0;
3562 to_reserve = 0;
3563 }
3564
3565 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3566 ret = reserve_metadata_bytes(block_rsv, to_reserve);
3567 if (ret) {
3568 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3569 ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
3570 &retries);
3571 if (ret > 0)
3572 goto again;
3573 return ret;
3574 }
3575
3576 BTRFS_I(inode)->reserved_extents += nr_extents;
3577 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3578 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3579
3580 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3581
3582 if (block_rsv->size > 512 * 1024 * 1024)
3583 shrink_delalloc(NULL, root, to_reserve);
3584
3585 return 0;
3586}
3587
3588void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
3589{
3590 struct btrfs_root *root = BTRFS_I(inode)->root;
3591 u64 to_free;
3592 int nr_extents;
3593
3594 num_bytes = ALIGN(num_bytes, root->sectorsize);
3595 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
3596
3597 spin_lock(&BTRFS_I(inode)->accounting_lock);
3598 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
3599 if (nr_extents < BTRFS_I(inode)->reserved_extents) {
3600 nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
3601 BTRFS_I(inode)->reserved_extents -= nr_extents;
3602 } else {
3603 nr_extents = 0;
3604 }
3605 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3606
3607 to_free = calc_csum_metadata_size(inode, num_bytes);
3608 if (nr_extents > 0)
3609 to_free += calc_trans_metadata_size(root, nr_extents);
3610
3611 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
3612 to_free);
3613}
3614
3615int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
3616{
3617 int ret;
3618
3619 ret = btrfs_check_data_free_space(inode, num_bytes);
3620 if (ret)
3621 return ret;
3622
3623 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
3624 if (ret) {
3625 btrfs_free_reserved_data_space(inode, num_bytes);
3626 return ret;
3627 }
3628
3629 return 0;
3630}
3631
3632void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
3633{
3634 btrfs_delalloc_release_metadata(inode, num_bytes);
3635 btrfs_free_reserved_data_space(inode, num_bytes);
3636}
3637
3757static int update_block_group(struct btrfs_trans_handle *trans, 3638static int update_block_group(struct btrfs_trans_handle *trans,
3758 struct btrfs_root *root, 3639 struct btrfs_root *root,
3759 u64 bytenr, u64 num_bytes, int alloc) 3640 u64 bytenr, u64 num_bytes, int alloc)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d2d03684fab..1a57c17d402 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree,
335} 335}
336 336
337static int set_state_cb(struct extent_io_tree *tree, 337static int set_state_cb(struct extent_io_tree *tree,
338 struct extent_state *state, 338 struct extent_state *state, int *bits)
339 unsigned long bits)
340{ 339{
341 if (tree->ops && tree->ops->set_bit_hook) { 340 if (tree->ops && tree->ops->set_bit_hook) {
342 return tree->ops->set_bit_hook(tree->mapping->host, 341 return tree->ops->set_bit_hook(tree->mapping->host,
343 state->start, state->end, 342 state, bits);
344 state->state, bits);
345 } 343 }
346 344
347 return 0; 345 return 0;
348} 346}
349 347
350static void clear_state_cb(struct extent_io_tree *tree, 348static void clear_state_cb(struct extent_io_tree *tree,
351 struct extent_state *state, 349 struct extent_state *state, int *bits)
352 unsigned long bits)
353{ 350{
354 if (tree->ops && tree->ops->clear_bit_hook) 351 if (tree->ops && tree->ops->clear_bit_hook)
355 tree->ops->clear_bit_hook(tree->mapping->host, state, bits); 352 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
@@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree,
367 */ 364 */
368static int insert_state(struct extent_io_tree *tree, 365static int insert_state(struct extent_io_tree *tree,
369 struct extent_state *state, u64 start, u64 end, 366 struct extent_state *state, u64 start, u64 end,
370 int bits) 367 int *bits)
371{ 368{
372 struct rb_node *node; 369 struct rb_node *node;
370 int bits_to_set = *bits & ~EXTENT_CTLBITS;
373 int ret; 371 int ret;
374 372
375 if (end < start) { 373 if (end < start) {
@@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree,
384 if (ret) 382 if (ret)
385 return ret; 383 return ret;
386 384
387 if (bits & EXTENT_DIRTY) 385 if (bits_to_set & EXTENT_DIRTY)
388 tree->dirty_bytes += end - start + 1; 386 tree->dirty_bytes += end - start + 1;
389 state->state |= bits; 387 state->state |= bits_to_set;
390 node = tree_insert(&tree->state, end, &state->rb_node); 388 node = tree_insert(&tree->state, end, &state->rb_node);
391 if (node) { 389 if (node) {
392 struct extent_state *found; 390 struct extent_state *found;
@@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
456 * struct is freed and removed from the tree 454 * struct is freed and removed from the tree
457 */ 455 */
458static int clear_state_bit(struct extent_io_tree *tree, 456static int clear_state_bit(struct extent_io_tree *tree,
459 struct extent_state *state, int bits, int wake, 457 struct extent_state *state,
460 int delete) 458 int *bits, int wake)
461{ 459{
462 int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; 460 int bits_to_clear = *bits & ~EXTENT_CTLBITS;
463 int ret = state->state & bits_to_clear; 461 int ret = state->state & bits_to_clear;
464 462
465 if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { 463 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
466 u64 range = state->end - state->start + 1; 464 u64 range = state->end - state->start + 1;
467 WARN_ON(range > tree->dirty_bytes); 465 WARN_ON(range > tree->dirty_bytes);
468 tree->dirty_bytes -= range; 466 tree->dirty_bytes -= range;
@@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree,
471 state->state &= ~bits_to_clear; 469 state->state &= ~bits_to_clear;
472 if (wake) 470 if (wake)
473 wake_up(&state->wq); 471 wake_up(&state->wq);
474 if (delete || state->state == 0) { 472 if (state->state == 0) {
475 if (state->tree) { 473 if (state->tree) {
476 clear_state_cb(tree, state, state->state);
477 rb_erase(&state->rb_node, &tree->state); 474 rb_erase(&state->rb_node, &tree->state);
478 state->tree = NULL; 475 state->tree = NULL;
479 free_extent_state(state); 476 free_extent_state(state);
@@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
514 int set = 0; 511 int set = 0;
515 int clear = 0; 512 int clear = 0;
516 513
514 if (delete)
515 bits |= ~EXTENT_CTLBITS;
516 bits |= EXTENT_FIRST_DELALLOC;
517
517 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 518 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
518 clear = 1; 519 clear = 1;
519again: 520again:
@@ -580,8 +581,7 @@ hit_next:
580 if (err) 581 if (err)
581 goto out; 582 goto out;
582 if (state->end <= end) { 583 if (state->end <= end) {
583 set |= clear_state_bit(tree, state, bits, wake, 584 set |= clear_state_bit(tree, state, &bits, wake);
584 delete);
585 if (last_end == (u64)-1) 585 if (last_end == (u64)-1)
586 goto out; 586 goto out;
587 start = last_end + 1; 587 start = last_end + 1;
@@ -602,7 +602,7 @@ hit_next:
602 if (wake) 602 if (wake)
603 wake_up(&state->wq); 603 wake_up(&state->wq);
604 604
605 set |= clear_state_bit(tree, prealloc, bits, wake, delete); 605 set |= clear_state_bit(tree, prealloc, &bits, wake);
606 606
607 prealloc = NULL; 607 prealloc = NULL;
608 goto out; 608 goto out;
@@ -613,7 +613,7 @@ hit_next:
613 else 613 else
614 next_node = NULL; 614 next_node = NULL;
615 615
616 set |= clear_state_bit(tree, state, bits, wake, delete); 616 set |= clear_state_bit(tree, state, &bits, wake);
617 if (last_end == (u64)-1) 617 if (last_end == (u64)-1)
618 goto out; 618 goto out;
619 start = last_end + 1; 619 start = last_end + 1;
@@ -706,19 +706,19 @@ out:
706 706
707static int set_state_bits(struct extent_io_tree *tree, 707static int set_state_bits(struct extent_io_tree *tree,
708 struct extent_state *state, 708 struct extent_state *state,
709 int bits) 709 int *bits)
710{ 710{
711 int ret; 711 int ret;
712 int bits_to_set = *bits & ~EXTENT_CTLBITS;
712 713
713 ret = set_state_cb(tree, state, bits); 714 ret = set_state_cb(tree, state, bits);
714 if (ret) 715 if (ret)
715 return ret; 716 return ret;
716 717 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
717 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
718 u64 range = state->end - state->start + 1; 718 u64 range = state->end - state->start + 1;
719 tree->dirty_bytes += range; 719 tree->dirty_bytes += range;
720 } 720 }
721 state->state |= bits; 721 state->state |= bits_to_set;
722 722
723 return 0; 723 return 0;
724} 724}
@@ -757,6 +757,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
757 u64 last_start; 757 u64 last_start;
758 u64 last_end; 758 u64 last_end;
759 759
760 bits |= EXTENT_FIRST_DELALLOC;
760again: 761again:
761 if (!prealloc && (mask & __GFP_WAIT)) { 762 if (!prealloc && (mask & __GFP_WAIT)) {
762 prealloc = alloc_extent_state(mask); 763 prealloc = alloc_extent_state(mask);
@@ -778,7 +779,7 @@ again:
778 */ 779 */
779 node = tree_search(tree, start); 780 node = tree_search(tree, start);
780 if (!node) { 781 if (!node) {
781 err = insert_state(tree, prealloc, start, end, bits); 782 err = insert_state(tree, prealloc, start, end, &bits);
782 prealloc = NULL; 783 prealloc = NULL;
783 BUG_ON(err == -EEXIST); 784 BUG_ON(err == -EEXIST);
784 goto out; 785 goto out;
@@ -802,7 +803,7 @@ hit_next:
802 goto out; 803 goto out;
803 } 804 }
804 805
805 err = set_state_bits(tree, state, bits); 806 err = set_state_bits(tree, state, &bits);
806 if (err) 807 if (err)
807 goto out; 808 goto out;
808 809
@@ -852,7 +853,7 @@ hit_next:
852 if (err) 853 if (err)
853 goto out; 854 goto out;
854 if (state->end <= end) { 855 if (state->end <= end) {
855 err = set_state_bits(tree, state, bits); 856 err = set_state_bits(tree, state, &bits);
856 if (err) 857 if (err)
857 goto out; 858 goto out;
858 cache_state(state, cached_state); 859 cache_state(state, cached_state);
@@ -877,7 +878,7 @@ hit_next:
877 else 878 else
878 this_end = last_start - 1; 879 this_end = last_start - 1;
879 err = insert_state(tree, prealloc, start, this_end, 880 err = insert_state(tree, prealloc, start, this_end,
880 bits); 881 &bits);
881 BUG_ON(err == -EEXIST); 882 BUG_ON(err == -EEXIST);
882 if (err) { 883 if (err) {
883 prealloc = NULL; 884 prealloc = NULL;
@@ -903,7 +904,7 @@ hit_next:
903 err = split_state(tree, state, prealloc, end + 1); 904 err = split_state(tree, state, prealloc, end + 1);
904 BUG_ON(err == -EEXIST); 905 BUG_ON(err == -EEXIST);
905 906
906 err = set_state_bits(tree, prealloc, bits); 907 err = set_state_bits(tree, prealloc, &bits);
907 if (err) { 908 if (err) {
908 prealloc = NULL; 909 prealloc = NULL;
909 goto out; 910 goto out;
@@ -966,8 +967,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
966{ 967{
967 return clear_extent_bit(tree, start, end, 968 return clear_extent_bit(tree, start, end,
968 EXTENT_DIRTY | EXTENT_DELALLOC | 969 EXTENT_DIRTY | EXTENT_DELALLOC |
969 EXTENT_DO_ACCOUNTING, 0, 0, 970 EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
970 NULL, mask);
971} 971}
972 972
973int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 973int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1435,9 +1435,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1435 if (op & EXTENT_CLEAR_DELALLOC) 1435 if (op & EXTENT_CLEAR_DELALLOC)
1436 clear_bits |= EXTENT_DELALLOC; 1436 clear_bits |= EXTENT_DELALLOC;
1437 1437
1438 if (op & EXTENT_CLEAR_ACCOUNTING)
1439 clear_bits |= EXTENT_DO_ACCOUNTING;
1440
1441 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); 1438 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1442 if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 1439 if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
1443 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | 1440 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bbab4813646..86f10dc791d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -16,7 +16,9 @@
16#define EXTENT_BOUNDARY (1 << 9) 16#define EXTENT_BOUNDARY (1 << 9)
17#define EXTENT_NODATASUM (1 << 10) 17#define EXTENT_NODATASUM (1 << 10)
18#define EXTENT_DO_ACCOUNTING (1 << 11) 18#define EXTENT_DO_ACCOUNTING (1 << 11)
19#define EXTENT_FIRST_DELALLOC (1 << 12)
19#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 20#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
21#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
20 22
21/* flags for bio submission */ 23/* flags for bio submission */
22#define EXTENT_BIO_COMPRESSED 1 24#define EXTENT_BIO_COMPRESSED 1
@@ -69,10 +71,10 @@ struct extent_io_ops {
69 struct extent_state *state); 71 struct extent_state *state);
70 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 72 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
71 struct extent_state *state, int uptodate); 73 struct extent_state *state, int uptodate);
72 int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, 74 int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
73 unsigned long old, unsigned long bits); 75 int *bits);
74 int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, 76 int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
75 unsigned long bits); 77 int *bits);
76 int (*merge_extent_hook)(struct inode *inode, 78 int (*merge_extent_hook)(struct inode *inode,
77 struct extent_state *new, 79 struct extent_state *new,
78 struct extent_state *other); 80 struct extent_state *other);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 41e09e24e29..6d8f817eadb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -852,13 +852,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
852 852
853 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 853 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
854 854
855 /* do the reserve before the mutex lock in case we have to do some
856 * flushing. We wouldn't deadlock, but this is more polite.
857 */
858 err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
859 if (err)
860 goto out_nolock;
861
862 mutex_lock(&inode->i_mutex); 855 mutex_lock(&inode->i_mutex);
863 856
864 current->backing_dev_info = inode->i_mapping->backing_dev_info; 857 current->backing_dev_info = inode->i_mapping->backing_dev_info;
@@ -921,7 +914,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
921 WARN_ON(num_pages > nrptrs); 914 WARN_ON(num_pages > nrptrs);
922 memset(pages, 0, sizeof(struct page *) * nrptrs); 915 memset(pages, 0, sizeof(struct page *) * nrptrs);
923 916
924 ret = btrfs_check_data_free_space(root, inode, write_bytes); 917 ret = btrfs_delalloc_reserve_space(inode, write_bytes);
925 if (ret) 918 if (ret)
926 goto out; 919 goto out;
927 920
@@ -929,26 +922,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
929 pos, first_index, last_index, 922 pos, first_index, last_index,
930 write_bytes); 923 write_bytes);
931 if (ret) { 924 if (ret) {
932 btrfs_free_reserved_data_space(root, inode, 925 btrfs_delalloc_release_space(inode, write_bytes);
933 write_bytes);
934 goto out; 926 goto out;
935 } 927 }
936 928
937 ret = btrfs_copy_from_user(pos, num_pages, 929 ret = btrfs_copy_from_user(pos, num_pages,
938 write_bytes, pages, buf); 930 write_bytes, pages, buf);
939 if (ret) { 931 if (ret == 0) {
940 btrfs_free_reserved_data_space(root, inode, 932 dirty_and_release_pages(NULL, root, file, pages,
941 write_bytes); 933 num_pages, pos, write_bytes);
942 btrfs_drop_pages(pages, num_pages);
943 goto out;
944 } 934 }
945 935
946 ret = dirty_and_release_pages(NULL, root, file, pages,
947 num_pages, pos, write_bytes);
948 btrfs_drop_pages(pages, num_pages); 936 btrfs_drop_pages(pages, num_pages);
949 if (ret) { 937 if (ret) {
950 btrfs_free_reserved_data_space(root, inode, 938 btrfs_delalloc_release_space(inode, write_bytes);
951 write_bytes);
952 goto out; 939 goto out;
953 } 940 }
954 941
@@ -975,9 +962,7 @@ out:
975 mutex_unlock(&inode->i_mutex); 962 mutex_unlock(&inode->i_mutex);
976 if (ret) 963 if (ret)
977 err = ret; 964 err = ret;
978 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
979 965
980out_nolock:
981 kfree(pages); 966 kfree(pages);
982 if (pinned[0]) 967 if (pinned[0])
983 page_cache_release(pinned[0]); 968 page_cache_release(pinned[0]);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c4b0fd12df6..6e54665d37f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
252 inline_len, compressed_size, 252 inline_len, compressed_size,
253 compressed_pages); 253 compressed_pages);
254 BUG_ON(ret); 254 BUG_ON(ret);
255 btrfs_delalloc_release_metadata(inode, end + 1 - start);
255 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 256 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
256 return 0; 257 return 0;
257} 258}
@@ -414,6 +415,7 @@ again:
414 trans = btrfs_join_transaction(root, 1); 415 trans = btrfs_join_transaction(root, 1);
415 BUG_ON(!trans); 416 BUG_ON(!trans);
416 btrfs_set_trans_block_group(trans, inode); 417 btrfs_set_trans_block_group(trans, inode);
418 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
417 419
418 /* lets try to make an inline extent */ 420 /* lets try to make an inline extent */
419 if (ret || total_in < (actual_end - start)) { 421 if (ret || total_in < (actual_end - start)) {
@@ -439,7 +441,6 @@ again:
439 start, end, NULL, 441 start, end, NULL,
440 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 442 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
441 EXTENT_CLEAR_DELALLOC | 443 EXTENT_CLEAR_DELALLOC |
442 EXTENT_CLEAR_ACCOUNTING |
443 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 444 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
444 445
445 btrfs_end_transaction(trans, root); 446 btrfs_end_transaction(trans, root);
@@ -734,6 +735,7 @@ static noinline int cow_file_range(struct inode *inode,
734 trans = btrfs_join_transaction(root, 1); 735 trans = btrfs_join_transaction(root, 1);
735 BUG_ON(!trans); 736 BUG_ON(!trans);
736 btrfs_set_trans_block_group(trans, inode); 737 btrfs_set_trans_block_group(trans, inode);
738 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
737 739
738 actual_end = min_t(u64, isize, end + 1); 740 actual_end = min_t(u64, isize, end + 1);
739 741
@@ -753,7 +755,6 @@ static noinline int cow_file_range(struct inode *inode,
753 EXTENT_CLEAR_UNLOCK_PAGE | 755 EXTENT_CLEAR_UNLOCK_PAGE |
754 EXTENT_CLEAR_UNLOCK | 756 EXTENT_CLEAR_UNLOCK |
755 EXTENT_CLEAR_DELALLOC | 757 EXTENT_CLEAR_DELALLOC |
756 EXTENT_CLEAR_ACCOUNTING |
757 EXTENT_CLEAR_DIRTY | 758 EXTENT_CLEAR_DIRTY |
758 EXTENT_SET_WRITEBACK | 759 EXTENT_SET_WRITEBACK |
759 EXTENT_END_WRITEBACK); 760 EXTENT_END_WRITEBACK);
@@ -1226,15 +1227,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1226} 1227}
1227 1228
1228static int btrfs_split_extent_hook(struct inode *inode, 1229static int btrfs_split_extent_hook(struct inode *inode,
1229 struct extent_state *orig, u64 split) 1230 struct extent_state *orig, u64 split)
1230{ 1231{
1232 /* not delalloc, ignore it */
1231 if (!(orig->state & EXTENT_DELALLOC)) 1233 if (!(orig->state & EXTENT_DELALLOC))
1232 return 0; 1234 return 0;
1233 1235
1234 spin_lock(&BTRFS_I(inode)->accounting_lock); 1236 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
1235 BTRFS_I(inode)->outstanding_extents++;
1236 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1237
1238 return 0; 1237 return 0;
1239} 1238}
1240 1239
@@ -1252,10 +1251,7 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1252 if (!(other->state & EXTENT_DELALLOC)) 1251 if (!(other->state & EXTENT_DELALLOC))
1253 return 0; 1252 return 0;
1254 1253
1255 spin_lock(&BTRFS_I(inode)->accounting_lock); 1254 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
1256 BTRFS_I(inode)->outstanding_extents--;
1257 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1258
1259 return 0; 1255 return 0;
1260} 1256}
1261 1257
@@ -1264,8 +1260,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1264 * bytes in this file, and to maintain the list of inodes that 1260 * bytes in this file, and to maintain the list of inodes that
1265 * have pending delalloc work to be done. 1261 * have pending delalloc work to be done.
1266 */ 1262 */
1267static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, 1263static int btrfs_set_bit_hook(struct inode *inode,
1268 unsigned long old, unsigned long bits) 1264 struct extent_state *state, int *bits)
1269{ 1265{
1270 1266
1271 /* 1267 /*
@@ -1273,17 +1269,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1273 * but in this case, we are only testeing for the DELALLOC 1269 * but in this case, we are only testeing for the DELALLOC
1274 * bit, which is only set or cleared with irqs on 1270 * bit, which is only set or cleared with irqs on
1275 */ 1271 */
1276 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1272 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1277 struct btrfs_root *root = BTRFS_I(inode)->root; 1273 struct btrfs_root *root = BTRFS_I(inode)->root;
1274 u64 len = state->end + 1 - state->start;
1278 1275
1279 spin_lock(&BTRFS_I(inode)->accounting_lock); 1276 if (*bits & EXTENT_FIRST_DELALLOC)
1280 BTRFS_I(inode)->outstanding_extents++; 1277 *bits &= ~EXTENT_FIRST_DELALLOC;
1281 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1278 else
1282 btrfs_delalloc_reserve_space(root, inode, end - start + 1); 1279 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
1283 1280
1284 spin_lock(&root->fs_info->delalloc_lock); 1281 spin_lock(&root->fs_info->delalloc_lock);
1285 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1282 BTRFS_I(inode)->delalloc_bytes += len;
1286 root->fs_info->delalloc_bytes += end - start + 1; 1283 root->fs_info->delalloc_bytes += len;
1287 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1284 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1288 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1285 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1289 &root->fs_info->delalloc_inodes); 1286 &root->fs_info->delalloc_inodes);
@@ -1297,45 +1294,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1297 * extent_io.c clear_bit_hook, see set_bit_hook for why 1294 * extent_io.c clear_bit_hook, see set_bit_hook for why
1298 */ 1295 */
1299static int btrfs_clear_bit_hook(struct inode *inode, 1296static int btrfs_clear_bit_hook(struct inode *inode,
1300 struct extent_state *state, unsigned long bits) 1297 struct extent_state *state, int *bits)
1301{ 1298{
1302 /* 1299 /*
1303 * set_bit and clear bit hooks normally require _irqsave/restore 1300 * set_bit and clear bit hooks normally require _irqsave/restore
1304 * but in this case, we are only testeing for the DELALLOC 1301 * but in this case, we are only testeing for the DELALLOC
1305 * bit, which is only set or cleared with irqs on 1302 * bit, which is only set or cleared with irqs on
1306 */ 1303 */
1307 if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1304 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1308 struct btrfs_root *root = BTRFS_I(inode)->root; 1305 struct btrfs_root *root = BTRFS_I(inode)->root;
1306 u64 len = state->end + 1 - state->start;
1309 1307
1310 if (bits & EXTENT_DO_ACCOUNTING) { 1308 if (*bits & EXTENT_FIRST_DELALLOC)
1311 spin_lock(&BTRFS_I(inode)->accounting_lock); 1309 *bits &= ~EXTENT_FIRST_DELALLOC;
1312 WARN_ON(!BTRFS_I(inode)->outstanding_extents); 1310 else if (!(*bits & EXTENT_DO_ACCOUNTING))
1313 BTRFS_I(inode)->outstanding_extents--; 1311 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
1314 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1312
1315 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1313 if (*bits & EXTENT_DO_ACCOUNTING)
1316 } 1314 btrfs_delalloc_release_metadata(inode, len);
1315
1316 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
1317 btrfs_free_reserved_data_space(inode, len);
1317 1318
1318 spin_lock(&root->fs_info->delalloc_lock); 1319 spin_lock(&root->fs_info->delalloc_lock);
1319 if (state->end - state->start + 1 > 1320 root->fs_info->delalloc_bytes -= len;
1320 root->fs_info->delalloc_bytes) { 1321 BTRFS_I(inode)->delalloc_bytes -= len;
1321 printk(KERN_INFO "btrfs warning: delalloc account " 1322
1322 "%llu %llu\n",
1323 (unsigned long long)
1324 state->end - state->start + 1,
1325 (unsigned long long)
1326 root->fs_info->delalloc_bytes);
1327 btrfs_delalloc_free_space(root, inode, (u64)-1);
1328 root->fs_info->delalloc_bytes = 0;
1329 BTRFS_I(inode)->delalloc_bytes = 0;
1330 } else {
1331 btrfs_delalloc_free_space(root, inode,
1332 state->end -
1333 state->start + 1);
1334 root->fs_info->delalloc_bytes -= state->end -
1335 state->start + 1;
1336 BTRFS_I(inode)->delalloc_bytes -= state->end -
1337 state->start + 1;
1338 }
1339 if (BTRFS_I(inode)->delalloc_bytes == 0 && 1323 if (BTRFS_I(inode)->delalloc_bytes == 0 &&
1340 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1324 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1341 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1325 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
@@ -1520,6 +1504,7 @@ again:
1520 goto again; 1504 goto again;
1521 } 1505 }
1522 1506
1507 BUG();
1523 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); 1508 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1524 ClearPageChecked(page); 1509 ClearPageChecked(page);
1525out: 1510out:
@@ -1650,7 +1635,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1650static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 1635static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1651{ 1636{
1652 struct btrfs_root *root = BTRFS_I(inode)->root; 1637 struct btrfs_root *root = BTRFS_I(inode)->root;
1653 struct btrfs_trans_handle *trans; 1638 struct btrfs_trans_handle *trans = NULL;
1654 struct btrfs_ordered_extent *ordered_extent = NULL; 1639 struct btrfs_ordered_extent *ordered_extent = NULL;
1655 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1640 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1656 struct extent_state *cached_state = NULL; 1641 struct extent_state *cached_state = NULL;
@@ -1668,9 +1653,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1668 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1653 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1669 if (!ret) { 1654 if (!ret) {
1670 trans = btrfs_join_transaction(root, 1); 1655 trans = btrfs_join_transaction(root, 1);
1656 btrfs_set_trans_block_group(trans, inode);
1657 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1671 ret = btrfs_update_inode(trans, root, inode); 1658 ret = btrfs_update_inode(trans, root, inode);
1672 BUG_ON(ret); 1659 BUG_ON(ret);
1673 btrfs_end_transaction(trans, root);
1674 } 1660 }
1675 goto out; 1661 goto out;
1676 } 1662 }
@@ -1680,6 +1666,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1680 0, &cached_state, GFP_NOFS); 1666 0, &cached_state, GFP_NOFS);
1681 1667
1682 trans = btrfs_join_transaction(root, 1); 1668 trans = btrfs_join_transaction(root, 1);
1669 btrfs_set_trans_block_group(trans, inode);
1670 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1683 1671
1684 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1672 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1685 compressed = 1; 1673 compressed = 1;
@@ -1711,12 +1699,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1711 add_pending_csums(trans, inode, ordered_extent->file_offset, 1699 add_pending_csums(trans, inode, ordered_extent->file_offset,
1712 &ordered_extent->list); 1700 &ordered_extent->list);
1713 1701
1714 /* this also removes the ordered extent from the tree */
1715 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1702 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1716 ret = btrfs_update_inode(trans, root, inode); 1703 ret = btrfs_update_inode(trans, root, inode);
1717 BUG_ON(ret); 1704 BUG_ON(ret);
1718 btrfs_end_transaction(trans, root);
1719out: 1705out:
1706 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1707 if (trans)
1708 btrfs_end_transaction(trans, root);
1720 /* once for us */ 1709 /* once for us */
1721 btrfs_put_ordered_extent(ordered_extent); 1710 btrfs_put_ordered_extent(ordered_extent);
1722 /* once for the tree */ 1711 /* once for the tree */
@@ -3219,11 +3208,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3219 3208
3220 if ((offset & (blocksize - 1)) == 0) 3209 if ((offset & (blocksize - 1)) == 0)
3221 goto out; 3210 goto out;
3222 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 3211 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
3223 if (ret)
3224 goto out;
3225
3226 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
3227 if (ret) 3212 if (ret)
3228 goto out; 3213 goto out;
3229 3214
@@ -3231,8 +3216,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3231again: 3216again:
3232 page = grab_cache_page(mapping, index); 3217 page = grab_cache_page(mapping, index);
3233 if (!page) { 3218 if (!page) {
3234 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 3219 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
3235 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
3236 goto out; 3220 goto out;
3237 } 3221 }
3238 3222
@@ -3295,8 +3279,7 @@ again:
3295 3279
3296out_unlock: 3280out_unlock:
3297 if (ret) 3281 if (ret)
3298 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 3282 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
3299 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
3300 unlock_page(page); 3283 unlock_page(page);
3301 page_cache_release(page); 3284 page_cache_release(page);
3302out: 3285out:
@@ -4878,6 +4861,7 @@ again:
4878 } 4861 }
4879 flush_dcache_page(page); 4862 flush_dcache_page(page);
4880 } else if (create && PageUptodate(page)) { 4863 } else if (create && PageUptodate(page)) {
4864 WARN_ON(1);
4881 if (!trans) { 4865 if (!trans) {
4882 kunmap(page); 4866 kunmap(page);
4883 free_extent_map(em); 4867 free_extent_map(em);
@@ -5142,7 +5126,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5142 u64 page_start; 5126 u64 page_start;
5143 u64 page_end; 5127 u64 page_end;
5144 5128
5145 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 5129 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
5146 if (ret) { 5130 if (ret) {
5147 if (ret == -ENOMEM) 5131 if (ret == -ENOMEM)
5148 ret = VM_FAULT_OOM; 5132 ret = VM_FAULT_OOM;
@@ -5151,13 +5135,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5151 goto out; 5135 goto out;
5152 } 5136 }
5153 5137
5154 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
5155 if (ret) {
5156 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5157 ret = VM_FAULT_SIGBUS;
5158 goto out;
5159 }
5160
5161 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ 5138 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
5162again: 5139again:
5163 lock_page(page); 5140 lock_page(page);
@@ -5167,7 +5144,6 @@ again:
5167 5144
5168 if ((page->mapping != inode->i_mapping) || 5145 if ((page->mapping != inode->i_mapping) ||
5169 (page_start >= size)) { 5146 (page_start >= size)) {
5170 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5171 /* page got truncated out from underneath us */ 5147 /* page got truncated out from underneath us */
5172 goto out_unlock; 5148 goto out_unlock;
5173 } 5149 }
@@ -5208,7 +5184,6 @@ again:
5208 unlock_extent_cached(io_tree, page_start, page_end, 5184 unlock_extent_cached(io_tree, page_start, page_end,
5209 &cached_state, GFP_NOFS); 5185 &cached_state, GFP_NOFS);
5210 ret = VM_FAULT_SIGBUS; 5186 ret = VM_FAULT_SIGBUS;
5211 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5212 goto out_unlock; 5187 goto out_unlock;
5213 } 5188 }
5214 ret = 0; 5189 ret = 0;
@@ -5235,10 +5210,10 @@ again:
5235 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); 5210 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5236 5211
5237out_unlock: 5212out_unlock:
5238 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
5239 if (!ret) 5213 if (!ret)
5240 return VM_FAULT_LOCKED; 5214 return VM_FAULT_LOCKED;
5241 unlock_page(page); 5215 unlock_page(page);
5216 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
5242out: 5217out:
5243 return ret; 5218 return ret;
5244} 5219}
@@ -5383,7 +5358,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
5383 ei->last_unlink_trans = 0; 5358 ei->last_unlink_trans = 0;
5384 5359
5385 spin_lock_init(&ei->accounting_lock); 5360 spin_lock_init(&ei->accounting_lock);
5386 ei->outstanding_extents = 0; 5361 atomic_set(&ei->outstanding_extents, 0);
5387 ei->reserved_extents = 0; 5362 ei->reserved_extents = 0;
5388 5363
5389 ei->ordered_data_close = 0; 5364 ei->ordered_data_close = 0;
@@ -5411,6 +5386,8 @@ void btrfs_destroy_inode(struct inode *inode)
5411 5386
5412 WARN_ON(!list_empty(&inode->i_dentry)); 5387 WARN_ON(!list_empty(&inode->i_dentry));
5413 WARN_ON(inode->i_data.nrpages); 5388 WARN_ON(inode->i_data.nrpages);
5389 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
5390 WARN_ON(BTRFS_I(inode)->reserved_extents);
5414 5391
5415 /* 5392 /*
5416 * This can happen where we create an inode, but somebody else also 5393 * This can happen where we create an inode, but somebody else also
@@ -5970,8 +5947,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5970 goto out; 5947 goto out;
5971 } 5948 }
5972 5949
5973 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, 5950 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
5974 alloc_end - alloc_start);
5975 if (ret) 5951 if (ret)
5976 goto out; 5952 goto out;
5977 5953
@@ -6037,8 +6013,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
6037 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 6013 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
6038 &cached_state, GFP_NOFS); 6014 &cached_state, GFP_NOFS);
6039 6015
6040 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, 6016 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
6041 alloc_end - alloc_start);
6042out: 6017out:
6043 mutex_unlock(&inode->i_mutex); 6018 mutex_unlock(&inode->i_mutex);
6044 return ret; 6019 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3066da468c6..6a706e69137 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -587,19 +587,9 @@ static int btrfs_defrag_file(struct file *file,
587 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 587 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
588 BTRFS_I(inode)->force_compress = 1; 588 BTRFS_I(inode)->force_compress = 1;
589 589
590 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 590 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
591 if (ret) { 591 if (ret)
592 ret = -ENOSPC; 592 goto err_unlock;
593 break;
594 }
595
596 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
597 if (ret) {
598 btrfs_free_reserved_data_space(root, inode,
599 PAGE_CACHE_SIZE);
600 ret = -ENOSPC;
601 break;
602 }
603again: 593again:
604 if (inode->i_size == 0 || 594 if (inode->i_size == 0 ||
605 i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { 595 i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
@@ -608,8 +598,10 @@ again:
608 } 598 }
609 599
610 page = grab_cache_page(inode->i_mapping, i); 600 page = grab_cache_page(inode->i_mapping, i);
611 if (!page) 601 if (!page) {
602 ret = -ENOMEM;
612 goto err_reservations; 603 goto err_reservations;
604 }
613 605
614 if (!PageUptodate(page)) { 606 if (!PageUptodate(page)) {
615 btrfs_readpage(NULL, page); 607 btrfs_readpage(NULL, page);
@@ -617,6 +609,7 @@ again:
617 if (!PageUptodate(page)) { 609 if (!PageUptodate(page)) {
618 unlock_page(page); 610 unlock_page(page);
619 page_cache_release(page); 611 page_cache_release(page);
612 ret = -EIO;
620 goto err_reservations; 613 goto err_reservations;
621 } 614 }
622 } 615 }
@@ -630,8 +623,7 @@ again:
630 wait_on_page_writeback(page); 623 wait_on_page_writeback(page);
631 624
632 if (PageDirty(page)) { 625 if (PageDirty(page)) {
633 btrfs_free_reserved_data_space(root, inode, 626 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
634 PAGE_CACHE_SIZE);
635 goto loop_unlock; 627 goto loop_unlock;
636 } 628 }
637 629
@@ -669,7 +661,6 @@ loop_unlock:
669 page_cache_release(page); 661 page_cache_release(page);
670 mutex_unlock(&inode->i_mutex); 662 mutex_unlock(&inode->i_mutex);
671 663
672 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
673 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); 664 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
674 i++; 665 i++;
675 } 666 }
@@ -699,9 +690,9 @@ loop_unlock:
699 return 0; 690 return 0;
700 691
701err_reservations: 692err_reservations:
693 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
694err_unlock:
702 mutex_unlock(&inode->i_mutex); 695 mutex_unlock(&inode->i_mutex);
703 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
704 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
705 return ret; 696 return ret;
706} 697}
707 698
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a127c0ebb2d..c9f1020572f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -311,13 +311,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
311 tree->last = NULL; 311 tree->last = NULL;
312 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 312 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
313 313
314 spin_lock(&BTRFS_I(inode)->accounting_lock);
315 WARN_ON(!BTRFS_I(inode)->outstanding_extents);
316 BTRFS_I(inode)->outstanding_extents--;
317 spin_unlock(&BTRFS_I(inode)->accounting_lock);
318 btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
319 inode, 1);
320
321 spin_lock(&root->fs_info->ordered_extent_lock); 314 spin_lock(&root->fs_info->ordered_extent_lock);
322 list_del_init(&entry->root_extent_list); 315 list_del_init(&entry->root_extent_list);
323 316