about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
author: Yan, Zheng <zheng.yan@oracle.com> 2010-05-16 10:48:47 -0400
committer: Chris Mason <chris.mason@oracle.com> 2010-05-25 10:34:51 -0400
commit: 0ca1f7ceb1991099ed5273885ebcf4323948c72e (patch)
tree: 10758d6a55c529aced177da3f6bf45cf26361913 /fs/btrfs/extent-tree.c
parent: a22285a6a32390195235171b89d157ed1a1fe932 (diff)
Btrfs: Update metadata reservation for delayed allocation
Introduce a metadata reservation context for delayed allocation and update the various related functions.

This patch also introduces the EXTENT_FIRST_DELALLOC control bit for set/clear_extent_bit. It tells set/clear_bit_hook whether they are processing the first extent_state with the EXTENT_DELALLOC bit set. This change is important when a set/clear_extent_bit call involves multiple extent_state records.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c 361
1 file changed, 121 insertions, 240 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 657df6e002d3..b1822e752b4a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -64,12 +64,6 @@ static int find_next_key(struct btrfs_path *path, int level,
64 struct btrfs_key *key); 64 struct btrfs_key *key);
65static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 65static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
66 int dump_block_groups); 66 int dump_block_groups);
67static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
68 struct btrfs_root *root,
69 struct btrfs_space_info *sinfo, u64 num_bytes);
70static int shrink_delalloc(struct btrfs_trans_handle *trans,
71 struct btrfs_root *root,
72 struct btrfs_space_info *sinfo, u64 to_reclaim);
73 67
74static noinline int 68static noinline int
75block_group_cache_done(struct btrfs_block_group_cache *cache) 69block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2880,189 +2874,14 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2880 BTRFS_BLOCK_GROUP_DATA); 2874 BTRFS_BLOCK_GROUP_DATA);
2881} 2875}
2882 2876
2883static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
2884{
2885 u64 num_bytes;
2886 int level;
2887
2888 level = BTRFS_MAX_LEVEL - 2;
2889 /*
2890 * NOTE: these calculations are absolutely the worst possible case.
2891 * This assumes that _every_ item we insert will require a new leaf, and
2892 * that the tree has grown to its maximum level size.
2893 */
2894
2895 /*
2896 * for every item we insert we could insert both an extent item and a
2897 * extent ref item. Then for ever item we insert, we will need to cow
2898 * both the original leaf, plus the leaf to the left and right of it.
2899 *
2900 * Unless we are talking about the extent root, then we just want the
2901 * number of items * 2, since we just need the extent item plus its ref.
2902 */
2903 if (root == root->fs_info->extent_root)
2904 num_bytes = num_items * 2;
2905 else
2906 num_bytes = (num_items + (2 * num_items)) * 3;
2907
2908 /*
2909 * num_bytes is total number of leaves we could need times the leaf
2910 * size, and then for every leaf we could end up cow'ing 2 nodes per
2911 * level, down to the leaf level.
2912 */
2913 num_bytes = (num_bytes * root->leafsize) +
2914 (num_bytes * (level * 2)) * root->nodesize;
2915
2916 return num_bytes;
2917}
2918
2919/*
2920 * Unreserve metadata space for delalloc. If we have less reserved credits than
2921 * we have extents, this function does nothing.
2922 */
2923int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2924 struct inode *inode, int num_items)
2925{
2926 struct btrfs_fs_info *info = root->fs_info;
2927 struct btrfs_space_info *meta_sinfo;
2928 u64 num_bytes;
2929 u64 alloc_target;
2930 bool bug = false;
2931
2932 /* get the space info for where the metadata will live */
2933 alloc_target = btrfs_get_alloc_profile(root, 0);
2934 meta_sinfo = __find_space_info(info, alloc_target);
2935
2936 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2937 num_items);
2938
2939 spin_lock(&meta_sinfo->lock);
2940 spin_lock(&BTRFS_I(inode)->accounting_lock);
2941 if (BTRFS_I(inode)->reserved_extents <=
2942 BTRFS_I(inode)->outstanding_extents) {
2943 spin_unlock(&BTRFS_I(inode)->accounting_lock);
2944 spin_unlock(&meta_sinfo->lock);
2945 return 0;
2946 }
2947 spin_unlock(&BTRFS_I(inode)->accounting_lock);
2948
2949 BTRFS_I(inode)->reserved_extents -= num_items;
2950 BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
2951
2952 if (meta_sinfo->bytes_delalloc < num_bytes) {
2953 bug = true;
2954 meta_sinfo->bytes_delalloc = 0;
2955 } else {
2956 meta_sinfo->bytes_delalloc -= num_bytes;
2957 }
2958 spin_unlock(&meta_sinfo->lock);
2959
2960 BUG_ON(bug);
2961
2962 return 0;
2963}
2964
2965static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2966{
2967 u64 thresh;
2968
2969 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2970 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2971 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2972 meta_sinfo->bytes_may_use;
2973
2974 thresh = meta_sinfo->total_bytes - thresh;
2975 thresh *= 80;
2976 do_div(thresh, 100);
2977 if (thresh <= meta_sinfo->bytes_delalloc)
2978 meta_sinfo->force_delalloc = 1;
2979 else
2980 meta_sinfo->force_delalloc = 0;
2981}
2982
2983/*
2984 * Reserve metadata space for delalloc.
2985 */
2986int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
2987 struct inode *inode, int num_items)
2988{
2989 struct btrfs_fs_info *info = root->fs_info;
2990 struct btrfs_space_info *meta_sinfo;
2991 u64 num_bytes;
2992 u64 used;
2993 u64 alloc_target;
2994 int flushed = 0;
2995 int force_delalloc;
2996
2997 /* get the space info for where the metadata will live */
2998 alloc_target = btrfs_get_alloc_profile(root, 0);
2999 meta_sinfo = __find_space_info(info, alloc_target);
3000
3001 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
3002 num_items);
3003again:
3004 spin_lock(&meta_sinfo->lock);
3005
3006 force_delalloc = meta_sinfo->force_delalloc;
3007
3008 if (unlikely(!meta_sinfo->bytes_root))
3009 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
3010
3011 if (!flushed)
3012 meta_sinfo->bytes_delalloc += num_bytes;
3013
3014 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
3015 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
3016 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
3017 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
3018
3019 if (used > meta_sinfo->total_bytes) {
3020 flushed++;
3021
3022 if (flushed == 1) {
3023 if (maybe_allocate_chunk(NULL, root, meta_sinfo,
3024 num_bytes))
3025 goto again;
3026 flushed++;
3027 } else {
3028 spin_unlock(&meta_sinfo->lock);
3029 }
3030
3031 if (flushed == 2) {
3032 filemap_flush(inode->i_mapping);
3033 goto again;
3034 } else if (flushed == 3) {
3035 shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
3036 goto again;
3037 }
3038 spin_lock(&meta_sinfo->lock);
3039 meta_sinfo->bytes_delalloc -= num_bytes;
3040 spin_unlock(&meta_sinfo->lock);
3041 printk(KERN_ERR "enospc, has %d, reserved %d\n",
3042 BTRFS_I(inode)->outstanding_extents,
3043 BTRFS_I(inode)->reserved_extents);
3044 dump_space_info(meta_sinfo, 0, 0);
3045 return -ENOSPC;
3046 }
3047
3048 BTRFS_I(inode)->reserved_extents += num_items;
3049 check_force_delalloc(meta_sinfo);
3050 spin_unlock(&meta_sinfo->lock);
3051
3052 if (!flushed && force_delalloc)
3053 filemap_flush(inode->i_mapping);
3054
3055 return 0;
3056}
3057
3058/* 2877/*
3059 * This will check the space that the inode allocates from to make sure we have 2878 * This will check the space that the inode allocates from to make sure we have
3060 * enough space for bytes. 2879 * enough space for bytes.
3061 */ 2880 */
3062int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 2881int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3063 u64 bytes)
3064{ 2882{
3065 struct btrfs_space_info *data_sinfo; 2883 struct btrfs_space_info *data_sinfo;
2884 struct btrfs_root *root = BTRFS_I(inode)->root;
3066 u64 used; 2885 u64 used;
3067 int ret = 0, committed = 0; 2886 int ret = 0, committed = 0;
3068 2887
@@ -3147,12 +2966,13 @@ alloc:
3147} 2966}
3148 2967
3149/* 2968/*
3150 * if there was an error for whatever reason after calling 2969 * called when we are clearing an delalloc extent from the
3151 * btrfs_check_data_free_space, call this so we can cleanup the counters. 2970 * inode's io_tree or there was an error for whatever reason
2971 * after calling btrfs_check_data_free_space
3152 */ 2972 */
3153void btrfs_free_reserved_data_space(struct btrfs_root *root, 2973void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3154 struct inode *inode, u64 bytes)
3155{ 2974{
2975 struct btrfs_root *root = BTRFS_I(inode)->root;
3156 struct btrfs_space_info *data_sinfo; 2976 struct btrfs_space_info *data_sinfo;
3157 2977
3158 /* make sure bytes are sectorsize aligned */ 2978 /* make sure bytes are sectorsize aligned */
@@ -3165,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root,
3165 spin_unlock(&data_sinfo->lock); 2985 spin_unlock(&data_sinfo->lock);
3166} 2986}
3167 2987
3168/* called when we are adding a delalloc extent to the inode's io_tree */
3169void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
3170 u64 bytes)
3171{
3172 struct btrfs_space_info *data_sinfo;
3173
3174 /* get the space info for where this inode will be storing its data */
3175 data_sinfo = BTRFS_I(inode)->space_info;
3176
3177 /* make sure we have enough space to handle the data first */
3178 spin_lock(&data_sinfo->lock);
3179 data_sinfo->bytes_delalloc += bytes;
3180
3181 /*
3182 * we are adding a delalloc extent without calling
3183 * btrfs_check_data_free_space first. This happens on a weird
3184 * writepage condition, but shouldn't hurt our accounting
3185 */
3186 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
3187 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
3188 BTRFS_I(inode)->reserved_bytes = 0;
3189 } else {
3190 data_sinfo->bytes_may_use -= bytes;
3191 BTRFS_I(inode)->reserved_bytes -= bytes;
3192 }
3193
3194 spin_unlock(&data_sinfo->lock);
3195}
3196
3197/* called when we are clearing an delalloc extent from the inode's io_tree */
3198void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
3199 u64 bytes)
3200{
3201 struct btrfs_space_info *info;
3202
3203 info = BTRFS_I(inode)->space_info;
3204
3205 spin_lock(&info->lock);
3206 info->bytes_delalloc -= bytes;
3207 spin_unlock(&info->lock);
3208}
3209
3210static void force_metadata_allocation(struct btrfs_fs_info *info) 2988static void force_metadata_allocation(struct btrfs_fs_info *info)
3211{ 2989{
3212 struct list_head *head = &info->space_info; 2990 struct list_head *head = &info->space_info;
@@ -3331,18 +3109,19 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
3331 * shrink metadata reservation for delalloc 3109 * shrink metadata reservation for delalloc
3332 */ 3110 */
3333static int shrink_delalloc(struct btrfs_trans_handle *trans, 3111static int shrink_delalloc(struct btrfs_trans_handle *trans,
3334 struct btrfs_root *root, 3112 struct btrfs_root *root, u64 to_reclaim)
3335 struct btrfs_space_info *sinfo, u64 to_reclaim)
3336{ 3113{
3114 struct btrfs_block_rsv *block_rsv;
3337 u64 reserved; 3115 u64 reserved;
3338 u64 max_reclaim; 3116 u64 max_reclaim;
3339 u64 reclaimed = 0; 3117 u64 reclaimed = 0;
3340 int pause = 1; 3118 int pause = 1;
3341 int ret; 3119 int ret;
3342 3120
3343 spin_lock(&sinfo->lock); 3121 block_rsv = &root->fs_info->delalloc_block_rsv;
3344 reserved = sinfo->bytes_delalloc; 3122 spin_lock(&block_rsv->lock);
3345 spin_unlock(&sinfo->lock); 3123 reserved = block_rsv->reserved;
3124 spin_unlock(&block_rsv->lock);
3346 3125
3347 if (reserved == 0) 3126 if (reserved == 0)
3348 return 0; 3127 return 0;
@@ -3361,11 +3140,11 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3361 pause = 1; 3140 pause = 1;
3362 } 3141 }
3363 3142
3364 spin_lock(&sinfo->lock); 3143 spin_lock(&block_rsv->lock);
3365 if (reserved > sinfo->bytes_delalloc) 3144 if (reserved > block_rsv->reserved)
3366 reclaimed = reserved - sinfo->bytes_delalloc; 3145 reclaimed = reserved - block_rsv->reserved;
3367 reserved = sinfo->bytes_delalloc; 3146 reserved = block_rsv->reserved;
3368 spin_unlock(&sinfo->lock); 3147 spin_unlock(&block_rsv->lock);
3369 3148
3370 if (reserved == 0 || reclaimed >= max_reclaim) 3149 if (reserved == 0 || reclaimed >= max_reclaim)
3371 break; 3150 break;
@@ -3394,7 +3173,7 @@ static int should_retry_reserve(struct btrfs_trans_handle *trans,
3394 if (trans && trans->transaction->in_commit) 3173 if (trans && trans->transaction->in_commit)
3395 return -ENOSPC; 3174 return -ENOSPC;
3396 3175
3397 ret = shrink_delalloc(trans, root, space_info, num_bytes); 3176 ret = shrink_delalloc(trans, root, num_bytes);
3398 if (ret) 3177 if (ret)
3399 return ret; 3178 return ret;
3400 3179
@@ -3754,6 +3533,108 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3754 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3533 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3755} 3534}
3756 3535
3536static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
3537{
3538 return num_bytes >>= 3;
3539}
3540
3541int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3542{
3543 struct btrfs_root *root = BTRFS_I(inode)->root;
3544 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3545 u64 to_reserve;
3546 int nr_extents;
3547 int retries = 0;
3548 int ret;
3549
3550 if (btrfs_transaction_in_commit(root->fs_info))
3551 schedule_timeout(1);
3552
3553 num_bytes = ALIGN(num_bytes, root->sectorsize);
3554again:
3555 spin_lock(&BTRFS_I(inode)->accounting_lock);
3556 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
3557 if (nr_extents > BTRFS_I(inode)->reserved_extents) {
3558 nr_extents -= BTRFS_I(inode)->reserved_extents;
3559 to_reserve = calc_trans_metadata_size(root, nr_extents);
3560 } else {
3561 nr_extents = 0;
3562 to_reserve = 0;
3563 }
3564
3565 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3566 ret = reserve_metadata_bytes(block_rsv, to_reserve);
3567 if (ret) {
3568 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3569 ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
3570 &retries);
3571 if (ret > 0)
3572 goto again;
3573 return ret;
3574 }
3575
3576 BTRFS_I(inode)->reserved_extents += nr_extents;
3577 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3578 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3579
3580 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3581
3582 if (block_rsv->size > 512 * 1024 * 1024)
3583 shrink_delalloc(NULL, root, to_reserve);
3584
3585 return 0;
3586}
3587
3588void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
3589{
3590 struct btrfs_root *root = BTRFS_I(inode)->root;
3591 u64 to_free;
3592 int nr_extents;
3593
3594 num_bytes = ALIGN(num_bytes, root->sectorsize);
3595 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
3596
3597 spin_lock(&BTRFS_I(inode)->accounting_lock);
3598 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
3599 if (nr_extents < BTRFS_I(inode)->reserved_extents) {
3600 nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
3601 BTRFS_I(inode)->reserved_extents -= nr_extents;
3602 } else {
3603 nr_extents = 0;
3604 }
3605 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3606
3607 to_free = calc_csum_metadata_size(inode, num_bytes);
3608 if (nr_extents > 0)
3609 to_free += calc_trans_metadata_size(root, nr_extents);
3610
3611 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
3612 to_free);
3613}
3614
3615int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
3616{
3617 int ret;
3618
3619 ret = btrfs_check_data_free_space(inode, num_bytes);
3620 if (ret)
3621 return ret;
3622
3623 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
3624 if (ret) {
3625 btrfs_free_reserved_data_space(inode, num_bytes);
3626 return ret;
3627 }
3628
3629 return 0;
3630}
3631
3632void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
3633{
3634 btrfs_delalloc_release_metadata(inode, num_bytes);
3635 btrfs_free_reserved_data_space(inode, num_bytes);
3636}
3637
3757static int update_block_group(struct btrfs_trans_handle *trans, 3638static int update_block_group(struct btrfs_trans_handle *trans,
3758 struct btrfs_root *root, 3639 struct btrfs_root *root,
3759 u64 bytenr, u64 num_bytes, int alloc) 3640 u64 bytenr, u64 num_bytes, int alloc)