aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@citi.umich.edu>2009-10-27 18:45:17 -0400
committerJ. Bruce Fields <bfields@citi.umich.edu>2009-10-27 18:45:17 -0400
commite343eb0d60f74547e0aeb5bd151105c2e6cfe588 (patch)
tree92586df0daf3298262a957640e5c86679c963f41 /fs/btrfs/extent-tree.c
parentddc04fd4d5163aee9ebdb38a56c365b602e2b7b7 (diff)
parent012abeea669ea49636cf952d13298bb68654146a (diff)
Merge commit 'v2.6.32-rc5' into for-2.6.33
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c615
1 files changed, 525 insertions, 90 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 993f93ff7ba6..e238a0cdac67 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
68 struct extent_buffer **must_clean); 68 struct extent_buffer **must_clean);
69static int find_next_key(struct btrfs_path *path, int level, 69static int find_next_key(struct btrfs_path *path, int level,
70 struct btrfs_key *key); 70 struct btrfs_key *key);
71static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
72 int dump_block_groups);
71 73
72static noinline int 74static noinline int
73block_group_cache_done(struct btrfs_block_group_cache *cache) 75block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -1566,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1566 return ret; 1568 return ret;
1567} 1569}
1568 1570
1569#ifdef BIO_RW_DISCARD
1570static void btrfs_issue_discard(struct block_device *bdev, 1571static void btrfs_issue_discard(struct block_device *bdev,
1571 u64 start, u64 len) 1572 u64 start, u64 len)
1572{ 1573{
1573 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 1574 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
1574 DISCARD_FL_BARRIER); 1575 DISCARD_FL_BARRIER);
1575} 1576}
1576#endif
1577 1577
1578static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1578static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1579 u64 num_bytes) 1579 u64 num_bytes)
1580{ 1580{
1581#ifdef BIO_RW_DISCARD
1582 int ret; 1581 int ret;
1583 u64 map_length = num_bytes; 1582 u64 map_length = num_bytes;
1584 struct btrfs_multi_bio *multi = NULL; 1583 struct btrfs_multi_bio *multi = NULL;
1585 1584
1585 if (!btrfs_test_opt(root, DISCARD))
1586 return 0;
1587
1586 /* Tell the block device(s) that the sectors can be discarded */ 1588 /* Tell the block device(s) that the sectors can be discarded */
1587 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 1589 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
1588 bytenr, &map_length, &multi, 0); 1590 bytenr, &map_length, &multi, 0);
@@ -1602,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1602 } 1604 }
1603 1605
1604 return ret; 1606 return ret;
1605#else
1606 return 0;
1607#endif
1608} 1607}
1609 1608
1610int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1609int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2765,67 +2764,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2765 alloc_target); 2764 alloc_target);
2766} 2765}
2767 2766
2767static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
2768{
2769 u64 num_bytes;
2770 int level;
2771
2772 level = BTRFS_MAX_LEVEL - 2;
2773 /*
2774 * NOTE: these calculations are absolutely the worst possible case.
2775 * This assumes that _every_ item we insert will require a new leaf, and
2776 * that the tree has grown to its maximum level size.
2777 */
2778
2779 /*
2780 * for every item we insert we could insert both an extent item and a
2781 * extent ref item. Then for ever item we insert, we will need to cow
2782 * both the original leaf, plus the leaf to the left and right of it.
2783 *
2784 * Unless we are talking about the extent root, then we just want the
2785 * number of items * 2, since we just need the extent item plus its ref.
2786 */
2787 if (root == root->fs_info->extent_root)
2788 num_bytes = num_items * 2;
2789 else
2790 num_bytes = (num_items + (2 * num_items)) * 3;
2791
2792 /*
2793 * num_bytes is total number of leaves we could need times the leaf
2794 * size, and then for every leaf we could end up cow'ing 2 nodes per
2795 * level, down to the leaf level.
2796 */
2797 num_bytes = (num_bytes * root->leafsize) +
2798 (num_bytes * (level * 2)) * root->nodesize;
2799
2800 return num_bytes;
2801}
2802
2768/* 2803/*
2769 * for now this just makes sure we have at least 5% of our metadata space free 2804 * Unreserve metadata space for delalloc. If we have less reserved credits than
2770 * for use. 2805 * we have extents, this function does nothing.
2771 */ 2806 */
2772int btrfs_check_metadata_free_space(struct btrfs_root *root) 2807int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2808 struct inode *inode, int num_items)
2773{ 2809{
2774 struct btrfs_fs_info *info = root->fs_info; 2810 struct btrfs_fs_info *info = root->fs_info;
2775 struct btrfs_space_info *meta_sinfo; 2811 struct btrfs_space_info *meta_sinfo;
2776 u64 alloc_target, thresh; 2812 u64 num_bytes;
2777 int committed = 0, ret; 2813 u64 alloc_target;
2814 bool bug = false;
2778 2815
2779 /* get the space info for where the metadata will live */ 2816 /* get the space info for where the metadata will live */
2780 alloc_target = btrfs_get_alloc_profile(root, 0); 2817 alloc_target = btrfs_get_alloc_profile(root, 0);
2781 meta_sinfo = __find_space_info(info, alloc_target); 2818 meta_sinfo = __find_space_info(info, alloc_target);
2782 if (!meta_sinfo)
2783 goto alloc;
2784 2819
2785again: 2820 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2821 num_items);
2822
2786 spin_lock(&meta_sinfo->lock); 2823 spin_lock(&meta_sinfo->lock);
2787 if (!meta_sinfo->full) 2824 spin_lock(&BTRFS_I(inode)->accounting_lock);
2788 thresh = meta_sinfo->total_bytes * 80; 2825 if (BTRFS_I(inode)->reserved_extents <=
2789 else 2826 BTRFS_I(inode)->outstanding_extents) {
2790 thresh = meta_sinfo->total_bytes * 95; 2827 spin_unlock(&BTRFS_I(inode)->accounting_lock);
2828 spin_unlock(&meta_sinfo->lock);
2829 return 0;
2830 }
2831 spin_unlock(&BTRFS_I(inode)->accounting_lock);
2832
2833 BTRFS_I(inode)->reserved_extents--;
2834 BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
2835
2836 if (meta_sinfo->bytes_delalloc < num_bytes) {
2837 bug = true;
2838 meta_sinfo->bytes_delalloc = 0;
2839 } else {
2840 meta_sinfo->bytes_delalloc -= num_bytes;
2841 }
2842 spin_unlock(&meta_sinfo->lock);
2791 2843
2844 BUG_ON(bug);
2845
2846 return 0;
2847}
2848
2849static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2850{
2851 u64 thresh;
2852
2853 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2854 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2855 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2856 meta_sinfo->bytes_may_use;
2857
2858 thresh = meta_sinfo->total_bytes - thresh;
2859 thresh *= 80;
2792 do_div(thresh, 100); 2860 do_div(thresh, 100);
2861 if (thresh <= meta_sinfo->bytes_delalloc)
2862 meta_sinfo->force_delalloc = 1;
2863 else
2864 meta_sinfo->force_delalloc = 0;
2865}
2793 2866
2794 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2867struct async_flush {
2795 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 2868 struct btrfs_root *root;
2796 meta_sinfo->bytes_super > thresh) { 2869 struct btrfs_space_info *info;
2797 struct btrfs_trans_handle *trans; 2870 struct btrfs_work work;
2798 if (!meta_sinfo->full) { 2871};
2799 meta_sinfo->force_alloc = 1; 2872
2873static noinline void flush_delalloc_async(struct btrfs_work *work)
2874{
2875 struct async_flush *async;
2876 struct btrfs_root *root;
2877 struct btrfs_space_info *info;
2878
2879 async = container_of(work, struct async_flush, work);
2880 root = async->root;
2881 info = async->info;
2882
2883 btrfs_start_delalloc_inodes(root);
2884 wake_up(&info->flush_wait);
2885 btrfs_wait_ordered_extents(root, 0);
2886
2887 spin_lock(&info->lock);
2888 info->flushing = 0;
2889 spin_unlock(&info->lock);
2890 wake_up(&info->flush_wait);
2891
2892 kfree(async);
2893}
2894
2895static void wait_on_flush(struct btrfs_space_info *info)
2896{
2897 DEFINE_WAIT(wait);
2898 u64 used;
2899
2900 while (1) {
2901 prepare_to_wait(&info->flush_wait, &wait,
2902 TASK_UNINTERRUPTIBLE);
2903 spin_lock(&info->lock);
2904 if (!info->flushing) {
2905 spin_unlock(&info->lock);
2906 break;
2907 }
2908
2909 used = info->bytes_used + info->bytes_reserved +
2910 info->bytes_pinned + info->bytes_readonly +
2911 info->bytes_super + info->bytes_root +
2912 info->bytes_may_use + info->bytes_delalloc;
2913 if (used < info->total_bytes) {
2914 spin_unlock(&info->lock);
2915 break;
2916 }
2917 spin_unlock(&info->lock);
2918 schedule();
2919 }
2920 finish_wait(&info->flush_wait, &wait);
2921}
2922
2923static void flush_delalloc(struct btrfs_root *root,
2924 struct btrfs_space_info *info)
2925{
2926 struct async_flush *async;
2927 bool wait = false;
2928
2929 spin_lock(&info->lock);
2930
2931 if (!info->flushing) {
2932 info->flushing = 1;
2933 init_waitqueue_head(&info->flush_wait);
2934 } else {
2935 wait = true;
2936 }
2937
2938 spin_unlock(&info->lock);
2939
2940 if (wait) {
2941 wait_on_flush(info);
2942 return;
2943 }
2944
2945 async = kzalloc(sizeof(*async), GFP_NOFS);
2946 if (!async)
2947 goto flush;
2948
2949 async->root = root;
2950 async->info = info;
2951 async->work.func = flush_delalloc_async;
2952
2953 btrfs_queue_worker(&root->fs_info->enospc_workers,
2954 &async->work);
2955 wait_on_flush(info);
2956 return;
2957
2958flush:
2959 btrfs_start_delalloc_inodes(root);
2960 btrfs_wait_ordered_extents(root, 0);
2961
2962 spin_lock(&info->lock);
2963 info->flushing = 0;
2964 spin_unlock(&info->lock);
2965 wake_up(&info->flush_wait);
2966}
2967
2968static int maybe_allocate_chunk(struct btrfs_root *root,
2969 struct btrfs_space_info *info)
2970{
2971 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
2972 struct btrfs_trans_handle *trans;
2973 bool wait = false;
2974 int ret = 0;
2975 u64 min_metadata;
2976 u64 free_space;
2977
2978 free_space = btrfs_super_total_bytes(disk_super);
2979 /*
2980 * we allow the metadata to grow to a max of either 5gb or 5% of the
2981 * space in the volume.
2982 */
2983 min_metadata = min((u64)5 * 1024 * 1024 * 1024,
2984 div64_u64(free_space * 5, 100));
2985 if (info->total_bytes >= min_metadata) {
2986 spin_unlock(&info->lock);
2987 return 0;
2988 }
2989
2990 if (info->full) {
2991 spin_unlock(&info->lock);
2992 return 0;
2993 }
2994
2995 if (!info->allocating_chunk) {
2996 info->force_alloc = 1;
2997 info->allocating_chunk = 1;
2998 init_waitqueue_head(&info->allocate_wait);
2999 } else {
3000 wait = true;
3001 }
3002
3003 spin_unlock(&info->lock);
3004
3005 if (wait) {
3006 wait_event(info->allocate_wait,
3007 !info->allocating_chunk);
3008 return 1;
3009 }
3010
3011 trans = btrfs_start_transaction(root, 1);
3012 if (!trans) {
3013 ret = -ENOMEM;
3014 goto out;
3015 }
3016
3017 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3018 4096 + 2 * 1024 * 1024,
3019 info->flags, 0);
3020 btrfs_end_transaction(trans, root);
3021 if (ret)
3022 goto out;
3023out:
3024 spin_lock(&info->lock);
3025 info->allocating_chunk = 0;
3026 spin_unlock(&info->lock);
3027 wake_up(&info->allocate_wait);
3028
3029 if (ret)
3030 return 0;
3031 return 1;
3032}
3033
3034/*
3035 * Reserve metadata space for delalloc.
3036 */
3037int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
3038 struct inode *inode, int num_items)
3039{
3040 struct btrfs_fs_info *info = root->fs_info;
3041 struct btrfs_space_info *meta_sinfo;
3042 u64 num_bytes;
3043 u64 used;
3044 u64 alloc_target;
3045 int flushed = 0;
3046 int force_delalloc;
3047
3048 /* get the space info for where the metadata will live */
3049 alloc_target = btrfs_get_alloc_profile(root, 0);
3050 meta_sinfo = __find_space_info(info, alloc_target);
3051
3052 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
3053 num_items);
3054again:
3055 spin_lock(&meta_sinfo->lock);
3056
3057 force_delalloc = meta_sinfo->force_delalloc;
3058
3059 if (unlikely(!meta_sinfo->bytes_root))
3060 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
3061
3062 if (!flushed)
3063 meta_sinfo->bytes_delalloc += num_bytes;
3064
3065 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
3066 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
3067 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
3068 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
3069
3070 if (used > meta_sinfo->total_bytes) {
3071 flushed++;
3072
3073 if (flushed == 1) {
3074 if (maybe_allocate_chunk(root, meta_sinfo))
3075 goto again;
3076 flushed++;
3077 } else {
2800 spin_unlock(&meta_sinfo->lock); 3078 spin_unlock(&meta_sinfo->lock);
2801alloc: 3079 }
2802 trans = btrfs_start_transaction(root, 1);
2803 if (!trans)
2804 return -ENOMEM;
2805 3080
2806 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3081 if (flushed == 2) {
2807 2 * 1024 * 1024, alloc_target, 0); 3082 filemap_flush(inode->i_mapping);
2808 btrfs_end_transaction(trans, root); 3083 goto again;
2809 if (!meta_sinfo) { 3084 } else if (flushed == 3) {
2810 meta_sinfo = __find_space_info(info, 3085 flush_delalloc(root, meta_sinfo);
2811 alloc_target);
2812 }
2813 goto again; 3086 goto again;
2814 } 3087 }
3088 spin_lock(&meta_sinfo->lock);
3089 meta_sinfo->bytes_delalloc -= num_bytes;
2815 spin_unlock(&meta_sinfo->lock); 3090 spin_unlock(&meta_sinfo->lock);
3091 printk(KERN_ERR "enospc, has %d, reserved %d\n",
3092 BTRFS_I(inode)->outstanding_extents,
3093 BTRFS_I(inode)->reserved_extents);
3094 dump_space_info(meta_sinfo, 0, 0);
3095 return -ENOSPC;
3096 }
2816 3097
2817 if (!committed) { 3098 BTRFS_I(inode)->reserved_extents++;
2818 committed = 1; 3099 check_force_delalloc(meta_sinfo);
2819 trans = btrfs_join_transaction(root, 1); 3100 spin_unlock(&meta_sinfo->lock);
2820 if (!trans) 3101
2821 return -ENOMEM; 3102 if (!flushed && force_delalloc)
2822 ret = btrfs_commit_transaction(trans, root); 3103 filemap_flush(inode->i_mapping);
2823 if (ret) 3104
2824 return ret; 3105 return 0;
3106}
3107
3108/*
3109 * unreserve num_items number of items worth of metadata space. This needs to
3110 * be paired with btrfs_reserve_metadata_space.
3111 *
3112 * NOTE: if you have the option, run this _AFTER_ you do a
3113 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
3114 * oprations which will result in more used metadata, so we want to make sure we
3115 * can do that without issue.
3116 */
3117int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
3118{
3119 struct btrfs_fs_info *info = root->fs_info;
3120 struct btrfs_space_info *meta_sinfo;
3121 u64 num_bytes;
3122 u64 alloc_target;
3123 bool bug = false;
3124
3125 /* get the space info for where the metadata will live */
3126 alloc_target = btrfs_get_alloc_profile(root, 0);
3127 meta_sinfo = __find_space_info(info, alloc_target);
3128
3129 num_bytes = calculate_bytes_needed(root, num_items);
3130
3131 spin_lock(&meta_sinfo->lock);
3132 if (meta_sinfo->bytes_may_use < num_bytes) {
3133 bug = true;
3134 meta_sinfo->bytes_may_use = 0;
3135 } else {
3136 meta_sinfo->bytes_may_use -= num_bytes;
3137 }
3138 spin_unlock(&meta_sinfo->lock);
3139
3140 BUG_ON(bug);
3141
3142 return 0;
3143}
3144
3145/*
3146 * Reserve some metadata space for use. We'll calculate the worste case number
3147 * of bytes that would be needed to modify num_items number of items. If we
3148 * have space, fantastic, if not, you get -ENOSPC. Please call
3149 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
3150 * items you reserved, since whatever metadata you needed should have already
3151 * been allocated.
3152 *
3153 * This will commit the transaction to make more space if we don't have enough
3154 * metadata space. THe only time we don't do this is if we're reserving space
3155 * inside of a transaction, then we will just return -ENOSPC and it is the
3156 * callers responsibility to handle it properly.
3157 */
3158int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
3159{
3160 struct btrfs_fs_info *info = root->fs_info;
3161 struct btrfs_space_info *meta_sinfo;
3162 u64 num_bytes;
3163 u64 used;
3164 u64 alloc_target;
3165 int retries = 0;
3166
3167 /* get the space info for where the metadata will live */
3168 alloc_target = btrfs_get_alloc_profile(root, 0);
3169 meta_sinfo = __find_space_info(info, alloc_target);
3170
3171 num_bytes = calculate_bytes_needed(root, num_items);
3172again:
3173 spin_lock(&meta_sinfo->lock);
3174
3175 if (unlikely(!meta_sinfo->bytes_root))
3176 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
3177
3178 if (!retries)
3179 meta_sinfo->bytes_may_use += num_bytes;
3180
3181 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
3182 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
3183 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
3184 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
3185
3186 if (used > meta_sinfo->total_bytes) {
3187 retries++;
3188 if (retries == 1) {
3189 if (maybe_allocate_chunk(root, meta_sinfo))
3190 goto again;
3191 retries++;
3192 } else {
3193 spin_unlock(&meta_sinfo->lock);
3194 }
3195
3196 if (retries == 2) {
3197 flush_delalloc(root, meta_sinfo);
2825 goto again; 3198 goto again;
2826 } 3199 }
3200 spin_lock(&meta_sinfo->lock);
3201 meta_sinfo->bytes_may_use -= num_bytes;
3202 spin_unlock(&meta_sinfo->lock);
3203
3204 dump_space_info(meta_sinfo, 0, 0);
2827 return -ENOSPC; 3205 return -ENOSPC;
2828 } 3206 }
3207
3208 check_force_delalloc(meta_sinfo);
2829 spin_unlock(&meta_sinfo->lock); 3209 spin_unlock(&meta_sinfo->lock);
2830 3210
2831 return 0; 3211 return 0;
@@ -2888,7 +3268,7 @@ alloc:
2888 spin_unlock(&data_sinfo->lock); 3268 spin_unlock(&data_sinfo->lock);
2889 3269
2890 /* commit the current transaction and try again */ 3270 /* commit the current transaction and try again */
2891 if (!committed) { 3271 if (!committed && !root->fs_info->open_ioctl_trans) {
2892 committed = 1; 3272 committed = 1;
2893 trans = btrfs_join_transaction(root, 1); 3273 trans = btrfs_join_transaction(root, 1);
2894 if (!trans) 3274 if (!trans)
@@ -2916,7 +3296,7 @@ alloc:
2916 BTRFS_I(inode)->reserved_bytes += bytes; 3296 BTRFS_I(inode)->reserved_bytes += bytes;
2917 spin_unlock(&data_sinfo->lock); 3297 spin_unlock(&data_sinfo->lock);
2918 3298
2919 return btrfs_check_metadata_free_space(root); 3299 return 0;
2920} 3300}
2921 3301
2922/* 3302/*
@@ -3015,17 +3395,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3015 BUG_ON(!space_info); 3395 BUG_ON(!space_info);
3016 3396
3017 spin_lock(&space_info->lock); 3397 spin_lock(&space_info->lock);
3018 if (space_info->force_alloc) { 3398 if (space_info->force_alloc)
3019 force = 1; 3399 force = 1;
3020 space_info->force_alloc = 0;
3021 }
3022 if (space_info->full) { 3400 if (space_info->full) {
3023 spin_unlock(&space_info->lock); 3401 spin_unlock(&space_info->lock);
3024 goto out; 3402 goto out;
3025 } 3403 }
3026 3404
3027 thresh = space_info->total_bytes - space_info->bytes_readonly; 3405 thresh = space_info->total_bytes - space_info->bytes_readonly;
3028 thresh = div_factor(thresh, 6); 3406 thresh = div_factor(thresh, 8);
3029 if (!force && 3407 if (!force &&
3030 (space_info->bytes_used + space_info->bytes_pinned + 3408 (space_info->bytes_used + space_info->bytes_pinned +
3031 space_info->bytes_reserved + alloc_bytes) < thresh) { 3409 space_info->bytes_reserved + alloc_bytes) < thresh) {
@@ -3039,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3039 * we keep a reasonable number of metadata chunks allocated in the 3417 * we keep a reasonable number of metadata chunks allocated in the
3040 * FS as well. 3418 * FS as well.
3041 */ 3419 */
3042 if (flags & BTRFS_BLOCK_GROUP_DATA) { 3420 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
3043 fs_info->data_chunk_allocations++; 3421 fs_info->data_chunk_allocations++;
3044 if (!(fs_info->data_chunk_allocations % 3422 if (!(fs_info->data_chunk_allocations %
3045 fs_info->metadata_ratio)) 3423 fs_info->metadata_ratio))
@@ -3047,8 +3425,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3047 } 3425 }
3048 3426
3049 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3427 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3428 spin_lock(&space_info->lock);
3050 if (ret) 3429 if (ret)
3051 space_info->full = 1; 3430 space_info->full = 1;
3431 space_info->force_alloc = 0;
3432 spin_unlock(&space_info->lock);
3052out: 3433out:
3053 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3434 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3054 return ret; 3435 return ret;
@@ -3306,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
3306 if (is_data) 3687 if (is_data)
3307 goto pinit; 3688 goto pinit;
3308 3689
3690 /*
3691 * discard is sloooow, and so triggering discards on
3692 * individual btree blocks isn't a good plan. Just
3693 * pin everything in discard mode.
3694 */
3695 if (btrfs_test_opt(root, DISCARD))
3696 goto pinit;
3697
3309 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 3698 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
3310 if (!buf) 3699 if (!buf)
3311 goto pinit; 3700 goto pinit;
@@ -3747,6 +4136,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3747 int loop = 0; 4136 int loop = 0;
3748 bool found_uncached_bg = false; 4137 bool found_uncached_bg = false;
3749 bool failed_cluster_refill = false; 4138 bool failed_cluster_refill = false;
4139 bool failed_alloc = false;
3750 4140
3751 WARN_ON(num_bytes < root->sectorsize); 4141 WARN_ON(num_bytes < root->sectorsize);
3752 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4142 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3951,14 +4341,23 @@ refill_cluster:
3951 4341
3952 offset = btrfs_find_space_for_alloc(block_group, search_start, 4342 offset = btrfs_find_space_for_alloc(block_group, search_start,
3953 num_bytes, empty_size); 4343 num_bytes, empty_size);
3954 if (!offset && (cached || (!cached && 4344 /*
3955 loop == LOOP_CACHING_NOWAIT))) { 4345 * If we didn't find a chunk, and we haven't failed on this
3956 goto loop; 4346 * block group before, and this block group is in the middle of
3957 } else if (!offset && (!cached && 4347 * caching and we are ok with waiting, then go ahead and wait
3958 loop > LOOP_CACHING_NOWAIT)) { 4348 * for progress to be made, and set failed_alloc to true.
4349 *
4350 * If failed_alloc is true then we've already waited on this
4351 * block group once and should move on to the next block group.
4352 */
4353 if (!offset && !failed_alloc && !cached &&
4354 loop > LOOP_CACHING_NOWAIT) {
3959 wait_block_group_cache_progress(block_group, 4355 wait_block_group_cache_progress(block_group,
3960 num_bytes + empty_size); 4356 num_bytes + empty_size);
4357 failed_alloc = true;
3961 goto have_block_group; 4358 goto have_block_group;
4359 } else if (!offset) {
4360 goto loop;
3962 } 4361 }
3963checks: 4362checks:
3964 search_start = stripe_align(root, offset); 4363 search_start = stripe_align(root, offset);
@@ -4006,6 +4405,7 @@ checks:
4006 break; 4405 break;
4007loop: 4406loop:
4008 failed_cluster_refill = false; 4407 failed_cluster_refill = false;
4408 failed_alloc = false;
4009 btrfs_put_block_group(block_group); 4409 btrfs_put_block_group(block_group);
4010 } 4410 }
4011 up_read(&space_info->groups_sem); 4411 up_read(&space_info->groups_sem);
@@ -4063,21 +4463,32 @@ loop:
4063 return ret; 4463 return ret;
4064} 4464}
4065 4465
4066static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 4466static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
4467 int dump_block_groups)
4067{ 4468{
4068 struct btrfs_block_group_cache *cache; 4469 struct btrfs_block_group_cache *cache;
4069 4470
4471 spin_lock(&info->lock);
4070 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4472 printk(KERN_INFO "space_info has %llu free, is %sfull\n",
4071 (unsigned long long)(info->total_bytes - info->bytes_used - 4473 (unsigned long long)(info->total_bytes - info->bytes_used -
4072 info->bytes_pinned - info->bytes_reserved), 4474 info->bytes_pinned - info->bytes_reserved -
4475 info->bytes_super),
4073 (info->full) ? "" : "not "); 4476 (info->full) ? "" : "not ");
4074 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4477 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
4075 " may_use=%llu, used=%llu\n", 4478 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
4479 "\n",
4076 (unsigned long long)info->total_bytes, 4480 (unsigned long long)info->total_bytes,
4077 (unsigned long long)info->bytes_pinned, 4481 (unsigned long long)info->bytes_pinned,
4078 (unsigned long long)info->bytes_delalloc, 4482 (unsigned long long)info->bytes_delalloc,
4079 (unsigned long long)info->bytes_may_use, 4483 (unsigned long long)info->bytes_may_use,
4080 (unsigned long long)info->bytes_used); 4484 (unsigned long long)info->bytes_used,
4485 (unsigned long long)info->bytes_root,
4486 (unsigned long long)info->bytes_super,
4487 (unsigned long long)info->bytes_reserved);
4488 spin_unlock(&info->lock);
4489
4490 if (!dump_block_groups)
4491 return;
4081 4492
4082 down_read(&info->groups_sem); 4493 down_read(&info->groups_sem);
4083 list_for_each_entry(cache, &info->block_groups, list) { 4494 list_for_each_entry(cache, &info->block_groups, list) {
@@ -4145,7 +4556,7 @@ again:
4145 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4556 printk(KERN_ERR "btrfs allocation failed flags %llu, "
4146 "wanted %llu\n", (unsigned long long)data, 4557 "wanted %llu\n", (unsigned long long)data,
4147 (unsigned long long)num_bytes); 4558 (unsigned long long)num_bytes);
4148 dump_space_info(sinfo, num_bytes); 4559 dump_space_info(sinfo, num_bytes, 1);
4149 } 4560 }
4150 4561
4151 return ret; 4562 return ret;
@@ -4506,6 +4917,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4506 u64 bytenr; 4917 u64 bytenr;
4507 u64 generation; 4918 u64 generation;
4508 u64 refs; 4919 u64 refs;
4920 u64 flags;
4509 u64 last = 0; 4921 u64 last = 0;
4510 u32 nritems; 4922 u32 nritems;
4511 u32 blocksize; 4923 u32 blocksize;
@@ -4543,15 +4955,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4543 generation <= root->root_key.offset) 4955 generation <= root->root_key.offset)
4544 continue; 4956 continue;
4545 4957
4958 /* We don't lock the tree block, it's OK to be racy here */
4959 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
4960 &refs, &flags);
4961 BUG_ON(ret);
4962 BUG_ON(refs == 0);
4963
4546 if (wc->stage == DROP_REFERENCE) { 4964 if (wc->stage == DROP_REFERENCE) {
4547 ret = btrfs_lookup_extent_info(trans, root,
4548 bytenr, blocksize,
4549 &refs, NULL);
4550 BUG_ON(ret);
4551 BUG_ON(refs == 0);
4552 if (refs == 1) 4965 if (refs == 1)
4553 goto reada; 4966 goto reada;
4554 4967
4968 if (wc->level == 1 &&
4969 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4970 continue;
4555 if (!wc->update_ref || 4971 if (!wc->update_ref ||
4556 generation <= root->root_key.offset) 4972 generation <= root->root_key.offset)
4557 continue; 4973 continue;
@@ -4560,6 +4976,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
4560 &wc->update_progress); 4976 &wc->update_progress);
4561 if (ret < 0) 4977 if (ret < 0)
4562 continue; 4978 continue;
4979 } else {
4980 if (wc->level == 1 &&
4981 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
4982 continue;
4563 } 4983 }
4564reada: 4984reada:
4565 ret = readahead_tree_block(root, bytenr, blocksize, 4985 ret = readahead_tree_block(root, bytenr, blocksize,
@@ -4583,7 +5003,7 @@ reada:
4583static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 5003static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4584 struct btrfs_root *root, 5004 struct btrfs_root *root,
4585 struct btrfs_path *path, 5005 struct btrfs_path *path,
4586 struct walk_control *wc) 5006 struct walk_control *wc, int lookup_info)
4587{ 5007{
4588 int level = wc->level; 5008 int level = wc->level;
4589 struct extent_buffer *eb = path->nodes[level]; 5009 struct extent_buffer *eb = path->nodes[level];
@@ -4598,8 +5018,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4598 * when reference count of tree block is 1, it won't increase 5018 * when reference count of tree block is 1, it won't increase
4599 * again. once full backref flag is set, we never clear it. 5019 * again. once full backref flag is set, we never clear it.
4600 */ 5020 */
4601 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 5021 if (lookup_info &&
4602 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { 5022 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
5023 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
4603 BUG_ON(!path->locks[level]); 5024 BUG_ON(!path->locks[level]);
4604 ret = btrfs_lookup_extent_info(trans, root, 5025 ret = btrfs_lookup_extent_info(trans, root,
4605 eb->start, eb->len, 5026 eb->start, eb->len,
@@ -4660,7 +5081,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4660static noinline int do_walk_down(struct btrfs_trans_handle *trans, 5081static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4661 struct btrfs_root *root, 5082 struct btrfs_root *root,
4662 struct btrfs_path *path, 5083 struct btrfs_path *path,
4663 struct walk_control *wc) 5084 struct walk_control *wc, int *lookup_info)
4664{ 5085{
4665 u64 bytenr; 5086 u64 bytenr;
4666 u64 generation; 5087 u64 generation;
@@ -4680,8 +5101,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4680 * for the subtree 5101 * for the subtree
4681 */ 5102 */
4682 if (wc->stage == UPDATE_BACKREF && 5103 if (wc->stage == UPDATE_BACKREF &&
4683 generation <= root->root_key.offset) 5104 generation <= root->root_key.offset) {
5105 *lookup_info = 1;
4684 return 1; 5106 return 1;
5107 }
4685 5108
4686 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 5109 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
4687 blocksize = btrfs_level_size(root, level - 1); 5110 blocksize = btrfs_level_size(root, level - 1);
@@ -4694,14 +5117,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4694 btrfs_tree_lock(next); 5117 btrfs_tree_lock(next);
4695 btrfs_set_lock_blocking(next); 5118 btrfs_set_lock_blocking(next);
4696 5119
4697 if (wc->stage == DROP_REFERENCE) { 5120 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
4698 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5121 &wc->refs[level - 1],
4699 &wc->refs[level - 1], 5122 &wc->flags[level - 1]);
4700 &wc->flags[level - 1]); 5123 BUG_ON(ret);
4701 BUG_ON(ret); 5124 BUG_ON(wc->refs[level - 1] == 0);
4702 BUG_ON(wc->refs[level - 1] == 0); 5125 *lookup_info = 0;
4703 5126
5127 if (wc->stage == DROP_REFERENCE) {
4704 if (wc->refs[level - 1] > 1) { 5128 if (wc->refs[level - 1] > 1) {
5129 if (level == 1 &&
5130 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5131 goto skip;
5132
4705 if (!wc->update_ref || 5133 if (!wc->update_ref ||
4706 generation <= root->root_key.offset) 5134 generation <= root->root_key.offset)
4707 goto skip; 5135 goto skip;
@@ -4715,12 +5143,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4715 wc->stage = UPDATE_BACKREF; 5143 wc->stage = UPDATE_BACKREF;
4716 wc->shared_level = level - 1; 5144 wc->shared_level = level - 1;
4717 } 5145 }
5146 } else {
5147 if (level == 1 &&
5148 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5149 goto skip;
4718 } 5150 }
4719 5151
4720 if (!btrfs_buffer_uptodate(next, generation)) { 5152 if (!btrfs_buffer_uptodate(next, generation)) {
4721 btrfs_tree_unlock(next); 5153 btrfs_tree_unlock(next);
4722 free_extent_buffer(next); 5154 free_extent_buffer(next);
4723 next = NULL; 5155 next = NULL;
5156 *lookup_info = 1;
4724 } 5157 }
4725 5158
4726 if (!next) { 5159 if (!next) {
@@ -4743,21 +5176,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
4743skip: 5176skip:
4744 wc->refs[level - 1] = 0; 5177 wc->refs[level - 1] = 0;
4745 wc->flags[level - 1] = 0; 5178 wc->flags[level - 1] = 0;
5179 if (wc->stage == DROP_REFERENCE) {
5180 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
5181 parent = path->nodes[level]->start;
5182 } else {
5183 BUG_ON(root->root_key.objectid !=
5184 btrfs_header_owner(path->nodes[level]));
5185 parent = 0;
5186 }
4746 5187
4747 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 5188 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
4748 parent = path->nodes[level]->start; 5189 root->root_key.objectid, level - 1, 0);
4749 } else { 5190 BUG_ON(ret);
4750 BUG_ON(root->root_key.objectid !=
4751 btrfs_header_owner(path->nodes[level]));
4752 parent = 0;
4753 } 5191 }
4754
4755 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
4756 root->root_key.objectid, level - 1, 0);
4757 BUG_ON(ret);
4758
4759 btrfs_tree_unlock(next); 5192 btrfs_tree_unlock(next);
4760 free_extent_buffer(next); 5193 free_extent_buffer(next);
5194 *lookup_info = 1;
4761 return 1; 5195 return 1;
4762} 5196}
4763 5197
@@ -4871,6 +5305,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4871 struct walk_control *wc) 5305 struct walk_control *wc)
4872{ 5306{
4873 int level = wc->level; 5307 int level = wc->level;
5308 int lookup_info = 1;
4874 int ret; 5309 int ret;
4875 5310
4876 while (level >= 0) { 5311 while (level >= 0) {
@@ -4878,14 +5313,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4878 btrfs_header_nritems(path->nodes[level])) 5313 btrfs_header_nritems(path->nodes[level]))
4879 break; 5314 break;
4880 5315
4881 ret = walk_down_proc(trans, root, path, wc); 5316 ret = walk_down_proc(trans, root, path, wc, lookup_info);
4882 if (ret > 0) 5317 if (ret > 0)
4883 break; 5318 break;
4884 5319
4885 if (level == 0) 5320 if (level == 0)
4886 break; 5321 break;
4887 5322
4888 ret = do_walk_down(trans, root, path, wc); 5323 ret = do_walk_down(trans, root, path, wc, &lookup_info);
4889 if (ret > 0) { 5324 if (ret > 0) {
4890 path->slots[level]++; 5325 path->slots[level]++;
4891 continue; 5326 continue;