path: root/fs/btrfs/extent-tree.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-30 12:05:48 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-30 12:05:48 -0400
commit	925d169f5b86fe57e2f5264ea574cce9a89b719d (patch)
tree	241d3156b427c6398bd3fc5efa9108635d0e189b /fs/btrfs/extent-tree.c
parent	cdf01dd5443d0befc8c6a32cb2e3d2f568fd2558 (diff)
parent	6418c96107a2b399848bb8cfc6e29f11ca74fb94 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (39 commits)
  Btrfs: deal with errors from updating the tree log
  Btrfs: allow subvol deletion by unprivileged user with -o user_subvol_rm_allowed
  Btrfs: make SNAP_DESTROY async
  Btrfs: add SNAP_CREATE_ASYNC ioctl
  Btrfs: add START_SYNC, WAIT_SYNC ioctls
  Btrfs: async transaction commit
  Btrfs: fix deadlock in btrfs_commit_transaction
  Btrfs: fix lockdep warning on clone ioctl
  Btrfs: fix clone ioctl where range is adjacent to extent
  Btrfs: fix delalloc checks in clone ioctl
  Btrfs: drop unused variable in block_alloc_rsv
  Btrfs: cleanup warnings from gcc 4.6 (nonbugs)
  Btrfs: Fix variables set but not read (bugs found by gcc 4.6)
  Btrfs: Use ERR_CAST helpers
  Btrfs: use memdup_user helpers
  Btrfs: fix raid code for removing missing drives
  Btrfs: Switch the extent buffer rbtree into a radix tree
  Btrfs: restructure try_release_extent_buffer()
  Btrfs: use the flusher threads for delalloc throttling
  Btrfs: tune the chunk allocation to 5% of the FS as metadata
  ...

Fix up trivial conflicts in fs/btrfs/super.c and fs/fs-writeback.c, and
remove use of INIT_RCU_HEAD in fs/btrfs/extent_io.c (that init macro was
useless and removed in commit 5e8067adfdba: "rcu head remove init")
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	694
1 file changed, 540 insertions(+), 154 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101c..0c097f3aec41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
 		return NULL;
 	}
 
+	/* We're loading it the fast way, so we don't have a caching_ctl. */
+	if (!cache->caching_ctl) {
+		spin_unlock(&cache->lock);
+		return NULL;
+	}
+
 	ctl = cache->caching_ctl;
 	atomic_inc(&ctl->count);
 	spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
 	return 0;
 }
 
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+			     struct btrfs_trans_handle *trans,
+			     int load_cache_only)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
 	if (cache->cached != BTRFS_CACHE_NO)
 		return 0;
 
+	/*
+	 * We can't do the read from on-disk cache during a commit since we need
+	 * to have the normal tree locking.
+	 */
+	if (!trans->transaction->in_commit) {
+		spin_lock(&cache->lock);
+		if (cache->cached != BTRFS_CACHE_NO) {
+			spin_unlock(&cache->lock);
+			return 0;
+		}
+		cache->cached = BTRFS_CACHE_STARTED;
+		spin_unlock(&cache->lock);
+
+		ret = load_free_space_cache(fs_info, cache);
+
+		spin_lock(&cache->lock);
+		if (ret == 1) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+			cache->last_byte_to_unpin = (u64)-1;
+		} else {
+			cache->cached = BTRFS_CACHE_NO;
+		}
+		spin_unlock(&cache->lock);
+		if (ret == 1)
+			return 0;
+	}
+
+	if (load_cache_only)
+		return 0;
+
 	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
 	BUG_ON(!caching_ctl);
 
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
-		if (found->flags == flags) {
+		if (found->flags & flags) {
 			rcu_read_unlock();
 			return found;
 		}
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
 	return num;
 }
 
+static u64 div_factor_fine(u64 num, int factor)
+{
+	if (factor == 100)
+		return num;
+	num *= factor;
+	do_div(num, 100);
+	return num;
+}
+
 u64 btrfs_find_block_group(struct btrfs_root *root,
 			   u64 search_start, u64 search_hint, int owner)
 {
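The new div_factor_fine() works in whole percent where the existing div_factor() works in tenths, with do_div() handling the 64-bit division on 32-bit hosts. A minimal userspace sketch of the same arithmetic (plain C, with ordinary division standing in for do_div()):

#include <stdint.h>
#include <stdio.h>

/* factor is in tenths: 8 means 80% */
static uint64_t div_factor(uint64_t num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	num /= 10;		/* the kernel uses do_div() here */
	return num;
}

/* factor is in whole percent: 5 means 5% */
static uint64_t div_factor_fine(uint64_t num, int factor)
{
	if (factor == 100)
		return num;
	num *= factor;
	num /= 100;
	return num;
}

int main(void)
{
	uint64_t total = 100ULL << 30;	/* a 100 GiB filesystem */

	printf("80%% of total: %llu\n",
	       (unsigned long long)div_factor(total, 8));
	printf("5%% of total:  %llu\n",
	       (unsigned long long)div_factor_fine(total, 5));
	return 0;
}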
@@ -2687,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
 	return cache;
 }
 
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+			    struct btrfs_trans_handle *trans,
+			    struct btrfs_path *path)
+{
+	struct btrfs_root *root = block_group->fs_info->tree_root;
+	struct inode *inode = NULL;
+	u64 alloc_hint = 0;
+	int num_pages = 0;
+	int retries = 0;
+	int ret = 0;
+
+	/*
+	 * If this block group is smaller than 100 megs don't bother caching the
+	 * block group.
+	 */
+	if (block_group->key.offset < (100 * 1024 * 1024)) {
+		spin_lock(&block_group->lock);
+		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+
+again:
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+		ret = PTR_ERR(inode);
+		btrfs_release_path(root, path);
+		goto out;
+	}
+
+	if (IS_ERR(inode)) {
+		BUG_ON(retries);
+		retries++;
+
+		if (block_group->ro)
+			goto out_free;
+
+		ret = create_free_space_inode(root, trans, block_group, path);
+		if (ret)
+			goto out_free;
+		goto again;
+	}
+
+	/*
+	 * We want to set the generation to 0, that way if anything goes wrong
+	 * from here on out we know not to trust this cache when we load up next
+	 * time.
+	 */
+	BTRFS_I(inode)->generation = 0;
+	ret = btrfs_update_inode(trans, root, inode);
+	WARN_ON(ret);
+
+	if (i_size_read(inode) > 0) {
+		ret = btrfs_truncate_free_space_cache(root, trans, path,
+						      inode);
+		if (ret)
+			goto out_put;
+	}
+
+	spin_lock(&block_group->lock);
+	if (block_group->cached != BTRFS_CACHE_FINISHED) {
+		spin_unlock(&block_group->lock);
+		goto out_put;
+	}
+	spin_unlock(&block_group->lock);
+
+	num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+	if (!num_pages)
+		num_pages = 1;
+
+	/*
+	 * Just to make absolutely sure we have enough space, we're going to
+	 * preallocate 12 pages worth of space for each block group.  In
+	 * practice we ought to use at most 8, but we need extra space so we can
+	 * add our header and have a terminator between the extents and the
+	 * bitmaps.
+	 */
+	num_pages *= 16;
+	num_pages *= PAGE_CACHE_SIZE;
+
+	ret = btrfs_check_data_free_space(inode, num_pages);
+	if (ret)
+		goto out_put;
+
+	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+					      num_pages, num_pages,
+					      &alloc_hint);
+	btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+	iput(inode);
+out_free:
+	btrfs_release_path(root, path);
+out:
+	spin_lock(&block_group->lock);
+	if (ret)
+		block_group->disk_cache_state = BTRFS_DC_ERROR;
+	else
+		block_group->disk_cache_state = BTRFS_DC_SETUP;
+	spin_unlock(&block_group->lock);
+
+	return ret;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
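The sizing logic in cache_save_setup() preallocates the cache file in proportion to the block group: 16 pages per full GiB of block group (with a one-GiB floor), and groups under 100 MiB skip caching entirely. A standalone sketch of that calculation (4 KiB page size assumed; PAGE_CACHE_SIZE in the kernel):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ULL			/* stand-in for PAGE_CACHE_SIZE */
#define GiB		(1024ULL * 1024 * 1024)
#define MiB		(1024ULL * 1024)

/* Bytes to preallocate for a group's cache file, or 0 when the group
 * is too small to bother caching. */
static uint64_t cache_prealloc_bytes(uint64_t group_bytes)
{
	uint64_t num_pages;

	if (group_bytes < 100 * MiB)
		return 0;

	num_pages = group_bytes / GiB;	/* full GiB in the group */
	if (!num_pages)
		num_pages = 1;
	num_pages *= 16;		/* 16 pages of cache per GiB */
	return num_pages * PAGE_SIZE;
}

int main(void)
{
	printf("1 GiB group  -> %llu bytes of cache\n",
	       (unsigned long long)cache_prealloc_bytes(1 * GiB));
	printf("50 MiB group -> %llu (skipped)\n",
	       (unsigned long long)cache_prealloc_bytes(50 * MiB));
	return 0;
}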
@@ -2699,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+again:
+	while (1) {
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		while (cache) {
+			if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+				break;
+			cache = next_block_group(root, cache);
+		}
+		if (!cache) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+		err = cache_save_setup(cache, trans, path);
+		last = cache->key.objectid + cache->key.offset;
+		btrfs_put_block_group(cache);
+	}
+
 	while (1) {
 		if (last == 0) {
 			err = btrfs_run_delayed_refs(trans, root,
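Each pass in btrfs_write_dirty_block_groups() walks the block groups with the same cursor pattern: look up the first group at or past last, and when the walk runs off the end, reset last to 0 and go around once more so groups created or re-dirtied during the pass are not missed. A simplified sketch of the pattern on a plain array (the hypothetical find_first() stands in for the rbtree lookup):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct group { uint64_t start, len; int wanted; };

/* Stand-in for btrfs_lookup_first_block_group(): first matching group
 * at or past pos. */
static struct group *find_first(struct group *g, size_t n, uint64_t pos)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (g[i].start + g[i].len > pos && g[i].wanted)
			return &g[i];
	return NULL;
}

int main(void)
{
	struct group groups[] = {
		{ 0, 100, 0 }, { 100, 100, 1 }, { 200, 100, 1 },
	};
	uint64_t last = 0;

	while (1) {
		struct group *cur = find_first(groups, 3, last);

		if (!cur) {
			if (last == 0)	/* full pass found nothing: done */
				break;
			last = 0;	/* wrap around and scan once more */
			continue;
		}
		printf("processing group at %llu\n",
		       (unsigned long long)cur->start);
		cur->wanted = 0;		/* state advances, like DC_CLEAR -> DC_SETUP */
		last = cur->start + cur->len;	/* advance the cursor */
	}
	return 0;
}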
@@ -2708,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
 		cache = btrfs_lookup_first_block_group(root->fs_info, last);
 		while (cache) {
+			if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+				btrfs_put_block_group(cache);
+				goto again;
+			}
+
 			if (cache->dirty)
 				break;
 			cache = next_block_group(root, cache);
@@ -2719,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 			continue;
 		}
 
+		if (cache->disk_cache_state == BTRFS_DC_SETUP)
+			cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
 		cache->dirty = 0;
 		last = cache->key.objectid + cache->key.offset;
 
@@ -2727,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		btrfs_put_block_group(cache);
 	}
 
+	while (1) {
+		/*
+		 * I don't think this is needed since we're just marking our
+		 * preallocated extent as written, but just in case it can't
+		 * hurt.
+		 */
+		if (last == 0) {
+			err = btrfs_run_delayed_refs(trans, root,
+						     (unsigned long)-1);
+			BUG_ON(err);
+		}
+
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		while (cache) {
+			/*
+			 * Really this shouldn't happen, but it could if we
+			 * couldn't write the entire preallocated extent and
+			 * splitting the extent resulted in a new block.
+			 */
+			if (cache->dirty) {
+				btrfs_put_block_group(cache);
+				goto again;
+			}
+			if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+				break;
+			cache = next_block_group(root, cache);
+		}
+		if (!cache) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+
+		btrfs_write_out_cache(root, trans, cache, path);
+
+		/*
+		 * If we didn't have an error then the cache state is still
+		 * NEED_WRITE, so we can set it to WRITTEN.
+		 */
+		if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+			cache->disk_cache_state = BTRFS_DC_WRITTEN;
+		last = cache->key.objectid + cache->key.offset;
+		btrfs_put_block_group(cache);
+	}
+
 	btrfs_free_path(path);
 	return 0;
 }
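Taken together, the passes implement a small per-block-group state machine for the on-disk cache: update_block_group() marks a touched group BTRFS_DC_CLEAR, the first pass moves CLEAR groups to SETUP (inode created, space preallocated), dirty groups are promoted from SETUP to NEED_WRITE as their items are written, and the final pass writes the cache out and lands on WRITTEN (or ERROR from cache_save_setup() on failure). A sketch of the progression (enum mirrors the names used here; the exact definition lives in ctree.h, and ordering matters because the code compares with '<'):

#include <stdio.h>

enum dc_state { DC_WRITTEN, DC_ERROR, DC_CLEAR, DC_SETUP, DC_NEED_WRITE };

static const char *dc_name(enum dc_state s)
{
	static const char *names[] = {
		"WRITTEN", "ERROR", "CLEAR", "SETUP", "NEED_WRITE",
	};
	return names[s];
}

int main(void)
{
	/* The happy path a dirtied block group takes across the passes. */
	enum dc_state path[] = { DC_CLEAR, DC_SETUP, DC_NEED_WRITE, DC_WRITTEN };
	int i;

	for (i = 0; i < 4; i++)
		printf("%s%s", dc_name(path[i]), i < 3 ? " -> " : "\n");
	return 0;
}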
@@ -2762,6 +2984,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	if (found) {
 		spin_lock(&found->lock);
 		found->total_bytes += total_bytes;
+		found->disk_total += total_bytes * factor;
 		found->bytes_used += bytes_used;
 		found->disk_used += bytes_used * factor;
 		found->full = 0;
@@ -2781,6 +3004,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 				    BTRFS_BLOCK_GROUP_SYSTEM |
 				    BTRFS_BLOCK_GROUP_METADATA);
 	found->total_bytes = total_bytes;
+	found->disk_total = total_bytes * factor;
 	found->bytes_used = bytes_used;
 	found->disk_used = bytes_used * factor;
 	found->bytes_pinned = 0;
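disk_total is the new on-disk counterpart of total_bytes: logical bytes are multiplied by a replication factor, 2 for DUP/RAID1/RAID10 and 1 otherwise, the same factor already applied to disk_used. A sketch of the accounting (flag values here are hypothetical stand-ins for the BTRFS_BLOCK_GROUP_* bits):

#include <stdint.h>
#include <stdio.h>

#define BG_DUP		(1 << 0)
#define BG_RAID1	(1 << 1)
#define BG_RAID10	(1 << 2)

static int replication_factor(uint64_t flags)
{
	if (flags & (BG_DUP | BG_RAID1 | BG_RAID10))
		return 2;	/* every logical byte occupies two disk bytes */
	return 1;
}

int main(void)
{
	uint64_t logical = 10ULL << 30;	/* a 10 GiB block group */

	printf("RAID1: %llu logical -> %llu on disk\n",
	       (unsigned long long)logical,
	       (unsigned long long)(logical * replication_factor(BG_RAID1)));
	return 0;
}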
@@ -2882,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 	struct btrfs_space_info *data_sinfo;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 used;
-	int ret = 0, committed = 0;
+	int ret = 0, committed = 0, alloc_chunk = 1;
 
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
+	if (root == root->fs_info->tree_root) {
+		alloc_chunk = 0;
+		committed = 1;
+	}
+
 	data_sinfo = BTRFS_I(inode)->space_info;
 	if (!data_sinfo)
 		goto alloc;
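The alignment line above rounds the request up to a sectorsize multiple with the usual power-of-two mask trick: add sectorsize - 1, then clear the low bits. A tiny sketch:

#include <stdint.h>
#include <stdio.h>

/* Round up to the next multiple of align; align must be a power of two. */
static uint64_t align_up(uint64_t bytes, uint64_t align)
{
	return (bytes + align - 1) & ~(align - 1);
}

int main(void)
{
	printf("%llu\n", (unsigned long long)align_up(1, 4096));	/* 4096 */
	printf("%llu\n", (unsigned long long)align_up(4096, 4096));	/* 4096 */
	printf("%llu\n", (unsigned long long)align_up(4097, 4096));	/* 8192 */
	return 0;
}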
@@ -2905,7 +3134,7 @@ again:
 	 * if we don't have enough free bytes in this space then we need
 	 * to alloc a new chunk.
 	 */
-	if (!data_sinfo->full) {
+	if (!data_sinfo->full && alloc_chunk) {
 		u64 alloc_target;
 
 		data_sinfo->force_alloc = 1;
@@ -2997,10 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	rcu_read_unlock();
 }
 
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
-			      u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+			      struct btrfs_space_info *sinfo, u64 alloc_bytes)
 {
 	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+	u64 thresh;
 
 	if (sinfo->bytes_used + sinfo->bytes_reserved +
 	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3240,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
 	    alloc_bytes < div_factor(num_bytes, 8))
 		return 0;
 
+	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+		return 0;
+
 	return 1;
 }
 
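The new check caps speculative chunk allocation: once the space info exceeds max(256 MiB, 5% of the filesystem) and less than 30% of it is actually used, no further chunk is allocated. A userspace sketch of the threshold test:

#include <stdint.h>
#include <stdio.h>

static uint64_t pct(uint64_t num, int percent)	/* like div_factor_fine() */
{
	return num * percent / 100;
}

/* 1 if a new chunk may be allocated, 0 if existing space is still too empty. */
static int chunk_alloc_allowed(uint64_t fs_total, uint64_t sp_total,
			       uint64_t sp_used)
{
	uint64_t thresh = pct(fs_total, 5);

	if (thresh < 256ULL << 20)
		thresh = 256ULL << 20;	/* max_t(u64, 256MB, 5% of the fs) */

	if (sp_total > thresh && sp_used < sp_total * 3 / 10)
		return 0;	/* plenty allocated, and mostly empty */
	return 1;
}

int main(void)
{
	uint64_t fs = 1000ULL << 30;	/* a 1000 GiB filesystem */

	/* 60 GiB of chunks (> 5% of the fs), only 10 GiB used: hold off. */
	printf("%d\n", chunk_alloc_allowed(fs, 60ULL << 30, 10ULL << 30));
	return 0;
}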
@@ -3041,13 +3277,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
-	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+	if (!force && !should_alloc_chunk(extent_root, space_info,
+					  alloc_bytes)) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
 	spin_unlock(&space_info->lock);
 
 	/*
+	 * If we have mixed data/metadata chunks we want to make sure we keep
+	 * allocating mixed chunks instead of individual chunks.
+	 */
+	if (btrfs_mixed_space_info(space_info))
+		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+	/*
 	 * if we're doing a data chunk, go ahead and make sure that
 	 * we keep a reasonable number of metadata chunks allocated in the
 	 * FS as well.
@@ -3072,55 +3316,25 @@ out:
 	return ret;
 }
 
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_space_info *sinfo, u64 num_bytes)
-{
-	int ret;
-	int end_trans = 0;
-
-	if (sinfo->full)
-		return 0;
-
-	spin_lock(&sinfo->lock);
-	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
-	spin_unlock(&sinfo->lock);
-	if (!ret)
-		return 0;
-
-	if (!trans) {
-		trans = btrfs_join_transaction(root, 1);
-		BUG_ON(IS_ERR(trans));
-		end_trans = 1;
-	}
-
-	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-			     num_bytes + 2 * 1024 * 1024,
-			     get_alloc_profile(root, sinfo->flags), 0);
-
-	if (end_trans)
-		btrfs_end_transaction(trans, root);
-
-	return ret == 1 ? 1 : 0;
-}
-
 /*
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, u64 to_reclaim)
+			   struct btrfs_root *root, u64 to_reclaim, int sync)
 {
 	struct btrfs_block_rsv *block_rsv;
+	struct btrfs_space_info *space_info;
 	u64 reserved;
 	u64 max_reclaim;
 	u64 reclaimed = 0;
 	int pause = 1;
-	int ret;
+	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
 
 	block_rsv = &root->fs_info->delalloc_block_rsv;
-	spin_lock(&block_rsv->lock);
-	reserved = block_rsv->reserved;
-	spin_unlock(&block_rsv->lock);
+	space_info = block_rsv->space_info;
+
+	smp_mb();
+	reserved = space_info->bytes_reserved;
 
 	if (reserved == 0)
 		return 0;
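The rewritten shrink_delalloc() no longer writes inodes itself; it hands the flusher threads a page budget per iteration: 2 MiB worth of pages, clamped to the delalloc bytes actually outstanding. A sketch of the budget calculation (4 KiB pages assumed; PAGE_CACHE_SHIFT in the kernel):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* 4 KiB pages; PAGE_CACHE_SHIFT in the kernel */

/* Pages to hand the flusher threads per iteration: at most 2 MiB worth,
 * and never more than the delalloc bytes still outstanding. */
static unsigned long flush_budget(uint64_t delalloc_bytes)
{
	unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_SHIFT;
	unsigned long outstanding = delalloc_bytes >> PAGE_SHIFT;

	return nr_pages < outstanding ? nr_pages : outstanding;
}

int main(void)
{
	printf("%lu pages\n", flush_budget(8ULL << 20));	/* 512: capped */
	printf("%lu pages\n", flush_budget(64ULL << 10));	/* 16: all of it */
	return 0;
}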
@@ -3128,104 +3342,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	max_reclaim = min(reserved, to_reclaim);
 
 	while (1) {
-		ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
-		if (!ret) {
-			__set_current_state(TASK_INTERRUPTIBLE);
-			schedule_timeout(pause);
-			pause <<= 1;
-			if (pause > HZ / 10)
-				pause = HZ / 10;
-		} else {
-			pause = 1;
-		}
+		/* have the flusher threads jump in and do some IO */
+		smp_mb();
+		nr_pages = min_t(unsigned long, nr_pages,
+			root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
-		spin_lock(&block_rsv->lock);
-		if (reserved > block_rsv->reserved)
-			reclaimed = reserved - block_rsv->reserved;
-		reserved = block_rsv->reserved;
-		spin_unlock(&block_rsv->lock);
+		spin_lock(&space_info->lock);
+		if (reserved > space_info->bytes_reserved)
+			reclaimed += reserved - space_info->bytes_reserved;
+		reserved = space_info->bytes_reserved;
+		spin_unlock(&space_info->lock);
 
 		if (reserved == 0 || reclaimed >= max_reclaim)
 			break;
 
 		if (trans && trans->transaction->blocked)
 			return -EAGAIN;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(pause);
+		pause <<= 1;
+		if (pause > HZ / 10)
+			pause = HZ / 10;
+
 	}
 	return reclaimed >= to_reclaim;
 }
 
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes, int *retries)
-{
-	struct btrfs_space_info *space_info = block_rsv->space_info;
-	int ret;
-
-	if ((*retries) > 2)
-		return -ENOSPC;
-
-	ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
-	if (ret)
-		return 1;
-
-	if (trans && trans->transaction->in_commit)
-		return -ENOSPC;
-
-	ret = shrink_delalloc(trans, root, num_bytes);
-	if (ret)
-		return ret;
-
-	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned < num_bytes)
-		ret = 1;
-	spin_unlock(&space_info->lock);
-	if (ret)
-		return -ENOSPC;
-
-	(*retries)++;
-
-	if (trans)
-		return -EAGAIN;
-
-	trans = btrfs_join_transaction(root, 1);
-	BUG_ON(IS_ERR(trans));
-	ret = btrfs_commit_transaction(trans, root);
-	BUG_ON(ret);
-
-	return 1;
-}
-
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
-				  u64 num_bytes)
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes.  The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space.  That way for retries > 1 we don't try
+ * and add space, we just check to see if the amount of unused space is >= the
+ * total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries so
+ * that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_block_rsv *block_rsv,
+				  u64 orig_bytes, int flush)
 {
 	struct btrfs_space_info *space_info = block_rsv->space_info;
 	u64 unused;
-	int ret = -ENOSPC;
+	u64 num_bytes = orig_bytes;
+	int retries = 0;
+	int ret = 0;
+	bool reserved = false;
+	bool committed = false;
+
+again:
+	ret = -ENOSPC;
+	if (reserved)
+		num_bytes = 0;
 
 	spin_lock(&space_info->lock);
 	unused = space_info->bytes_used + space_info->bytes_reserved +
-		 space_info->bytes_pinned + space_info->bytes_readonly;
+		 space_info->bytes_pinned + space_info->bytes_readonly +
+		 space_info->bytes_may_use;
 
-	if (unused < space_info->total_bytes)
-		unused = space_info->total_bytes - unused;
-	else
-		unused = 0;
-
-	if (unused >= num_bytes) {
-		if (block_rsv->priority >= 10) {
-			space_info->bytes_reserved += num_bytes;
-			ret = 0;
-		} else {
-			if ((unused + block_rsv->reserved) *
-			    block_rsv->priority >=
-			    (num_bytes + block_rsv->reserved) * 10) {
-				space_info->bytes_reserved += num_bytes;
-				ret = 0;
-			}
-		}
+	/*
+	 * The idea here is that we've not already over-reserved the block group
+	 * then we can go ahead and save our reservation first and then start
+	 * flushing if we need to.  Otherwise if we've already overcommitted
+	 * lets start flushing stuff first and then come back and try to make
+	 * our reservation.
+	 */
+	if (unused <= space_info->total_bytes) {
+		unused = space_info->total_bytes - unused;
+		if (unused >= num_bytes) {
+			if (!reserved)
+				space_info->bytes_reserved += orig_bytes;
+			ret = 0;
+		} else {
+			/*
+			 * Ok set num_bytes to orig_bytes since we aren't
+			 * overcommitted, this way we only try and reclaim what
+			 * we need.
+			 */
+			num_bytes = orig_bytes;
+		}
+	} else {
+		/*
+		 * Ok we're over committed, set num_bytes to the overcommitted
+		 * amount plus the amount of bytes that we need for this
+		 * reservation.
+		 */
+		num_bytes = unused - space_info->total_bytes +
+			    (orig_bytes * (retries + 1));
+	}
+
+	/*
+	 * Couldn't make our reservation, save our place so while we're trying
+	 * to reclaim space we can actually use it instead of somebody else
+	 * stealing it from us.
+	 */
+	if (ret && !reserved) {
+		space_info->bytes_reserved += orig_bytes;
+		reserved = true;
 	}
+
 	spin_unlock(&space_info->lock);
 
+	if (!ret)
+		return 0;
+
+	if (!flush)
+		goto out;
+
+	/*
+	 * We do synchronous shrinking since we don't actually unreserve
+	 * metadata until after the IO is completed.
+	 */
+	ret = shrink_delalloc(trans, root, num_bytes, 1);
+	if (ret > 0)
+		return 0;
+	else if (ret < 0)
+		goto out;
+
+	/*
+	 * So if we were overcommitted it's possible that somebody else flushed
+	 * out enough space and we simply didn't have enough space to reclaim,
+	 * so go back around and try again.
+	 */
+	if (retries < 2) {
+		retries++;
+		goto again;
+	}
+
+	spin_lock(&space_info->lock);
+	/*
+	 * Not enough space to be reclaimed, don't bother committing the
+	 * transaction.
+	 */
+	if (space_info->bytes_pinned < orig_bytes)
+		ret = -ENOSPC;
+	spin_unlock(&space_info->lock);
+	if (ret)
+		goto out;
+
+	ret = -EAGAIN;
+	if (trans || committed)
+		goto out;
+
+	ret = -ENOSPC;
+	trans = btrfs_join_transaction(root, 1);
+	if (IS_ERR(trans))
+		goto out;
+	ret = btrfs_commit_transaction(trans, root);
+	if (!ret) {
+		trans = NULL;
+		committed = true;
+		goto again;
+	}
+
+out:
+	if (reserved) {
+		spin_lock(&space_info->lock);
+		space_info->bytes_reserved -= orig_bytes;
+		spin_unlock(&space_info->lock);
+	}
+
 	return ret;
 }
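The core of the new reserve_metadata_bytes() is plain accounting: sum every byte already spoken for; if that still fits under total_bytes and the slack covers the request, take the reservation, otherwise compute how much must be reclaimed, scaled up by the retry count when already overcommitted. A standalone sketch of that arithmetic:

#include <stdint.h>
#include <stdio.h>

struct space {
	uint64_t total, used, reserved, pinned, readonly, may_use;
};

/* Returns 0 and takes the reservation if it fits, otherwise returns the
 * number of bytes that would need reclaiming (per the retry scaling). */
static uint64_t try_reserve(struct space *s, uint64_t bytes, int retries)
{
	uint64_t committed = s->used + s->reserved + s->pinned +
			     s->readonly + s->may_use;

	if (committed <= s->total) {
		uint64_t slack = s->total - committed;

		if (slack >= bytes) {
			s->reserved += bytes;	/* reservation succeeds */
			return 0;
		}
		return bytes;			/* reclaim just what we need */
	}
	/* overcommitted: reclaim the excess plus a growing margin */
	return committed - s->total + bytes * (retries + 1);
}

int main(void)
{
	struct space s = { .total = 1 << 20, .used = 900 << 10 };

	printf("need to reclaim: %llu\n",
	       (unsigned long long)try_reserve(&s, 200 << 10, 0));
	return 0;
}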
@@ -3327,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	u64 alloc_target;
 
 	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
 	if (!block_rsv)
 		return NULL;
 
 	btrfs_init_block_rsv(block_rsv);
-
-	alloc_target = btrfs_get_alloc_profile(root, 0);
 	block_rsv->space_info = __find_space_info(fs_info,
 						  BTRFS_BLOCK_GROUP_METADATA);
-
 	return block_rsv;
 }
 
@@ -3369,23 +3644,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes, int *retries)
+			u64 num_bytes)
 {
 	int ret;
 
 	if (num_bytes == 0)
 		return 0;
-again:
-	ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 1);
 		return 0;
 	}
 
-	ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-	if (ret > 0)
-		goto again;
-
 	return ret;
 }
 
@@ -3420,7 +3691,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 		return 0;
 
 	if (block_rsv->refill_used) {
-		ret = reserve_metadata_bytes(block_rsv, num_bytes);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     num_bytes, 0);
 		if (!ret) {
 			block_rsv_add_bytes(block_rsv, num_bytes, 0);
 			return 0;
@@ -3499,6 +3771,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 
 	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 	spin_lock(&sinfo->lock);
+	if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+		data_used = 0;
 	meta_used = sinfo->bytes_used;
 	spin_unlock(&sinfo->lock);
 
@@ -3526,7 +3800,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 	block_rsv->size = num_bytes;
 
 	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
-		    sinfo->bytes_reserved + sinfo->bytes_readonly;
+		    sinfo->bytes_reserved + sinfo->bytes_readonly +
+		    sinfo->bytes_may_use;
 
 	if (sinfo->total_bytes > num_bytes) {
 		num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3872,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
 
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
-				 int num_items, int *retries)
+				 int num_items)
 {
 	u64 num_bytes;
 	int ret;
@@ -3607,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 
 	num_bytes = calc_trans_metadata_size(root, num_items);
 	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-				  num_bytes, retries);
+				  num_bytes);
 	if (!ret) {
 		trans->bytes_reserved += num_bytes;
 		trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3956,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
 	u64 to_reserve;
 	int nr_extents;
-	int retries = 0;
 	int ret;
 
 	if (btrfs_transaction_in_commit(root->fs_info))
 		schedule_timeout(1);
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
 	if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3972,14 @@ again:
 		nr_extents = 0;
 		to_reserve = 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
-	ret = reserve_metadata_bytes(block_rsv, to_reserve);
-	if (ret) {
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
-					   &retries);
-		if (ret > 0)
-			goto again;
+	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+	if (ret)
 		return ret;
-	}
 
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	BTRFS_I(inode)->reserved_extents += nr_extents;
 	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3987,7 @@ again:
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
 	if (block_rsv->size > 512 * 1024 * 1024)
-		shrink_delalloc(NULL, root, to_reserve);
+		shrink_delalloc(NULL, root, to_reserve, 0);
 
 	return 0;
 }
@@ -3776,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc)
 {
-	struct btrfs_block_group_cache *cache;
+	struct btrfs_block_group_cache *cache = NULL;
 	struct btrfs_fs_info *info = root->fs_info;
-	int factor;
 	u64 total = num_bytes;
 	u64 old_val;
 	u64 byte_in_group;
+	int factor;
 
 	/* block accounting for super block */
 	spin_lock(&info->delalloc_lock);
@@ -3803,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			factor = 2;
 		else
 			factor = 1;
+		/*
+		 * If this block group has free space cache written out, we
+		 * need to make sure to load it if we are removing space.  This
+		 * is because we need the unpinning stage to actually add the
+		 * space back to the block group, otherwise we will leak space.
+		 */
+		if (!alloc && cache->cached == BTRFS_CACHE_NO)
+			cache_block_group(cache, trans, 1);
+
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
 
 		spin_lock(&cache->space_info->lock);
 		spin_lock(&cache->lock);
+
+		if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+		    cache->disk_cache_state < BTRFS_DC_CLEAR)
+			cache->disk_cache_state = BTRFS_DC_CLEAR;
+
 		cache->dirty = 1;
 		old_val = btrfs_block_group_used(&cache->item);
 		num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
 	bool failed_alloc = false;
+	bool use_cluster = true;
 	u64 ideal_cache_percent = 0;
 	u64 ideal_cache_offset = 0;
 
@@ -4568,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		return -ENOSPC;
 	}
 
+	/*
+	 * If the space info is for both data and metadata it means we have a
+	 * small filesystem and we can't use the clustering stuff.
+	 */
+	if (btrfs_mixed_space_info(space_info))
+		use_cluster = false;
+
 	if (orig_root->ref_cows || empty_size)
 		allowed_chunk_alloc = 1;
 
-	if (data & BTRFS_BLOCK_GROUP_METADATA) {
+	if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
 		last_ptr = &root->fs_info->meta_alloc_cluster;
 		if (!btrfs_test_opt(root, SSD))
 			empty_cluster = 64 * 1024;
 	}
 
-	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+	if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+	    btrfs_test_opt(root, SSD)) {
 		last_ptr = &root->fs_info->data_alloc_cluster;
 	}
 
@@ -4641,6 +4934,10 @@ have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
 			u64 free_percent;
 
+			ret = cache_block_group(block_group, trans, 1);
+			if (block_group->cached == BTRFS_CACHE_FINISHED)
+				goto have_block_group;
+
 			free_percent = btrfs_block_group_used(&block_group->item);
 			free_percent *= 100;
 			free_percent = div64_u64(free_percent,
@@ -4661,7 +4958,7 @@ have_block_group:
 			if (loop > LOOP_CACHING_NOWAIT ||
 			    (loop > LOOP_FIND_IDEAL &&
 			     atomic_read(&space_info->caching_threads) < 2)) {
-				ret = cache_block_group(block_group);
+				ret = cache_block_group(block_group, trans, 0);
 				BUG_ON(ret);
 			}
 			found_uncached_bg = true;
@@ -5218,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	u64 num_bytes = ins->offset;
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	cache_block_group(block_group);
+	cache_block_group(block_group, trans, 0);
 	caching_ctl = get_caching_control(block_group);
 
 	if (!caching_ctl) {
@@ -5308,7 +5605,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	block_rsv = get_block_rsv(trans, root);
 
 	if (block_rsv->size == 0) {
-		ret = reserve_metadata_bytes(block_rsv, blocksize);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     blocksize, 0);
 		if (ret)
 			return ERR_PTR(ret);
 		return block_rsv;
@@ -5318,11 +5616,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	if (!ret)
 		return block_rsv;
 
-	WARN_ON(1);
-	printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
-		block_rsv->size, block_rsv->reserved,
-		block_rsv->freed[0], block_rsv->freed[1]);
-
 	return ERR_PTR(-ENOSPC);
 }
 
@@ -5421,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 	u64 generation;
 	u64 refs;
 	u64 flags;
-	u64 last = 0;
 	u32 nritems;
 	u32 blocksize;
 	struct btrfs_key key;
@@ -5489,7 +5781,6 @@ reada:
 					       generation);
 		if (ret)
 			break;
-		last = bytenr + blocksize;
 		nread++;
 	}
 	wc->reada_slot = slot;
@@ -7813,6 +8104,40 @@ out:
 	return ret;
 }
 
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+	struct btrfs_block_group_cache *block_group;
+	u64 last = 0;
+
+	while (1) {
+		struct inode *inode;
+
+		block_group = btrfs_lookup_first_block_group(info, last);
+		while (block_group) {
+			spin_lock(&block_group->lock);
+			if (block_group->iref)
+				break;
+			spin_unlock(&block_group->lock);
+			block_group = next_block_group(info->tree_root,
+						       block_group);
+		}
+		if (!block_group) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+
+		inode = block_group->inode;
+		block_group->iref = 0;
+		block_group->inode = NULL;
+		spin_unlock(&block_group->lock);
+		iput(inode);
+		last = block_group->key.objectid + block_group->key.offset;
+		btrfs_put_block_group(block_group);
+	}
+}
+
 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
 	struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	struct extent_buffer *leaf;
+	int need_clear = 0;
+	u64 cache_gen;
 
 	root = info->extent_root;
 	key.objectid = 0;
@@ -7905,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 	if (!path)
 		return -ENOMEM;
 
+	cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+	if (cache_gen != 0 &&
+	    btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+		need_clear = 1;
+	if (btrfs_test_opt(root, CLEAR_CACHE))
+		need_clear = 1;
+	if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+		printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+
 	while (1) {
 		ret = find_first_block_group(root, path, &key);
 		if (ret > 0)
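Whether old cache files can be trusted at mount comes down to a generation check: the super block records the transaction generation the cache was last written at, and if that is set but doesn't match the current generation (or the user mounted with clear_cache), every group starts out BTRFS_DC_CLEAR and the cache is rebuilt. A sketch of the decision (opt_clear_cache stands in for btrfs_test_opt(root, CLEAR_CACHE)):

#include <stdio.h>
#include <stdint.h>

static int cache_needs_clearing(uint64_t super_gen, uint64_t cache_gen,
				int opt_clear_cache)
{
	if (cache_gen != 0 && super_gen != cache_gen)
		return 1;	/* cache written, but by an older transaction */
	if (opt_clear_cache)
		return 1;	/* user asked for a rebuild */
	return 0;
}

int main(void)
{
	printf("%d\n", cache_needs_clearing(105, 100, 0));	/* 1: stale */
	printf("%d\n", cache_needs_clearing(105, 105, 0));	/* 0: valid */
	printf("%d\n", cache_needs_clearing(105, 0, 0));	/* 0: no cache yet */
	return 0;
}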
@@ -7927,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		INIT_LIST_HEAD(&cache->list);
 		INIT_LIST_HEAD(&cache->cluster_list);
 
+		if (need_clear)
+			cache->disk_cache_state = BTRFS_DC_CLEAR;
+
 		/*
 		 * we only want to have 32k of ram per block group for keeping
 		 * track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->key.offset = size;
 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
 	cache->sectorsize = root->sectorsize;
+	cache->fs_info = root->fs_info;
 
 	/*
 	 * we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8427,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_free_cluster *cluster;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_key key;
+	struct inode *inode;
 	int ret;
+	int factor;
 
 	root = root->fs_info->extent_root;
 
@@ -8097,6 +8440,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	BUG_ON(!block_group->ro);
 
 	memcpy(&key, &block_group->key, sizeof(key));
+	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+				  BTRFS_BLOCK_GROUP_RAID1 |
+				  BTRFS_BLOCK_GROUP_RAID10))
+		factor = 2;
+	else
+		factor = 1;
 
 	/* make sure this block group isn't part of an allocation cluster */
 	cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (!IS_ERR(inode)) {
+		btrfs_orphan_add(trans, inode);
+		clear_nlink(inode);
+		/* One for the block groups ref */
+		spin_lock(&block_group->lock);
+		if (block_group->iref) {
+			block_group->iref = 0;
+			block_group->inode = NULL;
+			spin_unlock(&block_group->lock);
+			iput(inode);
+		} else {
+			spin_unlock(&block_group->lock);
+		}
+		/* One for our lookup ref */
+		iput(inode);
+	}
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0)
+		btrfs_release_path(tree_root, path);
+	if (ret == 0) {
+		ret = btrfs_del_item(trans, tree_root, path);
+		if (ret)
+			goto out;
+		btrfs_release_path(tree_root, path);
+	}
+
 	spin_lock(&root->fs_info->block_group_cache_lock);
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
@@ -8137,8 +8520,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_lock(&block_group->space_info->lock);
 	block_group->space_info->total_bytes -= block_group->key.offset;
 	block_group->space_info->bytes_readonly -= block_group->key.offset;
+	block_group->space_info->disk_total -= block_group->key.offset * factor;
 	spin_unlock(&block_group->space_info->lock);
 
+	memcpy(&key, &block_group->key, sizeof(key));
+
 	btrfs_clear_space_info_full(root->fs_info);
 
 	btrfs_put_block_group(block_group);