Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	694
1 file changed, 540 insertions(+), 154 deletions(-)
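
Orientation note (not part of the commit): the diff below adds an on-disk free space cache. Each block group gains a disk_cache_state that moves from BTRFS_DC_CLEAR (cache stale, needs rebuild) through BTRFS_DC_SETUP (cache_save_setup() prepared and preallocated the cache inode) and BTRFS_DC_NEED_WRITE (the group went clean during commit) to BTRFS_DC_WRITTEN (btrfs_write_out_cache() flushed it), with BTRFS_DC_ERROR on setup failure. A minimal userspace model of that lifecycle follows; the enum values and ordering are assumptions inferred from the `cache->disk_cache_state < BTRFS_DC_CLEAR` comparison in update_block_group(), and none of this is kernel code.

#include <assert.h>
#include <stdio.h>

/*
 * Assumed ordering (hypothetical values): the diff compares
 * disk_cache_state < BTRFS_DC_CLEAR, so WRITTEN and ERROR must sort
 * below CLEAR.
 */
enum disk_cache_state {
	DC_WRITTEN,	/* cache on disk matches the block group */
	DC_ERROR,	/* cache_save_setup() failed; don't trust the cache */
	DC_CLEAR,	/* free space changed; cache must be rebuilt */
	DC_SETUP,	/* cache inode created/truncated and preallocated */
	DC_NEED_WRITE,	/* block group went clean; cache awaits writeout */
};

/* update_block_group(): any alloc/free invalidates a written cache. */
static enum disk_cache_state on_space_change(enum disk_cache_state s)
{
	return s < DC_CLEAR ? DC_CLEAR : s;
}

/* cache_save_setup(): prepare the cache inode for a CLEAR group. */
static enum disk_cache_state on_save_setup(enum disk_cache_state s, int err)
{
	assert(s == DC_CLEAR);
	return err ? DC_ERROR : DC_SETUP;
}

/* btrfs_write_dirty_block_groups(): first pass marks clean groups... */
static enum disk_cache_state on_group_clean(enum disk_cache_state s)
{
	return s == DC_SETUP ? DC_NEED_WRITE : s;
}

/* ...second pass writes the cache out. */
static enum disk_cache_state on_cache_written(enum disk_cache_state s)
{
	return s == DC_NEED_WRITE ? DC_WRITTEN : s;
}

int main(void)
{
	enum disk_cache_state s = DC_WRITTEN;

	s = on_space_change(s);		/* extent allocated -> DC_CLEAR */
	s = on_save_setup(s, 0);	/* inode preallocated -> DC_SETUP */
	s = on_group_clean(s);		/* item updated -> DC_NEED_WRITE */
	s = on_cache_written(s);	/* cache flushed -> DC_WRITTEN */
	printf("final state: %d (expect %d)\n", s, DC_WRITTEN);
	return 0;
}
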
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101c..0c097f3aec41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
 		return NULL;
 	}
 
+	/* We're loading it the fast way, so we don't have a caching_ctl. */
+	if (!cache->caching_ctl) {
+		spin_unlock(&cache->lock);
+		return NULL;
+	}
+
 	ctl = cache->caching_ctl;
 	atomic_inc(&ctl->count);
 	spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
 	return 0;
 }
 
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+			     struct btrfs_trans_handle *trans,
+			     int load_cache_only)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
 	if (cache->cached != BTRFS_CACHE_NO)
 		return 0;
 
+	/*
+	 * We can't do the read from on-disk cache during a commit since we need
+	 * to have the normal tree locking.
+	 */
+	if (!trans->transaction->in_commit) {
+		spin_lock(&cache->lock);
+		if (cache->cached != BTRFS_CACHE_NO) {
+			spin_unlock(&cache->lock);
+			return 0;
+		}
+		cache->cached = BTRFS_CACHE_STARTED;
+		spin_unlock(&cache->lock);
+
+		ret = load_free_space_cache(fs_info, cache);
+
+		spin_lock(&cache->lock);
+		if (ret == 1) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+			cache->last_byte_to_unpin = (u64)-1;
+		} else {
+			cache->cached = BTRFS_CACHE_NO;
+		}
+		spin_unlock(&cache->lock);
+		if (ret == 1)
+			return 0;
+	}
+
+	if (load_cache_only)
+		return 0;
+
 	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
 	BUG_ON(!caching_ctl);
 
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
-		if (found->flags == flags) {
+		if (found->flags & flags) {
 			rcu_read_unlock();
 			return found;
 		}
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
 	return num;
 }
 
+static u64 div_factor_fine(u64 num, int factor)
+{
+	if (factor == 100)
+		return num;
+	num *= factor;
+	do_div(num, 100);
+	return num;
+}
+
 u64 btrfs_find_block_group(struct btrfs_root *root,
 			   u64 search_start, u64 search_hint, int owner)
 {
@@ -2687,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
 	return cache;
 }
 
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+			    struct btrfs_trans_handle *trans,
+			    struct btrfs_path *path)
+{
+	struct btrfs_root *root = block_group->fs_info->tree_root;
+	struct inode *inode = NULL;
+	u64 alloc_hint = 0;
+	int num_pages = 0;
+	int retries = 0;
+	int ret = 0;
+
+	/*
+	 * If this block group is smaller than 100 megs don't bother caching the
+	 * block group.
+	 */
+	if (block_group->key.offset < (100 * 1024 * 1024)) {
+		spin_lock(&block_group->lock);
+		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+
+again:
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+		ret = PTR_ERR(inode);
+		btrfs_release_path(root, path);
+		goto out;
+	}
+
+	if (IS_ERR(inode)) {
+		BUG_ON(retries);
+		retries++;
+
+		if (block_group->ro)
+			goto out_free;
+
+		ret = create_free_space_inode(root, trans, block_group, path);
+		if (ret)
+			goto out_free;
+		goto again;
+	}
+
+	/*
+	 * We want to set the generation to 0, that way if anything goes wrong
+	 * from here on out we know not to trust this cache when we load up next
+	 * time.
+	 */
+	BTRFS_I(inode)->generation = 0;
+	ret = btrfs_update_inode(trans, root, inode);
+	WARN_ON(ret);
+
+	if (i_size_read(inode) > 0) {
+		ret = btrfs_truncate_free_space_cache(root, trans, path,
+						      inode);
+		if (ret)
+			goto out_put;
+	}
+
+	spin_lock(&block_group->lock);
+	if (block_group->cached != BTRFS_CACHE_FINISHED) {
+		spin_unlock(&block_group->lock);
+		goto out_put;
+	}
+	spin_unlock(&block_group->lock);
+
+	num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+	if (!num_pages)
+		num_pages = 1;
+
+	/*
+	 * Just to make absolutely sure we have enough space, we're going to
+	 * preallocate 16 pages worth of space for each block group.  In
+	 * practice we ought to use at most 8, but we need extra space so we can
+	 * add our header and have a terminator between the extents and the
+	 * bitmaps.
+	 */
+	num_pages *= 16;
+	num_pages *= PAGE_CACHE_SIZE;
+
+	ret = btrfs_check_data_free_space(inode, num_pages);
+	if (ret)
+		goto out_put;
+
+	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+					      num_pages, num_pages,
+					      &alloc_hint);
+	btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+	iput(inode);
+out_free:
+	btrfs_release_path(root, path);
+out:
+	spin_lock(&block_group->lock);
+	if (ret)
+		block_group->disk_cache_state = BTRFS_DC_ERROR;
+	else
+		block_group->disk_cache_state = BTRFS_DC_SETUP;
+	spin_unlock(&block_group->lock);
+
+	return ret;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
@@ -2699,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+again:
+	while (1) {
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		while (cache) {
+			if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+				break;
+			cache = next_block_group(root, cache);
+		}
+		if (!cache) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+		err = cache_save_setup(cache, trans, path);
+		last = cache->key.objectid + cache->key.offset;
+		btrfs_put_block_group(cache);
+	}
+
 	while (1) {
 		if (last == 0) {
 			err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
 		cache = btrfs_lookup_first_block_group(root->fs_info, last);
 		while (cache) {
+			if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+				btrfs_put_block_group(cache);
+				goto again;
+			}
+
 			if (cache->dirty)
 				break;
 			cache = next_block_group(root, cache);
@@ -2719,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 			continue;
 		}
 
+		if (cache->disk_cache_state == BTRFS_DC_SETUP)
+			cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
 		cache->dirty = 0;
 		last = cache->key.objectid + cache->key.offset;
 
@@ -2727,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		btrfs_put_block_group(cache);
 	}
 
+	while (1) {
+		/*
+		 * I don't think this is needed since we're just marking our
+		 * preallocated extent as written, but just in case it can't
+		 * hurt.
+		 */
+		if (last == 0) {
+			err = btrfs_run_delayed_refs(trans, root,
+						     (unsigned long)-1);
+			BUG_ON(err);
+		}
+
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		while (cache) {
+			/*
+			 * Really this shouldn't happen, but it could if we
+			 * couldn't write the entire preallocated extent and
+			 * splitting the extent resulted in a new block.
+			 */
+			if (cache->dirty) {
+				btrfs_put_block_group(cache);
+				goto again;
+			}
+			if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+				break;
+			cache = next_block_group(root, cache);
+		}
+		if (!cache) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+
+		btrfs_write_out_cache(root, trans, cache, path);
+
+		/*
+		 * If we didn't have an error then the cache state is still
+		 * NEED_WRITE, so we can set it to WRITTEN.
+		 */
+		if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+			cache->disk_cache_state = BTRFS_DC_WRITTEN;
+		last = cache->key.objectid + cache->key.offset;
+		btrfs_put_block_group(cache);
+	}
+
 	btrfs_free_path(path);
 	return 0;
 }
@@ -2762,6 +2984,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	if (found) {
 		spin_lock(&found->lock);
 		found->total_bytes += total_bytes;
+		found->disk_total += total_bytes * factor;
 		found->bytes_used += bytes_used;
 		found->disk_used += bytes_used * factor;
 		found->full = 0;
@@ -2781,6 +3004,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 				BTRFS_BLOCK_GROUP_SYSTEM |
 				BTRFS_BLOCK_GROUP_METADATA);
 	found->total_bytes = total_bytes;
+	found->disk_total = total_bytes * factor;
 	found->bytes_used = bytes_used;
 	found->disk_used = bytes_used * factor;
 	found->bytes_pinned = 0;
@@ -2882,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 	struct btrfs_space_info *data_sinfo;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 used;
-	int ret = 0, committed = 0;
+	int ret = 0, committed = 0, alloc_chunk = 1;
 
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
+	if (root == root->fs_info->tree_root) {
+		alloc_chunk = 0;
+		committed = 1;
+	}
+
 	data_sinfo = BTRFS_I(inode)->space_info;
 	if (!data_sinfo)
 		goto alloc;
@@ -2905,7 +3134,7 @@ again:
 	 * if we don't have enough free bytes in this space then we need
 	 * to alloc a new chunk.
 	 */
-	if (!data_sinfo->full) {
+	if (!data_sinfo->full && alloc_chunk) {
 		u64 alloc_target;
 
 		data_sinfo->force_alloc = 1;
@@ -2997,10 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	rcu_read_unlock();
 }
 
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
-			      u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+			      struct btrfs_space_info *sinfo, u64 alloc_bytes)
 {
 	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+	u64 thresh;
 
 	if (sinfo->bytes_used + sinfo->bytes_reserved +
 	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3240,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
 	    alloc_bytes < div_factor(num_bytes, 8))
 		return 0;
 
+	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+		return 0;
+
 	return 1;
 }
 
@@ -3041,13 +3277,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
-	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+	if (!force && !should_alloc_chunk(extent_root, space_info,
+					  alloc_bytes)) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
 	spin_unlock(&space_info->lock);
 
 	/*
+	 * If we have mixed data/metadata chunks we want to make sure we keep
+	 * allocating mixed chunks instead of individual chunks.
+	 */
+	if (btrfs_mixed_space_info(space_info))
+		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+	/*
 	 * if we're doing a data chunk, go ahead and make sure that
 	 * we keep a reasonable number of metadata chunks allocated in the
 	 * FS as well.
@@ -3072,55 +3316,25 @@ out:
 	return ret;
 }
 
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_space_info *sinfo, u64 num_bytes)
-{
-	int ret;
-	int end_trans = 0;
-
-	if (sinfo->full)
-		return 0;
-
-	spin_lock(&sinfo->lock);
-	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
-	spin_unlock(&sinfo->lock);
-	if (!ret)
-		return 0;
-
-	if (!trans) {
-		trans = btrfs_join_transaction(root, 1);
-		BUG_ON(IS_ERR(trans));
-		end_trans = 1;
-	}
-
-	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-			     num_bytes + 2 * 1024 * 1024,
-			     get_alloc_profile(root, sinfo->flags), 0);
-
-	if (end_trans)
-		btrfs_end_transaction(trans, root);
-
-	return ret == 1 ? 1 : 0;
-}
-
 /*
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, u64 to_reclaim)
+			   struct btrfs_root *root, u64 to_reclaim, int sync)
 {
 	struct btrfs_block_rsv *block_rsv;
+	struct btrfs_space_info *space_info;
 	u64 reserved;
 	u64 max_reclaim;
 	u64 reclaimed = 0;
 	int pause = 1;
-	int ret;
+	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
 
 	block_rsv = &root->fs_info->delalloc_block_rsv;
-	spin_lock(&block_rsv->lock);
-	reserved = block_rsv->reserved;
-	spin_unlock(&block_rsv->lock);
+	space_info = block_rsv->space_info;
+
+	smp_mb();
+	reserved = space_info->bytes_reserved;
 
 	if (reserved == 0)
 		return 0;
@@ -3128,104 +3342,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	max_reclaim = min(reserved, to_reclaim);
 
 	while (1) {
-		ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
-		if (!ret) {
-			__set_current_state(TASK_INTERRUPTIBLE);
-			schedule_timeout(pause);
-			pause <<= 1;
-			if (pause > HZ / 10)
-				pause = HZ / 10;
-		} else {
-			pause = 1;
-		}
+		/* have the flusher threads jump in and do some IO */
+		smp_mb();
+		nr_pages = min_t(unsigned long, nr_pages,
+		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
-		spin_lock(&block_rsv->lock);
-		if (reserved > block_rsv->reserved)
-			reclaimed = reserved - block_rsv->reserved;
-		reserved = block_rsv->reserved;
-		spin_unlock(&block_rsv->lock);
+		spin_lock(&space_info->lock);
+		if (reserved > space_info->bytes_reserved)
+			reclaimed += reserved - space_info->bytes_reserved;
+		reserved = space_info->bytes_reserved;
+		spin_unlock(&space_info->lock);
 
 		if (reserved == 0 || reclaimed >= max_reclaim)
 			break;
 
 		if (trans && trans->transaction->blocked)
 			return -EAGAIN;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(pause);
+		pause <<= 1;
+		if (pause > HZ / 10)
+			pause = HZ / 10;
+
 	}
 	return reclaimed >= to_reclaim;
 }
 
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes, int *retries)
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes.  The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space.  That way for retries > 1 we don't try
+ * and add space, we just check to see if the amount of unused space is >= the
+ * total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries so
+ * that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_block_rsv *block_rsv,
+				  u64 orig_bytes, int flush)
 {
 	struct btrfs_space_info *space_info = block_rsv->space_info;
-	int ret;
+	u64 unused;
+	u64 num_bytes = orig_bytes;
+	int retries = 0;
+	int ret = 0;
+	bool reserved = false;
+	bool committed = false;
 
-	if ((*retries) > 2)
-		return -ENOSPC;
+again:
+	ret = -ENOSPC;
+	if (reserved)
+		num_bytes = 0;
 
-	ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
-	if (ret)
-		return 1;
+	spin_lock(&space_info->lock);
+	unused = space_info->bytes_used + space_info->bytes_reserved +
+		 space_info->bytes_pinned + space_info->bytes_readonly +
+		 space_info->bytes_may_use;
 
-	if (trans && trans->transaction->in_commit)
-		return -ENOSPC;
+	/*
+	 * The idea here is that we've not already over-reserved the block group
+	 * then we can go ahead and save our reservation first and then start
+	 * flushing if we need to.  Otherwise if we've already overcommitted
+	 * lets start flushing stuff first and then come back and try to make
+	 * our reservation.
+	 */
+	if (unused <= space_info->total_bytes) {
+		unused = space_info->total_bytes - unused;
+		if (unused >= num_bytes) {
+			if (!reserved)
+				space_info->bytes_reserved += orig_bytes;
+			ret = 0;
+		} else {
+			/*
+			 * Ok set num_bytes to orig_bytes since we aren't
+			 * overcommitted, this way we only try and reclaim what
+			 * we need.
+			 */
+			num_bytes = orig_bytes;
+		}
+	} else {
+		/*
+		 * Ok we're over committed, set num_bytes to the overcommitted
+		 * amount plus the amount of bytes that we need for this
+		 * reservation.
+		 */
+		num_bytes = unused - space_info->total_bytes +
+			    (orig_bytes * (retries + 1));
+	}
 
-	ret = shrink_delalloc(trans, root, num_bytes);
-	if (ret)
-		return ret;
+	/*
+	 * Couldn't make our reservation, save our place so while we're trying
+	 * to reclaim space we can actually use it instead of somebody else
+	 * stealing it from us.
+	 */
+	if (ret && !reserved) {
+		space_info->bytes_reserved += orig_bytes;
+		reserved = true;
+	}
 
-	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned < num_bytes)
-		ret = 1;
 	spin_unlock(&space_info->lock);
-	if (ret)
-		return -ENOSPC;
-
-	(*retries)++;
 
-	if (trans)
-		return -EAGAIN;
+	if (!ret)
+		return 0;
 
-	trans = btrfs_join_transaction(root, 1);
-	BUG_ON(IS_ERR(trans));
-	ret = btrfs_commit_transaction(trans, root);
-	BUG_ON(ret);
+	if (!flush)
+		goto out;
 
-	return 1;
-}
+	/*
+	 * We do synchronous shrinking since we don't actually unreserve
+	 * metadata until after the IO is completed.
+	 */
+	ret = shrink_delalloc(trans, root, num_bytes, 1);
+	if (ret > 0)
+		return 0;
+	else if (ret < 0)
+		goto out;
 
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
-				  u64 num_bytes)
-{
-	struct btrfs_space_info *space_info = block_rsv->space_info;
-	u64 unused;
-	int ret = -ENOSPC;
+	/*
+	 * So if we were overcommitted it's possible that somebody else flushed
+	 * out enough space and we simply didn't have enough space to reclaim,
+	 * so go back around and try again.
+	 */
+	if (retries < 2) {
+		retries++;
+		goto again;
+	}
 
 	spin_lock(&space_info->lock);
-	unused = space_info->bytes_used + space_info->bytes_reserved +
-		 space_info->bytes_pinned + space_info->bytes_readonly;
+	/*
+	 * Not enough space to be reclaimed, don't bother committing the
+	 * transaction.
+	 */
+	if (space_info->bytes_pinned < orig_bytes)
+		ret = -ENOSPC;
+	spin_unlock(&space_info->lock);
+	if (ret)
+		goto out;
 
-	if (unused < space_info->total_bytes)
-		unused = space_info->total_bytes - unused;
-	else
-		unused = 0;
+	ret = -EAGAIN;
+	if (trans || committed)
+		goto out;
 
-	if (unused >= num_bytes) {
-		if (block_rsv->priority >= 10) {
-			space_info->bytes_reserved += num_bytes;
-			ret = 0;
-		} else {
-			if ((unused + block_rsv->reserved) *
-			    block_rsv->priority >=
-			    (num_bytes + block_rsv->reserved) * 10) {
-				space_info->bytes_reserved += num_bytes;
-				ret = 0;
-			}
-		}
+	ret = -ENOSPC;
+	trans = btrfs_join_transaction(root, 1);
+	if (IS_ERR(trans))
+		goto out;
+	ret = btrfs_commit_transaction(trans, root);
+	if (!ret) {
+		trans = NULL;
+		committed = true;
+		goto again;
+	}
+
+out:
+	if (reserved) {
+		spin_lock(&space_info->lock);
+		space_info->bytes_reserved -= orig_bytes;
+		spin_unlock(&space_info->lock);
 	}
-	spin_unlock(&space_info->lock);
 
 	return ret;
 }
@@ -3327,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	u64 alloc_target;
 
 	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
 	if (!block_rsv)
 		return NULL;
 
 	btrfs_init_block_rsv(block_rsv);
-
-	alloc_target = btrfs_get_alloc_profile(root, 0);
 	block_rsv->space_info = __find_space_info(fs_info,
 						  BTRFS_BLOCK_GROUP_METADATA);
-
 	return block_rsv;
 }
 
@@ -3369,23 +3644,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes, int *retries)
+			u64 num_bytes)
 {
 	int ret;
 
 	if (num_bytes == 0)
 		return 0;
-again:
-	ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 1);
 		return 0;
 	}
 
-	ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-	if (ret > 0)
-		goto again;
-
 	return ret;
 }
 
@@ -3420,7 +3691,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 		return 0;
 
 	if (block_rsv->refill_used) {
-		ret = reserve_metadata_bytes(block_rsv, num_bytes);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     num_bytes, 0);
 		if (!ret) {
 			block_rsv_add_bytes(block_rsv, num_bytes, 0);
 			return 0;
@@ -3499,6 +3771,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 
 	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 	spin_lock(&sinfo->lock);
+	if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+		data_used = 0;
 	meta_used = sinfo->bytes_used;
 	spin_unlock(&sinfo->lock);
 
@@ -3526,7 +3800,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 	block_rsv->size = num_bytes;
 
 	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
-		    sinfo->bytes_reserved + sinfo->bytes_readonly;
+		    sinfo->bytes_reserved + sinfo->bytes_readonly +
+		    sinfo->bytes_may_use;
 
 	if (sinfo->total_bytes > num_bytes) {
 		num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3872,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
 
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
-				 int num_items, int *retries)
+				 int num_items)
 {
 	u64 num_bytes;
 	int ret;
@@ -3607,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 
 	num_bytes = calc_trans_metadata_size(root, num_items);
 	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-				  num_bytes, retries);
+				  num_bytes);
 	if (!ret) {
 		trans->bytes_reserved += num_bytes;
 		trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3956,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
 	u64 to_reserve;
 	int nr_extents;
-	int retries = 0;
 	int ret;
 
 	if (btrfs_transaction_in_commit(root->fs_info))
 		schedule_timeout(1);
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
 	if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3972,14 @@ again:
 		nr_extents = 0;
 		to_reserve = 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
-	ret = reserve_metadata_bytes(block_rsv, to_reserve);
-	if (ret) {
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
-					   &retries);
-		if (ret > 0)
-			goto again;
+	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+	if (ret)
 		return ret;
-	}
 
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	BTRFS_I(inode)->reserved_extents += nr_extents;
 	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3987,7 @@ again:
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
 	if (block_rsv->size > 512 * 1024 * 1024)
-		shrink_delalloc(NULL, root, to_reserve);
+		shrink_delalloc(NULL, root, to_reserve, 0);
 
 	return 0;
 }
@@ -3776,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     u64 bytenr, u64 num_bytes, int alloc)
 {
-	struct btrfs_block_group_cache *cache;
+	struct btrfs_block_group_cache *cache = NULL;
 	struct btrfs_fs_info *info = root->fs_info;
-	int factor;
 	u64 total = num_bytes;
 	u64 old_val;
 	u64 byte_in_group;
+	int factor;
 
 	/* block accounting for super block */
 	spin_lock(&info->delalloc_lock);
@@ -3803,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			factor = 2;
 		else
 			factor = 1;
+		/*
+		 * If this block group has free space cache written out, we
+		 * need to make sure to load it if we are removing space.  This
+		 * is because we need the unpinning stage to actually add the
+		 * space back to the block group, otherwise we will leak space.
+		 */
+		if (!alloc && cache->cached == BTRFS_CACHE_NO)
+			cache_block_group(cache, trans, 1);
+
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
 
 		spin_lock(&cache->space_info->lock);
 		spin_lock(&cache->lock);
+
+		if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+		    cache->disk_cache_state < BTRFS_DC_CLEAR)
+			cache->disk_cache_state = BTRFS_DC_CLEAR;
+
 		cache->dirty = 1;
 		old_val = btrfs_block_group_used(&cache->item);
 		num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
 	bool failed_alloc = false;
+	bool use_cluster = true;
 	u64 ideal_cache_percent = 0;
 	u64 ideal_cache_offset = 0;
 
@@ -4568,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		return -ENOSPC;
 	}
 
+	/*
+	 * If the space info is for both data and metadata it means we have a
+	 * small filesystem and we can't use the clustering stuff.
+	 */
+	if (btrfs_mixed_space_info(space_info))
+		use_cluster = false;
+
 	if (orig_root->ref_cows || empty_size)
 		allowed_chunk_alloc = 1;
 
-	if (data & BTRFS_BLOCK_GROUP_METADATA) {
+	if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
 		last_ptr = &root->fs_info->meta_alloc_cluster;
 		if (!btrfs_test_opt(root, SSD))
 			empty_cluster = 64 * 1024;
 	}
 
-	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+	if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+	    btrfs_test_opt(root, SSD)) {
 		last_ptr = &root->fs_info->data_alloc_cluster;
 	}
 
@@ -4641,6 +4934,10 @@ have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
 			u64 free_percent;
 
+			ret = cache_block_group(block_group, trans, 1);
+			if (block_group->cached == BTRFS_CACHE_FINISHED)
+				goto have_block_group;
+
 			free_percent = btrfs_block_group_used(&block_group->item);
 			free_percent *= 100;
 			free_percent = div64_u64(free_percent,
@@ -4661,7 +4958,7 @@ have_block_group:
 			if (loop > LOOP_CACHING_NOWAIT ||
 			    (loop > LOOP_FIND_IDEAL &&
 			     atomic_read(&space_info->caching_threads) < 2)) {
-				ret = cache_block_group(block_group);
+				ret = cache_block_group(block_group, trans, 0);
 				BUG_ON(ret);
 			}
 			found_uncached_bg = true;
@@ -5218,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	u64 num_bytes = ins->offset;
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	cache_block_group(block_group);
+	cache_block_group(block_group, trans, 0);
 	caching_ctl = get_caching_control(block_group);
 
 	if (!caching_ctl) {
@@ -5308,7 +5605,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	block_rsv = get_block_rsv(trans, root);
 
 	if (block_rsv->size == 0) {
-		ret = reserve_metadata_bytes(block_rsv, blocksize);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     blocksize, 0);
 		if (ret)
 			return ERR_PTR(ret);
 		return block_rsv;
@@ -5318,11 +5616,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	if (!ret)
 		return block_rsv;
 
-	WARN_ON(1);
-	printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
-	       block_rsv->size, block_rsv->reserved,
-	       block_rsv->freed[0], block_rsv->freed[1]);
-
 	return ERR_PTR(-ENOSPC);
 }
 
@@ -5421,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 	u64 generation;
 	u64 refs;
 	u64 flags;
-	u64 last = 0;
 	u32 nritems;
 	u32 blocksize;
 	struct btrfs_key key;
@@ -5489,7 +5781,6 @@ reada:
 					   generation);
 		if (ret)
 			break;
-		last = bytenr + blocksize;
 		nread++;
 	}
 	wc->reada_slot = slot;
@@ -7813,6 +8104,40 @@ out:
 	return ret;
 }
 
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+	struct btrfs_block_group_cache *block_group;
+	u64 last = 0;
+
+	while (1) {
+		struct inode *inode;
+
+		block_group = btrfs_lookup_first_block_group(info, last);
+		while (block_group) {
+			spin_lock(&block_group->lock);
+			if (block_group->iref)
+				break;
+			spin_unlock(&block_group->lock);
+			block_group = next_block_group(info->tree_root,
+						       block_group);
+		}
+		if (!block_group) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+
+		inode = block_group->inode;
+		block_group->iref = 0;
+		block_group->inode = NULL;
+		spin_unlock(&block_group->lock);
+		iput(inode);
+		last = block_group->key.objectid + block_group->key.offset;
+		btrfs_put_block_group(block_group);
+	}
+}
+
 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
 	struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	struct extent_buffer *leaf;
+	int need_clear = 0;
+	u64 cache_gen;
 
 	root = info->extent_root;
 	key.objectid = 0;
@@ -7905,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 	if (!path)
 		return -ENOMEM;
 
+	cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+	if (cache_gen != 0 &&
+	    btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+		need_clear = 1;
+	if (btrfs_test_opt(root, CLEAR_CACHE))
+		need_clear = 1;
+	if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+		printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+
 	while (1) {
 		ret = find_first_block_group(root, path, &key);
 		if (ret > 0)
@@ -7927,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		INIT_LIST_HEAD(&cache->list);
 		INIT_LIST_HEAD(&cache->cluster_list);
 
+		if (need_clear)
+			cache->disk_cache_state = BTRFS_DC_CLEAR;
+
 		/*
 		 * we only want to have 32k of ram per block group for keeping
 		 * track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->key.offset = size;
 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
 	cache->sectorsize = root->sectorsize;
+	cache->fs_info = root->fs_info;
 
 	/*
 	 * we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8427,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_free_cluster *cluster;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_key key;
+	struct inode *inode;
 	int ret;
+	int factor;
 
 	root = root->fs_info->extent_root;
 
@@ -8097,6 +8440,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	BUG_ON(!block_group->ro);
 
 	memcpy(&key, &block_group->key, sizeof(key));
+	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+				  BTRFS_BLOCK_GROUP_RAID1 |
+				  BTRFS_BLOCK_GROUP_RAID10))
+		factor = 2;
+	else
+		factor = 1;
 
 	/* make sure this block group isn't part of an allocation cluster */
 	cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (!IS_ERR(inode)) {
+		btrfs_orphan_add(trans, inode);
+		clear_nlink(inode);
+		/* One for the block groups ref */
+		spin_lock(&block_group->lock);
+		if (block_group->iref) {
+			block_group->iref = 0;
+			block_group->inode = NULL;
+			spin_unlock(&block_group->lock);
+			iput(inode);
+		} else {
+			spin_unlock(&block_group->lock);
+		}
+		/* One for our lookup ref */
+		iput(inode);
+	}
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0)
+		btrfs_release_path(tree_root, path);
+	if (ret == 0) {
+		ret = btrfs_del_item(trans, tree_root, path);
+		if (ret)
+			goto out;
+		btrfs_release_path(tree_root, path);
+	}
+
 	spin_lock(&root->fs_info->block_group_cache_lock);
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
@@ -8137,8 +8520,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_lock(&block_group->space_info->lock);
 	block_group->space_info->total_bytes -= block_group->key.offset;
 	block_group->space_info->bytes_readonly -= block_group->key.offset;
+	block_group->space_info->disk_total -= block_group->key.offset * factor;
 	spin_unlock(&block_group->space_info->lock);
 
+	memcpy(&key, &block_group->key, sizeof(key));
+
 	btrfs_clear_space_info_full(root->fs_info);
 
 	btrfs_put_block_group(block_group);
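
Closing note (not part of the commit): the reworked reserve_metadata_bytes() above optimistically adds orig_bytes to bytes_reserved on the first failed attempt so the space cannot be stolen while it flushes delalloc, retries up to two times, falls back to a single transaction commit to free pinned bytes, and drops the hold only if everything fails. A compressed userspace model of that loop is sketched below, with toy stand-ins for shrink_delalloc() and btrfs_commit_transaction(); the accounting and numbers are illustrative assumptions, not the kernel's, and the trans/-EAGAIN interaction is omitted.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy mirror of the space_info fields read under space_info->lock. */
struct space_info {
	uint64_t total_bytes;
	uint64_t bytes_used;
	uint64_t bytes_reserved;
	uint64_t bytes_pinned;
};

/* Stand-in for shrink_delalloc(): pretend writeback frees some bytes. */
static void flush_delalloc(struct space_info *s, uint64_t want)
{
	uint64_t freed = want / 2 < s->bytes_used ? want / 2 : s->bytes_used;
	s->bytes_used -= freed;
}

/* Stand-in for btrfs_commit_transaction(): pinned bytes become free. */
static void commit_transaction(struct space_info *s)
{
	s->bytes_pinned = 0;
}

/*
 * Model of the retry loop: the first miss still adds orig_bytes to
 * bytes_reserved to hold our place while we reclaim; later passes only
 * re-check (the needed amount drops to 0, as when the kernel sets
 * num_bytes = 0), and the hold is undone on final failure.
 */
static int reserve_metadata_bytes(struct space_info *s, uint64_t orig_bytes)
{
	bool held = false, committed = false;
	int retries = 0;

	while (1) {
		uint64_t unused = s->bytes_used + s->bytes_reserved +
				  s->bytes_pinned;
		uint64_t need = held ? 0 : orig_bytes;

		if (unused <= s->total_bytes &&
		    s->total_bytes - unused >= need) {
			if (!held)
				s->bytes_reserved += orig_bytes;
			return 0;
		}
		if (!held) {
			s->bytes_reserved += orig_bytes; /* hold our place */
			held = true;
		}
		flush_delalloc(s, orig_bytes);
		if (retries++ < 2)
			continue;
		if (!committed && s->bytes_pinned >= orig_bytes) {
			commit_transaction(s);
			committed = true;
			continue;
		}
		s->bytes_reserved -= orig_bytes; /* give the hold back */
		return -1; /* -ENOSPC */
	}
}

int main(void)
{
	struct space_info s = { .total_bytes = 100, .bytes_used = 90,
				.bytes_reserved = 0, .bytes_pinned = 8 };
	printf("reserve 16 -> %d\n", reserve_metadata_bytes(&s, 16));
	return 0;
}
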
