Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--	fs/btrfs/extent-tree.c	| 598
1 file changed, 514 insertions, 84 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 993f93ff7ba6..d0c4d584efad 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
68 | struct extent_buffer **must_clean); | 68 | struct extent_buffer **must_clean); |
69 | static int find_next_key(struct btrfs_path *path, int level, | 69 | static int find_next_key(struct btrfs_path *path, int level, |
70 | struct btrfs_key *key); | 70 | struct btrfs_key *key); |
71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | ||
72 | int dump_block_groups); | ||
71 | 73 | ||
72 | static noinline int | 74 | static noinline int |
73 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 75 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -2765,67 +2767,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | |||
2765 | alloc_target); | 2767 | alloc_target); |
2766 | } | 2768 | } |
2767 | 2769 | ||
2770 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
2771 | { | ||
2772 | u64 num_bytes; | ||
2773 | int level; | ||
2774 | |||
2775 | level = BTRFS_MAX_LEVEL - 2; | ||
2776 | /* | ||
2777 | * NOTE: these calculations are absolutely the worst possible case. | ||
2778 | * This assumes that _every_ item we insert will require a new leaf, and | ||
2779 | * that the tree has grown to its maximum level size. | ||
2780 | */ | ||
2781 | |||
2782 | /* | ||
2783 | * for every item we insert we could insert both an extent item and an | ||
2784 | * extent ref item. Then for every item we insert, we will need to cow | ||
2785 | * both the original leaf, plus the leaf to the left and right of it. | ||
2786 | * | ||
2787 | * Unless we are talking about the extent root, then we just want the | ||
2788 | * number of items * 2, since we just need the extent item plus its ref. | ||
2789 | */ | ||
2790 | if (root == root->fs_info->extent_root) | ||
2791 | num_bytes = num_items * 2; | ||
2792 | else | ||
2793 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
2794 | |||
2795 | /* | ||
2796 | * num_bytes is total number of leaves we could need times the leaf | ||
2797 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
2798 | * level, down to the leaf level. | ||
2799 | */ | ||
2800 | num_bytes = (num_bytes * root->leafsize) + | ||
2801 | (num_bytes * (level * 2)) * root->nodesize; | ||
2802 | |||
2803 | return num_bytes; | ||
2804 | } | ||
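As a rough worked example of the math above — assuming a 4 KiB leafsize/nodesize and BTRFS_MAX_LEVEL of 8, which are illustrative values rather than anything this patch dictates — reserving a single item on a non-extent root comes out to a little under half a megabyte:

#include <stdio.h>

/* Standalone sketch of calculate_bytes_needed() for one item on a
 * non-extent root; the 4096-byte leaf/node sizes are assumptions. */
int main(void)
{
        unsigned long long leafsize = 4096, nodesize = 4096;
        int level = 8 - 2;              /* BTRFS_MAX_LEVEL - 2 */
        int num_items = 1;

        /* non-extent-root branch: (num_items + 2 * num_items) * 3 leaves */
        unsigned long long num_bytes = (num_items + 2 * num_items) * 3;

        /* each leaf plus 2 cow'd nodes per level down to the leaf */
        num_bytes = num_bytes * leafsize + num_bytes * (level * 2) * nodesize;

        printf("worst case for %d item(s): %llu bytes\n", num_items, num_bytes);
        return 0;
}

(9 leaves at 4 KiB plus 9 x 12 nodes at 4 KiB, i.e. 479232 bytes.)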
2805 | |||
2768 | /* | 2806 | /* |
2769 | * for now this just makes sure we have at least 5% of our metadata space free | 2807 | * Unreserve metadata space for delalloc. If we have less reserved credits than |
2770 | * for use. | 2808 | * we have extents, this function does nothing. |
2771 | */ | 2809 | */ |
2772 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | 2810 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, |
2811 | struct inode *inode, int num_items) | ||
2773 | { | 2812 | { |
2774 | struct btrfs_fs_info *info = root->fs_info; | 2813 | struct btrfs_fs_info *info = root->fs_info; |
2775 | struct btrfs_space_info *meta_sinfo; | 2814 | struct btrfs_space_info *meta_sinfo; |
2776 | u64 alloc_target, thresh; | 2815 | u64 num_bytes; |
2777 | int committed = 0, ret; | 2816 | u64 alloc_target; |
2817 | bool bug = false; | ||
2778 | 2818 | ||
2779 | /* get the space info for where the metadata will live */ | 2819 | /* get the space info for where the metadata will live */ |
2780 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2820 | alloc_target = btrfs_get_alloc_profile(root, 0); |
2781 | meta_sinfo = __find_space_info(info, alloc_target); | 2821 | meta_sinfo = __find_space_info(info, alloc_target); |
2782 | if (!meta_sinfo) | ||
2783 | goto alloc; | ||
2784 | 2822 | ||
2785 | again: | 2823 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, |
2824 | num_items); | ||
2825 | |||
2786 | spin_lock(&meta_sinfo->lock); | 2826 | spin_lock(&meta_sinfo->lock); |
2787 | if (!meta_sinfo->full) | 2827 | spin_lock(&BTRFS_I(inode)->accounting_lock); |
2788 | thresh = meta_sinfo->total_bytes * 80; | 2828 | if (BTRFS_I(inode)->reserved_extents <= |
2789 | else | 2829 | BTRFS_I(inode)->outstanding_extents) { |
2790 | thresh = meta_sinfo->total_bytes * 95; | 2830 | spin_unlock(&BTRFS_I(inode)->accounting_lock); |
2831 | spin_unlock(&meta_sinfo->lock); | ||
2832 | return 0; | ||
2833 | } | ||
2834 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2835 | |||
2836 | BTRFS_I(inode)->reserved_extents--; | ||
2837 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
2838 | |||
2839 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
2840 | bug = true; | ||
2841 | meta_sinfo->bytes_delalloc = 0; | ||
2842 | } else { | ||
2843 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2844 | } | ||
2845 | spin_unlock(&meta_sinfo->lock); | ||
2846 | |||
2847 | BUG_ON(bug); | ||
2791 | 2848 | ||
2849 | return 0; | ||
2850 | } | ||
2851 | |||
2852 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
2853 | { | ||
2854 | u64 thresh; | ||
2855 | |||
2856 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
2857 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
2858 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
2859 | meta_sinfo->bytes_may_use; | ||
2860 | |||
2861 | thresh = meta_sinfo->total_bytes - thresh; | ||
2862 | thresh *= 80; | ||
2792 | do_div(thresh, 100); | 2863 | do_div(thresh, 100); |
2864 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
2865 | meta_sinfo->force_delalloc = 1; | ||
2866 | else | ||
2867 | meta_sinfo->force_delalloc = 0; | ||
2868 | } | ||
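To make the threshold above concrete, here is a minimal sketch with made-up numbers: with 1 GiB of metadata space and 624 MiB already accounted for, delalloc reservations start forcing writeback once they pass 80% of the remaining 400 MiB, i.e. 320 MiB:

#include <stdio.h>

int main(void)
{
        /* illustrative numbers only, not taken from the patch */
        unsigned long long total     = 1024ULL << 20;   /* metadata total_bytes */
        unsigned long long accounted = 624ULL << 20;    /* used+reserved+pinned+... */
        unsigned long long delalloc  = 330ULL << 20;    /* bytes_delalloc */

        /* same shape as check_force_delalloc(): 80% of what is still free */
        unsigned long long thresh = (total - accounted) * 80 / 100;

        printf("force_delalloc = %d (thresh=%llu, delalloc=%llu)\n",
               thresh <= delalloc, thresh, delalloc);
        return 0;
}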
2793 | 2869 | ||
2794 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2870 | struct async_flush { |
2795 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 2871 | struct btrfs_root *root; |
2796 | meta_sinfo->bytes_super > thresh) { | 2872 | struct btrfs_space_info *info; |
2797 | struct btrfs_trans_handle *trans; | 2873 | struct btrfs_work work; |
2798 | if (!meta_sinfo->full) { | 2874 | }; |
2799 | meta_sinfo->force_alloc = 1; | 2875 | |
2876 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
2877 | { | ||
2878 | struct async_flush *async; | ||
2879 | struct btrfs_root *root; | ||
2880 | struct btrfs_space_info *info; | ||
2881 | |||
2882 | async = container_of(work, struct async_flush, work); | ||
2883 | root = async->root; | ||
2884 | info = async->info; | ||
2885 | |||
2886 | btrfs_start_delalloc_inodes(root); | ||
2887 | wake_up(&info->flush_wait); | ||
2888 | btrfs_wait_ordered_extents(root, 0); | ||
2889 | |||
2890 | spin_lock(&info->lock); | ||
2891 | info->flushing = 0; | ||
2892 | spin_unlock(&info->lock); | ||
2893 | wake_up(&info->flush_wait); | ||
2894 | |||
2895 | kfree(async); | ||
2896 | } | ||
2897 | |||
2898 | static void wait_on_flush(struct btrfs_space_info *info) | ||
2899 | { | ||
2900 | DEFINE_WAIT(wait); | ||
2901 | u64 used; | ||
2902 | |||
2903 | while (1) { | ||
2904 | prepare_to_wait(&info->flush_wait, &wait, | ||
2905 | TASK_UNINTERRUPTIBLE); | ||
2906 | spin_lock(&info->lock); | ||
2907 | if (!info->flushing) { | ||
2908 | spin_unlock(&info->lock); | ||
2909 | break; | ||
2910 | } | ||
2911 | |||
2912 | used = info->bytes_used + info->bytes_reserved + | ||
2913 | info->bytes_pinned + info->bytes_readonly + | ||
2914 | info->bytes_super + info->bytes_root + | ||
2915 | info->bytes_may_use + info->bytes_delalloc; | ||
2916 | if (used < info->total_bytes) { | ||
2917 | spin_unlock(&info->lock); | ||
2918 | break; | ||
2919 | } | ||
2920 | spin_unlock(&info->lock); | ||
2921 | schedule(); | ||
2922 | } | ||
2923 | finish_wait(&info->flush_wait, &wait); | ||
2924 | } | ||
2925 | |||
2926 | static void flush_delalloc(struct btrfs_root *root, | ||
2927 | struct btrfs_space_info *info) | ||
2928 | { | ||
2929 | struct async_flush *async; | ||
2930 | bool wait = false; | ||
2931 | |||
2932 | spin_lock(&info->lock); | ||
2933 | |||
2934 | if (!info->flushing) { | ||
2935 | info->flushing = 1; | ||
2936 | init_waitqueue_head(&info->flush_wait); | ||
2937 | } else { | ||
2938 | wait = true; | ||
2939 | } | ||
2940 | |||
2941 | spin_unlock(&info->lock); | ||
2942 | |||
2943 | if (wait) { | ||
2944 | wait_on_flush(info); | ||
2945 | return; | ||
2946 | } | ||
2947 | |||
2948 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
2949 | if (!async) | ||
2950 | goto flush; | ||
2951 | |||
2952 | async->root = root; | ||
2953 | async->info = info; | ||
2954 | async->work.func = flush_delalloc_async; | ||
2955 | |||
2956 | btrfs_queue_worker(&root->fs_info->enospc_workers, | ||
2957 | &async->work); | ||
2958 | wait_on_flush(info); | ||
2959 | return; | ||
2960 | |||
2961 | flush: | ||
2962 | btrfs_start_delalloc_inodes(root); | ||
2963 | btrfs_wait_ordered_extents(root, 0); | ||
2964 | |||
2965 | spin_lock(&info->lock); | ||
2966 | info->flushing = 0; | ||
2967 | spin_unlock(&info->lock); | ||
2968 | wake_up(&info->flush_wait); | ||
2969 | } | ||
2970 | |||
2971 | static int maybe_allocate_chunk(struct btrfs_root *root, | ||
2972 | struct btrfs_space_info *info) | ||
2973 | { | ||
2974 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
2975 | struct btrfs_trans_handle *trans; | ||
2976 | bool wait = false; | ||
2977 | int ret = 0; | ||
2978 | u64 min_metadata; | ||
2979 | u64 free_space; | ||
2980 | |||
2981 | free_space = btrfs_super_total_bytes(disk_super); | ||
2982 | /* | ||
2983 | * we allow the metadata to grow to a max of either 5gb or 5% of the | ||
2984 | * space in the volume. | ||
2985 | */ | ||
2986 | min_metadata = min((u64)5 * 1024 * 1024 * 1024, | ||
2987 | div64_u64(free_space * 5, 100)); | ||
2988 | if (info->total_bytes >= min_metadata) { | ||
2989 | spin_unlock(&info->lock); | ||
2990 | return 0; | ||
2991 | } | ||
2992 | |||
2993 | if (info->full) { | ||
2994 | spin_unlock(&info->lock); | ||
2995 | return 0; | ||
2996 | } | ||
2997 | |||
2998 | if (!info->allocating_chunk) { | ||
2999 | info->force_alloc = 1; | ||
3000 | info->allocating_chunk = 1; | ||
3001 | init_waitqueue_head(&info->allocate_wait); | ||
3002 | } else { | ||
3003 | wait = true; | ||
3004 | } | ||
3005 | |||
3006 | spin_unlock(&info->lock); | ||
3007 | |||
3008 | if (wait) { | ||
3009 | wait_event(info->allocate_wait, | ||
3010 | !info->allocating_chunk); | ||
3011 | return 1; | ||
3012 | } | ||
3013 | |||
3014 | trans = btrfs_start_transaction(root, 1); | ||
3015 | if (!trans) { | ||
3016 | ret = -ENOMEM; | ||
3017 | goto out; | ||
3018 | } | ||
3019 | |||
3020 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3021 | 4096 + 2 * 1024 * 1024, | ||
3022 | info->flags, 0); | ||
3023 | btrfs_end_transaction(trans, root); | ||
3024 | if (ret) | ||
3025 | goto out; | ||
3026 | out: | ||
3027 | spin_lock(&info->lock); | ||
3028 | info->allocating_chunk = 0; | ||
3029 | spin_unlock(&info->lock); | ||
3030 | wake_up(&info->allocate_wait); | ||
3031 | |||
3032 | if (ret) | ||
3033 | return 0; | ||
3034 | return 1; | ||
3035 | } | ||
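The 5 GiB / 5% cap in maybe_allocate_chunk() is easiest to see with a couple of throwaway volume sizes (these figures are examples, not values from the patch):

#include <stdio.h>

/* mirrors min((u64)5G, free_space * 5 / 100) from maybe_allocate_chunk() */
static unsigned long long metadata_cap(unsigned long long volume_bytes)
{
        unsigned long long five_gb = 5ULL * 1024 * 1024 * 1024;
        unsigned long long five_pct = volume_bytes * 5 / 100;

        return five_pct < five_gb ? five_pct : five_gb;
}

int main(void)
{
        printf("40 GiB volume  -> cap %llu bytes\n", metadata_cap(40ULL << 30));
        printf("200 GiB volume -> cap %llu bytes\n", metadata_cap(200ULL << 30));
        return 0;
}

So a 40 GiB filesystem stops growing metadata chunks at 2 GiB, while anything over 100 GiB is capped at the 5 GiB ceiling.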
3036 | |||
3037 | /* | ||
3038 | * Reserve metadata space for delalloc. | ||
3039 | */ | ||
3040 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
3041 | struct inode *inode, int num_items) | ||
3042 | { | ||
3043 | struct btrfs_fs_info *info = root->fs_info; | ||
3044 | struct btrfs_space_info *meta_sinfo; | ||
3045 | u64 num_bytes; | ||
3046 | u64 used; | ||
3047 | u64 alloc_target; | ||
3048 | int flushed = 0; | ||
3049 | int force_delalloc; | ||
3050 | |||
3051 | /* get the space info for where the metadata will live */ | ||
3052 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3053 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3054 | |||
3055 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
3056 | num_items); | ||
3057 | again: | ||
3058 | spin_lock(&meta_sinfo->lock); | ||
3059 | |||
3060 | force_delalloc = meta_sinfo->force_delalloc; | ||
3061 | |||
3062 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3063 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3064 | |||
3065 | if (!flushed) | ||
3066 | meta_sinfo->bytes_delalloc += num_bytes; | ||
3067 | |||
3068 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3069 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3070 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3071 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3072 | |||
3073 | if (used > meta_sinfo->total_bytes) { | ||
3074 | flushed++; | ||
3075 | |||
3076 | if (flushed == 1) { | ||
3077 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3078 | goto again; | ||
3079 | flushed++; | ||
3080 | } else { | ||
2800 | spin_unlock(&meta_sinfo->lock); | 3081 | spin_unlock(&meta_sinfo->lock); |
2801 | alloc: | 3082 | } |
2802 | trans = btrfs_start_transaction(root, 1); | ||
2803 | if (!trans) | ||
2804 | return -ENOMEM; | ||
2805 | 3083 | ||
2806 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3084 | if (flushed == 2) { |
2807 | 2 * 1024 * 1024, alloc_target, 0); | 3085 | filemap_flush(inode->i_mapping); |
2808 | btrfs_end_transaction(trans, root); | 3086 | goto again; |
2809 | if (!meta_sinfo) { | 3087 | } else if (flushed == 3) { |
2810 | meta_sinfo = __find_space_info(info, | 3088 | flush_delalloc(root, meta_sinfo); |
2811 | alloc_target); | ||
2812 | } | ||
2813 | goto again; | 3089 | goto again; |
2814 | } | 3090 | } |
3091 | spin_lock(&meta_sinfo->lock); | ||
3092 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2815 | spin_unlock(&meta_sinfo->lock); | 3093 | spin_unlock(&meta_sinfo->lock); |
3094 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
3095 | BTRFS_I(inode)->outstanding_extents, | ||
3096 | BTRFS_I(inode)->reserved_extents); | ||
3097 | dump_space_info(meta_sinfo, 0, 0); | ||
3098 | return -ENOSPC; | ||
3099 | } | ||
2816 | 3100 | ||
2817 | if (!committed) { | 3101 | BTRFS_I(inode)->reserved_extents++; |
2818 | committed = 1; | 3102 | check_force_delalloc(meta_sinfo); |
2819 | trans = btrfs_join_transaction(root, 1); | 3103 | spin_unlock(&meta_sinfo->lock); |
2820 | if (!trans) | 3104 | |
2821 | return -ENOMEM; | 3105 | if (!flushed && force_delalloc) |
2822 | ret = btrfs_commit_transaction(trans, root); | 3106 | filemap_flush(inode->i_mapping); |
2823 | if (ret) | 3107 | |
2824 | return ret; | 3108 | return 0; |
3109 | } | ||
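A hypothetical caller (not part of this patch, and assuming the usual btrfs headers) would pair the two delalloc helpers roughly like this: reserve a credit before dirtying a range, and give it back once the extent has been written out and accounted:

/*
 * Illustrative only: example_dirty_one_extent() is not a real function in
 * this patch, but the reserve/unreserve calls use the signatures added above.
 */
static int example_dirty_one_extent(struct btrfs_root *root,
                                    struct inode *inode)
{
        int ret;

        ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
        if (ret)
                return ret;     /* -ENOSPC, nothing was reserved */

        /* ... mark the range delalloc, bumping outstanding_extents ... */

        /* later, once the extent has been accounted for: */
        btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
        return 0;
}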
3110 | |||
3111 | /* | ||
3112 | * unreserve num_items number of items worth of metadata space. This needs to | ||
3113 | * be paired with btrfs_reserve_metadata_space. | ||
3114 | * | ||
3115 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
3116 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
3117 | * operations which will result in more used metadata, so we want to make sure we | ||
3118 | * can do that without issue. | ||
3119 | */ | ||
3120 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3121 | { | ||
3122 | struct btrfs_fs_info *info = root->fs_info; | ||
3123 | struct btrfs_space_info *meta_sinfo; | ||
3124 | u64 num_bytes; | ||
3125 | u64 alloc_target; | ||
3126 | bool bug = false; | ||
3127 | |||
3128 | /* get the space info for where the metadata will live */ | ||
3129 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3130 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3131 | |||
3132 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3133 | |||
3134 | spin_lock(&meta_sinfo->lock); | ||
3135 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
3136 | bug = true; | ||
3137 | meta_sinfo->bytes_may_use = 0; | ||
3138 | } else { | ||
3139 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3140 | } | ||
3141 | spin_unlock(&meta_sinfo->lock); | ||
3142 | |||
3143 | BUG_ON(bug); | ||
3144 | |||
3145 | return 0; | ||
3146 | } | ||
3147 | |||
3148 | /* | ||
3149 | * Reserve some metadata space for use. We'll calculate the worst case number | ||
3150 | * of bytes that would be needed to modify num_items number of items. If we | ||
3151 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
3152 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
3153 | * items you reserved, since whatever metadata you needed should have already | ||
3154 | * been allocated. | ||
3155 | * | ||
3156 | * This will commit the transaction to make more space if we don't have enough | ||
3157 | * metadata space. The only time we don't do this is if we're reserving space | ||
3158 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
3159 | * caller's responsibility to handle it properly. | ||
3160 | */ | ||
3161 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3162 | { | ||
3163 | struct btrfs_fs_info *info = root->fs_info; | ||
3164 | struct btrfs_space_info *meta_sinfo; | ||
3165 | u64 num_bytes; | ||
3166 | u64 used; | ||
3167 | u64 alloc_target; | ||
3168 | int retries = 0; | ||
3169 | |||
3170 | /* get the space info for where the metadata will live */ | ||
3171 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3172 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3173 | |||
3174 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3175 | again: | ||
3176 | spin_lock(&meta_sinfo->lock); | ||
3177 | |||
3178 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3179 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3180 | |||
3181 | if (!retries) | ||
3182 | meta_sinfo->bytes_may_use += num_bytes; | ||
3183 | |||
3184 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3185 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3186 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3187 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3188 | |||
3189 | if (used > meta_sinfo->total_bytes) { | ||
3190 | retries++; | ||
3191 | if (retries == 1) { | ||
3192 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3193 | goto again; | ||
3194 | retries++; | ||
3195 | } else { | ||
3196 | spin_unlock(&meta_sinfo->lock); | ||
3197 | } | ||
3198 | |||
3199 | if (retries == 2) { | ||
3200 | flush_delalloc(root, meta_sinfo); | ||
2825 | goto again; | 3201 | goto again; |
2826 | } | 3202 | } |
3203 | spin_lock(&meta_sinfo->lock); | ||
3204 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3205 | spin_unlock(&meta_sinfo->lock); | ||
3206 | |||
3207 | dump_space_info(meta_sinfo, 0, 0); | ||
2827 | return -ENOSPC; | 3208 | return -ENOSPC; |
2828 | } | 3209 | } |
3210 | |||
3211 | check_force_delalloc(meta_sinfo); | ||
2829 | spin_unlock(&meta_sinfo->lock); | 3212 | spin_unlock(&meta_sinfo->lock); |
2830 | 3213 | ||
2831 | return 0; | 3214 | return 0; |
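Tying the reserve/unreserve pair together with the NOTE above, a hypothetical caller (again, not code from this patch) would look roughly like this — reserving before starting the transaction and unreserving only after btrfs_end_transaction() has run the delayed refs:

static int example_update_one_item(struct btrfs_root *root)
{
        struct btrfs_trans_handle *trans;
        int ret;

        ret = btrfs_reserve_metadata_space(root, 1);
        if (ret)
                return ret;                     /* -ENOSPC */

        trans = btrfs_start_transaction(root, 1);
        if (!trans) {
                btrfs_unreserve_metadata_space(root, 1);
                return -ENOMEM;
        }

        /* ... modify the one item this reservation covers ... */

        btrfs_end_transaction(trans, root);
        /* delayed refs have run, so it is safe to give the credits back */
        btrfs_unreserve_metadata_space(root, 1);
        return 0;
}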
@@ -2888,7 +3271,7 @@ alloc: | |||
2888 | spin_unlock(&data_sinfo->lock); | 3271 | spin_unlock(&data_sinfo->lock); |
2889 | 3272 | ||
2890 | /* commit the current transaction and try again */ | 3273 | /* commit the current transaction and try again */ |
2891 | if (!committed) { | 3274 | if (!committed && !root->fs_info->open_ioctl_trans) { |
2892 | committed = 1; | 3275 | committed = 1; |
2893 | trans = btrfs_join_transaction(root, 1); | 3276 | trans = btrfs_join_transaction(root, 1); |
2894 | if (!trans) | 3277 | if (!trans) |
@@ -2916,7 +3299,7 @@ alloc: | |||
2916 | BTRFS_I(inode)->reserved_bytes += bytes; | 3299 | BTRFS_I(inode)->reserved_bytes += bytes; |
2917 | spin_unlock(&data_sinfo->lock); | 3300 | spin_unlock(&data_sinfo->lock); |
2918 | 3301 | ||
2919 | return btrfs_check_metadata_free_space(root); | 3302 | return 0; |
2920 | } | 3303 | } |
2921 | 3304 | ||
2922 | /* | 3305 | /* |
@@ -3015,17 +3398,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3015 | BUG_ON(!space_info); | 3398 | BUG_ON(!space_info); |
3016 | 3399 | ||
3017 | spin_lock(&space_info->lock); | 3400 | spin_lock(&space_info->lock); |
3018 | if (space_info->force_alloc) { | 3401 | if (space_info->force_alloc) |
3019 | force = 1; | 3402 | force = 1; |
3020 | space_info->force_alloc = 0; | ||
3021 | } | ||
3022 | if (space_info->full) { | 3403 | if (space_info->full) { |
3023 | spin_unlock(&space_info->lock); | 3404 | spin_unlock(&space_info->lock); |
3024 | goto out; | 3405 | goto out; |
3025 | } | 3406 | } |
3026 | 3407 | ||
3027 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3408 | thresh = space_info->total_bytes - space_info->bytes_readonly; |
3028 | thresh = div_factor(thresh, 6); | 3409 | thresh = div_factor(thresh, 8); |
3029 | if (!force && | 3410 | if (!force && |
3030 | (space_info->bytes_used + space_info->bytes_pinned + | 3411 | (space_info->bytes_used + space_info->bytes_pinned + |
3031 | space_info->bytes_reserved + alloc_bytes) < thresh) { | 3412 | space_info->bytes_reserved + alloc_bytes) < thresh) { |
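For context on this hunk: div_factor() in this file scales a value by factor/10, so changing the factor from 6 to 8 moves the chunk pre-allocation threshold from roughly 60% to 80% of the writable space. A throwaway sketch (the 10 GiB figure is arbitrary):

#include <stdio.h>

/* assumes div_factor() multiplies by factor and divides by 10, as in
 * extent-tree.c of this era */
static unsigned long long div_factor(unsigned long long num, int factor)
{
        return num * factor / 10;
}

int main(void)
{
        unsigned long long writable = 10ULL << 30;      /* total - readonly */

        printf("old threshold (factor 6): %llu bytes\n", div_factor(writable, 6));
        printf("new threshold (factor 8): %llu bytes\n", div_factor(writable, 8));
        return 0;
}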
@@ -3039,7 +3420,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3039 | * we keep a reasonable number of metadata chunks allocated in the | 3420 | * we keep a reasonable number of metadata chunks allocated in the |
3040 | * FS as well. | 3421 | * FS as well. |
3041 | */ | 3422 | */ |
3042 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | 3423 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
3043 | fs_info->data_chunk_allocations++; | 3424 | fs_info->data_chunk_allocations++; |
3044 | if (!(fs_info->data_chunk_allocations % | 3425 | if (!(fs_info->data_chunk_allocations % |
3045 | fs_info->metadata_ratio)) | 3426 | fs_info->metadata_ratio)) |
@@ -3047,8 +3428,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3047 | } | 3428 | } |
3048 | 3429 | ||
3049 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3430 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3431 | spin_lock(&space_info->lock); | ||
3050 | if (ret) | 3432 | if (ret) |
3051 | space_info->full = 1; | 3433 | space_info->full = 1; |
3434 | space_info->force_alloc = 0; | ||
3435 | spin_unlock(&space_info->lock); | ||
3052 | out: | 3436 | out: |
3053 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3437 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3054 | return ret; | 3438 | return ret; |
@@ -3747,6 +4131,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3747 | int loop = 0; | 4131 | int loop = 0; |
3748 | bool found_uncached_bg = false; | 4132 | bool found_uncached_bg = false; |
3749 | bool failed_cluster_refill = false; | 4133 | bool failed_cluster_refill = false; |
4134 | bool failed_alloc = false; | ||
3750 | 4135 | ||
3751 | WARN_ON(num_bytes < root->sectorsize); | 4136 | WARN_ON(num_bytes < root->sectorsize); |
3752 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 4137 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3951,14 +4336,23 @@ refill_cluster: | |||
3951 | 4336 | ||
3952 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 4337 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
3953 | num_bytes, empty_size); | 4338 | num_bytes, empty_size); |
3954 | if (!offset && (cached || (!cached && | 4339 | /* |
3955 | loop == LOOP_CACHING_NOWAIT))) { | 4340 | * If we didn't find a chunk, and we haven't failed on this |
3956 | goto loop; | 4341 | * block group before, and this block group is in the middle of |
3957 | } else if (!offset && (!cached && | 4342 | * caching and we are ok with waiting, then go ahead and wait |
3958 | loop > LOOP_CACHING_NOWAIT)) { | 4343 | * for progress to be made, and set failed_alloc to true. |
4344 | * | ||
4345 | * If failed_alloc is true then we've already waited on this | ||
4346 | * block group once and should move on to the next block group. | ||
4347 | */ | ||
4348 | if (!offset && !failed_alloc && !cached && | ||
4349 | loop > LOOP_CACHING_NOWAIT) { | ||
3959 | wait_block_group_cache_progress(block_group, | 4350 | wait_block_group_cache_progress(block_group, |
3960 | num_bytes + empty_size); | 4351 | num_bytes + empty_size); |
4352 | failed_alloc = true; | ||
3961 | goto have_block_group; | 4353 | goto have_block_group; |
4354 | } else if (!offset) { | ||
4355 | goto loop; | ||
3962 | } | 4356 | } |
3963 | checks: | 4357 | checks: |
3964 | search_start = stripe_align(root, offset); | 4358 | search_start = stripe_align(root, offset); |
@@ -4006,6 +4400,7 @@ checks: | |||
4006 | break; | 4400 | break; |
4007 | loop: | 4401 | loop: |
4008 | failed_cluster_refill = false; | 4402 | failed_cluster_refill = false; |
4403 | failed_alloc = false; | ||
4009 | btrfs_put_block_group(block_group); | 4404 | btrfs_put_block_group(block_group); |
4010 | } | 4405 | } |
4011 | up_read(&space_info->groups_sem); | 4406 | up_read(&space_info->groups_sem); |
@@ -4063,21 +4458,32 @@ loop: | |||
4063 | return ret; | 4458 | return ret; |
4064 | } | 4459 | } |
4065 | 4460 | ||
4066 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4461 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
4462 | int dump_block_groups) | ||
4067 | { | 4463 | { |
4068 | struct btrfs_block_group_cache *cache; | 4464 | struct btrfs_block_group_cache *cache; |
4069 | 4465 | ||
4466 | spin_lock(&info->lock); | ||
4070 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4467 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
4071 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4468 | (unsigned long long)(info->total_bytes - info->bytes_used - |
4072 | info->bytes_pinned - info->bytes_reserved), | 4469 | info->bytes_pinned - info->bytes_reserved - |
4470 | info->bytes_super), | ||
4073 | (info->full) ? "" : "not "); | 4471 | (info->full) ? "" : "not "); |
4074 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4472 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
4075 | " may_use=%llu, used=%llu\n", | 4473 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
4474 | "\n", | ||
4076 | (unsigned long long)info->total_bytes, | 4475 | (unsigned long long)info->total_bytes, |
4077 | (unsigned long long)info->bytes_pinned, | 4476 | (unsigned long long)info->bytes_pinned, |
4078 | (unsigned long long)info->bytes_delalloc, | 4477 | (unsigned long long)info->bytes_delalloc, |
4079 | (unsigned long long)info->bytes_may_use, | 4478 | (unsigned long long)info->bytes_may_use, |
4080 | (unsigned long long)info->bytes_used); | 4479 | (unsigned long long)info->bytes_used, |
4480 | (unsigned long long)info->bytes_root, | ||
4481 | (unsigned long long)info->bytes_super, | ||
4482 | (unsigned long long)info->bytes_reserved); | ||
4483 | spin_unlock(&info->lock); | ||
4484 | |||
4485 | if (!dump_block_groups) | ||
4486 | return; | ||
4081 | 4487 | ||
4082 | down_read(&info->groups_sem); | 4488 | down_read(&info->groups_sem); |
4083 | list_for_each_entry(cache, &info->block_groups, list) { | 4489 | list_for_each_entry(cache, &info->block_groups, list) { |
@@ -4145,7 +4551,7 @@ again: | |||
4145 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4551 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
4146 | "wanted %llu\n", (unsigned long long)data, | 4552 | "wanted %llu\n", (unsigned long long)data, |
4147 | (unsigned long long)num_bytes); | 4553 | (unsigned long long)num_bytes); |
4148 | dump_space_info(sinfo, num_bytes); | 4554 | dump_space_info(sinfo, num_bytes, 1); |
4149 | } | 4555 | } |
4150 | 4556 | ||
4151 | return ret; | 4557 | return ret; |
@@ -4506,6 +4912,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
4506 | u64 bytenr; | 4912 | u64 bytenr; |
4507 | u64 generation; | 4913 | u64 generation; |
4508 | u64 refs; | 4914 | u64 refs; |
4915 | u64 flags; | ||
4509 | u64 last = 0; | 4916 | u64 last = 0; |
4510 | u32 nritems; | 4917 | u32 nritems; |
4511 | u32 blocksize; | 4918 | u32 blocksize; |
@@ -4543,15 +4950,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
4543 | generation <= root->root_key.offset) | 4950 | generation <= root->root_key.offset) |
4544 | continue; | 4951 | continue; |
4545 | 4952 | ||
4953 | /* We don't lock the tree block, it's OK to be racy here */ | ||
4954 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
4955 | &refs, &flags); | ||
4956 | BUG_ON(ret); | ||
4957 | BUG_ON(refs == 0); | ||
4958 | |||
4546 | if (wc->stage == DROP_REFERENCE) { | 4959 | if (wc->stage == DROP_REFERENCE) { |
4547 | ret = btrfs_lookup_extent_info(trans, root, | ||
4548 | bytenr, blocksize, | ||
4549 | &refs, NULL); | ||
4550 | BUG_ON(ret); | ||
4551 | BUG_ON(refs == 0); | ||
4552 | if (refs == 1) | 4960 | if (refs == 1) |
4553 | goto reada; | 4961 | goto reada; |
4554 | 4962 | ||
4963 | if (wc->level == 1 && | ||
4964 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
4965 | continue; | ||
4555 | if (!wc->update_ref || | 4966 | if (!wc->update_ref || |
4556 | generation <= root->root_key.offset) | 4967 | generation <= root->root_key.offset) |
4557 | continue; | 4968 | continue; |
@@ -4560,6 +4971,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
4560 | &wc->update_progress); | 4971 | &wc->update_progress); |
4561 | if (ret < 0) | 4972 | if (ret < 0) |
4562 | continue; | 4973 | continue; |
4974 | } else { | ||
4975 | if (wc->level == 1 && | ||
4976 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
4977 | continue; | ||
4563 | } | 4978 | } |
4564 | reada: | 4979 | reada: |
4565 | ret = readahead_tree_block(root, bytenr, blocksize, | 4980 | ret = readahead_tree_block(root, bytenr, blocksize, |
@@ -4583,7 +4998,7 @@ reada: | |||
4583 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | 4998 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, |
4584 | struct btrfs_root *root, | 4999 | struct btrfs_root *root, |
4585 | struct btrfs_path *path, | 5000 | struct btrfs_path *path, |
4586 | struct walk_control *wc) | 5001 | struct walk_control *wc, int lookup_info) |
4587 | { | 5002 | { |
4588 | int level = wc->level; | 5003 | int level = wc->level; |
4589 | struct extent_buffer *eb = path->nodes[level]; | 5004 | struct extent_buffer *eb = path->nodes[level]; |
@@ -4598,8 +5013,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4598 | * when reference count of tree block is 1, it won't increase | 5013 | * when reference count of tree block is 1, it won't increase |
4599 | * again. once full backref flag is set, we never clear it. | 5014 | * again. once full backref flag is set, we never clear it. |
4600 | */ | 5015 | */ |
4601 | if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || | 5016 | if (lookup_info && |
4602 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { | 5017 | ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || |
5018 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { | ||
4603 | BUG_ON(!path->locks[level]); | 5019 | BUG_ON(!path->locks[level]); |
4604 | ret = btrfs_lookup_extent_info(trans, root, | 5020 | ret = btrfs_lookup_extent_info(trans, root, |
4605 | eb->start, eb->len, | 5021 | eb->start, eb->len, |
@@ -4660,7 +5076,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4660 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | 5076 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, |
4661 | struct btrfs_root *root, | 5077 | struct btrfs_root *root, |
4662 | struct btrfs_path *path, | 5078 | struct btrfs_path *path, |
4663 | struct walk_control *wc) | 5079 | struct walk_control *wc, int *lookup_info) |
4664 | { | 5080 | { |
4665 | u64 bytenr; | 5081 | u64 bytenr; |
4666 | u64 generation; | 5082 | u64 generation; |
@@ -4680,8 +5096,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
4680 | * for the subtree | 5096 | * for the subtree |
4681 | */ | 5097 | */ |
4682 | if (wc->stage == UPDATE_BACKREF && | 5098 | if (wc->stage == UPDATE_BACKREF && |
4683 | generation <= root->root_key.offset) | 5099 | generation <= root->root_key.offset) { |
5100 | *lookup_info = 1; | ||
4684 | return 1; | 5101 | return 1; |
5102 | } | ||
4685 | 5103 | ||
4686 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | 5104 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); |
4687 | blocksize = btrfs_level_size(root, level - 1); | 5105 | blocksize = btrfs_level_size(root, level - 1); |
@@ -4694,14 +5112,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
4694 | btrfs_tree_lock(next); | 5112 | btrfs_tree_lock(next); |
4695 | btrfs_set_lock_blocking(next); | 5113 | btrfs_set_lock_blocking(next); |
4696 | 5114 | ||
4697 | if (wc->stage == DROP_REFERENCE) { | 5115 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, |
4698 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | 5116 | &wc->refs[level - 1], |
4699 | &wc->refs[level - 1], | 5117 | &wc->flags[level - 1]); |
4700 | &wc->flags[level - 1]); | 5118 | BUG_ON(ret); |
4701 | BUG_ON(ret); | 5119 | BUG_ON(wc->refs[level - 1] == 0); |
4702 | BUG_ON(wc->refs[level - 1] == 0); | 5120 | *lookup_info = 0; |
4703 | 5121 | ||
5122 | if (wc->stage == DROP_REFERENCE) { | ||
4704 | if (wc->refs[level - 1] > 1) { | 5123 | if (wc->refs[level - 1] > 1) { |
5124 | if (level == 1 && | ||
5125 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5126 | goto skip; | ||
5127 | |||
4705 | if (!wc->update_ref || | 5128 | if (!wc->update_ref || |
4706 | generation <= root->root_key.offset) | 5129 | generation <= root->root_key.offset) |
4707 | goto skip; | 5130 | goto skip; |
@@ -4715,12 +5138,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
4715 | wc->stage = UPDATE_BACKREF; | 5138 | wc->stage = UPDATE_BACKREF; |
4716 | wc->shared_level = level - 1; | 5139 | wc->shared_level = level - 1; |
4717 | } | 5140 | } |
5141 | } else { | ||
5142 | if (level == 1 && | ||
5143 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5144 | goto skip; | ||
4718 | } | 5145 | } |
4719 | 5146 | ||
4720 | if (!btrfs_buffer_uptodate(next, generation)) { | 5147 | if (!btrfs_buffer_uptodate(next, generation)) { |
4721 | btrfs_tree_unlock(next); | 5148 | btrfs_tree_unlock(next); |
4722 | free_extent_buffer(next); | 5149 | free_extent_buffer(next); |
4723 | next = NULL; | 5150 | next = NULL; |
5151 | *lookup_info = 1; | ||
4724 | } | 5152 | } |
4725 | 5153 | ||
4726 | if (!next) { | 5154 | if (!next) { |
@@ -4743,21 +5171,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
4743 | skip: | 5171 | skip: |
4744 | wc->refs[level - 1] = 0; | 5172 | wc->refs[level - 1] = 0; |
4745 | wc->flags[level - 1] = 0; | 5173 | wc->flags[level - 1] = 0; |
5174 | if (wc->stage == DROP_REFERENCE) { | ||
5175 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
5176 | parent = path->nodes[level]->start; | ||
5177 | } else { | ||
5178 | BUG_ON(root->root_key.objectid != | ||
5179 | btrfs_header_owner(path->nodes[level])); | ||
5180 | parent = 0; | ||
5181 | } | ||
4746 | 5182 | ||
4747 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | 5183 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, |
4748 | parent = path->nodes[level]->start; | 5184 | root->root_key.objectid, level - 1, 0); |
4749 | } else { | 5185 | BUG_ON(ret); |
4750 | BUG_ON(root->root_key.objectid != | ||
4751 | btrfs_header_owner(path->nodes[level])); | ||
4752 | parent = 0; | ||
4753 | } | 5186 | } |
4754 | |||
4755 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
4756 | root->root_key.objectid, level - 1, 0); | ||
4757 | BUG_ON(ret); | ||
4758 | |||
4759 | btrfs_tree_unlock(next); | 5187 | btrfs_tree_unlock(next); |
4760 | free_extent_buffer(next); | 5188 | free_extent_buffer(next); |
5189 | *lookup_info = 1; | ||
4761 | return 1; | 5190 | return 1; |
4762 | } | 5191 | } |
4763 | 5192 | ||
@@ -4871,6 +5300,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4871 | struct walk_control *wc) | 5300 | struct walk_control *wc) |
4872 | { | 5301 | { |
4873 | int level = wc->level; | 5302 | int level = wc->level; |
5303 | int lookup_info = 1; | ||
4874 | int ret; | 5304 | int ret; |
4875 | 5305 | ||
4876 | while (level >= 0) { | 5306 | while (level >= 0) { |
@@ -4878,14 +5308,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4878 | btrfs_header_nritems(path->nodes[level])) | 5308 | btrfs_header_nritems(path->nodes[level])) |
4879 | break; | 5309 | break; |
4880 | 5310 | ||
4881 | ret = walk_down_proc(trans, root, path, wc); | 5311 | ret = walk_down_proc(trans, root, path, wc, lookup_info); |
4882 | if (ret > 0) | 5312 | if (ret > 0) |
4883 | break; | 5313 | break; |
4884 | 5314 | ||
4885 | if (level == 0) | 5315 | if (level == 0) |
4886 | break; | 5316 | break; |
4887 | 5317 | ||
4888 | ret = do_walk_down(trans, root, path, wc); | 5318 | ret = do_walk_down(trans, root, path, wc, &lookup_info); |
4889 | if (ret > 0) { | 5319 | if (ret > 0) { |
4890 | path->slots[level]++; | 5320 | path->slots[level]++; |
4891 | continue; | 5321 | continue; |