diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 724 |
1 files changed, 611 insertions, 113 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 993f93ff7ba6..94627c4cc193 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
68 | struct extent_buffer **must_clean); | 68 | struct extent_buffer **must_clean); |
69 | static int find_next_key(struct btrfs_path *path, int level, | 69 | static int find_next_key(struct btrfs_path *path, int level, |
70 | struct btrfs_key *key); | 70 | struct btrfs_key *key); |
71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | ||
72 | int dump_block_groups); | ||
71 | 73 | ||
72 | static noinline int | 74 | static noinline int |
73 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 75 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -1566,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1566 | return ret; | 1568 | return ret; |
1567 | } | 1569 | } |
1568 | 1570 | ||
1569 | #ifdef BIO_RW_DISCARD | ||
1570 | static void btrfs_issue_discard(struct block_device *bdev, | 1571 | static void btrfs_issue_discard(struct block_device *bdev, |
1571 | u64 start, u64 len) | 1572 | u64 start, u64 len) |
1572 | { | 1573 | { |
1573 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1574 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
1574 | DISCARD_FL_BARRIER); | 1575 | DISCARD_FL_BARRIER); |
1575 | } | 1576 | } |
1576 | #endif | ||
1577 | 1577 | ||
1578 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1578 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
1579 | u64 num_bytes) | 1579 | u64 num_bytes) |
1580 | { | 1580 | { |
1581 | #ifdef BIO_RW_DISCARD | ||
1582 | int ret; | 1581 | int ret; |
1583 | u64 map_length = num_bytes; | 1582 | u64 map_length = num_bytes; |
1584 | struct btrfs_multi_bio *multi = NULL; | 1583 | struct btrfs_multi_bio *multi = NULL; |
1585 | 1584 | ||
1585 | if (!btrfs_test_opt(root, DISCARD)) | ||
1586 | return 0; | ||
1587 | |||
1586 | /* Tell the block device(s) that the sectors can be discarded */ | 1588 | /* Tell the block device(s) that the sectors can be discarded */ |
1587 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | 1589 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, |
1588 | bytenr, &map_length, &multi, 0); | 1590 | bytenr, &map_length, &multi, 0); |
@@ -1602,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1602 | } | 1604 | } |
1603 | 1605 | ||
1604 | return ret; | 1606 | return ret; |
1605 | #else | ||
1606 | return 0; | ||
1607 | #endif | ||
1608 | } | 1607 | } |
1609 | 1608 | ||
1610 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1609 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
@@ -2765,67 +2764,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | |||
2765 | alloc_target); | 2764 | alloc_target); |
2766 | } | 2765 | } |
2767 | 2766 | ||
2767 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
2768 | { | ||
2769 | u64 num_bytes; | ||
2770 | int level; | ||
2771 | |||
2772 | level = BTRFS_MAX_LEVEL - 2; | ||
2773 | /* | ||
2774 | * NOTE: these calculations are absolutely the worst possible case. | ||
2775 | * This assumes that _every_ item we insert will require a new leaf, and | ||
2776 | * that the tree has grown to its maximum level size. | ||
2777 | */ | ||
2778 | |||
2779 | /* | ||
2780 | * for every item we insert we could insert both an extent item and an | ||
2781 | * extent ref item. Then for every item we insert, we will need to cow | ||
2782 | * both the original leaf, plus the leaf to the left and right of it. | ||
2783 | * | ||
2784 | * Unless we are talking about the extent root, then we just want the | ||
2785 | * number of items * 2, since we just need the extent item plus its ref. | ||
2786 | */ | ||
2787 | if (root == root->fs_info->extent_root) | ||
2788 | num_bytes = num_items * 2; | ||
2789 | else | ||
2790 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
2791 | |||
2792 | /* | ||
2793 | * num_bytes is total number of leaves we could need times the leaf | ||
2794 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
2795 | * level, down to the leaf level. | ||
2796 | */ | ||
2797 | num_bytes = (num_bytes * root->leafsize) + | ||
2798 | (num_bytes * (level * 2)) * root->nodesize; | ||
2799 | |||
2800 | return num_bytes; | ||
2801 | } | ||
2802 | |||
2768 | /* | 2803 | /* |
2769 | * for now this just makes sure we have at least 5% of our metadata space free | 2804 | * Unreserve metadata space for delalloc. If we have less reserved credits than |
2770 | * for use. | 2805 | * we have extents, this function does nothing. |
2771 | */ | 2806 | */ |
2772 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | 2807 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, |
2808 | struct inode *inode, int num_items) | ||
2773 | { | 2809 | { |
2774 | struct btrfs_fs_info *info = root->fs_info; | 2810 | struct btrfs_fs_info *info = root->fs_info; |
2775 | struct btrfs_space_info *meta_sinfo; | 2811 | struct btrfs_space_info *meta_sinfo; |
2776 | u64 alloc_target, thresh; | 2812 | u64 num_bytes; |
2777 | int committed = 0, ret; | 2813 | u64 alloc_target; |
2814 | bool bug = false; | ||
2778 | 2815 | ||
2779 | /* get the space info for where the metadata will live */ | 2816 | /* get the space info for where the metadata will live */ |
2780 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2817 | alloc_target = btrfs_get_alloc_profile(root, 0); |
2781 | meta_sinfo = __find_space_info(info, alloc_target); | 2818 | meta_sinfo = __find_space_info(info, alloc_target); |
2782 | if (!meta_sinfo) | ||
2783 | goto alloc; | ||
2784 | 2819 | ||
2785 | again: | 2820 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, |
2821 | num_items); | ||
2822 | |||
2786 | spin_lock(&meta_sinfo->lock); | 2823 | spin_lock(&meta_sinfo->lock); |
2787 | if (!meta_sinfo->full) | 2824 | spin_lock(&BTRFS_I(inode)->accounting_lock); |
2788 | thresh = meta_sinfo->total_bytes * 80; | 2825 | if (BTRFS_I(inode)->reserved_extents <= |
2789 | else | 2826 | BTRFS_I(inode)->outstanding_extents) { |
2790 | thresh = meta_sinfo->total_bytes * 95; | 2827 | spin_unlock(&BTRFS_I(inode)->accounting_lock); |
2828 | spin_unlock(&meta_sinfo->lock); | ||
2829 | return 0; | ||
2830 | } | ||
2831 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2832 | |||
2833 | BTRFS_I(inode)->reserved_extents--; | ||
2834 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
2835 | |||
2836 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
2837 | bug = true; | ||
2838 | meta_sinfo->bytes_delalloc = 0; | ||
2839 | } else { | ||
2840 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2841 | } | ||
2842 | spin_unlock(&meta_sinfo->lock); | ||
2843 | |||
2844 | BUG_ON(bug); | ||
2791 | 2845 | ||
2846 | return 0; | ||
2847 | } | ||
2848 | |||
2849 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
2850 | { | ||
2851 | u64 thresh; | ||
2852 | |||
2853 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
2854 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
2855 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
2856 | meta_sinfo->bytes_may_use; | ||
2857 | |||
2858 | thresh = meta_sinfo->total_bytes - thresh; | ||
2859 | thresh *= 80; | ||
2792 | do_div(thresh, 100); | 2860 | do_div(thresh, 100); |
2861 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
2862 | meta_sinfo->force_delalloc = 1; | ||
2863 | else | ||
2864 | meta_sinfo->force_delalloc = 0; | ||
2865 | } | ||
2793 | 2866 | ||
2794 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2867 | struct async_flush { |
2795 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 2868 | struct btrfs_root *root; |
2796 | meta_sinfo->bytes_super > thresh) { | 2869 | struct btrfs_space_info *info; |
2797 | struct btrfs_trans_handle *trans; | 2870 | struct btrfs_work work; |
2798 | if (!meta_sinfo->full) { | 2871 | }; |
2799 | meta_sinfo->force_alloc = 1; | 2872 | |
2873 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
2874 | { | ||
2875 | struct async_flush *async; | ||
2876 | struct btrfs_root *root; | ||
2877 | struct btrfs_space_info *info; | ||
2878 | |||
2879 | async = container_of(work, struct async_flush, work); | ||
2880 | root = async->root; | ||
2881 | info = async->info; | ||
2882 | |||
2883 | btrfs_start_delalloc_inodes(root); | ||
2884 | wake_up(&info->flush_wait); | ||
2885 | btrfs_wait_ordered_extents(root, 0); | ||
2886 | |||
2887 | spin_lock(&info->lock); | ||
2888 | info->flushing = 0; | ||
2889 | spin_unlock(&info->lock); | ||
2890 | wake_up(&info->flush_wait); | ||
2891 | |||
2892 | kfree(async); | ||
2893 | } | ||
2894 | |||
2895 | static void wait_on_flush(struct btrfs_space_info *info) | ||
2896 | { | ||
2897 | DEFINE_WAIT(wait); | ||
2898 | u64 used; | ||
2899 | |||
2900 | while (1) { | ||
2901 | prepare_to_wait(&info->flush_wait, &wait, | ||
2902 | TASK_UNINTERRUPTIBLE); | ||
2903 | spin_lock(&info->lock); | ||
2904 | if (!info->flushing) { | ||
2905 | spin_unlock(&info->lock); | ||
2906 | break; | ||
2907 | } | ||
2908 | |||
2909 | used = info->bytes_used + info->bytes_reserved + | ||
2910 | info->bytes_pinned + info->bytes_readonly + | ||
2911 | info->bytes_super + info->bytes_root + | ||
2912 | info->bytes_may_use + info->bytes_delalloc; | ||
2913 | if (used < info->total_bytes) { | ||
2914 | spin_unlock(&info->lock); | ||
2915 | break; | ||
2916 | } | ||
2917 | spin_unlock(&info->lock); | ||
2918 | schedule(); | ||
2919 | } | ||
2920 | finish_wait(&info->flush_wait, &wait); | ||
2921 | } | ||
2922 | |||
2923 | static void flush_delalloc(struct btrfs_root *root, | ||
2924 | struct btrfs_space_info *info) | ||
2925 | { | ||
2926 | struct async_flush *async; | ||
2927 | bool wait = false; | ||
2928 | |||
2929 | spin_lock(&info->lock); | ||
2930 | |||
2931 | if (!info->flushing) { | ||
2932 | info->flushing = 1; | ||
2933 | init_waitqueue_head(&info->flush_wait); | ||
2934 | } else { | ||
2935 | wait = true; | ||
2936 | } | ||
2937 | |||
2938 | spin_unlock(&info->lock); | ||
2939 | |||
2940 | if (wait) { | ||
2941 | wait_on_flush(info); | ||
2942 | return; | ||
2943 | } | ||
2944 | |||
2945 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
2946 | if (!async) | ||
2947 | goto flush; | ||
2948 | |||
2949 | async->root = root; | ||
2950 | async->info = info; | ||
2951 | async->work.func = flush_delalloc_async; | ||
2952 | |||
2953 | btrfs_queue_worker(&root->fs_info->enospc_workers, | ||
2954 | &async->work); | ||
2955 | wait_on_flush(info); | ||
2956 | return; | ||
2957 | |||
2958 | flush: | ||
2959 | btrfs_start_delalloc_inodes(root); | ||
2960 | btrfs_wait_ordered_extents(root, 0); | ||
2961 | |||
2962 | spin_lock(&info->lock); | ||
2963 | info->flushing = 0; | ||
2964 | spin_unlock(&info->lock); | ||
2965 | wake_up(&info->flush_wait); | ||
2966 | } | ||
2967 | |||
2968 | static int maybe_allocate_chunk(struct btrfs_root *root, | ||
2969 | struct btrfs_space_info *info) | ||
2970 | { | ||
2971 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
2972 | struct btrfs_trans_handle *trans; | ||
2973 | bool wait = false; | ||
2974 | int ret = 0; | ||
2975 | u64 min_metadata; | ||
2976 | u64 free_space; | ||
2977 | |||
2978 | free_space = btrfs_super_total_bytes(disk_super); | ||
2979 | /* | ||
2980 | * we allow the metadata to grow to a max of either 10gb or 5% of the | ||
2981 | * space in the volume. | ||
2982 | */ | ||
2983 | min_metadata = min((u64)10 * 1024 * 1024 * 1024, | ||
2984 | div64_u64(free_space * 5, 100)); | ||
2985 | if (info->total_bytes >= min_metadata) { | ||
2986 | spin_unlock(&info->lock); | ||
2987 | return 0; | ||
2988 | } | ||
2989 | |||
2990 | if (info->full) { | ||
2991 | spin_unlock(&info->lock); | ||
2992 | return 0; | ||
2993 | } | ||
2994 | |||
2995 | if (!info->allocating_chunk) { | ||
2996 | info->force_alloc = 1; | ||
2997 | info->allocating_chunk = 1; | ||
2998 | init_waitqueue_head(&info->allocate_wait); | ||
2999 | } else { | ||
3000 | wait = true; | ||
3001 | } | ||
3002 | |||
3003 | spin_unlock(&info->lock); | ||
3004 | |||
3005 | if (wait) { | ||
3006 | wait_event(info->allocate_wait, | ||
3007 | !info->allocating_chunk); | ||
3008 | return 1; | ||
3009 | } | ||
3010 | |||
3011 | trans = btrfs_start_transaction(root, 1); | ||
3012 | if (!trans) { | ||
3013 | ret = -ENOMEM; | ||
3014 | goto out; | ||
3015 | } | ||
3016 | |||
3017 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3018 | 4096 + 2 * 1024 * 1024, | ||
3019 | info->flags, 0); | ||
3020 | btrfs_end_transaction(trans, root); | ||
3021 | if (ret) | ||
3022 | goto out; | ||
3023 | out: | ||
3024 | spin_lock(&info->lock); | ||
3025 | info->allocating_chunk = 0; | ||
3026 | spin_unlock(&info->lock); | ||
3027 | wake_up(&info->allocate_wait); | ||
3028 | |||
3029 | if (ret) | ||
3030 | return 0; | ||
3031 | return 1; | ||
3032 | } | ||
3033 | |||
3034 | /* | ||
3035 | * Reserve metadata space for delalloc. | ||
3036 | */ | ||
3037 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
3038 | struct inode *inode, int num_items) | ||
3039 | { | ||
3040 | struct btrfs_fs_info *info = root->fs_info; | ||
3041 | struct btrfs_space_info *meta_sinfo; | ||
3042 | u64 num_bytes; | ||
3043 | u64 used; | ||
3044 | u64 alloc_target; | ||
3045 | int flushed = 0; | ||
3046 | int force_delalloc; | ||
3047 | |||
3048 | /* get the space info for where the metadata will live */ | ||
3049 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3050 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3051 | |||
3052 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
3053 | num_items); | ||
3054 | again: | ||
3055 | spin_lock(&meta_sinfo->lock); | ||
3056 | |||
3057 | force_delalloc = meta_sinfo->force_delalloc; | ||
3058 | |||
3059 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3060 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3061 | |||
3062 | if (!flushed) | ||
3063 | meta_sinfo->bytes_delalloc += num_bytes; | ||
3064 | |||
3065 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3066 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3067 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3068 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3069 | |||
3070 | if (used > meta_sinfo->total_bytes) { | ||
3071 | flushed++; | ||
3072 | |||
3073 | if (flushed == 1) { | ||
3074 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3075 | goto again; | ||
3076 | flushed++; | ||
3077 | } else { | ||
2800 | spin_unlock(&meta_sinfo->lock); | 3078 | spin_unlock(&meta_sinfo->lock); |
2801 | alloc: | 3079 | } |
2802 | trans = btrfs_start_transaction(root, 1); | ||
2803 | if (!trans) | ||
2804 | return -ENOMEM; | ||
2805 | 3080 | ||
2806 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 3081 | if (flushed == 2) { |
2807 | 2 * 1024 * 1024, alloc_target, 0); | 3082 | filemap_flush(inode->i_mapping); |
2808 | btrfs_end_transaction(trans, root); | 3083 | goto again; |
2809 | if (!meta_sinfo) { | 3084 | } else if (flushed == 3) { |
2810 | meta_sinfo = __find_space_info(info, | 3085 | flush_delalloc(root, meta_sinfo); |
2811 | alloc_target); | ||
2812 | } | ||
2813 | goto again; | 3086 | goto again; |
2814 | } | 3087 | } |
3088 | spin_lock(&meta_sinfo->lock); | ||
3089 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2815 | spin_unlock(&meta_sinfo->lock); | 3090 | spin_unlock(&meta_sinfo->lock); |
3091 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
3092 | BTRFS_I(inode)->outstanding_extents, | ||
3093 | BTRFS_I(inode)->reserved_extents); | ||
3094 | dump_space_info(meta_sinfo, 0, 0); | ||
3095 | return -ENOSPC; | ||
3096 | } | ||
2816 | 3097 | ||
2817 | if (!committed) { | 3098 | BTRFS_I(inode)->reserved_extents++; |
2818 | committed = 1; | 3099 | check_force_delalloc(meta_sinfo); |
2819 | trans = btrfs_join_transaction(root, 1); | 3100 | spin_unlock(&meta_sinfo->lock); |
2820 | if (!trans) | 3101 | |
2821 | return -ENOMEM; | 3102 | if (!flushed && force_delalloc) |
2822 | ret = btrfs_commit_transaction(trans, root); | 3103 | filemap_flush(inode->i_mapping); |
2823 | if (ret) | 3104 | |
2824 | return ret; | 3105 | return 0; |
3106 | } | ||
3107 | |||
3108 | /* | ||
3109 | * unreserve num_items number of items worth of metadata space. This needs to | ||
3110 | * be paired with btrfs_reserve_metadata_space. | ||
3111 | * | ||
3112 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
3113 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
3114 | * operations which will result in more used metadata, so we want to make sure we | ||
3115 | * can do that without issue. | ||
3116 | */ | ||
3117 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3118 | { | ||
3119 | struct btrfs_fs_info *info = root->fs_info; | ||
3120 | struct btrfs_space_info *meta_sinfo; | ||
3121 | u64 num_bytes; | ||
3122 | u64 alloc_target; | ||
3123 | bool bug = false; | ||
3124 | |||
3125 | /* get the space info for where the metadata will live */ | ||
3126 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3127 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3128 | |||
3129 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3130 | |||
3131 | spin_lock(&meta_sinfo->lock); | ||
3132 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
3133 | bug = true; | ||
3134 | meta_sinfo->bytes_may_use = 0; | ||
3135 | } else { | ||
3136 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3137 | } | ||
3138 | spin_unlock(&meta_sinfo->lock); | ||
3139 | |||
3140 | BUG_ON(bug); | ||
3141 | |||
3142 | return 0; | ||
3143 | } | ||
3144 | |||
3145 | /* | ||
3146 | * Reserve some metadata space for use. We'll calculate the worst case number | ||
3147 | * of bytes that would be needed to modify num_items number of items. If we | ||
3148 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
3149 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
3150 | * items you reserved, since whatever metadata you needed should have already | ||
3151 | * been allocated. | ||
3152 | * | ||
3153 | * This will commit the transaction to make more space if we don't have enough | ||
3154 | * metadata space. The only time we don't do this is if we're reserving space | ||
3155 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
3156 | * caller's responsibility to handle it properly. | ||
3157 | */ | ||
3158 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3159 | { | ||
3160 | struct btrfs_fs_info *info = root->fs_info; | ||
3161 | struct btrfs_space_info *meta_sinfo; | ||
3162 | u64 num_bytes; | ||
3163 | u64 used; | ||
3164 | u64 alloc_target; | ||
3165 | int retries = 0; | ||
3166 | |||
3167 | /* get the space info for where the metadata will live */ | ||
3168 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3169 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3170 | |||
3171 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3172 | again: | ||
3173 | spin_lock(&meta_sinfo->lock); | ||
3174 | |||
3175 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3176 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3177 | |||
3178 | if (!retries) | ||
3179 | meta_sinfo->bytes_may_use += num_bytes; | ||
3180 | |||
3181 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3182 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3183 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3184 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3185 | |||
3186 | if (used > meta_sinfo->total_bytes) { | ||
3187 | retries++; | ||
3188 | if (retries == 1) { | ||
3189 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3190 | goto again; | ||
3191 | retries++; | ||
3192 | } else { | ||
3193 | spin_unlock(&meta_sinfo->lock); | ||
3194 | } | ||
3195 | |||
3196 | if (retries == 2) { | ||
3197 | flush_delalloc(root, meta_sinfo); | ||
2825 | goto again; | 3198 | goto again; |
2826 | } | 3199 | } |
3200 | spin_lock(&meta_sinfo->lock); | ||
3201 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3202 | spin_unlock(&meta_sinfo->lock); | ||
3203 | |||
3204 | dump_space_info(meta_sinfo, 0, 0); | ||
2827 | return -ENOSPC; | 3205 | return -ENOSPC; |
2828 | } | 3206 | } |
3207 | |||
3208 | check_force_delalloc(meta_sinfo); | ||
2829 | spin_unlock(&meta_sinfo->lock); | 3209 | spin_unlock(&meta_sinfo->lock); |
2830 | 3210 | ||
2831 | return 0; | 3211 | return 0; |
@@ -2888,7 +3268,7 @@ alloc: | |||
2888 | spin_unlock(&data_sinfo->lock); | 3268 | spin_unlock(&data_sinfo->lock); |
2889 | 3269 | ||
2890 | /* commit the current transaction and try again */ | 3270 | /* commit the current transaction and try again */ |
2891 | if (!committed) { | 3271 | if (!committed && !root->fs_info->open_ioctl_trans) { |
2892 | committed = 1; | 3272 | committed = 1; |
2893 | trans = btrfs_join_transaction(root, 1); | 3273 | trans = btrfs_join_transaction(root, 1); |
2894 | if (!trans) | 3274 | if (!trans) |
@@ -2916,7 +3296,7 @@ alloc: | |||
2916 | BTRFS_I(inode)->reserved_bytes += bytes; | 3296 | BTRFS_I(inode)->reserved_bytes += bytes; |
2917 | spin_unlock(&data_sinfo->lock); | 3297 | spin_unlock(&data_sinfo->lock); |
2918 | 3298 | ||
2919 | return btrfs_check_metadata_free_space(root); | 3299 | return 0; |
2920 | } | 3300 | } |
2921 | 3301 | ||
2922 | /* | 3302 | /* |
@@ -3015,17 +3395,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3015 | BUG_ON(!space_info); | 3395 | BUG_ON(!space_info); |
3016 | 3396 | ||
3017 | spin_lock(&space_info->lock); | 3397 | spin_lock(&space_info->lock); |
3018 | if (space_info->force_alloc) { | 3398 | if (space_info->force_alloc) |
3019 | force = 1; | 3399 | force = 1; |
3020 | space_info->force_alloc = 0; | ||
3021 | } | ||
3022 | if (space_info->full) { | 3400 | if (space_info->full) { |
3023 | spin_unlock(&space_info->lock); | 3401 | spin_unlock(&space_info->lock); |
3024 | goto out; | 3402 | goto out; |
3025 | } | 3403 | } |
3026 | 3404 | ||
3027 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3405 | thresh = space_info->total_bytes - space_info->bytes_readonly; |
3028 | thresh = div_factor(thresh, 6); | 3406 | thresh = div_factor(thresh, 8); |
3029 | if (!force && | 3407 | if (!force && |
3030 | (space_info->bytes_used + space_info->bytes_pinned + | 3408 | (space_info->bytes_used + space_info->bytes_pinned + |
3031 | space_info->bytes_reserved + alloc_bytes) < thresh) { | 3409 | space_info->bytes_reserved + alloc_bytes) < thresh) { |
@@ -3039,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3039 | * we keep a reasonable number of metadata chunks allocated in the | 3417 | * we keep a reasonable number of metadata chunks allocated in the |
3040 | * FS as well. | 3418 | * FS as well. |
3041 | */ | 3419 | */ |
3042 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | 3420 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
3043 | fs_info->data_chunk_allocations++; | 3421 | fs_info->data_chunk_allocations++; |
3044 | if (!(fs_info->data_chunk_allocations % | 3422 | if (!(fs_info->data_chunk_allocations % |
3045 | fs_info->metadata_ratio)) | 3423 | fs_info->metadata_ratio)) |
@@ -3047,8 +3425,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3047 | } | 3425 | } |
3048 | 3426 | ||
3049 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3427 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3428 | spin_lock(&space_info->lock); | ||
3050 | if (ret) | 3429 | if (ret) |
3051 | space_info->full = 1; | 3430 | space_info->full = 1; |
3431 | space_info->force_alloc = 0; | ||
3432 | spin_unlock(&space_info->lock); | ||
3052 | out: | 3433 | out: |
3053 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3434 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
3054 | return ret; | 3435 | return ret; |
@@ -3306,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
3306 | if (is_data) | 3687 | if (is_data) |
3307 | goto pinit; | 3688 | goto pinit; |
3308 | 3689 | ||
3690 | /* | ||
3691 | * discard is sloooow, and so triggering discards on | ||
3692 | * individual btree blocks isn't a good plan. Just | ||
3693 | * pin everything in discard mode. | ||
3694 | */ | ||
3695 | if (btrfs_test_opt(root, DISCARD)) | ||
3696 | goto pinit; | ||
3697 | |||
3309 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | 3698 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); |
3310 | if (!buf) | 3699 | if (!buf) |
3311 | goto pinit; | 3700 | goto pinit; |
@@ -3713,7 +4102,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
3713 | } | 4102 | } |
3714 | 4103 | ||
3715 | enum btrfs_loop_type { | 4104 | enum btrfs_loop_type { |
3716 | LOOP_CACHED_ONLY = 0, | 4105 | LOOP_FIND_IDEAL = 0, |
3717 | LOOP_CACHING_NOWAIT = 1, | 4106 | LOOP_CACHING_NOWAIT = 1, |
3718 | LOOP_CACHING_WAIT = 2, | 4107 | LOOP_CACHING_WAIT = 2, |
3719 | LOOP_ALLOC_CHUNK = 3, | 4108 | LOOP_ALLOC_CHUNK = 3, |
@@ -3742,11 +4131,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3742 | struct btrfs_block_group_cache *block_group = NULL; | 4131 | struct btrfs_block_group_cache *block_group = NULL; |
3743 | int empty_cluster = 2 * 1024 * 1024; | 4132 | int empty_cluster = 2 * 1024 * 1024; |
3744 | int allowed_chunk_alloc = 0; | 4133 | int allowed_chunk_alloc = 0; |
4134 | int done_chunk_alloc = 0; | ||
3745 | struct btrfs_space_info *space_info; | 4135 | struct btrfs_space_info *space_info; |
3746 | int last_ptr_loop = 0; | 4136 | int last_ptr_loop = 0; |
3747 | int loop = 0; | 4137 | int loop = 0; |
3748 | bool found_uncached_bg = false; | 4138 | bool found_uncached_bg = false; |
3749 | bool failed_cluster_refill = false; | 4139 | bool failed_cluster_refill = false; |
4140 | bool failed_alloc = false; | ||
4141 | u64 ideal_cache_percent = 0; | ||
4142 | u64 ideal_cache_offset = 0; | ||
3750 | 4143 | ||
3751 | WARN_ON(num_bytes < root->sectorsize); | 4144 | WARN_ON(num_bytes < root->sectorsize); |
3752 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 4145 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3782,14 +4175,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3782 | empty_cluster = 0; | 4175 | empty_cluster = 0; |
3783 | 4176 | ||
3784 | if (search_start == hint_byte) { | 4177 | if (search_start == hint_byte) { |
4178 | ideal_cache: | ||
3785 | block_group = btrfs_lookup_block_group(root->fs_info, | 4179 | block_group = btrfs_lookup_block_group(root->fs_info, |
3786 | search_start); | 4180 | search_start); |
3787 | /* | 4181 | /* |
3788 | * we don't want to use the block group if it doesn't match our | 4182 | * we don't want to use the block group if it doesn't match our |
3789 | * allocation bits, or if it's not cached. | 4183 | * allocation bits, or if it's not cached. |
4184 | * | ||
4185 | * However if we are re-searching with an ideal block group | ||
4186 | * picked out then we don't care that the block group is cached. | ||
3790 | */ | 4187 | */ |
3791 | if (block_group && block_group_bits(block_group, data) && | 4188 | if (block_group && block_group_bits(block_group, data) && |
3792 | block_group_cache_done(block_group)) { | 4189 | (block_group->cached != BTRFS_CACHE_NO || |
4190 | search_start == ideal_cache_offset)) { | ||
3793 | down_read(&space_info->groups_sem); | 4191 | down_read(&space_info->groups_sem); |
3794 | if (list_empty(&block_group->list) || | 4192 | if (list_empty(&block_group->list) || |
3795 | block_group->ro) { | 4193 | block_group->ro) { |
@@ -3801,13 +4199,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3801 | */ | 4199 | */ |
3802 | btrfs_put_block_group(block_group); | 4200 | btrfs_put_block_group(block_group); |
3803 | up_read(&space_info->groups_sem); | 4201 | up_read(&space_info->groups_sem); |
3804 | } else | 4202 | } else { |
3805 | goto have_block_group; | 4203 | goto have_block_group; |
4204 | } | ||
3806 | } else if (block_group) { | 4205 | } else if (block_group) { |
3807 | btrfs_put_block_group(block_group); | 4206 | btrfs_put_block_group(block_group); |
3808 | } | 4207 | } |
3809 | } | 4208 | } |
3810 | |||
3811 | search: | 4209 | search: |
3812 | down_read(&space_info->groups_sem); | 4210 | down_read(&space_info->groups_sem); |
3813 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4211 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
@@ -3819,28 +4217,45 @@ search: | |||
3819 | 4217 | ||
3820 | have_block_group: | 4218 | have_block_group: |
3821 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4219 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
4220 | u64 free_percent; | ||
4221 | |||
4222 | free_percent = btrfs_block_group_used(&block_group->item); | ||
4223 | free_percent *= 100; | ||
4224 | free_percent = div64_u64(free_percent, | ||
4225 | block_group->key.offset); | ||
4226 | free_percent = 100 - free_percent; | ||
4227 | if (free_percent > ideal_cache_percent && | ||
4228 | likely(!block_group->ro)) { | ||
4229 | ideal_cache_offset = block_group->key.objectid; | ||
4230 | ideal_cache_percent = free_percent; | ||
4231 | } | ||
4232 | |||
3822 | /* | 4233 | /* |
3823 | * we want to start caching kthreads, but not too many | 4234 | * We only want to start kthread caching if we are at |
3824 | * right off the bat so we don't overwhelm the system, | 4235 | * the point where we will wait for caching to make |
3825 | * so only start them if there are less than 2 and we're | 4236 | * progress, or if our ideal search is over and we've |
3826 | * in the initial allocation phase. | 4237 | * found somebody to start caching. |
3827 | */ | 4238 | */ |
3828 | if (loop > LOOP_CACHING_NOWAIT || | 4239 | if (loop > LOOP_CACHING_NOWAIT || |
3829 | atomic_read(&space_info->caching_threads) < 2) { | 4240 | (loop > LOOP_FIND_IDEAL && |
4241 | atomic_read(&space_info->caching_threads) < 2)) { | ||
3830 | ret = cache_block_group(block_group); | 4242 | ret = cache_block_group(block_group); |
3831 | BUG_ON(ret); | 4243 | BUG_ON(ret); |
3832 | } | 4244 | } |
3833 | } | ||
3834 | |||
3835 | cached = block_group_cache_done(block_group); | ||
3836 | if (unlikely(!cached)) { | ||
3837 | found_uncached_bg = true; | 4245 | found_uncached_bg = true; |
3838 | 4246 | ||
3839 | /* if we only want cached bgs, loop */ | 4247 | /* |
3840 | if (loop == LOOP_CACHED_ONLY) | 4248 | * If loop is set for cached only, try the next block |
4249 | * group. | ||
4250 | */ | ||
4251 | if (loop == LOOP_FIND_IDEAL) | ||
3841 | goto loop; | 4252 | goto loop; |
3842 | } | 4253 | } |
3843 | 4254 | ||
4255 | cached = block_group_cache_done(block_group); | ||
4256 | if (unlikely(!cached)) | ||
4257 | found_uncached_bg = true; | ||
4258 | |||
3844 | if (unlikely(block_group->ro)) | 4259 | if (unlikely(block_group->ro)) |
3845 | goto loop; | 4260 | goto loop; |
3846 | 4261 | ||
@@ -3951,14 +4366,23 @@ refill_cluster: | |||
3951 | 4366 | ||
3952 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 4367 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
3953 | num_bytes, empty_size); | 4368 | num_bytes, empty_size); |
3954 | if (!offset && (cached || (!cached && | 4369 | /* |
3955 | loop == LOOP_CACHING_NOWAIT))) { | 4370 | * If we didn't find a chunk, and we haven't failed on this |
3956 | goto loop; | 4371 | * block group before, and this block group is in the middle of |
3957 | } else if (!offset && (!cached && | 4372 | * caching and we are ok with waiting, then go ahead and wait |
3958 | loop > LOOP_CACHING_NOWAIT)) { | 4373 | * for progress to be made, and set failed_alloc to true. |
4374 | * | ||
4375 | * If failed_alloc is true then we've already waited on this | ||
4376 | * block group once and should move on to the next block group. | ||
4377 | */ | ||
4378 | if (!offset && !failed_alloc && !cached && | ||
4379 | loop > LOOP_CACHING_NOWAIT) { | ||
3959 | wait_block_group_cache_progress(block_group, | 4380 | wait_block_group_cache_progress(block_group, |
3960 | num_bytes + empty_size); | 4381 | num_bytes + empty_size); |
4382 | failed_alloc = true; | ||
3961 | goto have_block_group; | 4383 | goto have_block_group; |
4384 | } else if (!offset) { | ||
4385 | goto loop; | ||
3962 | } | 4386 | } |
3963 | checks: | 4387 | checks: |
3964 | search_start = stripe_align(root, offset); | 4388 | search_start = stripe_align(root, offset); |
@@ -4006,13 +4430,16 @@ checks: | |||
4006 | break; | 4430 | break; |
4007 | loop: | 4431 | loop: |
4008 | failed_cluster_refill = false; | 4432 | failed_cluster_refill = false; |
4433 | failed_alloc = false; | ||
4009 | btrfs_put_block_group(block_group); | 4434 | btrfs_put_block_group(block_group); |
4010 | } | 4435 | } |
4011 | up_read(&space_info->groups_sem); | 4436 | up_read(&space_info->groups_sem); |
4012 | 4437 | ||
4013 | /* LOOP_CACHED_ONLY, only search fully cached block groups | 4438 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for |
4014 | * LOOP_CACHING_NOWAIT, search partially cached block groups, but | 4439 | * them to make caching progress. Also |
4015 | * dont wait foR them to finish caching | 4440 | * determine the best possible bg to cache |
4441 | * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking | ||
4442 | * caching kthreads as we move along | ||
4016 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching | 4443 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching |
4017 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again | 4444 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again |
4018 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | 4445 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try |
@@ -4021,12 +4448,47 @@ loop: | |||
4021 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 4448 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
4022 | (found_uncached_bg || empty_size || empty_cluster || | 4449 | (found_uncached_bg || empty_size || empty_cluster || |
4023 | allowed_chunk_alloc)) { | 4450 | allowed_chunk_alloc)) { |
4024 | if (found_uncached_bg) { | 4451 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
4025 | found_uncached_bg = false; | 4452 | found_uncached_bg = false; |
4026 | if (loop < LOOP_CACHING_WAIT) { | 4453 | loop++; |
4027 | loop++; | 4454 | if (!ideal_cache_percent && |
4455 | atomic_read(&space_info->caching_threads)) | ||
4028 | goto search; | 4456 | goto search; |
4029 | } | 4457 | |
4458 | /* | ||
4459 | * One of the following two things has happened so far | ||
4460 | * | ||
4461 | * 1) We found an ideal block group for caching that | ||
4462 | * is mostly full and will cache quickly, so we might | ||
4463 | * as well wait for it. | ||
4464 | * | ||
4465 | * 2) We searched for cached only and we didn't find | ||
4466 | * anything, and we didn't start any caching kthreads | ||
4467 | * either, so chances are we will loop through and | ||
4468 | * start a couple caching kthreads, and then come back | ||
4469 | * around and just wait for them. This will be slower | ||
4470 | * because we will have 2 caching kthreads reading at | ||
4471 | * the same time when we could have just started one | ||
4472 | * and waited for it to get far enough to give us an | ||
4473 | * allocation, so go ahead and go to the wait caching | ||
4474 | * loop. | ||
4475 | */ | ||
4476 | loop = LOOP_CACHING_WAIT; | ||
4477 | search_start = ideal_cache_offset; | ||
4478 | ideal_cache_percent = 0; | ||
4479 | goto ideal_cache; | ||
4480 | } else if (loop == LOOP_FIND_IDEAL) { | ||
4481 | /* | ||
4482 | * Didn't find an uncached bg, wait on anything we find | ||
4483 | * next. | ||
4484 | */ | ||
4485 | loop = LOOP_CACHING_WAIT; | ||
4486 | goto search; | ||
4487 | } | ||
4488 | |||
4489 | if (loop < LOOP_CACHING_WAIT) { | ||
4490 | loop++; | ||
4491 | goto search; | ||
4030 | } | 4492 | } |
4031 | 4493 | ||
4032 | if (loop == LOOP_ALLOC_CHUNK) { | 4494 | if (loop == LOOP_ALLOC_CHUNK) { |
@@ -4038,7 +4500,8 @@ loop: | |||
4038 | ret = do_chunk_alloc(trans, root, num_bytes + | 4500 | ret = do_chunk_alloc(trans, root, num_bytes + |
4039 | 2 * 1024 * 1024, data, 1); | 4501 | 2 * 1024 * 1024, data, 1); |
4040 | allowed_chunk_alloc = 0; | 4502 | allowed_chunk_alloc = 0; |
4041 | } else { | 4503 | done_chunk_alloc = 1; |
4504 | } else if (!done_chunk_alloc) { | ||
4042 | space_info->force_alloc = 1; | 4505 | space_info->force_alloc = 1; |
4043 | } | 4506 | } |
4044 | 4507 | ||
@@ -4063,21 +4526,32 @@ loop: | |||
4063 | return ret; | 4526 | return ret; |
4064 | } | 4527 | } |
4065 | 4528 | ||
4066 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4529 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
4530 | int dump_block_groups) | ||
4067 | { | 4531 | { |
4068 | struct btrfs_block_group_cache *cache; | 4532 | struct btrfs_block_group_cache *cache; |
4069 | 4533 | ||
4534 | spin_lock(&info->lock); | ||
4070 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4535 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
4071 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4536 | (unsigned long long)(info->total_bytes - info->bytes_used - |
4072 | info->bytes_pinned - info->bytes_reserved), | 4537 | info->bytes_pinned - info->bytes_reserved - |
4538 | info->bytes_super), | ||
4073 | (info->full) ? "" : "not "); | 4539 | (info->full) ? "" : "not "); |
4074 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4540 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
4075 | " may_use=%llu, used=%llu\n", | 4541 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
4542 | "\n", | ||
4076 | (unsigned long long)info->total_bytes, | 4543 | (unsigned long long)info->total_bytes, |
4077 | (unsigned long long)info->bytes_pinned, | 4544 | (unsigned long long)info->bytes_pinned, |
4078 | (unsigned long long)info->bytes_delalloc, | 4545 | (unsigned long long)info->bytes_delalloc, |
4079 | (unsigned long long)info->bytes_may_use, | 4546 | (unsigned long long)info->bytes_may_use, |
4080 | (unsigned long long)info->bytes_used); | 4547 | (unsigned long long)info->bytes_used, |
4548 | (unsigned long long)info->bytes_root, | ||
4549 | (unsigned long long)info->bytes_super, | ||
4550 | (unsigned long long)info->bytes_reserved); | ||
4551 | spin_unlock(&info->lock); | ||
4552 | |||
4553 | if (!dump_block_groups) | ||
4554 | return; | ||
4081 | 4555 | ||
4082 | down_read(&info->groups_sem); | 4556 | down_read(&info->groups_sem); |
4083 | list_for_each_entry(cache, &info->block_groups, list) { | 4557 | list_for_each_entry(cache, &info->block_groups, list) { |
@@ -4145,7 +4619,7 @@ again: | |||
4145 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4619 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
4146 | "wanted %llu\n", (unsigned long long)data, | 4620 | "wanted %llu\n", (unsigned long long)data, |
4147 | (unsigned long long)num_bytes); | 4621 | (unsigned long long)num_bytes); |
4148 | dump_space_info(sinfo, num_bytes); | 4622 | dump_space_info(sinfo, num_bytes, 1); |
4149 | } | 4623 | } |
4150 | 4624 | ||
4151 | return ret; | 4625 | return ret; |
@@ -4506,6 +4980,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
4506 | u64 bytenr; | 4980 | u64 bytenr; |
4507 | u64 generation; | 4981 | u64 generation; |
4508 | u64 refs; | 4982 | u64 refs; |
4983 | u64 flags; | ||
4509 | u64 last = 0; | 4984 | u64 last = 0; |
4510 | u32 nritems; | 4985 | u32 nritems; |
4511 | u32 blocksize; | 4986 | u32 blocksize; |
@@ -4543,15 +5018,19 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
4543 | generation <= root->root_key.offset) | 5018 | generation <= root->root_key.offset) |
4544 | continue; | 5019 | continue; |
4545 | 5020 | ||
5021 | /* We don't lock the tree block, it's OK to be racy here */ | ||
5022 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
5023 | &refs, &flags); | ||
5024 | BUG_ON(ret); | ||
5025 | BUG_ON(refs == 0); | ||
5026 | |||
4546 | if (wc->stage == DROP_REFERENCE) { | 5027 | if (wc->stage == DROP_REFERENCE) { |
4547 | ret = btrfs_lookup_extent_info(trans, root, | ||
4548 | bytenr, blocksize, | ||
4549 | &refs, NULL); | ||
4550 | BUG_ON(ret); | ||
4551 | BUG_ON(refs == 0); | ||
4552 | if (refs == 1) | 5028 | if (refs == 1) |
4553 | goto reada; | 5029 | goto reada; |
4554 | 5030 | ||
5031 | if (wc->level == 1 && | ||
5032 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5033 | continue; | ||
4555 | if (!wc->update_ref || | 5034 | if (!wc->update_ref || |
4556 | generation <= root->root_key.offset) | 5035 | generation <= root->root_key.offset) |
4557 | continue; | 5036 | continue; |
@@ -4560,6 +5039,10 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, | |||
4560 | &wc->update_progress); | 5039 | &wc->update_progress); |
4561 | if (ret < 0) | 5040 | if (ret < 0) |
4562 | continue; | 5041 | continue; |
5042 | } else { | ||
5043 | if (wc->level == 1 && | ||
5044 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5045 | continue; | ||
4563 | } | 5046 | } |
4564 | reada: | 5047 | reada: |
4565 | ret = readahead_tree_block(root, bytenr, blocksize, | 5048 | ret = readahead_tree_block(root, bytenr, blocksize, |
@@ -4583,7 +5066,7 @@ reada: | |||
4583 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | 5066 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, |
4584 | struct btrfs_root *root, | 5067 | struct btrfs_root *root, |
4585 | struct btrfs_path *path, | 5068 | struct btrfs_path *path, |
4586 | struct walk_control *wc) | 5069 | struct walk_control *wc, int lookup_info) |
4587 | { | 5070 | { |
4588 | int level = wc->level; | 5071 | int level = wc->level; |
4589 | struct extent_buffer *eb = path->nodes[level]; | 5072 | struct extent_buffer *eb = path->nodes[level]; |
@@ -4598,8 +5081,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4598 | * when reference count of tree block is 1, it won't increase | 5081 | * when reference count of tree block is 1, it won't increase |
4599 | * again. once full backref flag is set, we never clear it. | 5082 | * again. once full backref flag is set, we never clear it. |
4600 | */ | 5083 | */ |
4601 | if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || | 5084 | if (lookup_info && |
4602 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { | 5085 | ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || |
5086 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { | ||
4603 | BUG_ON(!path->locks[level]); | 5087 | BUG_ON(!path->locks[level]); |
4604 | ret = btrfs_lookup_extent_info(trans, root, | 5088 | ret = btrfs_lookup_extent_info(trans, root, |
4605 | eb->start, eb->len, | 5089 | eb->start, eb->len, |
@@ -4660,7 +5144,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4660 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | 5144 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, |
4661 | struct btrfs_root *root, | 5145 | struct btrfs_root *root, |
4662 | struct btrfs_path *path, | 5146 | struct btrfs_path *path, |
4663 | struct walk_control *wc) | 5147 | struct walk_control *wc, int *lookup_info) |
4664 | { | 5148 | { |
4665 | u64 bytenr; | 5149 | u64 bytenr; |
4666 | u64 generation; | 5150 | u64 generation; |
@@ -4680,8 +5164,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
4680 | * for the subtree | 5164 | * for the subtree |
4681 | */ | 5165 | */ |
4682 | if (wc->stage == UPDATE_BACKREF && | 5166 | if (wc->stage == UPDATE_BACKREF && |
4683 | generation <= root->root_key.offset) | 5167 | generation <= root->root_key.offset) { |
5168 | *lookup_info = 1; | ||
4684 | return 1; | 5169 | return 1; |
5170 | } | ||
4685 | 5171 | ||
4686 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | 5172 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); |
4687 | blocksize = btrfs_level_size(root, level - 1); | 5173 | blocksize = btrfs_level_size(root, level - 1); |
@@ -4694,14 +5180,19 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
4694 | btrfs_tree_lock(next); | 5180 | btrfs_tree_lock(next); |
4695 | btrfs_set_lock_blocking(next); | 5181 | btrfs_set_lock_blocking(next); |
4696 | 5182 | ||
4697 | if (wc->stage == DROP_REFERENCE) { | 5183 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, |
4698 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | 5184 | &wc->refs[level - 1], |
4699 | &wc->refs[level - 1], | 5185 | &wc->flags[level - 1]); |
4700 | &wc->flags[level - 1]); | 5186 | BUG_ON(ret); |
4701 | BUG_ON(ret); | 5187 | BUG_ON(wc->refs[level - 1] == 0); |
4702 | BUG_ON(wc->refs[level - 1] == 0); | 5188 | *lookup_info = 0; |
4703 | 5189 | ||
5190 | if (wc->stage == DROP_REFERENCE) { | ||
4704 | if (wc->refs[level - 1] > 1) { | 5191 | if (wc->refs[level - 1] > 1) { |
5192 | if (level == 1 && | ||
5193 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5194 | goto skip; | ||
5195 | |||
4705 | if (!wc->update_ref || | 5196 | if (!wc->update_ref || |
4706 | generation <= root->root_key.offset) | 5197 | generation <= root->root_key.offset) |
4707 | goto skip; | 5198 | goto skip; |
@@ -4715,12 +5206,17 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
4715 | wc->stage = UPDATE_BACKREF; | 5206 | wc->stage = UPDATE_BACKREF; |
4716 | wc->shared_level = level - 1; | 5207 | wc->shared_level = level - 1; |
4717 | } | 5208 | } |
5209 | } else { | ||
5210 | if (level == 1 && | ||
5211 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5212 | goto skip; | ||
4718 | } | 5213 | } |
4719 | 5214 | ||
4720 | if (!btrfs_buffer_uptodate(next, generation)) { | 5215 | if (!btrfs_buffer_uptodate(next, generation)) { |
4721 | btrfs_tree_unlock(next); | 5216 | btrfs_tree_unlock(next); |
4722 | free_extent_buffer(next); | 5217 | free_extent_buffer(next); |
4723 | next = NULL; | 5218 | next = NULL; |
5219 | *lookup_info = 1; | ||
4724 | } | 5220 | } |
4725 | 5221 | ||
4726 | if (!next) { | 5222 | if (!next) { |
@@ -4743,21 +5239,22 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
4743 | skip: | 5239 | skip: |
4744 | wc->refs[level - 1] = 0; | 5240 | wc->refs[level - 1] = 0; |
4745 | wc->flags[level - 1] = 0; | 5241 | wc->flags[level - 1] = 0; |
5242 | if (wc->stage == DROP_REFERENCE) { | ||
5243 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
5244 | parent = path->nodes[level]->start; | ||
5245 | } else { | ||
5246 | BUG_ON(root->root_key.objectid != | ||
5247 | btrfs_header_owner(path->nodes[level])); | ||
5248 | parent = 0; | ||
5249 | } | ||
4746 | 5250 | ||
4747 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | 5251 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, |
4748 | parent = path->nodes[level]->start; | 5252 | root->root_key.objectid, level - 1, 0); |
4749 | } else { | 5253 | BUG_ON(ret); |
4750 | BUG_ON(root->root_key.objectid != | ||
4751 | btrfs_header_owner(path->nodes[level])); | ||
4752 | parent = 0; | ||
4753 | } | 5254 | } |
4754 | |||
4755 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
4756 | root->root_key.objectid, level - 1, 0); | ||
4757 | BUG_ON(ret); | ||
4758 | |||
4759 | btrfs_tree_unlock(next); | 5255 | btrfs_tree_unlock(next); |
4760 | free_extent_buffer(next); | 5256 | free_extent_buffer(next); |
5257 | *lookup_info = 1; | ||
4761 | return 1; | 5258 | return 1; |
4762 | } | 5259 | } |
4763 | 5260 | ||
@@ -4871,6 +5368,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4871 | struct walk_control *wc) | 5368 | struct walk_control *wc) |
4872 | { | 5369 | { |
4873 | int level = wc->level; | 5370 | int level = wc->level; |
5371 | int lookup_info = 1; | ||
4874 | int ret; | 5372 | int ret; |
4875 | 5373 | ||
4876 | while (level >= 0) { | 5374 | while (level >= 0) { |
@@ -4878,14 +5376,14 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4878 | btrfs_header_nritems(path->nodes[level])) | 5376 | btrfs_header_nritems(path->nodes[level])) |
4879 | break; | 5377 | break; |
4880 | 5378 | ||
4881 | ret = walk_down_proc(trans, root, path, wc); | 5379 | ret = walk_down_proc(trans, root, path, wc, lookup_info); |
4882 | if (ret > 0) | 5380 | if (ret > 0) |
4883 | break; | 5381 | break; |
4884 | 5382 | ||
4885 | if (level == 0) | 5383 | if (level == 0) |
4886 | break; | 5384 | break; |
4887 | 5385 | ||
4888 | ret = do_walk_down(trans, root, path, wc); | 5386 | ret = do_walk_down(trans, root, path, wc, &lookup_info); |
4889 | if (ret > 0) { | 5387 | if (ret > 0) { |
4890 | path->slots[level]++; | 5388 | path->slots[level]++; |
4891 | continue; | 5389 | continue; |