diff options
| -rw-r--r-- | fs/btrfs/btrfs_inode.h | 8 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 23 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/extent-tree.c | 389 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 92 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 13 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 11 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 224 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 21 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 10 |
10 files changed, 678 insertions, 115 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 82ee56bba299..a54d354cefcb 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -128,6 +128,14 @@ struct btrfs_inode { | |||
| 128 | u64 last_unlink_trans; | 128 | u64 last_unlink_trans; |
| 129 | 129 | ||
| 130 | /* | 130 | /* |
| 131 | * These two counters are for delalloc metadata reservations. We keep | ||
| 132 | * track of how many extents we've accounted for vs how many extents we | ||
| 133 | * have. | ||
| 134 | */ | ||
| 135 | int delalloc_reserved_extents; | ||
| 136 | int delalloc_extents; | ||
| 137 | |||
| 138 | /* | ||
| 131 | * ordered_data_close is set by truncate when a file that used | 139 | * ordered_data_close is set by truncate when a file that used |
| 132 | * to have good data has been truncated to zero. When it is set | 140 | * to have good data has been truncated to zero. When it is set |
| 133 | * the btrfs file release call will add this inode to the | 141 | * the btrfs file release call will add this inode to the |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80599b4e42bd..b3959a150c3b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -675,18 +675,19 @@ struct btrfs_space_info { | |||
| 675 | current allocations */ | 675 | current allocations */ |
| 676 | u64 bytes_readonly; /* total bytes that are read only */ | 676 | u64 bytes_readonly; /* total bytes that are read only */ |
| 677 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 677 | u64 bytes_super; /* total bytes reserved for the super blocks */ |
| 678 | 678 | u64 bytes_root; /* the number of bytes needed to commit a | |
| 679 | /* delalloc accounting */ | 679 | transaction */ |
| 680 | u64 bytes_delalloc; /* number of bytes reserved for allocation, | ||
| 681 | this space is not necessarily reserved yet | ||
| 682 | by the allocator */ | ||
| 683 | u64 bytes_may_use; /* number of bytes that may be used for | 680 | u64 bytes_may_use; /* number of bytes that may be used for |
| 684 | delalloc */ | 681 | delalloc/allocations */ |
| 682 | u64 bytes_delalloc; /* number of bytes currently reserved for | ||
| 683 | delayed allocation */ | ||
| 685 | 684 | ||
| 686 | int full; /* indicates that we cannot allocate any more | 685 | int full; /* indicates that we cannot allocate any more |
| 687 | chunks for this space */ | 686 | chunks for this space */ |
| 688 | int force_alloc; /* set if we need to force a chunk alloc for | 687 | int force_alloc; /* set if we need to force a chunk alloc for |
| 689 | this space */ | 688 | this space */ |
| 689 | int force_delalloc; /* make people start doing filemap_flush until | ||
| 690 | we're under a threshold */ | ||
| 690 | 691 | ||
| 691 | struct list_head list; | 692 | struct list_head list; |
| 692 | 693 | ||
| @@ -695,6 +696,9 @@ struct btrfs_space_info { | |||
| 695 | spinlock_t lock; | 696 | spinlock_t lock; |
| 696 | struct rw_semaphore groups_sem; | 697 | struct rw_semaphore groups_sem; |
| 697 | atomic_t caching_threads; | 698 | atomic_t caching_threads; |
| 699 | |||
| 700 | int allocating_chunk; | ||
| 701 | wait_queue_head_t wait; | ||
| 698 | }; | 702 | }; |
| 699 | 703 | ||
| 700 | /* | 704 | /* |
| @@ -2022,7 +2026,12 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | |||
| 2022 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2026 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
| 2023 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2027 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 2024 | 2028 | ||
| 2025 | int btrfs_check_metadata_free_space(struct btrfs_root *root); | 2029 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); |
| 2030 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | ||
| 2031 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2032 | struct inode *inode, int num_items); | ||
| 2033 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2034 | struct inode *inode, int num_items); | ||
| 2026 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2035 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, |
| 2027 | u64 bytes); | 2036 | u64 bytes); |
| 2028 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2037 | void btrfs_free_reserved_data_space(struct btrfs_root *root, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f1e905f7e701..ece8d1e26b5e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1629,7 +1629,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1629 | fs_info->sb = sb; | 1629 | fs_info->sb = sb; |
| 1630 | fs_info->max_extent = (u64)-1; | 1630 | fs_info->max_extent = (u64)-1; |
| 1631 | fs_info->max_inline = 8192 * 1024; | 1631 | fs_info->max_inline = 8192 * 1024; |
| 1632 | fs_info->metadata_ratio = 8; | 1632 | fs_info->metadata_ratio = 0; |
| 1633 | 1633 | ||
| 1634 | fs_info->thread_pool_size = min_t(unsigned long, | 1634 | fs_info->thread_pool_size = min_t(unsigned long, |
| 1635 | num_online_cpus() + 2, 8); | 1635 | num_online_cpus() + 2, 8); |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 90d314eeff6d..a4b2b03cd682 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
| 68 | struct extent_buffer **must_clean); | 68 | struct extent_buffer **must_clean); |
| 69 | static int find_next_key(struct btrfs_path *path, int level, | 69 | static int find_next_key(struct btrfs_path *path, int level, |
| 70 | struct btrfs_key *key); | 70 | struct btrfs_key *key); |
| 71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | ||
| 72 | int dump_block_groups); | ||
| 71 | 73 | ||
| 72 | static noinline int | 74 | static noinline int |
| 73 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 75 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
| @@ -2764,67 +2766,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | |||
| 2764 | alloc_target); | 2766 | alloc_target); |
| 2765 | } | 2767 | } |
| 2766 | 2768 | ||
| 2769 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
| 2770 | { | ||
| 2771 | u64 num_bytes; | ||
| 2772 | int level; | ||
| 2773 | |||
| 2774 | level = BTRFS_MAX_LEVEL - 2; | ||
| 2775 | /* | ||
| 2776 | * NOTE: these calculations are absolutely the worst possible case. | ||
| 2777 | * This assumes that _every_ item we insert will require a new leaf, and | ||
| 2778 | * that the tree has grown to its maximum level size. | ||
| 2779 | */ | ||
| 2780 | |||
| 2781 | /* | ||
| 2782 | * for every item we insert we could insert both an extent item and an | ||
| 2783 | * extent ref item. Then for every item we insert, we will need to cow | ||
| 2784 | * both the original leaf, plus the leaf to the left and right of it. | ||
| 2785 | * | ||
| 2786 | * Unless we are talking about the extent root, then we just want the | ||
| 2787 | * number of items * 2, since we just need the extent item plus its ref. | ||
| 2788 | */ | ||
| 2789 | if (root == root->fs_info->extent_root) | ||
| 2790 | num_bytes = num_items * 2; | ||
| 2791 | else | ||
| 2792 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
| 2793 | |||
| 2794 | /* | ||
| 2795 | * num_bytes is total number of leaves we could need times the leaf | ||
| 2796 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
| 2797 | * level, down to the leaf level. | ||
| 2798 | */ | ||
| 2799 | num_bytes = (num_bytes * root->leafsize) + | ||
| 2800 | (num_bytes * (level * 2)) * root->nodesize; | ||
| 2801 | |||
| 2802 | return num_bytes; | ||
| 2803 | } | ||
| 2804 | |||
| 2767 | /* | 2805 | /* |
| 2768 | * for now this just makes sure we have at least 5% of our metadata space free | 2806 | * Unreserve metadata space for delalloc. If we have less reserved credits than |
| 2769 | * for use. | 2807 | * we have extents, this function does nothing. |
| 2770 | */ | 2808 | */ |
| 2771 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | 2809 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, |
| 2810 | struct inode *inode, int num_items) | ||
| 2772 | { | 2811 | { |
| 2773 | struct btrfs_fs_info *info = root->fs_info; | 2812 | struct btrfs_fs_info *info = root->fs_info; |
| 2774 | struct btrfs_space_info *meta_sinfo; | 2813 | struct btrfs_space_info *meta_sinfo; |
| 2775 | u64 alloc_target, thresh; | 2814 | u64 num_bytes; |
| 2776 | int committed = 0, ret; | 2815 | u64 alloc_target; |
| 2816 | bool bug = false; | ||
| 2777 | 2817 | ||
| 2778 | /* get the space info for where the metadata will live */ | 2818 | /* get the space info for where the metadata will live */ |
| 2779 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2819 | alloc_target = btrfs_get_alloc_profile(root, 0); |
| 2780 | meta_sinfo = __find_space_info(info, alloc_target); | 2820 | meta_sinfo = __find_space_info(info, alloc_target); |
| 2781 | if (!meta_sinfo) | ||
| 2782 | goto alloc; | ||
| 2783 | 2821 | ||
| 2784 | again: | 2822 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, |
| 2823 | num_items); | ||
| 2824 | |||
| 2785 | spin_lock(&meta_sinfo->lock); | 2825 | spin_lock(&meta_sinfo->lock); |
| 2786 | if (!meta_sinfo->full) | 2826 | if (BTRFS_I(inode)->delalloc_reserved_extents <= |
| 2787 | thresh = meta_sinfo->total_bytes * 80; | 2827 | BTRFS_I(inode)->delalloc_extents) { |
| 2788 | else | 2828 | spin_unlock(&meta_sinfo->lock); |
| 2789 | thresh = meta_sinfo->total_bytes * 95; | 2829 | return 0; |
| 2830 | } | ||
| 2831 | |||
| 2832 | BTRFS_I(inode)->delalloc_reserved_extents--; | ||
| 2833 | BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0); | ||
| 2834 | |||
| 2835 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
| 2836 | bug = true; | ||
| 2837 | meta_sinfo->bytes_delalloc = 0; | ||
| 2838 | } else { | ||
| 2839 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2840 | } | ||
| 2841 | spin_unlock(&meta_sinfo->lock); | ||
| 2790 | 2842 | ||
| 2843 | BUG_ON(bug); | ||
| 2844 | |||
| 2845 | return 0; | ||
| 2846 | } | ||
| 2847 | |||
| 2848 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
| 2849 | { | ||
| 2850 | u64 thresh; | ||
| 2851 | |||
| 2852 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 2853 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 2854 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 2855 | meta_sinfo->bytes_may_use; | ||
| 2856 | |||
| 2857 | thresh = meta_sinfo->total_bytes - thresh; | ||
| 2858 | thresh *= 80; | ||
| 2791 | do_div(thresh, 100); | 2859 | do_div(thresh, 100); |
| 2860 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
| 2861 | meta_sinfo->force_delalloc = 1; | ||
| 2862 | else | ||
| 2863 | meta_sinfo->force_delalloc = 0; | ||
| 2864 | } | ||
| 2792 | 2865 | ||
| 2793 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2866 | static int maybe_allocate_chunk(struct btrfs_root *root, |
| 2794 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 2867 | struct btrfs_space_info *info) |
| 2795 | meta_sinfo->bytes_super > thresh) { | 2868 | { |
| 2796 | struct btrfs_trans_handle *trans; | 2869 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; |
| 2797 | if (!meta_sinfo->full) { | 2870 | struct btrfs_trans_handle *trans; |
| 2798 | meta_sinfo->force_alloc = 1; | 2871 | bool wait = false; |
| 2872 | int ret = 0; | ||
| 2873 | u64 min_metadata; | ||
| 2874 | u64 free_space; | ||
| 2875 | |||
| 2876 | free_space = btrfs_super_total_bytes(disk_super); | ||
| 2877 | /* | ||
| 2878 | * we allow the metadata to grow to a max of either 5gb or 5% of the | ||
| 2879 | * space in the volume. | ||
| 2880 | */ | ||
| 2881 | min_metadata = min((u64)5 * 1024 * 1024 * 1024, | ||
| 2882 | div64_u64(free_space * 5, 100)); | ||
| 2883 | if (info->total_bytes >= min_metadata) { | ||
| 2884 | spin_unlock(&info->lock); | ||
| 2885 | return 0; | ||
| 2886 | } | ||
| 2887 | |||
| 2888 | if (info->full) { | ||
| 2889 | spin_unlock(&info->lock); | ||
| 2890 | return 0; | ||
| 2891 | } | ||
| 2892 | |||
| 2893 | if (!info->allocating_chunk) { | ||
| 2894 | info->force_alloc = 1; | ||
| 2895 | info->allocating_chunk = 1; | ||
| 2896 | init_waitqueue_head(&info->wait); | ||
| 2897 | } else { | ||
| 2898 | wait = true; | ||
| 2899 | } | ||
| 2900 | |||
| 2901 | spin_unlock(&info->lock); | ||
| 2902 | |||
| 2903 | if (wait) { | ||
| 2904 | wait_event(info->wait, | ||
| 2905 | !info->allocating_chunk); | ||
| 2906 | return 1; | ||
| 2907 | } | ||
| 2908 | |||
| 2909 | trans = btrfs_start_transaction(root, 1); | ||
| 2910 | if (!trans) { | ||
| 2911 | ret = -ENOMEM; | ||
| 2912 | goto out; | ||
| 2913 | } | ||
| 2914 | |||
| 2915 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 2916 | 4096 + 2 * 1024 * 1024, | ||
| 2917 | info->flags, 0); | ||
| 2918 | btrfs_end_transaction(trans, root); | ||
| 2919 | if (ret) | ||
| 2920 | goto out; | ||
| 2921 | out: | ||
| 2922 | spin_lock(&info->lock); | ||
| 2923 | info->allocating_chunk = 0; | ||
| 2924 | spin_unlock(&info->lock); | ||
| 2925 | wake_up(&info->wait); | ||
| 2926 | |||
| 2927 | if (ret) | ||
| 2928 | return 0; | ||
| 2929 | return 1; | ||
| 2930 | } | ||
| 2931 | |||
| 2932 | /* | ||
| 2933 | * Reserve metadata space for delalloc. | ||
| 2934 | */ | ||
| 2935 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2936 | struct inode *inode, int num_items) | ||
| 2937 | { | ||
| 2938 | struct btrfs_fs_info *info = root->fs_info; | ||
| 2939 | struct btrfs_space_info *meta_sinfo; | ||
| 2940 | u64 num_bytes; | ||
| 2941 | u64 used; | ||
| 2942 | u64 alloc_target; | ||
| 2943 | int flushed = 0; | ||
| 2944 | int force_delalloc; | ||
| 2945 | |||
| 2946 | /* get the space info for where the metadata will live */ | ||
| 2947 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 2948 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 2949 | |||
| 2950 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
| 2951 | num_items); | ||
| 2952 | again: | ||
| 2953 | spin_lock(&meta_sinfo->lock); | ||
| 2954 | |||
| 2955 | force_delalloc = meta_sinfo->force_delalloc; | ||
| 2956 | |||
| 2957 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 2958 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 2959 | |||
| 2960 | if (!flushed) | ||
| 2961 | meta_sinfo->bytes_delalloc += num_bytes; | ||
| 2962 | |||
| 2963 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 2964 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 2965 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 2966 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 2967 | |||
| 2968 | if (used > meta_sinfo->total_bytes) { | ||
| 2969 | flushed++; | ||
| 2970 | |||
| 2971 | if (flushed == 1) { | ||
| 2972 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 2973 | goto again; | ||
| 2974 | flushed++; | ||
| 2975 | } else { | ||
| 2799 | spin_unlock(&meta_sinfo->lock); | 2976 | spin_unlock(&meta_sinfo->lock); |
| 2800 | alloc: | 2977 | } |
| 2801 | trans = btrfs_start_transaction(root, 1); | ||
| 2802 | if (!trans) | ||
| 2803 | return -ENOMEM; | ||
| 2804 | 2978 | ||
| 2805 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2979 | if (flushed == 2) { |
| 2806 | 2 * 1024 * 1024, alloc_target, 0); | 2980 | filemap_flush(inode->i_mapping); |
| 2807 | btrfs_end_transaction(trans, root); | 2981 | goto again; |
| 2808 | if (!meta_sinfo) { | 2982 | } else if (flushed == 3) { |
| 2809 | meta_sinfo = __find_space_info(info, | 2983 | btrfs_start_delalloc_inodes(root); |
| 2810 | alloc_target); | 2984 | btrfs_wait_ordered_extents(root, 0); |
| 2811 | } | ||
| 2812 | goto again; | 2985 | goto again; |
| 2813 | } | 2986 | } |
| 2987 | spin_lock(&meta_sinfo->lock); | ||
| 2988 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2814 | spin_unlock(&meta_sinfo->lock); | 2989 | spin_unlock(&meta_sinfo->lock); |
| 2990 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
| 2991 | BTRFS_I(inode)->delalloc_extents, | ||
| 2992 | BTRFS_I(inode)->delalloc_reserved_extents); | ||
| 2993 | dump_space_info(meta_sinfo, 0, 0); | ||
| 2994 | return -ENOSPC; | ||
| 2995 | } | ||
| 2815 | 2996 | ||
| 2816 | if (!committed) { | 2997 | BTRFS_I(inode)->delalloc_reserved_extents++; |
| 2817 | committed = 1; | 2998 | check_force_delalloc(meta_sinfo); |
| 2818 | trans = btrfs_join_transaction(root, 1); | 2999 | spin_unlock(&meta_sinfo->lock); |
| 2819 | if (!trans) | 3000 | |
| 2820 | return -ENOMEM; | 3001 | if (!flushed && force_delalloc) |
| 2821 | ret = btrfs_commit_transaction(trans, root); | 3002 | filemap_flush(inode->i_mapping); |
| 2822 | if (ret) | 3003 | |
| 2823 | return ret; | 3004 | return 0; |
| 3005 | } | ||
| 3006 | |||
| 3007 | /* | ||
| 3008 | * unreserve num_items number of items worth of metadata space. This needs to | ||
| 3009 | * be paired with btrfs_reserve_metadata_space. | ||
| 3010 | * | ||
| 3011 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
| 3012 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
| 3013 | * operations which will result in more used metadata, so we want to make sure we | ||
| 3014 | * can do that without issue. | ||
| 3015 | */ | ||
| 3016 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3017 | { | ||
| 3018 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3019 | struct btrfs_space_info *meta_sinfo; | ||
| 3020 | u64 num_bytes; | ||
| 3021 | u64 alloc_target; | ||
| 3022 | bool bug = false; | ||
| 3023 | |||
| 3024 | /* get the space info for where the metadata will live */ | ||
| 3025 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3026 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3027 | |||
| 3028 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3029 | |||
| 3030 | spin_lock(&meta_sinfo->lock); | ||
| 3031 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
| 3032 | bug = true; | ||
| 3033 | meta_sinfo->bytes_may_use = 0; | ||
| 3034 | } else { | ||
| 3035 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3036 | } | ||
| 3037 | spin_unlock(&meta_sinfo->lock); | ||
| 3038 | |||
| 3039 | BUG_ON(bug); | ||
| 3040 | |||
| 3041 | return 0; | ||
| 3042 | } | ||
| 3043 | |||
| 3044 | /* | ||
| 3045 | * Reserve some metadata space for use. We'll calculate the worst case number | ||
| 3046 | * of bytes that would be needed to modify num_items number of items. If we | ||
| 3047 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
| 3048 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
| 3049 | * items you reserved, since whatever metadata you needed should have already | ||
| 3050 | * been allocated. | ||
| 3051 | * | ||
| 3052 | * This will commit the transaction to make more space if we don't have enough | ||
| 3053 | * metadata space. The only time we don't do this is if we're reserving space | ||
| 3054 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
| 3055 | * caller's responsibility to handle it properly. | ||
| 3056 | */ | ||
| 3057 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3058 | { | ||
| 3059 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3060 | struct btrfs_space_info *meta_sinfo; | ||
| 3061 | u64 num_bytes; | ||
| 3062 | u64 used; | ||
| 3063 | u64 alloc_target; | ||
| 3064 | int retries = 0; | ||
| 3065 | |||
| 3066 | /* get the space info for where the metadata will live */ | ||
| 3067 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3068 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3069 | |||
| 3070 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3071 | again: | ||
| 3072 | spin_lock(&meta_sinfo->lock); | ||
| 3073 | |||
| 3074 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3075 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3076 | |||
| 3077 | if (!retries) | ||
| 3078 | meta_sinfo->bytes_may_use += num_bytes; | ||
| 3079 | |||
| 3080 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3081 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3082 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3083 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3084 | |||
| 3085 | if (used > meta_sinfo->total_bytes) { | ||
| 3086 | retries++; | ||
| 3087 | if (retries == 1) { | ||
| 3088 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3089 | goto again; | ||
| 3090 | retries++; | ||
| 3091 | } else { | ||
| 3092 | spin_unlock(&meta_sinfo->lock); | ||
| 3093 | } | ||
| 3094 | |||
| 3095 | if (retries == 2) { | ||
| 3096 | btrfs_start_delalloc_inodes(root); | ||
| 3097 | btrfs_wait_ordered_extents(root, 0); | ||
| 2824 | goto again; | 3098 | goto again; |
| 2825 | } | 3099 | } |
| 3100 | spin_lock(&meta_sinfo->lock); | ||
| 3101 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3102 | spin_unlock(&meta_sinfo->lock); | ||
| 3103 | |||
| 3104 | dump_space_info(meta_sinfo, 0, 0); | ||
| 2826 | return -ENOSPC; | 3105 | return -ENOSPC; |
| 2827 | } | 3106 | } |
| 3107 | |||
| 3108 | check_force_delalloc(meta_sinfo); | ||
| 2828 | spin_unlock(&meta_sinfo->lock); | 3109 | spin_unlock(&meta_sinfo->lock); |
| 2829 | 3110 | ||
| 2830 | return 0; | 3111 | return 0; |
| @@ -2915,7 +3196,7 @@ alloc: | |||
| 2915 | BTRFS_I(inode)->reserved_bytes += bytes; | 3196 | BTRFS_I(inode)->reserved_bytes += bytes; |
| 2916 | spin_unlock(&data_sinfo->lock); | 3197 | spin_unlock(&data_sinfo->lock); |
| 2917 | 3198 | ||
| 2918 | return btrfs_check_metadata_free_space(root); | 3199 | return 0; |
| 2919 | } | 3200 | } |
| 2920 | 3201 | ||
| 2921 | /* | 3202 | /* |
| @@ -3014,17 +3295,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3014 | BUG_ON(!space_info); | 3295 | BUG_ON(!space_info); |
| 3015 | 3296 | ||
| 3016 | spin_lock(&space_info->lock); | 3297 | spin_lock(&space_info->lock); |
| 3017 | if (space_info->force_alloc) { | 3298 | if (space_info->force_alloc) |
| 3018 | force = 1; | 3299 | force = 1; |
| 3019 | space_info->force_alloc = 0; | ||
| 3020 | } | ||
| 3021 | if (space_info->full) { | 3300 | if (space_info->full) { |
| 3022 | spin_unlock(&space_info->lock); | 3301 | spin_unlock(&space_info->lock); |
| 3023 | goto out; | 3302 | goto out; |
| 3024 | } | 3303 | } |
| 3025 | 3304 | ||
| 3026 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3305 | thresh = space_info->total_bytes - space_info->bytes_readonly; |
| 3027 | thresh = div_factor(thresh, 6); | 3306 | thresh = div_factor(thresh, 8); |
| 3028 | if (!force && | 3307 | if (!force && |
| 3029 | (space_info->bytes_used + space_info->bytes_pinned + | 3308 | (space_info->bytes_used + space_info->bytes_pinned + |
| 3030 | space_info->bytes_reserved + alloc_bytes) < thresh) { | 3309 | space_info->bytes_reserved + alloc_bytes) < thresh) { |
| @@ -3038,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3038 | * we keep a reasonable number of metadata chunks allocated in the | 3317 | * we keep a reasonable number of metadata chunks allocated in the |
| 3039 | * FS as well. | 3318 | * FS as well. |
| 3040 | */ | 3319 | */ |
| 3041 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | 3320 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
| 3042 | fs_info->data_chunk_allocations++; | 3321 | fs_info->data_chunk_allocations++; |
| 3043 | if (!(fs_info->data_chunk_allocations % | 3322 | if (!(fs_info->data_chunk_allocations % |
| 3044 | fs_info->metadata_ratio)) | 3323 | fs_info->metadata_ratio)) |
| @@ -3046,8 +3325,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3046 | } | 3325 | } |
| 3047 | 3326 | ||
| 3048 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3327 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
| 3328 | spin_lock(&space_info->lock); | ||
| 3049 | if (ret) | 3329 | if (ret) |
| 3050 | space_info->full = 1; | 3330 | space_info->full = 1; |
| 3331 | space_info->force_alloc = 0; | ||
| 3332 | spin_unlock(&space_info->lock); | ||
| 3051 | out: | 3333 | out: |
| 3052 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3334 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
| 3053 | return ret; | 3335 | return ret; |
| @@ -4062,21 +4344,32 @@ loop: | |||
| 4062 | return ret; | 4344 | return ret; |
| 4063 | } | 4345 | } |
| 4064 | 4346 | ||
| 4065 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4347 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| 4348 | int dump_block_groups) | ||
| 4066 | { | 4349 | { |
| 4067 | struct btrfs_block_group_cache *cache; | 4350 | struct btrfs_block_group_cache *cache; |
| 4068 | 4351 | ||
| 4352 | spin_lock(&info->lock); | ||
| 4069 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4353 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
| 4070 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4354 | (unsigned long long)(info->total_bytes - info->bytes_used - |
| 4071 | info->bytes_pinned - info->bytes_reserved), | 4355 | info->bytes_pinned - info->bytes_reserved - |
| 4356 | info->bytes_super), | ||
| 4072 | (info->full) ? "" : "not "); | 4357 | (info->full) ? "" : "not "); |
| 4073 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4358 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
| 4074 | " may_use=%llu, used=%llu\n", | 4359 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
| 4360 | "\n", | ||
| 4075 | (unsigned long long)info->total_bytes, | 4361 | (unsigned long long)info->total_bytes, |
| 4076 | (unsigned long long)info->bytes_pinned, | 4362 | (unsigned long long)info->bytes_pinned, |
| 4077 | (unsigned long long)info->bytes_delalloc, | 4363 | (unsigned long long)info->bytes_delalloc, |
| 4078 | (unsigned long long)info->bytes_may_use, | 4364 | (unsigned long long)info->bytes_may_use, |
| 4079 | (unsigned long long)info->bytes_used); | 4365 | (unsigned long long)info->bytes_used, |
| 4366 | (unsigned long long)info->bytes_root, | ||
| 4367 | (unsigned long long)info->bytes_super, | ||
| 4368 | (unsigned long long)info->bytes_reserved); | ||
| 4369 | spin_unlock(&info->lock); | ||
| 4370 | |||
| 4371 | if (!dump_block_groups) | ||
| 4372 | return; | ||
| 4080 | 4373 | ||
| 4081 | down_read(&info->groups_sem); | 4374 | down_read(&info->groups_sem); |
| 4082 | list_for_each_entry(cache, &info->block_groups, list) { | 4375 | list_for_each_entry(cache, &info->block_groups, list) { |
| @@ -4144,7 +4437,7 @@ again: | |||
| 4144 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4437 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
| 4145 | "wanted %llu\n", (unsigned long long)data, | 4438 | "wanted %llu\n", (unsigned long long)data, |
| 4146 | (unsigned long long)num_bytes); | 4439 | (unsigned long long)num_bytes); |
| 4147 | dump_space_info(sinfo, num_bytes); | 4440 | dump_space_info(sinfo, num_bytes, 1); |
| 4148 | } | 4441 | } |
| 4149 | 4442 | ||
| 4150 | return ret; | 4443 | return ret; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0cb88f8146ea..de1793ba004a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | |||
| 280 | return NULL; | 280 | return NULL; |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | ||
| 284 | struct extent_state *other) | ||
| 285 | { | ||
| 286 | if (tree->ops && tree->ops->merge_extent_hook) | ||
| 287 | tree->ops->merge_extent_hook(tree->mapping->host, new, | ||
| 288 | other); | ||
| 289 | } | ||
| 290 | |||
| 283 | /* | 291 | /* |
| 284 | * utility function to look for merge candidates inside a given range. | 292 | * utility function to look for merge candidates inside a given range. |
| 285 | * Any extents with matching state are merged together into a single | 293 | * Any extents with matching state are merged together into a single |
| @@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 303 | other = rb_entry(other_node, struct extent_state, rb_node); | 311 | other = rb_entry(other_node, struct extent_state, rb_node); |
| 304 | if (other->end == state->start - 1 && | 312 | if (other->end == state->start - 1 && |
| 305 | other->state == state->state) { | 313 | other->state == state->state) { |
| 314 | merge_cb(tree, state, other); | ||
| 306 | state->start = other->start; | 315 | state->start = other->start; |
| 307 | other->tree = NULL; | 316 | other->tree = NULL; |
| 308 | rb_erase(&other->rb_node, &tree->state); | 317 | rb_erase(&other->rb_node, &tree->state); |
| @@ -314,33 +323,37 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 314 | other = rb_entry(other_node, struct extent_state, rb_node); | 323 | other = rb_entry(other_node, struct extent_state, rb_node); |
| 315 | if (other->start == state->end + 1 && | 324 | if (other->start == state->end + 1 && |
| 316 | other->state == state->state) { | 325 | other->state == state->state) { |
| 326 | merge_cb(tree, state, other); | ||
| 317 | other->start = state->start; | 327 | other->start = state->start; |
| 318 | state->tree = NULL; | 328 | state->tree = NULL; |
| 319 | rb_erase(&state->rb_node, &tree->state); | 329 | rb_erase(&state->rb_node, &tree->state); |
| 320 | free_extent_state(state); | 330 | free_extent_state(state); |
| 331 | state = NULL; | ||
| 321 | } | 332 | } |
| 322 | } | 333 | } |
| 334 | |||
| 323 | return 0; | 335 | return 0; |
| 324 | } | 336 | } |
| 325 | 337 | ||
| 326 | static void set_state_cb(struct extent_io_tree *tree, | 338 | static int set_state_cb(struct extent_io_tree *tree, |
| 327 | struct extent_state *state, | 339 | struct extent_state *state, |
| 328 | unsigned long bits) | 340 | unsigned long bits) |
| 329 | { | 341 | { |
| 330 | if (tree->ops && tree->ops->set_bit_hook) { | 342 | if (tree->ops && tree->ops->set_bit_hook) { |
| 331 | tree->ops->set_bit_hook(tree->mapping->host, state->start, | 343 | return tree->ops->set_bit_hook(tree->mapping->host, |
| 332 | state->end, state->state, bits); | 344 | state->start, state->end, |
| 345 | state->state, bits); | ||
| 333 | } | 346 | } |
| 347 | |||
| 348 | return 0; | ||
| 334 | } | 349 | } |
| 335 | 350 | ||
| 336 | static void clear_state_cb(struct extent_io_tree *tree, | 351 | static void clear_state_cb(struct extent_io_tree *tree, |
| 337 | struct extent_state *state, | 352 | struct extent_state *state, |
| 338 | unsigned long bits) | 353 | unsigned long bits) |
| 339 | { | 354 | { |
| 340 | if (tree->ops && tree->ops->clear_bit_hook) { | 355 | if (tree->ops && tree->ops->clear_bit_hook) |
| 341 | tree->ops->clear_bit_hook(tree->mapping->host, state->start, | 356 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
| 342 | state->end, state->state, bits); | ||
| 343 | } | ||
| 344 | } | 357 | } |
| 345 | 358 | ||
| 346 | /* | 359 | /* |
| @@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 358 | int bits) | 371 | int bits) |
| 359 | { | 372 | { |
| 360 | struct rb_node *node; | 373 | struct rb_node *node; |
| 374 | int ret; | ||
| 361 | 375 | ||
| 362 | if (end < start) { | 376 | if (end < start) { |
| 363 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 377 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
| @@ -365,11 +379,14 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 365 | (unsigned long long)start); | 379 | (unsigned long long)start); |
| 366 | WARN_ON(1); | 380 | WARN_ON(1); |
| 367 | } | 381 | } |
| 368 | if (bits & EXTENT_DIRTY) | ||
| 369 | tree->dirty_bytes += end - start + 1; | ||
| 370 | state->start = start; | 382 | state->start = start; |
| 371 | state->end = end; | 383 | state->end = end; |
| 372 | set_state_cb(tree, state, bits); | 384 | ret = set_state_cb(tree, state, bits); |
| 385 | if (ret) | ||
| 386 | return ret; | ||
| 387 | |||
| 388 | if (bits & EXTENT_DIRTY) | ||
| 389 | tree->dirty_bytes += end - start + 1; | ||
| 373 | state->state |= bits; | 390 | state->state |= bits; |
| 374 | node = tree_insert(&tree->state, end, &state->rb_node); | 391 | node = tree_insert(&tree->state, end, &state->rb_node); |
| 375 | if (node) { | 392 | if (node) { |
| @@ -387,6 +404,15 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 387 | return 0; | 404 | return 0; |
| 388 | } | 405 | } |
| 389 | 406 | ||
| 407 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | ||
| 408 | u64 split) | ||
| 409 | { | ||
| 410 | if (tree->ops && tree->ops->split_extent_hook) | ||
| 411 | return tree->ops->split_extent_hook(tree->mapping->host, | ||
| 412 | orig, split); | ||
| 413 | return 0; | ||
| 414 | } | ||
| 415 | |||
| 390 | /* | 416 | /* |
| 391 | * split a given extent state struct in two, inserting the preallocated | 417 | * split a given extent state struct in two, inserting the preallocated |
| 392 | * struct 'prealloc' as the newly created second half. 'split' indicates an | 418 | * struct 'prealloc' as the newly created second half. 'split' indicates an |
| @@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 405 | struct extent_state *prealloc, u64 split) | 431 | struct extent_state *prealloc, u64 split) |
| 406 | { | 432 | { |
| 407 | struct rb_node *node; | 433 | struct rb_node *node; |
| 434 | |||
| 435 | split_cb(tree, orig, split); | ||
| 436 | |||
| 408 | prealloc->start = orig->start; | 437 | prealloc->start = orig->start; |
| 409 | prealloc->end = split - 1; | 438 | prealloc->end = split - 1; |
| 410 | prealloc->state = orig->state; | 439 | prealloc->state = orig->state; |
| @@ -542,8 +571,8 @@ hit_next: | |||
| 542 | if (err) | 571 | if (err) |
| 543 | goto out; | 572 | goto out; |
| 544 | if (state->end <= end) { | 573 | if (state->end <= end) { |
| 545 | set |= clear_state_bit(tree, state, bits, | 574 | set |= clear_state_bit(tree, state, bits, wake, |
| 546 | wake, delete); | 575 | delete); |
| 547 | if (last_end == (u64)-1) | 576 | if (last_end == (u64)-1) |
| 548 | goto out; | 577 | goto out; |
| 549 | start = last_end + 1; | 578 | start = last_end + 1; |
| @@ -561,12 +590,11 @@ hit_next: | |||
| 561 | prealloc = alloc_extent_state(GFP_ATOMIC); | 590 | prealloc = alloc_extent_state(GFP_ATOMIC); |
| 562 | err = split_state(tree, state, prealloc, end + 1); | 591 | err = split_state(tree, state, prealloc, end + 1); |
| 563 | BUG_ON(err == -EEXIST); | 592 | BUG_ON(err == -EEXIST); |
| 564 | |||
| 565 | if (wake) | 593 | if (wake) |
| 566 | wake_up(&state->wq); | 594 | wake_up(&state->wq); |
| 567 | 595 | ||
| 568 | set |= clear_state_bit(tree, prealloc, bits, | 596 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); |
| 569 | wake, delete); | 597 | |
| 570 | prealloc = NULL; | 598 | prealloc = NULL; |
| 571 | goto out; | 599 | goto out; |
| 572 | } | 600 | } |
| @@ -667,16 +695,23 @@ out: | |||
| 667 | return 0; | 695 | return 0; |
| 668 | } | 696 | } |
| 669 | 697 | ||
| 670 | static void set_state_bits(struct extent_io_tree *tree, | 698 | static int set_state_bits(struct extent_io_tree *tree, |
| 671 | struct extent_state *state, | 699 | struct extent_state *state, |
| 672 | int bits) | 700 | int bits) |
| 673 | { | 701 | { |
| 702 | int ret; | ||
| 703 | |||
| 704 | ret = set_state_cb(tree, state, bits); | ||
| 705 | if (ret) | ||
| 706 | return ret; | ||
| 707 | |||
| 674 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 708 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
| 675 | u64 range = state->end - state->start + 1; | 709 | u64 range = state->end - state->start + 1; |
| 676 | tree->dirty_bytes += range; | 710 | tree->dirty_bytes += range; |
| 677 | } | 711 | } |
| 678 | set_state_cb(tree, state, bits); | ||
| 679 | state->state |= bits; | 712 | state->state |= bits; |
| 713 | |||
| 714 | return 0; | ||
| 680 | } | 715 | } |
| 681 | 716 | ||
| 682 | static void cache_state(struct extent_state *state, | 717 | static void cache_state(struct extent_state *state, |
| @@ -758,7 +793,10 @@ hit_next: | |||
| 758 | goto out; | 793 | goto out; |
| 759 | } | 794 | } |
| 760 | 795 | ||
| 761 | set_state_bits(tree, state, bits); | 796 | err = set_state_bits(tree, state, bits); |
| 797 | if (err) | ||
| 798 | goto out; | ||
| 799 | |||
| 762 | cache_state(state, cached_state); | 800 | cache_state(state, cached_state); |
| 763 | merge_state(tree, state); | 801 | merge_state(tree, state); |
| 764 | if (last_end == (u64)-1) | 802 | if (last_end == (u64)-1) |
| @@ -805,7 +843,9 @@ hit_next: | |||
| 805 | if (err) | 843 | if (err) |
| 806 | goto out; | 844 | goto out; |
| 807 | if (state->end <= end) { | 845 | if (state->end <= end) { |
| 808 | set_state_bits(tree, state, bits); | 846 | err = set_state_bits(tree, state, bits); |
| 847 | if (err) | ||
| 848 | goto out; | ||
| 809 | cache_state(state, cached_state); | 849 | cache_state(state, cached_state); |
| 810 | merge_state(tree, state); | 850 | merge_state(tree, state); |
| 811 | if (last_end == (u64)-1) | 851 | if (last_end == (u64)-1) |
| @@ -829,11 +869,13 @@ hit_next: | |||
| 829 | this_end = last_start - 1; | 869 | this_end = last_start - 1; |
| 830 | err = insert_state(tree, prealloc, start, this_end, | 870 | err = insert_state(tree, prealloc, start, this_end, |
| 831 | bits); | 871 | bits); |
| 832 | cache_state(prealloc, cached_state); | ||
| 833 | prealloc = NULL; | ||
| 834 | BUG_ON(err == -EEXIST); | 872 | BUG_ON(err == -EEXIST); |
| 835 | if (err) | 873 | if (err) { |
| 874 | prealloc = NULL; | ||
| 836 | goto out; | 875 | goto out; |
| 876 | } | ||
| 877 | cache_state(prealloc, cached_state); | ||
| 878 | prealloc = NULL; | ||
| 837 | start = this_end + 1; | 879 | start = this_end + 1; |
| 838 | goto search_again; | 880 | goto search_again; |
| 839 | } | 881 | } |
| @@ -852,7 +894,11 @@ hit_next: | |||
| 852 | err = split_state(tree, state, prealloc, end + 1); | 894 | err = split_state(tree, state, prealloc, end + 1); |
| 853 | BUG_ON(err == -EEXIST); | 895 | BUG_ON(err == -EEXIST); |
| 854 | 896 | ||
| 855 | set_state_bits(tree, prealloc, bits); | 897 | err = set_state_bits(tree, prealloc, bits); |
| 898 | if (err) { | ||
| 899 | prealloc = NULL; | ||
| 900 | goto out; | ||
| 901 | } | ||
| 856 | cache_state(prealloc, cached_state); | 902 | cache_state(prealloc, cached_state); |
| 857 | merge_state(tree, prealloc); | 903 | merge_state(tree, prealloc); |
| 858 | prealloc = NULL; | 904 | prealloc = NULL; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 14ed16fd862d..4794ec891fed 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -60,8 +60,13 @@ struct extent_io_ops { | |||
| 60 | struct extent_state *state, int uptodate); | 60 | struct extent_state *state, int uptodate); |
| 61 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 61 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, |
| 62 | unsigned long old, unsigned long bits); | 62 | unsigned long old, unsigned long bits); |
| 63 | int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, | 63 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
| 64 | unsigned long old, unsigned long bits); | 64 | unsigned long bits); |
| 65 | int (*merge_extent_hook)(struct inode *inode, | ||
| 66 | struct extent_state *new, | ||
| 67 | struct extent_state *other); | ||
| 68 | int (*split_extent_hook)(struct inode *inode, | ||
| 69 | struct extent_state *orig, u64 split); | ||
| 65 | int (*write_cache_pages_lock_hook)(struct page *page); | 70 | int (*write_cache_pages_lock_hook)(struct page *page); |
| 66 | }; | 71 | }; |
| 67 | 72 | ||
| @@ -79,10 +84,14 @@ struct extent_state { | |||
| 79 | u64 start; | 84 | u64 start; |
| 80 | u64 end; /* inclusive */ | 85 | u64 end; /* inclusive */ |
| 81 | struct rb_node rb_node; | 86 | struct rb_node rb_node; |
| 87 | |||
| 88 | /* ADD NEW ELEMENTS AFTER THIS */ | ||
| 82 | struct extent_io_tree *tree; | 89 | struct extent_io_tree *tree; |
| 83 | wait_queue_head_t wq; | 90 | wait_queue_head_t wq; |
| 84 | atomic_t refs; | 91 | atomic_t refs; |
| 85 | unsigned long state; | 92 | unsigned long state; |
| 93 | u64 split_start; | ||
| 94 | u64 split_end; | ||
| 86 | 95 | ||
| 87 | /* for use by the FS */ | 96 | /* for use by the FS */ |
| 88 | u64 private; | 97 | u64 private; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 571ad3c13b47..1be96ba6f6bb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -123,7 +123,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| 124 | 124 | ||
| 125 | end_of_last_block = start_pos + num_bytes - 1; | 125 | end_of_last_block = start_pos + num_bytes - 1; |
| 126 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | 126 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); |
| 127 | if (err) | ||
| 128 | return err; | ||
| 129 | |||
| 127 | for (i = 0; i < num_pages; i++) { | 130 | for (i = 0; i < num_pages; i++) { |
| 128 | struct page *p = pages[i]; | 131 | struct page *p = pages[i]; |
| 129 | SetPageUptodate(p); | 132 | SetPageUptodate(p); |
| @@ -927,6 +930,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 927 | err = file_remove_suid(file); | 930 | err = file_remove_suid(file); |
| 928 | if (err) | 931 | if (err) |
| 929 | goto out_nolock; | 932 | goto out_nolock; |
| 933 | |||
| 934 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 935 | if (err) | ||
| 936 | goto out_nolock; | ||
| 937 | |||
| 930 | file_update_time(file); | 938 | file_update_time(file); |
| 931 | 939 | ||
| 932 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 940 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
| @@ -1028,6 +1036,7 @@ out: | |||
| 1028 | mutex_unlock(&inode->i_mutex); | 1036 | mutex_unlock(&inode->i_mutex); |
| 1029 | if (ret) | 1037 | if (ret) |
| 1030 | err = ret; | 1038 | err = ret; |
| 1039 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 1031 | 1040 | ||
| 1032 | out_nolock: | 1041 | out_nolock: |
| 1033 | kfree(pages); | 1042 | kfree(pages); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 01c5f8b5a34e..3cc5677f5440 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -1159,6 +1159,83 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1159 | return ret; | 1159 | return ret; |
| 1160 | } | 1160 | } |
| 1161 | 1161 | ||
| 1162 | static int btrfs_split_extent_hook(struct inode *inode, | ||
| 1163 | struct extent_state *orig, u64 split) | ||
| 1164 | { | ||
| 1165 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1166 | u64 size; | ||
| 1167 | |||
| 1168 | if (!(orig->state & EXTENT_DELALLOC)) | ||
| 1169 | return 0; | ||
| 1170 | |||
| 1171 | size = orig->end - orig->start + 1; | ||
| 1172 | if (size > root->fs_info->max_extent) { | ||
| 1173 | u64 num_extents; | ||
| 1174 | u64 new_size; | ||
| 1175 | |||
| 1176 | new_size = orig->end - split + 1; | ||
| 1177 | num_extents = div64_u64(size + root->fs_info->max_extent - 1, | ||
| 1178 | root->fs_info->max_extent); | ||
| 1179 | |||
| 1180 | /* | ||
| 1181 | * if we break a large extent up then leave delalloc_extents be, | ||
| 1182 | * since we've already accounted for the large extent. | ||
| 1183 | */ | ||
| 1184 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1185 | root->fs_info->max_extent) < num_extents) | ||
| 1186 | return 0; | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | BTRFS_I(inode)->delalloc_extents++; | ||
| 1190 | |||
| 1191 | return 0; | ||
| 1192 | } | ||
| 1193 | |||
| 1194 | /* | ||
| 1195 | * extent_io.c merge_extent_hook, used to track merged delayed allocation | ||
| 1196 | * extents so we can keep track of new extents that are just merged onto old | ||
| 1197 | * extents, such as when we are doing sequential writes, so we can properly | ||
| 1198 | * account for the metadata space we'll need. | ||
| 1199 | */ | ||
| 1200 | static int btrfs_merge_extent_hook(struct inode *inode, | ||
| 1201 | struct extent_state *new, | ||
| 1202 | struct extent_state *other) | ||
| 1203 | { | ||
| 1204 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1205 | u64 new_size, old_size; | ||
| 1206 | u64 num_extents; | ||
| 1207 | |||
| 1208 | /* not delalloc, ignore it */ | ||
| 1209 | if (!(other->state & EXTENT_DELALLOC)) | ||
| 1210 | return 0; | ||
| 1211 | |||
| 1212 | old_size = other->end - other->start + 1; | ||
| 1213 | if (new->start < other->start) | ||
| 1214 | new_size = other->end - new->start + 1; | ||
| 1215 | else | ||
| 1216 | new_size = new->end - other->start + 1; | ||
| 1217 | |||
| 1218 | /* we're not bigger than the max, unreserve the space and go */ | ||
| 1219 | if (new_size <= root->fs_info->max_extent) { | ||
| 1220 | BTRFS_I(inode)->delalloc_extents--; | ||
| 1221 | return 0; | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | /* | ||
| 1225 | * If we grew by another max_extent, just return, we want to keep that | ||
| 1226 | * reserved amount. | ||
| 1227 | */ | ||
| 1228 | num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, | ||
| 1229 | root->fs_info->max_extent); | ||
| 1230 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1231 | root->fs_info->max_extent) > num_extents) | ||
| 1232 | return 0; | ||
| 1233 | |||
| 1234 | BTRFS_I(inode)->delalloc_extents--; | ||
| 1235 | |||
| 1236 | return 0; | ||
| 1237 | } | ||
| 1238 | |||
| 1162 | /* | 1239 | /* |
| 1163 | * extent_io.c set_bit_hook, used to track delayed allocation | 1240 | * extent_io.c set_bit_hook, used to track delayed allocation |
| 1164 | * bytes in this file, and to maintain the list of inodes that | 1241 | * bytes in this file, and to maintain the list of inodes that |
| @@ -1167,6 +1244,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1167 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1244 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, |
| 1168 | unsigned long old, unsigned long bits) | 1245 | unsigned long old, unsigned long bits) |
| 1169 | { | 1246 | { |
| 1247 | |||
| 1170 | /* | 1248 | /* |
| 1171 | * set_bit and clear bit hooks normally require _irqsave/restore | 1249 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1172 | * but in this case, we are only testing for the DELALLOC | 1250 | * but in this case, we are only testing for the DELALLOC |
| @@ -1174,6 +1252,8 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1174 | */ | 1252 | */ |
| 1175 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1253 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
| 1176 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1254 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1255 | |||
| 1256 | BTRFS_I(inode)->delalloc_extents++; | ||
| 1177 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1257 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); |
| 1178 | spin_lock(&root->fs_info->delalloc_lock); | 1258 | spin_lock(&root->fs_info->delalloc_lock); |
| 1179 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1259 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; |
| @@ -1190,22 +1270,27 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1190 | /* | 1270 | /* |
| 1191 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1271 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
| 1192 | */ | 1272 | */ |
| 1193 | static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | 1273 | static int btrfs_clear_bit_hook(struct inode *inode, |
| 1194 | unsigned long old, unsigned long bits) | 1274 | struct extent_state *state, unsigned long bits) |
| 1195 | { | 1275 | { |
| 1196 | /* | 1276 | /* |
| 1197 | * set_bit and clear bit hooks normally require _irqsave/restore | 1277 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1198 | * but in this case, we are only testing for the DELALLOC | 1278 | * but in this case, we are only testing for the DELALLOC |
| 1199 | * bit, which is only set or cleared with irqs on | 1279 | * bit, which is only set or cleared with irqs on |
| 1200 | */ | 1280 | */ |
| 1201 | if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1281 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
| 1202 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1282 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1203 | 1283 | ||
| 1284 | BTRFS_I(inode)->delalloc_extents--; | ||
| 1285 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 1286 | |||
| 1204 | spin_lock(&root->fs_info->delalloc_lock); | 1287 | spin_lock(&root->fs_info->delalloc_lock); |
| 1205 | if (end - start + 1 > root->fs_info->delalloc_bytes) { | 1288 | if (state->end - state->start + 1 > |
| 1289 | root->fs_info->delalloc_bytes) { | ||
| 1206 | printk(KERN_INFO "btrfs warning: delalloc account " | 1290 | printk(KERN_INFO "btrfs warning: delalloc account " |
| 1207 | "%llu %llu\n", | 1291 | "%llu %llu\n", |
| 1208 | (unsigned long long)end - start + 1, | 1292 | (unsigned long long) |
| 1293 | state->end - state->start + 1, | ||
| 1209 | (unsigned long long) | 1294 | (unsigned long long) |
| 1210 | root->fs_info->delalloc_bytes); | 1295 | root->fs_info->delalloc_bytes); |
| 1211 | btrfs_delalloc_free_space(root, inode, (u64)-1); | 1296 | btrfs_delalloc_free_space(root, inode, (u64)-1); |
| @@ -1213,9 +1298,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1213 | BTRFS_I(inode)->delalloc_bytes = 0; | 1298 | BTRFS_I(inode)->delalloc_bytes = 0; |
| 1214 | } else { | 1299 | } else { |
| 1215 | btrfs_delalloc_free_space(root, inode, | 1300 | btrfs_delalloc_free_space(root, inode, |
| 1216 | end - start + 1); | 1301 | state->end - |
| 1217 | root->fs_info->delalloc_bytes -= end - start + 1; | 1302 | state->start + 1); |
| 1218 | BTRFS_I(inode)->delalloc_bytes -= end - start + 1; | 1303 | root->fs_info->delalloc_bytes -= state->end - |
| 1304 | state->start + 1; | ||
| 1305 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
| 1306 | state->start + 1; | ||
| 1219 | } | 1307 | } |
| 1220 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1308 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
| 1221 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1309 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
| @@ -2950,7 +3038,12 @@ again: | |||
| 2950 | goto again; | 3038 | goto again; |
| 2951 | } | 3039 | } |
| 2952 | 3040 | ||
| 2953 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 3041 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 3042 | if (ret) { | ||
| 3043 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3044 | goto out_unlock; | ||
| 3045 | } | ||
| 3046 | |||
| 2954 | ret = 0; | 3047 | ret = 0; |
| 2955 | if (offset != PAGE_CACHE_SIZE) { | 3048 | if (offset != PAGE_CACHE_SIZE) { |
| 2956 | kaddr = kmap(page); | 3049 | kaddr = kmap(page); |
| @@ -2981,15 +3074,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 2981 | u64 last_byte; | 3074 | u64 last_byte; |
| 2982 | u64 cur_offset; | 3075 | u64 cur_offset; |
| 2983 | u64 hole_size; | 3076 | u64 hole_size; |
| 2984 | int err; | 3077 | int err = 0; |
| 2985 | 3078 | ||
| 2986 | if (size <= hole_start) | 3079 | if (size <= hole_start) |
| 2987 | return 0; | 3080 | return 0; |
| 2988 | 3081 | ||
| 2989 | err = btrfs_check_metadata_free_space(root); | ||
| 2990 | if (err) | ||
| 2991 | return err; | ||
| 2992 | |||
| 2993 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | 3082 | btrfs_truncate_page(inode->i_mapping, inode->i_size); |
| 2994 | 3083 | ||
| 2995 | while (1) { | 3084 | while (1) { |
| @@ -3024,12 +3113,18 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 3024 | cur_offset, &hint_byte, 1); | 3113 | cur_offset, &hint_byte, 1); |
| 3025 | if (err) | 3114 | if (err) |
| 3026 | break; | 3115 | break; |
| 3116 | |||
| 3117 | err = btrfs_reserve_metadata_space(root, 1); | ||
| 3118 | if (err) | ||
| 3119 | break; | ||
| 3120 | |||
| 3027 | err = btrfs_insert_file_extent(trans, root, | 3121 | err = btrfs_insert_file_extent(trans, root, |
| 3028 | inode->i_ino, cur_offset, 0, | 3122 | inode->i_ino, cur_offset, 0, |
| 3029 | 0, hole_size, 0, hole_size, | 3123 | 0, hole_size, 0, hole_size, |
| 3030 | 0, 0, 0); | 3124 | 0, 0, 0); |
| 3031 | btrfs_drop_extent_cache(inode, hole_start, | 3125 | btrfs_drop_extent_cache(inode, hole_start, |
| 3032 | last_byte - 1, 0); | 3126 | last_byte - 1, 0); |
| 3127 | btrfs_unreserve_metadata_space(root, 1); | ||
| 3033 | } | 3128 | } |
| 3034 | free_extent_map(em); | 3129 | free_extent_map(em); |
| 3035 | cur_offset = last_byte; | 3130 | cur_offset = last_byte; |
| @@ -3990,11 +4085,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 3990 | if (!new_valid_dev(rdev)) | 4085 | if (!new_valid_dev(rdev)) |
| 3991 | return -EINVAL; | 4086 | return -EINVAL; |
| 3992 | 4087 | ||
| 3993 | err = btrfs_check_metadata_free_space(root); | 4088 | /* |
| 4089 | * 2 for inode item and ref | ||
| 4090 | * 2 for dir items | ||
| 4091 | * 1 for xattr if selinux is on | ||
| 4092 | */ | ||
| 4093 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 3994 | if (err) | 4094 | if (err) |
| 3995 | goto fail; | 4095 | return err; |
| 3996 | 4096 | ||
| 3997 | trans = btrfs_start_transaction(root, 1); | 4097 | trans = btrfs_start_transaction(root, 1); |
| 4098 | if (!trans) | ||
| 4099 | goto fail; | ||
| 3998 | btrfs_set_trans_block_group(trans, dir); | 4100 | btrfs_set_trans_block_group(trans, dir); |
| 3999 | 4101 | ||
| 4000 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4102 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -4032,6 +4134,7 @@ out_unlock: | |||
| 4032 | nr = trans->blocks_used; | 4134 | nr = trans->blocks_used; |
| 4033 | btrfs_end_transaction_throttle(trans, root); | 4135 | btrfs_end_transaction_throttle(trans, root); |
| 4034 | fail: | 4136 | fail: |
| 4137 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4035 | if (drop_inode) { | 4138 | if (drop_inode) { |
| 4036 | inode_dec_link_count(inode); | 4139 | inode_dec_link_count(inode); |
| 4037 | iput(inode); | 4140 | iput(inode); |
| @@ -4052,10 +4155,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4052 | u64 objectid; | 4155 | u64 objectid; |
| 4053 | u64 index = 0; | 4156 | u64 index = 0; |
| 4054 | 4157 | ||
| 4055 | err = btrfs_check_metadata_free_space(root); | 4158 | /* |
| 4159 | * 2 for inode item and ref | ||
| 4160 | * 2 for dir items | ||
| 4161 | * 1 for xattr if selinux is on | ||
| 4162 | */ | ||
| 4163 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 4056 | if (err) | 4164 | if (err) |
| 4057 | goto fail; | 4165 | return err; |
| 4166 | |||
| 4058 | trans = btrfs_start_transaction(root, 1); | 4167 | trans = btrfs_start_transaction(root, 1); |
| 4168 | if (!trans) | ||
| 4169 | goto fail; | ||
| 4059 | btrfs_set_trans_block_group(trans, dir); | 4170 | btrfs_set_trans_block_group(trans, dir); |
| 4060 | 4171 | ||
| 4061 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4172 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -4096,6 +4207,7 @@ out_unlock: | |||
| 4096 | nr = trans->blocks_used; | 4207 | nr = trans->blocks_used; |
| 4097 | btrfs_end_transaction_throttle(trans, root); | 4208 | btrfs_end_transaction_throttle(trans, root); |
| 4098 | fail: | 4209 | fail: |
| 4210 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4099 | if (drop_inode) { | 4211 | if (drop_inode) { |
| 4100 | inode_dec_link_count(inode); | 4212 | inode_dec_link_count(inode); |
| 4101 | iput(inode); | 4213 | iput(inode); |
| @@ -4118,10 +4230,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4118 | if (inode->i_nlink == 0) | 4230 | if (inode->i_nlink == 0) |
| 4119 | return -ENOENT; | 4231 | return -ENOENT; |
| 4120 | 4232 | ||
| 4121 | btrfs_inc_nlink(inode); | 4233 | /* |
| 4122 | err = btrfs_check_metadata_free_space(root); | 4234 | * 1 item for inode ref |
| 4235 | * 2 items for dir items | ||
| 4236 | */ | ||
| 4237 | err = btrfs_reserve_metadata_space(root, 3); | ||
| 4123 | if (err) | 4238 | if (err) |
| 4124 | goto fail; | 4239 | return err; |
| 4240 | |||
| 4241 | btrfs_inc_nlink(inode); | ||
| 4242 | |||
| 4125 | err = btrfs_set_inode_index(dir, &index); | 4243 | err = btrfs_set_inode_index(dir, &index); |
| 4126 | if (err) | 4244 | if (err) |
| 4127 | goto fail; | 4245 | goto fail; |
| @@ -4145,6 +4263,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4145 | nr = trans->blocks_used; | 4263 | nr = trans->blocks_used; |
| 4146 | btrfs_end_transaction_throttle(trans, root); | 4264 | btrfs_end_transaction_throttle(trans, root); |
| 4147 | fail: | 4265 | fail: |
| 4266 | btrfs_unreserve_metadata_space(root, 3); | ||
| 4148 | if (drop_inode) { | 4267 | if (drop_inode) { |
| 4149 | inode_dec_link_count(inode); | 4268 | inode_dec_link_count(inode); |
| 4150 | iput(inode); | 4269 | iput(inode); |
| @@ -4164,17 +4283,21 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 4164 | u64 index = 0; | 4283 | u64 index = 0; |
| 4165 | unsigned long nr = 1; | 4284 | unsigned long nr = 1; |
| 4166 | 4285 | ||
| 4167 | err = btrfs_check_metadata_free_space(root); | 4286 | /* |
| 4287 | * 2 items for inode and ref | ||
| 4288 | * 2 items for dir items | ||
| 4289 | * 1 for xattr if selinux is on | ||
| 4290 | */ | ||
| 4291 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 4168 | if (err) | 4292 | if (err) |
| 4169 | goto out_unlock; | 4293 | return err; |
| 4170 | 4294 | ||
| 4171 | trans = btrfs_start_transaction(root, 1); | 4295 | trans = btrfs_start_transaction(root, 1); |
| 4172 | btrfs_set_trans_block_group(trans, dir); | 4296 | if (!trans) { |
| 4173 | 4297 | err = -ENOMEM; | |
| 4174 | if (IS_ERR(trans)) { | ||
| 4175 | err = PTR_ERR(trans); | ||
| 4176 | goto out_unlock; | 4298 | goto out_unlock; |
| 4177 | } | 4299 | } |
| 4300 | btrfs_set_trans_block_group(trans, dir); | ||
| 4178 | 4301 | ||
| 4179 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4302 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| 4180 | if (err) { | 4303 | if (err) { |
| @@ -4223,6 +4346,7 @@ out_fail: | |||
| 4223 | btrfs_end_transaction_throttle(trans, root); | 4346 | btrfs_end_transaction_throttle(trans, root); |
| 4224 | 4347 | ||
| 4225 | out_unlock: | 4348 | out_unlock: |
| 4349 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4226 | if (drop_on_err) | 4350 | if (drop_on_err) |
| 4227 | iput(inode); | 4351 | iput(inode); |
| 4228 | btrfs_btree_balance_dirty(root, nr); | 4352 | btrfs_btree_balance_dirty(root, nr); |
| @@ -4747,6 +4871,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 4747 | goto out; | 4871 | goto out; |
| 4748 | } | 4872 | } |
| 4749 | 4873 | ||
| 4874 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 4875 | if (ret) { | ||
| 4876 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 4877 | ret = VM_FAULT_SIGBUS; | ||
| 4878 | goto out; | ||
| 4879 | } | ||
| 4880 | |||
| 4750 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 4881 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
| 4751 | again: | 4882 | again: |
| 4752 | lock_page(page); | 4883 | lock_page(page); |
| @@ -4778,7 +4909,12 @@ again: | |||
| 4778 | goto again; | 4909 | goto again; |
| 4779 | } | 4910 | } |
| 4780 | 4911 | ||
| 4781 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 4912 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 4913 | if (ret) { | ||
| 4914 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 4915 | ret = VM_FAULT_SIGBUS; | ||
| 4916 | goto out_unlock; | ||
| 4917 | } | ||
| 4782 | ret = 0; | 4918 | ret = 0; |
| 4783 | 4919 | ||
| 4784 | /* page is wholly or partially inside EOF */ | 4920 | /* page is wholly or partially inside EOF */ |
| @@ -4801,6 +4937,7 @@ again: | |||
| 4801 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4937 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
| 4802 | 4938 | ||
| 4803 | out_unlock: | 4939 | out_unlock: |
| 4940 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 4804 | if (!ret) | 4941 | if (!ret) |
| 4805 | return VM_FAULT_LOCKED; | 4942 | return VM_FAULT_LOCKED; |
| 4806 | unlock_page(page); | 4943 | unlock_page(page); |
| @@ -4917,6 +5054,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 4917 | return NULL; | 5054 | return NULL; |
| 4918 | ei->last_trans = 0; | 5055 | ei->last_trans = 0; |
| 4919 | ei->logged_trans = 0; | 5056 | ei->logged_trans = 0; |
| 5057 | ei->delalloc_extents = 0; | ||
| 5058 | ei->delalloc_reserved_extents = 0; | ||
| 4920 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 5059 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| 4921 | INIT_LIST_HEAD(&ei->i_orphan); | 5060 | INIT_LIST_HEAD(&ei->i_orphan); |
| 4922 | INIT_LIST_HEAD(&ei->ordered_operations); | 5061 | INIT_LIST_HEAD(&ei->ordered_operations); |
| @@ -5070,7 +5209,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 5070 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 5209 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 5071 | return -ENOTEMPTY; | 5210 | return -ENOTEMPTY; |
| 5072 | 5211 | ||
| 5073 | ret = btrfs_check_metadata_free_space(root); | 5212 | /* |
| 5213 | * 2 items for dir items | ||
| 5214 | * 1 item for orphan entry | ||
| 5215 | * 1 item for ref | ||
| 5216 | */ | ||
| 5217 | ret = btrfs_reserve_metadata_space(root, 4); | ||
| 5074 | if (ret) | 5218 | if (ret) |
| 5075 | return ret; | 5219 | return ret; |
| 5076 | 5220 | ||
| @@ -5185,6 +5329,8 @@ out_fail: | |||
| 5185 | 5329 | ||
| 5186 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 5330 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 5187 | up_read(&root->fs_info->subvol_sem); | 5331 | up_read(&root->fs_info->subvol_sem); |
| 5332 | |||
| 5333 | btrfs_unreserve_metadata_space(root, 4); | ||
| 5188 | return ret; | 5334 | return ret; |
| 5189 | } | 5335 | } |
| 5190 | 5336 | ||
| @@ -5256,11 +5402,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 5256 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 5402 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
| 5257 | return -ENAMETOOLONG; | 5403 | return -ENAMETOOLONG; |
| 5258 | 5404 | ||
| 5259 | err = btrfs_check_metadata_free_space(root); | 5405 | /* |
| 5406 | * 2 items for inode item and ref | ||
| 5407 | * 2 items for dir items | ||
| 5408 | * 1 item for xattr if selinux is on | ||
| 5409 | */ | ||
| 5410 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 5260 | if (err) | 5411 | if (err) |
| 5261 | goto out_fail; | 5412 | return err; |
| 5262 | 5413 | ||
| 5263 | trans = btrfs_start_transaction(root, 1); | 5414 | trans = btrfs_start_transaction(root, 1); |
| 5415 | if (!trans) | ||
| 5416 | goto out_fail; | ||
| 5264 | btrfs_set_trans_block_group(trans, dir); | 5417 | btrfs_set_trans_block_group(trans, dir); |
| 5265 | 5418 | ||
| 5266 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 5419 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -5341,6 +5494,7 @@ out_unlock: | |||
| 5341 | nr = trans->blocks_used; | 5494 | nr = trans->blocks_used; |
| 5342 | btrfs_end_transaction_throttle(trans, root); | 5495 | btrfs_end_transaction_throttle(trans, root); |
| 5343 | out_fail: | 5496 | out_fail: |
| 5497 | btrfs_unreserve_metadata_space(root, 5); | ||
| 5344 | if (drop_inode) { | 5498 | if (drop_inode) { |
| 5345 | inode_dec_link_count(inode); | 5499 | inode_dec_link_count(inode); |
| 5346 | iput(inode); | 5500 | iput(inode); |
| @@ -5362,6 +5516,11 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5362 | 5516 | ||
| 5363 | while (num_bytes > 0) { | 5517 | while (num_bytes > 0) { |
| 5364 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 5518 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
| 5519 | |||
| 5520 | ret = btrfs_reserve_metadata_space(root, 1); | ||
| 5521 | if (ret) | ||
| 5522 | goto out; | ||
| 5523 | |||
| 5365 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 5524 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
| 5366 | root->sectorsize, 0, alloc_hint, | 5525 | root->sectorsize, 0, alloc_hint, |
| 5367 | (u64)-1, &ins, 1); | 5526 | (u64)-1, &ins, 1); |
| @@ -5381,6 +5540,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5381 | num_bytes -= ins.offset; | 5540 | num_bytes -= ins.offset; |
| 5382 | cur_offset += ins.offset; | 5541 | cur_offset += ins.offset; |
| 5383 | alloc_hint = ins.objectid + ins.offset; | 5542 | alloc_hint = ins.objectid + ins.offset; |
| 5543 | btrfs_unreserve_metadata_space(root, 1); | ||
| 5384 | } | 5544 | } |
| 5385 | out: | 5545 | out: |
| 5386 | if (cur_offset > start) { | 5546 | if (cur_offset > start) { |
| @@ -5566,6 +5726,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
| 5566 | .readpage_io_failed_hook = btrfs_io_failed_hook, | 5726 | .readpage_io_failed_hook = btrfs_io_failed_hook, |
| 5567 | .set_bit_hook = btrfs_set_bit_hook, | 5727 | .set_bit_hook = btrfs_set_bit_hook, |
| 5568 | .clear_bit_hook = btrfs_clear_bit_hook, | 5728 | .clear_bit_hook = btrfs_clear_bit_hook, |
| 5729 | .merge_extent_hook = btrfs_merge_extent_hook, | ||
| 5730 | .split_extent_hook = btrfs_split_extent_hook, | ||
| 5569 | }; | 5731 | }; |
| 5570 | 5732 | ||
| 5571 | /* | 5733 | /* |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a8577a7f26ab..4de7ef6f8603 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -239,7 +239,13 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 239 | u64 index = 0; | 239 | u64 index = 0; |
| 240 | unsigned long nr = 1; | 240 | unsigned long nr = 1; |
| 241 | 241 | ||
| 242 | ret = btrfs_check_metadata_free_space(root); | 242 | /* |
| 243 | * 1 - inode item | ||
| 244 | * 2 - refs | ||
| 245 | * 1 - root item | ||
| 246 | * 2 - dir items | ||
| 247 | */ | ||
| 248 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 243 | if (ret) | 249 | if (ret) |
| 244 | return ret; | 250 | return ret; |
| 245 | 251 | ||
| @@ -340,6 +346,9 @@ fail: | |||
| 340 | err = btrfs_commit_transaction(trans, root); | 346 | err = btrfs_commit_transaction(trans, root); |
| 341 | if (err && !ret) | 347 | if (err && !ret) |
| 342 | ret = err; | 348 | ret = err; |
| 349 | |||
| 350 | btrfs_unreserve_metadata_space(root, 6); | ||
| 351 | btrfs_btree_balance_dirty(root, nr); | ||
| 343 | return ret; | 352 | return ret; |
| 344 | } | 353 | } |
| 345 | 354 | ||
| @@ -355,19 +364,27 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 355 | if (!root->ref_cows) | 364 | if (!root->ref_cows) |
| 356 | return -EINVAL; | 365 | return -EINVAL; |
| 357 | 366 | ||
| 358 | ret = btrfs_check_metadata_free_space(root); | 367 | /* |
| 368 | * 1 - inode item | ||
| 369 | * 2 - refs | ||
| 370 | * 1 - root item | ||
| 371 | * 2 - dir items | ||
| 372 | */ | ||
| 373 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 359 | if (ret) | 374 | if (ret) |
| 360 | goto fail_unlock; | 375 | goto fail_unlock; |
| 361 | 376 | ||
| 362 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 377 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 363 | if (!pending_snapshot) { | 378 | if (!pending_snapshot) { |
| 364 | ret = -ENOMEM; | 379 | ret = -ENOMEM; |
| 380 | btrfs_unreserve_metadata_space(root, 6); | ||
| 365 | goto fail_unlock; | 381 | goto fail_unlock; |
| 366 | } | 382 | } |
| 367 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | 383 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); |
| 368 | if (!pending_snapshot->name) { | 384 | if (!pending_snapshot->name) { |
| 369 | ret = -ENOMEM; | 385 | ret = -ENOMEM; |
| 370 | kfree(pending_snapshot); | 386 | kfree(pending_snapshot); |
| 387 | btrfs_unreserve_metadata_space(root, 6); | ||
| 371 | goto fail_unlock; | 388 | goto fail_unlock; |
| 372 | } | 389 | } |
| 373 | memcpy(pending_snapshot->name, name, namelen); | 390 | memcpy(pending_snapshot->name, name, namelen); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 88f866f85e7a..0b8f36d4400a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -186,6 +186,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 186 | h->alloc_exclude_start = 0; | 186 | h->alloc_exclude_start = 0; |
| 187 | h->delayed_ref_updates = 0; | 187 | h->delayed_ref_updates = 0; |
| 188 | 188 | ||
| 189 | if (!current->journal_info) | ||
| 190 | current->journal_info = h; | ||
| 191 | |||
| 189 | root->fs_info->running_transaction->use_count++; | 192 | root->fs_info->running_transaction->use_count++; |
| 190 | record_root_in_trans(h, root); | 193 | record_root_in_trans(h, root); |
| 191 | mutex_unlock(&root->fs_info->trans_mutex); | 194 | mutex_unlock(&root->fs_info->trans_mutex); |
| @@ -317,6 +320,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 317 | wake_up(&cur_trans->writer_wait); | 320 | wake_up(&cur_trans->writer_wait); |
| 318 | put_transaction(cur_trans); | 321 | put_transaction(cur_trans); |
| 319 | mutex_unlock(&info->trans_mutex); | 322 | mutex_unlock(&info->trans_mutex); |
| 323 | |||
| 324 | if (current->journal_info == trans) | ||
| 325 | current->journal_info = NULL; | ||
| 320 | memset(trans, 0, sizeof(*trans)); | 326 | memset(trans, 0, sizeof(*trans)); |
| 321 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 327 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 322 | 328 | ||
| @@ -743,6 +749,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 743 | memcpy(&pending->root_key, &key, sizeof(key)); | 749 | memcpy(&pending->root_key, &key, sizeof(key)); |
| 744 | fail: | 750 | fail: |
| 745 | kfree(new_root_item); | 751 | kfree(new_root_item); |
| 752 | btrfs_unreserve_metadata_space(root, 6); | ||
| 746 | return ret; | 753 | return ret; |
| 747 | } | 754 | } |
| 748 | 755 | ||
| @@ -1059,6 +1066,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1059 | 1066 | ||
| 1060 | mutex_unlock(&root->fs_info->trans_mutex); | 1067 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1061 | 1068 | ||
| 1069 | if (current->journal_info == trans) | ||
| 1070 | current->journal_info = NULL; | ||
| 1071 | |||
| 1062 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1072 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1063 | return ret; | 1073 | return ret; |
| 1064 | } | 1074 | } |
