aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2009-09-11 16:12:44 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-28 16:29:42 -0400
commit9ed74f2dba6ebf9f30b80554290bfc73cc3ef083 (patch)
tree763d58a4a11ceca26dcdaedefb1fd662c4e2fa8b /fs/btrfs
parentc65ddb52dc412c9b67681b1aa16cd1bac8434e24 (diff)
Btrfs: proper -ENOSPC handling
At the start of a transaction we do a btrfs_reserve_metadata_space() and specify how many items we plan on modifying. Then once we've done our modifications and such, just call btrfs_unreserve_metadata_space() for the same number of items we reserved. For keeping track of metadata needed for data I've had to add an extent_io op for when we merge extents. This lets us track space properly when we are doing sequential writes, so we don't end up reserving way more metadata space than what we need. The only place where the metadata space accounting is not done is in the relocation code. This is because Yan is going to be reworking that code in the near future, so running btrfs-vol -b could still possibly result in an ENOSPC-related panic. This patch also turns off the metadata_ratio stuff in order to allow users to more efficiently use their disk space. This patch makes it so we track how much metadata we need for an inode's delayed allocation extents by tracking how many extents are currently waiting for allocation. It introduces two new callbacks for the extent_io trees, merge_extent_hook and split_extent_hook. These help us keep track of when we merge delalloc extents together and split them up. Reservations are handled before any actual dirtying occurs, and then we unreserve after we dirty. btrfs_unreserve_metadata_for_delalloc() will make the appropriate unreservations as needed based on the number of reservations we currently have and the number of extents we currently have. Doing the reservation outside of doing any of the actual dirtying lets us do things like filemap_flush() the inode to try and force delalloc to happen, or as a last resort actually start allocation on all delalloc inodes in the fs. This has survived dbench, fs_mark and an fsx torture test. Signed-off-by: Josef Bacik <jbacik@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/btrfs_inode.h8
-rw-r--r--fs/btrfs/ctree.h23
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c389
-rw-r--r--fs/btrfs/extent_io.c92
-rw-r--r--fs/btrfs/extent_io.h13
-rw-r--r--fs/btrfs/file.c11
-rw-r--r--fs/btrfs/inode.c224
-rw-r--r--fs/btrfs/ioctl.c21
-rw-r--r--fs/btrfs/transaction.c10
10 files changed, 678 insertions, 115 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 82ee56bba299..a54d354cefcb 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -128,6 +128,14 @@ struct btrfs_inode {
128 u64 last_unlink_trans; 128 u64 last_unlink_trans;
129 129
130 /* 130 /*
131 * These two counters are for delalloc metadata reservations. We keep
132 * track of how many extents we've accounted for vs how many extents we
133 * have.
134 */
135 int delalloc_reserved_extents;
136 int delalloc_extents;
137
138 /*
131 * ordered_data_close is set by truncate when a file that used 139 * ordered_data_close is set by truncate when a file that used
132 * to have good data has been truncated to zero. When it is set 140 * to have good data has been truncated to zero. When it is set
133 * the btrfs file release call will add this inode to the 141 * the btrfs file release call will add this inode to the
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80599b4e42bd..b3959a150c3b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -675,18 +675,19 @@ struct btrfs_space_info {
675 current allocations */ 675 current allocations */
676 u64 bytes_readonly; /* total bytes that are read only */ 676 u64 bytes_readonly; /* total bytes that are read only */
677 u64 bytes_super; /* total bytes reserved for the super blocks */ 677 u64 bytes_super; /* total bytes reserved for the super blocks */
678 678 u64 bytes_root; /* the number of bytes needed to commit a
679 /* delalloc accounting */ 679 transaction */
680 u64 bytes_delalloc; /* number of bytes reserved for allocation,
681 this space is not necessarily reserved yet
682 by the allocator */
683 u64 bytes_may_use; /* number of bytes that may be used for 680 u64 bytes_may_use; /* number of bytes that may be used for
684 delalloc */ 681 delalloc/allocations */
682 u64 bytes_delalloc; /* number of bytes currently reserved for
683 delayed allocation */
685 684
686 int full; /* indicates that we cannot allocate any more 685 int full; /* indicates that we cannot allocate any more
687 chunks for this space */ 686 chunks for this space */
688 int force_alloc; /* set if we need to force a chunk alloc for 687 int force_alloc; /* set if we need to force a chunk alloc for
689 this space */ 688 this space */
689 int force_delalloc; /* make people start doing filemap_flush until
690 we're under a threshold */
690 691
691 struct list_head list; 692 struct list_head list;
692 693
@@ -695,6 +696,9 @@ struct btrfs_space_info {
695 spinlock_t lock; 696 spinlock_t lock;
696 struct rw_semaphore groups_sem; 697 struct rw_semaphore groups_sem;
697 atomic_t caching_threads; 698 atomic_t caching_threads;
699
700 int allocating_chunk;
701 wait_queue_head_t wait;
698}; 702};
699 703
700/* 704/*
@@ -2022,7 +2026,12 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
2022void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 2026void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
2023void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 2027void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
2024 2028
2025int btrfs_check_metadata_free_space(struct btrfs_root *root); 2029int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items);
2030int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items);
2031int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2032 struct inode *inode, int num_items);
2033int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
2034 struct inode *inode, int num_items);
2026int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 2035int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
2027 u64 bytes); 2036 u64 bytes);
2028void btrfs_free_reserved_data_space(struct btrfs_root *root, 2037void btrfs_free_reserved_data_space(struct btrfs_root *root,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f1e905f7e701..ece8d1e26b5e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1629,7 +1629,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1629 fs_info->sb = sb; 1629 fs_info->sb = sb;
1630 fs_info->max_extent = (u64)-1; 1630 fs_info->max_extent = (u64)-1;
1631 fs_info->max_inline = 8192 * 1024; 1631 fs_info->max_inline = 8192 * 1024;
1632 fs_info->metadata_ratio = 8; 1632 fs_info->metadata_ratio = 0;
1633 1633
1634 fs_info->thread_pool_size = min_t(unsigned long, 1634 fs_info->thread_pool_size = min_t(unsigned long,
1635 num_online_cpus() + 2, 8); 1635 num_online_cpus() + 2, 8);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 90d314eeff6d..a4b2b03cd682 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -68,6 +68,8 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
68 struct extent_buffer **must_clean); 68 struct extent_buffer **must_clean);
69static int find_next_key(struct btrfs_path *path, int level, 69static int find_next_key(struct btrfs_path *path, int level,
70 struct btrfs_key *key); 70 struct btrfs_key *key);
71static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
72 int dump_block_groups);
71 73
72static noinline int 74static noinline int
73block_group_cache_done(struct btrfs_block_group_cache *cache) 75block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2764,67 +2766,346 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
2764 alloc_target); 2766 alloc_target);
2765} 2767}
2766 2768
2769static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
2770{
2771 u64 num_bytes;
2772 int level;
2773
2774 level = BTRFS_MAX_LEVEL - 2;
2775 /*
2776 * NOTE: these calculations are absolutely the worst possible case.
2777 * This assumes that _every_ item we insert will require a new leaf, and
2778 * that the tree has grown to its maximum level size.
2779 */
2780
2781 /*
2782 * for every item we insert we could insert both an extent item and an
2783 * extent ref item. Then for every item we insert, we will need to cow
2784 * both the original leaf, plus the leaf to the left and right of it.
2785 *
2786 * Unless we are talking about the extent root, then we just want the
2787 * number of items * 2, since we just need the extent item plus its ref.
2788 */
2789 if (root == root->fs_info->extent_root)
2790 num_bytes = num_items * 2;
2791 else
2792 num_bytes = (num_items + (2 * num_items)) * 3;
2793
2794 /*
2795 * num_bytes is total number of leaves we could need times the leaf
2796 * size, and then for every leaf we could end up cow'ing 2 nodes per
2797 * level, down to the leaf level.
2798 */
2799 num_bytes = (num_bytes * root->leafsize) +
2800 (num_bytes * (level * 2)) * root->nodesize;
2801
2802 return num_bytes;
2803}
2804
2767/* 2805/*
2768 * for now this just makes sure we have at least 5% of our metadata space free 2806 * Unreserve metadata space for delalloc. If we have less reserved credits than
2769 * for use. 2807 * we have extents, this function does nothing.
2770 */ 2808 */
2771int btrfs_check_metadata_free_space(struct btrfs_root *root) 2809int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2810 struct inode *inode, int num_items)
2772{ 2811{
2773 struct btrfs_fs_info *info = root->fs_info; 2812 struct btrfs_fs_info *info = root->fs_info;
2774 struct btrfs_space_info *meta_sinfo; 2813 struct btrfs_space_info *meta_sinfo;
2775 u64 alloc_target, thresh; 2814 u64 num_bytes;
2776 int committed = 0, ret; 2815 u64 alloc_target;
2816 bool bug = false;
2777 2817
2778 /* get the space info for where the metadata will live */ 2818 /* get the space info for where the metadata will live */
2779 alloc_target = btrfs_get_alloc_profile(root, 0); 2819 alloc_target = btrfs_get_alloc_profile(root, 0);
2780 meta_sinfo = __find_space_info(info, alloc_target); 2820 meta_sinfo = __find_space_info(info, alloc_target);
2781 if (!meta_sinfo)
2782 goto alloc;
2783 2821
2784again: 2822 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2823 num_items);
2824
2785 spin_lock(&meta_sinfo->lock); 2825 spin_lock(&meta_sinfo->lock);
2786 if (!meta_sinfo->full) 2826 if (BTRFS_I(inode)->delalloc_reserved_extents <=
2787 thresh = meta_sinfo->total_bytes * 80; 2827 BTRFS_I(inode)->delalloc_extents) {
2788 else 2828 spin_unlock(&meta_sinfo->lock);
2789 thresh = meta_sinfo->total_bytes * 95; 2829 return 0;
2830 }
2831
2832 BTRFS_I(inode)->delalloc_reserved_extents--;
2833 BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0);
2834
2835 if (meta_sinfo->bytes_delalloc < num_bytes) {
2836 bug = true;
2837 meta_sinfo->bytes_delalloc = 0;
2838 } else {
2839 meta_sinfo->bytes_delalloc -= num_bytes;
2840 }
2841 spin_unlock(&meta_sinfo->lock);
2790 2842
2843 BUG_ON(bug);
2844
2845 return 0;
2846}
2847
2848static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
2849{
2850 u64 thresh;
2851
2852 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2853 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2854 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2855 meta_sinfo->bytes_may_use;
2856
2857 thresh = meta_sinfo->total_bytes - thresh;
2858 thresh *= 80;
2791 do_div(thresh, 100); 2859 do_div(thresh, 100);
2860 if (thresh <= meta_sinfo->bytes_delalloc)
2861 meta_sinfo->force_delalloc = 1;
2862 else
2863 meta_sinfo->force_delalloc = 0;
2864}
2792 2865
2793 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2866static int maybe_allocate_chunk(struct btrfs_root *root,
2794 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 2867 struct btrfs_space_info *info)
2795 meta_sinfo->bytes_super > thresh) { 2868{
2796 struct btrfs_trans_handle *trans; 2869 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
2797 if (!meta_sinfo->full) { 2870 struct btrfs_trans_handle *trans;
2798 meta_sinfo->force_alloc = 1; 2871 bool wait = false;
2872 int ret = 0;
2873 u64 min_metadata;
2874 u64 free_space;
2875
2876 free_space = btrfs_super_total_bytes(disk_super);
2877 /*
2878 * we allow the metadata to grow to a max of either 5gb or 5% of the
2879 * space in the volume.
2880 */
2881 min_metadata = min((u64)5 * 1024 * 1024 * 1024,
2882 div64_u64(free_space * 5, 100));
2883 if (info->total_bytes >= min_metadata) {
2884 spin_unlock(&info->lock);
2885 return 0;
2886 }
2887
2888 if (info->full) {
2889 spin_unlock(&info->lock);
2890 return 0;
2891 }
2892
2893 if (!info->allocating_chunk) {
2894 info->force_alloc = 1;
2895 info->allocating_chunk = 1;
2896 init_waitqueue_head(&info->wait);
2897 } else {
2898 wait = true;
2899 }
2900
2901 spin_unlock(&info->lock);
2902
2903 if (wait) {
2904 wait_event(info->wait,
2905 !info->allocating_chunk);
2906 return 1;
2907 }
2908
2909 trans = btrfs_start_transaction(root, 1);
2910 if (!trans) {
2911 ret = -ENOMEM;
2912 goto out;
2913 }
2914
2915 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2916 4096 + 2 * 1024 * 1024,
2917 info->flags, 0);
2918 btrfs_end_transaction(trans, root);
2919 if (ret)
2920 goto out;
2921out:
2922 spin_lock(&info->lock);
2923 info->allocating_chunk = 0;
2924 spin_unlock(&info->lock);
2925 wake_up(&info->wait);
2926
2927 if (ret)
2928 return 0;
2929 return 1;
2930}
2931
2932/*
2933 * Reserve metadata space for delalloc.
2934 */
2935int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
2936 struct inode *inode, int num_items)
2937{
2938 struct btrfs_fs_info *info = root->fs_info;
2939 struct btrfs_space_info *meta_sinfo;
2940 u64 num_bytes;
2941 u64 used;
2942 u64 alloc_target;
2943 int flushed = 0;
2944 int force_delalloc;
2945
2946 /* get the space info for where the metadata will live */
2947 alloc_target = btrfs_get_alloc_profile(root, 0);
2948 meta_sinfo = __find_space_info(info, alloc_target);
2949
2950 num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
2951 num_items);
2952again:
2953 spin_lock(&meta_sinfo->lock);
2954
2955 force_delalloc = meta_sinfo->force_delalloc;
2956
2957 if (unlikely(!meta_sinfo->bytes_root))
2958 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
2959
2960 if (!flushed)
2961 meta_sinfo->bytes_delalloc += num_bytes;
2962
2963 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
2964 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
2965 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
2966 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
2967
2968 if (used > meta_sinfo->total_bytes) {
2969 flushed++;
2970
2971 if (flushed == 1) {
2972 if (maybe_allocate_chunk(root, meta_sinfo))
2973 goto again;
2974 flushed++;
2975 } else {
2799 spin_unlock(&meta_sinfo->lock); 2976 spin_unlock(&meta_sinfo->lock);
2800alloc: 2977 }
2801 trans = btrfs_start_transaction(root, 1);
2802 if (!trans)
2803 return -ENOMEM;
2804 2978
2805 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2979 if (flushed == 2) {
2806 2 * 1024 * 1024, alloc_target, 0); 2980 filemap_flush(inode->i_mapping);
2807 btrfs_end_transaction(trans, root); 2981 goto again;
2808 if (!meta_sinfo) { 2982 } else if (flushed == 3) {
2809 meta_sinfo = __find_space_info(info, 2983 btrfs_start_delalloc_inodes(root);
2810 alloc_target); 2984 btrfs_wait_ordered_extents(root, 0);
2811 }
2812 goto again; 2985 goto again;
2813 } 2986 }
2987 spin_lock(&meta_sinfo->lock);
2988 meta_sinfo->bytes_delalloc -= num_bytes;
2814 spin_unlock(&meta_sinfo->lock); 2989 spin_unlock(&meta_sinfo->lock);
2990 printk(KERN_ERR "enospc, has %d, reserved %d\n",
2991 BTRFS_I(inode)->delalloc_extents,
2992 BTRFS_I(inode)->delalloc_reserved_extents);
2993 dump_space_info(meta_sinfo, 0, 0);
2994 return -ENOSPC;
2995 }
2815 2996
2816 if (!committed) { 2997 BTRFS_I(inode)->delalloc_reserved_extents++;
2817 committed = 1; 2998 check_force_delalloc(meta_sinfo);
2818 trans = btrfs_join_transaction(root, 1); 2999 spin_unlock(&meta_sinfo->lock);
2819 if (!trans) 3000
2820 return -ENOMEM; 3001 if (!flushed && force_delalloc)
2821 ret = btrfs_commit_transaction(trans, root); 3002 filemap_flush(inode->i_mapping);
2822 if (ret) 3003
2823 return ret; 3004 return 0;
3005}
3006
3007/*
3008 * unreserve num_items number of items worth of metadata space. This needs to
3009 * be paired with btrfs_reserve_metadata_space.
3010 *
3011 * NOTE: if you have the option, run this _AFTER_ you do a
3012 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
3013 * operations which will result in more used metadata, so we want to make sure we
3014 * can do that without issue.
3015 */
3016int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
3017{
3018 struct btrfs_fs_info *info = root->fs_info;
3019 struct btrfs_space_info *meta_sinfo;
3020 u64 num_bytes;
3021 u64 alloc_target;
3022 bool bug = false;
3023
3024 /* get the space info for where the metadata will live */
3025 alloc_target = btrfs_get_alloc_profile(root, 0);
3026 meta_sinfo = __find_space_info(info, alloc_target);
3027
3028 num_bytes = calculate_bytes_needed(root, num_items);
3029
3030 spin_lock(&meta_sinfo->lock);
3031 if (meta_sinfo->bytes_may_use < num_bytes) {
3032 bug = true;
3033 meta_sinfo->bytes_may_use = 0;
3034 } else {
3035 meta_sinfo->bytes_may_use -= num_bytes;
3036 }
3037 spin_unlock(&meta_sinfo->lock);
3038
3039 BUG_ON(bug);
3040
3041 return 0;
3042}
3043
3044/*
3045 * Reserve some metadata space for use. We'll calculate the worst case number
3046 * of bytes that would be needed to modify num_items number of items. If we
3047 * have space, fantastic, if not, you get -ENOSPC. Please call
3048 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
3049 * items you reserved, since whatever metadata you needed should have already
3050 * been allocated.
3051 *
3052 * This will commit the transaction to make more space if we don't have enough
3053 * metadata space. The only time we don't do this is if we're reserving space
3054 * inside of a transaction, then we will just return -ENOSPC and it is the
3055 * caller's responsibility to handle it properly.
3056 */
3057int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
3058{
3059 struct btrfs_fs_info *info = root->fs_info;
3060 struct btrfs_space_info *meta_sinfo;
3061 u64 num_bytes;
3062 u64 used;
3063 u64 alloc_target;
3064 int retries = 0;
3065
3066 /* get the space info for where the metadata will live */
3067 alloc_target = btrfs_get_alloc_profile(root, 0);
3068 meta_sinfo = __find_space_info(info, alloc_target);
3069
3070 num_bytes = calculate_bytes_needed(root, num_items);
3071again:
3072 spin_lock(&meta_sinfo->lock);
3073
3074 if (unlikely(!meta_sinfo->bytes_root))
3075 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
3076
3077 if (!retries)
3078 meta_sinfo->bytes_may_use += num_bytes;
3079
3080 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
3081 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
3082 meta_sinfo->bytes_super + meta_sinfo->bytes_root +
3083 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
3084
3085 if (used > meta_sinfo->total_bytes) {
3086 retries++;
3087 if (retries == 1) {
3088 if (maybe_allocate_chunk(root, meta_sinfo))
3089 goto again;
3090 retries++;
3091 } else {
3092 spin_unlock(&meta_sinfo->lock);
3093 }
3094
3095 if (retries == 2) {
3096 btrfs_start_delalloc_inodes(root);
3097 btrfs_wait_ordered_extents(root, 0);
2824 goto again; 3098 goto again;
2825 } 3099 }
3100 spin_lock(&meta_sinfo->lock);
3101 meta_sinfo->bytes_may_use -= num_bytes;
3102 spin_unlock(&meta_sinfo->lock);
3103
3104 dump_space_info(meta_sinfo, 0, 0);
2826 return -ENOSPC; 3105 return -ENOSPC;
2827 } 3106 }
3107
3108 check_force_delalloc(meta_sinfo);
2828 spin_unlock(&meta_sinfo->lock); 3109 spin_unlock(&meta_sinfo->lock);
2829 3110
2830 return 0; 3111 return 0;
@@ -2915,7 +3196,7 @@ alloc:
2915 BTRFS_I(inode)->reserved_bytes += bytes; 3196 BTRFS_I(inode)->reserved_bytes += bytes;
2916 spin_unlock(&data_sinfo->lock); 3197 spin_unlock(&data_sinfo->lock);
2917 3198
2918 return btrfs_check_metadata_free_space(root); 3199 return 0;
2919} 3200}
2920 3201
2921/* 3202/*
@@ -3014,17 +3295,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3014 BUG_ON(!space_info); 3295 BUG_ON(!space_info);
3015 3296
3016 spin_lock(&space_info->lock); 3297 spin_lock(&space_info->lock);
3017 if (space_info->force_alloc) { 3298 if (space_info->force_alloc)
3018 force = 1; 3299 force = 1;
3019 space_info->force_alloc = 0;
3020 }
3021 if (space_info->full) { 3300 if (space_info->full) {
3022 spin_unlock(&space_info->lock); 3301 spin_unlock(&space_info->lock);
3023 goto out; 3302 goto out;
3024 } 3303 }
3025 3304
3026 thresh = space_info->total_bytes - space_info->bytes_readonly; 3305 thresh = space_info->total_bytes - space_info->bytes_readonly;
3027 thresh = div_factor(thresh, 6); 3306 thresh = div_factor(thresh, 8);
3028 if (!force && 3307 if (!force &&
3029 (space_info->bytes_used + space_info->bytes_pinned + 3308 (space_info->bytes_used + space_info->bytes_pinned +
3030 space_info->bytes_reserved + alloc_bytes) < thresh) { 3309 space_info->bytes_reserved + alloc_bytes) < thresh) {
@@ -3038,7 +3317,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3038 * we keep a reasonable number of metadata chunks allocated in the 3317 * we keep a reasonable number of metadata chunks allocated in the
3039 * FS as well. 3318 * FS as well.
3040 */ 3319 */
3041 if (flags & BTRFS_BLOCK_GROUP_DATA) { 3320 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
3042 fs_info->data_chunk_allocations++; 3321 fs_info->data_chunk_allocations++;
3043 if (!(fs_info->data_chunk_allocations % 3322 if (!(fs_info->data_chunk_allocations %
3044 fs_info->metadata_ratio)) 3323 fs_info->metadata_ratio))
@@ -3046,8 +3325,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3046 } 3325 }
3047 3326
3048 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3327 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3328 spin_lock(&space_info->lock);
3049 if (ret) 3329 if (ret)
3050 space_info->full = 1; 3330 space_info->full = 1;
3331 space_info->force_alloc = 0;
3332 spin_unlock(&space_info->lock);
3051out: 3333out:
3052 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3334 mutex_unlock(&extent_root->fs_info->chunk_mutex);
3053 return ret; 3335 return ret;
@@ -4062,21 +4344,32 @@ loop:
4062 return ret; 4344 return ret;
4063} 4345}
4064 4346
4065static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 4347static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
4348 int dump_block_groups)
4066{ 4349{
4067 struct btrfs_block_group_cache *cache; 4350 struct btrfs_block_group_cache *cache;
4068 4351
4352 spin_lock(&info->lock);
4069 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4353 printk(KERN_INFO "space_info has %llu free, is %sfull\n",
4070 (unsigned long long)(info->total_bytes - info->bytes_used - 4354 (unsigned long long)(info->total_bytes - info->bytes_used -
4071 info->bytes_pinned - info->bytes_reserved), 4355 info->bytes_pinned - info->bytes_reserved -
4356 info->bytes_super),
4072 (info->full) ? "" : "not "); 4357 (info->full) ? "" : "not ");
4073 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4358 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
4074 " may_use=%llu, used=%llu\n", 4359 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu"
4360 "\n",
4075 (unsigned long long)info->total_bytes, 4361 (unsigned long long)info->total_bytes,
4076 (unsigned long long)info->bytes_pinned, 4362 (unsigned long long)info->bytes_pinned,
4077 (unsigned long long)info->bytes_delalloc, 4363 (unsigned long long)info->bytes_delalloc,
4078 (unsigned long long)info->bytes_may_use, 4364 (unsigned long long)info->bytes_may_use,
4079 (unsigned long long)info->bytes_used); 4365 (unsigned long long)info->bytes_used,
4366 (unsigned long long)info->bytes_root,
4367 (unsigned long long)info->bytes_super,
4368 (unsigned long long)info->bytes_reserved);
4369 spin_unlock(&info->lock);
4370
4371 if (!dump_block_groups)
4372 return;
4080 4373
4081 down_read(&info->groups_sem); 4374 down_read(&info->groups_sem);
4082 list_for_each_entry(cache, &info->block_groups, list) { 4375 list_for_each_entry(cache, &info->block_groups, list) {
@@ -4144,7 +4437,7 @@ again:
4144 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4437 printk(KERN_ERR "btrfs allocation failed flags %llu, "
4145 "wanted %llu\n", (unsigned long long)data, 4438 "wanted %llu\n", (unsigned long long)data,
4146 (unsigned long long)num_bytes); 4439 (unsigned long long)num_bytes);
4147 dump_space_info(sinfo, num_bytes); 4440 dump_space_info(sinfo, num_bytes, 1);
4148 } 4441 }
4149 4442
4150 return ret; 4443 return ret;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0cb88f8146ea..de1793ba004a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
280 return NULL; 280 return NULL;
281} 281}
282 282
283static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
284 struct extent_state *other)
285{
286 if (tree->ops && tree->ops->merge_extent_hook)
287 tree->ops->merge_extent_hook(tree->mapping->host, new,
288 other);
289}
290
283/* 291/*
284 * utility function to look for merge candidates inside a given range. 292 * utility function to look for merge candidates inside a given range.
285 * Any extents with matching state are merged together into a single 293 * Any extents with matching state are merged together into a single
@@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree,
303 other = rb_entry(other_node, struct extent_state, rb_node); 311 other = rb_entry(other_node, struct extent_state, rb_node);
304 if (other->end == state->start - 1 && 312 if (other->end == state->start - 1 &&
305 other->state == state->state) { 313 other->state == state->state) {
314 merge_cb(tree, state, other);
306 state->start = other->start; 315 state->start = other->start;
307 other->tree = NULL; 316 other->tree = NULL;
308 rb_erase(&other->rb_node, &tree->state); 317 rb_erase(&other->rb_node, &tree->state);
@@ -314,33 +323,37 @@ static int merge_state(struct extent_io_tree *tree,
314 other = rb_entry(other_node, struct extent_state, rb_node); 323 other = rb_entry(other_node, struct extent_state, rb_node);
315 if (other->start == state->end + 1 && 324 if (other->start == state->end + 1 &&
316 other->state == state->state) { 325 other->state == state->state) {
326 merge_cb(tree, state, other);
317 other->start = state->start; 327 other->start = state->start;
318 state->tree = NULL; 328 state->tree = NULL;
319 rb_erase(&state->rb_node, &tree->state); 329 rb_erase(&state->rb_node, &tree->state);
320 free_extent_state(state); 330 free_extent_state(state);
331 state = NULL;
321 } 332 }
322 } 333 }
334
323 return 0; 335 return 0;
324} 336}
325 337
326static void set_state_cb(struct extent_io_tree *tree, 338static int set_state_cb(struct extent_io_tree *tree,
327 struct extent_state *state, 339 struct extent_state *state,
328 unsigned long bits) 340 unsigned long bits)
329{ 341{
330 if (tree->ops && tree->ops->set_bit_hook) { 342 if (tree->ops && tree->ops->set_bit_hook) {
331 tree->ops->set_bit_hook(tree->mapping->host, state->start, 343 return tree->ops->set_bit_hook(tree->mapping->host,
332 state->end, state->state, bits); 344 state->start, state->end,
345 state->state, bits);
333 } 346 }
347
348 return 0;
334} 349}
335 350
336static void clear_state_cb(struct extent_io_tree *tree, 351static void clear_state_cb(struct extent_io_tree *tree,
337 struct extent_state *state, 352 struct extent_state *state,
338 unsigned long bits) 353 unsigned long bits)
339{ 354{
340 if (tree->ops && tree->ops->clear_bit_hook) { 355 if (tree->ops && tree->ops->clear_bit_hook)
341 tree->ops->clear_bit_hook(tree->mapping->host, state->start, 356 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
342 state->end, state->state, bits);
343 }
344} 357}
345 358
346/* 359/*
@@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree,
358 int bits) 371 int bits)
359{ 372{
360 struct rb_node *node; 373 struct rb_node *node;
374 int ret;
361 375
362 if (end < start) { 376 if (end < start) {
363 printk(KERN_ERR "btrfs end < start %llu %llu\n", 377 printk(KERN_ERR "btrfs end < start %llu %llu\n",
@@ -365,11 +379,14 @@ static int insert_state(struct extent_io_tree *tree,
365 (unsigned long long)start); 379 (unsigned long long)start);
366 WARN_ON(1); 380 WARN_ON(1);
367 } 381 }
368 if (bits & EXTENT_DIRTY)
369 tree->dirty_bytes += end - start + 1;
370 state->start = start; 382 state->start = start;
371 state->end = end; 383 state->end = end;
372 set_state_cb(tree, state, bits); 384 ret = set_state_cb(tree, state, bits);
385 if (ret)
386 return ret;
387
388 if (bits & EXTENT_DIRTY)
389 tree->dirty_bytes += end - start + 1;
373 state->state |= bits; 390 state->state |= bits;
374 node = tree_insert(&tree->state, end, &state->rb_node); 391 node = tree_insert(&tree->state, end, &state->rb_node);
375 if (node) { 392 if (node) {
@@ -387,6 +404,15 @@ static int insert_state(struct extent_io_tree *tree,
387 return 0; 404 return 0;
388} 405}
389 406
407static int split_cb(struct extent_io_tree *tree, struct extent_state *orig,
408 u64 split)
409{
410 if (tree->ops && tree->ops->split_extent_hook)
411 return tree->ops->split_extent_hook(tree->mapping->host,
412 orig, split);
413 return 0;
414}
415
390/* 416/*
391 * split a given extent state struct in two, inserting the preallocated 417 * split a given extent state struct in two, inserting the preallocated
392 * struct 'prealloc' as the newly created second half. 'split' indicates an 418 * struct 'prealloc' as the newly created second half. 'split' indicates an
@@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
405 struct extent_state *prealloc, u64 split) 431 struct extent_state *prealloc, u64 split)
406{ 432{
407 struct rb_node *node; 433 struct rb_node *node;
434
435 split_cb(tree, orig, split);
436
408 prealloc->start = orig->start; 437 prealloc->start = orig->start;
409 prealloc->end = split - 1; 438 prealloc->end = split - 1;
410 prealloc->state = orig->state; 439 prealloc->state = orig->state;
@@ -542,8 +571,8 @@ hit_next:
542 if (err) 571 if (err)
543 goto out; 572 goto out;
544 if (state->end <= end) { 573 if (state->end <= end) {
545 set |= clear_state_bit(tree, state, bits, 574 set |= clear_state_bit(tree, state, bits, wake,
546 wake, delete); 575 delete);
547 if (last_end == (u64)-1) 576 if (last_end == (u64)-1)
548 goto out; 577 goto out;
549 start = last_end + 1; 578 start = last_end + 1;
@@ -561,12 +590,11 @@ hit_next:
561 prealloc = alloc_extent_state(GFP_ATOMIC); 590 prealloc = alloc_extent_state(GFP_ATOMIC);
562 err = split_state(tree, state, prealloc, end + 1); 591 err = split_state(tree, state, prealloc, end + 1);
563 BUG_ON(err == -EEXIST); 592 BUG_ON(err == -EEXIST);
564
565 if (wake) 593 if (wake)
566 wake_up(&state->wq); 594 wake_up(&state->wq);
567 595
568 set |= clear_state_bit(tree, prealloc, bits, 596 set |= clear_state_bit(tree, prealloc, bits, wake, delete);
569 wake, delete); 597
570 prealloc = NULL; 598 prealloc = NULL;
571 goto out; 599 goto out;
572 } 600 }
@@ -667,16 +695,23 @@ out:
667 return 0; 695 return 0;
668} 696}
669 697
670static void set_state_bits(struct extent_io_tree *tree, 698static int set_state_bits(struct extent_io_tree *tree,
671 struct extent_state *state, 699 struct extent_state *state,
672 int bits) 700 int bits)
673{ 701{
702 int ret;
703
704 ret = set_state_cb(tree, state, bits);
705 if (ret)
706 return ret;
707
674 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 708 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
675 u64 range = state->end - state->start + 1; 709 u64 range = state->end - state->start + 1;
676 tree->dirty_bytes += range; 710 tree->dirty_bytes += range;
677 } 711 }
678 set_state_cb(tree, state, bits);
679 state->state |= bits; 712 state->state |= bits;
713
714 return 0;
680} 715}
681 716
682static void cache_state(struct extent_state *state, 717static void cache_state(struct extent_state *state,
@@ -758,7 +793,10 @@ hit_next:
758 goto out; 793 goto out;
759 } 794 }
760 795
761 set_state_bits(tree, state, bits); 796 err = set_state_bits(tree, state, bits);
797 if (err)
798 goto out;
799
762 cache_state(state, cached_state); 800 cache_state(state, cached_state);
763 merge_state(tree, state); 801 merge_state(tree, state);
764 if (last_end == (u64)-1) 802 if (last_end == (u64)-1)
@@ -805,7 +843,9 @@ hit_next:
805 if (err) 843 if (err)
806 goto out; 844 goto out;
807 if (state->end <= end) { 845 if (state->end <= end) {
808 set_state_bits(tree, state, bits); 846 err = set_state_bits(tree, state, bits);
847 if (err)
848 goto out;
809 cache_state(state, cached_state); 849 cache_state(state, cached_state);
810 merge_state(tree, state); 850 merge_state(tree, state);
811 if (last_end == (u64)-1) 851 if (last_end == (u64)-1)
@@ -829,11 +869,13 @@ hit_next:
829 this_end = last_start - 1; 869 this_end = last_start - 1;
830 err = insert_state(tree, prealloc, start, this_end, 870 err = insert_state(tree, prealloc, start, this_end,
831 bits); 871 bits);
832 cache_state(prealloc, cached_state);
833 prealloc = NULL;
834 BUG_ON(err == -EEXIST); 872 BUG_ON(err == -EEXIST);
835 if (err) 873 if (err) {
874 prealloc = NULL;
836 goto out; 875 goto out;
876 }
877 cache_state(prealloc, cached_state);
878 prealloc = NULL;
837 start = this_end + 1; 879 start = this_end + 1;
838 goto search_again; 880 goto search_again;
839 } 881 }
@@ -852,7 +894,11 @@ hit_next:
852 err = split_state(tree, state, prealloc, end + 1); 894 err = split_state(tree, state, prealloc, end + 1);
853 BUG_ON(err == -EEXIST); 895 BUG_ON(err == -EEXIST);
854 896
855 set_state_bits(tree, prealloc, bits); 897 err = set_state_bits(tree, prealloc, bits);
898 if (err) {
899 prealloc = NULL;
900 goto out;
901 }
856 cache_state(prealloc, cached_state); 902 cache_state(prealloc, cached_state);
857 merge_state(tree, prealloc); 903 merge_state(tree, prealloc);
858 prealloc = NULL; 904 prealloc = NULL;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 14ed16fd862d..4794ec891fed 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -60,8 +60,13 @@ struct extent_io_ops {
60 struct extent_state *state, int uptodate); 60 struct extent_state *state, int uptodate);
61 int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, 61 int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
62 unsigned long old, unsigned long bits); 62 unsigned long old, unsigned long bits);
63 int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, 63 int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
64 unsigned long old, unsigned long bits); 64 unsigned long bits);
65 int (*merge_extent_hook)(struct inode *inode,
66 struct extent_state *new,
67 struct extent_state *other);
68 int (*split_extent_hook)(struct inode *inode,
69 struct extent_state *orig, u64 split);
65 int (*write_cache_pages_lock_hook)(struct page *page); 70 int (*write_cache_pages_lock_hook)(struct page *page);
66}; 71};
67 72
@@ -79,10 +84,14 @@ struct extent_state {
79 u64 start; 84 u64 start;
80 u64 end; /* inclusive */ 85 u64 end; /* inclusive */
81 struct rb_node rb_node; 86 struct rb_node rb_node;
87
88 /* ADD NEW ELEMENTS AFTER THIS */
82 struct extent_io_tree *tree; 89 struct extent_io_tree *tree;
83 wait_queue_head_t wq; 90 wait_queue_head_t wq;
84 atomic_t refs; 91 atomic_t refs;
85 unsigned long state; 92 unsigned long state;
93 u64 split_start;
94 u64 split_end;
86 95
87 /* for use by the FS */ 96 /* for use by the FS */
88 u64 private; 97 u64 private;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 571ad3c13b47..1be96ba6f6bb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -123,7 +123,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
123 root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 123 root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
124 124
125 end_of_last_block = start_pos + num_bytes - 1; 125 end_of_last_block = start_pos + num_bytes - 1;
126 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); 126 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
127 if (err)
128 return err;
129
127 for (i = 0; i < num_pages; i++) { 130 for (i = 0; i < num_pages; i++) {
128 struct page *p = pages[i]; 131 struct page *p = pages[i];
129 SetPageUptodate(p); 132 SetPageUptodate(p);
@@ -927,6 +930,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
927 err = file_remove_suid(file); 930 err = file_remove_suid(file);
928 if (err) 931 if (err)
929 goto out_nolock; 932 goto out_nolock;
933
934 err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
935 if (err)
936 goto out_nolock;
937
930 file_update_time(file); 938 file_update_time(file);
931 939
932 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 940 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
@@ -1028,6 +1036,7 @@ out:
1028 mutex_unlock(&inode->i_mutex); 1036 mutex_unlock(&inode->i_mutex);
1029 if (ret) 1037 if (ret)
1030 err = ret; 1038 err = ret;
1039 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
1031 1040
1032out_nolock: 1041out_nolock:
1033 kfree(pages); 1042 kfree(pages);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 01c5f8b5a34e..3cc5677f5440 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1159,6 +1159,83 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1159 return ret; 1159 return ret;
1160} 1160}
1161 1161
1162static int btrfs_split_extent_hook(struct inode *inode,
1163 struct extent_state *orig, u64 split)
1164{
1165 struct btrfs_root *root = BTRFS_I(inode)->root;
1166 u64 size;
1167
1168 if (!(orig->state & EXTENT_DELALLOC))
1169 return 0;
1170
1171 size = orig->end - orig->start + 1;
1172 if (size > root->fs_info->max_extent) {
1173 u64 num_extents;
1174 u64 new_size;
1175
1176 new_size = orig->end - split + 1;
1177 num_extents = div64_u64(size + root->fs_info->max_extent - 1,
1178 root->fs_info->max_extent);
1179
1180 /*
1181 * if we break a large extent up then leave delalloc_extents be,
1182 * since we've already accounted for the large extent.
1183 */
1184 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1185 root->fs_info->max_extent) < num_extents)
1186 return 0;
1187 }
1188
1189 BTRFS_I(inode)->delalloc_extents++;
1190
1191 return 0;
1192}
1193
1194/*
1195 * extent_io.c merge_extent_hook, used to track merged delayed allocation
1196 * extents so we can keep track of new extents that are just merged onto old
1197 * extents, such as when we are doing sequential writes, so we can properly
1198 * account for the metadata space we'll need.
1199 */
1200static int btrfs_merge_extent_hook(struct inode *inode,
1201 struct extent_state *new,
1202 struct extent_state *other)
1203{
1204 struct btrfs_root *root = BTRFS_I(inode)->root;
1205 u64 new_size, old_size;
1206 u64 num_extents;
1207
1208 /* not delalloc, ignore it */
1209 if (!(other->state & EXTENT_DELALLOC))
1210 return 0;
1211
1212 old_size = other->end - other->start + 1;
1213 if (new->start < other->start)
1214 new_size = other->end - new->start + 1;
1215 else
1216 new_size = new->end - other->start + 1;
1217
1218 /* we're not bigger than the max, unreserve the space and go */
1219 if (new_size <= root->fs_info->max_extent) {
1220 BTRFS_I(inode)->delalloc_extents--;
1221 return 0;
1222 }
1223
1224 /*
1225 * If we grew by another max_extent, just return, we want to keep that
1226 * reserved amount.
1227 */
1228 num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
1229 root->fs_info->max_extent);
1230 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1231 root->fs_info->max_extent) > num_extents)
1232 return 0;
1233
1234 BTRFS_I(inode)->delalloc_extents--;
1235
1236 return 0;
1237}
1238
1162/* 1239/*
1163 * extent_io.c set_bit_hook, used to track delayed allocation 1240 * extent_io.c set_bit_hook, used to track delayed allocation
1164 * bytes in this file, and to maintain the list of inodes that 1241 * bytes in this file, and to maintain the list of inodes that
@@ -1167,6 +1244,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1167static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, 1244static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1168 unsigned long old, unsigned long bits) 1245 unsigned long old, unsigned long bits)
1169{ 1246{
1247
1170 /* 1248 /*
1171 * set_bit and clear bit hooks normally require _irqsave/restore 1249 * set_bit and clear bit hooks normally require _irqsave/restore
1172 * but in this case, we are only testeing for the DELALLOC 1250 * but in this case, we are only testeing for the DELALLOC
@@ -1174,6 +1252,8 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1174 */ 1252 */
1175 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1253 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
1176 struct btrfs_root *root = BTRFS_I(inode)->root; 1254 struct btrfs_root *root = BTRFS_I(inode)->root;
1255
1256 BTRFS_I(inode)->delalloc_extents++;
1177 btrfs_delalloc_reserve_space(root, inode, end - start + 1); 1257 btrfs_delalloc_reserve_space(root, inode, end - start + 1);
1178 spin_lock(&root->fs_info->delalloc_lock); 1258 spin_lock(&root->fs_info->delalloc_lock);
1179 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1259 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
@@ -1190,22 +1270,27 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1190/* 1270/*
1191 * extent_io.c clear_bit_hook, see set_bit_hook for why 1271 * extent_io.c clear_bit_hook, see set_bit_hook for why
1192 */ 1272 */
1193static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, 1273static int btrfs_clear_bit_hook(struct inode *inode,
1194 unsigned long old, unsigned long bits) 1274 struct extent_state *state, unsigned long bits)
1195{ 1275{
1196 /* 1276 /*
1197 * set_bit and clear bit hooks normally require _irqsave/restore 1277 * set_bit and clear bit hooks normally require _irqsave/restore
1198 * but in this case, we are only testeing for the DELALLOC 1278 * but in this case, we are only testeing for the DELALLOC
1199 * bit, which is only set or cleared with irqs on 1279 * bit, which is only set or cleared with irqs on
1200 */ 1280 */
1201 if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1281 if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
1202 struct btrfs_root *root = BTRFS_I(inode)->root; 1282 struct btrfs_root *root = BTRFS_I(inode)->root;
1203 1283
1284 BTRFS_I(inode)->delalloc_extents--;
1285 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
1286
1204 spin_lock(&root->fs_info->delalloc_lock); 1287 spin_lock(&root->fs_info->delalloc_lock);
1205 if (end - start + 1 > root->fs_info->delalloc_bytes) { 1288 if (state->end - state->start + 1 >
1289 root->fs_info->delalloc_bytes) {
1206 printk(KERN_INFO "btrfs warning: delalloc account " 1290 printk(KERN_INFO "btrfs warning: delalloc account "
1207 "%llu %llu\n", 1291 "%llu %llu\n",
1208 (unsigned long long)end - start + 1, 1292 (unsigned long long)
1293 state->end - state->start + 1,
1209 (unsigned long long) 1294 (unsigned long long)
1210 root->fs_info->delalloc_bytes); 1295 root->fs_info->delalloc_bytes);
1211 btrfs_delalloc_free_space(root, inode, (u64)-1); 1296 btrfs_delalloc_free_space(root, inode, (u64)-1);
@@ -1213,9 +1298,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
1213 BTRFS_I(inode)->delalloc_bytes = 0; 1298 BTRFS_I(inode)->delalloc_bytes = 0;
1214 } else { 1299 } else {
1215 btrfs_delalloc_free_space(root, inode, 1300 btrfs_delalloc_free_space(root, inode,
1216 end - start + 1); 1301 state->end -
1217 root->fs_info->delalloc_bytes -= end - start + 1; 1302 state->start + 1);
1218 BTRFS_I(inode)->delalloc_bytes -= end - start + 1; 1303 root->fs_info->delalloc_bytes -= state->end -
1304 state->start + 1;
1305 BTRFS_I(inode)->delalloc_bytes -= state->end -
1306 state->start + 1;
1219 } 1307 }
1220 if (BTRFS_I(inode)->delalloc_bytes == 0 && 1308 if (BTRFS_I(inode)->delalloc_bytes == 0 &&
1221 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1309 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
@@ -2950,7 +3038,12 @@ again:
2950 goto again; 3038 goto again;
2951 } 3039 }
2952 3040
2953 btrfs_set_extent_delalloc(inode, page_start, page_end); 3041 ret = btrfs_set_extent_delalloc(inode, page_start, page_end);
3042 if (ret) {
3043 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3044 goto out_unlock;
3045 }
3046
2954 ret = 0; 3047 ret = 0;
2955 if (offset != PAGE_CACHE_SIZE) { 3048 if (offset != PAGE_CACHE_SIZE) {
2956 kaddr = kmap(page); 3049 kaddr = kmap(page);
@@ -2981,15 +3074,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2981 u64 last_byte; 3074 u64 last_byte;
2982 u64 cur_offset; 3075 u64 cur_offset;
2983 u64 hole_size; 3076 u64 hole_size;
2984 int err; 3077 int err = 0;
2985 3078
2986 if (size <= hole_start) 3079 if (size <= hole_start)
2987 return 0; 3080 return 0;
2988 3081
2989 err = btrfs_check_metadata_free_space(root);
2990 if (err)
2991 return err;
2992
2993 btrfs_truncate_page(inode->i_mapping, inode->i_size); 3082 btrfs_truncate_page(inode->i_mapping, inode->i_size);
2994 3083
2995 while (1) { 3084 while (1) {
@@ -3024,12 +3113,18 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3024 cur_offset, &hint_byte, 1); 3113 cur_offset, &hint_byte, 1);
3025 if (err) 3114 if (err)
3026 break; 3115 break;
3116
3117 err = btrfs_reserve_metadata_space(root, 1);
3118 if (err)
3119 break;
3120
3027 err = btrfs_insert_file_extent(trans, root, 3121 err = btrfs_insert_file_extent(trans, root,
3028 inode->i_ino, cur_offset, 0, 3122 inode->i_ino, cur_offset, 0,
3029 0, hole_size, 0, hole_size, 3123 0, hole_size, 0, hole_size,
3030 0, 0, 0); 3124 0, 0, 0);
3031 btrfs_drop_extent_cache(inode, hole_start, 3125 btrfs_drop_extent_cache(inode, hole_start,
3032 last_byte - 1, 0); 3126 last_byte - 1, 0);
3127 btrfs_unreserve_metadata_space(root, 1);
3033 } 3128 }
3034 free_extent_map(em); 3129 free_extent_map(em);
3035 cur_offset = last_byte; 3130 cur_offset = last_byte;
@@ -3990,11 +4085,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
3990 if (!new_valid_dev(rdev)) 4085 if (!new_valid_dev(rdev))
3991 return -EINVAL; 4086 return -EINVAL;
3992 4087
3993 err = btrfs_check_metadata_free_space(root); 4088 /*
4089 * 2 for inode item and ref
4090 * 2 for dir items
4091 * 1 for xattr if selinux is on
4092 */
4093 err = btrfs_reserve_metadata_space(root, 5);
3994 if (err) 4094 if (err)
3995 goto fail; 4095 return err;
3996 4096
3997 trans = btrfs_start_transaction(root, 1); 4097 trans = btrfs_start_transaction(root, 1);
4098 if (!trans)
4099 goto fail;
3998 btrfs_set_trans_block_group(trans, dir); 4100 btrfs_set_trans_block_group(trans, dir);
3999 4101
4000 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); 4102 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
@@ -4032,6 +4134,7 @@ out_unlock:
4032 nr = trans->blocks_used; 4134 nr = trans->blocks_used;
4033 btrfs_end_transaction_throttle(trans, root); 4135 btrfs_end_transaction_throttle(trans, root);
4034fail: 4136fail:
4137 btrfs_unreserve_metadata_space(root, 5);
4035 if (drop_inode) { 4138 if (drop_inode) {
4036 inode_dec_link_count(inode); 4139 inode_dec_link_count(inode);
4037 iput(inode); 4140 iput(inode);
@@ -4052,10 +4155,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4052 u64 objectid; 4155 u64 objectid;
4053 u64 index = 0; 4156 u64 index = 0;
4054 4157
4055 err = btrfs_check_metadata_free_space(root); 4158 /*
4159 * 2 for inode item and ref
4160 * 2 for dir items
4161 * 1 for xattr if selinux is on
4162 */
4163 err = btrfs_reserve_metadata_space(root, 5);
4056 if (err) 4164 if (err)
4057 goto fail; 4165 return err;
4166
4058 trans = btrfs_start_transaction(root, 1); 4167 trans = btrfs_start_transaction(root, 1);
4168 if (!trans)
4169 goto fail;
4059 btrfs_set_trans_block_group(trans, dir); 4170 btrfs_set_trans_block_group(trans, dir);
4060 4171
4061 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); 4172 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
@@ -4096,6 +4207,7 @@ out_unlock:
4096 nr = trans->blocks_used; 4207 nr = trans->blocks_used;
4097 btrfs_end_transaction_throttle(trans, root); 4208 btrfs_end_transaction_throttle(trans, root);
4098fail: 4209fail:
4210 btrfs_unreserve_metadata_space(root, 5);
4099 if (drop_inode) { 4211 if (drop_inode) {
4100 inode_dec_link_count(inode); 4212 inode_dec_link_count(inode);
4101 iput(inode); 4213 iput(inode);
@@ -4118,10 +4230,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4118 if (inode->i_nlink == 0) 4230 if (inode->i_nlink == 0)
4119 return -ENOENT; 4231 return -ENOENT;
4120 4232
4121 btrfs_inc_nlink(inode); 4233 /*
4122 err = btrfs_check_metadata_free_space(root); 4234 * 1 item for inode ref
4235 * 2 items for dir items
4236 */
4237 err = btrfs_reserve_metadata_space(root, 3);
4123 if (err) 4238 if (err)
4124 goto fail; 4239 return err;
4240
4241 btrfs_inc_nlink(inode);
4242
4125 err = btrfs_set_inode_index(dir, &index); 4243 err = btrfs_set_inode_index(dir, &index);
4126 if (err) 4244 if (err)
4127 goto fail; 4245 goto fail;
@@ -4145,6 +4263,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4145 nr = trans->blocks_used; 4263 nr = trans->blocks_used;
4146 btrfs_end_transaction_throttle(trans, root); 4264 btrfs_end_transaction_throttle(trans, root);
4147fail: 4265fail:
4266 btrfs_unreserve_metadata_space(root, 3);
4148 if (drop_inode) { 4267 if (drop_inode) {
4149 inode_dec_link_count(inode); 4268 inode_dec_link_count(inode);
4150 iput(inode); 4269 iput(inode);
@@ -4164,17 +4283,21 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4164 u64 index = 0; 4283 u64 index = 0;
4165 unsigned long nr = 1; 4284 unsigned long nr = 1;
4166 4285
4167 err = btrfs_check_metadata_free_space(root); 4286 /*
4287 * 2 items for inode and ref
4288 * 2 items for dir items
4289 * 1 for xattr if selinux is on
4290 */
4291 err = btrfs_reserve_metadata_space(root, 5);
4168 if (err) 4292 if (err)
4169 goto out_unlock; 4293 return err;
4170 4294
4171 trans = btrfs_start_transaction(root, 1); 4295 trans = btrfs_start_transaction(root, 1);
4172 btrfs_set_trans_block_group(trans, dir); 4296 if (!trans) {
4173 4297 err = -ENOMEM;
4174 if (IS_ERR(trans)) {
4175 err = PTR_ERR(trans);
4176 goto out_unlock; 4298 goto out_unlock;
4177 } 4299 }
4300 btrfs_set_trans_block_group(trans, dir);
4178 4301
4179 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); 4302 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4180 if (err) { 4303 if (err) {
@@ -4223,6 +4346,7 @@ out_fail:
4223 btrfs_end_transaction_throttle(trans, root); 4346 btrfs_end_transaction_throttle(trans, root);
4224 4347
4225out_unlock: 4348out_unlock:
4349 btrfs_unreserve_metadata_space(root, 5);
4226 if (drop_on_err) 4350 if (drop_on_err)
4227 iput(inode); 4351 iput(inode);
4228 btrfs_btree_balance_dirty(root, nr); 4352 btrfs_btree_balance_dirty(root, nr);
@@ -4747,6 +4871,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4747 goto out; 4871 goto out;
4748 } 4872 }
4749 4873
4874 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
4875 if (ret) {
4876 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
4877 ret = VM_FAULT_SIGBUS;
4878 goto out;
4879 }
4880
4750 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ 4881 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
4751again: 4882again:
4752 lock_page(page); 4883 lock_page(page);
@@ -4778,7 +4909,12 @@ again:
4778 goto again; 4909 goto again;
4779 } 4910 }
4780 4911
4781 btrfs_set_extent_delalloc(inode, page_start, page_end); 4912 ret = btrfs_set_extent_delalloc(inode, page_start, page_end);
4913 if (ret) {
4914 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4915 ret = VM_FAULT_SIGBUS;
4916 goto out_unlock;
4917 }
4782 ret = 0; 4918 ret = 0;
4783 4919
4784 /* page is wholly or partially inside EOF */ 4920 /* page is wholly or partially inside EOF */
@@ -4801,6 +4937,7 @@ again:
4801 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4937 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4802 4938
4803out_unlock: 4939out_unlock:
4940 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
4804 if (!ret) 4941 if (!ret)
4805 return VM_FAULT_LOCKED; 4942 return VM_FAULT_LOCKED;
4806 unlock_page(page); 4943 unlock_page(page);
@@ -4917,6 +5054,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
4917 return NULL; 5054 return NULL;
4918 ei->last_trans = 0; 5055 ei->last_trans = 0;
4919 ei->logged_trans = 0; 5056 ei->logged_trans = 0;
5057 ei->delalloc_extents = 0;
5058 ei->delalloc_reserved_extents = 0;
4920 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 5059 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
4921 INIT_LIST_HEAD(&ei->i_orphan); 5060 INIT_LIST_HEAD(&ei->i_orphan);
4922 INIT_LIST_HEAD(&ei->ordered_operations); 5061 INIT_LIST_HEAD(&ei->ordered_operations);
@@ -5070,7 +5209,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5070 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) 5209 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
5071 return -ENOTEMPTY; 5210 return -ENOTEMPTY;
5072 5211
5073 ret = btrfs_check_metadata_free_space(root); 5212 /*
5213 * 2 items for dir items
5214 * 1 item for orphan entry
5215 * 1 item for ref
5216 */
5217 ret = btrfs_reserve_metadata_space(root, 4);
5074 if (ret) 5218 if (ret)
5075 return ret; 5219 return ret;
5076 5220
@@ -5185,6 +5329,8 @@ out_fail:
5185 5329
5186 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 5330 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5187 up_read(&root->fs_info->subvol_sem); 5331 up_read(&root->fs_info->subvol_sem);
5332
5333 btrfs_unreserve_metadata_space(root, 4);
5188 return ret; 5334 return ret;
5189} 5335}
5190 5336
@@ -5256,11 +5402,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5256 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 5402 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
5257 return -ENAMETOOLONG; 5403 return -ENAMETOOLONG;
5258 5404
5259 err = btrfs_check_metadata_free_space(root); 5405 /*
5406 * 2 items for inode item and ref
5407 * 2 items for dir items
5408 * 1 item for xattr if selinux is on
5409 */
5410 err = btrfs_reserve_metadata_space(root, 5);
5260 if (err) 5411 if (err)
5261 goto out_fail; 5412 return err;
5262 5413
5263 trans = btrfs_start_transaction(root, 1); 5414 trans = btrfs_start_transaction(root, 1);
5415 if (!trans)
5416 goto out_fail;
5264 btrfs_set_trans_block_group(trans, dir); 5417 btrfs_set_trans_block_group(trans, dir);
5265 5418
5266 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); 5419 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
@@ -5341,6 +5494,7 @@ out_unlock:
5341 nr = trans->blocks_used; 5494 nr = trans->blocks_used;
5342 btrfs_end_transaction_throttle(trans, root); 5495 btrfs_end_transaction_throttle(trans, root);
5343out_fail: 5496out_fail:
5497 btrfs_unreserve_metadata_space(root, 5);
5344 if (drop_inode) { 5498 if (drop_inode) {
5345 inode_dec_link_count(inode); 5499 inode_dec_link_count(inode);
5346 iput(inode); 5500 iput(inode);
@@ -5362,6 +5516,11 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5362 5516
5363 while (num_bytes > 0) { 5517 while (num_bytes > 0) {
5364 alloc_size = min(num_bytes, root->fs_info->max_extent); 5518 alloc_size = min(num_bytes, root->fs_info->max_extent);
5519
5520 ret = btrfs_reserve_metadata_space(root, 1);
5521 if (ret)
5522 goto out;
5523
5365 ret = btrfs_reserve_extent(trans, root, alloc_size, 5524 ret = btrfs_reserve_extent(trans, root, alloc_size,
5366 root->sectorsize, 0, alloc_hint, 5525 root->sectorsize, 0, alloc_hint,
5367 (u64)-1, &ins, 1); 5526 (u64)-1, &ins, 1);
@@ -5381,6 +5540,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5381 num_bytes -= ins.offset; 5540 num_bytes -= ins.offset;
5382 cur_offset += ins.offset; 5541 cur_offset += ins.offset;
5383 alloc_hint = ins.objectid + ins.offset; 5542 alloc_hint = ins.objectid + ins.offset;
5543 btrfs_unreserve_metadata_space(root, 1);
5384 } 5544 }
5385out: 5545out:
5386 if (cur_offset > start) { 5546 if (cur_offset > start) {
@@ -5566,6 +5726,8 @@ static struct extent_io_ops btrfs_extent_io_ops = {
5566 .readpage_io_failed_hook = btrfs_io_failed_hook, 5726 .readpage_io_failed_hook = btrfs_io_failed_hook,
5567 .set_bit_hook = btrfs_set_bit_hook, 5727 .set_bit_hook = btrfs_set_bit_hook,
5568 .clear_bit_hook = btrfs_clear_bit_hook, 5728 .clear_bit_hook = btrfs_clear_bit_hook,
5729 .merge_extent_hook = btrfs_merge_extent_hook,
5730 .split_extent_hook = btrfs_split_extent_hook,
5569}; 5731};
5570 5732
5571/* 5733/*
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a8577a7f26ab..4de7ef6f8603 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -239,7 +239,13 @@ static noinline int create_subvol(struct btrfs_root *root,
239 u64 index = 0; 239 u64 index = 0;
240 unsigned long nr = 1; 240 unsigned long nr = 1;
241 241
242 ret = btrfs_check_metadata_free_space(root); 242 /*
243 * 1 - inode item
244 * 2 - refs
245 * 1 - root item
246 * 2 - dir items
247 */
248 ret = btrfs_reserve_metadata_space(root, 6);
243 if (ret) 249 if (ret)
244 return ret; 250 return ret;
245 251
@@ -340,6 +346,9 @@ fail:
340 err = btrfs_commit_transaction(trans, root); 346 err = btrfs_commit_transaction(trans, root);
341 if (err && !ret) 347 if (err && !ret)
342 ret = err; 348 ret = err;
349
350 btrfs_unreserve_metadata_space(root, 6);
351 btrfs_btree_balance_dirty(root, nr);
343 return ret; 352 return ret;
344} 353}
345 354
@@ -355,19 +364,27 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
355 if (!root->ref_cows) 364 if (!root->ref_cows)
356 return -EINVAL; 365 return -EINVAL;
357 366
358 ret = btrfs_check_metadata_free_space(root); 367 /*
368 * 1 - inode item
369 * 2 - refs
370 * 1 - root item
371 * 2 - dir items
372 */
373 ret = btrfs_reserve_metadata_space(root, 6);
359 if (ret) 374 if (ret)
360 goto fail_unlock; 375 goto fail_unlock;
361 376
362 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 377 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
363 if (!pending_snapshot) { 378 if (!pending_snapshot) {
364 ret = -ENOMEM; 379 ret = -ENOMEM;
380 btrfs_unreserve_metadata_space(root, 6);
365 goto fail_unlock; 381 goto fail_unlock;
366 } 382 }
367 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); 383 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
368 if (!pending_snapshot->name) { 384 if (!pending_snapshot->name) {
369 ret = -ENOMEM; 385 ret = -ENOMEM;
370 kfree(pending_snapshot); 386 kfree(pending_snapshot);
387 btrfs_unreserve_metadata_space(root, 6);
371 goto fail_unlock; 388 goto fail_unlock;
372 } 389 }
373 memcpy(pending_snapshot->name, name, namelen); 390 memcpy(pending_snapshot->name, name, namelen);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 88f866f85e7a..0b8f36d4400a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -186,6 +186,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
186 h->alloc_exclude_start = 0; 186 h->alloc_exclude_start = 0;
187 h->delayed_ref_updates = 0; 187 h->delayed_ref_updates = 0;
188 188
189 if (!current->journal_info)
190 current->journal_info = h;
191
189 root->fs_info->running_transaction->use_count++; 192 root->fs_info->running_transaction->use_count++;
190 record_root_in_trans(h, root); 193 record_root_in_trans(h, root);
191 mutex_unlock(&root->fs_info->trans_mutex); 194 mutex_unlock(&root->fs_info->trans_mutex);
@@ -317,6 +320,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
317 wake_up(&cur_trans->writer_wait); 320 wake_up(&cur_trans->writer_wait);
318 put_transaction(cur_trans); 321 put_transaction(cur_trans);
319 mutex_unlock(&info->trans_mutex); 322 mutex_unlock(&info->trans_mutex);
323
324 if (current->journal_info == trans)
325 current->journal_info = NULL;
320 memset(trans, 0, sizeof(*trans)); 326 memset(trans, 0, sizeof(*trans));
321 kmem_cache_free(btrfs_trans_handle_cachep, trans); 327 kmem_cache_free(btrfs_trans_handle_cachep, trans);
322 328
@@ -743,6 +749,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
743 memcpy(&pending->root_key, &key, sizeof(key)); 749 memcpy(&pending->root_key, &key, sizeof(key));
744fail: 750fail:
745 kfree(new_root_item); 751 kfree(new_root_item);
752 btrfs_unreserve_metadata_space(root, 6);
746 return ret; 753 return ret;
747} 754}
748 755
@@ -1059,6 +1066,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1059 1066
1060 mutex_unlock(&root->fs_info->trans_mutex); 1067 mutex_unlock(&root->fs_info->trans_mutex);
1061 1068
1069 if (current->journal_info == trans)
1070 current->journal_info = NULL;
1071
1062 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1072 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1063 return ret; 1073 return ret;
1064} 1074}