Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- fs/btrfs/extent-tree.c | 2264
 1 file changed, 1366 insertions(+), 898 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1727b26fb194..32d094002a57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -22,6 +22,7 @@
 #include <linux/sort.h>
 #include <linux/rcupdate.h>
 #include <linux/kthread.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "hash.h"
 #include "ctree.h"
@@ -34,10 +35,9 @@
 
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
-			      u64 bytenr, u64 num_bytes, int alloc,
-			      int mark_free);
-static int update_reserved_extents(struct btrfs_block_group_cache *cache,
-				   u64 num_bytes, int reserve);
+			      u64 bytenr, u64 num_bytes, int alloc);
+static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
+				 u64 num_bytes, int reserve, int sinfo);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       u64 bytenr, u64 num_bytes, u64 parent,
@@ -60,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force);
-static int pin_down_bytes(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root,
-			  struct btrfs_path *path,
-			  u64 bytenr, u64 num_bytes,
-			  int is_data, int reserved,
-			  struct extent_buffer **must_clean);
 static int find_next_key(struct btrfs_path *path, int level,
 			 struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
@@ -90,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
 
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
 {
-	if (atomic_dec_and_test(&cache->count))
+	if (atomic_dec_and_test(&cache->count)) {
+		WARN_ON(cache->pinned > 0);
+		WARN_ON(cache->reserved > 0);
+		WARN_ON(cache->reserved_pinned > 0);
 		kfree(cache);
+	}
 }
 
 /*
@@ -318,7 +316,7 @@ static int caching_kthread(void *data)
 
 	exclude_super_stripes(extent_root, block_group);
 	spin_lock(&block_group->space_info->lock);
-	block_group->space_info->bytes_super += block_group->bytes_super;
+	block_group->space_info->bytes_readonly += block_group->bytes_super;
 	spin_unlock(&block_group->space_info->lock);
 
 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
@@ -506,6 +504,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 	struct list_head *head = &info->space_info;
 	struct btrfs_space_info *found;
 
+	flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
+		 BTRFS_BLOCK_GROUP_METADATA;
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
 		if (found->flags == flags) {
@@ -609,6 +610,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
 }
 
 /*
+ * helper function to lookup reference count and flags of extent.
+ *
+ * the head node for delayed ref is used to store the sum of all the
+ * reference count modifications queued up in the rbtree. the head
+ * node may also store the extent flags to set. This way you can check
+ * to see what the reference count and extent flags would be if all of
+ * the delayed refs are not processed.
+ */
+int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root, u64 bytenr,
+			     u64 num_bytes, u64 *refs, u64 *flags)
+{
+	struct btrfs_delayed_ref_head *head;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_path *path;
+	struct btrfs_extent_item *ei;
+	struct extent_buffer *leaf;
+	struct btrfs_key key;
+	u32 item_size;
+	u64 num_refs;
+	u64 extent_flags;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = bytenr;
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = num_bytes;
+	if (!trans) {
+		path->skip_locking = 1;
+		path->search_commit_root = 1;
+	}
+again:
+	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
+				&key, path, 0, 0);
+	if (ret < 0)
+		goto out_free;
+
+	if (ret == 0) {
+		leaf = path->nodes[0];
+		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+		if (item_size >= sizeof(*ei)) {
+			ei = btrfs_item_ptr(leaf, path->slots[0],
+					    struct btrfs_extent_item);
+			num_refs = btrfs_extent_refs(leaf, ei);
+			extent_flags = btrfs_extent_flags(leaf, ei);
+		} else {
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+			struct btrfs_extent_item_v0 *ei0;
+			BUG_ON(item_size != sizeof(*ei0));
+			ei0 = btrfs_item_ptr(leaf, path->slots[0],
+					     struct btrfs_extent_item_v0);
+			num_refs = btrfs_extent_refs_v0(leaf, ei0);
+			/* FIXME: this isn't correct for data */
+			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+#else
+			BUG();
+#endif
+		}
+		BUG_ON(num_refs == 0);
+	} else {
+		num_refs = 0;
+		extent_flags = 0;
+		ret = 0;
+	}
+
+	if (!trans)
+		goto out;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	spin_lock(&delayed_refs->lock);
+	head = btrfs_find_delayed_ref_head(trans, bytenr);
+	if (head) {
+		if (!mutex_trylock(&head->mutex)) {
+			atomic_inc(&head->node.refs);
+			spin_unlock(&delayed_refs->lock);
+
+			btrfs_release_path(root->fs_info->extent_root, path);
+
+			mutex_lock(&head->mutex);
+			mutex_unlock(&head->mutex);
+			btrfs_put_delayed_ref(&head->node);
+			goto again;
+		}
+		if (head->extent_op && head->extent_op->update_flags)
+			extent_flags |= head->extent_op->flags_to_set;
+		else
+			BUG_ON(num_refs == 0);
+
+		num_refs += head->node.ref_mod;
+		mutex_unlock(&head->mutex);
+	}
+	spin_unlock(&delayed_refs->lock);
+out:
+	WARN_ON(num_refs == 0);
+	if (refs)
+		*refs = num_refs;
+	if (flags)
+		*flags = extent_flags;
+out_free:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
  * Back reference rules. Back refs have three main goals:
  *
  * 1) differentiate between all holders of references to an extent so that
@@ -1588,7 +1696,7 @@ static void btrfs_issue_discard(struct block_device *bdev,
 				      u64 start, u64 len)
 {
 	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
-			     DISCARD_FL_BARRIER);
+			     BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
@@ -1870,7 +1978,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-
 /* helper function to actually process a single delayed ref entry */
 static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
@@ -1890,32 +1997,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
 		BUG_ON(extent_op);
 		head = btrfs_delayed_node_to_head(node);
 		if (insert_reserved) {
-			int mark_free = 0;
-			struct extent_buffer *must_clean = NULL;
-
-			ret = pin_down_bytes(trans, root, NULL,
-					     node->bytenr, node->num_bytes,
-					     head->is_data, 1, &must_clean);
-			if (ret > 0)
-				mark_free = 1;
-
-			if (must_clean) {
-				clean_tree_block(NULL, root, must_clean);
-				btrfs_tree_unlock(must_clean);
-				free_extent_buffer(must_clean);
-			}
+			btrfs_pin_extent(root, node->bytenr,
+					 node->num_bytes, 1);
 			if (head->is_data) {
 				ret = btrfs_del_csums(trans, root,
 						      node->bytenr,
 						      node->num_bytes);
 				BUG_ON(ret);
 			}
-			if (mark_free) {
-				ret = btrfs_free_reserved_extent(root,
-							node->bytenr,
-							node->num_bytes);
-				BUG_ON(ret);
-			}
 		}
 		mutex_unlock(&head->mutex);
 		return 0;
@@ -2346,6 +2435,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 	ret = 0;
 out:
 	btrfs_free_path(path);
+	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+		WARN_ON(ret > 0);
 	return ret;
 }
 
@@ -2659,12 +2750,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 			     struct btrfs_space_info **space_info)
 {
 	struct btrfs_space_info *found;
+	int i;
+	int factor;
+
+	if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+		     BTRFS_BLOCK_GROUP_RAID10))
+		factor = 2;
+	else
+		factor = 1;
 
 	found = __find_space_info(info, flags);
 	if (found) {
 		spin_lock(&found->lock);
 		found->total_bytes += total_bytes;
 		found->bytes_used += bytes_used;
+		found->disk_used += bytes_used * factor;
 		found->full = 0;
 		spin_unlock(&found->lock);
 		*space_info = found;
@@ -2674,16 +2774,20 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	if (!found)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&found->block_groups);
+	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+		INIT_LIST_HEAD(&found->block_groups[i]);
 	init_rwsem(&found->groups_sem);
 	spin_lock_init(&found->lock);
-	found->flags = flags;
+	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
+				BTRFS_BLOCK_GROUP_SYSTEM |
+				BTRFS_BLOCK_GROUP_METADATA);
 	found->total_bytes = total_bytes;
 	found->bytes_used = bytes_used;
+	found->disk_used = bytes_used * factor;
 	found->bytes_pinned = 0;
 	found->bytes_reserved = 0;
 	found->bytes_readonly = 0;
-	found->bytes_delalloc = 0;
+	found->bytes_may_use = 0;
 	found->full = 0;
 	found->force_alloc = 0;
 	*space_info = found;
@@ -2708,19 +2812,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	}
 }
 
-static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
-{
-	spin_lock(&cache->space_info->lock);
-	spin_lock(&cache->lock);
-	if (!cache->ro) {
-		cache->space_info->bytes_readonly += cache->key.offset -
-					btrfs_block_group_used(&cache->item);
-		cache->ro = 1;
-	}
-	spin_unlock(&cache->lock);
-	spin_unlock(&cache->space_info->lock);
-}
-
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
 	u64 num_devices = root->fs_info->fs_devices->rw_devices;
@@ -2749,492 +2840,49 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 	return flags;
 }
 
-static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
+static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-	struct btrfs_fs_info *info = root->fs_info;
-	u64 alloc_profile;
-
-	if (data) {
-		alloc_profile = info->avail_data_alloc_bits &
-				info->data_alloc_profile;
-		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
-	} else if (root == root->fs_info->chunk_root) {
-		alloc_profile = info->avail_system_alloc_bits &
-				info->system_alloc_profile;
-		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
-	} else {
-		alloc_profile = info->avail_metadata_alloc_bits &
-				info->metadata_alloc_profile;
-		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
-	}
-
-	return btrfs_reduce_alloc_profile(root, data);
+	if (flags & BTRFS_BLOCK_GROUP_DATA)
+		flags |= root->fs_info->avail_data_alloc_bits &
+			 root->fs_info->data_alloc_profile;
+	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
+		flags |= root->fs_info->avail_system_alloc_bits &
+			 root->fs_info->system_alloc_profile;
+	else if (flags & BTRFS_BLOCK_GROUP_METADATA)
+		flags |= root->fs_info->avail_metadata_alloc_bits &
+			 root->fs_info->metadata_alloc_profile;
+	return btrfs_reduce_alloc_profile(root, flags);
 }
 
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
 {
-	u64 alloc_target;
-
-	alloc_target = btrfs_get_alloc_profile(root, 1);
-	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
-						       alloc_target);
-}
-
-static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
-{
-	u64 num_bytes;
-	int level;
-
-	level = BTRFS_MAX_LEVEL - 2;
-	/*
-	 * NOTE: these calculations are absolutely the worst possible case.
-	 * This assumes that _every_ item we insert will require a new leaf, and
-	 * that the tree has grown to its maximum level size.
-	 */
-
-	/*
-	 * for every item we insert we could insert both an extent item and a
-	 * extent ref item. Then for ever item we insert, we will need to cow
-	 * both the original leaf, plus the leaf to the left and right of it.
-	 *
-	 * Unless we are talking about the extent root, then we just want the
-	 * number of items * 2, since we just need the extent item plus its ref.
-	 */
-	if (root == root->fs_info->extent_root)
-		num_bytes = num_items * 2;
-	else
-		num_bytes = (num_items + (2 * num_items)) * 3;
-
-	/*
-	 * num_bytes is total number of leaves we could need times the leaf
-	 * size, and then for every leaf we could end up cow'ing 2 nodes per
-	 * level, down to the leaf level.
-	 */
-	num_bytes = (num_bytes * root->leafsize) +
-		(num_bytes * (level * 2)) * root->nodesize;
-
-	return num_bytes;
-}
-
-/*
- * Unreserve metadata space for delalloc. If we have less reserved credits than
- * we have extents, this function does nothing.
- */
-int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
-					  struct inode *inode, int num_items)
-{
-	struct btrfs_fs_info *info = root->fs_info;
-	struct btrfs_space_info *meta_sinfo;
-	u64 num_bytes;
-	u64 alloc_target;
-	bool bug = false;
-
-	/* get the space info for where the metadata will live */
-	alloc_target = btrfs_get_alloc_profile(root, 0);
-	meta_sinfo = __find_space_info(info, alloc_target);
-
-	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
-					   num_items);
-
-	spin_lock(&meta_sinfo->lock);
-	spin_lock(&BTRFS_I(inode)->accounting_lock);
-	if (BTRFS_I(inode)->reserved_extents <=
-	    BTRFS_I(inode)->outstanding_extents) {
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		spin_unlock(&meta_sinfo->lock);
-		return 0;
-	}
-	spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
-	BTRFS_I(inode)->reserved_extents--;
-	BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
-
-	if (meta_sinfo->bytes_delalloc < num_bytes) {
-		bug = true;
-		meta_sinfo->bytes_delalloc = 0;
-	} else {
-		meta_sinfo->bytes_delalloc -= num_bytes;
-	}
-	spin_unlock(&meta_sinfo->lock);
-
-	BUG_ON(bug);
-
-	return 0;
-}
-
-static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
-{
-	u64 thresh;
-
-	thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
-		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
-		meta_sinfo->bytes_may_use;
+	u64 flags;
 
-	thresh = meta_sinfo->total_bytes - thresh;
-	thresh *= 80;
-	do_div(thresh, 100);
-	if (thresh <= meta_sinfo->bytes_delalloc)
-		meta_sinfo->force_delalloc = 1;
+	if (data)
+		flags = BTRFS_BLOCK_GROUP_DATA;
+	else if (root == root->fs_info->chunk_root)
+		flags = BTRFS_BLOCK_GROUP_SYSTEM;
 	else
-		meta_sinfo->force_delalloc = 0;
-}
-
-struct async_flush {
-	struct btrfs_root *root;
-	struct btrfs_space_info *info;
-	struct btrfs_work work;
-};
-
-static noinline void flush_delalloc_async(struct btrfs_work *work)
-{
-	struct async_flush *async;
-	struct btrfs_root *root;
-	struct btrfs_space_info *info;
-
-	async = container_of(work, struct async_flush, work);
-	root = async->root;
-	info = async->info;
-
-	btrfs_start_delalloc_inodes(root, 0);
-	wake_up(&info->flush_wait);
-	btrfs_wait_ordered_extents(root, 0, 0);
-
-	spin_lock(&info->lock);
-	info->flushing = 0;
-	spin_unlock(&info->lock);
-	wake_up(&info->flush_wait);
-
-	kfree(async);
-}
-
-static void wait_on_flush(struct btrfs_space_info *info)
-{
-	DEFINE_WAIT(wait);
-	u64 used;
-
-	while (1) {
-		prepare_to_wait(&info->flush_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		spin_lock(&info->lock);
-		if (!info->flushing) {
-			spin_unlock(&info->lock);
-			break;
-		}
-
-		used = info->bytes_used + info->bytes_reserved +
-			info->bytes_pinned + info->bytes_readonly +
-			info->bytes_super + info->bytes_root +
-			info->bytes_may_use + info->bytes_delalloc;
-		if (used < info->total_bytes) {
-			spin_unlock(&info->lock);
-			break;
-		}
-		spin_unlock(&info->lock);
-		schedule();
-	}
-	finish_wait(&info->flush_wait, &wait);
-}
-
-static void flush_delalloc(struct btrfs_root *root,
-			   struct btrfs_space_info *info)
-{
-	struct async_flush *async;
-	bool wait = false;
-
-	spin_lock(&info->lock);
-
-	if (!info->flushing) {
-		info->flushing = 1;
-		init_waitqueue_head(&info->flush_wait);
-	} else {
-		wait = true;
-	}
-
-	spin_unlock(&info->lock);
-
-	if (wait) {
-		wait_on_flush(info);
-		return;
-	}
-
-	async = kzalloc(sizeof(*async), GFP_NOFS);
-	if (!async)
-		goto flush;
-
-	async->root = root;
-	async->info = info;
-	async->work.func = flush_delalloc_async;
-
-	btrfs_queue_worker(&root->fs_info->enospc_workers,
-			   &async->work);
-	wait_on_flush(info);
-	return;
-
-flush:
-	btrfs_start_delalloc_inodes(root, 0);
-	btrfs_wait_ordered_extents(root, 0, 0);
-
-	spin_lock(&info->lock);
-	info->flushing = 0;
-	spin_unlock(&info->lock);
-	wake_up(&info->flush_wait);
-}
-
-static int maybe_allocate_chunk(struct btrfs_root *root,
-				struct btrfs_space_info *info)
-{
-	struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
-	struct btrfs_trans_handle *trans;
-	bool wait = false;
-	int ret = 0;
-	u64 min_metadata;
-	u64 free_space;
-
-	free_space = btrfs_super_total_bytes(disk_super);
-	/*
-	 * we allow the metadata to grow to a max of either 10gb or 5% of the
-	 * space in the volume.
-	 */
-	min_metadata = min((u64)10 * 1024 * 1024 * 1024,
-			   div64_u64(free_space * 5, 100));
-	if (info->total_bytes >= min_metadata) {
-		spin_unlock(&info->lock);
-		return 0;
-	}
-
-	if (info->full) {
-		spin_unlock(&info->lock);
-		return 0;
-	}
-
-	if (!info->allocating_chunk) {
-		info->force_alloc = 1;
-		info->allocating_chunk = 1;
-		init_waitqueue_head(&info->allocate_wait);
-	} else {
-		wait = true;
-	}
-
-	spin_unlock(&info->lock);
-
-	if (wait) {
-		wait_event(info->allocate_wait,
-			   !info->allocating_chunk);
-		return 1;
-	}
-
-	trans = btrfs_start_transaction(root, 1);
-	if (!trans) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-			     4096 + 2 * 1024 * 1024,
-			     info->flags, 0);
-	btrfs_end_transaction(trans, root);
-	if (ret)
-		goto out;
-out:
-	spin_lock(&info->lock);
-	info->allocating_chunk = 0;
-	spin_unlock(&info->lock);
-	wake_up(&info->allocate_wait);
-
-	if (ret)
-		return 0;
-	return 1;
-}
-
-/*
- * Reserve metadata space for delalloc.
- */
-int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
-					struct inode *inode, int num_items)
-{
-	struct btrfs_fs_info *info = root->fs_info;
-	struct btrfs_space_info *meta_sinfo;
-	u64 num_bytes;
-	u64 used;
-	u64 alloc_target;
-	int flushed = 0;
-	int force_delalloc;
-
-	/* get the space info for where the metadata will live */
-	alloc_target = btrfs_get_alloc_profile(root, 0);
-	meta_sinfo = __find_space_info(info, alloc_target);
-
-	num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
-					   num_items);
-again:
-	spin_lock(&meta_sinfo->lock);
-
-	force_delalloc = meta_sinfo->force_delalloc;
-
-	if (unlikely(!meta_sinfo->bytes_root))
-		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
-
-	if (!flushed)
-		meta_sinfo->bytes_delalloc += num_bytes;
-
-	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
-		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
-		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
-
-	if (used > meta_sinfo->total_bytes) {
-		flushed++;
-
-		if (flushed == 1) {
-			if (maybe_allocate_chunk(root, meta_sinfo))
-				goto again;
-			flushed++;
-		} else {
-			spin_unlock(&meta_sinfo->lock);
-		}
-
-		if (flushed == 2) {
-			filemap_flush(inode->i_mapping);
-			goto again;
-		} else if (flushed == 3) {
-			flush_delalloc(root, meta_sinfo);
-			goto again;
-		}
-		spin_lock(&meta_sinfo->lock);
-		meta_sinfo->bytes_delalloc -= num_bytes;
-		spin_unlock(&meta_sinfo->lock);
-		printk(KERN_ERR "enospc, has %d, reserved %d\n",
-		       BTRFS_I(inode)->outstanding_extents,
-		       BTRFS_I(inode)->reserved_extents);
-		dump_space_info(meta_sinfo, 0, 0);
-		return -ENOSPC;
-	}
+		flags = BTRFS_BLOCK_GROUP_METADATA;
 
-	BTRFS_I(inode)->reserved_extents++;
-	check_force_delalloc(meta_sinfo);
-	spin_unlock(&meta_sinfo->lock);
-
-	if (!flushed && force_delalloc)
-		filemap_flush(inode->i_mapping);
-
-	return 0;
+	return get_alloc_profile(root, flags);
 }
 
-/*
- * unreserve num_items number of items worth of metadata space. This needs to
- * be paired with btrfs_reserve_metadata_space.
- *
- * NOTE: if you have the option, run this _AFTER_ you do a
- * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref
- * oprations which will result in more used metadata, so we want to make sure we
- * can do that without issue.
- */
-int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items)
-{
-	struct btrfs_fs_info *info = root->fs_info;
-	struct btrfs_space_info *meta_sinfo;
-	u64 num_bytes;
-	u64 alloc_target;
-	bool bug = false;
-
-	/* get the space info for where the metadata will live */
-	alloc_target = btrfs_get_alloc_profile(root, 0);
-	meta_sinfo = __find_space_info(info, alloc_target);
-
-	num_bytes = calculate_bytes_needed(root, num_items);
-
-	spin_lock(&meta_sinfo->lock);
-	if (meta_sinfo->bytes_may_use < num_bytes) {
-		bug = true;
-		meta_sinfo->bytes_may_use = 0;
-	} else {
-		meta_sinfo->bytes_may_use -= num_bytes;
-	}
-	spin_unlock(&meta_sinfo->lock);
-
-	BUG_ON(bug);
-
-	return 0;
-}
-
-/*
- * Reserve some metadata space for use. We'll calculate the worste case number
- * of bytes that would be needed to modify num_items number of items. If we
- * have space, fantastic, if not, you get -ENOSPC. Please call
- * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of
- * items you reserved, since whatever metadata you needed should have already
- * been allocated.
- *
- * This will commit the transaction to make more space if we don't have enough
- * metadata space. THe only time we don't do this is if we're reserving space
- * inside of a transaction, then we will just return -ENOSPC and it is the
- * callers responsibility to handle it properly.
- */
-int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items)
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
 {
-	struct btrfs_fs_info *info = root->fs_info;
-	struct btrfs_space_info *meta_sinfo;
-	u64 num_bytes;
-	u64 used;
-	u64 alloc_target;
-	int retries = 0;
-
-	/* get the space info for where the metadata will live */
-	alloc_target = btrfs_get_alloc_profile(root, 0);
-	meta_sinfo = __find_space_info(info, alloc_target);
-
-	num_bytes = calculate_bytes_needed(root, num_items);
-again:
-	spin_lock(&meta_sinfo->lock);
-
-	if (unlikely(!meta_sinfo->bytes_root))
-		meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
-
-	if (!retries)
-		meta_sinfo->bytes_may_use += num_bytes;
-
-	used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-		meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
-		meta_sinfo->bytes_super + meta_sinfo->bytes_root +
-		meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
-
-	if (used > meta_sinfo->total_bytes) {
-		retries++;
-		if (retries == 1) {
-			if (maybe_allocate_chunk(root, meta_sinfo))
-				goto again;
-			retries++;
-		} else {
-			spin_unlock(&meta_sinfo->lock);
-		}
-
-		if (retries == 2) {
-			flush_delalloc(root, meta_sinfo);
-			goto again;
-		}
-		spin_lock(&meta_sinfo->lock);
-		meta_sinfo->bytes_may_use -= num_bytes;
-		spin_unlock(&meta_sinfo->lock);
-
-		dump_space_info(meta_sinfo, 0, 0);
-		return -ENOSPC;
-	}
-
-	check_force_delalloc(meta_sinfo);
-	spin_unlock(&meta_sinfo->lock);
-
-	return 0;
+	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
+						       BTRFS_BLOCK_GROUP_DATA);
 }
 
 /*
  * This will check the space that the inode allocates from to make sure we have
  * enough space for bytes.
  */
-int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
-				u64 bytes)
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 {
 	struct btrfs_space_info *data_sinfo;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	u64 used;
 	int ret = 0, committed = 0;
 
 	/* make sure bytes are sectorsize aligned */
@@ -3247,10 +2895,11 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
 again:
 	/* make sure we have enough space to handle the data first */
 	spin_lock(&data_sinfo->lock);
-	if (data_sinfo->total_bytes - data_sinfo->bytes_used -
-	    data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
-	    data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
-	    data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
+	used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
+		data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
+		data_sinfo->bytes_may_use;
+
+	if (used + bytes > data_sinfo->total_bytes) {
 		struct btrfs_trans_handle *trans;
 
 		/*
@@ -3264,15 +2913,15 @@ again:
 			spin_unlock(&data_sinfo->lock);
 alloc:
 			alloc_target = btrfs_get_alloc_profile(root, 1);
-			trans = btrfs_start_transaction(root, 1);
-			if (!trans)
-				return -ENOMEM;
+			trans = btrfs_join_transaction(root, 1);
+			if (IS_ERR(trans))
+				return PTR_ERR(trans);
 
 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 					     bytes + 2 * 1024 * 1024,
 					     alloc_target, 0);
 			btrfs_end_transaction(trans, root);
-			if (ret)
+			if (ret < 0)
 				return ret;
 
 			if (!data_sinfo) {
@@ -3287,25 +2936,26 @@ alloc:
 		if (!committed && !root->fs_info->open_ioctl_trans) {
 			committed = 1;
 			trans = btrfs_join_transaction(root, 1);
-			if (!trans)
-				return -ENOMEM;
+			if (IS_ERR(trans))
+				return PTR_ERR(trans);
 			ret = btrfs_commit_transaction(trans, root);
 			if (ret)
 				return ret;
 			goto again;
 		}
 
-		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
-		       ", %llu bytes_used, %llu bytes_reserved, "
-		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
-		       "%llu total\n", (unsigned long long)bytes,
-		       (unsigned long long)data_sinfo->bytes_delalloc,
+#if 0 /* I hope we never need this code again, just in case */
+		printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
+		       "%llu bytes_reserved, " "%llu bytes_pinned, "
+		       "%llu bytes_readonly, %llu may use %llu total\n",
+		       (unsigned long long)bytes,
 		       (unsigned long long)data_sinfo->bytes_used,
 		       (unsigned long long)data_sinfo->bytes_reserved,
 		       (unsigned long long)data_sinfo->bytes_pinned,
 		       (unsigned long long)data_sinfo->bytes_readonly,
 		       (unsigned long long)data_sinfo->bytes_may_use,
 		       (unsigned long long)data_sinfo->total_bytes);
+#endif
 		return -ENOSPC;
 	}
 	data_sinfo->bytes_may_use += bytes;
@@ -3316,12 +2966,13 @@ alloc:
 }
 
 /*
- * if there was an error for whatever reason after calling
- * btrfs_check_data_free_space, call this so we can cleanup the counters.
+ * called when we are clearing an delalloc extent from the
+ * inode's io_tree or there was an error for whatever reason
+ * after calling btrfs_check_data_free_space
  */
-void btrfs_free_reserved_data_space(struct btrfs_root *root,
-				    struct inode *inode, u64 bytes)
+void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
 {
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_space_info *data_sinfo;
 
 	/* make sure bytes are sectorsize aligned */
@@ -3334,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root,
 	spin_unlock(&data_sinfo->lock);
 }
 
-/* called when we are adding a delalloc extent to the inode's io_tree */
-void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
-				  u64 bytes)
-{
-	struct btrfs_space_info *data_sinfo;
-
-	/* get the space info for where this inode will be storing its data */
-	data_sinfo = BTRFS_I(inode)->space_info;
-
-	/* make sure we have enough space to handle the data first */
-	spin_lock(&data_sinfo->lock);
-	data_sinfo->bytes_delalloc += bytes;
-
-	/*
-	 * we are adding a delalloc extent without calling
-	 * btrfs_check_data_free_space first. This happens on a weird
-	 * writepage condition, but shouldn't hurt our accounting
-	 */
-	if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
-		data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
-		BTRFS_I(inode)->reserved_bytes = 0;
-	} else {
-		data_sinfo->bytes_may_use -= bytes;
-		BTRFS_I(inode)->reserved_bytes -= bytes;
-	}
-
-	spin_unlock(&data_sinfo->lock);
-}
-
-/* called when we are clearing an delalloc extent from the inode's io_tree */
-void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
-			       u64 bytes)
-{
-	struct btrfs_space_info *info;
-
-	info = BTRFS_I(inode)->space_info;
-
-	spin_lock(&info->lock);
-	info->bytes_delalloc -= bytes;
-	spin_unlock(&info->lock);
-}
-
 static void force_metadata_allocation(struct btrfs_fs_info *info)
 {
 	struct list_head *head = &info->space_info;
@@ -3389,13 +2998,28 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	rcu_read_unlock();
 }
 
+static int should_alloc_chunk(struct btrfs_space_info *sinfo,
+			      u64 alloc_bytes)
+{
+	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+
+	if (sinfo->bytes_used + sinfo->bytes_reserved +
+	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+		return 0;
+
+	if (sinfo->bytes_used + sinfo->bytes_reserved +
+	    alloc_bytes < div_factor(num_bytes, 8))
+		return 0;
+
+	return 1;
+}
+
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force)
 {
 	struct btrfs_space_info *space_info;
 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
-	u64 thresh;
 	int ret = 0;
 
 	mutex_lock(&fs_info->chunk_mutex);
@@ -3418,11 +3042,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
-	thresh = space_info->total_bytes - space_info->bytes_readonly;
-	thresh = div_factor(thresh, 8);
-	if (!force &&
-	    (space_info->bytes_used + space_info->bytes_pinned +
-	     space_info->bytes_reserved + alloc_bytes) < thresh) {
+	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
@@ -3444,6 +3064,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	spin_lock(&space_info->lock);
 	if (ret)
 		space_info->full = 1;
+	else
+		ret = 1;
 	space_info->force_alloc = 0;
 	spin_unlock(&space_info->lock);
 out:
@@ -3451,13 +3073,713 @@ out:
3451 return ret; 3073 return ret;
3452} 3074}
3453 3075
3076static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
3077 struct btrfs_root *root,
3078 struct btrfs_space_info *sinfo, u64 num_bytes)
3079{
3080 int ret;
3081 int end_trans = 0;
3082
3083 if (sinfo->full)
3084 return 0;
3085
3086 spin_lock(&sinfo->lock);
3087 ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
3088 spin_unlock(&sinfo->lock);
3089 if (!ret)
3090 return 0;
3091
3092 if (!trans) {
3093 trans = btrfs_join_transaction(root, 1);
3094 BUG_ON(IS_ERR(trans));
3095 end_trans = 1;
3096 }
3097
3098 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3099 num_bytes + 2 * 1024 * 1024,
3100 get_alloc_profile(root, sinfo->flags), 0);
3101
3102 if (end_trans)
3103 btrfs_end_transaction(trans, root);
3104
3105 return ret == 1 ? 1 : 0;
3106}
3107
3108/*
3109 * shrink metadata reservation for delalloc
3110 */
3111static int shrink_delalloc(struct btrfs_trans_handle *trans,
3112 struct btrfs_root *root, u64 to_reclaim)
3113{
3114 struct btrfs_block_rsv *block_rsv;
3115 u64 reserved;
3116 u64 max_reclaim;
3117 u64 reclaimed = 0;
3118 int pause = 1;
3119 int ret;
3120
3121 block_rsv = &root->fs_info->delalloc_block_rsv;
3122 spin_lock(&block_rsv->lock);
3123 reserved = block_rsv->reserved;
3124 spin_unlock(&block_rsv->lock);
3125
3126 if (reserved == 0)
3127 return 0;
3128
3129 max_reclaim = min(reserved, to_reclaim);
3130
3131 while (1) {
3132 ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
3133 if (!ret) {
3134 __set_current_state(TASK_INTERRUPTIBLE);
3135 schedule_timeout(pause);
3136 pause <<= 1;
3137 if (pause > HZ / 10)
3138 pause = HZ / 10;
3139 } else {
3140 pause = 1;
3141 }
3142
3143 spin_lock(&block_rsv->lock);
3144 if (reserved > block_rsv->reserved)
3145 reclaimed = reserved - block_rsv->reserved;
3146 reserved = block_rsv->reserved;
3147 spin_unlock(&block_rsv->lock);
3148
3149 if (reserved == 0 || reclaimed >= max_reclaim)
3150 break;
3151
3152 if (trans && trans->transaction->blocked)
3153 return -EAGAIN;
3154 }
3155 return reclaimed >= to_reclaim;
3156}
3157
3158static int should_retry_reserve(struct btrfs_trans_handle *trans,
3159 struct btrfs_root *root,
3160 struct btrfs_block_rsv *block_rsv,
3161 u64 num_bytes, int *retries)
3162{
3163 struct btrfs_space_info *space_info = block_rsv->space_info;
3164 int ret;
3165
3166 if ((*retries) > 2)
3167 return -ENOSPC;
3168
3169 ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
3170 if (ret)
3171 return 1;
3172
3173 if (trans && trans->transaction->in_commit)
3174 return -ENOSPC;
3175
3176 ret = shrink_delalloc(trans, root, num_bytes);
3177 if (ret)
3178 return ret;
3179
3180 spin_lock(&space_info->lock);
3181 if (space_info->bytes_pinned < num_bytes)
3182 ret = 1;
3183 spin_unlock(&space_info->lock);
3184 if (ret)
3185 return -ENOSPC;
3186
3187 (*retries)++;
3188
3189 if (trans)
3190 return -EAGAIN;
3191
3192 trans = btrfs_join_transaction(root, 1);
3193 BUG_ON(IS_ERR(trans));
3194 ret = btrfs_commit_transaction(trans, root);
3195 BUG_ON(ret);
3196
3197 return 1;
3198}
3199
3200static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
3201 u64 num_bytes)
3202{
3203 struct btrfs_space_info *space_info = block_rsv->space_info;
3204 u64 unused;
3205 int ret = -ENOSPC;
3206
3207 spin_lock(&space_info->lock);
3208 unused = space_info->bytes_used + space_info->bytes_reserved +
3209 space_info->bytes_pinned + space_info->bytes_readonly;
3210
3211 if (unused < space_info->total_bytes)
3212 unused = space_info->total_bytes - unused;
3213 else
3214 unused = 0;
3215
3216 if (unused >= num_bytes) {
3217 if (block_rsv->priority >= 10) {
3218 space_info->bytes_reserved += num_bytes;
3219 ret = 0;
3220 } else {
3221 if ((unused + block_rsv->reserved) *
3222 block_rsv->priority >=
3223 (num_bytes + block_rsv->reserved) * 10) {
3224 space_info->bytes_reserved += num_bytes;
3225 ret = 0;
3226 }
3227 }
3228 }
3229 spin_unlock(&space_info->lock);
3230
3231 return ret;
3232}
3233
3234static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
3235 struct btrfs_root *root)
3236{
3237 struct btrfs_block_rsv *block_rsv;
3238 if (root->ref_cows)
3239 block_rsv = trans->block_rsv;
3240 else
3241 block_rsv = root->block_rsv;
3242
3243 if (!block_rsv)
3244 block_rsv = &root->fs_info->empty_block_rsv;
3245
3246 return block_rsv;
3247}
3248
3249static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
3250 u64 num_bytes)
3251{
3252 int ret = -ENOSPC;
3253 spin_lock(&block_rsv->lock);
3254 if (block_rsv->reserved >= num_bytes) {
3255 block_rsv->reserved -= num_bytes;
3256 if (block_rsv->reserved < block_rsv->size)
3257 block_rsv->full = 0;
3258 ret = 0;
3259 }
3260 spin_unlock(&block_rsv->lock);
3261 return ret;
3262}
3263
3264static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
3265 u64 num_bytes, int update_size)
3266{
3267 spin_lock(&block_rsv->lock);
3268 block_rsv->reserved += num_bytes;
3269 if (update_size)
3270 block_rsv->size += num_bytes;
3271 else if (block_rsv->reserved >= block_rsv->size)
3272 block_rsv->full = 1;
3273 spin_unlock(&block_rsv->lock);
3274}
3275
3276void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3277 struct btrfs_block_rsv *dest, u64 num_bytes)
3278{
3279 struct btrfs_space_info *space_info = block_rsv->space_info;
3280
3281 spin_lock(&block_rsv->lock);
3282 if (num_bytes == (u64)-1)
3283 num_bytes = block_rsv->size;
3284 block_rsv->size -= num_bytes;
3285 if (block_rsv->reserved >= block_rsv->size) {
3286 num_bytes = block_rsv->reserved - block_rsv->size;
3287 block_rsv->reserved = block_rsv->size;
3288 block_rsv->full = 1;
3289 } else {
3290 num_bytes = 0;
3291 }
3292 spin_unlock(&block_rsv->lock);
3293
3294 if (num_bytes > 0) {
3295 if (dest) {
3296 block_rsv_add_bytes(dest, num_bytes, 0);
3297 } else {
3298 spin_lock(&space_info->lock);
3299 space_info->bytes_reserved -= num_bytes;
3300 spin_unlock(&space_info->lock);
3301 }
3302 }
3303}
3304
3305static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
3306 struct btrfs_block_rsv *dst, u64 num_bytes)
3307{
3308 int ret;
3309
3310 ret = block_rsv_use_bytes(src, num_bytes);
3311 if (ret)
3312 return ret;
3313
3314 block_rsv_add_bytes(dst, num_bytes, 1);
3315 return 0;
3316}
3317
3318void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
3319{
3320 memset(rsv, 0, sizeof(*rsv));
3321 spin_lock_init(&rsv->lock);
3322 atomic_set(&rsv->usage, 1);
3323 rsv->priority = 6;
3324 INIT_LIST_HEAD(&rsv->list);
3325}
3326
3327struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
3328{
3329 struct btrfs_block_rsv *block_rsv;
3330 struct btrfs_fs_info *fs_info = root->fs_info;
3331 u64 alloc_target;
3332
3333 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
3334 if (!block_rsv)
3335 return NULL;
3336
3337 btrfs_init_block_rsv(block_rsv);
3338
3339 alloc_target = btrfs_get_alloc_profile(root, 0);
3340 block_rsv->space_info = __find_space_info(fs_info,
3341 BTRFS_BLOCK_GROUP_METADATA);
3342
3343 return block_rsv;
3344}
3345
3346void btrfs_free_block_rsv(struct btrfs_root *root,
3347 struct btrfs_block_rsv *rsv)
3348{
3349 if (rsv && atomic_dec_and_test(&rsv->usage)) {
3350 btrfs_block_rsv_release(root, rsv, (u64)-1);
3351 if (!rsv->durable)
3352 kfree(rsv);
3353 }
3354}
3355
3356/*
3357 * make the block_rsv struct be able to capture freed space.
3358 * the captured space will re-add to the the block_rsv struct
3359 * after transaction commit
3360 */
3361void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
3362 struct btrfs_block_rsv *block_rsv)
3363{
3364 block_rsv->durable = 1;
3365 mutex_lock(&fs_info->durable_block_rsv_mutex);
3366 list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
3367 mutex_unlock(&fs_info->durable_block_rsv_mutex);
3368}
3369
3370int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3371 struct btrfs_root *root,
3372 struct btrfs_block_rsv *block_rsv,
3373 u64 num_bytes, int *retries)
3374{
3375 int ret;
3376
3377 if (num_bytes == 0)
3378 return 0;
3379again:
3380 ret = reserve_metadata_bytes(block_rsv, num_bytes);
3381 if (!ret) {
3382 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3383 return 0;
3384 }
3385
3386 ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
3387 if (ret > 0)
3388 goto again;
3389
3390 return ret;
3391}
3392
3393int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3394 struct btrfs_root *root,
3395 struct btrfs_block_rsv *block_rsv,
3396 u64 min_reserved, int min_factor)
3397{
3398 u64 num_bytes = 0;
3399 int commit_trans = 0;
3400 int ret = -ENOSPC;
3401
3402 if (!block_rsv)
3403 return 0;
3404
3405 spin_lock(&block_rsv->lock);
3406 if (min_factor > 0)
3407 num_bytes = div_factor(block_rsv->size, min_factor);
3408 if (min_reserved > num_bytes)
3409 num_bytes = min_reserved;
3410
3411 if (block_rsv->reserved >= num_bytes) {
3412 ret = 0;
3413 } else {
3414 num_bytes -= block_rsv->reserved;
3415 if (block_rsv->durable &&
3416 block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
3417 commit_trans = 1;
3418 }
3419 spin_unlock(&block_rsv->lock);
3420 if (!ret)
3421 return 0;
3422
3423 if (block_rsv->refill_used) {
3424 ret = reserve_metadata_bytes(block_rsv, num_bytes);
3425 if (!ret) {
3426 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3427 return 0;
3428 }
3429 }
3430
3431 if (commit_trans) {
3432 if (trans)
3433 return -EAGAIN;
3434
3435 trans = btrfs_join_transaction(root, 1);
3436 BUG_ON(IS_ERR(trans));
3437 ret = btrfs_commit_transaction(trans, root);
3438 return 0;
3439 }
3440
3441 WARN_ON(1);
3442 printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
3443 block_rsv->size, block_rsv->reserved,
3444 block_rsv->freed[0], block_rsv->freed[1]);
3445
3446 return -ENOSPC;
3447}
3448
3449int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
3450 struct btrfs_block_rsv *dst_rsv,
3451 u64 num_bytes)
3452{
3453 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3454}
3455
3456void btrfs_block_rsv_release(struct btrfs_root *root,
3457 struct btrfs_block_rsv *block_rsv,
3458 u64 num_bytes)
3459{
3460 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3461 if (global_rsv->full || global_rsv == block_rsv ||
3462 block_rsv->space_info != global_rsv->space_info)
3463 global_rsv = NULL;
3464 block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
3465}
3466
3467/*
3468 * helper to calculate size of global block reservation.
3469 * the desired value is sum of space used by extent tree,
3470 * checksum tree and root tree
3471 */
3472static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3473{
3474 struct btrfs_space_info *sinfo;
3475 u64 num_bytes;
3476 u64 meta_used;
3477 u64 data_used;
3478 int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
3479#if 0
3480 /*
3481 * per tree used space accounting can be inaccuracy, so we
3482 * can't rely on it.
3483 */
3484 spin_lock(&fs_info->extent_root->accounting_lock);
3485 num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
3486 spin_unlock(&fs_info->extent_root->accounting_lock);
3487
3488 spin_lock(&fs_info->csum_root->accounting_lock);
3489 num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
3490 spin_unlock(&fs_info->csum_root->accounting_lock);
3491
3492 spin_lock(&fs_info->tree_root->accounting_lock);
3493 num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
3494 spin_unlock(&fs_info->tree_root->accounting_lock);
3495#endif
3496 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
3497 spin_lock(&sinfo->lock);
3498 data_used = sinfo->bytes_used;
3499 spin_unlock(&sinfo->lock);
3500
3501 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3502 spin_lock(&sinfo->lock);
3503 meta_used = sinfo->bytes_used;
3504 spin_unlock(&sinfo->lock);
3505
3506 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
3507 csum_size * 2;
3508 num_bytes += div64_u64(data_used + meta_used, 50);
3509
3510 if (num_bytes * 3 > meta_used)
3511 num_bytes = div64_u64(meta_used, 3);
3512
3513 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
3514}
3515
3516static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3517{
3518 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
3519 struct btrfs_space_info *sinfo = block_rsv->space_info;
3520 u64 num_bytes;
3521
3522 num_bytes = calc_global_metadata_size(fs_info);
3523
3524 spin_lock(&block_rsv->lock);
3525 spin_lock(&sinfo->lock);
3526
3527 block_rsv->size = num_bytes;
3528
3529 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
3530 sinfo->bytes_reserved + sinfo->bytes_readonly;
3531
3532 if (sinfo->total_bytes > num_bytes) {
3533 num_bytes = sinfo->total_bytes - num_bytes;
3534 block_rsv->reserved += num_bytes;
3535 sinfo->bytes_reserved += num_bytes;
3536 }
3537
3538 if (block_rsv->reserved >= block_rsv->size) {
3539 num_bytes = block_rsv->reserved - block_rsv->size;
3540 sinfo->bytes_reserved -= num_bytes;
3541 block_rsv->reserved = block_rsv->size;
3542 block_rsv->full = 1;
3543 }
3544#if 0
3545 printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
3546 block_rsv->size, block_rsv->reserved);
3547#endif
3548 spin_unlock(&sinfo->lock);
3549 spin_unlock(&block_rsv->lock);
3550}
3551
3552static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
3553{
3554 struct btrfs_space_info *space_info;
3555
3556 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3557 fs_info->chunk_block_rsv.space_info = space_info;
3558 fs_info->chunk_block_rsv.priority = 10;
3559
3560 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3561 fs_info->global_block_rsv.space_info = space_info;
3562 fs_info->global_block_rsv.priority = 10;
3563 fs_info->global_block_rsv.refill_used = 1;
3564 fs_info->delalloc_block_rsv.space_info = space_info;
3565 fs_info->trans_block_rsv.space_info = space_info;
3566 fs_info->empty_block_rsv.space_info = space_info;
3567 fs_info->empty_block_rsv.priority = 10;
3568
3569 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
3570 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
3571 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
3572 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
3573 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
3574
3575 btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);
3576
3577 btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
3578
3579 update_global_block_rsv(fs_info);
3580}
3581
3582static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3583{
3584 block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
3585 WARN_ON(fs_info->delalloc_block_rsv.size > 0);
3586 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
3587 WARN_ON(fs_info->trans_block_rsv.size > 0);
3588 WARN_ON(fs_info->trans_block_rsv.reserved > 0);
3589 WARN_ON(fs_info->chunk_block_rsv.size > 0);
3590 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3591}
3592
3593static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3594{
3595 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3596 3 * num_items;
3597}
3598
3599int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3600 struct btrfs_root *root,
3601 int num_items, int *retries)
3602{
3603 u64 num_bytes;
3604 int ret;
3605
3606 if (num_items == 0 || root->fs_info->chunk_root == root)
3607 return 0;
3608
3609 num_bytes = calc_trans_metadata_size(root, num_items);
3610 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3611 num_bytes, retries);
3612 if (!ret) {
3613 trans->bytes_reserved += num_bytes;
3614 trans->block_rsv = &root->fs_info->trans_block_rsv;
3615 }
3616 return ret;
3617}
3618
3619void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
3620 struct btrfs_root *root)
3621{
3622 if (!trans->bytes_reserved)
3623 return;
3624
3625 BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
3626 btrfs_block_rsv_release(root, trans->block_rsv,
3627 trans->bytes_reserved);
3628 trans->bytes_reserved = 0;
3629}
3630
3631int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
3632 struct inode *inode)
3633{
3634 struct btrfs_root *root = BTRFS_I(inode)->root;
3635 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
3636 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
3637
3638 /*
3639 * one for deleting orphan item, one for updating inode and
3640 * two for calling btrfs_truncate_inode_items.
3641 *
3642 * btrfs_truncate_inode_items is a delete operation, it frees
3643 * more space than it uses in most cases. So two units of
3644 * metadata space should be enough for calling it many times.
3645 * If all of the metadata space is used, we can commit
3646 * transaction and use space it freed.
3647 */
3648 u64 num_bytes = calc_trans_metadata_size(root, 4);
3649 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3650}
3651
3652void btrfs_orphan_release_metadata(struct inode *inode)
3653{
3654 struct btrfs_root *root = BTRFS_I(inode)->root;
3655 u64 num_bytes = calc_trans_metadata_size(root, 4);
3656 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
3657}
3658
3659int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
3660 struct btrfs_pending_snapshot *pending)
3661{
3662 struct btrfs_root *root = pending->root;
3663 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
3664 struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
3665 /*
3666 * two for root back/forward refs, two for directory entries
3667 * and one for root of the snapshot.
3668 */
3669 u64 num_bytes = calc_trans_metadata_size(root, 5);
3670 dst_rsv->space_info = src_rsv->space_info;
3671 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3672}
3673
3674static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
3675{
3676 return num_bytes >>= 3;
3677}
3678
3679int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3680{
3681 struct btrfs_root *root = BTRFS_I(inode)->root;
3682 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3683 u64 to_reserve;
3684 int nr_extents;
3685 int retries = 0;
3686 int ret;
3687
3688 if (btrfs_transaction_in_commit(root->fs_info))
3689 schedule_timeout(1);
3690
3691 num_bytes = ALIGN(num_bytes, root->sectorsize);
3692again:
3693 spin_lock(&BTRFS_I(inode)->accounting_lock);
3694 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
3695 if (nr_extents > BTRFS_I(inode)->reserved_extents) {
3696 nr_extents -= BTRFS_I(inode)->reserved_extents;
3697 to_reserve = calc_trans_metadata_size(root, nr_extents);
3698 } else {
3699 nr_extents = 0;
3700 to_reserve = 0;
3701 }
3702
3703 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3704 ret = reserve_metadata_bytes(block_rsv, to_reserve);
3705 if (ret) {
3706 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3707 ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
3708 &retries);
3709 if (ret > 0)
3710 goto again;
3711 return ret;
3712 }
3713
3714 BTRFS_I(inode)->reserved_extents += nr_extents;
3715 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3716 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3717
3718 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3719
3720 if (block_rsv->size > 512 * 1024 * 1024)
3721 shrink_delalloc(NULL, root, to_reserve);
3722
3723 return 0;
3724}
3725
3726void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
3727{
3728 struct btrfs_root *root = BTRFS_I(inode)->root;
3729 u64 to_free;
3730 int nr_extents;
3731
3732 num_bytes = ALIGN(num_bytes, root->sectorsize);
3733 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
3734
3735 spin_lock(&BTRFS_I(inode)->accounting_lock);
3736 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
3737 if (nr_extents < BTRFS_I(inode)->reserved_extents) {
3738 nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
3739 BTRFS_I(inode)->reserved_extents -= nr_extents;
3740 } else {
3741 nr_extents = 0;
3742 }
3743 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3744
3745 to_free = calc_csum_metadata_size(inode, num_bytes);
3746 if (nr_extents > 0)
3747 to_free += calc_trans_metadata_size(root, nr_extents);
3748
3749 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
3750 to_free);
3751}
3752
3753int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
3754{
3755 int ret;
3756
3757 ret = btrfs_check_data_free_space(inode, num_bytes);
3758 if (ret)
3759 return ret;
3760
3761 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
3762 if (ret) {
3763 btrfs_free_reserved_data_space(inode, num_bytes);
3764 return ret;
3765 }
3766
3767 return 0;
3768}
3769
3770void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
3771{
3772 btrfs_delalloc_release_metadata(inode, num_bytes);
3773 btrfs_free_reserved_data_space(inode, num_bytes);
3774}
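
The reserve/release pair above keeps two per-inode counters in step: outstanding_extents (delalloc extents in flight) and reserved_extents (extents already paid for). A standalone model of just that accounting, with hypothetical names:

#include <stdio.h>

struct inode_acct {
        int outstanding_extents;
        int reserved_extents;
};

/* returns how many new extents need metadata reserved */
static int reserve(struct inode_acct *i)
{
        int nr = i->outstanding_extents + 1;

        if (nr > i->reserved_extents) {
                nr -= i->reserved_extents;
                i->reserved_extents += nr;
        } else {
                nr = 0;
        }
        i->outstanding_extents++;
        return nr;
}

/* returns how many extents worth of metadata can be released */
static int release(struct inode_acct *i)
{
        int nr;

        i->outstanding_extents--;
        nr = i->outstanding_extents;
        if (nr < i->reserved_extents) {
                nr = i->reserved_extents - nr;
                i->reserved_extents -= nr;
                return nr;
        }
        return 0;
}

int main(void)
{
        struct inode_acct i = { 0, 0 };

        printf("reserve: %d new extents\n", reserve(&i));  /* 1 */
        printf("reserve: %d new extents\n", reserve(&i));  /* 1 */
        printf("release: %d extents freed\n", release(&i)); /* 1 */
        return 0;
}
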
3775
3454static int update_block_group(struct btrfs_trans_handle *trans, 3776static int update_block_group(struct btrfs_trans_handle *trans,
3455 struct btrfs_root *root, 3777 struct btrfs_root *root,
3456 u64 bytenr, u64 num_bytes, int alloc, 3778 u64 bytenr, u64 num_bytes, int alloc)
3457 int mark_free)
3458{ 3779{
3459 struct btrfs_block_group_cache *cache; 3780 struct btrfs_block_group_cache *cache;
3460 struct btrfs_fs_info *info = root->fs_info; 3781 struct btrfs_fs_info *info = root->fs_info;
3782 int factor;
3461 u64 total = num_bytes; 3783 u64 total = num_bytes;
3462 u64 old_val; 3784 u64 old_val;
3463 u64 byte_in_group; 3785 u64 byte_in_group;
@@ -3476,6 +3798,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3476 cache = btrfs_lookup_block_group(info, bytenr); 3798 cache = btrfs_lookup_block_group(info, bytenr);
3477 if (!cache) 3799 if (!cache)
3478 return -1; 3800 return -1;
3801 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
3802 BTRFS_BLOCK_GROUP_RAID1 |
3803 BTRFS_BLOCK_GROUP_RAID10))
3804 factor = 2;
3805 else
3806 factor = 1;
3479 byte_in_group = bytenr - cache->key.objectid; 3807 byte_in_group = bytenr - cache->key.objectid;
3480 WARN_ON(byte_in_group > cache->key.offset); 3808 WARN_ON(byte_in_group > cache->key.offset);
3481 3809
@@ -3488,31 +3816,24 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3488 old_val += num_bytes; 3816 old_val += num_bytes;
3489 btrfs_set_block_group_used(&cache->item, old_val); 3817 btrfs_set_block_group_used(&cache->item, old_val);
3490 cache->reserved -= num_bytes; 3818 cache->reserved -= num_bytes;
3491 cache->space_info->bytes_used += num_bytes;
3492 cache->space_info->bytes_reserved -= num_bytes; 3819 cache->space_info->bytes_reserved -= num_bytes;
3493 if (cache->ro) 3820 cache->space_info->bytes_used += num_bytes;
3494 cache->space_info->bytes_readonly -= num_bytes; 3821 cache->space_info->disk_used += num_bytes * factor;
3495 spin_unlock(&cache->lock); 3822 spin_unlock(&cache->lock);
3496 spin_unlock(&cache->space_info->lock); 3823 spin_unlock(&cache->space_info->lock);
3497 } else { 3824 } else {
3498 old_val -= num_bytes; 3825 old_val -= num_bytes;
3499 cache->space_info->bytes_used -= num_bytes;
3500 if (cache->ro)
3501 cache->space_info->bytes_readonly += num_bytes;
3502 btrfs_set_block_group_used(&cache->item, old_val); 3826 btrfs_set_block_group_used(&cache->item, old_val);
3827 cache->pinned += num_bytes;
3828 cache->space_info->bytes_pinned += num_bytes;
3829 cache->space_info->bytes_used -= num_bytes;
3830 cache->space_info->disk_used -= num_bytes * factor;
3503 spin_unlock(&cache->lock); 3831 spin_unlock(&cache->lock);
3504 spin_unlock(&cache->space_info->lock); 3832 spin_unlock(&cache->space_info->lock);
3505 if (mark_free) {
3506 int ret;
3507
3508 ret = btrfs_discard_extent(root, bytenr,
3509 num_bytes);
3510 WARN_ON(ret);
3511 3833
3512 ret = btrfs_add_free_space(cache, bytenr, 3834 set_extent_dirty(info->pinned_extents,
3513 num_bytes); 3835 bytenr, bytenr + num_bytes - 1,
3514 WARN_ON(ret); 3836 GFP_NOFS | __GFP_NOFAIL);
3515 }
3516 } 3837 }
3517 btrfs_put_block_group(cache); 3838 btrfs_put_block_group(cache);
3518 total -= num_bytes; 3839 total -= num_bytes;
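
The factor computed at the top of update_block_group() makes disk_used move at twice the logical rate for profiles that store two copies of every byte. A standalone model (the flag bits below are placeholders; the real BTRFS_BLOCK_GROUP_* values live in ctree.h):

#include <stdint.h>
#include <stdio.h>

#define BG_DUP    (1ULL << 0)
#define BG_RAID1  (1ULL << 1)
#define BG_RAID10 (1ULL << 2)

/* model of the 'factor' picked in update_block_group(): two-copy
 * profiles charge disk_used at twice the logical rate */
static int disk_factor(uint64_t flags)
{
        if (flags & (BG_DUP | BG_RAID1 | BG_RAID10))
                return 2;
        return 1;
}

int main(void)
{
        uint64_t disk_used = 0;
        uint64_t alloc = 128 * 1024;            /* 128KiB extent */

        disk_used += alloc * disk_factor(BG_RAID1);
        printf("disk_used after RAID1 alloc: %llu\n",
               (unsigned long long)disk_used);  /* 262144 */
        return 0;
}
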
@@ -3536,18 +3857,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
3536 return bytenr; 3857 return bytenr;
3537} 3858}
3538 3859
3539/* 3860static int pin_down_extent(struct btrfs_root *root,
3540 * this function must be called within transaction 3861 struct btrfs_block_group_cache *cache,
3541 */ 3862 u64 bytenr, u64 num_bytes, int reserved)
3542int btrfs_pin_extent(struct btrfs_root *root,
3543 u64 bytenr, u64 num_bytes, int reserved)
3544{ 3863{
3545 struct btrfs_fs_info *fs_info = root->fs_info;
3546 struct btrfs_block_group_cache *cache;
3547
3548 cache = btrfs_lookup_block_group(fs_info, bytenr);
3549 BUG_ON(!cache);
3550
3551 spin_lock(&cache->space_info->lock); 3864 spin_lock(&cache->space_info->lock);
3552 spin_lock(&cache->lock); 3865 spin_lock(&cache->lock);
3553 cache->pinned += num_bytes; 3866 cache->pinned += num_bytes;
@@ -3559,28 +3872,68 @@ int btrfs_pin_extent(struct btrfs_root *root,
3559 spin_unlock(&cache->lock); 3872 spin_unlock(&cache->lock);
3560 spin_unlock(&cache->space_info->lock); 3873 spin_unlock(&cache->space_info->lock);
3561 3874
3562 btrfs_put_block_group(cache); 3875 set_extent_dirty(root->fs_info->pinned_extents, bytenr,
3876 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
3877 return 0;
3878}
3563 3879
3564 set_extent_dirty(fs_info->pinned_extents, 3880/*
3565 bytenr, bytenr + num_bytes - 1, GFP_NOFS); 3881 * this function must be called within a transaction
3882 */
3883int btrfs_pin_extent(struct btrfs_root *root,
3884 u64 bytenr, u64 num_bytes, int reserved)
3885{
3886 struct btrfs_block_group_cache *cache;
3887
3888 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
3889 BUG_ON(!cache);
3890
3891 pin_down_extent(root, cache, bytenr, num_bytes, reserved);
3892
3893 btrfs_put_block_group(cache);
3566 return 0; 3894 return 0;
3567} 3895}
3568 3896
3569static int update_reserved_extents(struct btrfs_block_group_cache *cache, 3897/*
3570 u64 num_bytes, int reserve) 3898 * update size of reserved extents. this function may return -EAGAIN
3899 * if the block group is read-only and 'reserve' is true or 'sinfo' is false.
3900 */
3901static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
3902 u64 num_bytes, int reserve, int sinfo)
3571{ 3903{
3572 spin_lock(&cache->space_info->lock); 3904 int ret = 0;
3573 spin_lock(&cache->lock); 3905 if (sinfo) {
3574 if (reserve) { 3906 struct btrfs_space_info *space_info = cache->space_info;
3575 cache->reserved += num_bytes; 3907 spin_lock(&space_info->lock);
3576 cache->space_info->bytes_reserved += num_bytes; 3908 spin_lock(&cache->lock);
3909 if (reserve) {
3910 if (cache->ro) {
3911 ret = -EAGAIN;
3912 } else {
3913 cache->reserved += num_bytes;
3914 space_info->bytes_reserved += num_bytes;
3915 }
3916 } else {
3917 if (cache->ro)
3918 space_info->bytes_readonly += num_bytes;
3919 cache->reserved -= num_bytes;
3920 space_info->bytes_reserved -= num_bytes;
3921 }
3922 spin_unlock(&cache->lock);
3923 spin_unlock(&space_info->lock);
3577 } else { 3924 } else {
3578 cache->reserved -= num_bytes; 3925 spin_lock(&cache->lock);
3579 cache->space_info->bytes_reserved -= num_bytes; 3926 if (cache->ro) {
3927 ret = -EAGAIN;
3928 } else {
3929 if (reserve)
3930 cache->reserved += num_bytes;
3931 else
3932 cache->reserved -= num_bytes;
3933 }
3934 spin_unlock(&cache->lock);
3580 } 3935 }
3581 spin_unlock(&cache->lock); 3936 return ret;
3582 spin_unlock(&cache->space_info->lock);
3583 return 0;
3584} 3937}
3585 3938
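
A sketch of that -EAGAIN contract: with 'sinfo' unset, a group that has gone read-only refuses the update, and the caller retries with 'sinfo' set so the space_info counters absorb the bytes (this is the pattern btrfs_free_tree_block uses later in this patch). Names below are illustrative, not kernel fields:

#include <stdio.h>

#define EAGAIN 11       /* errno value, for illustration only */

/* sinfo_* stand in for the space_info counters */
struct group {
        int ro;
        long reserved;
        long sinfo_reserved;
        long sinfo_readonly;
};

static int unreserve(struct group *g, long bytes, int sinfo)
{
        if (!sinfo) {
                if (g->ro)
                        return -EAGAIN; /* caller must retry with sinfo */
                g->reserved -= bytes;
                return 0;
        }
        if (g->ro)
                g->sinfo_readonly += bytes;
        g->reserved -= bytes;
        g->sinfo_reserved -= bytes;
        return 0;
}

int main(void)
{
        struct group g = { .ro = 1, .reserved = 4096,
                           .sinfo_reserved = 4096, .sinfo_readonly = 0 };

        if (unreserve(&g, 4096, 0) == -EAGAIN)  /* group went read-only */
                unreserve(&g, 4096, 1);         /* fall back to sinfo path */
        printf("readonly now holds %ld bytes\n", g.sinfo_readonly);
        return 0;
}
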
3586int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 3939int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
@@ -3611,6 +3964,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
3611 fs_info->pinned_extents = &fs_info->freed_extents[0]; 3964 fs_info->pinned_extents = &fs_info->freed_extents[0];
3612 3965
3613 up_write(&fs_info->extent_commit_sem); 3966 up_write(&fs_info->extent_commit_sem);
3967
3968 update_global_block_rsv(fs_info);
3614 return 0; 3969 return 0;
3615} 3970}
3616 3971
@@ -3637,14 +3992,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
3637 btrfs_add_free_space(cache, start, len); 3992 btrfs_add_free_space(cache, start, len);
3638 } 3993 }
3639 3994
3995 start += len;
3996
3640 spin_lock(&cache->space_info->lock); 3997 spin_lock(&cache->space_info->lock);
3641 spin_lock(&cache->lock); 3998 spin_lock(&cache->lock);
3642 cache->pinned -= len; 3999 cache->pinned -= len;
3643 cache->space_info->bytes_pinned -= len; 4000 cache->space_info->bytes_pinned -= len;
4001 if (cache->ro) {
4002 cache->space_info->bytes_readonly += len;
4003 } else if (cache->reserved_pinned > 0) {
4004 len = min(len, cache->reserved_pinned);
4005 cache->reserved_pinned -= len;
4006 cache->space_info->bytes_reserved += len;
4007 }
3644 spin_unlock(&cache->lock); 4008 spin_unlock(&cache->lock);
3645 spin_unlock(&cache->space_info->lock); 4009 spin_unlock(&cache->space_info->lock);
3646
3647 start += len;
3648 } 4010 }
3649 4011
3650 if (cache) 4012 if (cache)
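
When pinned bytes come back at commit time, the unpin path above routes them by group state: a read-only group credits bytes_readonly, otherwise any reserved_pinned debt is paid back into bytes_reserved first. A standalone model of that branch, with illustrative names:

#include <stdio.h>

struct group {
        int ro;
        long pinned, reserved_pinned;
        long si_pinned, si_readonly, si_reserved;
};

static void unpin(struct group *g, long len)
{
        g->pinned -= len;
        g->si_pinned -= len;
        if (g->ro) {
                g->si_readonly += len;
        } else if (g->reserved_pinned > 0) {
                long n = len < g->reserved_pinned ? len : g->reserved_pinned;

                g->reserved_pinned -= n;
                g->si_reserved += n;
        }
}

int main(void)
{
        struct group g = { .ro = 0, .pinned = 8192, .reserved_pinned = 4096,
                           .si_pinned = 8192 };

        unpin(&g, 8192);
        printf("reserved=%ld reserved_pinned=%ld\n",
               g.si_reserved, g.reserved_pinned);      /* 4096, 0 */
        return 0;
}
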
@@ -3657,8 +4019,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3657{ 4019{
3658 struct btrfs_fs_info *fs_info = root->fs_info; 4020 struct btrfs_fs_info *fs_info = root->fs_info;
3659 struct extent_io_tree *unpin; 4021 struct extent_io_tree *unpin;
4022 struct btrfs_block_rsv *block_rsv;
4023 struct btrfs_block_rsv *next_rsv;
3660 u64 start; 4024 u64 start;
3661 u64 end; 4025 u64 end;
4026 int idx;
3662 int ret; 4027 int ret;
3663 4028
3664 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4029 if (fs_info->pinned_extents == &fs_info->freed_extents[0])
@@ -3679,59 +4044,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3679 cond_resched(); 4044 cond_resched();
3680 } 4045 }
3681 4046
3682 return ret; 4047 mutex_lock(&fs_info->durable_block_rsv_mutex);
3683} 4048 list_for_each_entry_safe(block_rsv, next_rsv,
3684 4049 &fs_info->durable_block_rsv_list, list) {
3685static int pin_down_bytes(struct btrfs_trans_handle *trans,
3686 struct btrfs_root *root,
3687 struct btrfs_path *path,
3688 u64 bytenr, u64 num_bytes,
3689 int is_data, int reserved,
3690 struct extent_buffer **must_clean)
3691{
3692 int err = 0;
3693 struct extent_buffer *buf;
3694
3695 if (is_data)
3696 goto pinit;
3697
3698 /*
3699 * discard is sloooow, and so triggering discards on
3700 * individual btree blocks isn't a good plan. Just
3701 * pin everything in discard mode.
3702 */
3703 if (btrfs_test_opt(root, DISCARD))
3704 goto pinit;
3705 4050
3706 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 4051 idx = trans->transid & 0x1;
3707 if (!buf) 4052 if (block_rsv->freed[idx] > 0) {
3708 goto pinit; 4053 block_rsv_add_bytes(block_rsv,
4054 block_rsv->freed[idx], 0);
4055 block_rsv->freed[idx] = 0;
4056 }
4057 if (atomic_read(&block_rsv->usage) == 0) {
4058 btrfs_block_rsv_release(root, block_rsv, (u64)-1);
3709 4059
3710 /* we can reuse a block if it hasn't been written 4060 if (block_rsv->freed[0] == 0 &&
3711 * and it is from this transaction. We can't 4061 block_rsv->freed[1] == 0) {
3712 * reuse anything from the tree log root because 4062 list_del_init(&block_rsv->list);
3713 * it has tiny sub-transactions. 4063 kfree(block_rsv);
3714 */ 4064 }
3715 if (btrfs_buffer_uptodate(buf, 0) && 4065 } else {
3716 btrfs_try_tree_lock(buf)) { 4066 btrfs_block_rsv_release(root, block_rsv, 0);
3717 u64 header_owner = btrfs_header_owner(buf);
3718 u64 header_transid = btrfs_header_generation(buf);
3719 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
3720 header_transid == trans->transid &&
3721 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
3722 *must_clean = buf;
3723 return 1;
3724 } 4067 }
3725 btrfs_tree_unlock(buf);
3726 } 4068 }
3727 free_extent_buffer(buf); 4069 mutex_unlock(&fs_info->durable_block_rsv_mutex);
3728pinit:
3729 if (path)
3730 btrfs_set_path_blocking(path);
3731 /* unlocks the pinned mutex */
3732 btrfs_pin_extent(root, bytenr, num_bytes, reserved);
3733 4070
3734 BUG_ON(err < 0);
3735 return 0; 4071 return 0;
3736} 4072}
3737 4073
@@ -3892,9 +4228,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3892 BUG_ON(ret); 4228 BUG_ON(ret);
3893 } 4229 }
3894 } else { 4230 } else {
3895 int mark_free = 0;
3896 struct extent_buffer *must_clean = NULL;
3897
3898 if (found_extent) { 4231 if (found_extent) {
3899 BUG_ON(is_data && refs_to_drop != 4232 BUG_ON(is_data && refs_to_drop !=
3900 extent_data_ref_count(root, path, iref)); 4233 extent_data_ref_count(root, path, iref));
@@ -3907,31 +4240,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3907 } 4240 }
3908 } 4241 }
3909 4242
3910 ret = pin_down_bytes(trans, root, path, bytenr,
3911 num_bytes, is_data, 0, &must_clean);
3912 if (ret > 0)
3913 mark_free = 1;
3914 BUG_ON(ret < 0);
3915 /*
3916 * it is going to be very rare for someone to be waiting
3917 * on the block we're freeing. del_items might need to
3918 * schedule, so rather than get fancy, just force it
3919 * to blocking here
3920 */
3921 if (must_clean)
3922 btrfs_set_lock_blocking(must_clean);
3923
3924 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4243 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3925 num_to_del); 4244 num_to_del);
3926 BUG_ON(ret); 4245 BUG_ON(ret);
3927 btrfs_release_path(extent_root, path); 4246 btrfs_release_path(extent_root, path);
3928 4247
3929 if (must_clean) {
3930 clean_tree_block(NULL, root, must_clean);
3931 btrfs_tree_unlock(must_clean);
3932 free_extent_buffer(must_clean);
3933 }
3934
3935 if (is_data) { 4248 if (is_data) {
3936 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4249 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
3937 BUG_ON(ret); 4250 BUG_ON(ret);
@@ -3941,8 +4254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3941 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 4254 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
3942 } 4255 }
3943 4256
3944 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 4257 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
3945 mark_free);
3946 BUG_ON(ret); 4258 BUG_ON(ret);
3947 } 4259 }
3948 btrfs_free_path(path); 4260 btrfs_free_path(path);
@@ -3950,7 +4262,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
3950} 4262}
3951 4263
3952/* 4264/*
3953 * when we free an extent, it is possible (and likely) that we free the last 4265 * when we free a block, it is possible (and likely) that we free the last
3954 * delayed ref for that extent as well. This searches the delayed ref tree for 4266 * delayed ref for that extent as well. This searches the delayed ref tree for
3955 * a given extent, and if there are no other delayed refs to be processed, it 4267 * a given extent, and if there are no other delayed refs to be processed, it
3956 * removes it from the tree. 4268 * removes it from the tree.
@@ -3962,7 +4274,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
3962 struct btrfs_delayed_ref_root *delayed_refs; 4274 struct btrfs_delayed_ref_root *delayed_refs;
3963 struct btrfs_delayed_ref_node *ref; 4275 struct btrfs_delayed_ref_node *ref;
3964 struct rb_node *node; 4276 struct rb_node *node;
3965 int ret; 4277 int ret = 0;
3966 4278
3967 delayed_refs = &trans->transaction->delayed_refs; 4279 delayed_refs = &trans->transaction->delayed_refs;
3968 spin_lock(&delayed_refs->lock); 4280 spin_lock(&delayed_refs->lock);
@@ -4014,17 +4326,100 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
4014 list_del_init(&head->cluster); 4326 list_del_init(&head->cluster);
4015 spin_unlock(&delayed_refs->lock); 4327 spin_unlock(&delayed_refs->lock);
4016 4328
4017 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 4329 BUG_ON(head->extent_op);
4018 &head->node, head->extent_op, 4330 if (head->must_insert_reserved)
4019 head->must_insert_reserved); 4331 ret = 1;
4020 BUG_ON(ret); 4332
4333 mutex_unlock(&head->mutex);
4021 btrfs_put_delayed_ref(&head->node); 4334 btrfs_put_delayed_ref(&head->node);
4022 return 0; 4335 return ret;
4023out: 4336out:
4024 spin_unlock(&delayed_refs->lock); 4337 spin_unlock(&delayed_refs->lock);
4025 return 0; 4338 return 0;
4026} 4339}
4027 4340
4341void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4342 struct btrfs_root *root,
4343 struct extent_buffer *buf,
4344 u64 parent, int last_ref)
4345{
4346 struct btrfs_block_rsv *block_rsv;
4347 struct btrfs_block_group_cache *cache = NULL;
4348 int ret;
4349
4350 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4351 ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
4352 parent, root->root_key.objectid,
4353 btrfs_header_level(buf),
4354 BTRFS_DROP_DELAYED_REF, NULL);
4355 BUG_ON(ret);
4356 }
4357
4358 if (!last_ref)
4359 return;
4360
4361 block_rsv = get_block_rsv(trans, root);
4362 cache = btrfs_lookup_block_group(root->fs_info, buf->start);
4363 if (block_rsv->space_info != cache->space_info)
4364 goto out;
4365
4366 if (btrfs_header_generation(buf) == trans->transid) {
4367 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4368 ret = check_ref_cleanup(trans, root, buf->start);
4369 if (!ret)
4370 goto pin;
4371 }
4372
4373 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
4374 pin_down_extent(root, cache, buf->start, buf->len, 1);
4375 goto pin;
4376 }
4377
4378 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4379
4380 btrfs_add_free_space(cache, buf->start, buf->len);
4381 ret = update_reserved_bytes(cache, buf->len, 0, 0);
4382 if (ret == -EAGAIN) {
4383 /* block group became read-only */
4384 update_reserved_bytes(cache, buf->len, 0, 1);
4385 goto out;
4386 }
4387
4388 ret = 1;
4389 spin_lock(&block_rsv->lock);
4390 if (block_rsv->reserved < block_rsv->size) {
4391 block_rsv->reserved += buf->len;
4392 ret = 0;
4393 }
4394 spin_unlock(&block_rsv->lock);
4395
4396 if (ret) {
4397 spin_lock(&cache->space_info->lock);
4398 cache->space_info->bytes_reserved -= buf->len;
4399 spin_unlock(&cache->space_info->lock);
4400 }
4401 goto out;
4402 }
4403pin:
4404 if (block_rsv->durable && !cache->ro) {
4405 ret = 0;
4406 spin_lock(&cache->lock);
4407 if (!cache->ro) {
4408 cache->reserved_pinned += buf->len;
4409 ret = 1;
4410 }
4411 spin_unlock(&cache->lock);
4412
4413 if (ret) {
4414 spin_lock(&block_rsv->lock);
4415 block_rsv->freed[trans->transid & 0x1] += buf->len;
4416 spin_unlock(&block_rsv->lock);
4417 }
4418 }
4419out:
4420 btrfs_put_block_group(cache);
4421}
4422
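
Durable reservations double-buffer the bytes freed per transaction: a block freed in transaction N lands in freed[N & 1] and is folded back into the pool when that transaction commits in btrfs_finish_extent_commit(). A minimal model:

#include <stdio.h>

struct block_rsv {
        long reserved;
        long freed[2];
};

static void free_bytes(struct block_rsv *rsv, long transid, long bytes)
{
        rsv->freed[transid & 0x1] += bytes;
}

static void commit(struct block_rsv *rsv, long transid)
{
        int idx = transid & 0x1;

        rsv->reserved += rsv->freed[idx];
        rsv->freed[idx] = 0;
}

int main(void)
{
        struct block_rsv rsv = { 0, { 0, 0 } };

        free_bytes(&rsv, 42, 4096);  /* tree block freed in transaction 42 */
        commit(&rsv, 42);
        printf("reserved=%ld freed={%ld,%ld}\n",
               rsv.reserved, rsv.freed[0], rsv.freed[1]);
        return 0;
}
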
4028int btrfs_free_extent(struct btrfs_trans_handle *trans, 4423int btrfs_free_extent(struct btrfs_trans_handle *trans,
4029 struct btrfs_root *root, 4424 struct btrfs_root *root,
4030 u64 bytenr, u64 num_bytes, u64 parent, 4425 u64 bytenr, u64 num_bytes, u64 parent,
@@ -4046,8 +4441,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4046 parent, root_objectid, (int)owner, 4441 parent, root_objectid, (int)owner,
4047 BTRFS_DROP_DELAYED_REF, NULL); 4442 BTRFS_DROP_DELAYED_REF, NULL);
4048 BUG_ON(ret); 4443 BUG_ON(ret);
4049 ret = check_ref_cleanup(trans, root, bytenr);
4050 BUG_ON(ret);
4051 } else { 4444 } else {
4052 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 4445 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
4053 parent, root_objectid, owner, 4446 parent, root_objectid, owner,
@@ -4057,21 +4450,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
4057 return ret; 4450 return ret;
4058} 4451}
4059 4452
4060int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4061 struct btrfs_root *root,
4062 u64 bytenr, u32 blocksize,
4063 u64 parent, u64 root_objectid, int level)
4064{
4065 u64 used;
4066 spin_lock(&root->node_lock);
4067 used = btrfs_root_used(&root->root_item) - blocksize;
4068 btrfs_set_root_used(&root->root_item, used);
4069 spin_unlock(&root->node_lock);
4070
4071 return btrfs_free_extent(trans, root, bytenr, blocksize,
4072 parent, root_objectid, level, 0);
4073}
4074
4075static u64 stripe_align(struct btrfs_root *root, u64 val) 4453static u64 stripe_align(struct btrfs_root *root, u64 val)
4076{ 4454{
4077 u64 mask = ((u64)root->stripesize - 1); 4455 u64 mask = ((u64)root->stripesize - 1);
@@ -4124,6 +4502,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
4124 return 0; 4502 return 0;
4125} 4503}
4126 4504
4505static int get_block_group_index(struct btrfs_block_group_cache *cache)
4506{
4507 int index;
4508 if (cache->flags & BTRFS_BLOCK_GROUP_RAID10)
4509 index = 0;
4510 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1)
4511 index = 1;
4512 else if (cache->flags & BTRFS_BLOCK_GROUP_DUP)
4513 index = 2;
4514 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0)
4515 index = 3;
4516 else
4517 index = 4;
4518 return index;
4519}
4520
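
get_block_group_index() fixes the search order find_free_extent() walks below: the most redundant profiles come first, single-copy groups last. A standalone model (the flag bits and BTRFS_NR_RAID_TYPES = 5 are assumptions matching this patch, not values from this hunk):

#include <stdint.h>
#include <stdio.h>

#define BG_RAID10 (1ULL << 0)
#define BG_RAID1  (1ULL << 1)
#define BG_DUP    (1ULL << 2)
#define BG_RAID0  (1ULL << 3)
#define NR_RAID_TYPES 5

static int bg_index(uint64_t flags)
{
        if (flags & BG_RAID10) return 0;
        if (flags & BG_RAID1)  return 1;
        if (flags & BG_DUP)    return 2;
        if (flags & BG_RAID0)  return 3;
        return 4;               /* single, searched last */
}

int main(void)
{
        /* find_free_extent() walks block_groups[0..NR_RAID_TYPES-1]
         * in this order before giving up */
        printf("raid1=%d raid0=%d single=%d of %d lists\n",
               bg_index(BG_RAID1), bg_index(BG_RAID0), bg_index(0),
               NR_RAID_TYPES);
        return 0;
}
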
4127enum btrfs_loop_type { 4521enum btrfs_loop_type {
4128 LOOP_FIND_IDEAL = 0, 4522 LOOP_FIND_IDEAL = 0,
4129 LOOP_CACHING_NOWAIT = 1, 4523 LOOP_CACHING_NOWAIT = 1,
@@ -4145,7 +4539,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4145 u64 num_bytes, u64 empty_size, 4539 u64 num_bytes, u64 empty_size,
4146 u64 search_start, u64 search_end, 4540 u64 search_start, u64 search_end,
4147 u64 hint_byte, struct btrfs_key *ins, 4541 u64 hint_byte, struct btrfs_key *ins,
4148 u64 exclude_start, u64 exclude_nr,
4149 int data) 4542 int data)
4150{ 4543{
4151 int ret = 0; 4544 int ret = 0;
@@ -4158,6 +4551,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4158 struct btrfs_space_info *space_info; 4551 struct btrfs_space_info *space_info;
4159 int last_ptr_loop = 0; 4552 int last_ptr_loop = 0;
4160 int loop = 0; 4553 int loop = 0;
4554 int index = 0;
4161 bool found_uncached_bg = false; 4555 bool found_uncached_bg = false;
4162 bool failed_cluster_refill = false; 4556 bool failed_cluster_refill = false;
4163 bool failed_alloc = false; 4557 bool failed_alloc = false;
@@ -4170,6 +4564,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4170 ins->offset = 0; 4564 ins->offset = 0;
4171 4565
4172 space_info = __find_space_info(root->fs_info, data); 4566 space_info = __find_space_info(root->fs_info, data);
4567 if (!space_info) {
4568 printk(KERN_ERR "No space info for %d\n", data);
4569 return -ENOSPC;
4570 }
4173 4571
4174 if (orig_root->ref_cows || empty_size) 4572 if (orig_root->ref_cows || empty_size)
4175 allowed_chunk_alloc = 1; 4573 allowed_chunk_alloc = 1;
@@ -4223,6 +4621,7 @@ ideal_cache:
4223 btrfs_put_block_group(block_group); 4621 btrfs_put_block_group(block_group);
4224 up_read(&space_info->groups_sem); 4622 up_read(&space_info->groups_sem);
4225 } else { 4623 } else {
4624 index = get_block_group_index(block_group);
4226 goto have_block_group; 4625 goto have_block_group;
4227 } 4626 }
4228 } else if (block_group) { 4627 } else if (block_group) {
@@ -4231,7 +4630,8 @@ ideal_cache:
4231 } 4630 }
4232search: 4631search:
4233 down_read(&space_info->groups_sem); 4632 down_read(&space_info->groups_sem);
4234 list_for_each_entry(block_group, &space_info->block_groups, list) { 4633 list_for_each_entry(block_group, &space_info->block_groups[index],
4634 list) {
4235 u64 offset; 4635 u64 offset;
4236 int cached; 4636 int cached;
4237 4637
@@ -4422,23 +4822,22 @@ checks:
4422 goto loop; 4822 goto loop;
4423 } 4823 }
4424 4824
4425 if (exclude_nr > 0 && 4825 ins->objectid = search_start;
4426 (search_start + num_bytes > exclude_start && 4826 ins->offset = num_bytes;
4427 search_start < exclude_start + exclude_nr)) {
4428 search_start = exclude_start + exclude_nr;
4429 4827
4828 if (offset < search_start)
4829 btrfs_add_free_space(block_group, offset,
4830 search_start - offset);
4831 BUG_ON(offset > search_start);
4832
4833 ret = update_reserved_bytes(block_group, num_bytes, 1,
4834 (data & BTRFS_BLOCK_GROUP_DATA));
4835 if (ret == -EAGAIN) {
4430 btrfs_add_free_space(block_group, offset, num_bytes); 4836 btrfs_add_free_space(block_group, offset, num_bytes);
4431 /*
4432 * if search_start is still in this block group
4433 * then we just re-search this block group
4434 */
4435 if (search_start >= block_group->key.objectid &&
4436 search_start < (block_group->key.objectid +
4437 block_group->key.offset))
4438 goto have_block_group;
4439 goto loop; 4837 goto loop;
4440 } 4838 }
4441 4839
4840 /* we are all good, let's return */
4442 ins->objectid = search_start; 4841 ins->objectid = search_start;
4443 ins->offset = num_bytes; 4842 ins->offset = num_bytes;
4444 4843
@@ -4446,18 +4845,18 @@ checks:
4446 btrfs_add_free_space(block_group, offset, 4845 btrfs_add_free_space(block_group, offset,
4447 search_start - offset); 4846 search_start - offset);
4448 BUG_ON(offset > search_start); 4847 BUG_ON(offset > search_start);
4449
4450 update_reserved_extents(block_group, num_bytes, 1);
4451
4452 /* we are all good, lets return */
4453 break; 4848 break;
4454loop: 4849loop:
4455 failed_cluster_refill = false; 4850 failed_cluster_refill = false;
4456 failed_alloc = false; 4851 failed_alloc = false;
4852 BUG_ON(index != get_block_group_index(block_group));
4457 btrfs_put_block_group(block_group); 4853 btrfs_put_block_group(block_group);
4458 } 4854 }
4459 up_read(&space_info->groups_sem); 4855 up_read(&space_info->groups_sem);
4460 4856
4857 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
4858 goto search;
4859
4461 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait 4860 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait
4462 * for them to make caching progress. Also 4861 * for them to make caching progress. Also
4463 * determine the best possible bg to cache 4862 * determine the best possible bg to cache
@@ -4471,6 +4870,7 @@ loop:
4471 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 4870 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
4472 (found_uncached_bg || empty_size || empty_cluster || 4871 (found_uncached_bg || empty_size || empty_cluster ||
4473 allowed_chunk_alloc)) { 4872 allowed_chunk_alloc)) {
4873 index = 0;
4474 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 4874 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
4475 found_uncached_bg = false; 4875 found_uncached_bg = false;
4476 loop++; 4876 loop++;
@@ -4553,31 +4953,30 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
4553 int dump_block_groups) 4953 int dump_block_groups)
4554{ 4954{
4555 struct btrfs_block_group_cache *cache; 4955 struct btrfs_block_group_cache *cache;
4956 int index = 0;
4556 4957
4557 spin_lock(&info->lock); 4958 spin_lock(&info->lock);
4558 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4959 printk(KERN_INFO "space_info has %llu free, is %sfull\n",
4559 (unsigned long long)(info->total_bytes - info->bytes_used - 4960 (unsigned long long)(info->total_bytes - info->bytes_used -
4560 info->bytes_pinned - info->bytes_reserved - 4961 info->bytes_pinned - info->bytes_reserved -
4561 info->bytes_super), 4962 info->bytes_readonly),
4562 (info->full) ? "" : "not "); 4963 (info->full) ? "" : "not ");
4563 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4964 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
4564 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" 4965 "reserved=%llu, may_use=%llu, readonly=%llu\n",
4565 "\n",
4566 (unsigned long long)info->total_bytes, 4966 (unsigned long long)info->total_bytes,
4967 (unsigned long long)info->bytes_used,
4567 (unsigned long long)info->bytes_pinned, 4968 (unsigned long long)info->bytes_pinned,
4568 (unsigned long long)info->bytes_delalloc, 4969 (unsigned long long)info->bytes_reserved,
4569 (unsigned long long)info->bytes_may_use, 4970 (unsigned long long)info->bytes_may_use,
4570 (unsigned long long)info->bytes_used, 4971 (unsigned long long)info->bytes_readonly);
4571 (unsigned long long)info->bytes_root,
4572 (unsigned long long)info->bytes_super,
4573 (unsigned long long)info->bytes_reserved);
4574 spin_unlock(&info->lock); 4972 spin_unlock(&info->lock);
4575 4973
4576 if (!dump_block_groups) 4974 if (!dump_block_groups)
4577 return; 4975 return;
4578 4976
4579 down_read(&info->groups_sem); 4977 down_read(&info->groups_sem);
4580 list_for_each_entry(cache, &info->block_groups, list) { 4978again:
4979 list_for_each_entry(cache, &info->block_groups[index], list) {
4581 spin_lock(&cache->lock); 4980 spin_lock(&cache->lock);
4582 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 4981 printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
4583 "%llu pinned %llu reserved\n", 4982 "%llu pinned %llu reserved\n",
@@ -4589,6 +4988,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
4589 btrfs_dump_free_space(cache, bytes); 4988 btrfs_dump_free_space(cache, bytes);
4590 spin_unlock(&cache->lock); 4989 spin_unlock(&cache->lock);
4591 } 4990 }
4991 if (++index < BTRFS_NR_RAID_TYPES)
4992 goto again;
4592 up_read(&info->groups_sem); 4993 up_read(&info->groups_sem);
4593} 4994}
4594 4995
@@ -4614,9 +5015,8 @@ again:
4614 5015
4615 WARN_ON(num_bytes < root->sectorsize); 5016 WARN_ON(num_bytes < root->sectorsize);
4616 ret = find_free_extent(trans, root, num_bytes, empty_size, 5017 ret = find_free_extent(trans, root, num_bytes, empty_size,
4617 search_start, search_end, hint_byte, ins, 5018 search_start, search_end, hint_byte,
4618 trans->alloc_exclude_start, 5019 ins, data);
4619 trans->alloc_exclude_nr, data);
4620 5020
4621 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 5021 if (ret == -ENOSPC && num_bytes > min_alloc_size) {
4622 num_bytes = num_bytes >> 1; 5022 num_bytes = num_bytes >> 1;
@@ -4654,7 +5054,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
4654 ret = btrfs_discard_extent(root, start, len); 5054 ret = btrfs_discard_extent(root, start, len);
4655 5055
4656 btrfs_add_free_space(cache, start, len); 5056 btrfs_add_free_space(cache, start, len);
4657 update_reserved_extents(cache, len, 0); 5057 update_reserved_bytes(cache, len, 0, 1);
4658 btrfs_put_block_group(cache); 5058 btrfs_put_block_group(cache);
4659 5059
4660 return ret; 5060 return ret;
@@ -4717,8 +5117,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
4717 btrfs_mark_buffer_dirty(path->nodes[0]); 5117 btrfs_mark_buffer_dirty(path->nodes[0]);
4718 btrfs_free_path(path); 5118 btrfs_free_path(path);
4719 5119
4720 ret = update_block_group(trans, root, ins->objectid, ins->offset, 5120 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
4721 1, 0);
4722 if (ret) { 5121 if (ret) {
4723 printk(KERN_ERR "btrfs update block group failed for %llu " 5122 printk(KERN_ERR "btrfs update block group failed for %llu "
4724 "%llu\n", (unsigned long long)ins->objectid, 5123 "%llu\n", (unsigned long long)ins->objectid,
@@ -4778,8 +5177,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
4778 btrfs_mark_buffer_dirty(leaf); 5177 btrfs_mark_buffer_dirty(leaf);
4779 btrfs_free_path(path); 5178 btrfs_free_path(path);
4780 5179
4781 ret = update_block_group(trans, root, ins->objectid, ins->offset, 5180 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
4782 1, 0);
4783 if (ret) { 5181 if (ret) {
4784 printk(KERN_ERR "btrfs update block group failed for %llu " 5182 printk(KERN_ERR "btrfs update block group failed for %llu "
4785 "%llu\n", (unsigned long long)ins->objectid, 5183 "%llu\n", (unsigned long long)ins->objectid,
@@ -4855,73 +5253,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4855 put_caching_control(caching_ctl); 5253 put_caching_control(caching_ctl);
4856 } 5254 }
4857 5255
4858 update_reserved_extents(block_group, ins->offset, 1); 5256 ret = update_reserved_bytes(block_group, ins->offset, 1, 1);
5257 BUG_ON(ret);
4859 btrfs_put_block_group(block_group); 5258 btrfs_put_block_group(block_group);
4860 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5259 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
4861 0, owner, offset, ins, 1); 5260 0, owner, offset, ins, 1);
4862 return ret; 5261 return ret;
4863} 5262}
4864 5263
4865/*
4866 * finds a free extent and does all the dirty work required for allocation
4867 * returns the key for the extent through ins, and a tree buffer for
4868 * the first block of the extent through buf.
4869 *
4870 * returns 0 if everything worked, non-zero otherwise.
4871 */
4872static int alloc_tree_block(struct btrfs_trans_handle *trans,
4873 struct btrfs_root *root,
4874 u64 num_bytes, u64 parent, u64 root_objectid,
4875 struct btrfs_disk_key *key, int level,
4876 u64 empty_size, u64 hint_byte, u64 search_end,
4877 struct btrfs_key *ins)
4878{
4879 int ret;
4880 u64 flags = 0;
4881
4882 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4883 empty_size, hint_byte, search_end,
4884 ins, 0);
4885 if (ret)
4886 return ret;
4887
4888 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4889 if (parent == 0)
4890 parent = ins->objectid;
4891 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4892 } else
4893 BUG_ON(parent > 0);
4894
4895 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
4896 struct btrfs_delayed_extent_op *extent_op;
4897 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
4898 BUG_ON(!extent_op);
4899 if (key)
4900 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4901 else
4902 memset(&extent_op->key, 0, sizeof(extent_op->key));
4903 extent_op->flags_to_set = flags;
4904 extent_op->update_key = 1;
4905 extent_op->update_flags = 1;
4906 extent_op->is_data = 0;
4907
4908 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
4909 ins->offset, parent, root_objectid,
4910 level, BTRFS_ADD_DELAYED_EXTENT,
4911 extent_op);
4912 BUG_ON(ret);
4913 }
4914
4915 if (root_objectid == root->root_key.objectid) {
4916 u64 used;
4917 spin_lock(&root->node_lock);
4918 used = btrfs_root_used(&root->root_item) + num_bytes;
4919 btrfs_set_root_used(&root->root_item, used);
4920 spin_unlock(&root->node_lock);
4921 }
4922 return ret;
4923}
4924
4925struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 5264struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4926 struct btrfs_root *root, 5265 struct btrfs_root *root,
4927 u64 bytenr, u32 blocksize, 5266 u64 bytenr, u32 blocksize,
@@ -4960,8 +5299,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
4960 return buf; 5299 return buf;
4961} 5300}
4962 5301
5302static struct btrfs_block_rsv *
5303use_block_rsv(struct btrfs_trans_handle *trans,
5304 struct btrfs_root *root, u32 blocksize)
5305{
5306 struct btrfs_block_rsv *block_rsv;
5307 int ret;
5308
5309 block_rsv = get_block_rsv(trans, root);
5310
5311 if (block_rsv->size == 0) {
5312 ret = reserve_metadata_bytes(block_rsv, blocksize);
5313 if (ret)
5314 return ERR_PTR(ret);
5315 return block_rsv;
5316 }
5317
5318 ret = block_rsv_use_bytes(block_rsv, blocksize);
5319 if (!ret)
5320 return block_rsv;
5321
5322 WARN_ON(1);
5323 printk(KERN_INFO "block_rsv size %llu reserved %llu freed %llu %llu\n",
5324 block_rsv->size, block_rsv->reserved,
5325 block_rsv->freed[0], block_rsv->freed[1]);
5326
5327 return ERR_PTR(-ENOSPC);
5328}
5329
5330static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
5331{
5332 block_rsv_add_bytes(block_rsv, blocksize, 0);
5333 block_rsv_release_bytes(block_rsv, NULL, 0);
5334}
5335
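
use_block_rsv()/unuse_block_rsv() bracket the allocation in btrfs_alloc_free_block() below: take blocksize from the pool up front, and hand it back, trimming any excess over size, if the extent reservation fails. A simplified model of the steady-state path (the size == 0 grow-on-demand case is omitted):

#include <stdio.h>

struct rsv { long size, reserved; };

static int use_bytes(struct rsv *r, long n)
{
        if (r->reserved >= n) {      /* block_rsv_use_bytes() path */
                r->reserved -= n;
                return 0;
        }
        return -1;                   /* -ENOSPC in the kernel */
}

static void unuse_bytes(struct rsv *r, long n)
{
        r->reserved += n;            /* give the bytes back */
        if (r->reserved > r->size)   /* release any excess over size */
                r->reserved = r->size;
}

int main(void)
{
        struct rsv r = { .size = 8192, .reserved = 8192 };

        if (use_bytes(&r, 4096) == 0)
                unuse_bytes(&r, 4096);   /* allocation-failed path */
        printf("reserved=%ld\n", r.reserved);   /* back to 8192 */
        return 0;
}
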
4963/* 5336/*
4964 * helper function to allocate a block for a given tree 5337 * finds a free extent and does all the dirty work required for allocation
5338 * returns the key for the extent through ins, and a tree buffer for
5339 * the first block of the extent through buf.
5340 *
4965 * returns the tree buffer or NULL. 5341 * returns the tree buffer or NULL.
4966 */ 5342 */
4967struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 5343struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
@@ -4971,18 +5347,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4971 u64 hint, u64 empty_size) 5347 u64 hint, u64 empty_size)
4972{ 5348{
4973 struct btrfs_key ins; 5349 struct btrfs_key ins;
4974 int ret; 5350 struct btrfs_block_rsv *block_rsv;
4975 struct extent_buffer *buf; 5351 struct extent_buffer *buf;
5352 u64 flags = 0;
5353 int ret;
4976 5354
4977 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 5355
4978 key, level, empty_size, hint, (u64)-1, &ins); 5356 block_rsv = use_block_rsv(trans, root, blocksize);
5357 if (IS_ERR(block_rsv))
5358 return ERR_CAST(block_rsv);
5359
5360 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
5361 empty_size, hint, (u64)-1, &ins, 0);
4979 if (ret) { 5362 if (ret) {
4980 BUG_ON(ret > 0); 5363 unuse_block_rsv(block_rsv, blocksize);
4981 return ERR_PTR(ret); 5364 return ERR_PTR(ret);
4982 } 5365 }
4983 5366
4984 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 5367 buf = btrfs_init_new_buffer(trans, root, ins.objectid,
4985 blocksize, level); 5368 blocksize, level);
5369 BUG_ON(IS_ERR(buf));
5370
5371 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
5372 if (parent == 0)
5373 parent = ins.objectid;
5374 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5375 } else
5376 BUG_ON(parent > 0);
5377
5378 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
5379 struct btrfs_delayed_extent_op *extent_op;
5380 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
5381 BUG_ON(!extent_op);
5382 if (key)
5383 memcpy(&extent_op->key, key, sizeof(extent_op->key));
5384 else
5385 memset(&extent_op->key, 0, sizeof(extent_op->key));
5386 extent_op->flags_to_set = flags;
5387 extent_op->update_key = 1;
5388 extent_op->update_flags = 1;
5389 extent_op->is_data = 0;
5390
5391 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
5392 ins.offset, parent, root_objectid,
5393 level, BTRFS_ADD_DELAYED_EXTENT,
5394 extent_op);
5395 BUG_ON(ret);
5396 }
4986 return buf; 5397 return buf;
4987} 5398}
4988 5399
@@ -5205,6 +5616,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5205 next = btrfs_find_tree_block(root, bytenr, blocksize); 5616 next = btrfs_find_tree_block(root, bytenr, blocksize);
5206 if (!next) { 5617 if (!next) {
5207 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 5618 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
5619 if (!next)
5620 return -ENOMEM;
5208 reada = 1; 5621 reada = 1;
5209 } 5622 }
5210 btrfs_tree_lock(next); 5623 btrfs_tree_lock(next);
@@ -5305,7 +5718,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5305 struct btrfs_path *path, 5718 struct btrfs_path *path,
5306 struct walk_control *wc) 5719 struct walk_control *wc)
5307{ 5720{
5308 int ret = 0; 5721 int ret;
5309 int level = wc->level; 5722 int level = wc->level;
5310 struct extent_buffer *eb = path->nodes[level]; 5723 struct extent_buffer *eb = path->nodes[level];
5311 u64 parent = 0; 5724 u64 parent = 0;
@@ -5383,13 +5796,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
5383 btrfs_header_owner(path->nodes[level + 1])); 5796 btrfs_header_owner(path->nodes[level + 1]));
5384 } 5797 }
5385 5798
5386 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, 5799 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
5387 root->root_key.objectid, level, 0);
5388 BUG_ON(ret);
5389out: 5800out:
5390 wc->refs[level] = 0; 5801 wc->refs[level] = 0;
5391 wc->flags[level] = 0; 5802 wc->flags[level] = 0;
5392 return ret; 5803 return 0;
5393} 5804}
5394 5805
5395static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 5806static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
@@ -5417,7 +5828,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5417 if (ret > 0) { 5828 if (ret > 0) {
5418 path->slots[level]++; 5829 path->slots[level]++;
5419 continue; 5830 continue;
5420 } 5831 } else if (ret < 0)
5832 return ret;
5421 level = wc->level; 5833 level = wc->level;
5422 } 5834 }
5423 return 0; 5835 return 0;
@@ -5466,7 +5878,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
5466 * also make sure backrefs for the shared block and all lower level 5878 * also make sure backrefs for the shared block and all lower level
5467 * blocks are properly updated. 5879 * blocks are properly updated.
5468 */ 5880 */
5469int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) 5881int btrfs_drop_snapshot(struct btrfs_root *root,
5882 struct btrfs_block_rsv *block_rsv, int update_ref)
5470{ 5883{
5471 struct btrfs_path *path; 5884 struct btrfs_path *path;
5472 struct btrfs_trans_handle *trans; 5885 struct btrfs_trans_handle *trans;
@@ -5484,7 +5897,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5484 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5897 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5485 BUG_ON(!wc); 5898 BUG_ON(!wc);
5486 5899
5487 trans = btrfs_start_transaction(tree_root, 1); 5900 trans = btrfs_start_transaction(tree_root, 0);
5901 if (block_rsv)
5902 trans->block_rsv = block_rsv;
5488 5903
5489 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 5904 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5490 level = btrfs_header_level(root->node); 5905 level = btrfs_header_level(root->node);
@@ -5572,22 +5987,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5572 } 5987 }
5573 5988
5574 BUG_ON(wc->level == 0); 5989 BUG_ON(wc->level == 0);
5575 if (trans->transaction->in_commit || 5990 if (btrfs_should_end_transaction(trans, tree_root)) {
5576 trans->transaction->delayed_refs.flushing) {
5577 ret = btrfs_update_root(trans, tree_root, 5991 ret = btrfs_update_root(trans, tree_root,
5578 &root->root_key, 5992 &root->root_key,
5579 root_item); 5993 root_item);
5580 BUG_ON(ret); 5994 BUG_ON(ret);
5581 5995
5582 btrfs_end_transaction(trans, tree_root); 5996 btrfs_end_transaction_throttle(trans, tree_root);
5583 trans = btrfs_start_transaction(tree_root, 1); 5997 trans = btrfs_start_transaction(tree_root, 0);
5584 } else { 5998 if (block_rsv)
5585 unsigned long update; 5999 trans->block_rsv = block_rsv;
5586 update = trans->delayed_ref_updates;
5587 trans->delayed_ref_updates = 0;
5588 if (update)
5589 btrfs_run_delayed_refs(trans, tree_root,
5590 update);
5591 } 6000 }
5592 } 6001 }
5593 btrfs_release_path(root, path); 6002 btrfs_release_path(root, path);
@@ -5615,7 +6024,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
5615 kfree(root); 6024 kfree(root);
5616 } 6025 }
5617out: 6026out:
5618 btrfs_end_transaction(trans, tree_root); 6027 btrfs_end_transaction_throttle(trans, tree_root);
5619 kfree(wc); 6028 kfree(wc);
5620 btrfs_free_path(path); 6029 btrfs_free_path(path);
5621 return err; 6030 return err;
@@ -7211,48 +7620,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7211 return flags; 7620 return flags;
7212} 7621}
7213 7622
7214static int __alloc_chunk_for_shrink(struct btrfs_root *root, 7623static int set_block_group_ro(struct btrfs_block_group_cache *cache)
7215 struct btrfs_block_group_cache *shrink_block_group,
7216 int force)
7217{ 7624{
7218 struct btrfs_trans_handle *trans; 7625 struct btrfs_space_info *sinfo = cache->space_info;
7219 u64 new_alloc_flags; 7626 u64 num_bytes;
7220 u64 calc; 7627 int ret = -ENOSPC;
7221 7628
7222 spin_lock(&shrink_block_group->lock); 7629 if (cache->ro)
7223 if (btrfs_block_group_used(&shrink_block_group->item) + 7630 return 0;
7224 shrink_block_group->reserved > 0) {
7225 spin_unlock(&shrink_block_group->lock);
7226 7631
7227 trans = btrfs_start_transaction(root, 1); 7632 spin_lock(&sinfo->lock);
7228 spin_lock(&shrink_block_group->lock); 7633 spin_lock(&cache->lock);
7634 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
7635 cache->bytes_super - btrfs_block_group_used(&cache->item);
7636
7637 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
7638 sinfo->bytes_may_use + sinfo->bytes_readonly +
7639 cache->reserved_pinned + num_bytes < sinfo->total_bytes) {
7640 sinfo->bytes_readonly += num_bytes;
7641 sinfo->bytes_reserved += cache->reserved_pinned;
7642 cache->reserved_pinned = 0;
7643 cache->ro = 1;
7644 ret = 0;
7645 }
7646 spin_unlock(&cache->lock);
7647 spin_unlock(&sinfo->lock);
7648 return ret;
7649}
7229 7650
7230 new_alloc_flags = update_block_group_flags(root, 7651int btrfs_set_block_group_ro(struct btrfs_root *root,
7231 shrink_block_group->flags); 7652 struct btrfs_block_group_cache *cache)
7232 if (new_alloc_flags != shrink_block_group->flags) {
7233 calc =
7234 btrfs_block_group_used(&shrink_block_group->item);
7235 } else {
7236 calc = shrink_block_group->key.offset;
7237 }
7238 spin_unlock(&shrink_block_group->lock);
7239 7653
7240 do_chunk_alloc(trans, root->fs_info->extent_root, 7654{
7241 calc + 2 * 1024 * 1024, new_alloc_flags, force); 7655 struct btrfs_trans_handle *trans;
7656 u64 alloc_flags;
7657 int ret;
7242 7658
7243 btrfs_end_transaction(trans, root); 7659 BUG_ON(cache->ro);
7244 } else
7245 spin_unlock(&shrink_block_group->lock);
7246 return 0;
7247}
7248 7660
7661 trans = btrfs_join_transaction(root, 1);
7662 BUG_ON(IS_ERR(trans));
7249 7663
7250int btrfs_prepare_block_group_relocation(struct btrfs_root *root, 7664 alloc_flags = update_block_group_flags(root, cache->flags);
7251 struct btrfs_block_group_cache *group) 7665 if (alloc_flags != cache->flags)
7666 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
7667
7668 ret = set_block_group_ro(cache);
7669 if (!ret)
7670 goto out;
7671 alloc_flags = get_alloc_profile(root, cache->space_info->flags);
7672 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
7673 if (ret < 0)
7674 goto out;
7675 ret = set_block_group_ro(cache);
7676out:
7677 btrfs_end_transaction(trans, root);
7678 return ret;
7679}
7252 7680
7681int btrfs_set_block_group_rw(struct btrfs_root *root,
7682 struct btrfs_block_group_cache *cache)
7253{ 7683{
7254 __alloc_chunk_for_shrink(root, group, 1); 7684 struct btrfs_space_info *sinfo = cache->space_info;
7255 set_block_group_readonly(group); 7685 u64 num_bytes;
7686
7687 BUG_ON(!cache->ro);
7688
7689 spin_lock(&sinfo->lock);
7690 spin_lock(&cache->lock);
7691 num_bytes = cache->key.offset - cache->reserved - cache->pinned -
7692 cache->bytes_super - btrfs_block_group_used(&cache->item);
7693 sinfo->bytes_readonly -= num_bytes;
7694 cache->ro = 0;
7695 spin_unlock(&cache->lock);
7696 spin_unlock(&sinfo->lock);
7256 return 0; 7697 return 0;
7257} 7698}
7258 7699
@@ -7369,7 +7810,6 @@ static int find_first_block_group(struct btrfs_root *root,
7369 } 7810 }
7370 path->slots[0]++; 7811 path->slots[0]++;
7371 } 7812 }
7372 ret = -ENOENT;
7373out: 7813out:
7374 return ret; 7814 return ret;
7375} 7815}
@@ -7420,17 +7860,33 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7420 */ 7860 */
7421 synchronize_rcu(); 7861 synchronize_rcu();
7422 7862
7863 release_global_block_rsv(info);
7864
7423 while(!list_empty(&info->space_info)) { 7865 while(!list_empty(&info->space_info)) {
7424 space_info = list_entry(info->space_info.next, 7866 space_info = list_entry(info->space_info.next,
7425 struct btrfs_space_info, 7867 struct btrfs_space_info,
7426 list); 7868 list);
7427 7869 if (space_info->bytes_pinned > 0 ||
7870 space_info->bytes_reserved > 0) {
7871 WARN_ON(1);
7872 dump_space_info(space_info, 0, 0);
7873 }
7428 list_del(&space_info->list); 7874 list_del(&space_info->list);
7429 kfree(space_info); 7875 kfree(space_info);
7430 } 7876 }
7431 return 0; 7877 return 0;
7432} 7878}
7433 7879
7880static void __link_block_group(struct btrfs_space_info *space_info,
7881 struct btrfs_block_group_cache *cache)
7882{
7883 int index = get_block_group_index(cache);
7884
7885 down_write(&space_info->groups_sem);
7886 list_add_tail(&cache->list, &space_info->block_groups[index]);
7887 up_write(&space_info->groups_sem);
7888}
7889
7434int btrfs_read_block_groups(struct btrfs_root *root) 7890int btrfs_read_block_groups(struct btrfs_root *root)
7435{ 7891{
7436 struct btrfs_path *path; 7892 struct btrfs_path *path;
@@ -7452,10 +7908,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7452 7908
7453 while (1) { 7909 while (1) {
7454 ret = find_first_block_group(root, path, &key); 7910 ret = find_first_block_group(root, path, &key);
7455 if (ret > 0) { 7911 if (ret > 0)
7456 ret = 0; 7912 break;
7457 goto error;
7458 }
7459 if (ret != 0) 7913 if (ret != 0)
7460 goto error; 7914 goto error;
7461 7915
@@ -7464,7 +7918,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7464 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7918 cache = kzalloc(sizeof(*cache), GFP_NOFS);
7465 if (!cache) { 7919 if (!cache) {
7466 ret = -ENOMEM; 7920 ret = -ENOMEM;
7467 break; 7921 goto error;
7468 } 7922 }
7469 7923
7470 atomic_set(&cache->count, 1); 7924 atomic_set(&cache->count, 1);
@@ -7521,20 +7975,36 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7521 BUG_ON(ret); 7975 BUG_ON(ret);
7522 cache->space_info = space_info; 7976 cache->space_info = space_info;
7523 spin_lock(&cache->space_info->lock); 7977 spin_lock(&cache->space_info->lock);
7524 cache->space_info->bytes_super += cache->bytes_super; 7978 cache->space_info->bytes_readonly += cache->bytes_super;
7525 spin_unlock(&cache->space_info->lock); 7979 spin_unlock(&cache->space_info->lock);
7526 7980
7527 down_write(&space_info->groups_sem); 7981 __link_block_group(space_info, cache);
7528 list_add_tail(&cache->list, &space_info->block_groups);
7529 up_write(&space_info->groups_sem);
7530 7982
7531 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7983 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7532 BUG_ON(ret); 7984 BUG_ON(ret);
7533 7985
7534 set_avail_alloc_bits(root->fs_info, cache->flags); 7986 set_avail_alloc_bits(root->fs_info, cache->flags);
7535 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7987 if (btrfs_chunk_readonly(root, cache->key.objectid))
7536 set_block_group_readonly(cache); 7988 set_block_group_ro(cache);
7537 } 7989 }
7990
7991 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
7992 if (!(get_alloc_profile(root, space_info->flags) &
7993 (BTRFS_BLOCK_GROUP_RAID10 |
7994 BTRFS_BLOCK_GROUP_RAID1 |
7995 BTRFS_BLOCK_GROUP_DUP)))
7996 continue;
7997 /*
7998 * avoid allocating from un-mirrored block groups if there are
7999 * mirrored block groups.
8000 */
8001 list_for_each_entry(cache, &space_info->block_groups[3], list)
8002 set_block_group_ro(cache);
8003 list_for_each_entry(cache, &space_info->block_groups[4], list)
8004 set_block_group_ro(cache);
8005 }
8006
8007 init_global_block_rsv(info);
7538 ret = 0; 8008 ret = 0;
7539error: 8009error:
7540 btrfs_free_path(path); 8010 btrfs_free_path(path);
@@ -7595,12 +8065,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
7595 BUG_ON(ret); 8065 BUG_ON(ret);
7596 8066
7597 spin_lock(&cache->space_info->lock); 8067 spin_lock(&cache->space_info->lock);
7598 cache->space_info->bytes_super += cache->bytes_super; 8068 cache->space_info->bytes_readonly += cache->bytes_super;
7599 spin_unlock(&cache->space_info->lock); 8069 spin_unlock(&cache->space_info->lock);
7600 8070
7601 down_write(&cache->space_info->groups_sem); 8071 __link_block_group(cache->space_info, cache);
7602 list_add_tail(&cache->list, &cache->space_info->block_groups);
7603 up_write(&cache->space_info->groups_sem);
7604 8072
7605 ret = btrfs_add_block_group_cache(root->fs_info, cache); 8073 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7606 BUG_ON(ret); 8074 BUG_ON(ret);