aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2010-06-21 14:48:16 -0400
committerJosef Bacik <josef@redhat.com>2010-10-28 15:59:09 -0400
commit0af3d00bad38d3bb9912a60928ad0669f17bdb76 (patch)
treeabbf4c773138a33dcde483ac60f016c4b5e55dcc /fs
parentf6f94e2ab1b33f0082ac22d71f66385a60d8157f (diff)
Btrfs: create special free space cache inode
In order to save the free space cache, we need an inode to hold the data, and we need a special item to point at the right inode for the right block group. So first, create a special item that points to the right inode and records the number of extent entries and the number of bitmaps we will have. We truncate and pre-allocate space every time to make sure it's up to date. This feature will be turned on as soon as you mount with -o space_cache; however, it is safe to boot into old kernels, they will just generate the cache the old-fashioned way. When you boot back into a newer kernel we will notice that the filesystem was modified without the cache being updated and automatically discard the cache. Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h74
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent-tree.c231
-rw-r--r--fs/btrfs/free-space-cache.c155
-rw-r--r--fs/btrfs/free-space-cache.h11
-rw-r--r--fs/btrfs/inode.c95
-rw-r--r--fs/btrfs/relocation.c91
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/transaction.c43
-rw-r--r--fs/btrfs/transaction.h4
10 files changed, 668 insertions, 46 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286abad17..46f52e1beade 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
99 */ 99 */
100#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL 100#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
101 101
102/* For storing free space cache */
103#define BTRFS_FREE_SPACE_OBJECTID -11ULL
104
102/* dummy objectid represents multiple objectids */ 105/* dummy objectid represents multiple objectids */
103#define BTRFS_MULTIPLE_OBJECTIDS -255ULL 106#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
104 107
@@ -265,6 +268,22 @@ struct btrfs_chunk {
265 /* additional stripes go here */ 268 /* additional stripes go here */
266} __attribute__ ((__packed__)); 269} __attribute__ ((__packed__));
267 270
271#define BTRFS_FREE_SPACE_EXTENT 1
272#define BTRFS_FREE_SPACE_BITMAP 2
273
274struct btrfs_free_space_entry {
275 __le64 offset;
276 __le64 bytes;
277 u8 type;
278} __attribute__ ((__packed__));
279
280struct btrfs_free_space_header {
281 struct btrfs_disk_key location;
282 __le64 generation;
283 __le64 num_entries;
284 __le64 num_bitmaps;
285} __attribute__ ((__packed__));
286
268static inline unsigned long btrfs_chunk_item_size(int num_stripes) 287static inline unsigned long btrfs_chunk_item_size(int num_stripes)
269{ 288{
270 BUG_ON(num_stripes == 0); 289 BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
365 384
366 char label[BTRFS_LABEL_SIZE]; 385 char label[BTRFS_LABEL_SIZE];
367 386
387 __le64 cache_generation;
388
368 /* future expansion */ 389 /* future expansion */
369 __le64 reserved[32]; 390 __le64 reserved[31];
370 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; 391 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
371} __attribute__ ((__packed__)); 392} __attribute__ ((__packed__));
372 393
@@ -375,12 +396,12 @@ struct btrfs_super_block {
375 * ones specified below then we will fail to mount 396 * ones specified below then we will fail to mount
376 */ 397 */
377#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 398#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
378#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0) 399#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
379 400
380#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 401#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
381#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 402#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
382#define BTRFS_FEATURE_INCOMPAT_SUPP \ 403#define BTRFS_FEATURE_INCOMPAT_SUPP \
383 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 404 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
384 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL) 405 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
385 406
386/* 407/*
@@ -750,6 +771,14 @@ enum btrfs_caching_type {
750 BTRFS_CACHE_FINISHED = 2, 771 BTRFS_CACHE_FINISHED = 2,
751}; 772};
752 773
774enum btrfs_disk_cache_state {
775 BTRFS_DC_WRITTEN = 0,
776 BTRFS_DC_ERROR = 1,
777 BTRFS_DC_CLEAR = 2,
778 BTRFS_DC_SETUP = 3,
779 BTRFS_DC_NEED_WRITE = 4,
780};
781
753struct btrfs_caching_control { 782struct btrfs_caching_control {
754 struct list_head list; 783 struct list_head list;
755 struct mutex mutex; 784 struct mutex mutex;
@@ -763,6 +792,7 @@ struct btrfs_block_group_cache {
763 struct btrfs_key key; 792 struct btrfs_key key;
764 struct btrfs_block_group_item item; 793 struct btrfs_block_group_item item;
765 struct btrfs_fs_info *fs_info; 794 struct btrfs_fs_info *fs_info;
795 struct inode *inode;
766 spinlock_t lock; 796 spinlock_t lock;
767 u64 pinned; 797 u64 pinned;
768 u64 reserved; 798 u64 reserved;
@@ -773,8 +803,11 @@ struct btrfs_block_group_cache {
773 int extents_thresh; 803 int extents_thresh;
774 int free_extents; 804 int free_extents;
775 int total_bitmaps; 805 int total_bitmaps;
776 int ro; 806 int ro:1;
777 int dirty; 807 int dirty:1;
808 int iref:1;
809
810 int disk_cache_state;
778 811
779 /* cache tracking stuff */ 812 /* cache tracking stuff */
780 int cached; 813 int cached;
@@ -1192,6 +1225,7 @@ struct btrfs_root {
1192#define BTRFS_MOUNT_NOSSD (1 << 9) 1225#define BTRFS_MOUNT_NOSSD (1 << 9)
1193#define BTRFS_MOUNT_DISCARD (1 << 10) 1226#define BTRFS_MOUNT_DISCARD (1 << 10)
1194#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) 1227#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
1228#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1195 1229
1196#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1230#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1197#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1231#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1699,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
1665 write_eb_member(eb, item, struct btrfs_dir_item, location, key); 1699 write_eb_member(eb, item, struct btrfs_dir_item, location, key);
1666} 1700}
1667 1701
1702BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
1703 num_entries, 64);
1704BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
1705 num_bitmaps, 64);
1706BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
1707 generation, 64);
1708
1709static inline void btrfs_free_space_key(struct extent_buffer *eb,
1710 struct btrfs_free_space_header *h,
1711 struct btrfs_disk_key *key)
1712{
1713 read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
1714}
1715
1716static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
1717 struct btrfs_free_space_header *h,
1718 struct btrfs_disk_key *key)
1719{
1720 write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
1721}
1722
1668/* struct btrfs_disk_key */ 1723/* struct btrfs_disk_key */
1669BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, 1724BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
1670 objectid, 64); 1725 objectid, 64);
@@ -1876,6 +1931,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
1876 incompat_flags, 64); 1931 incompat_flags, 64);
1877BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, 1932BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
1878 csum_type, 16); 1933 csum_type, 16);
1934BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
1935 cache_generation, 64);
1879 1936
1880static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 1937static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
1881{ 1938{
@@ -2115,6 +2172,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
2115 struct btrfs_block_group_cache *cache); 2172 struct btrfs_block_group_cache *cache);
2116int btrfs_set_block_group_rw(struct btrfs_root *root, 2173int btrfs_set_block_group_rw(struct btrfs_root *root,
2117 struct btrfs_block_group_cache *cache); 2174 struct btrfs_block_group_cache *cache);
2175void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2118/* ctree.c */ 2176/* ctree.c */
2119int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2177int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2120 int level, int *slot); 2178 int level, int *slot);
@@ -2426,6 +2484,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
2426int btrfs_prealloc_file_range(struct inode *inode, int mode, 2484int btrfs_prealloc_file_range(struct inode *inode, int mode,
2427 u64 start, u64 num_bytes, u64 min_size, 2485 u64 start, u64 num_bytes, u64 min_size,
2428 loff_t actual_len, u64 *alloc_hint); 2486 loff_t actual_len, u64 *alloc_hint);
2487int btrfs_prealloc_file_range_trans(struct inode *inode,
2488 struct btrfs_trans_handle *trans, int mode,
2489 u64 start, u64 num_bytes, u64 min_size,
2490 loff_t actual_len, u64 *alloc_hint);
2429extern const struct dentry_operations btrfs_dentry_operations; 2491extern const struct dentry_operations btrfs_dentry_operations;
2430 2492
2431/* ioctl.c */ 2493/* ioctl.c */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 64f10082f048..45cf64fc1e3e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1685,7 +1685,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1685 __setup_root(4096, 4096, 4096, 4096, tree_root, 1685 __setup_root(4096, 4096, 4096, 4096, tree_root,
1686 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1686 fs_info, BTRFS_ROOT_TREE_OBJECTID);
1687 1687
1688
1689 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1688 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1690 if (!bh) 1689 if (!bh)
1691 goto fail_iput; 1690 goto fail_iput;
@@ -1993,6 +1992,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1993 if (!(sb->s_flags & MS_RDONLY)) { 1992 if (!(sb->s_flags & MS_RDONLY)) {
1994 down_read(&fs_info->cleanup_work_sem); 1993 down_read(&fs_info->cleanup_work_sem);
1995 btrfs_orphan_cleanup(fs_info->fs_root); 1994 btrfs_orphan_cleanup(fs_info->fs_root);
1995 btrfs_orphan_cleanup(fs_info->tree_root);
1996 up_read(&fs_info->cleanup_work_sem); 1996 up_read(&fs_info->cleanup_work_sem);
1997 } 1997 }
1998 1998
@@ -2421,6 +2421,7 @@ int close_ctree(struct btrfs_root *root)
2421 fs_info->closing = 1; 2421 fs_info->closing = 1;
2422 smp_mb(); 2422 smp_mb();
2423 2423
2424 btrfs_put_block_group_cache(fs_info);
2424 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2425 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
2425 ret = btrfs_commit_super(root); 2426 ret = btrfs_commit_super(root);
2426 if (ret) 2427 if (ret)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 32d094002a57..aab40fb3faed 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2688,6 +2688,109 @@ next_block_group(struct btrfs_root *root,
2688 return cache; 2688 return cache;
2689} 2689}
2690 2690
2691static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2692 struct btrfs_trans_handle *trans,
2693 struct btrfs_path *path)
2694{
2695 struct btrfs_root *root = block_group->fs_info->tree_root;
2696 struct inode *inode = NULL;
2697 u64 alloc_hint = 0;
2698 int num_pages = 0;
2699 int retries = 0;
2700 int ret = 0;
2701
2702 /*
2703 * If this block group is smaller than 100 megs don't bother caching the
2704 * block group.
2705 */
2706 if (block_group->key.offset < (100 * 1024 * 1024)) {
2707 spin_lock(&block_group->lock);
2708 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
2709 spin_unlock(&block_group->lock);
2710 return 0;
2711 }
2712
2713again:
2714 inode = lookup_free_space_inode(root, block_group, path);
2715 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2716 ret = PTR_ERR(inode);
2717 btrfs_release_path(root, path);
2718 goto out;
2719 }
2720
2721 if (IS_ERR(inode)) {
2722 BUG_ON(retries);
2723 retries++;
2724
2725 if (block_group->ro)
2726 goto out_free;
2727
2728 ret = create_free_space_inode(root, trans, block_group, path);
2729 if (ret)
2730 goto out_free;
2731 goto again;
2732 }
2733
2734 /*
2735 * We want to set the generation to 0, that way if anything goes wrong
2736 * from here on out we know not to trust this cache when we load up next
2737 * time.
2738 */
2739 BTRFS_I(inode)->generation = 0;
2740 ret = btrfs_update_inode(trans, root, inode);
2741 WARN_ON(ret);
2742
2743 if (i_size_read(inode) > 0) {
2744 ret = btrfs_truncate_free_space_cache(root, trans, path,
2745 inode);
2746 if (ret)
2747 goto out_put;
2748 }
2749
2750 spin_lock(&block_group->lock);
2751 if (block_group->cached != BTRFS_CACHE_FINISHED) {
2752 spin_unlock(&block_group->lock);
2753 goto out_put;
2754 }
2755 spin_unlock(&block_group->lock);
2756
2757 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
2758 if (!num_pages)
2759 num_pages = 1;
2760
2761 /*
2762 * Just to make absolutely sure we have enough space, we're going to
2763 * preallocate 12 pages worth of space for each block group. In
2764 * practice we ought to use at most 8, but we need extra space so we can
2765 * add our header and have a terminator between the extents and the
2766 * bitmaps.
2767 */
2768 num_pages *= 16;
2769 num_pages *= PAGE_CACHE_SIZE;
2770
2771 ret = btrfs_check_data_free_space(inode, num_pages);
2772 if (ret)
2773 goto out_put;
2774
2775 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2776 num_pages, num_pages,
2777 &alloc_hint);
2778 btrfs_free_reserved_data_space(inode, num_pages);
2779out_put:
2780 iput(inode);
2781out_free:
2782 btrfs_release_path(root, path);
2783out:
2784 spin_lock(&block_group->lock);
2785 if (ret)
2786 block_group->disk_cache_state = BTRFS_DC_ERROR;
2787 else
2788 block_group->disk_cache_state = BTRFS_DC_SETUP;
2789 spin_unlock(&block_group->lock);
2790
2791 return ret;
2792}
2793
2691int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2794int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2692 struct btrfs_root *root) 2795 struct btrfs_root *root)
2693{ 2796{
@@ -2700,6 +2803,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2700 if (!path) 2803 if (!path)
2701 return -ENOMEM; 2804 return -ENOMEM;
2702 2805
2806again:
2807 while (1) {
2808 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2809 while (cache) {
2810 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
2811 break;
2812 cache = next_block_group(root, cache);
2813 }
2814 if (!cache) {
2815 if (last == 0)
2816 break;
2817 last = 0;
2818 continue;
2819 }
2820 err = cache_save_setup(cache, trans, path);
2821 last = cache->key.objectid + cache->key.offset;
2822 btrfs_put_block_group(cache);
2823 }
2824
2703 while (1) { 2825 while (1) {
2704 if (last == 0) { 2826 if (last == 0) {
2705 err = btrfs_run_delayed_refs(trans, root, 2827 err = btrfs_run_delayed_refs(trans, root,
@@ -2709,6 +2831,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2709 2831
2710 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2832 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2711 while (cache) { 2833 while (cache) {
2834 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
2835 btrfs_put_block_group(cache);
2836 goto again;
2837 }
2838
2712 if (cache->dirty) 2839 if (cache->dirty)
2713 break; 2840 break;
2714 cache = next_block_group(root, cache); 2841 cache = next_block_group(root, cache);
@@ -2883,11 +3010,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
2883 struct btrfs_space_info *data_sinfo; 3010 struct btrfs_space_info *data_sinfo;
2884 struct btrfs_root *root = BTRFS_I(inode)->root; 3011 struct btrfs_root *root = BTRFS_I(inode)->root;
2885 u64 used; 3012 u64 used;
2886 int ret = 0, committed = 0; 3013 int ret = 0, committed = 0, alloc_chunk = 1;
2887 3014
2888 /* make sure bytes are sectorsize aligned */ 3015 /* make sure bytes are sectorsize aligned */
2889 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3016 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2890 3017
3018 if (root == root->fs_info->tree_root) {
3019 alloc_chunk = 0;
3020 committed = 1;
3021 }
3022
2891 data_sinfo = BTRFS_I(inode)->space_info; 3023 data_sinfo = BTRFS_I(inode)->space_info;
2892 if (!data_sinfo) 3024 if (!data_sinfo)
2893 goto alloc; 3025 goto alloc;
@@ -2906,7 +3038,7 @@ again:
2906 * if we don't have enough free bytes in this space then we need 3038 * if we don't have enough free bytes in this space then we need
2907 * to alloc a new chunk. 3039 * to alloc a new chunk.
2908 */ 3040 */
2909 if (!data_sinfo->full) { 3041 if (!data_sinfo->full && alloc_chunk) {
2910 u64 alloc_target; 3042 u64 alloc_target;
2911 3043
2912 data_sinfo->force_alloc = 1; 3044 data_sinfo->force_alloc = 1;
@@ -3777,12 +3909,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3777 struct btrfs_root *root, 3909 struct btrfs_root *root,
3778 u64 bytenr, u64 num_bytes, int alloc) 3910 u64 bytenr, u64 num_bytes, int alloc)
3779{ 3911{
3780 struct btrfs_block_group_cache *cache; 3912 struct btrfs_block_group_cache *cache = NULL;
3781 struct btrfs_fs_info *info = root->fs_info; 3913 struct btrfs_fs_info *info = root->fs_info;
3782 int factor;
3783 u64 total = num_bytes; 3914 u64 total = num_bytes;
3784 u64 old_val; 3915 u64 old_val;
3785 u64 byte_in_group; 3916 u64 byte_in_group;
3917 int factor;
3786 3918
3787 /* block accounting for super block */ 3919 /* block accounting for super block */
3788 spin_lock(&info->delalloc_lock); 3920 spin_lock(&info->delalloc_lock);
@@ -3804,11 +3936,17 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3804 factor = 2; 3936 factor = 2;
3805 else 3937 else
3806 factor = 1; 3938 factor = 1;
3939
3807 byte_in_group = bytenr - cache->key.objectid; 3940 byte_in_group = bytenr - cache->key.objectid;
3808 WARN_ON(byte_in_group > cache->key.offset); 3941 WARN_ON(byte_in_group > cache->key.offset);
3809 3942
3810 spin_lock(&cache->space_info->lock); 3943 spin_lock(&cache->space_info->lock);
3811 spin_lock(&cache->lock); 3944 spin_lock(&cache->lock);
3945
3946 if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
3947 cache->disk_cache_state < BTRFS_DC_CLEAR)
3948 cache->disk_cache_state = BTRFS_DC_CLEAR;
3949
3812 cache->dirty = 1; 3950 cache->dirty = 1;
3813 old_val = btrfs_block_group_used(&cache->item); 3951 old_val = btrfs_block_group_used(&cache->item);
3814 num_bytes = min(total, cache->key.offset - byte_in_group); 3952 num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -7814,6 +7952,40 @@ out:
7814 return ret; 7952 return ret;
7815} 7953}
7816 7954
7955void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
7956{
7957 struct btrfs_block_group_cache *block_group;
7958 u64 last = 0;
7959
7960 while (1) {
7961 struct inode *inode;
7962
7963 block_group = btrfs_lookup_first_block_group(info, last);
7964 while (block_group) {
7965 spin_lock(&block_group->lock);
7966 if (block_group->iref)
7967 break;
7968 spin_unlock(&block_group->lock);
7969 block_group = next_block_group(info->tree_root,
7970 block_group);
7971 }
7972 if (!block_group) {
7973 if (last == 0)
7974 break;
7975 last = 0;
7976 continue;
7977 }
7978
7979 inode = block_group->inode;
7980 block_group->iref = 0;
7981 block_group->inode = NULL;
7982 spin_unlock(&block_group->lock);
7983 iput(inode);
7984 last = block_group->key.objectid + block_group->key.offset;
7985 btrfs_put_block_group(block_group);
7986 }
7987}
7988
7817int btrfs_free_block_groups(struct btrfs_fs_info *info) 7989int btrfs_free_block_groups(struct btrfs_fs_info *info)
7818{ 7990{
7819 struct btrfs_block_group_cache *block_group; 7991 struct btrfs_block_group_cache *block_group;
@@ -7897,6 +8069,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7897 struct btrfs_key key; 8069 struct btrfs_key key;
7898 struct btrfs_key found_key; 8070 struct btrfs_key found_key;
7899 struct extent_buffer *leaf; 8071 struct extent_buffer *leaf;
8072 int need_clear = 0;
8073 u64 cache_gen;
7900 8074
7901 root = info->extent_root; 8075 root = info->extent_root;
7902 key.objectid = 0; 8076 key.objectid = 0;
@@ -7906,6 +8080,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7906 if (!path) 8080 if (!path)
7907 return -ENOMEM; 8081 return -ENOMEM;
7908 8082
8083 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
8084 if (cache_gen != 0 &&
8085 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
8086 need_clear = 1;
8087
7909 while (1) { 8088 while (1) {
7910 ret = find_first_block_group(root, path, &key); 8089 ret = find_first_block_group(root, path, &key);
7911 if (ret > 0) 8090 if (ret > 0)
@@ -7928,6 +8107,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7928 INIT_LIST_HEAD(&cache->list); 8107 INIT_LIST_HEAD(&cache->list);
7929 INIT_LIST_HEAD(&cache->cluster_list); 8108 INIT_LIST_HEAD(&cache->cluster_list);
7930 8109
8110 if (need_clear)
8111 cache->disk_cache_state = BTRFS_DC_CLEAR;
8112
7931 /* 8113 /*
7932 * we only want to have 32k of ram per block group for keeping 8114 * we only want to have 32k of ram per block group for keeping
7933 * track of free space, and if we pass 1/2 of that we want to 8115 * track of free space, and if we pass 1/2 of that we want to
@@ -8032,6 +8214,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8032 cache->key.offset = size; 8214 cache->key.offset = size;
8033 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 8215 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8034 cache->sectorsize = root->sectorsize; 8216 cache->sectorsize = root->sectorsize;
8217 cache->fs_info = root->fs_info;
8035 8218
8036 /* 8219 /*
8037 * we only want to have 32k of ram per block group for keeping track 8220 * we only want to have 32k of ram per block group for keeping track
@@ -8088,7 +8271,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8088 struct btrfs_path *path; 8271 struct btrfs_path *path;
8089 struct btrfs_block_group_cache *block_group; 8272 struct btrfs_block_group_cache *block_group;
8090 struct btrfs_free_cluster *cluster; 8273 struct btrfs_free_cluster *cluster;
8274 struct btrfs_root *tree_root = root->fs_info->tree_root;
8091 struct btrfs_key key; 8275 struct btrfs_key key;
8276 struct inode *inode;
8092 int ret; 8277 int ret;
8093 8278
8094 root = root->fs_info->extent_root; 8279 root = root->fs_info->extent_root;
@@ -8097,8 +8282,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8097 BUG_ON(!block_group); 8282 BUG_ON(!block_group);
8098 BUG_ON(!block_group->ro); 8283 BUG_ON(!block_group->ro);
8099 8284
8100 memcpy(&key, &block_group->key, sizeof(key));
8101
8102 /* make sure this block group isn't part of an allocation cluster */ 8285 /* make sure this block group isn't part of an allocation cluster */
8103 cluster = &root->fs_info->data_alloc_cluster; 8286 cluster = &root->fs_info->data_alloc_cluster;
8104 spin_lock(&cluster->refill_lock); 8287 spin_lock(&cluster->refill_lock);
@@ -8117,6 +8300,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8117 path = btrfs_alloc_path(); 8300 path = btrfs_alloc_path();
8118 BUG_ON(!path); 8301 BUG_ON(!path);
8119 8302
8303 inode = lookup_free_space_inode(root, block_group, path);
8304 if (!IS_ERR(inode)) {
8305 btrfs_orphan_add(trans, inode);
8306 clear_nlink(inode);
8307 /* One for the block groups ref */
8308 spin_lock(&block_group->lock);
8309 if (block_group->iref) {
8310 block_group->iref = 0;
8311 block_group->inode = NULL;
8312 spin_unlock(&block_group->lock);
8313 iput(inode);
8314 } else {
8315 spin_unlock(&block_group->lock);
8316 }
8317 /* One for our lookup ref */
8318 iput(inode);
8319 }
8320
8321 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
8322 key.offset = block_group->key.objectid;
8323 key.type = 0;
8324
8325 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
8326 if (ret < 0)
8327 goto out;
8328 if (ret > 0)
8329 btrfs_release_path(tree_root, path);
8330 if (ret == 0) {
8331 ret = btrfs_del_item(trans, tree_root, path);
8332 if (ret)
8333 goto out;
8334 btrfs_release_path(tree_root, path);
8335 }
8336
8120 spin_lock(&root->fs_info->block_group_cache_lock); 8337 spin_lock(&root->fs_info->block_group_cache_lock);
8121 rb_erase(&block_group->cache_node, 8338 rb_erase(&block_group->cache_node,
8122 &root->fs_info->block_group_cache_tree); 8339 &root->fs_info->block_group_cache_tree);
@@ -8140,6 +8357,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8140 block_group->space_info->bytes_readonly -= block_group->key.offset; 8357 block_group->space_info->bytes_readonly -= block_group->key.offset;
8141 spin_unlock(&block_group->space_info->lock); 8358 spin_unlock(&block_group->space_info->lock);
8142 8359
8360 memcpy(&key, &block_group->key, sizeof(key));
8361
8143 btrfs_clear_space_info_full(root->fs_info); 8362 btrfs_clear_space_info_full(root->fs_info);
8144 8363
8145 btrfs_put_block_group(block_group); 8364 btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac04d99..05efcc7061a7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,165 @@
23#include "ctree.h" 23#include "ctree.h"
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h"
26 27
27#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
28#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
29 30
31struct inode *lookup_free_space_inode(struct btrfs_root *root,
32 struct btrfs_block_group_cache
33 *block_group, struct btrfs_path *path)
34{
35 struct btrfs_key key;
36 struct btrfs_key location;
37 struct btrfs_disk_key disk_key;
38 struct btrfs_free_space_header *header;
39 struct extent_buffer *leaf;
40 struct inode *inode = NULL;
41 int ret;
42
43 spin_lock(&block_group->lock);
44 if (block_group->inode)
45 inode = igrab(block_group->inode);
46 spin_unlock(&block_group->lock);
47 if (inode)
48 return inode;
49
50 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
51 key.offset = block_group->key.objectid;
52 key.type = 0;
53
54 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
55 if (ret < 0)
56 return ERR_PTR(ret);
57 if (ret > 0) {
58 btrfs_release_path(root, path);
59 return ERR_PTR(-ENOENT);
60 }
61
62 leaf = path->nodes[0];
63 header = btrfs_item_ptr(leaf, path->slots[0],
64 struct btrfs_free_space_header);
65 btrfs_free_space_key(leaf, header, &disk_key);
66 btrfs_disk_key_to_cpu(&location, &disk_key);
67 btrfs_release_path(root, path);
68
69 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
70 if (!inode)
71 return ERR_PTR(-ENOENT);
72 if (IS_ERR(inode))
73 return inode;
74 if (is_bad_inode(inode)) {
75 iput(inode);
76 return ERR_PTR(-ENOENT);
77 }
78
79 spin_lock(&block_group->lock);
80 if (!root->fs_info->closing) {
81 block_group->inode = igrab(inode);
82 block_group->iref = 1;
83 }
84 spin_unlock(&block_group->lock);
85
86 return inode;
87}
88
89int create_free_space_inode(struct btrfs_root *root,
90 struct btrfs_trans_handle *trans,
91 struct btrfs_block_group_cache *block_group,
92 struct btrfs_path *path)
93{
94 struct btrfs_key key;
95 struct btrfs_disk_key disk_key;
96 struct btrfs_free_space_header *header;
97 struct btrfs_inode_item *inode_item;
98 struct extent_buffer *leaf;
99 u64 objectid;
100 int ret;
101
102 ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
103 if (ret < 0)
104 return ret;
105
106 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
107 if (ret)
108 return ret;
109
110 leaf = path->nodes[0];
111 inode_item = btrfs_item_ptr(leaf, path->slots[0],
112 struct btrfs_inode_item);
113 btrfs_item_key(leaf, &disk_key, path->slots[0]);
114 memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
115 sizeof(*inode_item));
116 btrfs_set_inode_generation(leaf, inode_item, trans->transid);
117 btrfs_set_inode_size(leaf, inode_item, 0);
118 btrfs_set_inode_nbytes(leaf, inode_item, 0);
119 btrfs_set_inode_uid(leaf, inode_item, 0);
120 btrfs_set_inode_gid(leaf, inode_item, 0);
121 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
122 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
123 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
124 btrfs_set_inode_nlink(leaf, inode_item, 1);
125 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
126 btrfs_set_inode_block_group(leaf, inode_item,
127 block_group->key.objectid);
128 btrfs_mark_buffer_dirty(leaf);
129 btrfs_release_path(root, path);
130
131 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
132 key.offset = block_group->key.objectid;
133 key.type = 0;
134
135 ret = btrfs_insert_empty_item(trans, root, path, &key,
136 sizeof(struct btrfs_free_space_header));
137 if (ret < 0) {
138 btrfs_release_path(root, path);
139 return ret;
140 }
141 leaf = path->nodes[0];
142 header = btrfs_item_ptr(leaf, path->slots[0],
143 struct btrfs_free_space_header);
144 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
145 btrfs_set_free_space_key(leaf, header, &disk_key);
146 btrfs_mark_buffer_dirty(leaf);
147 btrfs_release_path(root, path);
148
149 return 0;
150}
151
152int btrfs_truncate_free_space_cache(struct btrfs_root *root,
153 struct btrfs_trans_handle *trans,
154 struct btrfs_path *path,
155 struct inode *inode)
156{
157 loff_t oldsize;
158 int ret = 0;
159
160 trans->block_rsv = root->orphan_block_rsv;
161 ret = btrfs_block_rsv_check(trans, root,
162 root->orphan_block_rsv,
163 0, 5);
164 if (ret)
165 return ret;
166
167 oldsize = i_size_read(inode);
168 btrfs_i_size_write(inode, 0);
169 truncate_pagecache(inode, oldsize, 0);
170
171 /*
172 * We don't need an orphan item because truncating the free space cache
173 * will never be split across transactions.
174 */
175 ret = btrfs_truncate_inode_items(trans, root, inode,
176 0, BTRFS_EXTENT_DATA_KEY);
177 if (ret) {
178 WARN_ON(1);
179 return ret;
180 }
181
182 return btrfs_update_inode(trans, root, inode);
183}
184
30static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 185static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
31 u64 offset) 186 u64 offset)
32{ 187{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e79011b..45be29e5f01e 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,17 @@ struct btrfs_free_space {
27 struct list_head list; 27 struct list_head list;
28}; 28};
29 29
30struct inode *lookup_free_space_inode(struct btrfs_root *root,
31 struct btrfs_block_group_cache
32 *block_group, struct btrfs_path *path);
33int create_free_space_inode(struct btrfs_root *root,
34 struct btrfs_trans_handle *trans,
35 struct btrfs_block_group_cache *block_group,
36 struct btrfs_path *path);
37int btrfs_truncate_free_space_cache(struct btrfs_root *root,
38 struct btrfs_trans_handle *trans,
39 struct btrfs_path *path,
40 struct inode *inode);
30int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 41int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
31 u64 bytenr, u64 size); 42 u64 bytenr, u64 size);
32int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 43int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c03864406af3..1af1ea88e8a8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1700,6 +1700,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1700 ordered_extent->len); 1700 ordered_extent->len);
1701 BUG_ON(ret); 1701 BUG_ON(ret);
1702 } else { 1702 } else {
1703 BUG_ON(root == root->fs_info->tree_root);
1703 ret = insert_reserved_file_extent(trans, inode, 1704 ret = insert_reserved_file_extent(trans, inode,
1704 ordered_extent->file_offset, 1705 ordered_extent->file_offset,
1705 ordered_extent->start, 1706 ordered_extent->start,
@@ -3196,7 +3197,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3196 3197
3197 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3198 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3198 3199
3199 if (root->ref_cows) 3200 if (root->ref_cows || root == root->fs_info->tree_root)
3200 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3201 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3201 3202
3202 path = btrfs_alloc_path(); 3203 path = btrfs_alloc_path();
@@ -3344,7 +3345,8 @@ delete:
3344 } else { 3345 } else {
3345 break; 3346 break;
3346 } 3347 }
3347 if (found_extent && root->ref_cows) { 3348 if (found_extent && (root->ref_cows ||
3349 root == root->fs_info->tree_root)) {
3348 btrfs_set_path_blocking(path); 3350 btrfs_set_path_blocking(path);
3349 ret = btrfs_free_extent(trans, root, extent_start, 3351 ret = btrfs_free_extent(trans, root, extent_start,
3350 extent_num_bytes, 0, 3352 extent_num_bytes, 0,
@@ -3675,7 +3677,8 @@ void btrfs_evict_inode(struct inode *inode)
3675 int ret; 3677 int ret;
3676 3678
3677 truncate_inode_pages(&inode->i_data, 0); 3679 truncate_inode_pages(&inode->i_data, 0);
3678 if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) 3680 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3681 root == root->fs_info->tree_root))
3679 goto no_delete; 3682 goto no_delete;
3680 3683
3681 if (is_bad_inode(inode)) { 3684 if (is_bad_inode(inode)) {
@@ -3888,7 +3891,14 @@ static void inode_tree_del(struct inode *inode)
3888 } 3891 }
3889 spin_unlock(&root->inode_lock); 3892 spin_unlock(&root->inode_lock);
3890 3893
3891 if (empty && btrfs_root_refs(&root->root_item) == 0) { 3894 /*
3895 * Free space cache has inodes in the tree root, but the tree root has a
3896 * root_refs of 0, so this could end up dropping the tree root as a
3897 * snapshot, so we need the extra !root->fs_info->tree_root check to
3898 * make sure we don't drop it.
3899 */
3900 if (empty && btrfs_root_refs(&root->root_item) == 0 &&
3901 root != root->fs_info->tree_root) {
3892 synchronize_srcu(&root->fs_info->subvol_srcu); 3902 synchronize_srcu(&root->fs_info->subvol_srcu);
3893 spin_lock(&root->inode_lock); 3903 spin_lock(&root->inode_lock);
3894 empty = RB_EMPTY_ROOT(&root->inode_tree); 3904 empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4282,14 +4292,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4282 struct btrfs_root *root = BTRFS_I(inode)->root; 4292 struct btrfs_root *root = BTRFS_I(inode)->root;
4283 struct btrfs_trans_handle *trans; 4293 struct btrfs_trans_handle *trans;
4284 int ret = 0; 4294 int ret = 0;
4295 bool nolock = false;
4285 4296
4286 if (BTRFS_I(inode)->dummy_inode) 4297 if (BTRFS_I(inode)->dummy_inode)
4287 return 0; 4298 return 0;
4288 4299
4300 smp_mb();
4301 nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
4302
4289 if (wbc->sync_mode == WB_SYNC_ALL) { 4303 if (wbc->sync_mode == WB_SYNC_ALL) {
4290 trans = btrfs_join_transaction(root, 1); 4304 if (nolock)
4305 trans = btrfs_join_transaction_nolock(root, 1);
4306 else
4307 trans = btrfs_join_transaction(root, 1);
4291 btrfs_set_trans_block_group(trans, inode); 4308 btrfs_set_trans_block_group(trans, inode);
4292 ret = btrfs_commit_transaction(trans, root); 4309 if (nolock)
4310 ret = btrfs_end_transaction_nolock(trans, root);
4311 else
4312 ret = btrfs_commit_transaction(trans, root);
4293 } 4313 }
4294 return ret; 4314 return ret;
4295} 4315}
@@ -6308,6 +6328,21 @@ void btrfs_destroy_inode(struct inode *inode)
6308 spin_unlock(&root->fs_info->ordered_extent_lock); 6328 spin_unlock(&root->fs_info->ordered_extent_lock);
6309 } 6329 }
6310 6330
6331 if (root == root->fs_info->tree_root) {
6332 struct btrfs_block_group_cache *block_group;
6333
6334 block_group = btrfs_lookup_block_group(root->fs_info,
6335 BTRFS_I(inode)->block_group);
6336 if (block_group && block_group->inode == inode) {
6337 spin_lock(&block_group->lock);
6338 block_group->inode = NULL;
6339 spin_unlock(&block_group->lock);
6340 btrfs_put_block_group(block_group);
6341 } else if (block_group) {
6342 btrfs_put_block_group(block_group);
6343 }
6344 }
6345
6311 spin_lock(&root->orphan_lock); 6346 spin_lock(&root->orphan_lock);
6312 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6347 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6313 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6348 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6340,7 +6375,8 @@ int btrfs_drop_inode(struct inode *inode)
6340{ 6375{
6341 struct btrfs_root *root = BTRFS_I(inode)->root; 6376 struct btrfs_root *root = BTRFS_I(inode)->root;
6342 6377
6343 if (btrfs_root_refs(&root->root_item) == 0) 6378 if (btrfs_root_refs(&root->root_item) == 0 &&
6379 root != root->fs_info->tree_root)
6344 return 1; 6380 return 1;
6345 else 6381 else
6346 return generic_drop_inode(inode); 6382 return generic_drop_inode(inode);
@@ -6757,27 +6793,33 @@ out_unlock:
6757 return err; 6793 return err;
6758} 6794}
6759 6795
6760int btrfs_prealloc_file_range(struct inode *inode, int mode, 6796static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
6761 u64 start, u64 num_bytes, u64 min_size, 6797 u64 start, u64 num_bytes, u64 min_size,
6762 loff_t actual_len, u64 *alloc_hint) 6798 loff_t actual_len, u64 *alloc_hint,
6799 struct btrfs_trans_handle *trans)
6763{ 6800{
6764 struct btrfs_trans_handle *trans;
6765 struct btrfs_root *root = BTRFS_I(inode)->root; 6801 struct btrfs_root *root = BTRFS_I(inode)->root;
6766 struct btrfs_key ins; 6802 struct btrfs_key ins;
6767 u64 cur_offset = start; 6803 u64 cur_offset = start;
6768 int ret = 0; 6804 int ret = 0;
6805 bool own_trans = true;
6769 6806
6807 if (trans)
6808 own_trans = false;
6770 while (num_bytes > 0) { 6809 while (num_bytes > 0) {
6771 trans = btrfs_start_transaction(root, 3); 6810 if (own_trans) {
6772 if (IS_ERR(trans)) { 6811 trans = btrfs_start_transaction(root, 3);
6773 ret = PTR_ERR(trans); 6812 if (IS_ERR(trans)) {
6774 break; 6813 ret = PTR_ERR(trans);
6814 break;
6815 }
6775 } 6816 }
6776 6817
6777 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 6818 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
6778 0, *alloc_hint, (u64)-1, &ins, 1); 6819 0, *alloc_hint, (u64)-1, &ins, 1);
6779 if (ret) { 6820 if (ret) {
6780 btrfs_end_transaction(trans, root); 6821 if (own_trans)
6822 btrfs_end_transaction(trans, root);
6781 break; 6823 break;
6782 } 6824 }
6783 6825
@@ -6810,11 +6852,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
6810 ret = btrfs_update_inode(trans, root, inode); 6852 ret = btrfs_update_inode(trans, root, inode);
6811 BUG_ON(ret); 6853 BUG_ON(ret);
6812 6854
6813 btrfs_end_transaction(trans, root); 6855 if (own_trans)
6856 btrfs_end_transaction(trans, root);
6814 } 6857 }
6815 return ret; 6858 return ret;
6816} 6859}
6817 6860
6861int btrfs_prealloc_file_range(struct inode *inode, int mode,
6862 u64 start, u64 num_bytes, u64 min_size,
6863 loff_t actual_len, u64 *alloc_hint)
6864{
6865 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
6866 min_size, actual_len, alloc_hint,
6867 NULL);
6868}
6869
6870int btrfs_prealloc_file_range_trans(struct inode *inode,
6871 struct btrfs_trans_handle *trans, int mode,
6872 u64 start, u64 num_bytes, u64 min_size,
6873 loff_t actual_len, u64 *alloc_hint)
6874{
6875 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
6876 min_size, actual_len, alloc_hint, trans);
6877}
6878
6818static long btrfs_fallocate(struct inode *inode, int mode, 6879static long btrfs_fallocate(struct inode *inode, int mode,
6819 loff_t offset, loff_t len) 6880 loff_t offset, loff_t len)
6820{ 6881{
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723b9d4a..af339eee55b8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
29#include "locking.h" 29#include "locking.h"
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h"
32 33
33/* 34/*
34 * backref_node, mapping_node and tree_block start with this 35 * backref_node, mapping_node and tree_block start with this
@@ -3191,6 +3192,54 @@ static int block_use_full_backref(struct reloc_control *rc,
3191 return ret; 3192 return ret;
3192} 3193}
3193 3194
3195static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3196 struct inode *inode, u64 ino)
3197{
3198 struct btrfs_key key;
3199 struct btrfs_path *path;
3200 struct btrfs_root *root = fs_info->tree_root;
3201 struct btrfs_trans_handle *trans;
3202 unsigned long nr;
3203 int ret = 0;
3204
3205 if (inode)
3206 goto truncate;
3207
3208 key.objectid = ino;
3209 key.type = BTRFS_INODE_ITEM_KEY;
3210 key.offset = 0;
3211
3212 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3213 if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
3214 if (inode && !IS_ERR(inode))
3215 iput(inode);
3216 return -ENOENT;
3217 }
3218
3219truncate:
3220 path = btrfs_alloc_path();
3221 if (!path) {
3222 ret = -ENOMEM;
3223 goto out;
3224 }
3225
3226 trans = btrfs_join_transaction(root, 0);
3227 if (IS_ERR(trans)) {
3228 btrfs_free_path(path);
3229 goto out;
3230 }
3231
3232 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
3233
3234 btrfs_free_path(path);
3235 nr = trans->blocks_used;
3236 btrfs_end_transaction(trans, root);
3237 btrfs_btree_balance_dirty(root, nr);
3238out:
3239 iput(inode);
3240 return ret;
3241}
3242
3194/* 3243/*
3195 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3244 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3196 * this function scans fs tree to find blocks reference the data extent 3245 * this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3266,27 @@ static int find_data_references(struct reloc_control *rc,
3217 int counted; 3266 int counted;
3218 int ret; 3267 int ret;
3219 3268
3220 path = btrfs_alloc_path();
3221 if (!path)
3222 return -ENOMEM;
3223
3224 ref_root = btrfs_extent_data_ref_root(leaf, ref); 3269 ref_root = btrfs_extent_data_ref_root(leaf, ref);
3225 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3270 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3226 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3271 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3227 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3272 ref_count = btrfs_extent_data_ref_count(leaf, ref);
3228 3273
3274 /*
3275 * This is an extent belonging to the free space cache, lets just delete
3276 * it and redo the search.
3277 */
3278 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3279 ret = delete_block_group_cache(rc->extent_root->fs_info,
3280 NULL, ref_objectid);
3281 if (ret != -ENOENT)
3282 return ret;
3283 ret = 0;
3284 }
3285
3286 path = btrfs_alloc_path();
3287 if (!path)
3288 return -ENOMEM;
3289
3229 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3290 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3230 if (IS_ERR(root)) { 3291 if (IS_ERR(root)) {
3231 err = PTR_ERR(root); 3292 err = PTR_ERR(root);
@@ -3860,6 +3921,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3860{ 3921{
3861 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3922 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3862 struct reloc_control *rc; 3923 struct reloc_control *rc;
3924 struct inode *inode;
3925 struct btrfs_path *path;
3863 int ret; 3926 int ret;
3864 int rw = 0; 3927 int rw = 0;
3865 int err = 0; 3928 int err = 0;
@@ -3882,6 +3945,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3882 rw = 1; 3945 rw = 1;
3883 } 3946 }
3884 3947
3948 path = btrfs_alloc_path();
3949 if (!path) {
3950 err = -ENOMEM;
3951 goto out;
3952 }
3953
3954 inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
3955 path);
3956 btrfs_free_path(path);
3957
3958 if (!IS_ERR(inode))
3959 ret = delete_block_group_cache(fs_info, inode, 0);
3960 else
3961 ret = PTR_ERR(inode);
3962
3963 if (ret && ret != -ENOENT) {
3964 err = ret;
3965 goto out;
3966 }
3967
3885 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3968 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3886 if (IS_ERR(rc->data_inode)) { 3969 if (IS_ERR(rc->data_inode)) {
3887 err = PTR_ERR(rc->data_inode); 3970 err = PTR_ERR(rc->data_inode);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1776dbd8dc98..5c23eb8d6ba3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -68,7 +68,7 @@ enum {
68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
71 Opt_discard, Opt_err, 71 Opt_discard, Opt_space_cache, Opt_err,
72}; 72};
73 73
74static match_table_t tokens = { 74static match_table_t tokens = {
@@ -92,6 +92,7 @@ static match_table_t tokens = {
92 {Opt_flushoncommit, "flushoncommit"}, 92 {Opt_flushoncommit, "flushoncommit"},
93 {Opt_ratio, "metadata_ratio=%d"}, 93 {Opt_ratio, "metadata_ratio=%d"},
94 {Opt_discard, "discard"}, 94 {Opt_discard, "discard"},
95 {Opt_space_cache, "space_cache"},
95 {Opt_err, NULL}, 96 {Opt_err, NULL},
96}; 97};
97 98
@@ -235,6 +236,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
235 case Opt_discard: 236 case Opt_discard:
236 btrfs_set_opt(info->mount_opt, DISCARD); 237 btrfs_set_opt(info->mount_opt, DISCARD);
237 break; 238 break;
239 case Opt_space_cache:
240 printk(KERN_INFO "btrfs: enabling disk space caching\n");
241 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
242 break;
238 case Opt_err: 243 case Opt_err:
239 printk(KERN_INFO "btrfs: unrecognized mount option " 244 printk(KERN_INFO "btrfs: unrecognized mount option "
240 "'%s'\n", p); 245 "'%s'\n", p);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63b..e7144c48ed79 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
163 TRANS_START, 163 TRANS_START,
164 TRANS_JOIN, 164 TRANS_JOIN,
165 TRANS_USERSPACE, 165 TRANS_USERSPACE,
166 TRANS_JOIN_NOLOCK,
166}; 167};
167 168
168static int may_wait_transaction(struct btrfs_root *root, int type) 169static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -186,7 +187,8 @@ again:
186 if (!h) 187 if (!h)
187 return ERR_PTR(-ENOMEM); 188 return ERR_PTR(-ENOMEM);
188 189
189 mutex_lock(&root->fs_info->trans_mutex); 190 if (type != TRANS_JOIN_NOLOCK)
191 mutex_lock(&root->fs_info->trans_mutex);
190 if (may_wait_transaction(root, type)) 192 if (may_wait_transaction(root, type))
191 wait_current_trans(root); 193 wait_current_trans(root);
192 194
@@ -195,7 +197,8 @@ again:
195 197
196 cur_trans = root->fs_info->running_transaction; 198 cur_trans = root->fs_info->running_transaction;
197 cur_trans->use_count++; 199 cur_trans->use_count++;
198 mutex_unlock(&root->fs_info->trans_mutex); 200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex);
199 202
200 h->transid = cur_trans->transid; 203 h->transid = cur_trans->transid;
201 h->transaction = cur_trans; 204 h->transaction = cur_trans;
@@ -224,9 +227,11 @@ again:
224 } 227 }
225 } 228 }
226 229
227 mutex_lock(&root->fs_info->trans_mutex); 230 if (type != TRANS_JOIN_NOLOCK)
231 mutex_lock(&root->fs_info->trans_mutex);
228 record_root_in_trans(h, root); 232 record_root_in_trans(h, root);
229 mutex_unlock(&root->fs_info->trans_mutex); 233 if (type != TRANS_JOIN_NOLOCK)
234 mutex_unlock(&root->fs_info->trans_mutex);
230 235
231 if (!current->journal_info && type != TRANS_USERSPACE) 236 if (!current->journal_info && type != TRANS_USERSPACE)
232 current->journal_info = h; 237 current->journal_info = h;
@@ -244,6 +249,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
244 return start_transaction(root, 0, TRANS_JOIN); 249 return start_transaction(root, 0, TRANS_JOIN);
245} 250}
246 251
252struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
253 int num_blocks)
254{
255 return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
256}
257
247struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 258struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
248 int num_blocks) 259 int num_blocks)
249{ 260{
@@ -348,7 +359,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
348} 359}
349 360
350static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 361static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
351 struct btrfs_root *root, int throttle) 362 struct btrfs_root *root, int throttle, int lock)
352{ 363{
353 struct btrfs_transaction *cur_trans = trans->transaction; 364 struct btrfs_transaction *cur_trans = trans->transaction;
354 struct btrfs_fs_info *info = root->fs_info; 365 struct btrfs_fs_info *info = root->fs_info;
@@ -376,18 +387,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
376 387
377 btrfs_trans_release_metadata(trans, root); 388 btrfs_trans_release_metadata(trans, root);
378 389
379 if (!root->fs_info->open_ioctl_trans && 390 if (lock && !root->fs_info->open_ioctl_trans &&
380 should_end_transaction(trans, root)) 391 should_end_transaction(trans, root))
381 trans->transaction->blocked = 1; 392 trans->transaction->blocked = 1;
382 393
383 if (cur_trans->blocked && !cur_trans->in_commit) { 394 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
384 if (throttle) 395 if (throttle)
385 return btrfs_commit_transaction(trans, root); 396 return btrfs_commit_transaction(trans, root);
386 else 397 else
387 wake_up_process(info->transaction_kthread); 398 wake_up_process(info->transaction_kthread);
388 } 399 }
389 400
390 mutex_lock(&info->trans_mutex); 401 if (lock)
402 mutex_lock(&info->trans_mutex);
391 WARN_ON(cur_trans != info->running_transaction); 403 WARN_ON(cur_trans != info->running_transaction);
392 WARN_ON(cur_trans->num_writers < 1); 404 WARN_ON(cur_trans->num_writers < 1);
393 cur_trans->num_writers--; 405 cur_trans->num_writers--;
@@ -395,7 +407,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
395 if (waitqueue_active(&cur_trans->writer_wait)) 407 if (waitqueue_active(&cur_trans->writer_wait))
396 wake_up(&cur_trans->writer_wait); 408 wake_up(&cur_trans->writer_wait);
397 put_transaction(cur_trans); 409 put_transaction(cur_trans);
398 mutex_unlock(&info->trans_mutex); 410 if (lock)
411 mutex_unlock(&info->trans_mutex);
399 412
400 if (current->journal_info == trans) 413 if (current->journal_info == trans)
401 current->journal_info = NULL; 414 current->journal_info = NULL;
@@ -411,13 +424,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
/*
 * End a transaction handle with throttle=0.  In the right-hand (post-patch)
 * column the call gains a fourth argument, lock=1, so trans_mutex is still
 * taken by __btrfs_end_transaction().
 */
411int btrfs_end_transaction(struct btrfs_trans_handle *trans, 424int btrfs_end_transaction(struct btrfs_trans_handle *trans,
412 struct btrfs_root *root) 425 struct btrfs_root *root)
413{ 426{
414 return __btrfs_end_transaction(trans, root, 0); 427 return __btrfs_end_transaction(trans, root, 0, 1);
415} 428}
416 429
/*
 * End a transaction handle with throttle=1.  In the right-hand (post-patch)
 * column the call gains a fourth argument, lock=1, so trans_mutex is still
 * taken by __btrfs_end_transaction().
 */
417int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 430int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root) 431 struct btrfs_root *root)
419{ 432{
420 return __btrfs_end_transaction(trans, root, 1); 433 return __btrfs_end_transaction(trans, root, 1, 1);
434}
435
/*
 * End a transaction handle without touching fs_info->trans_mutex.
 *
 * Calls __btrfs_end_transaction() with throttle=0 and lock=0; with lock
 * cleared that helper also skips its "blocked"/commit checks.  Returns the
 * helper's result.
 */
int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	const int throttle = 0;
	const int lock = 0;

	return __btrfs_end_transaction(trans, root, throttle, lock);
}
422 441
423/* 442/*
@@ -966,6 +985,8 @@ static void update_super_roots(struct btrfs_root *root)
966 super->root = root_item->bytenr; 985 super->root = root_item->bytenr;
967 super->generation = root_item->generation; 986 super->generation = root_item->generation;
968 super->root_level = root_item->level; 987 super->root_level = root_item->level;
988 if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
989 super->cache_generation = root_item->generation;
969} 990}
970 991
971int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 992int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986d0bfd..15f83e1c1ef7 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,10 +87,14 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
87 87
88int btrfs_end_transaction(struct btrfs_trans_handle *trans, 88int btrfs_end_transaction(struct btrfs_trans_handle *trans,
89 struct btrfs_root *root); 89 struct btrfs_root *root);
90int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
91 struct btrfs_root *root);
90struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 92struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
91 int num_items); 93 int num_items);
92struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 94struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
93 int num_blocks); 95 int num_blocks);
96struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
97 int num_blocks);
94struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 98struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
95 int num_blocks); 99 int num_blocks);
96int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 100int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,