aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-04-24 10:40:02 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-04-24 10:40:02 -0400
commit ba0e4ae88f0f71b42ad8734e0c371d321554f13b (patch)
tree b71b4969f2eb4b72f6a090d6cf02696db11b051d /fs/btrfs
parent 1aef882f023eb7c24d6d77f001bd0ba956fdd861 (diff)
parent e082f56313f374d723b0366978ddb062c8fe79ea (diff)
Merge branch 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:

"I've been running these through a longer set of load tests because my commits change the free space cache writeout. It fixes commit stalls on large filesystems (~20T space used and up) that we have been triggering here. We were seeing new writers blocked for 10 seconds or more during commits, which is far from good.

Josef and I fixed up ENOSPC aborts when deleting huge files (3T or more), that are triggered because our metadata reservations were not properly accounting for crcs and were not replenishing during the truncate.

Also in this series, a number of qgroup fixes from Fujitsu and Dave Sterba collected most of the pending cleanups from the list"

* 'for-linus-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (93 commits)
  btrfs: quota: Update quota tree after qgroup relationship change.
  btrfs: quota: Automatically update related qgroups or mark INCONSISTENT flags when assigning/deleting a qgroup relations.
  btrfs: qgroup: clear STATUS_FLAG_ON in disabling quota.
  btrfs: Update btrfs qgroup status item when rescan is done.
  btrfs: qgroup: Fix dead judgement on qgroup_rescan_leaf() return value.
  btrfs: Don't allow subvolid >= (1 << BTRFS_QGROUP_LEVEL_SHIFT) to be created
  btrfs: Check qgroup level in kernel qgroup assign.
  btrfs: qgroup: allow to remove qgroup which has parent but no child.
  btrfs: qgroup: return EINVAL if level of parent is not higher than child's.
  btrfs: qgroup: do a reservation in a higher level.
  Btrfs: qgroup, Account data space in more proper timings.
  Btrfs: qgroup: Introduce a may_use to account space_info->bytes_may_use.
  Btrfs: qgroup: free reserved in exceeding quota.
  Btrfs: qgroup: cleanup, remove an unsued parameter in btrfs_create_qgroup().
  btrfs: qgroup: fix limit args override whole limit struct
  btrfs: qgroup: update limit info in function btrfs_run_qgroups().
  btrfs: qgroup: consolidate the parameter of fucntion update_qgroup_limit_item().
  btrfs: qgroup: update qgroup in memory at the same time when we update it in btree.
  btrfs: qgroup: inherit limit info from srcgroup in creating snapshot.
  btrfs: Support busy loop of write and delete
  ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/async-thread.c | 4
-rw-r--r-- fs/btrfs/async-thread.h | 2
-rw-r--r-- fs/btrfs/backref.c | 4
-rw-r--r-- fs/btrfs/btrfs_inode.h | 14
-rw-r--r-- fs/btrfs/check-integrity.c | 9
-rw-r--r-- fs/btrfs/compression.c | 4
-rw-r--r-- fs/btrfs/compression.h | 4
-rw-r--r-- fs/btrfs/ctree.c | 62
-rw-r--r-- fs/btrfs/ctree.h | 46
-rw-r--r-- fs/btrfs/delayed-inode.c | 9
-rw-r--r-- fs/btrfs/delayed-ref.c | 22
-rw-r--r-- fs/btrfs/delayed-ref.h | 10
-rw-r--r-- fs/btrfs/dev-replace.c | 6
-rw-r--r-- fs/btrfs/disk-io.c | 570
-rw-r--r-- fs/btrfs/disk-io.h | 4
-rw-r--r-- fs/btrfs/extent-tree.c | 476
-rw-r--r-- fs/btrfs/extent_io.c | 5
-rw-r--r-- fs/btrfs/extent_io.h | 2
-rw-r--r-- fs/btrfs/file-item.c | 6
-rw-r--r-- fs/btrfs/file.c | 65
-rw-r--r-- fs/btrfs/free-space-cache.c | 301
-rw-r--r-- fs/btrfs/free-space-cache.h | 9
-rw-r--r-- fs/btrfs/inode-map.c | 2
-rw-r--r-- fs/btrfs/inode.c | 146
-rw-r--r-- fs/btrfs/ioctl.c | 33
-rw-r--r-- fs/btrfs/lzo.c | 2
-rw-r--r-- fs/btrfs/math.h | 6
-rw-r--r-- fs/btrfs/props.c | 2
-rw-r--r-- fs/btrfs/qgroup.c | 348
-rw-r--r-- fs/btrfs/qgroup.h | 3
-rw-r--r-- fs/btrfs/raid56.c | 16
-rw-r--r-- fs/btrfs/relocation.c | 11
-rw-r--r-- fs/btrfs/scrub.c | 25
-rw-r--r-- fs/btrfs/send.c | 83
-rw-r--r-- fs/btrfs/super.c | 23
-rw-r--r-- fs/btrfs/sysfs.c | 2
-rw-r--r-- fs/btrfs/sysfs.h | 22
-rw-r--r-- fs/btrfs/tests/qgroup-tests.c | 4
-rw-r--r-- fs/btrfs/transaction.c | 54
-rw-r--r-- fs/btrfs/transaction.h | 12
-rw-r--r-- fs/btrfs/tree-log.c | 382
-rw-r--r-- fs/btrfs/tree-log.h | 2
-rw-r--r-- fs/btrfs/volumes.c | 140
-rw-r--r-- fs/btrfs/volumes.h | 3
-rw-r--r-- fs/btrfs/xattr.c | 53
-rw-r--r-- fs/btrfs/zlib.c | 2
46 files changed, 2113 insertions, 897 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 4dabeb893b7c..df9932b00d08 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -87,7 +87,7 @@ BTRFS_WORK_HELPER(scrubwrc_helper);
87BTRFS_WORK_HELPER(scrubnc_helper); 87BTRFS_WORK_HELPER(scrubnc_helper);
88 88
89static struct __btrfs_workqueue * 89static struct __btrfs_workqueue *
90__btrfs_alloc_workqueue(const char *name, int flags, int max_active, 90__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
91 int thresh) 91 int thresh)
92{ 92{
93 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); 93 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -132,7 +132,7 @@ static inline void
132__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); 132__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
133 133
134struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 134struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
135 int flags, 135 unsigned int flags,
136 int max_active, 136 int max_active,
137 int thresh) 137 int thresh)
138{ 138{
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index e386c29ef1f6..ec2ee477f8ba 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -66,7 +66,7 @@ BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
66BTRFS_WORK_HELPER_PROTO(scrubnc_helper); 66BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
67 67
68struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, 68struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
69 int flags, 69 unsigned int flags,
70 int max_active, 70 int max_active,
71 int thresh); 71 int thresh);
72void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper, 72void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f55721ff9385..9de772ee0031 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1206,7 +1206,7 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
1206 struct ulist *roots = NULL; 1206 struct ulist *roots = NULL;
1207 struct ulist_iterator uiter; 1207 struct ulist_iterator uiter;
1208 struct ulist_node *node; 1208 struct ulist_node *node;
1209 struct seq_list elem = {}; 1209 struct seq_list elem = SEQ_LIST_INIT(elem);
1210 int ret = 0; 1210 int ret = 0;
1211 1211
1212 tmp = ulist_alloc(GFP_NOFS); 1212 tmp = ulist_alloc(GFP_NOFS);
@@ -1610,7 +1610,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1610 struct ulist *roots = NULL; 1610 struct ulist *roots = NULL;
1611 struct ulist_node *ref_node = NULL; 1611 struct ulist_node *ref_node = NULL;
1612 struct ulist_node *root_node = NULL; 1612 struct ulist_node *root_node = NULL;
1613 struct seq_list tree_mod_seq_elem = {}; 1613 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
1614 struct ulist_iterator ref_uiter; 1614 struct ulist_iterator ref_uiter;
1615 struct ulist_iterator root_uiter; 1615 struct ulist_iterator root_uiter;
1616 1616
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index de5e4f2adfea..0ef5cc13fae2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,7 +66,11 @@ struct btrfs_inode {
66 */ 66 */
67 struct btrfs_key location; 67 struct btrfs_key location;
68 68
69 /* Lock for counters */ 69 /*
70 * Lock for counters and all fields used to determine if the inode is in
71 * the log or not (last_trans, last_sub_trans, last_log_commit,
72 * logged_trans).
73 */
70 spinlock_t lock; 74 spinlock_t lock;
71 75
72 /* the extent_tree has caches of all the extent mappings to disk */ 76 /* the extent_tree has caches of all the extent mappings to disk */
@@ -250,6 +254,9 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
250 254
251static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) 255static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
252{ 256{
257 int ret = 0;
258
259 spin_lock(&BTRFS_I(inode)->lock);
253 if (BTRFS_I(inode)->logged_trans == generation && 260 if (BTRFS_I(inode)->logged_trans == generation &&
254 BTRFS_I(inode)->last_sub_trans <= 261 BTRFS_I(inode)->last_sub_trans <=
255 BTRFS_I(inode)->last_log_commit && 262 BTRFS_I(inode)->last_log_commit &&
@@ -263,9 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
263 */ 270 */
264 smp_mb(); 271 smp_mb();
265 if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents)) 272 if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
266 return 1; 273 ret = 1;
267 } 274 }
268 return 0; 275 spin_unlock(&BTRFS_I(inode)->lock);
276 return ret;
269} 277}
270 278
271#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1 279#define BTRFS_DIO_ORIG_BIO_SUBMITTED 0x1
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index d897ef803b3b..ce7dec88f4b8 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2990,8 +2990,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
2990 (unsigned long long)bio->bi_iter.bi_sector, 2990 (unsigned long long)bio->bi_iter.bi_sector,
2991 dev_bytenr, bio->bi_bdev); 2991 dev_bytenr, bio->bi_bdev);
2992 2992
2993 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 2993 mapped_datav = kmalloc_array(bio->bi_vcnt,
2994 GFP_NOFS); 2994 sizeof(*mapped_datav), GFP_NOFS);
2995 if (!mapped_datav) 2995 if (!mapped_datav)
2996 goto leave; 2996 goto leave;
2997 cur_bytenr = dev_bytenr; 2997 cur_bytenr = dev_bytenr;
@@ -3241,8 +3241,5 @@ void btrfsic_unmount(struct btrfs_root *root,
3241 3241
3242 mutex_unlock(&btrfsic_mutex); 3242 mutex_unlock(&btrfsic_mutex);
3243 3243
3244 if (is_vmalloc_addr(state)) 3244 kvfree(state);
3245 vfree(state);
3246 else
3247 kfree(state);
3248} 3245}
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index e9df8862012c..ce62324c78e7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -622,7 +622,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
622 cb->orig_bio = bio; 622 cb->orig_bio = bio;
623 623
624 nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE); 624 nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
625 cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, 625 cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
626 GFP_NOFS); 626 GFP_NOFS);
627 if (!cb->compressed_pages) 627 if (!cb->compressed_pages)
628 goto fail1; 628 goto fail1;
@@ -750,7 +750,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
750static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; 750static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
751static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; 751static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
752 752
753static struct btrfs_compress_op *btrfs_compress_op[] = { 753static const struct btrfs_compress_op * const btrfs_compress_op[] = {
754 &btrfs_zlib_compress, 754 &btrfs_zlib_compress,
755 &btrfs_lzo_compress, 755 &btrfs_lzo_compress,
756}; 756};
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index d181f70caae0..13a4dc0436c9 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -77,7 +77,7 @@ struct btrfs_compress_op {
77 size_t srclen, size_t destlen); 77 size_t srclen, size_t destlen);
78}; 78};
79 79
80extern struct btrfs_compress_op btrfs_zlib_compress; 80extern const struct btrfs_compress_op btrfs_zlib_compress;
81extern struct btrfs_compress_op btrfs_lzo_compress; 81extern const struct btrfs_compress_op btrfs_lzo_compress;
82 82
83#endif 83#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6d67f32e648d..0f11ebc92f02 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -578,7 +578,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
578 if (!tree_mod_need_log(fs_info, eb)) 578 if (!tree_mod_need_log(fs_info, eb))
579 return 0; 579 return 0;
580 580
581 tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags); 581 tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
582 if (!tm_list) 582 if (!tm_list)
583 return -ENOMEM; 583 return -ENOMEM;
584 584
@@ -677,7 +677,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
677 677
678 if (log_removal && btrfs_header_level(old_root) > 0) { 678 if (log_removal && btrfs_header_level(old_root) > 0) {
679 nritems = btrfs_header_nritems(old_root); 679 nritems = btrfs_header_nritems(old_root);
680 tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *), 680 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
681 flags); 681 flags);
682 if (!tm_list) { 682 if (!tm_list) {
683 ret = -ENOMEM; 683 ret = -ENOMEM;
@@ -814,7 +814,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
814 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) 814 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
815 return 0; 815 return 0;
816 816
817 tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *), 817 tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
818 GFP_NOFS); 818 GFP_NOFS);
819 if (!tm_list) 819 if (!tm_list)
820 return -ENOMEM; 820 return -ENOMEM;
@@ -905,8 +905,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
905 return 0; 905 return 0;
906 906
907 nritems = btrfs_header_nritems(eb); 907 nritems = btrfs_header_nritems(eb);
908 tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *), 908 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
909 GFP_NOFS);
910 if (!tm_list) 909 if (!tm_list)
911 return -ENOMEM; 910 return -ENOMEM;
912 911
@@ -1073,7 +1072,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1073 ret = btrfs_dec_ref(trans, root, buf, 1); 1072 ret = btrfs_dec_ref(trans, root, buf, 1);
1074 BUG_ON(ret); /* -ENOMEM */ 1073 BUG_ON(ret); /* -ENOMEM */
1075 } 1074 }
1076 clean_tree_block(trans, root, buf); 1075 clean_tree_block(trans, root->fs_info, buf);
1077 *last_ref = 1; 1076 *last_ref = 1;
1078 } 1077 }
1079 return 0; 1078 return 0;
@@ -1678,7 +1677,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1678 continue; 1677 continue;
1679 } 1678 }
1680 1679
1681 cur = btrfs_find_tree_block(root, blocknr); 1680 cur = btrfs_find_tree_block(root->fs_info, blocknr);
1682 if (cur) 1681 if (cur)
1683 uptodate = btrfs_buffer_uptodate(cur, gen, 0); 1682 uptodate = btrfs_buffer_uptodate(cur, gen, 0);
1684 else 1683 else
@@ -1943,7 +1942,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1943 1942
1944 path->locks[level] = 0; 1943 path->locks[level] = 0;
1945 path->nodes[level] = NULL; 1944 path->nodes[level] = NULL;
1946 clean_tree_block(trans, root, mid); 1945 clean_tree_block(trans, root->fs_info, mid);
1947 btrfs_tree_unlock(mid); 1946 btrfs_tree_unlock(mid);
1948 /* once for the path */ 1947 /* once for the path */
1949 free_extent_buffer(mid); 1948 free_extent_buffer(mid);
@@ -1997,7 +1996,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1997 if (wret < 0 && wret != -ENOSPC) 1996 if (wret < 0 && wret != -ENOSPC)
1998 ret = wret; 1997 ret = wret;
1999 if (btrfs_header_nritems(right) == 0) { 1998 if (btrfs_header_nritems(right) == 0) {
2000 clean_tree_block(trans, root, right); 1999 clean_tree_block(trans, root->fs_info, right);
2001 btrfs_tree_unlock(right); 2000 btrfs_tree_unlock(right);
2002 del_ptr(root, path, level + 1, pslot + 1); 2001 del_ptr(root, path, level + 1, pslot + 1);
2003 root_sub_used(root, right->len); 2002 root_sub_used(root, right->len);
@@ -2041,7 +2040,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
2041 BUG_ON(wret == 1); 2040 BUG_ON(wret == 1);
2042 } 2041 }
2043 if (btrfs_header_nritems(mid) == 0) { 2042 if (btrfs_header_nritems(mid) == 0) {
2044 clean_tree_block(trans, root, mid); 2043 clean_tree_block(trans, root->fs_info, mid);
2045 btrfs_tree_unlock(mid); 2044 btrfs_tree_unlock(mid);
2046 del_ptr(root, path, level + 1, pslot); 2045 del_ptr(root, path, level + 1, pslot);
2047 root_sub_used(root, mid->len); 2046 root_sub_used(root, mid->len);
@@ -2259,7 +2258,7 @@ static void reada_for_search(struct btrfs_root *root,
2259 2258
2260 search = btrfs_node_blockptr(node, slot); 2259 search = btrfs_node_blockptr(node, slot);
2261 blocksize = root->nodesize; 2260 blocksize = root->nodesize;
2262 eb = btrfs_find_tree_block(root, search); 2261 eb = btrfs_find_tree_block(root->fs_info, search);
2263 if (eb) { 2262 if (eb) {
2264 free_extent_buffer(eb); 2263 free_extent_buffer(eb);
2265 return; 2264 return;
@@ -2319,7 +2318,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
2319 if (slot > 0) { 2318 if (slot > 0) {
2320 block1 = btrfs_node_blockptr(parent, slot - 1); 2319 block1 = btrfs_node_blockptr(parent, slot - 1);
2321 gen = btrfs_node_ptr_generation(parent, slot - 1); 2320 gen = btrfs_node_ptr_generation(parent, slot - 1);
2322 eb = btrfs_find_tree_block(root, block1); 2321 eb = btrfs_find_tree_block(root->fs_info, block1);
2323 /* 2322 /*
2324 * if we get -eagain from btrfs_buffer_uptodate, we 2323 * if we get -eagain from btrfs_buffer_uptodate, we
2325 * don't want to return eagain here. That will loop 2324 * don't want to return eagain here. That will loop
@@ -2332,7 +2331,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
2332 if (slot + 1 < nritems) { 2331 if (slot + 1 < nritems) {
2333 block2 = btrfs_node_blockptr(parent, slot + 1); 2332 block2 = btrfs_node_blockptr(parent, slot + 1);
2334 gen = btrfs_node_ptr_generation(parent, slot + 1); 2333 gen = btrfs_node_ptr_generation(parent, slot + 1);
2335 eb = btrfs_find_tree_block(root, block2); 2334 eb = btrfs_find_tree_block(root->fs_info, block2);
2336 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) 2335 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
2337 block2 = 0; 2336 block2 = 0;
2338 free_extent_buffer(eb); 2337 free_extent_buffer(eb);
@@ -2450,7 +2449,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
2450 blocknr = btrfs_node_blockptr(b, slot); 2449 blocknr = btrfs_node_blockptr(b, slot);
2451 gen = btrfs_node_ptr_generation(b, slot); 2450 gen = btrfs_node_ptr_generation(b, slot);
2452 2451
2453 tmp = btrfs_find_tree_block(root, blocknr); 2452 tmp = btrfs_find_tree_block(root->fs_info, blocknr);
2454 if (tmp) { 2453 if (tmp) {
2455 /* first we do an atomic uptodate check */ 2454 /* first we do an atomic uptodate check */
2456 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { 2455 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
@@ -3126,7 +3125,8 @@ again:
3126 * higher levels 3125 * higher levels
3127 * 3126 *
3128 */ 3127 */
3129static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path, 3128static void fixup_low_keys(struct btrfs_fs_info *fs_info,
3129 struct btrfs_path *path,
3130 struct btrfs_disk_key *key, int level) 3130 struct btrfs_disk_key *key, int level)
3131{ 3131{
3132 int i; 3132 int i;
@@ -3137,7 +3137,7 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
3137 if (!path->nodes[i]) 3137 if (!path->nodes[i])
3138 break; 3138 break;
3139 t = path->nodes[i]; 3139 t = path->nodes[i];
3140 tree_mod_log_set_node_key(root->fs_info, t, tslot, 1); 3140 tree_mod_log_set_node_key(fs_info, t, tslot, 1);
3141 btrfs_set_node_key(t, key, tslot); 3141 btrfs_set_node_key(t, key, tslot);
3142 btrfs_mark_buffer_dirty(path->nodes[i]); 3142 btrfs_mark_buffer_dirty(path->nodes[i]);
3143 if (tslot != 0) 3143 if (tslot != 0)
@@ -3151,7 +3151,8 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
3151 * This function isn't completely safe. It's the caller's responsibility 3151 * This function isn't completely safe. It's the caller's responsibility
3152 * that the new key won't break the order 3152 * that the new key won't break the order
3153 */ 3153 */
3154void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path, 3154void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3155 struct btrfs_path *path,
3155 struct btrfs_key *new_key) 3156 struct btrfs_key *new_key)
3156{ 3157{
3157 struct btrfs_disk_key disk_key; 3158 struct btrfs_disk_key disk_key;
@@ -3173,7 +3174,7 @@ void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
3173 btrfs_set_item_key(eb, &disk_key, slot); 3174 btrfs_set_item_key(eb, &disk_key, slot);
3174 btrfs_mark_buffer_dirty(eb); 3175 btrfs_mark_buffer_dirty(eb);
3175 if (slot == 0) 3176 if (slot == 0)
3176 fixup_low_keys(root, path, &disk_key, 1); 3177 fixup_low_keys(fs_info, path, &disk_key, 1);
3177} 3178}
3178 3179
3179/* 3180/*
@@ -3692,7 +3693,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
3692 if (left_nritems) 3693 if (left_nritems)
3693 btrfs_mark_buffer_dirty(left); 3694 btrfs_mark_buffer_dirty(left);
3694 else 3695 else
3695 clean_tree_block(trans, root, left); 3696 clean_tree_block(trans, root->fs_info, left);
3696 3697
3697 btrfs_mark_buffer_dirty(right); 3698 btrfs_mark_buffer_dirty(right);
3698 3699
@@ -3704,7 +3705,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
3704 if (path->slots[0] >= left_nritems) { 3705 if (path->slots[0] >= left_nritems) {
3705 path->slots[0] -= left_nritems; 3706 path->slots[0] -= left_nritems;
3706 if (btrfs_header_nritems(path->nodes[0]) == 0) 3707 if (btrfs_header_nritems(path->nodes[0]) == 0)
3707 clean_tree_block(trans, root, path->nodes[0]); 3708 clean_tree_block(trans, root->fs_info, path->nodes[0]);
3708 btrfs_tree_unlock(path->nodes[0]); 3709 btrfs_tree_unlock(path->nodes[0]);
3709 free_extent_buffer(path->nodes[0]); 3710 free_extent_buffer(path->nodes[0]);
3710 path->nodes[0] = right; 3711 path->nodes[0] = right;
@@ -3928,10 +3929,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3928 if (right_nritems) 3929 if (right_nritems)
3929 btrfs_mark_buffer_dirty(right); 3930 btrfs_mark_buffer_dirty(right);
3930 else 3931 else
3931 clean_tree_block(trans, root, right); 3932 clean_tree_block(trans, root->fs_info, right);
3932 3933
3933 btrfs_item_key(right, &disk_key, 0); 3934 btrfs_item_key(right, &disk_key, 0);
3934 fixup_low_keys(root, path, &disk_key, 1); 3935 fixup_low_keys(root->fs_info, path, &disk_key, 1);
3935 3936
3936 /* then fixup the leaf pointer in the path */ 3937 /* then fixup the leaf pointer in the path */
3937 if (path->slots[0] < push_items) { 3938 if (path->slots[0] < push_items) {
@@ -4168,6 +4169,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
4168 int mid; 4169 int mid;
4169 int slot; 4170 int slot;
4170 struct extent_buffer *right; 4171 struct extent_buffer *right;
4172 struct btrfs_fs_info *fs_info = root->fs_info;
4171 int ret = 0; 4173 int ret = 0;
4172 int wret; 4174 int wret;
4173 int split; 4175 int split;
@@ -4271,10 +4273,10 @@ again:
4271 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); 4273 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
4272 btrfs_set_header_owner(right, root->root_key.objectid); 4274 btrfs_set_header_owner(right, root->root_key.objectid);
4273 btrfs_set_header_level(right, 0); 4275 btrfs_set_header_level(right, 0);
4274 write_extent_buffer(right, root->fs_info->fsid, 4276 write_extent_buffer(right, fs_info->fsid,
4275 btrfs_header_fsid(), BTRFS_FSID_SIZE); 4277 btrfs_header_fsid(), BTRFS_FSID_SIZE);
4276 4278
4277 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 4279 write_extent_buffer(right, fs_info->chunk_tree_uuid,
4278 btrfs_header_chunk_tree_uuid(right), 4280 btrfs_header_chunk_tree_uuid(right),
4279 BTRFS_UUID_SIZE); 4281 BTRFS_UUID_SIZE);
4280 4282
@@ -4297,7 +4299,7 @@ again:
4297 path->nodes[0] = right; 4299 path->nodes[0] = right;
4298 path->slots[0] = 0; 4300 path->slots[0] = 0;
4299 if (path->slots[1] == 0) 4301 if (path->slots[1] == 0)
4300 fixup_low_keys(root, path, &disk_key, 1); 4302 fixup_low_keys(fs_info, path, &disk_key, 1);
4301 } 4303 }
4302 btrfs_mark_buffer_dirty(right); 4304 btrfs_mark_buffer_dirty(right);
4303 return ret; 4305 return ret;
@@ -4615,7 +4617,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
4615 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 4617 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4616 btrfs_set_item_key(leaf, &disk_key, slot); 4618 btrfs_set_item_key(leaf, &disk_key, slot);
4617 if (slot == 0) 4619 if (slot == 0)
4618 fixup_low_keys(root, path, &disk_key, 1); 4620 fixup_low_keys(root->fs_info, path, &disk_key, 1);
4619 } 4621 }
4620 4622
4621 item = btrfs_item_nr(slot); 4623 item = btrfs_item_nr(slot);
@@ -4716,7 +4718,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4716 4718
4717 if (path->slots[0] == 0) { 4719 if (path->slots[0] == 0) {
4718 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 4720 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
4719 fixup_low_keys(root, path, &disk_key, 1); 4721 fixup_low_keys(root->fs_info, path, &disk_key, 1);
4720 } 4722 }
4721 btrfs_unlock_up_safe(path, 1); 4723 btrfs_unlock_up_safe(path, 1);
4722 4724
@@ -4888,7 +4890,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4888 struct btrfs_disk_key disk_key; 4890 struct btrfs_disk_key disk_key;
4889 4891
4890 btrfs_node_key(parent, &disk_key, 0); 4892 btrfs_node_key(parent, &disk_key, 0);
4891 fixup_low_keys(root, path, &disk_key, level + 1); 4893 fixup_low_keys(root->fs_info, path, &disk_key, level + 1);
4892 } 4894 }
4893 btrfs_mark_buffer_dirty(parent); 4895 btrfs_mark_buffer_dirty(parent);
4894} 4896}
@@ -4981,7 +4983,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4981 btrfs_set_header_level(leaf, 0); 4983 btrfs_set_header_level(leaf, 0);
4982 } else { 4984 } else {
4983 btrfs_set_path_blocking(path); 4985 btrfs_set_path_blocking(path);
4984 clean_tree_block(trans, root, leaf); 4986 clean_tree_block(trans, root->fs_info, leaf);
4985 btrfs_del_leaf(trans, root, path, leaf); 4987 btrfs_del_leaf(trans, root, path, leaf);
4986 } 4988 }
4987 } else { 4989 } else {
@@ -4990,7 +4992,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4990 struct btrfs_disk_key disk_key; 4992 struct btrfs_disk_key disk_key;
4991 4993
4992 btrfs_item_key(leaf, &disk_key, 0); 4994 btrfs_item_key(leaf, &disk_key, 0);
4993 fixup_low_keys(root, path, &disk_key, 1); 4995 fixup_low_keys(root->fs_info, path, &disk_key, 1);
4994 } 4996 }
4995 4997
4996 /* delete the leaf if it is mostly empty */ 4998 /* delete the leaf if it is mostly empty */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f9c89cae39ee..6f364e1d8d3d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1061,6 +1061,12 @@ struct btrfs_block_group_item {
1061 __le64 flags; 1061 __le64 flags;
1062} __attribute__ ((__packed__)); 1062} __attribute__ ((__packed__));
1063 1063
1064#define BTRFS_QGROUP_LEVEL_SHIFT 48
1065static inline u64 btrfs_qgroup_level(u64 qgroupid)
1066{
1067 return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
1068}
1069
1064/* 1070/*
1065 * is subvolume quota turned on? 1071 * is subvolume quota turned on?
1066 */ 1072 */
@@ -1256,6 +1262,20 @@ struct btrfs_caching_control {
1256 atomic_t count; 1262 atomic_t count;
1257}; 1263};
1258 1264
1265struct btrfs_io_ctl {
1266 void *cur, *orig;
1267 struct page *page;
1268 struct page **pages;
1269 struct btrfs_root *root;
1270 struct inode *inode;
1271 unsigned long size;
1272 int index;
1273 int num_pages;
1274 int entries;
1275 int bitmaps;
1276 unsigned check_crcs:1;
1277};
1278
1259struct btrfs_block_group_cache { 1279struct btrfs_block_group_cache {
1260 struct btrfs_key key; 1280 struct btrfs_key key;
1261 struct btrfs_block_group_item item; 1281 struct btrfs_block_group_item item;
@@ -1321,6 +1341,9 @@ struct btrfs_block_group_cache {
1321 1341
1322 /* For dirty block groups */ 1342 /* For dirty block groups */
1323 struct list_head dirty_list; 1343 struct list_head dirty_list;
1344 struct list_head io_list;
1345
1346 struct btrfs_io_ctl io_ctl;
1324}; 1347};
1325 1348
1326/* delayed seq elem */ 1349/* delayed seq elem */
@@ -1329,6 +1352,8 @@ struct seq_list {
1329 u64 seq; 1352 u64 seq;
1330}; 1353};
1331 1354
1355#define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 }
1356
1332enum btrfs_orphan_cleanup_state { 1357enum btrfs_orphan_cleanup_state {
1333 ORPHAN_CLEANUP_STARTED = 1, 1358 ORPHAN_CLEANUP_STARTED = 1,
1334 ORPHAN_CLEANUP_DONE = 2, 1359 ORPHAN_CLEANUP_DONE = 2,
@@ -1472,6 +1497,12 @@ struct btrfs_fs_info {
1472 struct mutex chunk_mutex; 1497 struct mutex chunk_mutex;
1473 struct mutex volume_mutex; 1498 struct mutex volume_mutex;
1474 1499
1500 /*
1501 * this is taken to make sure we don't set block groups ro after
1502 * the free space cache has been allocated on them
1503 */
1504 struct mutex ro_block_group_mutex;
1505
1475 /* this is used during read/modify/write to make sure 1506 /* this is used during read/modify/write to make sure
1476 * no two ios are trying to mod the same stripe at the same 1507 * no two ios are trying to mod the same stripe at the same
1477 * time 1508 * time
@@ -1513,6 +1544,7 @@ struct btrfs_fs_info {
1513 1544
1514 spinlock_t delayed_iput_lock; 1545 spinlock_t delayed_iput_lock;
1515 struct list_head delayed_iputs; 1546 struct list_head delayed_iputs;
1547 struct rw_semaphore delayed_iput_sem;
1516 1548
1517 /* this protects tree_mod_seq_list */ 1549 /* this protects tree_mod_seq_list */
1518 spinlock_t tree_mod_seq_lock; 1550 spinlock_t tree_mod_seq_lock;
@@ -3295,6 +3327,9 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
3295} 3327}
3296 3328
3297/* extent-tree.c */ 3329/* extent-tree.c */
3330
3331u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
3332
3298static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, 3333static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
3299 unsigned num_items) 3334 unsigned num_items)
3300{ 3335{
@@ -3385,6 +3420,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
3385 u64 bytenr, u64 num_bytes, u64 parent, 3420 u64 bytenr, u64 num_bytes, u64 parent,
3386 u64 root_objectid, u64 owner, u64 offset, int no_quota); 3421 u64 root_objectid, u64 owner, u64 offset, int no_quota);
3387 3422
3423int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3424 struct btrfs_root *root);
3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3425int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3389 struct btrfs_root *root); 3426 struct btrfs_root *root);
3390int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, 3427int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
@@ -3417,7 +3454,7 @@ enum btrfs_reserve_flush_enum {
3417 BTRFS_RESERVE_FLUSH_ALL, 3454 BTRFS_RESERVE_FLUSH_ALL,
3418}; 3455};
3419 3456
3420int btrfs_check_data_free_space(struct inode *inode, u64 bytes); 3457int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes);
3421void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); 3458void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
3422void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 3459void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
3423 struct btrfs_root *root); 3460 struct btrfs_root *root);
@@ -3440,6 +3477,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
3440 unsigned short type); 3477 unsigned short type);
3441void btrfs_free_block_rsv(struct btrfs_root *root, 3478void btrfs_free_block_rsv(struct btrfs_root *root,
3442 struct btrfs_block_rsv *rsv); 3479 struct btrfs_block_rsv *rsv);
3480void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
3443int btrfs_block_rsv_add(struct btrfs_root *root, 3481int btrfs_block_rsv_add(struct btrfs_root *root,
3444 struct btrfs_block_rsv *block_rsv, u64 num_bytes, 3482 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
3445 enum btrfs_reserve_flush_enum flush); 3483 enum btrfs_reserve_flush_enum flush);
@@ -3486,7 +3524,8 @@ int btrfs_previous_item(struct btrfs_root *root,
3486 int type); 3524 int type);
3487int btrfs_previous_extent_item(struct btrfs_root *root, 3525int btrfs_previous_extent_item(struct btrfs_root *root,
3488 struct btrfs_path *path, u64 min_objectid); 3526 struct btrfs_path *path, u64 min_objectid);
3489void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path, 3527void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3528 struct btrfs_path *path,
3490 struct btrfs_key *new_key); 3529 struct btrfs_key *new_key);
3491struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 3530struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
3492struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); 3531struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -4180,7 +4219,8 @@ int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
4180static inline int is_fstree(u64 rootid) 4219static inline int is_fstree(u64 rootid)
4181{ 4220{
4182 if (rootid == BTRFS_FS_TREE_OBJECTID || 4221 if (rootid == BTRFS_FS_TREE_OBJECTID ||
4183 (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID) 4222 ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
4223 !btrfs_qgroup_level(rootid)))
4184 return 1; 4224 return 1;
4185 return 0; 4225 return 0;
4186} 4226}
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 82f0c7c95474..cde698a07d21 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1383,7 +1383,7 @@ out:
1383 1383
1384 1384
1385static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, 1385static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1386 struct btrfs_root *root, int nr) 1386 struct btrfs_fs_info *fs_info, int nr)
1387{ 1387{
1388 struct btrfs_async_delayed_work *async_work; 1388 struct btrfs_async_delayed_work *async_work;
1389 1389
@@ -1399,7 +1399,7 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1399 btrfs_async_run_delayed_root, NULL, NULL); 1399 btrfs_async_run_delayed_root, NULL, NULL);
1400 async_work->nr = nr; 1400 async_work->nr = nr;
1401 1401
1402 btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); 1402 btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1403 return 0; 1403 return 0;
1404} 1404}
1405 1405
@@ -1426,6 +1426,7 @@ static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1426void btrfs_balance_delayed_items(struct btrfs_root *root) 1426void btrfs_balance_delayed_items(struct btrfs_root *root)
1427{ 1427{
1428 struct btrfs_delayed_root *delayed_root; 1428 struct btrfs_delayed_root *delayed_root;
1429 struct btrfs_fs_info *fs_info = root->fs_info;
1429 1430
1430 delayed_root = btrfs_get_delayed_root(root); 1431 delayed_root = btrfs_get_delayed_root(root);
1431 1432
@@ -1438,7 +1439,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
1438 1439
1439 seq = atomic_read(&delayed_root->items_seq); 1440 seq = atomic_read(&delayed_root->items_seq);
1440 1441
1441 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0); 1442 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1442 if (ret) 1443 if (ret)
1443 return; 1444 return;
1444 1445
@@ -1447,7 +1448,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
1447 return; 1448 return;
1448 } 1449 }
1449 1450
1450 btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH); 1451 btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1451} 1452}
1452 1453
1453/* Will return 0 or -ENOMEM */ 1454/* Will return 0 or -ENOMEM */
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 6d16bea94e1c..8f8ed7d20bac 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -489,11 +489,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
489 * existing and update must have the same bytenr 489 * existing and update must have the same bytenr
490 */ 490 */
491static noinline void 491static noinline void
492update_existing_head_ref(struct btrfs_delayed_ref_node *existing, 492update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
493 struct btrfs_delayed_ref_node *existing,
493 struct btrfs_delayed_ref_node *update) 494 struct btrfs_delayed_ref_node *update)
494{ 495{
495 struct btrfs_delayed_ref_head *existing_ref; 496 struct btrfs_delayed_ref_head *existing_ref;
496 struct btrfs_delayed_ref_head *ref; 497 struct btrfs_delayed_ref_head *ref;
498 int old_ref_mod;
497 499
498 existing_ref = btrfs_delayed_node_to_head(existing); 500 existing_ref = btrfs_delayed_node_to_head(existing);
499 ref = btrfs_delayed_node_to_head(update); 501 ref = btrfs_delayed_node_to_head(update);
@@ -541,7 +543,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
541 * only need the lock for this case cause we could be processing it 543 * only need the lock for this case cause we could be processing it
542 * currently, for refs we just added we know we're a-ok. 544 * currently, for refs we just added we know we're a-ok.
543 */ 545 */
546 old_ref_mod = existing_ref->total_ref_mod;
544 existing->ref_mod += update->ref_mod; 547 existing->ref_mod += update->ref_mod;
548 existing_ref->total_ref_mod += update->ref_mod;
549
550 /*
551 * If we are going from a positive ref mod to a negative one or vice
552 * versa, we need to make sure to adjust pending_csums accordingly.
553 */
554 if (existing_ref->is_data) {
555 if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
556 delayed_refs->pending_csums -= existing->num_bytes;
557 if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
558 delayed_refs->pending_csums += existing->num_bytes;
559 }
545 spin_unlock(&existing_ref->lock); 560 spin_unlock(&existing_ref->lock);
546} 561}
547 562
@@ -605,6 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
605 head_ref->is_data = is_data; 620 head_ref->is_data = is_data;
606 head_ref->ref_root = RB_ROOT; 621 head_ref->ref_root = RB_ROOT;
607 head_ref->processing = 0; 622 head_ref->processing = 0;
623 head_ref->total_ref_mod = count_mod;
608 624
609 spin_lock_init(&head_ref->lock); 625 spin_lock_init(&head_ref->lock);
610 mutex_init(&head_ref->mutex); 626 mutex_init(&head_ref->mutex);
@@ -614,7 +630,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
614 existing = htree_insert(&delayed_refs->href_root, 630 existing = htree_insert(&delayed_refs->href_root,
615 &head_ref->href_node); 631 &head_ref->href_node);
616 if (existing) { 632 if (existing) {
617 update_existing_head_ref(&existing->node, ref); 633 update_existing_head_ref(delayed_refs, &existing->node, ref);
618 /* 634 /*
619 * we've updated the existing ref, free the newly 635 * we've updated the existing ref, free the newly
620 * allocated ref 636 * allocated ref
@@ -622,6 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
622 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 638 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
623 head_ref = existing; 639 head_ref = existing;
624 } else { 640 } else {
641 if (is_data && count_mod < 0)
642 delayed_refs->pending_csums += num_bytes;
625 delayed_refs->num_heads++; 643 delayed_refs->num_heads++;
626 delayed_refs->num_heads_ready++; 644 delayed_refs->num_heads_ready++;
627 atomic_inc(&delayed_refs->num_entries); 645 atomic_inc(&delayed_refs->num_entries);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index a764e2340d48..5eb0892396d0 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -88,6 +88,14 @@ struct btrfs_delayed_ref_head {
88 struct rb_node href_node; 88 struct rb_node href_node;
89 89
90 struct btrfs_delayed_extent_op *extent_op; 90 struct btrfs_delayed_extent_op *extent_op;
91
92 /*
93 * This is used to track the final ref_mod from all the refs associated
94 * with this head ref, this is not adjusted as delayed refs are run,
95 * this is meant to track if we need to do the csum accounting or not.
96 */
97 int total_ref_mod;
98
91 /* 99 /*
92 * when a new extent is allocated, it is just reserved in memory 100 * when a new extent is allocated, it is just reserved in memory
93 * The actual extent isn't inserted into the extent allocation tree 101 * The actual extent isn't inserted into the extent allocation tree
@@ -138,6 +146,8 @@ struct btrfs_delayed_ref_root {
138 /* total number of head nodes ready for processing */ 146 /* total number of head nodes ready for processing */
139 unsigned long num_heads_ready; 147 unsigned long num_heads_ready;
140 148
149 u64 pending_csums;
150
141 /* 151 /*
142 * set when the tree is flushing before a transaction commit, 152 * set when the tree is flushing before a transaction commit,
143 * used by the throttling code to decide if new updates need 153 * used by the throttling code to decide if new updates need
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5ec03d999c37..0573848c7333 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -670,8 +670,8 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
670 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 670 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
671 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 671 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
672 srcdev = dev_replace->srcdev; 672 srcdev = dev_replace->srcdev;
673 args->status.progress_1000 = div64_u64(dev_replace->cursor_left, 673 args->status.progress_1000 = div_u64(dev_replace->cursor_left,
674 div64_u64(btrfs_device_get_total_bytes(srcdev), 1000)); 674 div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
675 break; 675 break;
676 } 676 }
677 btrfs_dev_replace_unlock(dev_replace); 677 btrfs_dev_replace_unlock(dev_replace);
@@ -806,7 +806,7 @@ static int btrfs_dev_replace_kthread(void *data)
806 btrfs_dev_replace_status(fs_info, status_args); 806 btrfs_dev_replace_status(fs_info, status_args);
807 progress = status_args->status.progress_1000; 807 progress = status_args->status.progress_1000;
808 kfree(status_args); 808 kfree(status_args);
809 do_div(progress, 10); 809 progress = div_u64(progress, 10);
810 printk_in_rcu(KERN_INFO 810 printk_in_rcu(KERN_INFO
811 "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", 811 "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
812 dev_replace->srcdev->missing ? "<missing disk>" : 812 dev_replace->srcdev->missing ? "<missing disk>" :
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 639f2663ed3f..2ef9a4b72d06 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -54,7 +54,7 @@
54#include <asm/cpufeature.h> 54#include <asm/cpufeature.h>
55#endif 55#endif
56 56
57static struct extent_io_ops btree_extent_io_ops; 57static const struct extent_io_ops btree_extent_io_ops;
58static void end_workqueue_fn(struct btrfs_work *work); 58static void end_workqueue_fn(struct btrfs_work *work);
59static void free_fs_root(struct btrfs_root *root); 59static void free_fs_root(struct btrfs_root *root);
60static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 60static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
@@ -274,10 +274,11 @@ void btrfs_csum_final(u32 crc, char *result)
274 * compute the csum for a btree block, and either verify it or write it 274 * compute the csum for a btree block, and either verify it or write it
275 * into the csum field of the block. 275 * into the csum field of the block.
276 */ 276 */
277static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, 277static int csum_tree_block(struct btrfs_fs_info *fs_info,
278 struct extent_buffer *buf,
278 int verify) 279 int verify)
279{ 280{
280 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 281 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
281 char *result = NULL; 282 char *result = NULL;
282 unsigned long len; 283 unsigned long len;
283 unsigned long cur_len; 284 unsigned long cur_len;
@@ -302,7 +303,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
302 offset += cur_len; 303 offset += cur_len;
303 } 304 }
304 if (csum_size > sizeof(inline_result)) { 305 if (csum_size > sizeof(inline_result)) {
305 result = kzalloc(csum_size * sizeof(char), GFP_NOFS); 306 result = kzalloc(csum_size, GFP_NOFS);
306 if (!result) 307 if (!result)
307 return 1; 308 return 1;
308 } else { 309 } else {
@@ -321,7 +322,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
321 printk_ratelimited(KERN_WARNING 322 printk_ratelimited(KERN_WARNING
322 "BTRFS: %s checksum verify failed on %llu wanted %X found %X " 323 "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
323 "level %d\n", 324 "level %d\n",
324 root->fs_info->sb->s_id, buf->start, 325 fs_info->sb->s_id, buf->start,
325 val, found, btrfs_header_level(buf)); 326 val, found, btrfs_header_level(buf));
326 if (result != (char *)&inline_result) 327 if (result != (char *)&inline_result)
327 kfree(result); 328 kfree(result);
@@ -418,12 +419,6 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
418 419
419 if (memcmp(raw_disk_sb, result, csum_size)) 420 if (memcmp(raw_disk_sb, result, csum_size))
420 ret = 1; 421 ret = 1;
421
422 if (ret && btrfs_super_generation(disk_sb) < 10) {
423 printk(KERN_WARNING
424 "BTRFS: super block crcs don't match, older mkfs detected\n");
425 ret = 0;
426 }
427 } 422 }
428 423
429 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { 424 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
@@ -501,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
501 * we only fill in the checksum field in the first page of a multi-page block 496 * we only fill in the checksum field in the first page of a multi-page block
502 */ 497 */
503 498
504static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) 499static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
505{ 500{
506 u64 start = page_offset(page); 501 u64 start = page_offset(page);
507 u64 found_start; 502 u64 found_start;
@@ -513,14 +508,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
513 found_start = btrfs_header_bytenr(eb); 508 found_start = btrfs_header_bytenr(eb);
514 if (WARN_ON(found_start != start || !PageUptodate(page))) 509 if (WARN_ON(found_start != start || !PageUptodate(page)))
515 return 0; 510 return 0;
516 csum_tree_block(root, eb, 0); 511 csum_tree_block(fs_info, eb, 0);
517 return 0; 512 return 0;
518} 513}
519 514
520static int check_tree_block_fsid(struct btrfs_root *root, 515static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
521 struct extent_buffer *eb) 516 struct extent_buffer *eb)
522{ 517{
523 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 518 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
524 u8 fsid[BTRFS_UUID_SIZE]; 519 u8 fsid[BTRFS_UUID_SIZE];
525 int ret = 1; 520 int ret = 1;
526 521
@@ -640,7 +635,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
640 ret = -EIO; 635 ret = -EIO;
641 goto err; 636 goto err;
642 } 637 }
643 if (check_tree_block_fsid(root, eb)) { 638 if (check_tree_block_fsid(root->fs_info, eb)) {
644 printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n", 639 printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
645 eb->fs_info->sb->s_id, eb->start); 640 eb->fs_info->sb->s_id, eb->start);
646 ret = -EIO; 641 ret = -EIO;
@@ -657,7 +652,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
657 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), 652 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
658 eb, found_level); 653 eb, found_level);
659 654
660 ret = csum_tree_block(root, eb, 1); 655 ret = csum_tree_block(root->fs_info, eb, 1);
661 if (ret) { 656 if (ret) {
662 ret = -EIO; 657 ret = -EIO;
663 goto err; 658 goto err;
@@ -882,7 +877,7 @@ static int btree_csum_one_bio(struct bio *bio)
882 877
883 bio_for_each_segment_all(bvec, bio, i) { 878 bio_for_each_segment_all(bvec, bio, i) {
884 root = BTRFS_I(bvec->bv_page->mapping->host)->root; 879 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
885 ret = csum_dirty_buffer(root, bvec->bv_page); 880 ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
886 if (ret) 881 if (ret)
887 break; 882 break;
888 } 883 }
@@ -1119,10 +1114,10 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
1119 return 0; 1114 return 0;
1120} 1115}
1121 1116
1122struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 1117struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
1123 u64 bytenr) 1118 u64 bytenr)
1124{ 1119{
1125 return find_extent_buffer(root->fs_info, bytenr); 1120 return find_extent_buffer(fs_info, bytenr);
1126} 1121}
1127 1122
1128struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 1123struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
@@ -1165,11 +1160,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1165 1160
1166} 1161}
1167 1162
1168void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1163void clean_tree_block(struct btrfs_trans_handle *trans,
1164 struct btrfs_fs_info *fs_info,
1169 struct extent_buffer *buf) 1165 struct extent_buffer *buf)
1170{ 1166{
1171 struct btrfs_fs_info *fs_info = root->fs_info;
1172
1173 if (btrfs_header_generation(buf) == 1167 if (btrfs_header_generation(buf) ==
1174 fs_info->running_transaction->transid) { 1168 fs_info->running_transaction->transid) {
1175 btrfs_assert_tree_locked(buf); 1169 btrfs_assert_tree_locked(buf);
@@ -2146,6 +2140,267 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
2146 } 2140 }
2147} 2141}
2148 2142
2143static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
2144{
2145 mutex_init(&fs_info->scrub_lock);
2146 atomic_set(&fs_info->scrubs_running, 0);
2147 atomic_set(&fs_info->scrub_pause_req, 0);
2148 atomic_set(&fs_info->scrubs_paused, 0);
2149 atomic_set(&fs_info->scrub_cancel_req, 0);
2150 init_waitqueue_head(&fs_info->scrub_pause_wait);
2151 fs_info->scrub_workers_refcnt = 0;
2152}
2153
2154static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
2155{
2156 spin_lock_init(&fs_info->balance_lock);
2157 mutex_init(&fs_info->balance_mutex);
2158 atomic_set(&fs_info->balance_running, 0);
2159 atomic_set(&fs_info->balance_pause_req, 0);
2160 atomic_set(&fs_info->balance_cancel_req, 0);
2161 fs_info->balance_ctl = NULL;
2162 init_waitqueue_head(&fs_info->balance_wait_q);
2163}
2164
2165static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info,
2166 struct btrfs_root *tree_root)
2167{
2168 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
2169 set_nlink(fs_info->btree_inode, 1);
2170 /*
2171 * we set the i_size on the btree inode to the max possible int.
2172 * the real end of the address space is determined by all of
2173 * the devices in the system
2174 */
2175 fs_info->btree_inode->i_size = OFFSET_MAX;
2176 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
2177
2178 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2179 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
2180 fs_info->btree_inode->i_mapping);
2181 BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
2182 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
2183
2184 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
2185
2186 BTRFS_I(fs_info->btree_inode)->root = tree_root;
2187 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
2188 sizeof(struct btrfs_key));
2189 set_bit(BTRFS_INODE_DUMMY,
2190 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2191 btrfs_insert_inode_hash(fs_info->btree_inode);
2192}
2193
2194static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
2195{
2196 fs_info->dev_replace.lock_owner = 0;
2197 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2198 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2199 mutex_init(&fs_info->dev_replace.lock_management_lock);
2200 mutex_init(&fs_info->dev_replace.lock);
2201 init_waitqueue_head(&fs_info->replace_wait);
2202}
2203
2204static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
2205{
2206 spin_lock_init(&fs_info->qgroup_lock);
2207 mutex_init(&fs_info->qgroup_ioctl_lock);
2208 fs_info->qgroup_tree = RB_ROOT;
2209 fs_info->qgroup_op_tree = RB_ROOT;
2210 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2211 fs_info->qgroup_seq = 1;
2212 fs_info->quota_enabled = 0;
2213 fs_info->pending_quota_state = 0;
2214 fs_info->qgroup_ulist = NULL;
2215 mutex_init(&fs_info->qgroup_rescan_lock);
2216}
2217
2218static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
2219 struct btrfs_fs_devices *fs_devices)
2220{
2221 int max_active = fs_info->thread_pool_size;
2222 unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
2223
2224 fs_info->workers =
2225 btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
2226 max_active, 16);
2227
2228 fs_info->delalloc_workers =
2229 btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
2230
2231 fs_info->flush_workers =
2232 btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
2233
2234 fs_info->caching_workers =
2235 btrfs_alloc_workqueue("cache", flags, max_active, 0);
2236
2237 /*
2238 * a higher idle thresh on the submit workers makes it much more
2239 * likely that bios will be sent down in a sane order to the
2240 * devices
2241 */
2242 fs_info->submit_workers =
2243 btrfs_alloc_workqueue("submit", flags,
2244 min_t(u64, fs_devices->num_devices,
2245 max_active), 64);
2246
2247 fs_info->fixup_workers =
2248 btrfs_alloc_workqueue("fixup", flags, 1, 0);
2249
2250 /*
2251 * endios are largely parallel and should have a very
2252 * low idle thresh
2253 */
2254 fs_info->endio_workers =
2255 btrfs_alloc_workqueue("endio", flags, max_active, 4);
2256 fs_info->endio_meta_workers =
2257 btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
2258 fs_info->endio_meta_write_workers =
2259 btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
2260 fs_info->endio_raid56_workers =
2261 btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
2262 fs_info->endio_repair_workers =
2263 btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
2264 fs_info->rmw_workers =
2265 btrfs_alloc_workqueue("rmw", flags, max_active, 2);
2266 fs_info->endio_write_workers =
2267 btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
2268 fs_info->endio_freespace_worker =
2269 btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
2270 fs_info->delayed_workers =
2271 btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
2272 fs_info->readahead_workers =
2273 btrfs_alloc_workqueue("readahead", flags, max_active, 2);
2274 fs_info->qgroup_rescan_workers =
2275 btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
2276 fs_info->extent_workers =
2277 btrfs_alloc_workqueue("extent-refs", flags,
2278 min_t(u64, fs_devices->num_devices,
2279 max_active), 8);
2280
2281 if (!(fs_info->workers && fs_info->delalloc_workers &&
2282 fs_info->submit_workers && fs_info->flush_workers &&
2283 fs_info->endio_workers && fs_info->endio_meta_workers &&
2284 fs_info->endio_meta_write_workers &&
2285 fs_info->endio_repair_workers &&
2286 fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
2287 fs_info->endio_freespace_worker && fs_info->rmw_workers &&
2288 fs_info->caching_workers && fs_info->readahead_workers &&
2289 fs_info->fixup_workers && fs_info->delayed_workers &&
2290 fs_info->extent_workers &&
2291 fs_info->qgroup_rescan_workers)) {
2292 return -ENOMEM;
2293 }
2294
2295 return 0;
2296}
2297
2298static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
2299 struct btrfs_fs_devices *fs_devices)
2300{
2301 int ret;
2302 struct btrfs_root *tree_root = fs_info->tree_root;
2303 struct btrfs_root *log_tree_root;
2304 struct btrfs_super_block *disk_super = fs_info->super_copy;
2305 u64 bytenr = btrfs_super_log_root(disk_super);
2306
2307 if (fs_devices->rw_devices == 0) {
2308 printk(KERN_WARNING "BTRFS: log replay required "
2309 "on RO media\n");
2310 return -EIO;
2311 }
2312
2313 log_tree_root = btrfs_alloc_root(fs_info);
2314 if (!log_tree_root)
2315 return -ENOMEM;
2316
2317 __setup_root(tree_root->nodesize, tree_root->sectorsize,
2318 tree_root->stripesize, log_tree_root, fs_info,
2319 BTRFS_TREE_LOG_OBJECTID);
2320
2321 log_tree_root->node = read_tree_block(tree_root, bytenr,
2322 fs_info->generation + 1);
2323 if (!log_tree_root->node ||
2324 !extent_buffer_uptodate(log_tree_root->node)) {
2325 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2326 free_extent_buffer(log_tree_root->node);
2327 kfree(log_tree_root);
2328 return -EIO;
2329 }
2330 /* returns with log_tree_root freed on success */
2331 ret = btrfs_recover_log_trees(log_tree_root);
2332 if (ret) {
2333 btrfs_error(tree_root->fs_info, ret,
2334 "Failed to recover log tree");
2335 free_extent_buffer(log_tree_root->node);
2336 kfree(log_tree_root);
2337 return ret;
2338 }
2339
2340 if (fs_info->sb->s_flags & MS_RDONLY) {
2341 ret = btrfs_commit_super(tree_root);
2342 if (ret)
2343 return ret;
2344 }
2345
2346 return 0;
2347}
2348
2349static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
2350 struct btrfs_root *tree_root)
2351{
2352 struct btrfs_root *root;
2353 struct btrfs_key location;
2354 int ret;
2355
2356 location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
2357 location.type = BTRFS_ROOT_ITEM_KEY;
2358 location.offset = 0;
2359
2360 root = btrfs_read_tree_root(tree_root, &location);
2361 if (IS_ERR(root))
2362 return PTR_ERR(root);
2363 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2364 fs_info->extent_root = root;
2365
2366 location.objectid = BTRFS_DEV_TREE_OBJECTID;
2367 root = btrfs_read_tree_root(tree_root, &location);
2368 if (IS_ERR(root))
2369 return PTR_ERR(root);
2370 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2371 fs_info->dev_root = root;
2372 btrfs_init_devices_late(fs_info);
2373
2374 location.objectid = BTRFS_CSUM_TREE_OBJECTID;
2375 root = btrfs_read_tree_root(tree_root, &location);
2376 if (IS_ERR(root))
2377 return PTR_ERR(root);
2378 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2379 fs_info->csum_root = root;
2380
2381 location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
2382 root = btrfs_read_tree_root(tree_root, &location);
2383 if (!IS_ERR(root)) {
2384 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2385 fs_info->quota_enabled = 1;
2386 fs_info->pending_quota_state = 1;
2387 fs_info->quota_root = root;
2388 }
2389
2390 location.objectid = BTRFS_UUID_TREE_OBJECTID;
2391 root = btrfs_read_tree_root(tree_root, &location);
2392 if (IS_ERR(root)) {
2393 ret = PTR_ERR(root);
2394 if (ret != -ENOENT)
2395 return ret;
2396 } else {
2397 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
2398 fs_info->uuid_root = root;
2399 }
2400
2401 return 0;
2402}
2403
2149int open_ctree(struct super_block *sb, 2404int open_ctree(struct super_block *sb,
2150 struct btrfs_fs_devices *fs_devices, 2405 struct btrfs_fs_devices *fs_devices,
2151 char *options) 2406 char *options)
@@ -2160,21 +2415,12 @@ int open_ctree(struct super_block *sb,
2160 struct btrfs_super_block *disk_super; 2415 struct btrfs_super_block *disk_super;
2161 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2416 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2162 struct btrfs_root *tree_root; 2417 struct btrfs_root *tree_root;
2163 struct btrfs_root *extent_root;
2164 struct btrfs_root *csum_root;
2165 struct btrfs_root *chunk_root; 2418 struct btrfs_root *chunk_root;
2166 struct btrfs_root *dev_root;
2167 struct btrfs_root *quota_root;
2168 struct btrfs_root *uuid_root;
2169 struct btrfs_root *log_tree_root;
2170 int ret; 2419 int ret;
2171 int err = -EINVAL; 2420 int err = -EINVAL;
2172 int num_backups_tried = 0; 2421 int num_backups_tried = 0;
2173 int backup_index = 0; 2422 int backup_index = 0;
2174 int max_active; 2423 int max_active;
2175 int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
2176 bool create_uuid_tree;
2177 bool check_uuid_tree;
2178 2424
2179 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); 2425 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
2180 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); 2426 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2241,11 +2487,12 @@ int open_ctree(struct super_block *sb,
2241 spin_lock_init(&fs_info->qgroup_op_lock); 2487 spin_lock_init(&fs_info->qgroup_op_lock);
2242 spin_lock_init(&fs_info->buffer_lock); 2488 spin_lock_init(&fs_info->buffer_lock);
2243 spin_lock_init(&fs_info->unused_bgs_lock); 2489 spin_lock_init(&fs_info->unused_bgs_lock);
2244 mutex_init(&fs_info->unused_bg_unpin_mutex);
2245 rwlock_init(&fs_info->tree_mod_log_lock); 2490 rwlock_init(&fs_info->tree_mod_log_lock);
2491 mutex_init(&fs_info->unused_bg_unpin_mutex);
2246 mutex_init(&fs_info->reloc_mutex); 2492 mutex_init(&fs_info->reloc_mutex);
2247 mutex_init(&fs_info->delalloc_root_mutex); 2493 mutex_init(&fs_info->delalloc_root_mutex);
2248 seqlock_init(&fs_info->profiles_lock); 2494 seqlock_init(&fs_info->profiles_lock);
2495 init_rwsem(&fs_info->delayed_iput_sem);
2249 2496
2250 init_completion(&fs_info->kobj_unregister); 2497 init_completion(&fs_info->kobj_unregister);
2251 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); 2498 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2276,7 +2523,7 @@ int open_ctree(struct super_block *sb,
2276 fs_info->free_chunk_space = 0; 2523 fs_info->free_chunk_space = 0;
2277 fs_info->tree_mod_log = RB_ROOT; 2524 fs_info->tree_mod_log = RB_ROOT;
2278 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; 2525 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
2279 fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64); 2526 fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
2280 /* readahead state */ 2527 /* readahead state */
2281 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2528 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
2282 spin_lock_init(&fs_info->reada_lock); 2529 spin_lock_init(&fs_info->reada_lock);
@@ -2294,55 +2541,18 @@ int open_ctree(struct super_block *sb,
2294 } 2541 }
2295 btrfs_init_delayed_root(fs_info->delayed_root); 2542 btrfs_init_delayed_root(fs_info->delayed_root);
2296 2543
2297 mutex_init(&fs_info->scrub_lock); 2544 btrfs_init_scrub(fs_info);
2298 atomic_set(&fs_info->scrubs_running, 0);
2299 atomic_set(&fs_info->scrub_pause_req, 0);
2300 atomic_set(&fs_info->scrubs_paused, 0);
2301 atomic_set(&fs_info->scrub_cancel_req, 0);
2302 init_waitqueue_head(&fs_info->replace_wait);
2303 init_waitqueue_head(&fs_info->scrub_pause_wait);
2304 fs_info->scrub_workers_refcnt = 0;
2305#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 2545#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
2306 fs_info->check_integrity_print_mask = 0; 2546 fs_info->check_integrity_print_mask = 0;
2307#endif 2547#endif
2308 2548 btrfs_init_balance(fs_info);
2309 spin_lock_init(&fs_info->balance_lock);
2310 mutex_init(&fs_info->balance_mutex);
2311 atomic_set(&fs_info->balance_running, 0);
2312 atomic_set(&fs_info->balance_pause_req, 0);
2313 atomic_set(&fs_info->balance_cancel_req, 0);
2314 fs_info->balance_ctl = NULL;
2315 init_waitqueue_head(&fs_info->balance_wait_q);
2316 btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work); 2549 btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
2317 2550
2318 sb->s_blocksize = 4096; 2551 sb->s_blocksize = 4096;
2319 sb->s_blocksize_bits = blksize_bits(4096); 2552 sb->s_blocksize_bits = blksize_bits(4096);
2320 sb->s_bdi = &fs_info->bdi; 2553 sb->s_bdi = &fs_info->bdi;
2321 2554
2322 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; 2555 btrfs_init_btree_inode(fs_info, tree_root);
2323 set_nlink(fs_info->btree_inode, 1);
2324 /*
2325 * we set the i_size on the btree inode to the max possible int.
2326 * the real end of the address space is determined by all of
2327 * the devices in the system
2328 */
2329 fs_info->btree_inode->i_size = OFFSET_MAX;
2330 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
2331
2332 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
2333 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
2334 fs_info->btree_inode->i_mapping);
2335 BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
2336 extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
2337
2338 BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
2339
2340 BTRFS_I(fs_info->btree_inode)->root = tree_root;
2341 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
2342 sizeof(struct btrfs_key));
2343 set_bit(BTRFS_INODE_DUMMY,
2344 &BTRFS_I(fs_info->btree_inode)->runtime_flags);
2345 btrfs_insert_inode_hash(fs_info->btree_inode);
2346 2556
2347 spin_lock_init(&fs_info->block_group_cache_lock); 2557 spin_lock_init(&fs_info->block_group_cache_lock);
2348 fs_info->block_group_cache_tree = RB_ROOT; 2558 fs_info->block_group_cache_tree = RB_ROOT;
@@ -2363,26 +2573,14 @@ int open_ctree(struct super_block *sb,
2363 mutex_init(&fs_info->transaction_kthread_mutex); 2573 mutex_init(&fs_info->transaction_kthread_mutex);
2364 mutex_init(&fs_info->cleaner_mutex); 2574 mutex_init(&fs_info->cleaner_mutex);
2365 mutex_init(&fs_info->volume_mutex); 2575 mutex_init(&fs_info->volume_mutex);
2576 mutex_init(&fs_info->ro_block_group_mutex);
2366 init_rwsem(&fs_info->commit_root_sem); 2577 init_rwsem(&fs_info->commit_root_sem);
2367 init_rwsem(&fs_info->cleanup_work_sem); 2578 init_rwsem(&fs_info->cleanup_work_sem);
2368 init_rwsem(&fs_info->subvol_sem); 2579 init_rwsem(&fs_info->subvol_sem);
2369 sema_init(&fs_info->uuid_tree_rescan_sem, 1); 2580 sema_init(&fs_info->uuid_tree_rescan_sem, 1);
2370 fs_info->dev_replace.lock_owner = 0;
2371 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2372 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
2373 mutex_init(&fs_info->dev_replace.lock_management_lock);
2374 mutex_init(&fs_info->dev_replace.lock);
2375 2581
2376 spin_lock_init(&fs_info->qgroup_lock); 2582 btrfs_init_dev_replace_locks(fs_info);
2377 mutex_init(&fs_info->qgroup_ioctl_lock); 2583 btrfs_init_qgroup(fs_info);
2378 fs_info->qgroup_tree = RB_ROOT;
2379 fs_info->qgroup_op_tree = RB_ROOT;
2380 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2381 fs_info->qgroup_seq = 1;
2382 fs_info->quota_enabled = 0;
2383 fs_info->pending_quota_state = 0;
2384 fs_info->qgroup_ulist = NULL;
2385 mutex_init(&fs_info->qgroup_rescan_lock);
2386 2584
2387 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 2585 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
2388 btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 2586 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2554,75 +2752,9 @@ int open_ctree(struct super_block *sb,
2554 2752
2555 max_active = fs_info->thread_pool_size; 2753 max_active = fs_info->thread_pool_size;
2556 2754
2557 fs_info->workers = 2755 ret = btrfs_init_workqueues(fs_info, fs_devices);
2558 btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, 2756 if (ret) {
2559 max_active, 16); 2757 err = ret;
2560
2561 fs_info->delalloc_workers =
2562 btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
2563
2564 fs_info->flush_workers =
2565 btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
2566
2567 fs_info->caching_workers =
2568 btrfs_alloc_workqueue("cache", flags, max_active, 0);
2569
2570 /*
2571 * a higher idle thresh on the submit workers makes it much more
2572 * likely that bios will be send down in a sane order to the
2573 * devices
2574 */
2575 fs_info->submit_workers =
2576 btrfs_alloc_workqueue("submit", flags,
2577 min_t(u64, fs_devices->num_devices,
2578 max_active), 64);
2579
2580 fs_info->fixup_workers =
2581 btrfs_alloc_workqueue("fixup", flags, 1, 0);
2582
2583 /*
2584 * endios are largely parallel and should have a very
2585 * low idle thresh
2586 */
2587 fs_info->endio_workers =
2588 btrfs_alloc_workqueue("endio", flags, max_active, 4);
2589 fs_info->endio_meta_workers =
2590 btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
2591 fs_info->endio_meta_write_workers =
2592 btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
2593 fs_info->endio_raid56_workers =
2594 btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
2595 fs_info->endio_repair_workers =
2596 btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
2597 fs_info->rmw_workers =
2598 btrfs_alloc_workqueue("rmw", flags, max_active, 2);
2599 fs_info->endio_write_workers =
2600 btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
2601 fs_info->endio_freespace_worker =
2602 btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
2603 fs_info->delayed_workers =
2604 btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
2605 fs_info->readahead_workers =
2606 btrfs_alloc_workqueue("readahead", flags, max_active, 2);
2607 fs_info->qgroup_rescan_workers =
2608 btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
2609 fs_info->extent_workers =
2610 btrfs_alloc_workqueue("extent-refs", flags,
2611 min_t(u64, fs_devices->num_devices,
2612 max_active), 8);
2613
2614 if (!(fs_info->workers && fs_info->delalloc_workers &&
2615 fs_info->submit_workers && fs_info->flush_workers &&
2616 fs_info->endio_workers && fs_info->endio_meta_workers &&
2617 fs_info->endio_meta_write_workers &&
2618 fs_info->endio_repair_workers &&
2619 fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
2620 fs_info->endio_freespace_worker && fs_info->rmw_workers &&
2621 fs_info->caching_workers && fs_info->readahead_workers &&
2622 fs_info->fixup_workers && fs_info->delayed_workers &&
2623 fs_info->extent_workers &&
2624 fs_info->qgroup_rescan_workers)) {
2625 err = -ENOMEM;
2626 goto fail_sb_buffer; 2758 goto fail_sb_buffer;
2627 } 2759 }
2628 2760
@@ -2688,7 +2820,7 @@ int open_ctree(struct super_block *sb,
2688 * keep the device that is marked to be the target device for the 2820 * keep the device that is marked to be the target device for the
2689 * dev_replace procedure 2821 * dev_replace procedure
2690 */ 2822 */
2691 btrfs_close_extra_devices(fs_info, fs_devices, 0); 2823 btrfs_close_extra_devices(fs_devices, 0);
2692 2824
2693 if (!fs_devices->latest_bdev) { 2825 if (!fs_devices->latest_bdev) {
2694 printk(KERN_ERR "BTRFS: failed to read devices on %s\n", 2826 printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
@@ -2714,61 +2846,9 @@ retry_root_backup:
2714 tree_root->commit_root = btrfs_root_node(tree_root); 2846 tree_root->commit_root = btrfs_root_node(tree_root);
2715 btrfs_set_root_refs(&tree_root->root_item, 1); 2847 btrfs_set_root_refs(&tree_root->root_item, 1);
2716 2848
2717 location.objectid = BTRFS_EXTENT_TREE_OBJECTID; 2849 ret = btrfs_read_roots(fs_info, tree_root);
2718 location.type = BTRFS_ROOT_ITEM_KEY; 2850 if (ret)
2719 location.offset = 0;
2720
2721 extent_root = btrfs_read_tree_root(tree_root, &location);
2722 if (IS_ERR(extent_root)) {
2723 ret = PTR_ERR(extent_root);
2724 goto recovery_tree_root;
2725 }
2726 set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
2727 fs_info->extent_root = extent_root;
2728
2729 location.objectid = BTRFS_DEV_TREE_OBJECTID;
2730 dev_root = btrfs_read_tree_root(tree_root, &location);
2731 if (IS_ERR(dev_root)) {
2732 ret = PTR_ERR(dev_root);
2733 goto recovery_tree_root;
2734 }
2735 set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
2736 fs_info->dev_root = dev_root;
2737 btrfs_init_devices_late(fs_info);
2738
2739 location.objectid = BTRFS_CSUM_TREE_OBJECTID;
2740 csum_root = btrfs_read_tree_root(tree_root, &location);
2741 if (IS_ERR(csum_root)) {
2742 ret = PTR_ERR(csum_root);
2743 goto recovery_tree_root; 2851 goto recovery_tree_root;
2744 }
2745 set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
2746 fs_info->csum_root = csum_root;
2747
2748 location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
2749 quota_root = btrfs_read_tree_root(tree_root, &location);
2750 if (!IS_ERR(quota_root)) {
2751 set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
2752 fs_info->quota_enabled = 1;
2753 fs_info->pending_quota_state = 1;
2754 fs_info->quota_root = quota_root;
2755 }
2756
2757 location.objectid = BTRFS_UUID_TREE_OBJECTID;
2758 uuid_root = btrfs_read_tree_root(tree_root, &location);
2759 if (IS_ERR(uuid_root)) {
2760 ret = PTR_ERR(uuid_root);
2761 if (ret != -ENOENT)
2762 goto recovery_tree_root;
2763 create_uuid_tree = true;
2764 check_uuid_tree = false;
2765 } else {
2766 set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
2767 fs_info->uuid_root = uuid_root;
2768 create_uuid_tree = false;
2769 check_uuid_tree =
2770 generation != btrfs_super_uuid_tree_generation(disk_super);
2771 }
2772 2852
2773 fs_info->generation = generation; 2853 fs_info->generation = generation;
2774 fs_info->last_trans_committed = generation; 2854 fs_info->last_trans_committed = generation;
@@ -2792,7 +2872,7 @@ retry_root_backup:
2792 goto fail_block_groups; 2872 goto fail_block_groups;
2793 } 2873 }
2794 2874
2795 btrfs_close_extra_devices(fs_info, fs_devices, 1); 2875 btrfs_close_extra_devices(fs_devices, 1);
2796 2876
2797 ret = btrfs_sysfs_add_one(fs_info); 2877 ret = btrfs_sysfs_add_one(fs_info);
2798 if (ret) { 2878 if (ret) {
@@ -2806,7 +2886,7 @@ retry_root_backup:
2806 goto fail_sysfs; 2886 goto fail_sysfs;
2807 } 2887 }
2808 2888
2809 ret = btrfs_read_block_groups(extent_root); 2889 ret = btrfs_read_block_groups(fs_info->extent_root);
2810 if (ret) { 2890 if (ret) {
2811 printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret); 2891 printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
2812 goto fail_sysfs; 2892 goto fail_sysfs;
@@ -2864,48 +2944,11 @@ retry_root_backup:
2864 2944
2865 /* do not make disk changes in broken FS */ 2945 /* do not make disk changes in broken FS */
2866 if (btrfs_super_log_root(disk_super) != 0) { 2946 if (btrfs_super_log_root(disk_super) != 0) {
2867 u64 bytenr = btrfs_super_log_root(disk_super); 2947 ret = btrfs_replay_log(fs_info, fs_devices);
2868
2869 if (fs_devices->rw_devices == 0) {
2870 printk(KERN_WARNING "BTRFS: log replay required "
2871 "on RO media\n");
2872 err = -EIO;
2873 goto fail_qgroup;
2874 }
2875
2876 log_tree_root = btrfs_alloc_root(fs_info);
2877 if (!log_tree_root) {
2878 err = -ENOMEM;
2879 goto fail_qgroup;
2880 }
2881
2882 __setup_root(nodesize, sectorsize, stripesize,
2883 log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
2884
2885 log_tree_root->node = read_tree_block(tree_root, bytenr,
2886 generation + 1);
2887 if (!log_tree_root->node ||
2888 !extent_buffer_uptodate(log_tree_root->node)) {
2889 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2890 free_extent_buffer(log_tree_root->node);
2891 kfree(log_tree_root);
2892 goto fail_qgroup;
2893 }
2894 /* returns with log_tree_root freed on success */
2895 ret = btrfs_recover_log_trees(log_tree_root);
2896 if (ret) { 2948 if (ret) {
2897 btrfs_error(tree_root->fs_info, ret, 2949 err = ret;
2898 "Failed to recover log tree");
2899 free_extent_buffer(log_tree_root->node);
2900 kfree(log_tree_root);
2901 goto fail_qgroup; 2950 goto fail_qgroup;
2902 } 2951 }
2903
2904 if (sb->s_flags & MS_RDONLY) {
2905 ret = btrfs_commit_super(tree_root);
2906 if (ret)
2907 goto fail_qgroup;
2908 }
2909 } 2952 }
2910 2953
2911 ret = btrfs_find_orphan_roots(tree_root); 2954 ret = btrfs_find_orphan_roots(tree_root);
@@ -2966,7 +3009,7 @@ retry_root_backup:
2966 3009
2967 btrfs_qgroup_rescan_resume(fs_info); 3010 btrfs_qgroup_rescan_resume(fs_info);
2968 3011
2969 if (create_uuid_tree) { 3012 if (!fs_info->uuid_root) {
2970 pr_info("BTRFS: creating UUID tree\n"); 3013 pr_info("BTRFS: creating UUID tree\n");
2971 ret = btrfs_create_uuid_tree(fs_info); 3014 ret = btrfs_create_uuid_tree(fs_info);
2972 if (ret) { 3015 if (ret) {
@@ -2975,8 +3018,9 @@ retry_root_backup:
2975 close_ctree(tree_root); 3018 close_ctree(tree_root);
2976 return ret; 3019 return ret;
2977 } 3020 }
2978 } else if (check_uuid_tree || 3021 } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
2979 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { 3022 fs_info->generation !=
3023 btrfs_super_uuid_tree_generation(disk_super)) {
2980 pr_info("BTRFS: checking UUID tree\n"); 3024 pr_info("BTRFS: checking UUID tree\n");
2981 ret = btrfs_check_uuid_tree(fs_info); 3025 ret = btrfs_check_uuid_tree(fs_info);
2982 if (ret) { 3026 if (ret) {
@@ -3668,7 +3712,7 @@ void close_ctree(struct btrfs_root *root)
3668 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 3712 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3669 ret = btrfs_commit_super(root); 3713 ret = btrfs_commit_super(root);
3670 if (ret) 3714 if (ret)
3671 btrfs_err(root->fs_info, "commit super ret %d", ret); 3715 btrfs_err(fs_info, "commit super ret %d", ret);
3672 } 3716 }
3673 3717
3674 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) 3718 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
@@ -3680,10 +3724,10 @@ void close_ctree(struct btrfs_root *root)
3680 fs_info->closing = 2; 3724 fs_info->closing = 2;
3681 smp_mb(); 3725 smp_mb();
3682 3726
3683 btrfs_free_qgroup_config(root->fs_info); 3727 btrfs_free_qgroup_config(fs_info);
3684 3728
3685 if (percpu_counter_sum(&fs_info->delalloc_bytes)) { 3729 if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
3686 btrfs_info(root->fs_info, "at unmount delalloc count %lld", 3730 btrfs_info(fs_info, "at unmount delalloc count %lld",
3687 percpu_counter_sum(&fs_info->delalloc_bytes)); 3731 percpu_counter_sum(&fs_info->delalloc_bytes));
3688 } 3732 }
3689 3733
@@ -3723,7 +3767,7 @@ void close_ctree(struct btrfs_root *root)
3723 3767
3724 btrfs_free_stripe_hash_table(fs_info); 3768 btrfs_free_stripe_hash_table(fs_info);
3725 3769
3726 btrfs_free_block_rsv(root, root->orphan_block_rsv); 3770 __btrfs_free_block_rsv(root->orphan_block_rsv);
3727 root->orphan_block_rsv = NULL; 3771 root->orphan_block_rsv = NULL;
3728 3772
3729 lock_chunks(root); 3773 lock_chunks(root);
@@ -4134,7 +4178,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
4134 4178
4135 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); 4179 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
4136 while (start <= end) { 4180 while (start <= end) {
4137 eb = btrfs_find_tree_block(root, start); 4181 eb = btrfs_find_tree_block(root->fs_info, start);
4138 start += root->nodesize; 4182 start += root->nodesize;
4139 if (!eb) 4183 if (!eb)
4140 continue; 4184 continue;
@@ -4285,7 +4329,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
4285 return 0; 4329 return 0;
4286} 4330}
4287 4331
4288static struct extent_io_ops btree_extent_io_ops = { 4332static const struct extent_io_ops btree_extent_io_ops = {
4289 .readpage_end_io_hook = btree_readpage_end_io_hook, 4333 .readpage_end_io_hook = btree_readpage_end_io_hook,
4290 .readpage_io_failed_hook = btree_io_failed_hook, 4334 .readpage_io_failed_hook = btree_io_failed_hook,
4291 .submit_bio_hook = btree_submit_bio_hook, 4335 .submit_bio_hook = btree_submit_bio_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 27d44c0fd236..d4cbfeeeedd4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,7 +52,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
52struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 52struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
53 u64 bytenr); 53 u64 bytenr);
54void clean_tree_block(struct btrfs_trans_handle *trans, 54void clean_tree_block(struct btrfs_trans_handle *trans,
55 struct btrfs_root *root, struct extent_buffer *buf); 55 struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
56int open_ctree(struct super_block *sb, 56int open_ctree(struct super_block *sb,
57 struct btrfs_fs_devices *fs_devices, 57 struct btrfs_fs_devices *fs_devices,
58 char *options); 58 char *options);
@@ -61,7 +61,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, int max_mirrors); 61 struct btrfs_root *root, int max_mirrors);
62struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); 62struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
63int btrfs_commit_super(struct btrfs_root *root); 63int btrfs_commit_super(struct btrfs_root *root);
64struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 64struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
65 u64 bytenr); 65 u64 bytenr);
66struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, 66struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
67 struct btrfs_key *location); 67 struct btrfs_key *location);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8b353ad02f03..1eef4ee01d1a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2538,6 +2538,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2538 * list before we release it. 2538 * list before we release it.
2539 */ 2539 */
2540 if (btrfs_delayed_ref_is_head(ref)) { 2540 if (btrfs_delayed_ref_is_head(ref)) {
2541 if (locked_ref->is_data &&
2542 locked_ref->total_ref_mod < 0) {
2543 spin_lock(&delayed_refs->lock);
2544 delayed_refs->pending_csums -= ref->num_bytes;
2545 spin_unlock(&delayed_refs->lock);
2546 }
2541 btrfs_delayed_ref_unlock(locked_ref); 2547 btrfs_delayed_ref_unlock(locked_ref);
2542 locked_ref = NULL; 2548 locked_ref = NULL;
2543 } 2549 }
@@ -2561,8 +2567,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2561 */ 2567 */
2562 spin_lock(&delayed_refs->lock); 2568 spin_lock(&delayed_refs->lock);
2563 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime; 2569 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2564 avg = div64_u64(avg, 4); 2570 fs_info->avg_delayed_ref_runtime = avg >> 2; /* div by 4 */
2565 fs_info->avg_delayed_ref_runtime = avg;
2566 spin_unlock(&delayed_refs->lock); 2571 spin_unlock(&delayed_refs->lock);
2567 } 2572 }
2568 return 0; 2573 return 0;
@@ -2624,7 +2629,26 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2624 * We don't ever fill up leaves all the way so multiply by 2 just to be 2629 * We don't ever fill up leaves all the way so multiply by 2 just to be
2625 * closer to what we're really going to want to ouse. 2630 * closer to what we're really going to want to ouse.
2626 */ 2631 */
2627 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); 2632 return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2633}
2634
2635/*
2636 * Takes the number of bytes to be csumm'ed and figures out how many leaves it
2637 * would require to store the csums for that many bytes.
2638 */
2639u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
2640{
2641 u64 csum_size;
2642 u64 num_csums_per_leaf;
2643 u64 num_csums;
2644
2645 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
2646 num_csums_per_leaf = div64_u64(csum_size,
2647 (u64)btrfs_super_csum_size(root->fs_info->super_copy));
2648 num_csums = div64_u64(csum_bytes, root->sectorsize);
2649 num_csums += num_csums_per_leaf - 1;
2650 num_csums = div64_u64(num_csums, num_csums_per_leaf);
2651 return num_csums;
2628} 2652}
2629 2653
2630int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, 2654int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2632,7 +2656,9 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2632{ 2656{
2633 struct btrfs_block_rsv *global_rsv; 2657 struct btrfs_block_rsv *global_rsv;
2634 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready; 2658 u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
2635 u64 num_bytes; 2659 u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
2660 u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
2661 u64 num_bytes, num_dirty_bgs_bytes;
2636 int ret = 0; 2662 int ret = 0;
2637 2663
2638 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 2664 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
@@ -2640,17 +2666,22 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2640 if (num_heads > 1) 2666 if (num_heads > 1)
2641 num_bytes += (num_heads - 1) * root->nodesize; 2667 num_bytes += (num_heads - 1) * root->nodesize;
2642 num_bytes <<= 1; 2668 num_bytes <<= 1;
2669 num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
2670 num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
2671 num_dirty_bgs);
2643 global_rsv = &root->fs_info->global_block_rsv; 2672 global_rsv = &root->fs_info->global_block_rsv;
2644 2673
2645 /* 2674 /*
2646 * If we can't allocate any more chunks lets make sure we have _lots_ of 2675 * If we can't allocate any more chunks lets make sure we have _lots_ of
2647 * wiggle room since running delayed refs can create more delayed refs. 2676 * wiggle room since running delayed refs can create more delayed refs.
2648 */ 2677 */
2649 if (global_rsv->space_info->full) 2678 if (global_rsv->space_info->full) {
2679 num_dirty_bgs_bytes <<= 1;
2650 num_bytes <<= 1; 2680 num_bytes <<= 1;
2681 }
2651 2682
2652 spin_lock(&global_rsv->lock); 2683 spin_lock(&global_rsv->lock);
2653 if (global_rsv->reserved <= num_bytes) 2684 if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
2654 ret = 1; 2685 ret = 1;
2655 spin_unlock(&global_rsv->lock); 2686 spin_unlock(&global_rsv->lock);
2656 return ret; 2687 return ret;
@@ -3193,7 +3224,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3193 struct inode *inode = NULL; 3224 struct inode *inode = NULL;
3194 u64 alloc_hint = 0; 3225 u64 alloc_hint = 0;
3195 int dcs = BTRFS_DC_ERROR; 3226 int dcs = BTRFS_DC_ERROR;
3196 int num_pages = 0; 3227 u64 num_pages = 0;
3197 int retries = 0; 3228 int retries = 0;
3198 int ret = 0; 3229 int ret = 0;
3199 3230
@@ -3267,7 +3298,7 @@ again:
3267 if (ret) 3298 if (ret)
3268 goto out_put; 3299 goto out_put;
3269 3300
3270 ret = btrfs_truncate_free_space_cache(root, trans, inode); 3301 ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
3271 if (ret) 3302 if (ret)
3272 goto out_put; 3303 goto out_put;
3273 } 3304 }
@@ -3293,14 +3324,14 @@ again:
3293 * taking up quite a bit since it's not folded into the other space 3324 * taking up quite a bit since it's not folded into the other space
3294 * cache. 3325 * cache.
3295 */ 3326 */
3296 num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024); 3327 num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
3297 if (!num_pages) 3328 if (!num_pages)
3298 num_pages = 1; 3329 num_pages = 1;
3299 3330
3300 num_pages *= 16; 3331 num_pages *= 16;
3301 num_pages *= PAGE_CACHE_SIZE; 3332 num_pages *= PAGE_CACHE_SIZE;
3302 3333
3303 ret = btrfs_check_data_free_space(inode, num_pages); 3334 ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
3304 if (ret) 3335 if (ret)
3305 goto out_put; 3336 goto out_put;
3306 3337
@@ -3351,16 +3382,156 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3351 return 0; 3382 return 0;
3352} 3383}
3353 3384
3354int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3385/*
3386 * transaction commit does final block group cache writeback during a
3387 * critical section where nothing is allowed to change the FS. This is
3388 * required in order for the cache to actually match the block group,
3389 * but can introduce a lot of latency into the commit.
3390 *
3391 * So, btrfs_start_dirty_block_groups is here to kick off block group
3392 * cache IO. There's a chance we'll have to redo some of it if the
3393 * block group changes again during the commit, but it greatly reduces
3394 * the commit latency by getting rid of the easy block groups while
3395 * we're still allowing others to join the commit.
3396 */
3397int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
3355 struct btrfs_root *root) 3398 struct btrfs_root *root)
3356{ 3399{
3357 struct btrfs_block_group_cache *cache; 3400 struct btrfs_block_group_cache *cache;
3358 struct btrfs_transaction *cur_trans = trans->transaction; 3401 struct btrfs_transaction *cur_trans = trans->transaction;
3359 int ret = 0; 3402 int ret = 0;
3360 struct btrfs_path *path; 3403 int should_put;
3404 struct btrfs_path *path = NULL;
3405 LIST_HEAD(dirty);
3406 struct list_head *io = &cur_trans->io_bgs;
3407 int num_started = 0;
3408 int loops = 0;
3409
3410 spin_lock(&cur_trans->dirty_bgs_lock);
3411 if (!list_empty(&cur_trans->dirty_bgs)) {
3412 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3413 }
3414 spin_unlock(&cur_trans->dirty_bgs_lock);
3361 3415
3362 if (list_empty(&cur_trans->dirty_bgs)) 3416again:
3417 if (list_empty(&dirty)) {
3418 btrfs_free_path(path);
3363 return 0; 3419 return 0;
3420 }
3421
3422 /*
3423 * make sure all the block groups on our dirty list actually
3424 * exist
3425 */
3426 btrfs_create_pending_block_groups(trans, root);
3427
3428 if (!path) {
3429 path = btrfs_alloc_path();
3430 if (!path)
3431 return -ENOMEM;
3432 }
3433
3434 while (!list_empty(&dirty)) {
3435 cache = list_first_entry(&dirty,
3436 struct btrfs_block_group_cache,
3437 dirty_list);
3438
3439 /*
3440 * cache_write_mutex is here only to save us from balance
3441 * deleting this block group while we are writing out the
3442 * cache
3443 */
3444 mutex_lock(&trans->transaction->cache_write_mutex);
3445
3446 /*
3447 * this can happen if something re-dirties a block
3448 * group that is already under IO. Just wait for it to
3449 * finish and then do it all again
3450 */
3451 if (!list_empty(&cache->io_list)) {
3452 list_del_init(&cache->io_list);
3453 btrfs_wait_cache_io(root, trans, cache,
3454 &cache->io_ctl, path,
3455 cache->key.objectid);
3456 btrfs_put_block_group(cache);
3457 }
3458
3459
3460 /*
3461 * btrfs_wait_cache_io uses the cache->dirty_list to decide
3462 * if it should update the cache_state. Don't delete
3463 * until after we wait.
3464 *
3465 * Since we're not running in the commit critical section
3466 * we need the dirty_bgs_lock to protect from update_block_group
3467 */
3468 spin_lock(&cur_trans->dirty_bgs_lock);
3469 list_del_init(&cache->dirty_list);
3470 spin_unlock(&cur_trans->dirty_bgs_lock);
3471
3472 should_put = 1;
3473
3474 cache_save_setup(cache, trans, path);
3475
3476 if (cache->disk_cache_state == BTRFS_DC_SETUP) {
3477 cache->io_ctl.inode = NULL;
3478 ret = btrfs_write_out_cache(root, trans, cache, path);
3479 if (ret == 0 && cache->io_ctl.inode) {
3480 num_started++;
3481 should_put = 0;
3482
3483 /*
3484 * the cache_write_mutex is protecting
3485 * the io_list
3486 */
3487 list_add_tail(&cache->io_list, io);
3488 } else {
3489 /*
3490 * if we failed to write the cache, the
3491 * generation will be bad and life goes on
3492 */
3493 ret = 0;
3494 }
3495 }
3496 if (!ret)
3497 ret = write_one_cache_group(trans, root, path, cache);
3498 mutex_unlock(&trans->transaction->cache_write_mutex);
3499
3500 /* if its not on the io list, we need to put the block group */
3501 if (should_put)
3502 btrfs_put_block_group(cache);
3503
3504 if (ret)
3505 break;
3506 }
3507
3508 /*
3509 * go through delayed refs for all the stuff we've just kicked off
3510 * and then loop back (just once)
3511 */
3512 ret = btrfs_run_delayed_refs(trans, root, 0);
3513 if (!ret && loops == 0) {
3514 loops++;
3515 spin_lock(&cur_trans->dirty_bgs_lock);
3516 list_splice_init(&cur_trans->dirty_bgs, &dirty);
3517 spin_unlock(&cur_trans->dirty_bgs_lock);
3518 goto again;
3519 }
3520
3521 btrfs_free_path(path);
3522 return ret;
3523}
3524
3525int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3526 struct btrfs_root *root)
3527{
3528 struct btrfs_block_group_cache *cache;
3529 struct btrfs_transaction *cur_trans = trans->transaction;
3530 int ret = 0;
3531 int should_put;
3532 struct btrfs_path *path;
3533 struct list_head *io = &cur_trans->io_bgs;
3534 int num_started = 0;
3364 3535
3365 path = btrfs_alloc_path(); 3536 path = btrfs_alloc_path();
3366 if (!path) 3537 if (!path)
@@ -3376,16 +3547,61 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3376 cache = list_first_entry(&cur_trans->dirty_bgs, 3547 cache = list_first_entry(&cur_trans->dirty_bgs,
3377 struct btrfs_block_group_cache, 3548 struct btrfs_block_group_cache,
3378 dirty_list); 3549 dirty_list);
3550
3551 /*
3552 * this can happen if cache_save_setup re-dirties a block
3553 * group that is already under IO. Just wait for it to
3554 * finish and then do it all again
3555 */
3556 if (!list_empty(&cache->io_list)) {
3557 list_del_init(&cache->io_list);
3558 btrfs_wait_cache_io(root, trans, cache,
3559 &cache->io_ctl, path,
3560 cache->key.objectid);
3561 btrfs_put_block_group(cache);
3562 }
3563
3564 /*
3565 * don't remove from the dirty list until after we've waited
3566 * on any pending IO
3567 */
3379 list_del_init(&cache->dirty_list); 3568 list_del_init(&cache->dirty_list);
3380 if (cache->disk_cache_state == BTRFS_DC_CLEAR) 3569 should_put = 1;
3381 cache_save_setup(cache, trans, path); 3570
3571 cache_save_setup(cache, trans, path);
3572
3382 if (!ret) 3573 if (!ret)
3383 ret = btrfs_run_delayed_refs(trans, root, 3574 ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
3384 (unsigned long) -1); 3575
3385 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) 3576 if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
3386 btrfs_write_out_cache(root, trans, cache, path); 3577 cache->io_ctl.inode = NULL;
3578 ret = btrfs_write_out_cache(root, trans, cache, path);
3579 if (ret == 0 && cache->io_ctl.inode) {
3580 num_started++;
3581 should_put = 0;
3582 list_add_tail(&cache->io_list, io);
3583 } else {
3584 /*
3585 * if we failed to write the cache, the
3586 * generation will be bad and life goes on
3587 */
3588 ret = 0;
3589 }
3590 }
3387 if (!ret) 3591 if (!ret)
3388 ret = write_one_cache_group(trans, root, path, cache); 3592 ret = write_one_cache_group(trans, root, path, cache);
3593
3594 /* if its not on the io list, we need to put the block group */
3595 if (should_put)
3596 btrfs_put_block_group(cache);
3597 }
3598
3599 while (!list_empty(io)) {
3600 cache = list_first_entry(io, struct btrfs_block_group_cache,
3601 io_list);
3602 list_del_init(&cache->io_list);
3603 btrfs_wait_cache_io(root, trans, cache,
3604 &cache->io_ctl, path, cache->key.objectid);
3389 btrfs_put_block_group(cache); 3605 btrfs_put_block_group(cache);
3390 } 3606 }
3391 3607
@@ -3635,19 +3851,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
3635 * This will check the space that the inode allocates from to make sure we have 3851 * This will check the space that the inode allocates from to make sure we have
3636 * enough space for bytes. 3852 * enough space for bytes.
3637 */ 3853 */
3638int btrfs_check_data_free_space(struct inode *inode, u64 bytes) 3854int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
3639{ 3855{
3640 struct btrfs_space_info *data_sinfo; 3856 struct btrfs_space_info *data_sinfo;
3641 struct btrfs_root *root = BTRFS_I(inode)->root; 3857 struct btrfs_root *root = BTRFS_I(inode)->root;
3642 struct btrfs_fs_info *fs_info = root->fs_info; 3858 struct btrfs_fs_info *fs_info = root->fs_info;
3643 u64 used; 3859 u64 used;
3644 int ret = 0, committed = 0, alloc_chunk = 1; 3860 int ret = 0;
3861 int need_commit = 2;
3862 int have_pinned_space;
3645 3863
3646 /* make sure bytes are sectorsize aligned */ 3864 /* make sure bytes are sectorsize aligned */
3647 bytes = ALIGN(bytes, root->sectorsize); 3865 bytes = ALIGN(bytes, root->sectorsize);
3648 3866
3649 if (btrfs_is_free_space_inode(inode)) { 3867 if (btrfs_is_free_space_inode(inode)) {
3650 committed = 1; 3868 need_commit = 0;
3651 ASSERT(current->journal_info); 3869 ASSERT(current->journal_info);
3652 } 3870 }
3653 3871
@@ -3669,7 +3887,7 @@ again:
3669 * if we don't have enough free bytes in this space then we need 3887 * if we don't have enough free bytes in this space then we need
3670 * to alloc a new chunk. 3888 * to alloc a new chunk.
3671 */ 3889 */
3672 if (!data_sinfo->full && alloc_chunk) { 3890 if (!data_sinfo->full) {
3673 u64 alloc_target; 3891 u64 alloc_target;
3674 3892
3675 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; 3893 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
@@ -3697,8 +3915,10 @@ alloc:
3697 if (ret < 0) { 3915 if (ret < 0) {
3698 if (ret != -ENOSPC) 3916 if (ret != -ENOSPC)
3699 return ret; 3917 return ret;
3700 else 3918 else {
3919 have_pinned_space = 1;
3701 goto commit_trans; 3920 goto commit_trans;
3921 }
3702 } 3922 }
3703 3923
3704 if (!data_sinfo) 3924 if (!data_sinfo)
@@ -3709,26 +3929,39 @@ alloc:
3709 3929
3710 /* 3930 /*
3711 * If we don't have enough pinned space to deal with this 3931 * If we don't have enough pinned space to deal with this
3712 * allocation don't bother committing the transaction. 3932 * allocation, and no removed chunk in current transaction,
3933 * don't bother committing the transaction.
3713 */ 3934 */
3714 if (percpu_counter_compare(&data_sinfo->total_bytes_pinned, 3935 have_pinned_space = percpu_counter_compare(
3715 bytes) < 0) 3936 &data_sinfo->total_bytes_pinned,
3716 committed = 1; 3937 used + bytes - data_sinfo->total_bytes);
3717 spin_unlock(&data_sinfo->lock); 3938 spin_unlock(&data_sinfo->lock);
3718 3939
3719 /* commit the current transaction and try again */ 3940 /* commit the current transaction and try again */
3720commit_trans: 3941commit_trans:
3721 if (!committed && 3942 if (need_commit &&
3722 !atomic_read(&root->fs_info->open_ioctl_trans)) { 3943 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3723 committed = 1; 3944 need_commit--;
3724 3945
3725 trans = btrfs_join_transaction(root); 3946 trans = btrfs_join_transaction(root);
3726 if (IS_ERR(trans)) 3947 if (IS_ERR(trans))
3727 return PTR_ERR(trans); 3948 return PTR_ERR(trans);
3728 ret = btrfs_commit_transaction(trans, root); 3949 if (have_pinned_space >= 0 ||
3729 if (ret) 3950 trans->transaction->have_free_bgs ||
3730 return ret; 3951 need_commit > 0) {
3731 goto again; 3952 ret = btrfs_commit_transaction(trans, root);
3953 if (ret)
3954 return ret;
3955 /*
3956 * make sure that all running delayed iput are
3957 * done
3958 */
3959 down_write(&root->fs_info->delayed_iput_sem);
3960 up_write(&root->fs_info->delayed_iput_sem);
3961 goto again;
3962 } else {
3963 btrfs_end_transaction(trans, root);
3964 }
3732 } 3965 }
3733 3966
3734 trace_btrfs_space_reservation(root->fs_info, 3967 trace_btrfs_space_reservation(root->fs_info,
@@ -3736,12 +3969,16 @@ commit_trans:
3736 data_sinfo->flags, bytes, 1); 3969 data_sinfo->flags, bytes, 1);
3737 return -ENOSPC; 3970 return -ENOSPC;
3738 } 3971 }
3972 ret = btrfs_qgroup_reserve(root, write_bytes);
3973 if (ret)
3974 goto out;
3739 data_sinfo->bytes_may_use += bytes; 3975 data_sinfo->bytes_may_use += bytes;
3740 trace_btrfs_space_reservation(root->fs_info, "space_info", 3976 trace_btrfs_space_reservation(root->fs_info, "space_info",
3741 data_sinfo->flags, bytes, 1); 3977 data_sinfo->flags, bytes, 1);
3978out:
3742 spin_unlock(&data_sinfo->lock); 3979 spin_unlock(&data_sinfo->lock);
3743 3980
3744 return 0; 3981 return ret;
3745} 3982}
3746 3983
3747/* 3984/*
@@ -4298,8 +4535,13 @@ out:
4298static inline int need_do_async_reclaim(struct btrfs_space_info *space_info, 4535static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
4299 struct btrfs_fs_info *fs_info, u64 used) 4536 struct btrfs_fs_info *fs_info, u64 used)
4300{ 4537{
4301 return (used >= div_factor_fine(space_info->total_bytes, 98) && 4538 u64 thresh = div_factor_fine(space_info->total_bytes, 98);
4302 !btrfs_fs_closing(fs_info) && 4539
4540 /* If we're just plain full then async reclaim just slows us down. */
4541 if (space_info->bytes_used >= thresh)
4542 return 0;
4543
4544 return (used >= thresh && !btrfs_fs_closing(fs_info) &&
4303 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); 4545 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
4304} 4546}
4305 4547
@@ -4354,10 +4596,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
4354 if (!btrfs_need_do_async_reclaim(space_info, fs_info, 4596 if (!btrfs_need_do_async_reclaim(space_info, fs_info,
4355 flush_state)) 4597 flush_state))
4356 return; 4598 return;
4357 } while (flush_state <= COMMIT_TRANS); 4599 } while (flush_state < COMMIT_TRANS);
4358
4359 if (btrfs_need_do_async_reclaim(space_info, fs_info, flush_state))
4360 queue_work(system_unbound_wq, work);
4361} 4600}
4362 4601
4363void btrfs_init_async_reclaim_work(struct work_struct *work) 4602void btrfs_init_async_reclaim_work(struct work_struct *work)
@@ -4700,6 +4939,11 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
4700 kfree(rsv); 4939 kfree(rsv);
4701} 4940}
4702 4941
4942void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
4943{
4944 kfree(rsv);
4945}
4946
4703int btrfs_block_rsv_add(struct btrfs_root *root, 4947int btrfs_block_rsv_add(struct btrfs_root *root,
4704 struct btrfs_block_rsv *block_rsv, u64 num_bytes, 4948 struct btrfs_block_rsv *block_rsv, u64 num_bytes,
4705 enum btrfs_reserve_flush_enum flush) 4949 enum btrfs_reserve_flush_enum flush)
@@ -4812,10 +5056,10 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
4812 5056
4813 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * 5057 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
4814 csum_size * 2; 5058 csum_size * 2;
4815 num_bytes += div64_u64(data_used + meta_used, 50); 5059 num_bytes += div_u64(data_used + meta_used, 50);
4816 5060
4817 if (num_bytes * 3 > meta_used) 5061 if (num_bytes * 3 > meta_used)
4818 num_bytes = div64_u64(meta_used, 3); 5062 num_bytes = div_u64(meta_used, 3);
4819 5063
4820 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); 5064 return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
4821} 5065}
@@ -4998,8 +5242,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
4998 u64 qgroup_reserved) 5242 u64 qgroup_reserved)
4999{ 5243{
5000 btrfs_block_rsv_release(root, rsv, (u64)-1); 5244 btrfs_block_rsv_release(root, rsv, (u64)-1);
5001 if (qgroup_reserved)
5002 btrfs_qgroup_free(root, qgroup_reserved);
5003} 5245}
5004 5246
5005/** 5247/**
@@ -5066,30 +5308,18 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
5066 int reserve) 5308 int reserve)
5067{ 5309{
5068 struct btrfs_root *root = BTRFS_I(inode)->root; 5310 struct btrfs_root *root = BTRFS_I(inode)->root;
5069 u64 csum_size; 5311 u64 old_csums, num_csums;
5070 int num_csums_per_leaf;
5071 int num_csums;
5072 int old_csums;
5073 5312
5074 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM && 5313 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
5075 BTRFS_I(inode)->csum_bytes == 0) 5314 BTRFS_I(inode)->csum_bytes == 0)
5076 return 0; 5315 return 0;
5077 5316
5078 old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); 5317 old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5079 if (reserve) 5318 if (reserve)
5080 BTRFS_I(inode)->csum_bytes += num_bytes; 5319 BTRFS_I(inode)->csum_bytes += num_bytes;
5081 else 5320 else
5082 BTRFS_I(inode)->csum_bytes -= num_bytes; 5321 BTRFS_I(inode)->csum_bytes -= num_bytes;
5083 csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); 5322 num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
5084 num_csums_per_leaf = (int)div64_u64(csum_size,
5085 sizeof(struct btrfs_csum_item) +
5086 sizeof(struct btrfs_disk_key));
5087 num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
5088 num_csums = num_csums + num_csums_per_leaf - 1;
5089 num_csums = num_csums / num_csums_per_leaf;
5090
5091 old_csums = old_csums + num_csums_per_leaf - 1;
5092 old_csums = old_csums / num_csums_per_leaf;
5093 5323
5094 /* No change, no need to reserve more */ 5324 /* No change, no need to reserve more */
5095 if (old_csums == num_csums) 5325 if (old_csums == num_csums)
@@ -5163,8 +5393,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5163 spin_unlock(&BTRFS_I(inode)->lock); 5393 spin_unlock(&BTRFS_I(inode)->lock);
5164 5394
5165 if (root->fs_info->quota_enabled) { 5395 if (root->fs_info->quota_enabled) {
5166 ret = btrfs_qgroup_reserve(root, num_bytes + 5396 ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize);
5167 nr_extents * root->nodesize);
5168 if (ret) 5397 if (ret)
5169 goto out_fail; 5398 goto out_fail;
5170 } 5399 }
@@ -5172,8 +5401,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5172 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 5401 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
5173 if (unlikely(ret)) { 5402 if (unlikely(ret)) {
5174 if (root->fs_info->quota_enabled) 5403 if (root->fs_info->quota_enabled)
5175 btrfs_qgroup_free(root, num_bytes + 5404 btrfs_qgroup_free(root, nr_extents * root->nodesize);
5176 nr_extents * root->nodesize);
5177 goto out_fail; 5405 goto out_fail;
5178 } 5406 }
5179 5407
@@ -5290,10 +5518,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5290 5518
5291 trace_btrfs_space_reservation(root->fs_info, "delalloc", 5519 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5292 btrfs_ino(inode), to_free, 0); 5520 btrfs_ino(inode), to_free, 0);
5293 if (root->fs_info->quota_enabled) {
5294 btrfs_qgroup_free(root, num_bytes +
5295 dropped * root->nodesize);
5296 }
5297 5521
5298 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 5522 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
5299 to_free); 5523 to_free);
@@ -5318,7 +5542,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
5318{ 5542{
5319 int ret; 5543 int ret;
5320 5544
5321 ret = btrfs_check_data_free_space(inode, num_bytes); 5545 ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
5322 if (ret) 5546 if (ret)
5323 return ret; 5547 return ret;
5324 5548
@@ -5390,14 +5614,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
5390 if (!alloc && cache->cached == BTRFS_CACHE_NO) 5614 if (!alloc && cache->cached == BTRFS_CACHE_NO)
5391 cache_block_group(cache, 1); 5615 cache_block_group(cache, 1);
5392 5616
5393 spin_lock(&trans->transaction->dirty_bgs_lock);
5394 if (list_empty(&cache->dirty_list)) {
5395 list_add_tail(&cache->dirty_list,
5396 &trans->transaction->dirty_bgs);
5397 btrfs_get_block_group(cache);
5398 }
5399 spin_unlock(&trans->transaction->dirty_bgs_lock);
5400
5401 byte_in_group = bytenr - cache->key.objectid; 5617 byte_in_group = bytenr - cache->key.objectid;
5402 WARN_ON(byte_in_group > cache->key.offset); 5618 WARN_ON(byte_in_group > cache->key.offset);
5403 5619
@@ -5446,6 +5662,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
5446 spin_unlock(&info->unused_bgs_lock); 5662 spin_unlock(&info->unused_bgs_lock);
5447 } 5663 }
5448 } 5664 }
5665
5666 spin_lock(&trans->transaction->dirty_bgs_lock);
5667 if (list_empty(&cache->dirty_list)) {
5668 list_add_tail(&cache->dirty_list,
5669 &trans->transaction->dirty_bgs);
5670 trans->transaction->num_dirty_bgs++;
5671 btrfs_get_block_group(cache);
5672 }
5673 spin_unlock(&trans->transaction->dirty_bgs_lock);
5674
5449 btrfs_put_block_group(cache); 5675 btrfs_put_block_group(cache);
5450 total -= num_bytes; 5676 total -= num_bytes;
5451 bytenr += num_bytes; 5677 bytenr += num_bytes;
@@ -6956,15 +7182,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6956 return -ENOSPC; 7182 return -ENOSPC;
6957 } 7183 }
6958 7184
6959 if (btrfs_test_opt(root, DISCARD))
6960 ret = btrfs_discard_extent(root, start, len, NULL);
6961
6962 if (pin) 7185 if (pin)
6963 pin_down_extent(root, cache, start, len, 1); 7186 pin_down_extent(root, cache, start, len, 1);
6964 else { 7187 else {
7188 if (btrfs_test_opt(root, DISCARD))
7189 ret = btrfs_discard_extent(root, start, len, NULL);
6965 btrfs_add_free_space(cache, start, len); 7190 btrfs_add_free_space(cache, start, len);
6966 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); 7191 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
6967 } 7192 }
7193
6968 btrfs_put_block_group(cache); 7194 btrfs_put_block_group(cache);
6969 7195
6970 trace_btrfs_reserved_extent_free(root, start, len); 7196 trace_btrfs_reserved_extent_free(root, start, len);
@@ -7095,9 +7321,9 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
7095 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 7321 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
7096 ins, size); 7322 ins, size);
7097 if (ret) { 7323 if (ret) {
7324 btrfs_free_path(path);
7098 btrfs_free_and_pin_reserved_extent(root, ins->objectid, 7325 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
7099 root->nodesize); 7326 root->nodesize);
7100 btrfs_free_path(path);
7101 return ret; 7327 return ret;
7102 } 7328 }
7103 7329
@@ -7217,7 +7443,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
7217 btrfs_set_header_generation(buf, trans->transid); 7443 btrfs_set_header_generation(buf, trans->transid);
7218 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 7444 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
7219 btrfs_tree_lock(buf); 7445 btrfs_tree_lock(buf);
7220 clean_tree_block(trans, root, buf); 7446 clean_tree_block(trans, root->fs_info, buf);
7221 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); 7447 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
7222 7448
7223 btrfs_set_lock_blocking(buf); 7449 btrfs_set_lock_blocking(buf);
@@ -7815,7 +8041,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7815 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 8041 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
7816 blocksize = root->nodesize; 8042 blocksize = root->nodesize;
7817 8043
7818 next = btrfs_find_tree_block(root, bytenr); 8044 next = btrfs_find_tree_block(root->fs_info, bytenr);
7819 if (!next) { 8045 if (!next) {
7820 next = btrfs_find_create_tree_block(root, bytenr); 8046 next = btrfs_find_create_tree_block(root, bytenr);
7821 if (!next) 8047 if (!next)
@@ -8016,7 +8242,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
8016 btrfs_set_lock_blocking(eb); 8242 btrfs_set_lock_blocking(eb);
8017 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 8243 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
8018 } 8244 }
8019 clean_tree_block(trans, root, eb); 8245 clean_tree_block(trans, root->fs_info, eb);
8020 } 8246 }
8021 8247
8022 if (eb == root->node) { 8248 if (eb == root->node) {
@@ -8533,10 +8759,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
8533 8759
8534 BUG_ON(cache->ro); 8760 BUG_ON(cache->ro);
8535 8761
8762again:
8536 trans = btrfs_join_transaction(root); 8763 trans = btrfs_join_transaction(root);
8537 if (IS_ERR(trans)) 8764 if (IS_ERR(trans))
8538 return PTR_ERR(trans); 8765 return PTR_ERR(trans);
8539 8766
8767 /*
8768 * we're not allowed to set block groups readonly after the dirty
8769 * block groups cache has started writing. If it already started,
8770 * back off and let this transaction commit
8771 */
8772 mutex_lock(&root->fs_info->ro_block_group_mutex);
8773 if (trans->transaction->dirty_bg_run) {
8774 u64 transid = trans->transid;
8775
8776 mutex_unlock(&root->fs_info->ro_block_group_mutex);
8777 btrfs_end_transaction(trans, root);
8778
8779 ret = btrfs_wait_for_commit(root, transid);
8780 if (ret)
8781 return ret;
8782 goto again;
8783 }
8784
8785
8540 ret = set_block_group_ro(cache, 0); 8786 ret = set_block_group_ro(cache, 0);
8541 if (!ret) 8787 if (!ret)
8542 goto out; 8788 goto out;
@@ -8551,6 +8797,7 @@ out:
8551 alloc_flags = update_block_group_flags(root, cache->flags); 8797 alloc_flags = update_block_group_flags(root, cache->flags);
8552 check_system_chunk(trans, root, alloc_flags); 8798 check_system_chunk(trans, root, alloc_flags);
8553 } 8799 }
8800 mutex_unlock(&root->fs_info->ro_block_group_mutex);
8554 8801
8555 btrfs_end_transaction(trans, root); 8802 btrfs_end_transaction(trans, root);
8556 return ret; 8803 return ret;
@@ -8720,7 +8967,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
8720 min_free <<= 1; 8967 min_free <<= 1;
8721 } else if (index == BTRFS_RAID_RAID0) { 8968 } else if (index == BTRFS_RAID_RAID0) {
8722 dev_min = fs_devices->rw_devices; 8969 dev_min = fs_devices->rw_devices;
8723 do_div(min_free, dev_min); 8970 min_free = div64_u64(min_free, dev_min);
8724 } 8971 }
8725 8972
8726 /* We need to do this so that we can look at pending chunks */ 8973 /* We need to do this so that we can look at pending chunks */
@@ -8992,6 +9239,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8992 INIT_LIST_HEAD(&cache->bg_list); 9239 INIT_LIST_HEAD(&cache->bg_list);
8993 INIT_LIST_HEAD(&cache->ro_list); 9240 INIT_LIST_HEAD(&cache->ro_list);
8994 INIT_LIST_HEAD(&cache->dirty_list); 9241 INIT_LIST_HEAD(&cache->dirty_list);
9242 INIT_LIST_HEAD(&cache->io_list);
8995 btrfs_init_free_space_ctl(cache); 9243 btrfs_init_free_space_ctl(cache);
8996 atomic_set(&cache->trimming, 0); 9244 atomic_set(&cache->trimming, 0);
8997 9245
@@ -9355,7 +9603,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9355 goto out; 9603 goto out;
9356 } 9604 }
9357 9605
9606 /*
9607 * get the inode first so any iput calls done for the io_list
9608 * aren't the final iput (no unlinks allowed now)
9609 */
9358 inode = lookup_free_space_inode(tree_root, block_group, path); 9610 inode = lookup_free_space_inode(tree_root, block_group, path);
9611
9612 mutex_lock(&trans->transaction->cache_write_mutex);
9613 /*
9614 * make sure our free space cache IO is done before removing the
9615 * free space inode
9616 */
9617 spin_lock(&trans->transaction->dirty_bgs_lock);
9618 if (!list_empty(&block_group->io_list)) {
9619 list_del_init(&block_group->io_list);
9620
9621 WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
9622
9623 spin_unlock(&trans->transaction->dirty_bgs_lock);
9624 btrfs_wait_cache_io(root, trans, block_group,
9625 &block_group->io_ctl, path,
9626 block_group->key.objectid);
9627 btrfs_put_block_group(block_group);
9628 spin_lock(&trans->transaction->dirty_bgs_lock);
9629 }
9630
9631 if (!list_empty(&block_group->dirty_list)) {
9632 list_del_init(&block_group->dirty_list);
9633 btrfs_put_block_group(block_group);
9634 }
9635 spin_unlock(&trans->transaction->dirty_bgs_lock);
9636 mutex_unlock(&trans->transaction->cache_write_mutex);
9637
9359 if (!IS_ERR(inode)) { 9638 if (!IS_ERR(inode)) {
9360 ret = btrfs_orphan_add(trans, inode); 9639 ret = btrfs_orphan_add(trans, inode);
9361 if (ret) { 9640 if (ret) {
@@ -9448,18 +9727,29 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
9448 9727
9449 spin_lock(&trans->transaction->dirty_bgs_lock); 9728 spin_lock(&trans->transaction->dirty_bgs_lock);
9450 if (!list_empty(&block_group->dirty_list)) { 9729 if (!list_empty(&block_group->dirty_list)) {
9451 list_del_init(&block_group->dirty_list); 9730 WARN_ON(1);
9452 btrfs_put_block_group(block_group); 9731 }
9732 if (!list_empty(&block_group->io_list)) {
9733 WARN_ON(1);
9453 } 9734 }
9454 spin_unlock(&trans->transaction->dirty_bgs_lock); 9735 spin_unlock(&trans->transaction->dirty_bgs_lock);
9455
9456 btrfs_remove_free_space_cache(block_group); 9736 btrfs_remove_free_space_cache(block_group);
9457 9737
9458 spin_lock(&block_group->space_info->lock); 9738 spin_lock(&block_group->space_info->lock);
9459 list_del_init(&block_group->ro_list); 9739 list_del_init(&block_group->ro_list);
9740
9741 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
9742 WARN_ON(block_group->space_info->total_bytes
9743 < block_group->key.offset);
9744 WARN_ON(block_group->space_info->bytes_readonly
9745 < block_group->key.offset);
9746 WARN_ON(block_group->space_info->disk_total
9747 < block_group->key.offset * factor);
9748 }
9460 block_group->space_info->total_bytes -= block_group->key.offset; 9749 block_group->space_info->total_bytes -= block_group->key.offset;
9461 block_group->space_info->bytes_readonly -= block_group->key.offset; 9750 block_group->space_info->bytes_readonly -= block_group->key.offset;
9462 block_group->space_info->disk_total -= block_group->key.offset * factor; 9751 block_group->space_info->disk_total -= block_group->key.offset * factor;
9752
9463 spin_unlock(&block_group->space_info->lock); 9753 spin_unlock(&block_group->space_info->lock);
9464 9754
9465 memcpy(&key, &block_group->key, sizeof(key)); 9755 memcpy(&key, &block_group->key, sizeof(key));
@@ -9647,8 +9937,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
9647 mutex_unlock(&fs_info->unused_bg_unpin_mutex); 9937 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
9648 9938
9649 /* Reset pinned so btrfs_put_block_group doesn't complain */ 9939 /* Reset pinned so btrfs_put_block_group doesn't complain */
9940 spin_lock(&space_info->lock);
9941 spin_lock(&block_group->lock);
9942
9943 space_info->bytes_pinned -= block_group->pinned;
9944 space_info->bytes_readonly += block_group->pinned;
9945 percpu_counter_add(&space_info->total_bytes_pinned,
9946 -block_group->pinned);
9650 block_group->pinned = 0; 9947 block_group->pinned = 0;
9651 9948
9949 spin_unlock(&block_group->lock);
9950 spin_unlock(&space_info->lock);
9951
9652 /* 9952 /*
9653 * Btrfs_remove_chunk will abort the transaction if things go 9953 * Btrfs_remove_chunk will abort the transaction if things go
9654 * horribly wrong. 9954 * horribly wrong.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d688cfe5d496..782f3bc4651d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4514,8 +4514,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4514 } 4514 }
4515 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 4515 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
4516 em_len, flags); 4516 em_len, flags);
4517 if (ret) 4517 if (ret) {
4518 if (ret == 1)
4519 ret = 0;
4518 goto out_free; 4520 goto out_free;
4521 }
4519 } 4522 }
4520out_free: 4523out_free:
4521 free_extent_map(em); 4524 free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 695b0ccfb755..c668f36898d3 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -97,7 +97,7 @@ struct extent_io_tree {
97 u64 dirty_bytes; 97 u64 dirty_bytes;
98 int track_uptodate; 98 int track_uptodate;
99 spinlock_t lock; 99 spinlock_t lock;
100 struct extent_io_ops *ops; 100 const struct extent_io_ops *ops;
101}; 101};
102 102
103struct extent_state { 103struct extent_state {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 84a2d1868271..58ece6558430 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -185,8 +185,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
185 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; 185 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
186 if (!dst) { 186 if (!dst) {
187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { 187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, 188 btrfs_bio->csum_allocated = kmalloc_array(nblocks,
189 GFP_NOFS); 189 csum_size, GFP_NOFS);
190 if (!btrfs_bio->csum_allocated) { 190 if (!btrfs_bio->csum_allocated) {
191 btrfs_free_path(path); 191 btrfs_free_path(path);
192 return -ENOMEM; 192 return -ENOMEM;
@@ -553,7 +553,7 @@ static noinline void truncate_one_csum(struct btrfs_root *root,
553 btrfs_truncate_item(root, path, new_size, 0); 553 btrfs_truncate_item(root, path, new_size, 0);
554 554
555 key->offset = end_byte; 555 key->offset = end_byte;
556 btrfs_set_item_key_safe(root, path, key); 556 btrfs_set_item_key_safe(root->fs_info, path, key);
557 } else { 557 } else {
558 BUG(); 558 BUG();
559 } 559 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index faa7d390841b..467620a3b1f9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -273,11 +273,7 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
273 defrag = rb_entry(node, struct inode_defrag, rb_node); 273 defrag = rb_entry(node, struct inode_defrag, rb_node);
274 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 274 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
275 275
276 if (need_resched()) { 276 cond_resched_lock(&fs_info->defrag_inodes_lock);
277 spin_unlock(&fs_info->defrag_inodes_lock);
278 cond_resched();
279 spin_lock(&fs_info->defrag_inodes_lock);
280 }
281 277
282 node = rb_first(&fs_info->defrag_inodes); 278 node = rb_first(&fs_info->defrag_inodes);
283 } 279 }
@@ -868,7 +864,7 @@ next_slot:
868 864
869 memcpy(&new_key, &key, sizeof(new_key)); 865 memcpy(&new_key, &key, sizeof(new_key));
870 new_key.offset = end; 866 new_key.offset = end;
871 btrfs_set_item_key_safe(root, path, &new_key); 867 btrfs_set_item_key_safe(root->fs_info, path, &new_key);
872 868
873 extent_offset += end - key.offset; 869 extent_offset += end - key.offset;
874 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 870 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1126,7 +1122,7 @@ again:
1126 ino, bytenr, orig_offset, 1122 ino, bytenr, orig_offset,
1127 &other_start, &other_end)) { 1123 &other_start, &other_end)) {
1128 new_key.offset = end; 1124 new_key.offset = end;
1129 btrfs_set_item_key_safe(root, path, &new_key); 1125 btrfs_set_item_key_safe(root->fs_info, path, &new_key);
1130 fi = btrfs_item_ptr(leaf, path->slots[0], 1126 fi = btrfs_item_ptr(leaf, path->slots[0],
1131 struct btrfs_file_extent_item); 1127 struct btrfs_file_extent_item);
1132 btrfs_set_file_extent_generation(leaf, fi, 1128 btrfs_set_file_extent_generation(leaf, fi,
@@ -1160,7 +1156,7 @@ again:
1160 trans->transid); 1156 trans->transid);
1161 path->slots[0]++; 1157 path->slots[0]++;
1162 new_key.offset = start; 1158 new_key.offset = start;
1163 btrfs_set_item_key_safe(root, path, &new_key); 1159 btrfs_set_item_key_safe(root->fs_info, path, &new_key);
1164 1160
1165 fi = btrfs_item_ptr(leaf, path->slots[0], 1161 fi = btrfs_item_ptr(leaf, path->slots[0],
1166 struct btrfs_file_extent_item); 1162 struct btrfs_file_extent_item);
@@ -1485,7 +1481,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1485 PAGE_CACHE_SIZE / (sizeof(struct page *))); 1481 PAGE_CACHE_SIZE / (sizeof(struct page *)));
1486 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); 1482 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
1487 nrptrs = max(nrptrs, 8); 1483 nrptrs = max(nrptrs, 8);
1488 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 1484 pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
1489 if (!pages) 1485 if (!pages)
1490 return -ENOMEM; 1486 return -ENOMEM;
1491 1487
@@ -1514,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1514 } 1510 }
1515 1511
1516 reserve_bytes = num_pages << PAGE_CACHE_SHIFT; 1512 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1517 ret = btrfs_check_data_free_space(inode, reserve_bytes); 1513 ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes);
1518 if (ret == -ENOSPC && 1514 if (ret == -ENOSPC &&
1519 (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | 1515 (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1520 BTRFS_INODE_PREALLOC))) { 1516 BTRFS_INODE_PREALLOC))) {
@@ -1635,8 +1631,8 @@ again:
1635 btrfs_end_write_no_snapshoting(root); 1631 btrfs_end_write_no_snapshoting(root);
1636 1632
1637 if (only_release_metadata && copied > 0) { 1633 if (only_release_metadata && copied > 0) {
1638 u64 lockstart = round_down(pos, root->sectorsize); 1634 lockstart = round_down(pos, root->sectorsize);
1639 u64 lockend = lockstart + 1635 lockend = lockstart +
1640 (dirty_pages << PAGE_CACHE_SHIFT) - 1; 1636 (dirty_pages << PAGE_CACHE_SHIFT) - 1;
1641 1637
1642 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 1638 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
@@ -1809,7 +1805,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1809 * otherwise subsequent syncs to a file that's been synced in this 1805 * otherwise subsequent syncs to a file that's been synced in this
1810 * transaction will appear to have already occurred. 1806 * transaction will appear to have already occurred.
1811 */ 1807 */
1808 spin_lock(&BTRFS_I(inode)->lock);
1812 BTRFS_I(inode)->last_sub_trans = root->log_transid; 1809 BTRFS_I(inode)->last_sub_trans = root->log_transid;
1810 spin_unlock(&BTRFS_I(inode)->lock);
1813 if (num_written > 0) { 1811 if (num_written > 0) {
1814 err = generic_write_sync(file, pos, num_written); 1812 err = generic_write_sync(file, pos, num_written);
1815 if (err < 0) 1813 if (err < 0)
@@ -2162,7 +2160,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
2162 u64 num_bytes; 2160 u64 num_bytes;
2163 2161
2164 key.offset = offset; 2162 key.offset = offset;
2165 btrfs_set_item_key_safe(root, path, &key); 2163 btrfs_set_item_key_safe(root->fs_info, path, &key);
2166 fi = btrfs_item_ptr(leaf, path->slots[0], 2164 fi = btrfs_item_ptr(leaf, path->slots[0],
2167 struct btrfs_file_extent_item); 2165 struct btrfs_file_extent_item);
2168 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - 2166 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -2545,7 +2543,6 @@ static long btrfs_fallocate(struct file *file, int mode,
2545{ 2543{
2546 struct inode *inode = file_inode(file); 2544 struct inode *inode = file_inode(file);
2547 struct extent_state *cached_state = NULL; 2545 struct extent_state *cached_state = NULL;
2548 struct btrfs_root *root = BTRFS_I(inode)->root;
2549 u64 cur_offset; 2546 u64 cur_offset;
2550 u64 last_byte; 2547 u64 last_byte;
2551 u64 alloc_start; 2548 u64 alloc_start;
@@ -2570,14 +2567,9 @@ static long btrfs_fallocate(struct file *file, int mode,
2570 * Make sure we have enough space before we do the 2567 * Make sure we have enough space before we do the
2571 * allocation. 2568 * allocation.
2572 */ 2569 */
2573 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); 2570 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start);
2574 if (ret) 2571 if (ret)
2575 return ret; 2572 return ret;
2576 if (root->fs_info->quota_enabled) {
2577 ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
2578 if (ret)
2579 goto out_reserve_fail;
2580 }
2581 2573
2582 mutex_lock(&inode->i_mutex); 2574 mutex_lock(&inode->i_mutex);
2583 ret = inode_newsize_ok(inode, alloc_end); 2575 ret = inode_newsize_ok(inode, alloc_end);
@@ -2667,23 +2659,35 @@ static long btrfs_fallocate(struct file *file, int mode,
2667 1 << inode->i_blkbits, 2659 1 << inode->i_blkbits,
2668 offset + len, 2660 offset + len,
2669 &alloc_hint); 2661 &alloc_hint);
2670
2671 if (ret < 0) {
2672 free_extent_map(em);
2673 break;
2674 }
2675 } else if (actual_end > inode->i_size && 2662 } else if (actual_end > inode->i_size &&
2676 !(mode & FALLOC_FL_KEEP_SIZE)) { 2663 !(mode & FALLOC_FL_KEEP_SIZE)) {
2664 struct btrfs_trans_handle *trans;
2665 struct btrfs_root *root = BTRFS_I(inode)->root;
2666
2677 /* 2667 /*
2678 * We didn't need to allocate any more space, but we 2668 * We didn't need to allocate any more space, but we
2679 * still extended the size of the file so we need to 2669 * still extended the size of the file so we need to
2680 * update i_size. 2670 * update i_size and the inode item.
2681 */ 2671 */
2682 inode->i_ctime = CURRENT_TIME; 2672 trans = btrfs_start_transaction(root, 1);
2683 i_size_write(inode, actual_end); 2673 if (IS_ERR(trans)) {
2684 btrfs_ordered_update_i_size(inode, actual_end, NULL); 2674 ret = PTR_ERR(trans);
2675 } else {
2676 inode->i_ctime = CURRENT_TIME;
2677 i_size_write(inode, actual_end);
2678 btrfs_ordered_update_i_size(inode, actual_end,
2679 NULL);
2680 ret = btrfs_update_inode(trans, root, inode);
2681 if (ret)
2682 btrfs_end_transaction(trans, root);
2683 else
2684 ret = btrfs_end_transaction(trans,
2685 root);
2686 }
2685 } 2687 }
2686 free_extent_map(em); 2688 free_extent_map(em);
2689 if (ret < 0)
2690 break;
2687 2691
2688 cur_offset = last_byte; 2692 cur_offset = last_byte;
2689 if (cur_offset >= alloc_end) { 2693 if (cur_offset >= alloc_end) {
@@ -2695,9 +2699,6 @@ static long btrfs_fallocate(struct file *file, int mode,
2695 &cached_state, GFP_NOFS); 2699 &cached_state, GFP_NOFS);
2696out: 2700out:
2697 mutex_unlock(&inode->i_mutex); 2701 mutex_unlock(&inode->i_mutex);
2698 if (root->fs_info->quota_enabled)
2699 btrfs_qgroup_free(root, alloc_end - alloc_start);
2700out_reserve_fail:
2701 /* Let go of our reservation. */ 2702 /* Let go of our reservation. */
2702 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); 2703 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
2703 return ret; 2704 return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a71978578fa7..253cb74b0e27 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -85,7 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
85 } 85 }
86 86
87 mapping_set_gfp_mask(inode->i_mapping, 87 mapping_set_gfp_mask(inode->i_mapping,
88 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 88 mapping_gfp_mask(inode->i_mapping) &
89 ~(GFP_NOFS & ~__GFP_HIGHMEM));
89 90
90 return inode; 91 return inode;
91} 92}
@@ -170,13 +171,13 @@ static int __create_free_space_inode(struct btrfs_root *root,
170 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 171 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
171 key.offset = offset; 172 key.offset = offset;
172 key.type = 0; 173 key.type = 0;
173
174 ret = btrfs_insert_empty_item(trans, root, path, &key, 174 ret = btrfs_insert_empty_item(trans, root, path, &key,
175 sizeof(struct btrfs_free_space_header)); 175 sizeof(struct btrfs_free_space_header));
176 if (ret < 0) { 176 if (ret < 0) {
177 btrfs_release_path(path); 177 btrfs_release_path(path);
178 return ret; 178 return ret;
179 } 179 }
180
180 leaf = path->nodes[0]; 181 leaf = path->nodes[0];
181 header = btrfs_item_ptr(leaf, path->slots[0], 182 header = btrfs_item_ptr(leaf, path->slots[0],
182 struct btrfs_free_space_header); 183 struct btrfs_free_space_header);
@@ -225,9 +226,37 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
225 226
226int btrfs_truncate_free_space_cache(struct btrfs_root *root, 227int btrfs_truncate_free_space_cache(struct btrfs_root *root,
227 struct btrfs_trans_handle *trans, 228 struct btrfs_trans_handle *trans,
229 struct btrfs_block_group_cache *block_group,
228 struct inode *inode) 230 struct inode *inode)
229{ 231{
230 int ret = 0; 232 int ret = 0;
233 struct btrfs_path *path = btrfs_alloc_path();
234
235 if (!path) {
236 ret = -ENOMEM;
237 goto fail;
238 }
239
240 if (block_group) {
241 mutex_lock(&trans->transaction->cache_write_mutex);
242 if (!list_empty(&block_group->io_list)) {
243 list_del_init(&block_group->io_list);
244
245 btrfs_wait_cache_io(root, trans, block_group,
246 &block_group->io_ctl, path,
247 block_group->key.objectid);
248 btrfs_put_block_group(block_group);
249 }
250
251 /*
252 * now that we've truncated the cache away, it's no longer
253 * set up or written
254 */
255 spin_lock(&block_group->lock);
256 block_group->disk_cache_state = BTRFS_DC_CLEAR;
257 spin_unlock(&block_group->lock);
258 }
259 btrfs_free_path(path);
231 260
232 btrfs_i_size_write(inode, 0); 261 btrfs_i_size_write(inode, 0);
233 truncate_pagecache(inode, 0); 262 truncate_pagecache(inode, 0);
@@ -235,15 +264,23 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
235 /* 264 /*
236 * We don't need an orphan item because truncating the free space cache 265 * We don't need an orphan item because truncating the free space cache
237 * will never be split across transactions. 266 * will never be split across transactions.
267 * We don't need to check for -EAGAIN because we're a free space
268 * cache inode
238 */ 269 */
239 ret = btrfs_truncate_inode_items(trans, root, inode, 270 ret = btrfs_truncate_inode_items(trans, root, inode,
240 0, BTRFS_EXTENT_DATA_KEY); 271 0, BTRFS_EXTENT_DATA_KEY);
241 if (ret) { 272 if (ret) {
273 mutex_unlock(&trans->transaction->cache_write_mutex);
242 btrfs_abort_transaction(trans, root, ret); 274 btrfs_abort_transaction(trans, root, ret);
243 return ret; 275 return ret;
244 } 276 }
245 277
246 ret = btrfs_update_inode(trans, root, inode); 278 ret = btrfs_update_inode(trans, root, inode);
279
280 if (block_group)
281 mutex_unlock(&trans->transaction->cache_write_mutex);
282
283fail:
247 if (ret) 284 if (ret)
248 btrfs_abort_transaction(trans, root, ret); 285 btrfs_abort_transaction(trans, root, ret);
249 286
@@ -269,18 +306,7 @@ static int readahead_cache(struct inode *inode)
269 return 0; 306 return 0;
270} 307}
271 308
272struct io_ctl { 309static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
273 void *cur, *orig;
274 struct page *page;
275 struct page **pages;
276 struct btrfs_root *root;
277 unsigned long size;
278 int index;
279 int num_pages;
280 unsigned check_crcs:1;
281};
282
283static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
284 struct btrfs_root *root, int write) 310 struct btrfs_root *root, int write)
285{ 311{
286 int num_pages; 312 int num_pages;
@@ -296,45 +322,46 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
296 (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) 322 (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
297 return -ENOSPC; 323 return -ENOSPC;
298 324
299 memset(io_ctl, 0, sizeof(struct io_ctl)); 325 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
300 326
301 io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); 327 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
302 if (!io_ctl->pages) 328 if (!io_ctl->pages)
303 return -ENOMEM; 329 return -ENOMEM;
304 330
305 io_ctl->num_pages = num_pages; 331 io_ctl->num_pages = num_pages;
306 io_ctl->root = root; 332 io_ctl->root = root;
307 io_ctl->check_crcs = check_crcs; 333 io_ctl->check_crcs = check_crcs;
334 io_ctl->inode = inode;
308 335
309 return 0; 336 return 0;
310} 337}
311 338
312static void io_ctl_free(struct io_ctl *io_ctl) 339static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
313{ 340{
314 kfree(io_ctl->pages); 341 kfree(io_ctl->pages);
342 io_ctl->pages = NULL;
315} 343}
316 344
317static void io_ctl_unmap_page(struct io_ctl *io_ctl) 345static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
318{ 346{
319 if (io_ctl->cur) { 347 if (io_ctl->cur) {
320 kunmap(io_ctl->page);
321 io_ctl->cur = NULL; 348 io_ctl->cur = NULL;
322 io_ctl->orig = NULL; 349 io_ctl->orig = NULL;
323 } 350 }
324} 351}
325 352
326static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) 353static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
327{ 354{
328 ASSERT(io_ctl->index < io_ctl->num_pages); 355 ASSERT(io_ctl->index < io_ctl->num_pages);
329 io_ctl->page = io_ctl->pages[io_ctl->index++]; 356 io_ctl->page = io_ctl->pages[io_ctl->index++];
330 io_ctl->cur = kmap(io_ctl->page); 357 io_ctl->cur = page_address(io_ctl->page);
331 io_ctl->orig = io_ctl->cur; 358 io_ctl->orig = io_ctl->cur;
332 io_ctl->size = PAGE_CACHE_SIZE; 359 io_ctl->size = PAGE_CACHE_SIZE;
333 if (clear) 360 if (clear)
334 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); 361 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
335} 362}
336 363
337static void io_ctl_drop_pages(struct io_ctl *io_ctl) 364static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
338{ 365{
339 int i; 366 int i;
340 367
@@ -349,7 +376,7 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl)
349 } 376 }
350} 377}
351 378
352static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, 379static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
353 int uptodate) 380 int uptodate)
354{ 381{
355 struct page *page; 382 struct page *page;
@@ -383,7 +410,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
383 return 0; 410 return 0;
384} 411}
385 412
386static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) 413static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
387{ 414{
388 __le64 *val; 415 __le64 *val;
389 416
@@ -406,7 +433,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
406 io_ctl->cur += sizeof(u64); 433 io_ctl->cur += sizeof(u64);
407} 434}
408 435
409static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) 436static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
410{ 437{
411 __le64 *gen; 438 __le64 *gen;
412 439
@@ -435,7 +462,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
435 return 0; 462 return 0;
436} 463}
437 464
438static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) 465static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
439{ 466{
440 u32 *tmp; 467 u32 *tmp;
441 u32 crc = ~(u32)0; 468 u32 crc = ~(u32)0;
@@ -453,13 +480,12 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
453 PAGE_CACHE_SIZE - offset); 480 PAGE_CACHE_SIZE - offset);
454 btrfs_csum_final(crc, (char *)&crc); 481 btrfs_csum_final(crc, (char *)&crc);
455 io_ctl_unmap_page(io_ctl); 482 io_ctl_unmap_page(io_ctl);
456 tmp = kmap(io_ctl->pages[0]); 483 tmp = page_address(io_ctl->pages[0]);
457 tmp += index; 484 tmp += index;
458 *tmp = crc; 485 *tmp = crc;
459 kunmap(io_ctl->pages[0]);
460} 486}
461 487
462static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) 488static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
463{ 489{
464 u32 *tmp, val; 490 u32 *tmp, val;
465 u32 crc = ~(u32)0; 491 u32 crc = ~(u32)0;
@@ -473,10 +499,9 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
473 if (index == 0) 499 if (index == 0)
474 offset = sizeof(u32) * io_ctl->num_pages; 500 offset = sizeof(u32) * io_ctl->num_pages;
475 501
476 tmp = kmap(io_ctl->pages[0]); 502 tmp = page_address(io_ctl->pages[0]);
477 tmp += index; 503 tmp += index;
478 val = *tmp; 504 val = *tmp;
479 kunmap(io_ctl->pages[0]);
480 505
481 io_ctl_map_page(io_ctl, 0); 506 io_ctl_map_page(io_ctl, 0);
482 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 507 crc = btrfs_csum_data(io_ctl->orig + offset, crc,
@@ -492,7 +517,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
492 return 0; 517 return 0;
493} 518}
494 519
495static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, 520static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
496 void *bitmap) 521 void *bitmap)
497{ 522{
498 struct btrfs_free_space_entry *entry; 523 struct btrfs_free_space_entry *entry;
@@ -522,7 +547,7 @@ static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
522 return 0; 547 return 0;
523} 548}
524 549
525static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) 550static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
526{ 551{
527 if (!io_ctl->cur) 552 if (!io_ctl->cur)
528 return -ENOSPC; 553 return -ENOSPC;
@@ -545,7 +570,7 @@ static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
545 return 0; 570 return 0;
546} 571}
547 572
548static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) 573static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
549{ 574{
550 /* 575 /*
551 * If we're not on the boundary we know we've modified the page and we 576 * If we're not on the boundary we know we've modified the page and we
@@ -562,7 +587,7 @@ static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
562 } 587 }
563} 588}
564 589
565static int io_ctl_read_entry(struct io_ctl *io_ctl, 590static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
566 struct btrfs_free_space *entry, u8 *type) 591 struct btrfs_free_space *entry, u8 *type)
567{ 592{
568 struct btrfs_free_space_entry *e; 593 struct btrfs_free_space_entry *e;
@@ -589,7 +614,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
589 return 0; 614 return 0;
590} 615}
591 616
592static int io_ctl_read_bitmap(struct io_ctl *io_ctl, 617static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
593 struct btrfs_free_space *entry) 618 struct btrfs_free_space *entry)
594{ 619{
595 int ret; 620 int ret;
@@ -648,7 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
648{ 673{
649 struct btrfs_free_space_header *header; 674 struct btrfs_free_space_header *header;
650 struct extent_buffer *leaf; 675 struct extent_buffer *leaf;
651 struct io_ctl io_ctl; 676 struct btrfs_io_ctl io_ctl;
652 struct btrfs_key key; 677 struct btrfs_key key;
653 struct btrfs_free_space *e, *n; 678 struct btrfs_free_space *e, *n;
654 LIST_HEAD(bitmaps); 679 LIST_HEAD(bitmaps);
@@ -877,7 +902,7 @@ out:
877} 902}
878 903
879static noinline_for_stack 904static noinline_for_stack
880int write_cache_extent_entries(struct io_ctl *io_ctl, 905int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
881 struct btrfs_free_space_ctl *ctl, 906 struct btrfs_free_space_ctl *ctl,
882 struct btrfs_block_group_cache *block_group, 907 struct btrfs_block_group_cache *block_group,
883 int *entries, int *bitmaps, 908 int *entries, int *bitmaps,
@@ -885,6 +910,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
885{ 910{
886 int ret; 911 int ret;
887 struct btrfs_free_cluster *cluster = NULL; 912 struct btrfs_free_cluster *cluster = NULL;
913 struct btrfs_free_cluster *cluster_locked = NULL;
888 struct rb_node *node = rb_first(&ctl->free_space_offset); 914 struct rb_node *node = rb_first(&ctl->free_space_offset);
889 struct btrfs_trim_range *trim_entry; 915 struct btrfs_trim_range *trim_entry;
890 916
@@ -896,6 +922,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
896 } 922 }
897 923
898 if (!node && cluster) { 924 if (!node && cluster) {
925 cluster_locked = cluster;
926 spin_lock(&cluster_locked->lock);
899 node = rb_first(&cluster->root); 927 node = rb_first(&cluster->root);
900 cluster = NULL; 928 cluster = NULL;
901 } 929 }
@@ -919,9 +947,15 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
919 node = rb_next(node); 947 node = rb_next(node);
920 if (!node && cluster) { 948 if (!node && cluster) {
921 node = rb_first(&cluster->root); 949 node = rb_first(&cluster->root);
950 cluster_locked = cluster;
951 spin_lock(&cluster_locked->lock);
922 cluster = NULL; 952 cluster = NULL;
923 } 953 }
924 } 954 }
955 if (cluster_locked) {
956 spin_unlock(&cluster_locked->lock);
957 cluster_locked = NULL;
958 }
925 959
926 /* 960 /*
927 * Make sure we don't miss any range that was removed from our rbtree 961 * Make sure we don't miss any range that was removed from our rbtree
@@ -939,6 +973,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
939 973
940 return 0; 974 return 0;
941fail: 975fail:
976 if (cluster_locked)
977 spin_unlock(&cluster_locked->lock);
942 return -ENOSPC; 978 return -ENOSPC;
943} 979}
944 980
@@ -1000,7 +1036,7 @@ fail:
1000static noinline_for_stack int 1036static noinline_for_stack int
1001write_pinned_extent_entries(struct btrfs_root *root, 1037write_pinned_extent_entries(struct btrfs_root *root,
1002 struct btrfs_block_group_cache *block_group, 1038 struct btrfs_block_group_cache *block_group,
1003 struct io_ctl *io_ctl, 1039 struct btrfs_io_ctl *io_ctl,
1004 int *entries) 1040 int *entries)
1005{ 1041{
1006 u64 start, extent_start, extent_end, len; 1042 u64 start, extent_start, extent_end, len;
@@ -1050,7 +1086,7 @@ write_pinned_extent_entries(struct btrfs_root *root,
1050} 1086}
1051 1087
1052static noinline_for_stack int 1088static noinline_for_stack int
1053write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list) 1089write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
1054{ 1090{
1055 struct list_head *pos, *n; 1091 struct list_head *pos, *n;
1056 int ret; 1092 int ret;
@@ -1084,7 +1120,7 @@ static int flush_dirty_cache(struct inode *inode)
1084 1120
1085static void noinline_for_stack 1121static void noinline_for_stack
1086cleanup_write_cache_enospc(struct inode *inode, 1122cleanup_write_cache_enospc(struct inode *inode,
1087 struct io_ctl *io_ctl, 1123 struct btrfs_io_ctl *io_ctl,
1088 struct extent_state **cached_state, 1124 struct extent_state **cached_state,
1089 struct list_head *bitmap_list) 1125 struct list_head *bitmap_list)
1090{ 1126{
@@ -1101,6 +1137,70 @@ cleanup_write_cache_enospc(struct inode *inode,
1101 GFP_NOFS); 1137 GFP_NOFS);
1102} 1138}
1103 1139
1140int btrfs_wait_cache_io(struct btrfs_root *root,
1141 struct btrfs_trans_handle *trans,
1142 struct btrfs_block_group_cache *block_group,
1143 struct btrfs_io_ctl *io_ctl,
1144 struct btrfs_path *path, u64 offset)
1145{
1146 int ret;
1147 struct inode *inode = io_ctl->inode;
1148
1149 if (!inode)
1150 return 0;
1151
1152 root = root->fs_info->tree_root;
1153
1154 /* Flush the dirty pages in the cache file. */
1155 ret = flush_dirty_cache(inode);
1156 if (ret)
1157 goto out;
1158
1159 /* Update the cache item to tell everyone this cache file is valid. */
1160 ret = update_cache_item(trans, root, inode, path, offset,
1161 io_ctl->entries, io_ctl->bitmaps);
1162out:
1163 io_ctl_free(io_ctl);
1164 if (ret) {
1165 invalidate_inode_pages2(inode->i_mapping);
1166 BTRFS_I(inode)->generation = 0;
1167 if (block_group) {
1168#ifdef DEBUG
1169 btrfs_err(root->fs_info,
1170 "failed to write free space cache for block group %llu",
1171 block_group->key.objectid);
1172#endif
1173 }
1174 }
1175 btrfs_update_inode(trans, root, inode);
1176
1177 if (block_group) {
1178 /* the dirty list is protected by the dirty_bgs_lock */
1179 spin_lock(&trans->transaction->dirty_bgs_lock);
1180
1181 /* the disk_cache_state is protected by the block group lock */
1182 spin_lock(&block_group->lock);
1183
1184 /*
1185 * only mark this as written if we didn't get put back on
1186 * the dirty list while waiting for IO. Otherwise our
1187 * cache state won't be right, and we won't get written again
1188 */
1189 if (!ret && list_empty(&block_group->dirty_list))
1190 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1191 else if (ret)
1192 block_group->disk_cache_state = BTRFS_DC_ERROR;
1193
1194 spin_unlock(&block_group->lock);
1195 spin_unlock(&trans->transaction->dirty_bgs_lock);
1196 io_ctl->inode = NULL;
1197 iput(inode);
1198 }
1199
1200 return ret;
1201
1202}
1203
1104/** 1204/**
1105 * __btrfs_write_out_cache - write out cached info to an inode 1205 * __btrfs_write_out_cache - write out cached info to an inode
1106 * @root - the root the inode belongs to 1206 * @root - the root the inode belongs to
@@ -1117,20 +1217,22 @@ cleanup_write_cache_enospc(struct inode *inode,
1117static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 1217static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1118 struct btrfs_free_space_ctl *ctl, 1218 struct btrfs_free_space_ctl *ctl,
1119 struct btrfs_block_group_cache *block_group, 1219 struct btrfs_block_group_cache *block_group,
1220 struct btrfs_io_ctl *io_ctl,
1120 struct btrfs_trans_handle *trans, 1221 struct btrfs_trans_handle *trans,
1121 struct btrfs_path *path, u64 offset) 1222 struct btrfs_path *path, u64 offset)
1122{ 1223{
1123 struct extent_state *cached_state = NULL; 1224 struct extent_state *cached_state = NULL;
1124 struct io_ctl io_ctl;
1125 LIST_HEAD(bitmap_list); 1225 LIST_HEAD(bitmap_list);
1126 int entries = 0; 1226 int entries = 0;
1127 int bitmaps = 0; 1227 int bitmaps = 0;
1128 int ret; 1228 int ret;
1229 int must_iput = 0;
1129 1230
1130 if (!i_size_read(inode)) 1231 if (!i_size_read(inode))
1131 return -1; 1232 return -1;
1132 1233
1133 ret = io_ctl_init(&io_ctl, inode, root, 1); 1234 WARN_ON(io_ctl->pages);
1235 ret = io_ctl_init(io_ctl, inode, root, 1);
1134 if (ret) 1236 if (ret)
1135 return -1; 1237 return -1;
1136 1238
@@ -1143,24 +1245,27 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1143 up_write(&block_group->data_rwsem); 1245 up_write(&block_group->data_rwsem);
1144 BTRFS_I(inode)->generation = 0; 1246 BTRFS_I(inode)->generation = 0;
1145 ret = 0; 1247 ret = 0;
1248 must_iput = 1;
1146 goto out; 1249 goto out;
1147 } 1250 }
1148 spin_unlock(&block_group->lock); 1251 spin_unlock(&block_group->lock);
1149 } 1252 }
1150 1253
1151 /* Lock all pages first so we can lock the extent safely. */ 1254 /* Lock all pages first so we can lock the extent safely. */
1152 io_ctl_prepare_pages(&io_ctl, inode, 0); 1255 io_ctl_prepare_pages(io_ctl, inode, 0);
1153 1256
1154 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 1257 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
1155 0, &cached_state); 1258 0, &cached_state);
1156 1259
1157 io_ctl_set_generation(&io_ctl, trans->transid); 1260 io_ctl_set_generation(io_ctl, trans->transid);
1158 1261
1159 mutex_lock(&ctl->cache_writeout_mutex); 1262 mutex_lock(&ctl->cache_writeout_mutex);
1160 /* Write out the extent entries in the free space cache */ 1263 /* Write out the extent entries in the free space cache */
1161 ret = write_cache_extent_entries(&io_ctl, ctl, 1264 spin_lock(&ctl->tree_lock);
1265 ret = write_cache_extent_entries(io_ctl, ctl,
1162 block_group, &entries, &bitmaps, 1266 block_group, &entries, &bitmaps,
1163 &bitmap_list); 1267 &bitmap_list);
1268 spin_unlock(&ctl->tree_lock);
1164 if (ret) { 1269 if (ret) {
1165 mutex_unlock(&ctl->cache_writeout_mutex); 1270 mutex_unlock(&ctl->cache_writeout_mutex);
1166 goto out_nospc; 1271 goto out_nospc;
@@ -1170,8 +1275,11 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1170 * Some spaces that are freed in the current transaction are pinned, 1275 * Some spaces that are freed in the current transaction are pinned,
1171 * they will be added into free space cache after the transaction is 1276 * they will be added into free space cache after the transaction is
1172 * committed, we shouldn't lose them. 1277 * committed, we shouldn't lose them.
1278 *
1279 * If this changes while we are working we'll get added back to
1280 * the dirty list and redo it. No locking needed
1173 */ 1281 */
1174 ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); 1282 ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries);
1175 if (ret) { 1283 if (ret) {
1176 mutex_unlock(&ctl->cache_writeout_mutex); 1284 mutex_unlock(&ctl->cache_writeout_mutex);
1177 goto out_nospc; 1285 goto out_nospc;
@@ -1182,16 +1290,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1182 * locked while doing it because a concurrent trim can be manipulating 1290 * locked while doing it because a concurrent trim can be manipulating
1183 * or freeing the bitmap. 1291 * or freeing the bitmap.
1184 */ 1292 */
1185 ret = write_bitmap_entries(&io_ctl, &bitmap_list); 1293 spin_lock(&ctl->tree_lock);
1294 ret = write_bitmap_entries(io_ctl, &bitmap_list);
1295 spin_unlock(&ctl->tree_lock);
1186 mutex_unlock(&ctl->cache_writeout_mutex); 1296 mutex_unlock(&ctl->cache_writeout_mutex);
1187 if (ret) 1297 if (ret)
1188 goto out_nospc; 1298 goto out_nospc;
1189 1299
1190 /* Zero out the rest of the pages just to make sure */ 1300 /* Zero out the rest of the pages just to make sure */
1191 io_ctl_zero_remaining_pages(&io_ctl); 1301 io_ctl_zero_remaining_pages(io_ctl);
1192 1302
1193 /* Everything is written out, now we dirty the pages in the file. */ 1303 /* Everything is written out, now we dirty the pages in the file. */
1194 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, 1304 ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
1195 0, i_size_read(inode), &cached_state); 1305 0, i_size_read(inode), &cached_state);
1196 if (ret) 1306 if (ret)
1197 goto out_nospc; 1307 goto out_nospc;
@@ -1202,30 +1312,39 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1202 * Release the pages and unlock the extent, we will flush 1312 * Release the pages and unlock the extent, we will flush
1203 * them out later 1313 * them out later
1204 */ 1314 */
1205 io_ctl_drop_pages(&io_ctl); 1315 io_ctl_drop_pages(io_ctl);
1206 1316
1207 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1317 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1208 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 1318 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1209 1319
1210 /* Flush the dirty pages in the cache file. */ 1320 /*
1211 ret = flush_dirty_cache(inode); 1321 * at this point the pages are under IO and we're happy,
1322 * The caller is responsible for waiting on them and updating
1323 * the cache and the inode
1324 */
1325 io_ctl->entries = entries;
1326 io_ctl->bitmaps = bitmaps;
1327
1328 ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
1212 if (ret) 1329 if (ret)
1213 goto out; 1330 goto out;
1214 1331
1215 /* Update the cache item to tell everyone this cache file is valid. */ 1332 return 0;
1216 ret = update_cache_item(trans, root, inode, path, offset, 1333
1217 entries, bitmaps);
1218out: 1334out:
1219 io_ctl_free(&io_ctl); 1335 io_ctl->inode = NULL;
1336 io_ctl_free(io_ctl);
1220 if (ret) { 1337 if (ret) {
1221 invalidate_inode_pages2(inode->i_mapping); 1338 invalidate_inode_pages2(inode->i_mapping);
1222 BTRFS_I(inode)->generation = 0; 1339 BTRFS_I(inode)->generation = 0;
1223 } 1340 }
1224 btrfs_update_inode(trans, root, inode); 1341 btrfs_update_inode(trans, root, inode);
1342 if (must_iput)
1343 iput(inode);
1225 return ret; 1344 return ret;
1226 1345
1227out_nospc: 1346out_nospc:
1228 cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); 1347 cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
1229 1348
1230 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) 1349 if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
1231 up_write(&block_group->data_rwsem); 1350 up_write(&block_group->data_rwsem);
@@ -1241,7 +1360,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1241 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1360 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1242 struct inode *inode; 1361 struct inode *inode;
1243 int ret = 0; 1362 int ret = 0;
1244 enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
1245 1363
1246 root = root->fs_info->tree_root; 1364 root = root->fs_info->tree_root;
1247 1365
@@ -1250,34 +1368,34 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1250 spin_unlock(&block_group->lock); 1368 spin_unlock(&block_group->lock);
1251 return 0; 1369 return 0;
1252 } 1370 }
1253
1254 if (block_group->delalloc_bytes) {
1255 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
1256 spin_unlock(&block_group->lock);
1257 return 0;
1258 }
1259 spin_unlock(&block_group->lock); 1371 spin_unlock(&block_group->lock);
1260 1372
1261 inode = lookup_free_space_inode(root, block_group, path); 1373 inode = lookup_free_space_inode(root, block_group, path);
1262 if (IS_ERR(inode)) 1374 if (IS_ERR(inode))
1263 return 0; 1375 return 0;
1264 1376
1265 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, 1377 ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
1378 &block_group->io_ctl, trans,
1266 path, block_group->key.objectid); 1379 path, block_group->key.objectid);
1267 if (ret) { 1380 if (ret) {
1268 dcs = BTRFS_DC_ERROR;
1269 ret = 0;
1270#ifdef DEBUG 1381#ifdef DEBUG
1271 btrfs_err(root->fs_info, 1382 btrfs_err(root->fs_info,
1272 "failed to write free space cache for block group %llu", 1383 "failed to write free space cache for block group %llu",
1273 block_group->key.objectid); 1384 block_group->key.objectid);
1274#endif 1385#endif
1386 spin_lock(&block_group->lock);
1387 block_group->disk_cache_state = BTRFS_DC_ERROR;
1388 spin_unlock(&block_group->lock);
1389
1390 block_group->io_ctl.inode = NULL;
1391 iput(inode);
1275 } 1392 }
1276 1393
1277 spin_lock(&block_group->lock); 1394 /*
1278 block_group->disk_cache_state = dcs; 1395 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
1279 spin_unlock(&block_group->lock); 1396 * to wait for IO and put the inode
1280 iput(inode); 1397 */
1398
1281 return ret; 1399 return ret;
1282} 1400}
1283 1401
@@ -1298,11 +1416,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1298 u64 offset) 1416 u64 offset)
1299{ 1417{
1300 u64 bitmap_start; 1418 u64 bitmap_start;
1301 u64 bytes_per_bitmap; 1419 u32 bytes_per_bitmap;
1302 1420
1303 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1421 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1304 bitmap_start = offset - ctl->start; 1422 bitmap_start = offset - ctl->start;
1305 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1423 bitmap_start = div_u64(bitmap_start, bytes_per_bitmap);
1306 bitmap_start *= bytes_per_bitmap; 1424 bitmap_start *= bytes_per_bitmap;
1307 bitmap_start += ctl->start; 1425 bitmap_start += ctl->start;
1308 1426
@@ -1521,10 +1639,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1521 u64 bitmap_bytes; 1639 u64 bitmap_bytes;
1522 u64 extent_bytes; 1640 u64 extent_bytes;
1523 u64 size = block_group->key.offset; 1641 u64 size = block_group->key.offset;
1524 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1642 u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1525 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1643 u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
1526 1644
1527 max_bitmaps = max(max_bitmaps, 1); 1645 max_bitmaps = max_t(u32, max_bitmaps, 1);
1528 1646
1529 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1647 ASSERT(ctl->total_bitmaps <= max_bitmaps);
1530 1648
@@ -1537,7 +1655,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1537 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1655 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1538 else 1656 else
1539 max_bytes = MAX_CACHE_BYTES_PER_GIG * 1657 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1540 div64_u64(size, 1024 * 1024 * 1024); 1658 div_u64(size, 1024 * 1024 * 1024);
1541 1659
1542 /* 1660 /*
1543 * we want to account for 1 more bitmap than what we have so we can make 1661 * we want to account for 1 more bitmap than what we have so we can make
@@ -1552,14 +1670,14 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1552 } 1670 }
1553 1671
1554 /* 1672 /*
1555 * we want the extent entry threshold to always be at most 1/2 the maxw 1673 * we want the extent entry threshold to always be at most 1/2 the max
1556 * bytes we can have, or whatever is less than that. 1674 * bytes we can have, or whatever is less than that.
1557 */ 1675 */
1558 extent_bytes = max_bytes - bitmap_bytes; 1676 extent_bytes = max_bytes - bitmap_bytes;
1559 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); 1677 extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
1560 1678
1561 ctl->extents_thresh = 1679 ctl->extents_thresh =
1562 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1680 div_u64(extent_bytes, sizeof(struct btrfs_free_space));
1563} 1681}
1564 1682
1565static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1683static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
@@ -1673,7 +1791,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1673 */ 1791 */
1674 if (*bytes >= align) { 1792 if (*bytes >= align) {
1675 tmp = entry->offset - ctl->start + align - 1; 1793 tmp = entry->offset - ctl->start + align - 1;
1676 do_div(tmp, align); 1794 tmp = div64_u64(tmp, align);
1677 tmp = tmp * align + ctl->start; 1795 tmp = tmp * align + ctl->start;
1678 align_off = tmp - entry->offset; 1796 align_off = tmp - entry->offset;
1679 } else { 1797 } else {
@@ -2402,11 +2520,8 @@ static void __btrfs_remove_free_space_cache_locked(
2402 } else { 2520 } else {
2403 free_bitmap(ctl, info); 2521 free_bitmap(ctl, info);
2404 } 2522 }
2405 if (need_resched()) { 2523
2406 spin_unlock(&ctl->tree_lock); 2524 cond_resched_lock(&ctl->tree_lock);
2407 cond_resched();
2408 spin_lock(&ctl->tree_lock);
2409 }
2410 } 2525 }
2411} 2526}
2412 2527
@@ -2431,11 +2546,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2431 2546
2432 WARN_ON(cluster->block_group != block_group); 2547 WARN_ON(cluster->block_group != block_group);
2433 __btrfs_return_cluster_to_free_space(block_group, cluster); 2548 __btrfs_return_cluster_to_free_space(block_group, cluster);
2434 if (need_resched()) { 2549
2435 spin_unlock(&ctl->tree_lock); 2550 cond_resched_lock(&ctl->tree_lock);
2436 cond_resched();
2437 spin_lock(&ctl->tree_lock);
2438 }
2439 } 2551 }
2440 __btrfs_remove_free_space_cache_locked(ctl); 2552 __btrfs_remove_free_space_cache_locked(ctl);
2441 spin_unlock(&ctl->tree_lock); 2553 spin_unlock(&ctl->tree_lock);
@@ -3346,11 +3458,14 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
3346{ 3458{
3347 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3459 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
3348 int ret; 3460 int ret;
3461 struct btrfs_io_ctl io_ctl;
3349 3462
3350 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3463 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
3351 return 0; 3464 return 0;
3352 3465
3353 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); 3466 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
3467 trans, path, 0) ||
3468 btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
3354 if (ret) { 3469 if (ret) {
3355 btrfs_delalloc_release_metadata(inode, inode->i_size); 3470 btrfs_delalloc_release_metadata(inode, inode->i_size);
3356#ifdef DEBUG 3471#ifdef DEBUG
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 88b2238a0aed..a16a029ad3b1 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -48,6 +48,8 @@ struct btrfs_free_space_op {
48 struct btrfs_free_space *info); 48 struct btrfs_free_space *info);
49}; 49};
50 50
51struct btrfs_io_ctl;
52
51struct inode *lookup_free_space_inode(struct btrfs_root *root, 53struct inode *lookup_free_space_inode(struct btrfs_root *root,
52 struct btrfs_block_group_cache 54 struct btrfs_block_group_cache
53 *block_group, struct btrfs_path *path); 55 *block_group, struct btrfs_path *path);
@@ -60,14 +62,19 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
60 struct btrfs_block_rsv *rsv); 62 struct btrfs_block_rsv *rsv);
61int btrfs_truncate_free_space_cache(struct btrfs_root *root, 63int btrfs_truncate_free_space_cache(struct btrfs_root *root,
62 struct btrfs_trans_handle *trans, 64 struct btrfs_trans_handle *trans,
65 struct btrfs_block_group_cache *block_group,
63 struct inode *inode); 66 struct inode *inode);
64int load_free_space_cache(struct btrfs_fs_info *fs_info, 67int load_free_space_cache(struct btrfs_fs_info *fs_info,
65 struct btrfs_block_group_cache *block_group); 68 struct btrfs_block_group_cache *block_group);
69int btrfs_wait_cache_io(struct btrfs_root *root,
70 struct btrfs_trans_handle *trans,
71 struct btrfs_block_group_cache *block_group,
72 struct btrfs_io_ctl *io_ctl,
73 struct btrfs_path *path, u64 offset);
66int btrfs_write_out_cache(struct btrfs_root *root, 74int btrfs_write_out_cache(struct btrfs_root *root,
67 struct btrfs_trans_handle *trans, 75 struct btrfs_trans_handle *trans,
68 struct btrfs_block_group_cache *block_group, 76 struct btrfs_block_group_cache *block_group,
69 struct btrfs_path *path); 77 struct btrfs_path *path);
70
71struct inode *lookup_free_ino_inode(struct btrfs_root *root, 78struct inode *lookup_free_ino_inode(struct btrfs_root *root,
72 struct btrfs_path *path); 79 struct btrfs_path *path);
73int create_free_ino_inode(struct btrfs_root *root, 80int create_free_ino_inode(struct btrfs_root *root,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 74faea3a516e..f6a596d5a637 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -456,7 +456,7 @@ again:
456 } 456 }
457 457
458 if (i_size_read(inode) > 0) { 458 if (i_size_read(inode) > 0) {
459 ret = btrfs_truncate_free_space_cache(root, trans, inode); 459 ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
460 if (ret) { 460 if (ret) {
461 if (ret != -ENOSPC) 461 if (ret != -ENOSPC)
462 btrfs_abort_transaction(trans, root, ret); 462 btrfs_abort_transaction(trans, root, ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 43192e10cc43..56f00a25c003 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -59,6 +59,7 @@
59#include "backref.h" 59#include "backref.h"
60#include "hash.h" 60#include "hash.h"
61#include "props.h" 61#include "props.h"
62#include "qgroup.h"
62 63
63struct btrfs_iget_args { 64struct btrfs_iget_args {
64 struct btrfs_key *location; 65 struct btrfs_key *location;
@@ -470,7 +471,7 @@ again:
470 */ 471 */
471 if (inode_need_compress(inode)) { 472 if (inode_need_compress(inode)) {
472 WARN_ON(pages); 473 WARN_ON(pages);
473 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 474 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
474 if (!pages) { 475 if (!pages) {
475 /* just bail out to the uncompressed code */ 476 /* just bail out to the uncompressed code */
476 goto cont; 477 goto cont;
@@ -752,7 +753,6 @@ retry:
752 } 753 }
753 goto out_free; 754 goto out_free;
754 } 755 }
755
756 /* 756 /*
757 * here we're doing allocation and writeback of the 757 * here we're doing allocation and writeback of the
758 * compressed pages 758 * compressed pages
@@ -3110,6 +3110,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
3110 if (empty) 3110 if (empty)
3111 return; 3111 return;
3112 3112
3113 down_read(&fs_info->delayed_iput_sem);
3114
3113 spin_lock(&fs_info->delayed_iput_lock); 3115 spin_lock(&fs_info->delayed_iput_lock);
3114 list_splice_init(&fs_info->delayed_iputs, &list); 3116 list_splice_init(&fs_info->delayed_iputs, &list);
3115 spin_unlock(&fs_info->delayed_iput_lock); 3117 spin_unlock(&fs_info->delayed_iput_lock);
@@ -3120,6 +3122,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
3120 iput(delayed->inode); 3122 iput(delayed->inode);
3121 kfree(delayed); 3123 kfree(delayed);
3122 } 3124 }
3125
3126 up_read(&root->fs_info->delayed_iput_sem);
3123} 3127}
3124 3128
3125/* 3129/*
@@ -4162,6 +4166,21 @@ out:
4162 return err; 4166 return err;
4163} 4167}
4164 4168
4169static int truncate_space_check(struct btrfs_trans_handle *trans,
4170 struct btrfs_root *root,
4171 u64 bytes_deleted)
4172{
4173 int ret;
4174
4175 bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
4176 ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
4177 bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
4178 if (!ret)
4179 trans->bytes_reserved += bytes_deleted;
4180 return ret;
4181
4182}
4183
4165/* 4184/*
4166 * this can truncate away extent items, csum items and directory items. 4185 * this can truncate away extent items, csum items and directory items.
4167 * It starts at a high offset and removes keys until it can't find 4186 * It starts at a high offset and removes keys until it can't find
@@ -4197,9 +4216,21 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4197 int ret; 4216 int ret;
4198 int err = 0; 4217 int err = 0;
4199 u64 ino = btrfs_ino(inode); 4218 u64 ino = btrfs_ino(inode);
4219 u64 bytes_deleted = 0;
4220 bool be_nice = 0;
4221 bool should_throttle = 0;
4222 bool should_end = 0;
4200 4223
4201 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 4224 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
4202 4225
4226 /*
4227 * for non-free space inodes and ref cows, we want to back off from
4228 * time to time
4229 */
4230 if (!btrfs_is_free_space_inode(inode) &&
4231 test_bit(BTRFS_ROOT_REF_COWS, &root->state))
4232 be_nice = 1;
4233
4203 path = btrfs_alloc_path(); 4234 path = btrfs_alloc_path();
4204 if (!path) 4235 if (!path)
4205 return -ENOMEM; 4236 return -ENOMEM;
@@ -4229,6 +4260,19 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4229 key.type = (u8)-1; 4260 key.type = (u8)-1;
4230 4261
4231search_again: 4262search_again:
4263 /*
4264 * with a 16K leaf size and 128MB extents, you can actually queue
4265 * up a huge file in a single leaf. Most of the time that
4266 * bytes_deleted is > 0, it will be huge by the time we get here
4267 */
4268 if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
4269 if (btrfs_should_end_transaction(trans, root)) {
4270 err = -EAGAIN;
4271 goto error;
4272 }
4273 }
4274
4275
4232 path->leave_spinning = 1; 4276 path->leave_spinning = 1;
4233 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 4277 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4234 if (ret < 0) { 4278 if (ret < 0) {
@@ -4371,22 +4415,39 @@ delete:
4371 } else { 4415 } else {
4372 break; 4416 break;
4373 } 4417 }
4418 should_throttle = 0;
4419
4374 if (found_extent && 4420 if (found_extent &&
4375 (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || 4421 (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
4376 root == root->fs_info->tree_root)) { 4422 root == root->fs_info->tree_root)) {
4377 btrfs_set_path_blocking(path); 4423 btrfs_set_path_blocking(path);
4424 bytes_deleted += extent_num_bytes;
4378 ret = btrfs_free_extent(trans, root, extent_start, 4425 ret = btrfs_free_extent(trans, root, extent_start,
4379 extent_num_bytes, 0, 4426 extent_num_bytes, 0,
4380 btrfs_header_owner(leaf), 4427 btrfs_header_owner(leaf),
4381 ino, extent_offset, 0); 4428 ino, extent_offset, 0);
4382 BUG_ON(ret); 4429 BUG_ON(ret);
4430 if (btrfs_should_throttle_delayed_refs(trans, root))
4431 btrfs_async_run_delayed_refs(root,
4432 trans->delayed_ref_updates * 2, 0);
4433 if (be_nice) {
4434 if (truncate_space_check(trans, root,
4435 extent_num_bytes)) {
4436 should_end = 1;
4437 }
4438 if (btrfs_should_throttle_delayed_refs(trans,
4439 root)) {
4440 should_throttle = 1;
4441 }
4442 }
4383 } 4443 }
4384 4444
4385 if (found_type == BTRFS_INODE_ITEM_KEY) 4445 if (found_type == BTRFS_INODE_ITEM_KEY)
4386 break; 4446 break;
4387 4447
4388 if (path->slots[0] == 0 || 4448 if (path->slots[0] == 0 ||
4389 path->slots[0] != pending_del_slot) { 4449 path->slots[0] != pending_del_slot ||
4450 should_throttle || should_end) {
4390 if (pending_del_nr) { 4451 if (pending_del_nr) {
4391 ret = btrfs_del_items(trans, root, path, 4452 ret = btrfs_del_items(trans, root, path,
4392 pending_del_slot, 4453 pending_del_slot,
@@ -4399,6 +4460,23 @@ delete:
4399 pending_del_nr = 0; 4460 pending_del_nr = 0;
4400 } 4461 }
4401 btrfs_release_path(path); 4462 btrfs_release_path(path);
4463 if (should_throttle) {
4464 unsigned long updates = trans->delayed_ref_updates;
4465 if (updates) {
4466 trans->delayed_ref_updates = 0;
4467 ret = btrfs_run_delayed_refs(trans, root, updates * 2);
4468 if (ret && !err)
4469 err = ret;
4470 }
4471 }
4472 /*
4473 * if we failed to refill our space rsv, bail out
4474 * and let the transaction restart
4475 */
4476 if (should_end) {
4477 err = -EAGAIN;
4478 goto error;
4479 }
4402 goto search_again; 4480 goto search_again;
4403 } else { 4481 } else {
4404 path->slots[0]--; 4482 path->slots[0]--;
@@ -4415,7 +4493,18 @@ error:
4415 if (last_size != (u64)-1 && 4493 if (last_size != (u64)-1 &&
4416 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 4494 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
4417 btrfs_ordered_update_i_size(inode, last_size, NULL); 4495 btrfs_ordered_update_i_size(inode, last_size, NULL);
4496
4418 btrfs_free_path(path); 4497 btrfs_free_path(path);
4498
4499 if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
4500 unsigned long updates = trans->delayed_ref_updates;
4501 if (updates) {
4502 trans->delayed_ref_updates = 0;
4503 ret = btrfs_run_delayed_refs(trans, root, updates * 2);
4504 if (ret && !err)
4505 err = ret;
4506 }
4507 }
4419 return err; 4508 return err;
4420} 4509}
4421 4510
@@ -4924,6 +5013,7 @@ void btrfs_evict_inode(struct inode *inode)
4924 struct btrfs_trans_handle *trans; 5013 struct btrfs_trans_handle *trans;
4925 struct btrfs_root *root = BTRFS_I(inode)->root; 5014 struct btrfs_root *root = BTRFS_I(inode)->root;
4926 struct btrfs_block_rsv *rsv, *global_rsv; 5015 struct btrfs_block_rsv *rsv, *global_rsv;
5016 int steal_from_global = 0;
4927 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 5017 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
4928 int ret; 5018 int ret;
4929 5019
@@ -4991,9 +5081,20 @@ void btrfs_evict_inode(struct inode *inode)
4991 * hard as possible to get this to work. 5081 * hard as possible to get this to work.
4992 */ 5082 */
4993 if (ret) 5083 if (ret)
4994 ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); 5084 steal_from_global++;
5085 else
5086 steal_from_global = 0;
5087 ret = 0;
4995 5088
4996 if (ret) { 5089 /*
5090 * steal_from_global == 0: we reserved stuff, hooray!
5091 * steal_from_global == 1: we didn't reserve stuff, boo!
5092 * steal_from_global == 2: we've committed, still not a lot of
5093 * room but maybe we'll have room in the global reserve this
5094 * time.
5095 * steal_from_global == 3: abandon all hope!
5096 */
5097 if (steal_from_global > 2) {
4997 btrfs_warn(root->fs_info, 5098 btrfs_warn(root->fs_info,
4998 "Could not get space for a delete, will truncate on mount %d", 5099 "Could not get space for a delete, will truncate on mount %d",
4999 ret); 5100 ret);
@@ -5009,10 +5110,40 @@ void btrfs_evict_inode(struct inode *inode)
5009 goto no_delete; 5110 goto no_delete;
5010 } 5111 }
5011 5112
5113 /*
5114 * We can't just steal from the global reserve, we need tomake
5115 * sure there is room to do it, if not we need to commit and try
5116 * again.
5117 */
5118 if (steal_from_global) {
5119 if (!btrfs_check_space_for_delayed_refs(trans, root))
5120 ret = btrfs_block_rsv_migrate(global_rsv, rsv,
5121 min_size);
5122 else
5123 ret = -ENOSPC;
5124 }
5125
5126 /*
5127 * Couldn't steal from the global reserve, we have too much
5128 * pending stuff built up, commit the transaction and try it
5129 * again.
5130 */
5131 if (ret) {
5132 ret = btrfs_commit_transaction(trans, root);
5133 if (ret) {
5134 btrfs_orphan_del(NULL, inode);
5135 btrfs_free_block_rsv(root, rsv);
5136 goto no_delete;
5137 }
5138 continue;
5139 } else {
5140 steal_from_global = 0;
5141 }
5142
5012 trans->block_rsv = rsv; 5143 trans->block_rsv = rsv;
5013 5144
5014 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); 5145 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
5015 if (ret != -ENOSPC) 5146 if (ret != -ENOSPC && ret != -EAGAIN)
5016 break; 5147 break;
5017 5148
5018 trans->block_rsv = &root->fs_info->trans_block_rsv; 5149 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -8581,7 +8712,7 @@ static int btrfs_truncate(struct inode *inode)
8581 ret = btrfs_truncate_inode_items(trans, root, inode, 8712 ret = btrfs_truncate_inode_items(trans, root, inode,
8582 inode->i_size, 8713 inode->i_size,
8583 BTRFS_EXTENT_DATA_KEY); 8714 BTRFS_EXTENT_DATA_KEY);
8584 if (ret != -ENOSPC) { 8715 if (ret != -ENOSPC && ret != -EAGAIN) {
8585 err = ret; 8716 err = ret;
8586 break; 8717 break;
8587 } 8718 }
@@ -9451,6 +9582,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
9451 btrfs_end_transaction(trans, root); 9582 btrfs_end_transaction(trans, root);
9452 break; 9583 break;
9453 } 9584 }
9585
9454 btrfs_drop_extent_cache(inode, cur_offset, 9586 btrfs_drop_extent_cache(inode, cur_offset,
9455 cur_offset + ins.offset -1, 0); 9587 cur_offset + ins.offset -1, 0);
9456 9588
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 74609b931ba5..ca5d968f4c37 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -456,6 +456,13 @@ static noinline int create_subvol(struct inode *dir,
456 if (ret) 456 if (ret)
457 return ret; 457 return ret;
458 458
459 /*
460 * Don't create subvolume whose level is not zero. Or qgroup will be
461 * screwed up since it assume subvolme qgroup's level to be 0.
462 */
463 if (btrfs_qgroup_level(objectid))
464 return -ENOSPC;
465
459 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); 466 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
460 /* 467 /*
461 * The same as the snapshot creation, please see the comment 468 * The same as the snapshot creation, please see the comment
@@ -1564,7 +1571,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1564 goto out_free; 1571 goto out_free;
1565 } 1572 }
1566 1573
1567 do_div(new_size, root->sectorsize); 1574 new_size = div_u64(new_size, root->sectorsize);
1568 new_size *= root->sectorsize; 1575 new_size *= root->sectorsize;
1569 1576
1570 printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n", 1577 printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
@@ -2897,6 +2904,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
2897 if (src == dst) 2904 if (src == dst)
2898 return -EINVAL; 2905 return -EINVAL;
2899 2906
2907 if (len == 0)
2908 return 0;
2909
2900 btrfs_double_lock(src, loff, dst, dst_loff, len); 2910 btrfs_double_lock(src, loff, dst, dst_loff, len);
2901 2911
2902 ret = extent_same_check_offsets(src, loff, len); 2912 ret = extent_same_check_offsets(src, loff, len);
@@ -3039,7 +3049,7 @@ out:
3039static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3049static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3040 u64 disko) 3050 u64 disko)
3041{ 3051{
3042 struct seq_list tree_mod_seq_elem = {}; 3052 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
3043 struct ulist *roots; 3053 struct ulist *roots;
3044 struct ulist_iterator uiter; 3054 struct ulist_iterator uiter;
3045 struct ulist_node *root_node = NULL; 3055 struct ulist_node *root_node = NULL;
@@ -3202,6 +3212,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
3202 key.offset = off; 3212 key.offset = off;
3203 3213
3204 while (1) { 3214 while (1) {
3215 u64 next_key_min_offset = key.offset + 1;
3216
3205 /* 3217 /*
3206 * note the key will change type as we walk through the 3218 * note the key will change type as we walk through the
3207 * tree. 3219 * tree.
@@ -3282,7 +3294,7 @@ process_slot:
3282 } else if (key.offset >= off + len) { 3294 } else if (key.offset >= off + len) {
3283 break; 3295 break;
3284 } 3296 }
3285 3297 next_key_min_offset = key.offset + datal;
3286 size = btrfs_item_size_nr(leaf, slot); 3298 size = btrfs_item_size_nr(leaf, slot);
3287 read_extent_buffer(leaf, buf, 3299 read_extent_buffer(leaf, buf,
3288 btrfs_item_ptr_offset(leaf, slot), 3300 btrfs_item_ptr_offset(leaf, slot),
@@ -3497,7 +3509,7 @@ process_slot:
3497 break; 3509 break;
3498 } 3510 }
3499 btrfs_release_path(path); 3511 btrfs_release_path(path);
3500 key.offset++; 3512 key.offset = next_key_min_offset;
3501 } 3513 }
3502 ret = 0; 3514 ret = 0;
3503 3515
@@ -3626,6 +3638,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3626 if (off + len == src->i_size) 3638 if (off + len == src->i_size)
3627 len = ALIGN(src->i_size, bs) - off; 3639 len = ALIGN(src->i_size, bs) - off;
3628 3640
3641 if (len == 0) {
3642 ret = 0;
3643 goto out_unlock;
3644 }
3645
3629 /* verify the end result is block aligned */ 3646 /* verify the end result is block aligned */
3630 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 3647 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
3631 !IS_ALIGNED(destoff, bs)) 3648 !IS_ALIGNED(destoff, bs))
@@ -4624,6 +4641,11 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
4624 sa->src, sa->dst); 4641 sa->src, sa->dst);
4625 } 4642 }
4626 4643
4644 /* update qgroup status and info */
4645 err = btrfs_run_qgroups(trans, root->fs_info);
4646 if (err < 0)
4647 btrfs_error(root->fs_info, ret,
4648 "failed to update qgroup status and info\n");
4627 err = btrfs_end_transaction(trans, root); 4649 err = btrfs_end_transaction(trans, root);
4628 if (err && !ret) 4650 if (err && !ret)
4629 ret = err; 4651 ret = err;
@@ -4669,8 +4691,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
4669 4691
4670 /* FIXME: check if the IDs really exist */ 4692 /* FIXME: check if the IDs really exist */
4671 if (sa->create) { 4693 if (sa->create) {
4672 ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, 4694 ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid);
4673 NULL);
4674 } else { 4695 } else {
4675 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); 4696 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
4676 } 4697 }
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 617553cdb7d3..a2f051347731 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -434,7 +434,7 @@ out:
434 return ret; 434 return ret;
435} 435}
436 436
437struct btrfs_compress_op btrfs_lzo_compress = { 437const struct btrfs_compress_op btrfs_lzo_compress = {
438 .alloc_workspace = lzo_alloc_workspace, 438 .alloc_workspace = lzo_alloc_workspace,
439 .free_workspace = lzo_free_workspace, 439 .free_workspace = lzo_free_workspace,
440 .compress_pages = lzo_compress_pages, 440 .compress_pages = lzo_compress_pages,
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h
index b7816cefbd13..1b10a3cd1195 100644
--- a/fs/btrfs/math.h
+++ b/fs/btrfs/math.h
@@ -28,8 +28,7 @@ static inline u64 div_factor(u64 num, int factor)
28 if (factor == 10) 28 if (factor == 10)
29 return num; 29 return num;
30 num *= factor; 30 num *= factor;
31 do_div(num, 10); 31 return div_u64(num, 10);
32 return num;
33} 32}
34 33
35static inline u64 div_factor_fine(u64 num, int factor) 34static inline u64 div_factor_fine(u64 num, int factor)
@@ -37,8 +36,7 @@ static inline u64 div_factor_fine(u64 num, int factor)
37 if (factor == 100) 36 if (factor == 100)
38 return num; 37 return num;
39 num *= factor; 38 num *= factor;
40 do_div(num, 100); 39 return div_u64(num, 100);
41 return num;
42} 40}
43 41
44#endif 42#endif
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 129b1dd28527..dca137b04095 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -425,3 +425,5 @@ static const char *prop_compression_extract(struct inode *inode)
425 425
426 return NULL; 426 return NULL;
427} 427}
428
429
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 058c79eecbfb..3d6546581bb9 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -644,9 +644,8 @@ out:
644} 644}
645 645
646static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 646static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
647 struct btrfs_root *root, u64 qgroupid, 647 struct btrfs_root *root,
648 u64 flags, u64 max_rfer, u64 max_excl, 648 struct btrfs_qgroup *qgroup)
649 u64 rsv_rfer, u64 rsv_excl)
650{ 649{
651 struct btrfs_path *path; 650 struct btrfs_path *path;
652 struct btrfs_key key; 651 struct btrfs_key key;
@@ -657,7 +656,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
657 656
658 key.objectid = 0; 657 key.objectid = 0;
659 key.type = BTRFS_QGROUP_LIMIT_KEY; 658 key.type = BTRFS_QGROUP_LIMIT_KEY;
660 key.offset = qgroupid; 659 key.offset = qgroup->qgroupid;
661 660
662 path = btrfs_alloc_path(); 661 path = btrfs_alloc_path();
663 if (!path) 662 if (!path)
@@ -673,11 +672,11 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
673 l = path->nodes[0]; 672 l = path->nodes[0];
674 slot = path->slots[0]; 673 slot = path->slots[0];
675 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 674 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
676 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); 675 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
677 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); 676 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
678 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); 677 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
679 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); 678 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
680 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); 679 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
681 680
682 btrfs_mark_buffer_dirty(l); 681 btrfs_mark_buffer_dirty(l);
683 682
@@ -967,6 +966,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
967 fs_info->pending_quota_state = 0; 966 fs_info->pending_quota_state = 0;
968 quota_root = fs_info->quota_root; 967 quota_root = fs_info->quota_root;
969 fs_info->quota_root = NULL; 968 fs_info->quota_root = NULL;
969 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
970 spin_unlock(&fs_info->qgroup_lock); 970 spin_unlock(&fs_info->qgroup_lock);
971 971
972 btrfs_free_qgroup_config(fs_info); 972 btrfs_free_qgroup_config(fs_info);
@@ -982,7 +982,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
982 list_del(&quota_root->dirty_list); 982 list_del(&quota_root->dirty_list);
983 983
984 btrfs_tree_lock(quota_root->node); 984 btrfs_tree_lock(quota_root->node);
985 clean_tree_block(trans, tree_root, quota_root->node); 985 clean_tree_block(trans, tree_root->fs_info, quota_root->node);
986 btrfs_tree_unlock(quota_root->node); 986 btrfs_tree_unlock(quota_root->node);
987 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 987 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
988 988
@@ -1001,6 +1001,110 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1001 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1001 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1002} 1002}
1003 1003
1004/*
1005 * The easy accounting, if we are adding/removing the only ref for an extent
1006 * then this qgroup and all of the parent qgroups get their refrence and
1007 * exclusive counts adjusted.
1008 *
1009 * Caller should hold fs_info->qgroup_lock.
1010 */
1011static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1012 struct ulist *tmp, u64 ref_root,
1013 u64 num_bytes, int sign)
1014{
1015 struct btrfs_qgroup *qgroup;
1016 struct btrfs_qgroup_list *glist;
1017 struct ulist_node *unode;
1018 struct ulist_iterator uiter;
1019 int ret = 0;
1020
1021 qgroup = find_qgroup_rb(fs_info, ref_root);
1022 if (!qgroup)
1023 goto out;
1024
1025 qgroup->rfer += sign * num_bytes;
1026 qgroup->rfer_cmpr += sign * num_bytes;
1027
1028 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1029 qgroup->excl += sign * num_bytes;
1030 qgroup->excl_cmpr += sign * num_bytes;
1031 if (sign > 0)
1032 qgroup->reserved -= num_bytes;
1033
1034 qgroup_dirty(fs_info, qgroup);
1035
1036 /* Get all of the parent groups that contain this qgroup */
1037 list_for_each_entry(glist, &qgroup->groups, next_group) {
1038 ret = ulist_add(tmp, glist->group->qgroupid,
1039 ptr_to_u64(glist->group), GFP_ATOMIC);
1040 if (ret < 0)
1041 goto out;
1042 }
1043
1044 /* Iterate all of the parents and adjust their reference counts */
1045 ULIST_ITER_INIT(&uiter);
1046 while ((unode = ulist_next(tmp, &uiter))) {
1047 qgroup = u64_to_ptr(unode->aux);
1048 qgroup->rfer += sign * num_bytes;
1049 qgroup->rfer_cmpr += sign * num_bytes;
1050 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1051 qgroup->excl += sign * num_bytes;
1052 if (sign > 0)
1053 qgroup->reserved -= num_bytes;
1054 qgroup->excl_cmpr += sign * num_bytes;
1055 qgroup_dirty(fs_info, qgroup);
1056
1057 /* Add any parents of the parents */
1058 list_for_each_entry(glist, &qgroup->groups, next_group) {
1059 ret = ulist_add(tmp, glist->group->qgroupid,
1060 ptr_to_u64(glist->group), GFP_ATOMIC);
1061 if (ret < 0)
1062 goto out;
1063 }
1064 }
1065 ret = 0;
1066out:
1067 return ret;
1068}
1069
1070
1071/*
1072 * Quick path for updating qgroup with only excl refs.
1073 *
1074 * In that case, just update all parent will be enough.
1075 * Or we needs to do a full rescan.
1076 * Caller should also hold fs_info->qgroup_lock.
1077 *
1078 * Return 0 for quick update, return >0 for need to full rescan
1079 * and mark INCONSISTENT flag.
1080 * Return < 0 for other error.
1081 */
1082static int quick_update_accounting(struct btrfs_fs_info *fs_info,
1083 struct ulist *tmp, u64 src, u64 dst,
1084 int sign)
1085{
1086 struct btrfs_qgroup *qgroup;
1087 int ret = 1;
1088 int err = 0;
1089
1090 qgroup = find_qgroup_rb(fs_info, src);
1091 if (!qgroup)
1092 goto out;
1093 if (qgroup->excl == qgroup->rfer) {
1094 ret = 0;
1095 err = __qgroup_excl_accounting(fs_info, tmp, dst,
1096 qgroup->excl, sign);
1097 if (err < 0) {
1098 ret = err;
1099 goto out;
1100 }
1101 }
1102out:
1103 if (ret)
1104 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1105 return ret;
1106}
1107
1004int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1108int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1005 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1109 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1006{ 1110{
@@ -1008,8 +1112,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1008 struct btrfs_qgroup *parent; 1112 struct btrfs_qgroup *parent;
1009 struct btrfs_qgroup *member; 1113 struct btrfs_qgroup *member;
1010 struct btrfs_qgroup_list *list; 1114 struct btrfs_qgroup_list *list;
1115 struct ulist *tmp;
1011 int ret = 0; 1116 int ret = 0;
1012 1117
1118 tmp = ulist_alloc(GFP_NOFS);
1119 if (!tmp)
1120 return -ENOMEM;
1121
1122 /* Check the level of src and dst first */
1123 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
1124 return -EINVAL;
1125
1013 mutex_lock(&fs_info->qgroup_ioctl_lock); 1126 mutex_lock(&fs_info->qgroup_ioctl_lock);
1014 quota_root = fs_info->quota_root; 1127 quota_root = fs_info->quota_root;
1015 if (!quota_root) { 1128 if (!quota_root) {
@@ -1043,23 +1156,33 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1043 1156
1044 spin_lock(&fs_info->qgroup_lock); 1157 spin_lock(&fs_info->qgroup_lock);
1045 ret = add_relation_rb(quota_root->fs_info, src, dst); 1158 ret = add_relation_rb(quota_root->fs_info, src, dst);
1159 if (ret < 0) {
1160 spin_unlock(&fs_info->qgroup_lock);
1161 goto out;
1162 }
1163 ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
1046 spin_unlock(&fs_info->qgroup_lock); 1164 spin_unlock(&fs_info->qgroup_lock);
1047out: 1165out:
1048 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1166 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1167 ulist_free(tmp);
1049 return ret; 1168 return ret;
1050} 1169}
1051 1170
1052int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1171int __del_qgroup_relation(struct btrfs_trans_handle *trans,
1053 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1172 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1054{ 1173{
1055 struct btrfs_root *quota_root; 1174 struct btrfs_root *quota_root;
1056 struct btrfs_qgroup *parent; 1175 struct btrfs_qgroup *parent;
1057 struct btrfs_qgroup *member; 1176 struct btrfs_qgroup *member;
1058 struct btrfs_qgroup_list *list; 1177 struct btrfs_qgroup_list *list;
1178 struct ulist *tmp;
1059 int ret = 0; 1179 int ret = 0;
1060 int err; 1180 int err;
1061 1181
1062 mutex_lock(&fs_info->qgroup_ioctl_lock); 1182 tmp = ulist_alloc(GFP_NOFS);
1183 if (!tmp)
1184 return -ENOMEM;
1185
1063 quota_root = fs_info->quota_root; 1186 quota_root = fs_info->quota_root;
1064 if (!quota_root) { 1187 if (!quota_root) {
1065 ret = -EINVAL; 1188 ret = -EINVAL;
@@ -1088,14 +1211,27 @@ exist:
1088 1211
1089 spin_lock(&fs_info->qgroup_lock); 1212 spin_lock(&fs_info->qgroup_lock);
1090 del_relation_rb(fs_info, src, dst); 1213 del_relation_rb(fs_info, src, dst);
1214 ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
1091 spin_unlock(&fs_info->qgroup_lock); 1215 spin_unlock(&fs_info->qgroup_lock);
1092out: 1216out:
1217 ulist_free(tmp);
1218 return ret;
1219}
1220
1221int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1222 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1223{
1224 int ret = 0;
1225
1226 mutex_lock(&fs_info->qgroup_ioctl_lock);
1227 ret = __del_qgroup_relation(trans, fs_info, src, dst);
1093 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1228 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1229
1094 return ret; 1230 return ret;
1095} 1231}
1096 1232
1097int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1233int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1098 struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) 1234 struct btrfs_fs_info *fs_info, u64 qgroupid)
1099{ 1235{
1100 struct btrfs_root *quota_root; 1236 struct btrfs_root *quota_root;
1101 struct btrfs_qgroup *qgroup; 1237 struct btrfs_qgroup *qgroup;
@@ -1133,6 +1269,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1133{ 1269{
1134 struct btrfs_root *quota_root; 1270 struct btrfs_root *quota_root;
1135 struct btrfs_qgroup *qgroup; 1271 struct btrfs_qgroup *qgroup;
1272 struct btrfs_qgroup_list *list;
1136 int ret = 0; 1273 int ret = 0;
1137 1274
1138 mutex_lock(&fs_info->qgroup_ioctl_lock); 1275 mutex_lock(&fs_info->qgroup_ioctl_lock);
@@ -1147,15 +1284,24 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1147 ret = -ENOENT; 1284 ret = -ENOENT;
1148 goto out; 1285 goto out;
1149 } else { 1286 } else {
1150 /* check if there are no relations to this qgroup */ 1287 /* check if there are no children of this qgroup */
1151 if (!list_empty(&qgroup->groups) || 1288 if (!list_empty(&qgroup->members)) {
1152 !list_empty(&qgroup->members)) {
1153 ret = -EBUSY; 1289 ret = -EBUSY;
1154 goto out; 1290 goto out;
1155 } 1291 }
1156 } 1292 }
1157 ret = del_qgroup_item(trans, quota_root, qgroupid); 1293 ret = del_qgroup_item(trans, quota_root, qgroupid);
1158 1294
1295 while (!list_empty(&qgroup->groups)) {
1296 list = list_first_entry(&qgroup->groups,
1297 struct btrfs_qgroup_list, next_group);
1298 ret = __del_qgroup_relation(trans, fs_info,
1299 qgroupid,
1300 list->group->qgroupid);
1301 if (ret)
1302 goto out;
1303 }
1304
1159 spin_lock(&fs_info->qgroup_lock); 1305 spin_lock(&fs_info->qgroup_lock);
1160 del_qgroup_rb(quota_root->fs_info, qgroupid); 1306 del_qgroup_rb(quota_root->fs_info, qgroupid);
1161 spin_unlock(&fs_info->qgroup_lock); 1307 spin_unlock(&fs_info->qgroup_lock);
@@ -1184,23 +1330,27 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1184 ret = -ENOENT; 1330 ret = -ENOENT;
1185 goto out; 1331 goto out;
1186 } 1332 }
1187 ret = update_qgroup_limit_item(trans, quota_root, qgroupid, 1333
1188 limit->flags, limit->max_rfer, 1334 spin_lock(&fs_info->qgroup_lock);
1189 limit->max_excl, limit->rsv_rfer, 1335 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
1190 limit->rsv_excl); 1336 qgroup->max_rfer = limit->max_rfer;
1337 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
1338 qgroup->max_excl = limit->max_excl;
1339 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
1340 qgroup->rsv_rfer = limit->rsv_rfer;
1341 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
1342 qgroup->rsv_excl = limit->rsv_excl;
1343 qgroup->lim_flags |= limit->flags;
1344
1345 spin_unlock(&fs_info->qgroup_lock);
1346
1347 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
1191 if (ret) { 1348 if (ret) {
1192 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1349 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1193 btrfs_info(fs_info, "unable to update quota limit for %llu", 1350 btrfs_info(fs_info, "unable to update quota limit for %llu",
1194 qgroupid); 1351 qgroupid);
1195 } 1352 }
1196 1353
1197 spin_lock(&fs_info->qgroup_lock);
1198 qgroup->lim_flags = limit->flags;
1199 qgroup->max_rfer = limit->max_rfer;
1200 qgroup->max_excl = limit->max_excl;
1201 qgroup->rsv_rfer = limit->rsv_rfer;
1202 qgroup->rsv_excl = limit->rsv_excl;
1203 spin_unlock(&fs_info->qgroup_lock);
1204out: 1354out:
1205 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1355 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1206 return ret; 1356 return ret;
@@ -1256,14 +1406,14 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
1256 return -1; 1406 return -1;
1257 if (oper1->bytenr > oper2->bytenr) 1407 if (oper1->bytenr > oper2->bytenr)
1258 return 1; 1408 return 1;
1259 if (oper1->seq < oper2->seq)
1260 return -1;
1261 if (oper1->seq > oper2->seq)
1262 return 1;
1263 if (oper1->ref_root < oper2->ref_root) 1409 if (oper1->ref_root < oper2->ref_root)
1264 return -1; 1410 return -1;
1265 if (oper1->ref_root > oper2->ref_root) 1411 if (oper1->ref_root > oper2->ref_root)
1266 return 1; 1412 return 1;
1413 if (oper1->seq < oper2->seq)
1414 return -1;
1415 if (oper1->seq > oper2->seq)
1416 return 1;
1267 if (oper1->type < oper2->type) 1417 if (oper1->type < oper2->type)
1268 return -1; 1418 return -1;
1269 if (oper1->type > oper2->type) 1419 if (oper1->type > oper2->type)
@@ -1372,19 +1522,10 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1372 return 0; 1522 return 0;
1373} 1523}
1374 1524
1375/*
1376 * The easy accounting, if we are adding/removing the only ref for an extent
1377 * then this qgroup and all of the parent qgroups get their refrence and
1378 * exclusive counts adjusted.
1379 */
1380static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1525static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1381 struct btrfs_qgroup_operation *oper) 1526 struct btrfs_qgroup_operation *oper)
1382{ 1527{
1383 struct btrfs_qgroup *qgroup;
1384 struct ulist *tmp; 1528 struct ulist *tmp;
1385 struct btrfs_qgroup_list *glist;
1386 struct ulist_node *unode;
1387 struct ulist_iterator uiter;
1388 int sign = 0; 1529 int sign = 0;
1389 int ret = 0; 1530 int ret = 0;
1390 1531
@@ -1395,9 +1536,7 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1395 spin_lock(&fs_info->qgroup_lock); 1536 spin_lock(&fs_info->qgroup_lock);
1396 if (!fs_info->quota_root) 1537 if (!fs_info->quota_root)
1397 goto out; 1538 goto out;
1398 qgroup = find_qgroup_rb(fs_info, oper->ref_root); 1539
1399 if (!qgroup)
1400 goto out;
1401 switch (oper->type) { 1540 switch (oper->type) {
1402 case BTRFS_QGROUP_OPER_ADD_EXCL: 1541 case BTRFS_QGROUP_OPER_ADD_EXCL:
1403 sign = 1; 1542 sign = 1;
@@ -1408,43 +1547,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1408 default: 1547 default:
1409 ASSERT(0); 1548 ASSERT(0);
1410 } 1549 }
1411 qgroup->rfer += sign * oper->num_bytes; 1550 ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
1412 qgroup->rfer_cmpr += sign * oper->num_bytes; 1551 oper->num_bytes, sign);
1413
1414 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1415 qgroup->excl += sign * oper->num_bytes;
1416 qgroup->excl_cmpr += sign * oper->num_bytes;
1417
1418 qgroup_dirty(fs_info, qgroup);
1419
1420 /* Get all of the parent groups that contain this qgroup */
1421 list_for_each_entry(glist, &qgroup->groups, next_group) {
1422 ret = ulist_add(tmp, glist->group->qgroupid,
1423 ptr_to_u64(glist->group), GFP_ATOMIC);
1424 if (ret < 0)
1425 goto out;
1426 }
1427
1428 /* Iterate all of the parents and adjust their reference counts */
1429 ULIST_ITER_INIT(&uiter);
1430 while ((unode = ulist_next(tmp, &uiter))) {
1431 qgroup = u64_to_ptr(unode->aux);
1432 qgroup->rfer += sign * oper->num_bytes;
1433 qgroup->rfer_cmpr += sign * oper->num_bytes;
1434 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1435 qgroup->excl += sign * oper->num_bytes;
1436 qgroup->excl_cmpr += sign * oper->num_bytes;
1437 qgroup_dirty(fs_info, qgroup);
1438
1439 /* Add any parents of the parents */
1440 list_for_each_entry(glist, &qgroup->groups, next_group) {
1441 ret = ulist_add(tmp, glist->group->qgroupid,
1442 ptr_to_u64(glist->group), GFP_ATOMIC);
1443 if (ret < 0)
1444 goto out;
1445 }
1446 }
1447 ret = 0;
1448out: 1552out:
1449 spin_unlock(&fs_info->qgroup_lock); 1553 spin_unlock(&fs_info->qgroup_lock);
1450 ulist_free(tmp); 1554 ulist_free(tmp);
@@ -1845,7 +1949,7 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1845 struct ulist *roots = NULL; 1949 struct ulist *roots = NULL;
1846 struct ulist *qgroups, *tmp; 1950 struct ulist *qgroups, *tmp;
1847 struct btrfs_qgroup *qgroup; 1951 struct btrfs_qgroup *qgroup;
1848 struct seq_list elem = {}; 1952 struct seq_list elem = SEQ_LIST_INIT(elem);
1849 u64 seq; 1953 u64 seq;
1850 int old_roots = 0; 1954 int old_roots = 0;
1851 int new_roots = 0; 1955 int new_roots = 0;
@@ -1967,7 +2071,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1967 int err; 2071 int err;
1968 struct btrfs_qgroup *qg; 2072 struct btrfs_qgroup *qg;
1969 u64 root_obj = 0; 2073 u64 root_obj = 0;
1970 struct seq_list elem = {}; 2074 struct seq_list elem = SEQ_LIST_INIT(elem);
1971 2075
1972 parents = ulist_alloc(GFP_NOFS); 2076 parents = ulist_alloc(GFP_NOFS);
1973 if (!parents) 2077 if (!parents)
@@ -2156,6 +2260,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
2156 if (ret) 2260 if (ret)
2157 fs_info->qgroup_flags |= 2261 fs_info->qgroup_flags |=
2158 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2262 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2263 ret = update_qgroup_limit_item(trans, quota_root, qgroup);
2264 if (ret)
2265 fs_info->qgroup_flags |=
2266 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2159 spin_lock(&fs_info->qgroup_lock); 2267 spin_lock(&fs_info->qgroup_lock);
2160 } 2268 }
2161 if (fs_info->quota_enabled) 2269 if (fs_info->quota_enabled)
@@ -2219,6 +2327,11 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2219 ret = -EINVAL; 2327 ret = -EINVAL;
2220 goto out; 2328 goto out;
2221 } 2329 }
2330
2331 if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
2332 ret = -EINVAL;
2333 goto out;
2334 }
2222 ++i_qgroups; 2335 ++i_qgroups;
2223 } 2336 }
2224 } 2337 }
@@ -2230,17 +2343,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2230 if (ret) 2343 if (ret)
2231 goto out; 2344 goto out;
2232 2345
2233 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2234 ret = update_qgroup_limit_item(trans, quota_root, objectid,
2235 inherit->lim.flags,
2236 inherit->lim.max_rfer,
2237 inherit->lim.max_excl,
2238 inherit->lim.rsv_rfer,
2239 inherit->lim.rsv_excl);
2240 if (ret)
2241 goto out;
2242 }
2243
2244 if (srcid) { 2346 if (srcid) {
2245 struct btrfs_root *srcroot; 2347 struct btrfs_root *srcroot;
2246 struct btrfs_key srckey; 2348 struct btrfs_key srckey;
@@ -2286,6 +2388,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2286 goto unlock; 2388 goto unlock;
2287 } 2389 }
2288 2390
2391 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2392 dstgroup->lim_flags = inherit->lim.flags;
2393 dstgroup->max_rfer = inherit->lim.max_rfer;
2394 dstgroup->max_excl = inherit->lim.max_excl;
2395 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
2396 dstgroup->rsv_excl = inherit->lim.rsv_excl;
2397
2398 ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
2399 if (ret) {
2400 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2401 btrfs_info(fs_info, "unable to update quota limit for %llu",
2402 dstgroup->qgroupid);
2403 goto unlock;
2404 }
2405 }
2406
2289 if (srcid) { 2407 if (srcid) {
2290 srcgroup = find_qgroup_rb(fs_info, srcid); 2408 srcgroup = find_qgroup_rb(fs_info, srcid);
2291 if (!srcgroup) 2409 if (!srcgroup)
@@ -2302,6 +2420,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2302 dstgroup->excl_cmpr = level_size; 2420 dstgroup->excl_cmpr = level_size;
2303 srcgroup->excl = level_size; 2421 srcgroup->excl = level_size;
2304 srcgroup->excl_cmpr = level_size; 2422 srcgroup->excl_cmpr = level_size;
2423
2424 /* inherit the limit info */
2425 dstgroup->lim_flags = srcgroup->lim_flags;
2426 dstgroup->max_rfer = srcgroup->max_rfer;
2427 dstgroup->max_excl = srcgroup->max_excl;
2428 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
2429 dstgroup->rsv_excl = srcgroup->rsv_excl;
2430
2305 qgroup_dirty(fs_info, dstgroup); 2431 qgroup_dirty(fs_info, dstgroup);
2306 qgroup_dirty(fs_info, srcgroup); 2432 qgroup_dirty(fs_info, srcgroup);
2307 } 2433 }
@@ -2358,12 +2484,6 @@ out:
2358 return ret; 2484 return ret;
2359} 2485}
2360 2486
2361/*
2362 * reserve some space for a qgroup and all its parents. The reservation takes
2363 * place with start_transaction or dealloc_reserve, similar to ENOSPC
2364 * accounting. If not enough space is available, EDQUOT is returned.
2365 * We assume that the requested space is new for all qgroups.
2366 */
2367int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) 2487int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2368{ 2488{
2369 struct btrfs_root *quota_root; 2489 struct btrfs_root *quota_root;
@@ -2513,7 +2633,7 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2513 2633
2514/* 2634/*
2515 * returns < 0 on error, 0 when more leafs are to be scanned. 2635 * returns < 0 on error, 0 when more leafs are to be scanned.
2516 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. 2636 * returns 1 when done.
2517 */ 2637 */
2518static int 2638static int
2519qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2639qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
@@ -2522,7 +2642,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2522{ 2642{
2523 struct btrfs_key found; 2643 struct btrfs_key found;
2524 struct ulist *roots = NULL; 2644 struct ulist *roots = NULL;
2525 struct seq_list tree_mod_seq_elem = {}; 2645 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
2526 u64 num_bytes; 2646 u64 num_bytes;
2527 u64 seq; 2647 u64 seq;
2528 int new_roots; 2648 int new_roots;
@@ -2618,6 +2738,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2618 struct ulist *tmp = NULL, *qgroups = NULL; 2738 struct ulist *tmp = NULL, *qgroups = NULL;
2619 struct extent_buffer *scratch_leaf = NULL; 2739 struct extent_buffer *scratch_leaf = NULL;
2620 int err = -ENOMEM; 2740 int err = -ENOMEM;
2741 int ret = 0;
2621 2742
2622 path = btrfs_alloc_path(); 2743 path = btrfs_alloc_path();
2623 if (!path) 2744 if (!path)
@@ -2660,7 +2781,7 @@ out:
2660 mutex_lock(&fs_info->qgroup_rescan_lock); 2781 mutex_lock(&fs_info->qgroup_rescan_lock);
2661 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2782 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2662 2783
2663 if (err == 2 && 2784 if (err > 0 &&
2664 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2785 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2665 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2786 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2666 } else if (err < 0) { 2787 } else if (err < 0) {
@@ -2668,13 +2789,33 @@ out:
2668 } 2789 }
2669 mutex_unlock(&fs_info->qgroup_rescan_lock); 2790 mutex_unlock(&fs_info->qgroup_rescan_lock);
2670 2791
2792 /*
2793 * only update status, since the previous part has already updated the
2794 * qgroup info.
2795 */
2796 trans = btrfs_start_transaction(fs_info->quota_root, 1);
2797 if (IS_ERR(trans)) {
2798 err = PTR_ERR(trans);
2799 btrfs_err(fs_info,
2800 "fail to start transaction for status update: %d\n",
2801 err);
2802 goto done;
2803 }
2804 ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
2805 if (ret < 0) {
2806 err = ret;
2807 btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
2808 }
2809 btrfs_end_transaction(trans, fs_info->quota_root);
2810
2671 if (err >= 0) { 2811 if (err >= 0) {
2672 btrfs_info(fs_info, "qgroup scan completed%s", 2812 btrfs_info(fs_info, "qgroup scan completed%s",
2673 err == 2 ? " (inconsistency flag cleared)" : ""); 2813 err > 0 ? " (inconsistency flag cleared)" : "");
2674 } else { 2814 } else {
2675 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2815 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2676 } 2816 }
2677 2817
2818done:
2678 complete_all(&fs_info->qgroup_rescan_completion); 2819 complete_all(&fs_info->qgroup_rescan_completion);
2679} 2820}
2680 2821
@@ -2709,7 +2850,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2709 mutex_unlock(&fs_info->qgroup_rescan_lock); 2850 mutex_unlock(&fs_info->qgroup_rescan_lock);
2710 goto err; 2851 goto err;
2711 } 2852 }
2712
2713 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2853 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2714 } 2854 }
2715 2855
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 18cc68ca3090..c5242aa9a4b2 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -70,8 +70,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
70int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 70int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
71 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 71 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
72int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 72int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
73 struct btrfs_fs_info *fs_info, u64 qgroupid, 73 struct btrfs_fs_info *fs_info, u64 qgroupid);
74 char *name);
75int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 74int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
76 struct btrfs_fs_info *fs_info, u64 qgroupid); 75 struct btrfs_fs_info *fs_info, u64 qgroupid);
77int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 76int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 5264858ed768..fa72068bd256 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -237,12 +237,8 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
237 } 237 }
238 238
239 x = cmpxchg(&info->stripe_hash_table, NULL, table); 239 x = cmpxchg(&info->stripe_hash_table, NULL, table);
240 if (x) { 240 if (x)
241 if (is_vmalloc_addr(x)) 241 kvfree(x);
242 vfree(x);
243 else
244 kfree(x);
245 }
246 return 0; 242 return 0;
247} 243}
248 244
@@ -453,10 +449,7 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
453 if (!info->stripe_hash_table) 449 if (!info->stripe_hash_table)
454 return; 450 return;
455 btrfs_clear_rbio_cache(info); 451 btrfs_clear_rbio_cache(info);
456 if (is_vmalloc_addr(info->stripe_hash_table)) 452 kvfree(info->stripe_hash_table);
457 vfree(info->stripe_hash_table);
458 else
459 kfree(info->stripe_hash_table);
460 info->stripe_hash_table = NULL; 453 info->stripe_hash_table = NULL;
461} 454}
462 455
@@ -1807,8 +1800,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
1807 int err; 1800 int err;
1808 int i; 1801 int i;
1809 1802
1810 pointers = kzalloc(rbio->real_stripes * sizeof(void *), 1803 pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
1811 GFP_NOFS);
1812 if (!pointers) { 1804 if (!pointers) {
1813 err = -ENOMEM; 1805 err = -ENOMEM;
1814 goto cleanup_io; 1806 goto cleanup_io;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d83085381bcc..74b24b01d574 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
3027 mutex_lock(&inode->i_mutex); 3027 mutex_lock(&inode->i_mutex);
3028 3028
3029 ret = btrfs_check_data_free_space(inode, cluster->end + 3029 ret = btrfs_check_data_free_space(inode, cluster->end +
3030 1 - cluster->start); 3030 1 - cluster->start, 0);
3031 if (ret) 3031 if (ret)
3032 goto out; 3032 goto out;
3033 3033
@@ -3430,7 +3430,9 @@ static int block_use_full_backref(struct reloc_control *rc,
3430} 3430}
3431 3431
3432static int delete_block_group_cache(struct btrfs_fs_info *fs_info, 3432static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3433 struct inode *inode, u64 ino) 3433 struct btrfs_block_group_cache *block_group,
3434 struct inode *inode,
3435 u64 ino)
3434{ 3436{
3435 struct btrfs_key key; 3437 struct btrfs_key key;
3436 struct btrfs_root *root = fs_info->tree_root; 3438 struct btrfs_root *root = fs_info->tree_root;
@@ -3463,7 +3465,7 @@ truncate:
3463 goto out; 3465 goto out;
3464 } 3466 }
3465 3467
3466 ret = btrfs_truncate_free_space_cache(root, trans, inode); 3468 ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode);
3467 3469
3468 btrfs_end_transaction(trans, root); 3470 btrfs_end_transaction(trans, root);
3469 btrfs_btree_balance_dirty(root); 3471 btrfs_btree_balance_dirty(root);
@@ -3509,6 +3511,7 @@ static int find_data_references(struct reloc_control *rc,
3509 */ 3511 */
3510 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { 3512 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3511 ret = delete_block_group_cache(rc->extent_root->fs_info, 3513 ret = delete_block_group_cache(rc->extent_root->fs_info,
3514 rc->block_group,
3512 NULL, ref_objectid); 3515 NULL, ref_objectid);
3513 if (ret != -ENOENT) 3516 if (ret != -ENOENT)
3514 return ret; 3517 return ret;
@@ -4223,7 +4226,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4223 btrfs_free_path(path); 4226 btrfs_free_path(path);
4224 4227
4225 if (!IS_ERR(inode)) 4228 if (!IS_ERR(inode))
4226 ret = delete_block_group_cache(fs_info, inode, 0); 4229 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
4227 else 4230 else
4228 ret = PTR_ERR(inode); 4231 ret = PTR_ERR(inode);
4229 4232
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ec57687c9a4d..ab5811545a98 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -964,9 +964,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
964 * the statistics. 964 * the statistics.
965 */ 965 */
966 966
967 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS * 967 sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
968 sizeof(*sblocks_for_recheck), 968 sizeof(*sblocks_for_recheck), GFP_NOFS);
969 GFP_NOFS);
970 if (!sblocks_for_recheck) { 969 if (!sblocks_for_recheck) {
971 spin_lock(&sctx->stat_lock); 970 spin_lock(&sctx->stat_lock);
972 sctx->stat.malloc_errors++; 971 sctx->stat.malloc_errors++;
@@ -2319,7 +2318,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2319 unsigned long *bitmap, 2318 unsigned long *bitmap,
2320 u64 start, u64 len) 2319 u64 start, u64 len)
2321{ 2320{
2322 int offset; 2321 u32 offset;
2323 int nsectors; 2322 int nsectors;
2324 int sectorsize = sparity->sctx->dev_root->sectorsize; 2323 int sectorsize = sparity->sctx->dev_root->sectorsize;
2325 2324
@@ -2329,7 +2328,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2329 } 2328 }
2330 2329
2331 start -= sparity->logic_start; 2330 start -= sparity->logic_start;
2332 offset = (int)do_div(start, sparity->stripe_len); 2331 start = div_u64_rem(start, sparity->stripe_len, &offset);
2333 offset /= sectorsize; 2332 offset /= sectorsize;
2334 nsectors = (int)len / sectorsize; 2333 nsectors = (int)len / sectorsize;
2335 2334
@@ -2612,8 +2611,8 @@ static int get_raid56_logic_offset(u64 physical, int num,
2612 int j = 0; 2611 int j = 0;
2613 u64 stripe_nr; 2612 u64 stripe_nr;
2614 u64 last_offset; 2613 u64 last_offset;
2615 int stripe_index; 2614 u32 stripe_index;
2616 int rot; 2615 u32 rot;
2617 2616
2618 last_offset = (physical - map->stripes[num].physical) * 2617 last_offset = (physical - map->stripes[num].physical) *
2619 nr_data_stripes(map); 2618 nr_data_stripes(map);
@@ -2624,12 +2623,11 @@ static int get_raid56_logic_offset(u64 physical, int num,
2624 for (i = 0; i < nr_data_stripes(map); i++) { 2623 for (i = 0; i < nr_data_stripes(map); i++) {
2625 *offset = last_offset + i * map->stripe_len; 2624 *offset = last_offset + i * map->stripe_len;
2626 2625
2627 stripe_nr = *offset; 2626 stripe_nr = div_u64(*offset, map->stripe_len);
2628 do_div(stripe_nr, map->stripe_len); 2627 stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
2629 do_div(stripe_nr, nr_data_stripes(map));
2630 2628
2631 /* Work out the disk rotation on this stripe-set */ 2629 /* Work out the disk rotation on this stripe-set */
2632 rot = do_div(stripe_nr, map->num_stripes); 2630 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
2633 /* calculate which stripe this data locates */ 2631 /* calculate which stripe this data locates */
2634 rot += i; 2632 rot += i;
2635 stripe_index = rot % map->num_stripes; 2633 stripe_index = rot % map->num_stripes;
@@ -2995,10 +2993,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2995 int extent_mirror_num; 2993 int extent_mirror_num;
2996 int stop_loop = 0; 2994 int stop_loop = 0;
2997 2995
2998 nstripes = length;
2999 physical = map->stripes[num].physical; 2996 physical = map->stripes[num].physical;
3000 offset = 0; 2997 offset = 0;
3001 do_div(nstripes, map->stripe_len); 2998 nstripes = div_u64(length, map->stripe_len);
3002 if (map->type & BTRFS_BLOCK_GROUP_RAID0) { 2999 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3003 offset = map->stripe_len * num; 3000 offset = map->stripe_len * num;
3004 increment = map->stripe_len * map->num_stripes; 3001 increment = map->stripe_len * map->num_stripes;
@@ -3563,7 +3560,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3563 int is_dev_replace) 3560 int is_dev_replace)
3564{ 3561{
3565 int ret = 0; 3562 int ret = 0;
3566 int flags = WQ_FREEZABLE | WQ_UNBOUND; 3563 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
3567 int max_active = fs_info->thread_pool_size; 3564 int max_active = fs_info->thread_pool_size;
3568 3565
3569 if (fs_info->scrub_workers_refcnt == 0) { 3566 if (fs_info->scrub_workers_refcnt == 0) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d6033f540cc7..a1216f9b4917 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3067,48 +3067,6 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
3067 return NULL; 3067 return NULL;
3068} 3068}
3069 3069
3070static int path_loop(struct send_ctx *sctx, struct fs_path *name,
3071 u64 ino, u64 gen, u64 *ancestor_ino)
3072{
3073 int ret = 0;
3074 u64 parent_inode = 0;
3075 u64 parent_gen = 0;
3076 u64 start_ino = ino;
3077
3078 *ancestor_ino = 0;
3079 while (ino != BTRFS_FIRST_FREE_OBJECTID) {
3080 fs_path_reset(name);
3081
3082 if (is_waiting_for_rm(sctx, ino))
3083 break;
3084 if (is_waiting_for_move(sctx, ino)) {
3085 if (*ancestor_ino == 0)
3086 *ancestor_ino = ino;
3087 ret = get_first_ref(sctx->parent_root, ino,
3088 &parent_inode, &parent_gen, name);
3089 } else {
3090 ret = __get_cur_name_and_parent(sctx, ino, gen,
3091 &parent_inode,
3092 &parent_gen, name);
3093 if (ret > 0) {
3094 ret = 0;
3095 break;
3096 }
3097 }
3098 if (ret < 0)
3099 break;
3100 if (parent_inode == start_ino) {
3101 ret = 1;
3102 if (*ancestor_ino == 0)
3103 *ancestor_ino = ino;
3104 break;
3105 }
3106 ino = parent_inode;
3107 gen = parent_gen;
3108 }
3109 return ret;
3110}
3111
3112static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3070static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3113{ 3071{
3114 struct fs_path *from_path = NULL; 3072 struct fs_path *from_path = NULL;
@@ -3120,7 +3078,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3120 struct waiting_dir_move *dm = NULL; 3078 struct waiting_dir_move *dm = NULL;
3121 u64 rmdir_ino = 0; 3079 u64 rmdir_ino = 0;
3122 int ret; 3080 int ret;
3123 u64 ancestor = 0;
3124 3081
3125 name = fs_path_alloc(); 3082 name = fs_path_alloc();
3126 from_path = fs_path_alloc(); 3083 from_path = fs_path_alloc();
@@ -3152,22 +3109,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3152 goto out; 3109 goto out;
3153 3110
3154 sctx->send_progress = sctx->cur_ino + 1; 3111 sctx->send_progress = sctx->cur_ino + 1;
3155 ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
3156 if (ret) {
3157 LIST_HEAD(deleted_refs);
3158 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3159 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3160 &pm->update_refs, &deleted_refs,
3161 pm->is_orphan);
3162 if (ret < 0)
3163 goto out;
3164 if (rmdir_ino) {
3165 dm = get_waiting_dir_move(sctx, pm->ino);
3166 ASSERT(dm);
3167 dm->rmdir_ino = rmdir_ino;
3168 }
3169 goto out;
3170 }
3171 fs_path_reset(name); 3112 fs_path_reset(name);
3172 to_path = name; 3113 to_path = name;
3173 name = NULL; 3114 name = NULL;
@@ -3610,10 +3551,27 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3610 if (ret < 0) 3551 if (ret < 0)
3611 goto out; 3552 goto out;
3612 if (ret) { 3553 if (ret) {
3554 struct name_cache_entry *nce;
3555
3613 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3556 ret = orphanize_inode(sctx, ow_inode, ow_gen,
3614 cur->full_path); 3557 cur->full_path);
3615 if (ret < 0) 3558 if (ret < 0)
3616 goto out; 3559 goto out;
3560 /*
3561 * Make sure we clear our orphanized inode's
3562 * name from the name cache. This is because the
3563 * inode ow_inode might be an ancestor of some
3564 * other inode that will be orphanized as well
3565 * later and has an inode number greater than
3566 * sctx->send_progress. We need to prevent
3567 * future name lookups from using the old name
3568 * and get instead the orphan name.
3569 */
3570 nce = name_cache_search(sctx, ow_inode, ow_gen);
3571 if (nce) {
3572 name_cache_delete(sctx, nce);
3573 kfree(nce);
3574 }
3617 } else { 3575 } else {
3618 ret = send_unlink(sctx, cur->full_path); 3576 ret = send_unlink(sctx, cur->full_path);
3619 if (ret < 0) 3577 if (ret < 0)
@@ -5852,19 +5810,20 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5852 ret = PTR_ERR(clone_root); 5810 ret = PTR_ERR(clone_root);
5853 goto out; 5811 goto out;
5854 } 5812 }
5855 clone_sources_to_rollback = i + 1;
5856 spin_lock(&clone_root->root_item_lock); 5813 spin_lock(&clone_root->root_item_lock);
5857 clone_root->send_in_progress++; 5814 if (!btrfs_root_readonly(clone_root) ||
5858 if (!btrfs_root_readonly(clone_root)) { 5815 btrfs_root_dead(clone_root)) {
5859 spin_unlock(&clone_root->root_item_lock); 5816 spin_unlock(&clone_root->root_item_lock);
5860 srcu_read_unlock(&fs_info->subvol_srcu, index); 5817 srcu_read_unlock(&fs_info->subvol_srcu, index);
5861 ret = -EPERM; 5818 ret = -EPERM;
5862 goto out; 5819 goto out;
5863 } 5820 }
5821 clone_root->send_in_progress++;
5864 spin_unlock(&clone_root->root_item_lock); 5822 spin_unlock(&clone_root->root_item_lock);
5865 srcu_read_unlock(&fs_info->subvol_srcu, index); 5823 srcu_read_unlock(&fs_info->subvol_srcu, index);
5866 5824
5867 sctx->clone_roots[i].root = clone_root; 5825 sctx->clone_roots[i].root = clone_root;
5826 clone_sources_to_rollback = i + 1;
5868 } 5827 }
5869 vfree(clone_sources_tmp); 5828 vfree(clone_sources_tmp);
5870 clone_sources_tmp = NULL; 5829 clone_sources_tmp = NULL;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 05fef198ff94..f2c9f9db3b19 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -901,6 +901,15 @@ find_root:
901 if (IS_ERR(new_root)) 901 if (IS_ERR(new_root))
902 return ERR_CAST(new_root); 902 return ERR_CAST(new_root);
903 903
904 if (!(sb->s_flags & MS_RDONLY)) {
905 int ret;
906 down_read(&fs_info->cleanup_work_sem);
907 ret = btrfs_orphan_cleanup(new_root);
908 up_read(&fs_info->cleanup_work_sem);
909 if (ret)
910 return ERR_PTR(ret);
911 }
912
904 dir_id = btrfs_root_dirid(&new_root->root_item); 913 dir_id = btrfs_root_dirid(&new_root->root_item);
905setup_root: 914setup_root:
906 location.objectid = dir_id; 915 location.objectid = dir_id;
@@ -1714,7 +1723,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1714 avail_space = device->total_bytes - device->bytes_used; 1723 avail_space = device->total_bytes - device->bytes_used;
1715 1724
1716 /* align with stripe_len */ 1725 /* align with stripe_len */
1717 do_div(avail_space, BTRFS_STRIPE_LEN); 1726 avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
1718 avail_space *= BTRFS_STRIPE_LEN; 1727 avail_space *= BTRFS_STRIPE_LEN;
1719 1728
1720 /* 1729 /*
@@ -1908,6 +1917,17 @@ static struct file_system_type btrfs_fs_type = {
1908}; 1917};
1909MODULE_ALIAS_FS("btrfs"); 1918MODULE_ALIAS_FS("btrfs");
1910 1919
1920static int btrfs_control_open(struct inode *inode, struct file *file)
1921{
1922 /*
1923 * The control file's private_data is used to hold the
1924 * transaction when it is started and is used to keep
1925 * track of whether a transaction is already in progress.
1926 */
1927 file->private_data = NULL;
1928 return 0;
1929}
1930
1911/* 1931/*
1912 * used by btrfsctl to scan devices when no FS is mounted 1932 * used by btrfsctl to scan devices when no FS is mounted
1913 */ 1933 */
@@ -2009,6 +2029,7 @@ static const struct super_operations btrfs_super_ops = {
2009}; 2029};
2010 2030
2011static const struct file_operations btrfs_ctl_fops = { 2031static const struct file_operations btrfs_ctl_fops = {
2032 .open = btrfs_control_open,
2012 .unlocked_ioctl = btrfs_control_ioctl, 2033 .unlocked_ioctl = btrfs_control_ioctl,
2013 .compat_ioctl = btrfs_control_ioctl, 2034 .compat_ioctl = btrfs_control_ioctl,
2014 .owner = THIS_MODULE, 2035 .owner = THIS_MODULE,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 94edb0a2a026..e8a4c86d274d 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -459,7 +459,7 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
459static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13]; 459static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
460static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS]; 460static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
461 461
462static u64 supported_feature_masks[3] = { 462static const u64 supported_feature_masks[3] = {
463 [FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP, 463 [FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
464 [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP, 464 [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
465 [FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP, 465 [FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP,
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index f7dd298b3cf6..3a4bbed723fd 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -61,11 +61,23 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
61 BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) 61 BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
62 62
63/* convert from attribute */ 63/* convert from attribute */
64#define to_btrfs_feature_attr(a) \ 64static inline struct btrfs_feature_attr *
65 container_of(a, struct btrfs_feature_attr, kobj_attr) 65to_btrfs_feature_attr(struct kobj_attribute *a)
66#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr) 66{
67#define attr_to_btrfs_feature_attr(a) \ 67 return container_of(a, struct btrfs_feature_attr, kobj_attr);
68 to_btrfs_feature_attr(attr_to_btrfs_attr(a)) 68}
69
70static inline struct kobj_attribute *attr_to_btrfs_attr(struct attribute *attr)
71{
72 return container_of(attr, struct kobj_attribute, attr);
73}
74
75static inline struct btrfs_feature_attr *
76attr_to_btrfs_feature_attr(struct attribute *attr)
77{
78 return to_btrfs_feature_attr(attr_to_btrfs_attr(attr));
79}
80
69char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); 81char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
70extern const char * const btrfs_feature_set_names[3]; 82extern const char * const btrfs_feature_set_names[3];
71extern struct kobj_type space_info_ktype; 83extern struct kobj_type space_info_ktype;
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 73f299ebdabb..c32a7ba76bca 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -232,7 +232,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
232 init_dummy_trans(&trans); 232 init_dummy_trans(&trans);
233 233
234 test_msg("Qgroup basic add\n"); 234 test_msg("Qgroup basic add\n");
235 ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL); 235 ret = btrfs_create_qgroup(NULL, fs_info, 5);
236 if (ret) { 236 if (ret) {
237 test_msg("Couldn't create a qgroup %d\n", ret); 237 test_msg("Couldn't create a qgroup %d\n", ret);
238 return ret; 238 return ret;
@@ -301,7 +301,7 @@ static int test_multiple_refs(struct btrfs_root *root)
301 test_msg("Qgroup multiple refs test\n"); 301 test_msg("Qgroup multiple refs test\n");
302 302
303 /* We have 5 created already from the previous test */ 303 /* We have 5 created already from the previous test */
304 ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL); 304 ret = btrfs_create_qgroup(NULL, fs_info, 256);
305 if (ret) { 305 if (ret) {
306 test_msg("Couldn't create a qgroup %d\n", ret); 306 test_msg("Couldn't create a qgroup %d\n", ret);
307 return ret; 307 return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8be4278e25e8..5628e25250c0 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -35,7 +35,7 @@
35 35
36#define BTRFS_ROOT_TRANS_TAG 0 36#define BTRFS_ROOT_TRANS_TAG 0
37 37
38static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { 38static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
39 [TRANS_STATE_RUNNING] = 0U, 39 [TRANS_STATE_RUNNING] = 0U,
40 [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | 40 [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE |
41 __TRANS_START), 41 __TRANS_START),
@@ -64,6 +64,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
64 if (atomic_dec_and_test(&transaction->use_count)) { 64 if (atomic_dec_and_test(&transaction->use_count)) {
65 BUG_ON(!list_empty(&transaction->list)); 65 BUG_ON(!list_empty(&transaction->list));
66 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); 66 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
67 if (transaction->delayed_refs.pending_csums)
68 printk(KERN_ERR "pending csums is %llu\n",
69 transaction->delayed_refs.pending_csums);
67 while (!list_empty(&transaction->pending_chunks)) { 70 while (!list_empty(&transaction->pending_chunks)) {
68 struct extent_map *em; 71 struct extent_map *em;
69 72
@@ -93,11 +96,8 @@ static void clear_btree_io_tree(struct extent_io_tree *tree)
93 */ 96 */
94 ASSERT(!waitqueue_active(&state->wq)); 97 ASSERT(!waitqueue_active(&state->wq));
95 free_extent_state(state); 98 free_extent_state(state);
96 if (need_resched()) { 99
97 spin_unlock(&tree->lock); 100 cond_resched_lock(&tree->lock);
98 cond_resched();
99 spin_lock(&tree->lock);
100 }
101 } 101 }
102 spin_unlock(&tree->lock); 102 spin_unlock(&tree->lock);
103} 103}
@@ -222,10 +222,12 @@ loop:
222 atomic_set(&cur_trans->use_count, 2); 222 atomic_set(&cur_trans->use_count, 2);
223 cur_trans->have_free_bgs = 0; 223 cur_trans->have_free_bgs = 0;
224 cur_trans->start_time = get_seconds(); 224 cur_trans->start_time = get_seconds();
225 cur_trans->dirty_bg_run = 0;
225 226
226 cur_trans->delayed_refs.href_root = RB_ROOT; 227 cur_trans->delayed_refs.href_root = RB_ROOT;
227 atomic_set(&cur_trans->delayed_refs.num_entries, 0); 228 atomic_set(&cur_trans->delayed_refs.num_entries, 0);
228 cur_trans->delayed_refs.num_heads_ready = 0; 229 cur_trans->delayed_refs.num_heads_ready = 0;
230 cur_trans->delayed_refs.pending_csums = 0;
229 cur_trans->delayed_refs.num_heads = 0; 231 cur_trans->delayed_refs.num_heads = 0;
230 cur_trans->delayed_refs.flushing = 0; 232 cur_trans->delayed_refs.flushing = 0;
231 cur_trans->delayed_refs.run_delayed_start = 0; 233 cur_trans->delayed_refs.run_delayed_start = 0;
@@ -250,6 +252,9 @@ loop:
250 INIT_LIST_HEAD(&cur_trans->switch_commits); 252 INIT_LIST_HEAD(&cur_trans->switch_commits);
251 INIT_LIST_HEAD(&cur_trans->pending_ordered); 253 INIT_LIST_HEAD(&cur_trans->pending_ordered);
252 INIT_LIST_HEAD(&cur_trans->dirty_bgs); 254 INIT_LIST_HEAD(&cur_trans->dirty_bgs);
255 INIT_LIST_HEAD(&cur_trans->io_bgs);
256 mutex_init(&cur_trans->cache_write_mutex);
257 cur_trans->num_dirty_bgs = 0;
253 spin_lock_init(&cur_trans->dirty_bgs_lock); 258 spin_lock_init(&cur_trans->dirty_bgs_lock);
254 list_add_tail(&cur_trans->list, &fs_info->trans_list); 259 list_add_tail(&cur_trans->list, &fs_info->trans_list);
255 extent_io_tree_init(&cur_trans->dirty_pages, 260 extent_io_tree_init(&cur_trans->dirty_pages,
@@ -721,7 +726,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
721 updates = trans->delayed_ref_updates; 726 updates = trans->delayed_ref_updates;
722 trans->delayed_ref_updates = 0; 727 trans->delayed_ref_updates = 0;
723 if (updates) { 728 if (updates) {
724 err = btrfs_run_delayed_refs(trans, root, updates); 729 err = btrfs_run_delayed_refs(trans, root, updates * 2);
725 if (err) /* Error code will also eval true */ 730 if (err) /* Error code will also eval true */
726 return err; 731 return err;
727 } 732 }
@@ -1057,6 +1062,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1057{ 1062{
1058 struct btrfs_fs_info *fs_info = root->fs_info; 1063 struct btrfs_fs_info *fs_info = root->fs_info;
1059 struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; 1064 struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
1065 struct list_head *io_bgs = &trans->transaction->io_bgs;
1060 struct list_head *next; 1066 struct list_head *next;
1061 struct extent_buffer *eb; 1067 struct extent_buffer *eb;
1062 int ret; 1068 int ret;
@@ -1110,7 +1116,7 @@ again:
1110 return ret; 1116 return ret;
1111 } 1117 }
1112 1118
1113 while (!list_empty(dirty_bgs)) { 1119 while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
1114 ret = btrfs_write_dirty_block_groups(trans, root); 1120 ret = btrfs_write_dirty_block_groups(trans, root);
1115 if (ret) 1121 if (ret)
1116 return ret; 1122 return ret;
@@ -1810,6 +1816,37 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1810 return ret; 1816 return ret;
1811 } 1817 }
1812 1818
1819 if (!cur_trans->dirty_bg_run) {
1820 int run_it = 0;
1821
1822 /* this mutex is also taken before trying to set
1823 * block groups readonly. We need to make sure
1824 * that nobody has set a block group readonly
1825 * after extents from that block group have been
1826 * allocated for cache files. btrfs_set_block_group_ro
1827 * will wait for the transaction to commit if it
1828 * finds dirty_bg_run = 1
1829 *
1830 * The dirty_bg_run flag is also used to make sure only
1831 * one process starts all the block group IO. It wouldn't
1832 * hurt to have more than one go through, but there's no
1833 * real advantage to it either.
1834 */
1835 mutex_lock(&root->fs_info->ro_block_group_mutex);
1836 if (!cur_trans->dirty_bg_run) {
1837 run_it = 1;
1838 cur_trans->dirty_bg_run = 1;
1839 }
1840 mutex_unlock(&root->fs_info->ro_block_group_mutex);
1841
1842 if (run_it)
1843 ret = btrfs_start_dirty_block_groups(trans, root);
1844 }
1845 if (ret) {
1846 btrfs_end_transaction(trans, root);
1847 return ret;
1848 }
1849
1813 spin_lock(&root->fs_info->trans_lock); 1850 spin_lock(&root->fs_info->trans_lock);
1814 list_splice(&trans->ordered, &cur_trans->pending_ordered); 1851 list_splice(&trans->ordered, &cur_trans->pending_ordered);
1815 if (cur_trans->state >= TRANS_STATE_COMMIT_START) { 1852 if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
@@ -2003,6 +2040,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
2003 2040
2004 assert_qgroups_uptodate(trans); 2041 assert_qgroups_uptodate(trans);
2005 ASSERT(list_empty(&cur_trans->dirty_bgs)); 2042 ASSERT(list_empty(&cur_trans->dirty_bgs));
2043 ASSERT(list_empty(&cur_trans->io_bgs));
2006 update_super_roots(root); 2044 update_super_roots(root);
2007 2045
2008 btrfs_set_super_log_root(root->fs_info->super_copy, 0); 2046 btrfs_set_super_log_root(root->fs_info->super_copy, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 937050a2b68e..0b24755596ba 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -64,9 +64,19 @@ struct btrfs_transaction {
64 struct list_head pending_ordered; 64 struct list_head pending_ordered;
65 struct list_head switch_commits; 65 struct list_head switch_commits;
66 struct list_head dirty_bgs; 66 struct list_head dirty_bgs;
67 struct list_head io_bgs;
68 u64 num_dirty_bgs;
69
70 /*
71 * we need to make sure block group deletion doesn't race with
72 * free space cache writeout. This mutex keeps them from stomping
73 * on each other
74 */
75 struct mutex cache_write_mutex;
67 spinlock_t dirty_bgs_lock; 76 spinlock_t dirty_bgs_lock;
68 struct btrfs_delayed_ref_root delayed_refs; 77 struct btrfs_delayed_ref_root delayed_refs;
69 int aborted; 78 int aborted;
79 int dirty_bg_run;
70}; 80};
71 81
72#define __TRANS_FREEZABLE (1U << 0) 82#define __TRANS_FREEZABLE (1U << 0)
@@ -136,9 +146,11 @@ struct btrfs_pending_snapshot {
136static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, 146static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
137 struct inode *inode) 147 struct inode *inode)
138{ 148{
149 spin_lock(&BTRFS_I(inode)->lock);
139 BTRFS_I(inode)->last_trans = trans->transaction->transid; 150 BTRFS_I(inode)->last_trans = trans->transaction->transid;
140 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; 151 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
141 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; 152 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
153 spin_unlock(&BTRFS_I(inode)->lock);
142} 154}
143 155
144int btrfs_end_transaction(struct btrfs_trans_handle *trans, 156int btrfs_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c5b8ba37f88e..a089b5944efc 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -492,11 +492,19 @@ insert:
492 492
493 if (btrfs_inode_generation(eb, src_item) == 0) { 493 if (btrfs_inode_generation(eb, src_item) == 0) {
494 struct extent_buffer *dst_eb = path->nodes[0]; 494 struct extent_buffer *dst_eb = path->nodes[0];
495 const u64 ino_size = btrfs_inode_size(eb, src_item);
495 496
497 /*
498 * For regular files an ino_size == 0 is used only when
499 * logging that an inode exists, as part of a directory
500 * fsync, and the inode wasn't fsynced before. In this
501 * case don't set the size of the inode in the fs/subvol
502 * tree, otherwise we would be throwing valid data away.
503 */
496 if (S_ISREG(btrfs_inode_mode(eb, src_item)) && 504 if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
497 S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) { 505 S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
506 ino_size != 0) {
498 struct btrfs_map_token token; 507 struct btrfs_map_token token;
499 u64 ino_size = btrfs_inode_size(eb, src_item);
500 508
501 btrfs_init_map_token(&token); 509 btrfs_init_map_token(&token);
502 btrfs_set_token_inode_size(dst_eb, dst_item, 510 btrfs_set_token_inode_size(dst_eb, dst_item,
@@ -1951,6 +1959,104 @@ out:
1951 return ret; 1959 return ret;
1952} 1960}
1953 1961
1962static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
1963 struct btrfs_root *root,
1964 struct btrfs_root *log,
1965 struct btrfs_path *path,
1966 const u64 ino)
1967{
1968 struct btrfs_key search_key;
1969 struct btrfs_path *log_path;
1970 int i;
1971 int nritems;
1972 int ret;
1973
1974 log_path = btrfs_alloc_path();
1975 if (!log_path)
1976 return -ENOMEM;
1977
1978 search_key.objectid = ino;
1979 search_key.type = BTRFS_XATTR_ITEM_KEY;
1980 search_key.offset = 0;
1981again:
1982 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
1983 if (ret < 0)
1984 goto out;
1985process_leaf:
1986 nritems = btrfs_header_nritems(path->nodes[0]);
1987 for (i = path->slots[0]; i < nritems; i++) {
1988 struct btrfs_key key;
1989 struct btrfs_dir_item *di;
1990 struct btrfs_dir_item *log_di;
1991 u32 total_size;
1992 u32 cur;
1993
1994 btrfs_item_key_to_cpu(path->nodes[0], &key, i);
1995 if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
1996 ret = 0;
1997 goto out;
1998 }
1999
2000 di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
2001 total_size = btrfs_item_size_nr(path->nodes[0], i);
2002 cur = 0;
2003 while (cur < total_size) {
2004 u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
2005 u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
2006 u32 this_len = sizeof(*di) + name_len + data_len;
2007 char *name;
2008
2009 name = kmalloc(name_len, GFP_NOFS);
2010 if (!name) {
2011 ret = -ENOMEM;
2012 goto out;
2013 }
2014 read_extent_buffer(path->nodes[0], name,
2015 (unsigned long)(di + 1), name_len);
2016
2017 log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
2018 name, name_len, 0);
2019 btrfs_release_path(log_path);
2020 if (!log_di) {
2021 /* Doesn't exist in log tree, so delete it. */
2022 btrfs_release_path(path);
2023 di = btrfs_lookup_xattr(trans, root, path, ino,
2024 name, name_len, -1);
2025 kfree(name);
2026 if (IS_ERR(di)) {
2027 ret = PTR_ERR(di);
2028 goto out;
2029 }
2030 ASSERT(di);
2031 ret = btrfs_delete_one_dir_name(trans, root,
2032 path, di);
2033 if (ret)
2034 goto out;
2035 btrfs_release_path(path);
2036 search_key = key;
2037 goto again;
2038 }
2039 kfree(name);
2040 if (IS_ERR(log_di)) {
2041 ret = PTR_ERR(log_di);
2042 goto out;
2043 }
2044 cur += this_len;
2045 di = (struct btrfs_dir_item *)((char *)di + this_len);
2046 }
2047 }
2048 ret = btrfs_next_leaf(root, path);
2049 if (ret > 0)
2050 ret = 0;
2051 else if (ret == 0)
2052 goto process_leaf;
2053out:
2054 btrfs_free_path(log_path);
2055 btrfs_release_path(path);
2056 return ret;
2057}
2058
2059
1954/* 2060/*
1955 * deletion replay happens before we copy any new directory items 2061 * deletion replay happens before we copy any new directory items
1956 * out of the log or out of backreferences from inodes. It 2062 * out of the log or out of backreferences from inodes. It
@@ -2104,6 +2210,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2104 2210
2105 inode_item = btrfs_item_ptr(eb, i, 2211 inode_item = btrfs_item_ptr(eb, i,
2106 struct btrfs_inode_item); 2212 struct btrfs_inode_item);
2213 ret = replay_xattr_deletes(wc->trans, root, log,
2214 path, key.objectid);
2215 if (ret)
2216 break;
2107 mode = btrfs_inode_mode(eb, inode_item); 2217 mode = btrfs_inode_mode(eb, inode_item);
2108 if (S_ISDIR(mode)) { 2218 if (S_ISDIR(mode)) {
2109 ret = replay_dir_deletes(wc->trans, 2219 ret = replay_dir_deletes(wc->trans,
@@ -2230,7 +2340,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2230 if (trans) { 2340 if (trans) {
2231 btrfs_tree_lock(next); 2341 btrfs_tree_lock(next);
2232 btrfs_set_lock_blocking(next); 2342 btrfs_set_lock_blocking(next);
2233 clean_tree_block(trans, root, next); 2343 clean_tree_block(trans, root->fs_info,
2344 next);
2234 btrfs_wait_tree_block_writeback(next); 2345 btrfs_wait_tree_block_writeback(next);
2235 btrfs_tree_unlock(next); 2346 btrfs_tree_unlock(next);
2236 } 2347 }
@@ -2308,7 +2419,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2308 if (trans) { 2419 if (trans) {
2309 btrfs_tree_lock(next); 2420 btrfs_tree_lock(next);
2310 btrfs_set_lock_blocking(next); 2421 btrfs_set_lock_blocking(next);
2311 clean_tree_block(trans, root, next); 2422 clean_tree_block(trans, root->fs_info,
2423 next);
2312 btrfs_wait_tree_block_writeback(next); 2424 btrfs_wait_tree_block_writeback(next);
2313 btrfs_tree_unlock(next); 2425 btrfs_tree_unlock(next);
2314 } 2426 }
@@ -2384,7 +2496,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2384 if (trans) { 2496 if (trans) {
2385 btrfs_tree_lock(next); 2497 btrfs_tree_lock(next);
2386 btrfs_set_lock_blocking(next); 2498 btrfs_set_lock_blocking(next);
2387 clean_tree_block(trans, log, next); 2499 clean_tree_block(trans, log->fs_info, next);
2388 btrfs_wait_tree_block_writeback(next); 2500 btrfs_wait_tree_block_writeback(next);
2389 btrfs_tree_unlock(next); 2501 btrfs_tree_unlock(next);
2390 } 2502 }
@@ -3020,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3020 struct btrfs_root *root, struct inode *inode, 3132 struct btrfs_root *root, struct inode *inode,
3021 struct btrfs_path *path, 3133 struct btrfs_path *path,
3022 struct btrfs_path *dst_path, int key_type, 3134 struct btrfs_path *dst_path, int key_type,
3135 struct btrfs_log_ctx *ctx,
3023 u64 min_offset, u64 *last_offset_ret) 3136 u64 min_offset, u64 *last_offset_ret)
3024{ 3137{
3025 struct btrfs_key min_key; 3138 struct btrfs_key min_key;
@@ -3104,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3104 src = path->nodes[0]; 3217 src = path->nodes[0];
3105 nritems = btrfs_header_nritems(src); 3218 nritems = btrfs_header_nritems(src);
3106 for (i = path->slots[0]; i < nritems; i++) { 3219 for (i = path->slots[0]; i < nritems; i++) {
3220 struct btrfs_dir_item *di;
3221
3107 btrfs_item_key_to_cpu(src, &min_key, i); 3222 btrfs_item_key_to_cpu(src, &min_key, i);
3108 3223
3109 if (min_key.objectid != ino || min_key.type != key_type) 3224 if (min_key.objectid != ino || min_key.type != key_type)
@@ -3114,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3114 err = ret; 3229 err = ret;
3115 goto done; 3230 goto done;
3116 } 3231 }
3232
3233 /*
3234 * We must make sure that when we log a directory entry,
3235 * the corresponding inode, after log replay, has a
3236 * matching link count. For example:
3237 *
3238 * touch foo
3239 * mkdir mydir
3240 * sync
3241 * ln foo mydir/bar
3242 * xfs_io -c "fsync" mydir
3243 * <crash>
3244 * <mount fs and log replay>
3245 *
3246 * Would result in a fsync log that when replayed, our
3247 * file inode would have a link count of 1, but we get
3248 * two directory entries pointing to the same inode.
3249 * After removing one of the names, it would not be
3250 * possible to remove the other name, which always
3251 * resulted in stale file handle errors, and would not
3252 * be possible to rmdir the parent directory, since
3253 * its i_size could never decrement to the value
3254 * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
3255 */
3256 di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
3257 btrfs_dir_item_key_to_cpu(src, di, &tmp);
3258 if (ctx &&
3259 (btrfs_dir_transid(src, di) == trans->transid ||
3260 btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
3261 tmp.type != BTRFS_ROOT_ITEM_KEY)
3262 ctx->log_new_dentries = true;
3117 } 3263 }
3118 path->slots[0] = nritems; 3264 path->slots[0] = nritems;
3119 3265
@@ -3175,7 +3321,8 @@ done:
3175static noinline int log_directory_changes(struct btrfs_trans_handle *trans, 3321static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
3176 struct btrfs_root *root, struct inode *inode, 3322 struct btrfs_root *root, struct inode *inode,
3177 struct btrfs_path *path, 3323 struct btrfs_path *path,
3178 struct btrfs_path *dst_path) 3324 struct btrfs_path *dst_path,
3325 struct btrfs_log_ctx *ctx)
3179{ 3326{
3180 u64 min_key; 3327 u64 min_key;
3181 u64 max_key; 3328 u64 max_key;
@@ -3187,7 +3334,7 @@ again:
3187 max_key = 0; 3334 max_key = 0;
3188 while (1) { 3335 while (1) {
3189 ret = log_dir_items(trans, root, inode, path, 3336 ret = log_dir_items(trans, root, inode, path,
3190 dst_path, key_type, min_key, 3337 dst_path, key_type, ctx, min_key,
3191 &max_key); 3338 &max_key);
3192 if (ret) 3339 if (ret)
3193 return ret; 3340 return ret;
@@ -3963,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
3963 if (ret < 0) { 4110 if (ret < 0) {
3964 return ret; 4111 return ret;
3965 } else if (ret > 0) { 4112 } else if (ret > 0) {
3966 *size_ret = i_size_read(inode); 4113 *size_ret = 0;
3967 } else { 4114 } else {
3968 struct btrfs_inode_item *item; 4115 struct btrfs_inode_item *item;
3969 4116
@@ -4070,10 +4217,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4070 if (S_ISDIR(inode->i_mode)) { 4217 if (S_ISDIR(inode->i_mode)) {
4071 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 4218 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
4072 4219
4073 if (inode_only == LOG_INODE_EXISTS) { 4220 if (inode_only == LOG_INODE_EXISTS)
4074 max_key_type = BTRFS_INODE_EXTREF_KEY; 4221 max_key_type = BTRFS_XATTR_ITEM_KEY;
4075 max_key.type = max_key_type;
4076 }
4077 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 4222 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
4078 } else { 4223 } else {
4079 if (inode_only == LOG_INODE_EXISTS) { 4224 if (inode_only == LOG_INODE_EXISTS) {
@@ -4098,7 +4243,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4098 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 4243 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4099 &BTRFS_I(inode)->runtime_flags)) { 4244 &BTRFS_I(inode)->runtime_flags)) {
4100 if (inode_only == LOG_INODE_EXISTS) { 4245 if (inode_only == LOG_INODE_EXISTS) {
4101 max_key.type = BTRFS_INODE_EXTREF_KEY; 4246 max_key.type = BTRFS_XATTR_ITEM_KEY;
4102 ret = drop_objectid_items(trans, log, path, ino, 4247 ret = drop_objectid_items(trans, log, path, ino,
4103 max_key.type); 4248 max_key.type);
4104 } else { 4249 } else {
@@ -4106,20 +4251,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4106 &BTRFS_I(inode)->runtime_flags); 4251 &BTRFS_I(inode)->runtime_flags);
4107 clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4252 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4108 &BTRFS_I(inode)->runtime_flags); 4253 &BTRFS_I(inode)->runtime_flags);
4109 ret = btrfs_truncate_inode_items(trans, log, 4254 while(1) {
4110 inode, 0, 0); 4255 ret = btrfs_truncate_inode_items(trans,
4256 log, inode, 0, 0);
4257 if (ret != -EAGAIN)
4258 break;
4259 }
4111 } 4260 }
4112 } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING, 4261 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4113 &BTRFS_I(inode)->runtime_flags) || 4262 &BTRFS_I(inode)->runtime_flags) ||
4114 inode_only == LOG_INODE_EXISTS) { 4263 inode_only == LOG_INODE_EXISTS) {
4115 if (inode_only == LOG_INODE_ALL) { 4264 if (inode_only == LOG_INODE_ALL)
4116 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4117 &BTRFS_I(inode)->runtime_flags);
4118 fast_search = true; 4265 fast_search = true;
4119 max_key.type = BTRFS_XATTR_ITEM_KEY; 4266 max_key.type = BTRFS_XATTR_ITEM_KEY;
4120 } else {
4121 max_key.type = BTRFS_INODE_EXTREF_KEY;
4122 }
4123 ret = drop_objectid_items(trans, log, path, ino, 4267 ret = drop_objectid_items(trans, log, path, ino,
4124 max_key.type); 4268 max_key.type);
4125 } else { 4269 } else {
@@ -4277,15 +4421,18 @@ log_extents:
4277 } 4421 }
4278 4422
4279 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 4423 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
4280 ret = log_directory_changes(trans, root, inode, path, dst_path); 4424 ret = log_directory_changes(trans, root, inode, path, dst_path,
4425 ctx);
4281 if (ret) { 4426 if (ret) {
4282 err = ret; 4427 err = ret;
4283 goto out_unlock; 4428 goto out_unlock;
4284 } 4429 }
4285 } 4430 }
4286 4431
4432 spin_lock(&BTRFS_I(inode)->lock);
4287 BTRFS_I(inode)->logged_trans = trans->transid; 4433 BTRFS_I(inode)->logged_trans = trans->transid;
4288 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 4434 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
4435 spin_unlock(&BTRFS_I(inode)->lock);
4289out_unlock: 4436out_unlock:
4290 if (unlikely(err)) 4437 if (unlikely(err))
4291 btrfs_put_logged_extents(&logged_list); 4438 btrfs_put_logged_extents(&logged_list);
@@ -4372,6 +4519,181 @@ out:
4372 return ret; 4519 return ret;
4373} 4520}
4374 4521
4522struct btrfs_dir_list {
4523 u64 ino;
4524 struct list_head list;
4525};
4526
4527/*
4528 * Log the inodes of the new dentries of a directory. See log_dir_items() for
4529 * details about the why it is needed.
4530 * This is a recursive operation - if an existing dentry corresponds to a
4531 * directory, that directory's new entries are logged too (same behaviour as
4532 * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
4533 * the dentries point to we do not lock their i_mutex, otherwise lockdep
4534 * complains about the following circular lock dependency / possible deadlock:
4535 *
4536 * CPU0 CPU1
4537 * ---- ----
4538 * lock(&type->i_mutex_dir_key#3/2);
4539 * lock(sb_internal#2);
4540 * lock(&type->i_mutex_dir_key#3/2);
4541 * lock(&sb->s_type->i_mutex_key#14);
4542 *
4543 * Where sb_internal is the lock (a counter that works as a lock) acquired by
4544 * sb_start_intwrite() in btrfs_start_transaction().
4545 * Not locking i_mutex of the inodes is still safe because:
4546 *
4547 * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
4548 * that while logging the inode new references (names) are added or removed
4549 * from the inode, leaving the logged inode item with a link count that does
4550 * not match the number of logged inode reference items. This is fine because
4551 * at log replay time we compute the real number of links and correct the
4552 * link count in the inode item (see replay_one_buffer() and
4553 * link_to_fixup_dir());
4554 *
4555 * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
4556 * while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
4557 * BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
4558 * has a size that doesn't match the sum of the lengths of all the logged
4559 * names. This does not result in a problem because if a dir_item key is
4560 * logged but its matching dir_index key is not logged, at log replay time we
4561 * don't use it to replay the respective name (see replay_one_name()). On the
4562 * other hand if only the dir_index key ends up being logged, the respective
4563 * name is added to the fs/subvol tree with both the dir_item and dir_index
4564 * keys created (see replay_one_name()).
4565 * The directory's inode item with a wrong i_size is not a problem as well,
4566 * since we don't use it at log replay time to set the i_size in the inode
4567 * item of the fs/subvol tree (see overwrite_item()).
4568 */
4569static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
4570 struct btrfs_root *root,
4571 struct inode *start_inode,
4572 struct btrfs_log_ctx *ctx)
4573{
4574 struct btrfs_root *log = root->log_root;
4575 struct btrfs_path *path;
4576 LIST_HEAD(dir_list);
4577 struct btrfs_dir_list *dir_elem;
4578 int ret = 0;
4579
4580 path = btrfs_alloc_path();
4581 if (!path)
4582 return -ENOMEM;
4583
4584 dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
4585 if (!dir_elem) {
4586 btrfs_free_path(path);
4587 return -ENOMEM;
4588 }
4589 dir_elem->ino = btrfs_ino(start_inode);
4590 list_add_tail(&dir_elem->list, &dir_list);
4591
4592 while (!list_empty(&dir_list)) {
4593 struct extent_buffer *leaf;
4594 struct btrfs_key min_key;
4595 int nritems;
4596 int i;
4597
4598 dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
4599 list);
4600 if (ret)
4601 goto next_dir_inode;
4602
4603 min_key.objectid = dir_elem->ino;
4604 min_key.type = BTRFS_DIR_ITEM_KEY;
4605 min_key.offset = 0;
4606again:
4607 btrfs_release_path(path);
4608 ret = btrfs_search_forward(log, &min_key, path, trans->transid);
4609 if (ret < 0) {
4610 goto next_dir_inode;
4611 } else if (ret > 0) {
4612 ret = 0;
4613 goto next_dir_inode;
4614 }
4615
4616process_leaf:
4617 leaf = path->nodes[0];
4618 nritems = btrfs_header_nritems(leaf);
4619 for (i = path->slots[0]; i < nritems; i++) {
4620 struct btrfs_dir_item *di;
4621 struct btrfs_key di_key;
4622 struct inode *di_inode;
4623 struct btrfs_dir_list *new_dir_elem;
4624 int log_mode = LOG_INODE_EXISTS;
4625 int type;
4626
4627 btrfs_item_key_to_cpu(leaf, &min_key, i);
4628 if (min_key.objectid != dir_elem->ino ||
4629 min_key.type != BTRFS_DIR_ITEM_KEY)
4630 goto next_dir_inode;
4631
4632 di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
4633 type = btrfs_dir_type(leaf, di);
4634 if (btrfs_dir_transid(leaf, di) < trans->transid &&
4635 type != BTRFS_FT_DIR)
4636 continue;
4637 btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
4638 if (di_key.type == BTRFS_ROOT_ITEM_KEY)
4639 continue;
4640
4641 di_inode = btrfs_iget(root->fs_info->sb, &di_key,
4642 root, NULL);
4643 if (IS_ERR(di_inode)) {
4644 ret = PTR_ERR(di_inode);
4645 goto next_dir_inode;
4646 }
4647
4648 if (btrfs_inode_in_log(di_inode, trans->transid)) {
4649 iput(di_inode);
4650 continue;
4651 }
4652
4653 ctx->log_new_dentries = false;
4654 if (type == BTRFS_FT_DIR)
4655 log_mode = LOG_INODE_ALL;
4656 btrfs_release_path(path);
4657 ret = btrfs_log_inode(trans, root, di_inode,
4658 log_mode, 0, LLONG_MAX, ctx);
4659 iput(di_inode);
4660 if (ret)
4661 goto next_dir_inode;
4662 if (ctx->log_new_dentries) {
4663 new_dir_elem = kmalloc(sizeof(*new_dir_elem),
4664 GFP_NOFS);
4665 if (!new_dir_elem) {
4666 ret = -ENOMEM;
4667 goto next_dir_inode;
4668 }
4669 new_dir_elem->ino = di_key.objectid;
4670 list_add_tail(&new_dir_elem->list, &dir_list);
4671 }
4672 break;
4673 }
4674 if (i == nritems) {
4675 ret = btrfs_next_leaf(log, path);
4676 if (ret < 0) {
4677 goto next_dir_inode;
4678 } else if (ret > 0) {
4679 ret = 0;
4680 goto next_dir_inode;
4681 }
4682 goto process_leaf;
4683 }
4684 if (min_key.offset < (u64)-1) {
4685 min_key.offset++;
4686 goto again;
4687 }
4688next_dir_inode:
4689 list_del(&dir_elem->list);
4690 kfree(dir_elem);
4691 }
4692
4693 btrfs_free_path(path);
4694 return ret;
4695}
4696
4375/* 4697/*
4376 * helper function around btrfs_log_inode to make sure newly created 4698 * helper function around btrfs_log_inode to make sure newly created
4377 * parent directories also end up in the log. A minimal inode and backref 4699 * parent directories also end up in the log. A minimal inode and backref
@@ -4394,6 +4716,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4394 const struct dentry * const first_parent = parent; 4716 const struct dentry * const first_parent = parent;
4395 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > 4717 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4396 last_committed); 4718 last_committed);
4719 bool log_dentries = false;
4720 struct inode *orig_inode = inode;
4397 4721
4398 sb = inode->i_sb; 4722 sb = inode->i_sb;
4399 4723
@@ -4449,6 +4773,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4449 goto end_trans; 4773 goto end_trans;
4450 } 4774 }
4451 4775
4776 if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
4777 log_dentries = true;
4778
4452 while (1) { 4779 while (1) {
4453 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4780 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
4454 break; 4781 break;
@@ -4485,7 +4812,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4485 dput(old_parent); 4812 dput(old_parent);
4486 old_parent = parent; 4813 old_parent = parent;
4487 } 4814 }
4488 ret = 0; 4815 if (log_dentries)
4816 ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
4817 else
4818 ret = 0;
4489end_trans: 4819end_trans:
4490 dput(old_parent); 4820 dput(old_parent);
4491 if (ret < 0) { 4821 if (ret < 0) {
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 154990c26dcb..6916a781ea02 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -29,6 +29,7 @@ struct btrfs_log_ctx {
29 int log_ret; 29 int log_ret;
30 int log_transid; 30 int log_transid;
31 int io_err; 31 int io_err;
32 bool log_new_dentries;
32 struct list_head list; 33 struct list_head list;
33}; 34};
34 35
@@ -37,6 +38,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
37 ctx->log_ret = 0; 38 ctx->log_ret = 0;
38 ctx->log_transid = 0; 39 ctx->log_transid = 0;
39 ctx->io_err = 0; 40 ctx->io_err = 0;
41 ctx->log_new_dentries = false;
40 INIT_LIST_HEAD(&ctx->list); 42 INIT_LIST_HEAD(&ctx->list);
41} 43}
42 44
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8222f6f74147..8bcd2a007517 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -366,8 +366,8 @@ loop_lock:
366 btrfsic_submit_bio(cur->bi_rw, cur); 366 btrfsic_submit_bio(cur->bi_rw, cur);
367 num_run++; 367 num_run++;
368 batch_run++; 368 batch_run++;
369 if (need_resched()) 369
370 cond_resched(); 370 cond_resched();
371 371
372 /* 372 /*
373 * we made progress, there is more work to do and the bdi 373 * we made progress, there is more work to do and the bdi
@@ -400,8 +400,7 @@ loop_lock:
400 * against it before looping 400 * against it before looping
401 */ 401 */
402 last_waited = ioc->last_waited; 402 last_waited = ioc->last_waited;
403 if (need_resched()) 403 cond_resched();
404 cond_resched();
405 continue; 404 continue;
406 } 405 }
407 spin_lock(&device->io_lock); 406 spin_lock(&device->io_lock);
@@ -609,8 +608,7 @@ error:
609 return ERR_PTR(-ENOMEM); 608 return ERR_PTR(-ENOMEM);
610} 609}
611 610
612void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, 611void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
613 struct btrfs_fs_devices *fs_devices, int step)
614{ 612{
615 struct btrfs_device *device, *next; 613 struct btrfs_device *device, *next;
616 struct btrfs_device *latest_dev = NULL; 614 struct btrfs_device *latest_dev = NULL;
@@ -1136,11 +1134,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
1136 path = btrfs_alloc_path(); 1134 path = btrfs_alloc_path();
1137 if (!path) 1135 if (!path)
1138 return -ENOMEM; 1136 return -ENOMEM;
1139again: 1137
1140 max_hole_start = search_start; 1138 max_hole_start = search_start;
1141 max_hole_size = 0; 1139 max_hole_size = 0;
1142 hole_size = 0;
1143 1140
1141again:
1144 if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { 1142 if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
1145 ret = -ENOSPC; 1143 ret = -ENOSPC;
1146 goto out; 1144 goto out;
@@ -1233,21 +1231,23 @@ next:
1233 * allocated dev extents, and when shrinking the device, 1231 * allocated dev extents, and when shrinking the device,
1234 * search_end may be smaller than search_start. 1232 * search_end may be smaller than search_start.
1235 */ 1233 */
1236 if (search_end > search_start) 1234 if (search_end > search_start) {
1237 hole_size = search_end - search_start; 1235 hole_size = search_end - search_start;
1238 1236
1239 if (hole_size > max_hole_size) { 1237 if (contains_pending_extent(trans, device, &search_start,
1240 max_hole_start = search_start; 1238 hole_size)) {
1241 max_hole_size = hole_size; 1239 btrfs_release_path(path);
1242 } 1240 goto again;
1241 }
1243 1242
1244 if (contains_pending_extent(trans, device, &search_start, hole_size)) { 1243 if (hole_size > max_hole_size) {
1245 btrfs_release_path(path); 1244 max_hole_start = search_start;
1246 goto again; 1245 max_hole_size = hole_size;
1246 }
1247 } 1247 }
1248 1248
1249 /* See above. */ 1249 /* See above. */
1250 if (hole_size < num_bytes) 1250 if (max_hole_size < num_bytes)
1251 ret = -ENOSPC; 1251 ret = -ENOSPC;
1252 else 1252 else
1253 ret = 0; 1253 ret = 0;
@@ -2487,8 +2487,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
2487} 2487}
2488 2488
2489static int btrfs_free_chunk(struct btrfs_trans_handle *trans, 2489static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
2490 struct btrfs_root *root, 2490 struct btrfs_root *root, u64 chunk_objectid,
2491 u64 chunk_tree, u64 chunk_objectid,
2492 u64 chunk_offset) 2491 u64 chunk_offset)
2493{ 2492{
2494 int ret; 2493 int ret;
@@ -2580,7 +2579,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
2580 struct map_lookup *map; 2579 struct map_lookup *map;
2581 u64 dev_extent_len = 0; 2580 u64 dev_extent_len = 0;
2582 u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; 2581 u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2583 u64 chunk_tree = root->fs_info->chunk_root->objectid;
2584 int i, ret = 0; 2582 int i, ret = 0;
2585 2583
2586 /* Just in case */ 2584 /* Just in case */
@@ -2634,8 +2632,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
2634 } 2632 }
2635 } 2633 }
2636 } 2634 }
2637 ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, 2635 ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
2638 chunk_offset);
2639 if (ret) { 2636 if (ret) {
2640 btrfs_abort_transaction(trans, root, ret); 2637 btrfs_abort_transaction(trans, root, ret);
2641 goto out; 2638 goto out;
@@ -2664,8 +2661,8 @@ out:
2664} 2661}
2665 2662
2666static int btrfs_relocate_chunk(struct btrfs_root *root, 2663static int btrfs_relocate_chunk(struct btrfs_root *root,
2667 u64 chunk_tree, u64 chunk_objectid, 2664 u64 chunk_objectid,
2668 u64 chunk_offset) 2665 u64 chunk_offset)
2669{ 2666{
2670 struct btrfs_root *extent_root; 2667 struct btrfs_root *extent_root;
2671 struct btrfs_trans_handle *trans; 2668 struct btrfs_trans_handle *trans;
@@ -2707,7 +2704,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
2707 struct btrfs_chunk *chunk; 2704 struct btrfs_chunk *chunk;
2708 struct btrfs_key key; 2705 struct btrfs_key key;
2709 struct btrfs_key found_key; 2706 struct btrfs_key found_key;
2710 u64 chunk_tree = chunk_root->root_key.objectid;
2711 u64 chunk_type; 2707 u64 chunk_type;
2712 bool retried = false; 2708 bool retried = false;
2713 int failed = 0; 2709 int failed = 0;
@@ -2744,7 +2740,7 @@ again:
2744 btrfs_release_path(path); 2740 btrfs_release_path(path);
2745 2741
2746 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { 2742 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
2747 ret = btrfs_relocate_chunk(chunk_root, chunk_tree, 2743 ret = btrfs_relocate_chunk(chunk_root,
2748 found_key.objectid, 2744 found_key.objectid,
2749 found_key.offset); 2745 found_key.offset);
2750 if (ret == -ENOSPC) 2746 if (ret == -ENOSPC)
@@ -3022,7 +3018,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
3022 3018
3023 stripe_offset = btrfs_stripe_offset(leaf, stripe); 3019 stripe_offset = btrfs_stripe_offset(leaf, stripe);
3024 stripe_length = btrfs_chunk_length(leaf, chunk); 3020 stripe_length = btrfs_chunk_length(leaf, chunk);
3025 do_div(stripe_length, factor); 3021 stripe_length = div_u64(stripe_length, factor);
3026 3022
3027 if (stripe_offset < bargs->pend && 3023 if (stripe_offset < bargs->pend &&
3028 stripe_offset + stripe_length > bargs->pstart) 3024 stripe_offset + stripe_length > bargs->pstart)
@@ -3255,7 +3251,6 @@ again:
3255 } 3251 }
3256 3252
3257 ret = btrfs_relocate_chunk(chunk_root, 3253 ret = btrfs_relocate_chunk(chunk_root,
3258 chunk_root->root_key.objectid,
3259 found_key.objectid, 3254 found_key.objectid,
3260 found_key.offset); 3255 found_key.offset);
3261 if (ret && ret != -ENOSPC) 3256 if (ret && ret != -ENOSPC)
@@ -3957,7 +3952,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
3957 struct btrfs_dev_extent *dev_extent = NULL; 3952 struct btrfs_dev_extent *dev_extent = NULL;
3958 struct btrfs_path *path; 3953 struct btrfs_path *path;
3959 u64 length; 3954 u64 length;
3960 u64 chunk_tree;
3961 u64 chunk_objectid; 3955 u64 chunk_objectid;
3962 u64 chunk_offset; 3956 u64 chunk_offset;
3963 int ret; 3957 int ret;
@@ -4027,13 +4021,11 @@ again:
4027 break; 4021 break;
4028 } 4022 }
4029 4023
4030 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
4031 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 4024 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
4032 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); 4025 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4033 btrfs_release_path(path); 4026 btrfs_release_path(path);
4034 4027
4035 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, 4028 ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
4036 chunk_offset);
4037 if (ret && ret != -ENOSPC) 4029 if (ret && ret != -ENOSPC)
4038 goto done; 4030 goto done;
4039 if (ret == -ENOSPC) 4031 if (ret == -ENOSPC)
@@ -4131,7 +4123,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
4131 return 0; 4123 return 0;
4132} 4124}
4133 4125
4134static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { 4126static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
4135 [BTRFS_RAID_RAID10] = { 4127 [BTRFS_RAID_RAID10] = {
4136 .sub_stripes = 2, 4128 .sub_stripes = 2,
4137 .dev_stripes = 1, 4129 .dev_stripes = 1,
@@ -4289,7 +4281,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4289 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), 4281 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
4290 max_chunk_size); 4282 max_chunk_size);
4291 4283
4292 devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, 4284 devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
4293 GFP_NOFS); 4285 GFP_NOFS);
4294 if (!devices_info) 4286 if (!devices_info)
4295 return -ENOMEM; 4287 return -ENOMEM;
@@ -4400,8 +4392,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4400 */ 4392 */
4401 if (stripe_size * data_stripes > max_chunk_size) { 4393 if (stripe_size * data_stripes > max_chunk_size) {
4402 u64 mask = (1ULL << 24) - 1; 4394 u64 mask = (1ULL << 24) - 1;
4403 stripe_size = max_chunk_size; 4395
4404 do_div(stripe_size, data_stripes); 4396 stripe_size = div_u64(max_chunk_size, data_stripes);
4405 4397
4406 /* bump the answer up to a 16MB boundary */ 4398 /* bump the answer up to a 16MB boundary */
4407 stripe_size = (stripe_size + mask) & ~mask; 4399 stripe_size = (stripe_size + mask) & ~mask;
@@ -4413,10 +4405,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4413 stripe_size = devices_info[ndevs-1].max_avail; 4405 stripe_size = devices_info[ndevs-1].max_avail;
4414 } 4406 }
4415 4407
4416 do_div(stripe_size, dev_stripes); 4408 stripe_size = div_u64(stripe_size, dev_stripes);
4417 4409
4418 /* align to BTRFS_STRIPE_LEN */ 4410 /* align to BTRFS_STRIPE_LEN */
4419 do_div(stripe_size, raid_stripe_len); 4411 stripe_size = div_u64(stripe_size, raid_stripe_len);
4420 stripe_size *= raid_stripe_len; 4412 stripe_size *= raid_stripe_len;
4421 4413
4422 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 4414 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -4954,7 +4946,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4954 u64 stripe_nr_orig; 4946 u64 stripe_nr_orig;
4955 u64 stripe_nr_end; 4947 u64 stripe_nr_end;
4956 u64 stripe_len; 4948 u64 stripe_len;
4957 int stripe_index; 4949 u32 stripe_index;
4958 int i; 4950 int i;
4959 int ret = 0; 4951 int ret = 0;
4960 int num_stripes; 4952 int num_stripes;
@@ -4995,7 +4987,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4995 * stripe_nr counts the total number of stripes we have to stride 4987 * stripe_nr counts the total number of stripes we have to stride
4996 * to get to this block 4988 * to get to this block
4997 */ 4989 */
4998 do_div(stripe_nr, stripe_len); 4990 stripe_nr = div64_u64(stripe_nr, stripe_len);
4999 4991
5000 stripe_offset = stripe_nr * stripe_len; 4992 stripe_offset = stripe_nr * stripe_len;
5001 BUG_ON(offset < stripe_offset); 4993 BUG_ON(offset < stripe_offset);
@@ -5011,7 +5003,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5011 /* allow a write of a full stripe, but make sure we don't 5003 /* allow a write of a full stripe, but make sure we don't
5012 * allow straddling of stripes 5004 * allow straddling of stripes
5013 */ 5005 */
5014 do_div(raid56_full_stripe_start, full_stripe_len); 5006 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
5007 full_stripe_len);
5015 raid56_full_stripe_start *= full_stripe_len; 5008 raid56_full_stripe_start *= full_stripe_len;
5016 } 5009 }
5017 5010
@@ -5136,7 +5129,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5136 stripe_index = 0; 5129 stripe_index = 0;
5137 stripe_nr_orig = stripe_nr; 5130 stripe_nr_orig = stripe_nr;
5138 stripe_nr_end = ALIGN(offset + *length, map->stripe_len); 5131 stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
5139 do_div(stripe_nr_end, map->stripe_len); 5132 stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
5140 stripe_end_offset = stripe_nr_end * map->stripe_len - 5133 stripe_end_offset = stripe_nr_end * map->stripe_len -
5141 (offset + *length); 5134 (offset + *length);
5142 5135
@@ -5144,7 +5137,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5144 if (rw & REQ_DISCARD) 5137 if (rw & REQ_DISCARD)
5145 num_stripes = min_t(u64, map->num_stripes, 5138 num_stripes = min_t(u64, map->num_stripes,
5146 stripe_nr_end - stripe_nr_orig); 5139 stripe_nr_end - stripe_nr_orig);
5147 stripe_index = do_div(stripe_nr, map->num_stripes); 5140 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5141 &stripe_index);
5148 if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) 5142 if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)))
5149 mirror_num = 1; 5143 mirror_num = 1;
5150 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 5144 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
@@ -5170,9 +5164,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5170 } 5164 }
5171 5165
5172 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { 5166 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
5173 int factor = map->num_stripes / map->sub_stripes; 5167 u32 factor = map->num_stripes / map->sub_stripes;
5174 5168
5175 stripe_index = do_div(stripe_nr, factor); 5169 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5176 stripe_index *= map->sub_stripes; 5170 stripe_index *= map->sub_stripes;
5177 5171
5178 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) 5172 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
@@ -5198,8 +5192,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5198 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || 5192 ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5199 mirror_num > 1)) { 5193 mirror_num > 1)) {
5200 /* push stripe_nr back to the start of the full stripe */ 5194 /* push stripe_nr back to the start of the full stripe */
5201 stripe_nr = raid56_full_stripe_start; 5195 stripe_nr = div_u64(raid56_full_stripe_start,
5202 do_div(stripe_nr, stripe_len * nr_data_stripes(map)); 5196 stripe_len * nr_data_stripes(map));
5203 5197
5204 /* RAID[56] write or recovery. Return all stripes */ 5198 /* RAID[56] write or recovery. Return all stripes */
5205 num_stripes = map->num_stripes; 5199 num_stripes = map->num_stripes;
@@ -5209,32 +5203,32 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5209 stripe_index = 0; 5203 stripe_index = 0;
5210 stripe_offset = 0; 5204 stripe_offset = 0;
5211 } else { 5205 } else {
5212 u64 tmp;
5213
5214 /* 5206 /*
5215 * Mirror #0 or #1 means the original data block. 5207 * Mirror #0 or #1 means the original data block.
5216 * Mirror #2 is RAID5 parity block. 5208 * Mirror #2 is RAID5 parity block.
5217 * Mirror #3 is RAID6 Q block. 5209 * Mirror #3 is RAID6 Q block.
5218 */ 5210 */
5219 stripe_index = do_div(stripe_nr, nr_data_stripes(map)); 5211 stripe_nr = div_u64_rem(stripe_nr,
5212 nr_data_stripes(map), &stripe_index);
5220 if (mirror_num > 1) 5213 if (mirror_num > 1)
5221 stripe_index = nr_data_stripes(map) + 5214 stripe_index = nr_data_stripes(map) +
5222 mirror_num - 2; 5215 mirror_num - 2;
5223 5216
5224 /* We distribute the parity blocks across stripes */ 5217 /* We distribute the parity blocks across stripes */
5225 tmp = stripe_nr + stripe_index; 5218 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
5226 stripe_index = do_div(tmp, map->num_stripes); 5219 &stripe_index);
5227 if (!(rw & (REQ_WRITE | REQ_DISCARD | 5220 if (!(rw & (REQ_WRITE | REQ_DISCARD |
5228 REQ_GET_READ_MIRRORS)) && mirror_num <= 1) 5221 REQ_GET_READ_MIRRORS)) && mirror_num <= 1)
5229 mirror_num = 1; 5222 mirror_num = 1;
5230 } 5223 }
5231 } else { 5224 } else {
5232 /* 5225 /*
5233 * after this do_div call, stripe_nr is the number of stripes 5226 * after this, stripe_nr is the number of stripes on this
5234 * on this device we have to walk to find the data, and 5227 * device we have to walk to find the data, and stripe_index is
5235 * stripe_index is the number of our device in the stripe array 5228 * the number of our device in the stripe array
5236 */ 5229 */
5237 stripe_index = do_div(stripe_nr, map->num_stripes); 5230 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5231 &stripe_index);
5238 mirror_num = stripe_index + 1; 5232 mirror_num = stripe_index + 1;
5239 } 5233 }
5240 BUG_ON(stripe_index >= map->num_stripes); 5234 BUG_ON(stripe_index >= map->num_stripes);
@@ -5261,7 +5255,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5261 need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || 5255 need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
5262 mirror_num > 1)) { 5256 mirror_num > 1)) {
5263 u64 tmp; 5257 u64 tmp;
5264 int i, rot; 5258 unsigned rot;
5265 5259
5266 bbio->raid_map = (u64 *)((void *)bbio->stripes + 5260 bbio->raid_map = (u64 *)((void *)bbio->stripes +
5267 sizeof(struct btrfs_bio_stripe) * 5261 sizeof(struct btrfs_bio_stripe) *
@@ -5269,8 +5263,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5269 sizeof(int) * tgtdev_indexes); 5263 sizeof(int) * tgtdev_indexes);
5270 5264
5271 /* Work out the disk rotation on this stripe-set */ 5265 /* Work out the disk rotation on this stripe-set */
5272 tmp = stripe_nr; 5266 div_u64_rem(stripe_nr, num_stripes, &rot);
5273 rot = do_div(tmp, num_stripes);
5274 5267
5275 /* Fill in the logical address of each stripe */ 5268 /* Fill in the logical address of each stripe */
5276 tmp = stripe_nr * nr_data_stripes(map); 5269 tmp = stripe_nr * nr_data_stripes(map);
@@ -5285,8 +5278,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5285 } 5278 }
5286 5279
5287 if (rw & REQ_DISCARD) { 5280 if (rw & REQ_DISCARD) {
5288 int factor = 0; 5281 u32 factor = 0;
5289 int sub_stripes = 0; 5282 u32 sub_stripes = 0;
5290 u64 stripes_per_dev = 0; 5283 u64 stripes_per_dev = 0;
5291 u32 remaining_stripes = 0; 5284 u32 remaining_stripes = 0;
5292 u32 last_stripe = 0; 5285 u32 last_stripe = 0;
@@ -5437,9 +5430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
5437 } 5430 }
5438 } 5431 }
5439 if (found) { 5432 if (found) {
5440 u64 length = map->stripe_len; 5433 if (physical_of_found + map->stripe_len <=
5441
5442 if (physical_of_found + length <=
5443 dev_replace->cursor_left) { 5434 dev_replace->cursor_left) {
5444 struct btrfs_bio_stripe *tgtdev_stripe = 5435 struct btrfs_bio_stripe *tgtdev_stripe =
5445 bbio->stripes + num_stripes; 5436 bbio->stripes + num_stripes;
@@ -5535,15 +5526,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5535 rmap_len = map->stripe_len; 5526 rmap_len = map->stripe_len;
5536 5527
5537 if (map->type & BTRFS_BLOCK_GROUP_RAID10) 5528 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5538 do_div(length, map->num_stripes / map->sub_stripes); 5529 length = div_u64(length, map->num_stripes / map->sub_stripes);
5539 else if (map->type & BTRFS_BLOCK_GROUP_RAID0) 5530 else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5540 do_div(length, map->num_stripes); 5531 length = div_u64(length, map->num_stripes);
5541 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { 5532 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5542 do_div(length, nr_data_stripes(map)); 5533 length = div_u64(length, nr_data_stripes(map));
5543 rmap_len = map->stripe_len * nr_data_stripes(map); 5534 rmap_len = map->stripe_len * nr_data_stripes(map);
5544 } 5535 }
5545 5536
5546 buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); 5537 buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
5547 BUG_ON(!buf); /* -ENOMEM */ 5538 BUG_ON(!buf); /* -ENOMEM */
5548 5539
5549 for (i = 0; i < map->num_stripes; i++) { 5540 for (i = 0; i < map->num_stripes; i++) {
@@ -5554,11 +5545,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5554 continue; 5545 continue;
5555 5546
5556 stripe_nr = physical - map->stripes[i].physical; 5547 stripe_nr = physical - map->stripes[i].physical;
5557 do_div(stripe_nr, map->stripe_len); 5548 stripe_nr = div_u64(stripe_nr, map->stripe_len);
5558 5549
5559 if (map->type & BTRFS_BLOCK_GROUP_RAID10) { 5550 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
5560 stripe_nr = stripe_nr * map->num_stripes + i; 5551 stripe_nr = stripe_nr * map->num_stripes + i;
5561 do_div(stripe_nr, map->sub_stripes); 5552 stripe_nr = div_u64(stripe_nr, map->sub_stripes);
5562 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { 5553 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
5563 stripe_nr = stripe_nr * map->num_stripes + i; 5554 stripe_nr = stripe_nr * map->num_stripes + i;
5564 } /* else if RAID[56], multiply by nr_data_stripes(). 5555 } /* else if RAID[56], multiply by nr_data_stripes().
@@ -5835,8 +5826,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5835 u64 length = 0; 5826 u64 length = 0;
5836 u64 map_length; 5827 u64 map_length;
5837 int ret; 5828 int ret;
5838 int dev_nr = 0; 5829 int dev_nr;
5839 int total_devs = 1; 5830 int total_devs;
5840 struct btrfs_bio *bbio = NULL; 5831 struct btrfs_bio *bbio = NULL;
5841 5832
5842 length = bio->bi_iter.bi_size; 5833 length = bio->bi_iter.bi_size;
@@ -5877,11 +5868,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5877 BUG(); 5868 BUG();
5878 } 5869 }
5879 5870
5880 while (dev_nr < total_devs) { 5871 for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
5881 dev = bbio->stripes[dev_nr].dev; 5872 dev = bbio->stripes[dev_nr].dev;
5882 if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) { 5873 if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
5883 bbio_error(bbio, first_bio, logical); 5874 bbio_error(bbio, first_bio, logical);
5884 dev_nr++;
5885 continue; 5875 continue;
5886 } 5876 }
5887 5877
@@ -5894,7 +5884,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5894 ret = breakup_stripe_bio(root, bbio, first_bio, dev, 5884 ret = breakup_stripe_bio(root, bbio, first_bio, dev,
5895 dev_nr, rw, async_submit); 5885 dev_nr, rw, async_submit);
5896 BUG_ON(ret); 5886 BUG_ON(ret);
5897 dev_nr++;
5898 continue; 5887 continue;
5899 } 5888 }
5900 5889
@@ -5909,7 +5898,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5909 submit_stripe_bio(root, bbio, bio, 5898 submit_stripe_bio(root, bbio, bio,
5910 bbio->stripes[dev_nr].physical, dev_nr, rw, 5899 bbio->stripes[dev_nr].physical, dev_nr, rw,
5911 async_submit); 5900 async_submit);
5912 dev_nr++;
5913 } 5901 }
5914 btrfs_bio_counter_dec(root->fs_info); 5902 btrfs_bio_counter_dec(root->fs_info);
5915 return 0; 5903 return 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 83069dec6898..ebc31331a837 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -421,8 +421,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
421int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 421int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
422 struct btrfs_fs_devices **fs_devices_ret); 422 struct btrfs_fs_devices **fs_devices_ret);
423int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); 423int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
424void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, 424void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
425 struct btrfs_fs_devices *fs_devices, int step);
426int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, 425int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
427 char *device_path, 426 char *device_path,
428 struct btrfs_device **device); 427 struct btrfs_device **device);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 883b93623bc5..45ea704be030 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -364,22 +364,42 @@ const struct xattr_handler *btrfs_xattr_handlers[] = {
364/* 364/*
365 * Check if the attribute is in a supported namespace. 365 * Check if the attribute is in a supported namespace.
366 * 366 *
367 * This applied after the check for the synthetic attributes in the system 367 * This is applied after the check for the synthetic attributes in the system
368 * namespace. 368 * namespace.
369 */ 369 */
370static bool btrfs_is_valid_xattr(const char *name) 370static int btrfs_is_valid_xattr(const char *name)
371{ 371{
372 return !strncmp(name, XATTR_SECURITY_PREFIX, 372 int len = strlen(name);
373 XATTR_SECURITY_PREFIX_LEN) || 373 int prefixlen = 0;
374 !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || 374
375 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || 375 if (!strncmp(name, XATTR_SECURITY_PREFIX,
376 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) || 376 XATTR_SECURITY_PREFIX_LEN))
377 !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN); 377 prefixlen = XATTR_SECURITY_PREFIX_LEN;
378 else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
379 prefixlen = XATTR_SYSTEM_PREFIX_LEN;
380 else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
381 prefixlen = XATTR_TRUSTED_PREFIX_LEN;
382 else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
383 prefixlen = XATTR_USER_PREFIX_LEN;
384 else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
385 prefixlen = XATTR_BTRFS_PREFIX_LEN;
386 else
387 return -EOPNOTSUPP;
388
389 /*
390 * The name cannot consist of just prefix
391 */
392 if (len <= prefixlen)
393 return -EINVAL;
394
395 return 0;
378} 396}
379 397
380ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, 398ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
381 void *buffer, size_t size) 399 void *buffer, size_t size)
382{ 400{
401 int ret;
402
383 /* 403 /*
384 * If this is a request for a synthetic attribute in the system.* 404 * If this is a request for a synthetic attribute in the system.*
385 * namespace use the generic infrastructure to resolve a handler 405 * namespace use the generic infrastructure to resolve a handler
@@ -388,8 +408,9 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
388 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 408 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
389 return generic_getxattr(dentry, name, buffer, size); 409 return generic_getxattr(dentry, name, buffer, size);
390 410
391 if (!btrfs_is_valid_xattr(name)) 411 ret = btrfs_is_valid_xattr(name);
392 return -EOPNOTSUPP; 412 if (ret)
413 return ret;
393 return __btrfs_getxattr(dentry->d_inode, name, buffer, size); 414 return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
394} 415}
395 416
@@ -397,6 +418,7 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
397 size_t size, int flags) 418 size_t size, int flags)
398{ 419{
399 struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; 420 struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
421 int ret;
400 422
401 /* 423 /*
402 * The permission on security.* and system.* is not checked 424 * The permission on security.* and system.* is not checked
@@ -413,8 +435,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
413 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 435 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
414 return generic_setxattr(dentry, name, value, size, flags); 436 return generic_setxattr(dentry, name, value, size, flags);
415 437
416 if (!btrfs_is_valid_xattr(name)) 438 ret = btrfs_is_valid_xattr(name);
417 return -EOPNOTSUPP; 439 if (ret)
440 return ret;
418 441
419 if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) 442 if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
420 return btrfs_set_prop(dentry->d_inode, name, 443 return btrfs_set_prop(dentry->d_inode, name,
@@ -430,6 +453,7 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
430int btrfs_removexattr(struct dentry *dentry, const char *name) 453int btrfs_removexattr(struct dentry *dentry, const char *name)
431{ 454{
432 struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; 455 struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
456 int ret;
433 457
434 /* 458 /*
435 * The permission on security.* and system.* is not checked 459 * The permission on security.* and system.* is not checked
@@ -446,8 +470,9 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
446 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 470 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
447 return generic_removexattr(dentry, name); 471 return generic_removexattr(dentry, name);
448 472
449 if (!btrfs_is_valid_xattr(name)) 473 ret = btrfs_is_valid_xattr(name);
450 return -EOPNOTSUPP; 474 if (ret)
475 return ret;
451 476
452 if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) 477 if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
453 return btrfs_set_prop(dentry->d_inode, name, 478 return btrfs_set_prop(dentry->d_inode, name,
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index fb22fd8d8fb8..82990b8f872b 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -403,7 +403,7 @@ next:
403 return ret; 403 return ret;
404} 404}
405 405
406struct btrfs_compress_op btrfs_zlib_compress = { 406const struct btrfs_compress_op btrfs_zlib_compress = {
407 .alloc_workspace = zlib_alloc_workspace, 407 .alloc_workspace = zlib_alloc_workspace,
408 .free_workspace = zlib_free_workspace, 408 .free_workspace = zlib_free_workspace,
409 .compress_pages = zlib_compress_pages, 409 .compress_pages = zlib_compress_pages,