aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-04-11 17:16:53 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-04-11 17:16:53 -0400
commit 3123bca71993c2346a458875488863772c1d5dc4 (patch)
tree a1e082130a3d7a4ba1faaea60e699939cf821ab6 /fs/btrfs
parent 582076ab16779208e7eb6ce712a9c0a6cc5bafe4 (diff)
parent e4fbaee29272533a242f117d18712e2974520d2c (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull second set of btrfs updates from Chris Mason:
 "The most important changes here are from Josef, fixing a btrfs
  regression in 3.14 that can cause corruptions in the extent allocation
  tree when snapshots are in use.

  Josef also fixed some deadlocks in send/recv and other assorted races
  when balance is running"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (23 commits)
  Btrfs: fix compile warnings on on avr32 platform
  btrfs: allow mounting btrfs subvolumes with different ro/rw options
  btrfs: export global block reserve size as space_info
  btrfs: fix crash in remount(thread_pool=) case
  Btrfs: abort the transaction when we don't find our extent ref
  Btrfs: fix EINVAL checks in btrfs_clone
  Btrfs: fix unlock in __start_delalloc_inodes()
  Btrfs: scrub raid56 stripes in the right way
  Btrfs: don't compress for a small write
  Btrfs: more efficient io tree navigation on wait_extent_bit
  Btrfs: send, build path string only once in send_hole
  btrfs: filter invalid arg for btrfs resize
  Btrfs: send, fix data corruption due to incorrect hole detection
  Btrfs: kmalloc() doesn't return an ERR_PTR
  Btrfs: fix snapshot vs nocow writting
  btrfs: Change the expanding write sequence to fix snapshot related bug.
  btrfs: make device scan less noisy
  btrfs: fix lockdep warning with reclaim lock inversion
  Btrfs: hold the commit_root_sem when getting the commit root during send
  Btrfs: remove transaction from send
  ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/async-thread.c 2
-rw-r--r-- fs/btrfs/backref.c 33
-rw-r--r-- fs/btrfs/ctree.c 94
-rw-r--r-- fs/btrfs/ctree.h 13
-rw-r--r-- fs/btrfs/disk-io.c 23
-rw-r--r-- fs/btrfs/extent-tree.c 35
-rw-r--r-- fs/btrfs/extent_io.c 8
-rw-r--r-- fs/btrfs/extent_io.h 1
-rw-r--r-- fs/btrfs/file.c 5
-rw-r--r-- fs/btrfs/inode-map.c 14
-rw-r--r-- fs/btrfs/inode.c 36
-rw-r--r-- fs/btrfs/ioctl.c 35
-rw-r--r-- fs/btrfs/relocation.c 21
-rw-r--r-- fs/btrfs/scrub.c 108
-rw-r--r-- fs/btrfs/send.c 117
-rw-r--r-- fs/btrfs/super.c 22
-rw-r--r-- fs/btrfs/transaction.c 48
-rw-r--r-- fs/btrfs/transaction.h 3
-rw-r--r-- fs/btrfs/volumes.c 35
19 files changed, 374 insertions, 279 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index ecb5832c0967..5a201d81049c 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -323,6 +323,8 @@ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
323 323
324void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max) 324void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
325{ 325{
326 if (!wq)
327 return;
326 wq->normal->max_active = max; 328 wq->normal->max_active = max;
327 if (wq->high) 329 if (wq->high)
328 wq->high->max_active = max; 330 wq->high->max_active = max;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index aad7201ad11b..10db21fa0926 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -330,7 +330,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
330 goto out; 330 goto out;
331 } 331 }
332 332
333 root_level = btrfs_old_root_level(root, time_seq); 333 if (path->search_commit_root)
334 root_level = btrfs_header_level(root->commit_root);
335 else
336 root_level = btrfs_old_root_level(root, time_seq);
334 337
335 if (root_level + 1 == level) { 338 if (root_level + 1 == level) {
336 srcu_read_unlock(&fs_info->subvol_srcu, index); 339 srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -1099,9 +1102,9 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
1099 * 1102 *
1100 * returns 0 on success, < 0 on error. 1103 * returns 0 on success, < 0 on error.
1101 */ 1104 */
1102int btrfs_find_all_roots(struct btrfs_trans_handle *trans, 1105static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
1103 struct btrfs_fs_info *fs_info, u64 bytenr, 1106 struct btrfs_fs_info *fs_info, u64 bytenr,
1104 u64 time_seq, struct ulist **roots) 1107 u64 time_seq, struct ulist **roots)
1105{ 1108{
1106 struct ulist *tmp; 1109 struct ulist *tmp;
1107 struct ulist_node *node = NULL; 1110 struct ulist_node *node = NULL;
@@ -1137,6 +1140,20 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
1137 return 0; 1140 return 0;
1138} 1141}
1139 1142
1143int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
1144 struct btrfs_fs_info *fs_info, u64 bytenr,
1145 u64 time_seq, struct ulist **roots)
1146{
1147 int ret;
1148
1149 if (!trans)
1150 down_read(&fs_info->commit_root_sem);
1151 ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots);
1152 if (!trans)
1153 up_read(&fs_info->commit_root_sem);
1154 return ret;
1155}
1156
1140/* 1157/*
1141 * this makes the path point to (inum INODE_ITEM ioff) 1158 * this makes the path point to (inum INODE_ITEM ioff)
1142 */ 1159 */
@@ -1516,6 +1533,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1516 if (IS_ERR(trans)) 1533 if (IS_ERR(trans))
1517 return PTR_ERR(trans); 1534 return PTR_ERR(trans);
1518 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 1535 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1536 } else {
1537 down_read(&fs_info->commit_root_sem);
1519 } 1538 }
1520 1539
1521 ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, 1540 ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
@@ -1526,8 +1545,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1526 1545
1527 ULIST_ITER_INIT(&ref_uiter); 1546 ULIST_ITER_INIT(&ref_uiter);
1528 while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { 1547 while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
1529 ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, 1548 ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val,
1530 tree_mod_seq_elem.seq, &roots); 1549 tree_mod_seq_elem.seq, &roots);
1531 if (ret) 1550 if (ret)
1532 break; 1551 break;
1533 ULIST_ITER_INIT(&root_uiter); 1552 ULIST_ITER_INIT(&root_uiter);
@@ -1549,6 +1568,8 @@ out:
1549 if (!search_commit_root) { 1568 if (!search_commit_root) {
1550 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 1569 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1551 btrfs_end_transaction(trans, fs_info->extent_root); 1570 btrfs_end_transaction(trans, fs_info->extent_root);
1571 } else {
1572 up_read(&fs_info->commit_root_sem);
1552 } 1573 }
1553 1574
1554 return ret; 1575 return ret;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 88d1b1eedc9c..1bcfcdb23cf4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2769,9 +2769,13 @@ again:
2769 * the commit roots are read only 2769 * the commit roots are read only
2770 * so we always do read locks 2770 * so we always do read locks
2771 */ 2771 */
2772 if (p->need_commit_sem)
2773 down_read(&root->fs_info->commit_root_sem);
2772 b = root->commit_root; 2774 b = root->commit_root;
2773 extent_buffer_get(b); 2775 extent_buffer_get(b);
2774 level = btrfs_header_level(b); 2776 level = btrfs_header_level(b);
2777 if (p->need_commit_sem)
2778 up_read(&root->fs_info->commit_root_sem);
2775 if (!p->skip_locking) 2779 if (!p->skip_locking)
2776 btrfs_tree_read_lock(b); 2780 btrfs_tree_read_lock(b);
2777 } else { 2781 } else {
@@ -5360,7 +5364,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5360{ 5364{
5361 int ret; 5365 int ret;
5362 int cmp; 5366 int cmp;
5363 struct btrfs_trans_handle *trans = NULL;
5364 struct btrfs_path *left_path = NULL; 5367 struct btrfs_path *left_path = NULL;
5365 struct btrfs_path *right_path = NULL; 5368 struct btrfs_path *right_path = NULL;
5366 struct btrfs_key left_key; 5369 struct btrfs_key left_key;
@@ -5378,9 +5381,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5378 u64 right_blockptr; 5381 u64 right_blockptr;
5379 u64 left_gen; 5382 u64 left_gen;
5380 u64 right_gen; 5383 u64 right_gen;
5381 u64 left_start_ctransid;
5382 u64 right_start_ctransid;
5383 u64 ctransid;
5384 5384
5385 left_path = btrfs_alloc_path(); 5385 left_path = btrfs_alloc_path();
5386 if (!left_path) { 5386 if (!left_path) {
@@ -5404,21 +5404,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5404 right_path->search_commit_root = 1; 5404 right_path->search_commit_root = 1;
5405 right_path->skip_locking = 1; 5405 right_path->skip_locking = 1;
5406 5406
5407 spin_lock(&left_root->root_item_lock);
5408 left_start_ctransid = btrfs_root_ctransid(&left_root->root_item);
5409 spin_unlock(&left_root->root_item_lock);
5410
5411 spin_lock(&right_root->root_item_lock);
5412 right_start_ctransid = btrfs_root_ctransid(&right_root->root_item);
5413 spin_unlock(&right_root->root_item_lock);
5414
5415 trans = btrfs_join_transaction(left_root);
5416 if (IS_ERR(trans)) {
5417 ret = PTR_ERR(trans);
5418 trans = NULL;
5419 goto out;
5420 }
5421
5422 /* 5407 /*
5423 * Strategy: Go to the first items of both trees. Then do 5408 * Strategy: Go to the first items of both trees. Then do
5424 * 5409 *
@@ -5455,6 +5440,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5455 * the right if possible or go up and right. 5440 * the right if possible or go up and right.
5456 */ 5441 */
5457 5442
5443 down_read(&left_root->fs_info->commit_root_sem);
5458 left_level = btrfs_header_level(left_root->commit_root); 5444 left_level = btrfs_header_level(left_root->commit_root);
5459 left_root_level = left_level; 5445 left_root_level = left_level;
5460 left_path->nodes[left_level] = left_root->commit_root; 5446 left_path->nodes[left_level] = left_root->commit_root;
@@ -5464,6 +5450,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5464 right_root_level = right_level; 5450 right_root_level = right_level;
5465 right_path->nodes[right_level] = right_root->commit_root; 5451 right_path->nodes[right_level] = right_root->commit_root;
5466 extent_buffer_get(right_path->nodes[right_level]); 5452 extent_buffer_get(right_path->nodes[right_level]);
5453 up_read(&left_root->fs_info->commit_root_sem);
5467 5454
5468 if (left_level == 0) 5455 if (left_level == 0)
5469 btrfs_item_key_to_cpu(left_path->nodes[left_level], 5456 btrfs_item_key_to_cpu(left_path->nodes[left_level],
@@ -5482,67 +5469,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5482 advance_left = advance_right = 0; 5469 advance_left = advance_right = 0;
5483 5470
5484 while (1) { 5471 while (1) {
5485 /*
5486 * We need to make sure the transaction does not get committed
5487 * while we do anything on commit roots. This means, we need to
5488 * join and leave transactions for every item that we process.
5489 */
5490 if (trans && btrfs_should_end_transaction(trans, left_root)) {
5491 btrfs_release_path(left_path);
5492 btrfs_release_path(right_path);
5493
5494 ret = btrfs_end_transaction(trans, left_root);
5495 trans = NULL;
5496 if (ret < 0)
5497 goto out;
5498 }
5499 /* now rejoin the transaction */
5500 if (!trans) {
5501 trans = btrfs_join_transaction(left_root);
5502 if (IS_ERR(trans)) {
5503 ret = PTR_ERR(trans);
5504 trans = NULL;
5505 goto out;
5506 }
5507
5508 spin_lock(&left_root->root_item_lock);
5509 ctransid = btrfs_root_ctransid(&left_root->root_item);
5510 spin_unlock(&left_root->root_item_lock);
5511 if (ctransid != left_start_ctransid)
5512 left_start_ctransid = 0;
5513
5514 spin_lock(&right_root->root_item_lock);
5515 ctransid = btrfs_root_ctransid(&right_root->root_item);
5516 spin_unlock(&right_root->root_item_lock);
5517 if (ctransid != right_start_ctransid)
5518 right_start_ctransid = 0;
5519
5520 if (!left_start_ctransid || !right_start_ctransid) {
5521 WARN(1, KERN_WARNING
5522 "BTRFS: btrfs_compare_tree detected "
5523 "a change in one of the trees while "
5524 "iterating. This is probably a "
5525 "bug.\n");
5526 ret = -EIO;
5527 goto out;
5528 }
5529
5530 /*
5531 * the commit root may have changed, so start again
5532 * where we stopped
5533 */
5534 left_path->lowest_level = left_level;
5535 right_path->lowest_level = right_level;
5536 ret = btrfs_search_slot(NULL, left_root,
5537 &left_key, left_path, 0, 0);
5538 if (ret < 0)
5539 goto out;
5540 ret = btrfs_search_slot(NULL, right_root,
5541 &right_key, right_path, 0, 0);
5542 if (ret < 0)
5543 goto out;
5544 }
5545
5546 if (advance_left && !left_end_reached) { 5472 if (advance_left && !left_end_reached) {
5547 ret = tree_advance(left_root, left_path, &left_level, 5473 ret = tree_advance(left_root, left_path, &left_level,
5548 left_root_level, 5474 left_root_level,
@@ -5672,14 +5598,6 @@ out:
5672 btrfs_free_path(left_path); 5598 btrfs_free_path(left_path);
5673 btrfs_free_path(right_path); 5599 btrfs_free_path(right_path);
5674 kfree(tmp_buf); 5600 kfree(tmp_buf);
5675
5676 if (trans) {
5677 if (!ret)
5678 ret = btrfs_end_transaction(trans, left_root);
5679 else
5680 btrfs_end_transaction(trans, left_root);
5681 }
5682
5683 return ret; 5601 return ret;
5684} 5602}
5685 5603
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bc96c03dd259..4c48df572bd6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -609,6 +609,7 @@ struct btrfs_path {
609 unsigned int skip_locking:1; 609 unsigned int skip_locking:1;
610 unsigned int leave_spinning:1; 610 unsigned int leave_spinning:1;
611 unsigned int search_commit_root:1; 611 unsigned int search_commit_root:1;
612 unsigned int need_commit_sem:1;
612}; 613};
613 614
614/* 615/*
@@ -986,7 +987,8 @@ struct btrfs_dev_replace_item {
986#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) 987#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
987#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7) 988#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
988#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8) 989#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
989#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE 990#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
991 BTRFS_SPACE_INFO_GLOBAL_RSV)
990 992
991enum btrfs_raid_types { 993enum btrfs_raid_types {
992 BTRFS_RAID_RAID10, 994 BTRFS_RAID_RAID10,
@@ -1018,6 +1020,12 @@ enum btrfs_raid_types {
1018 */ 1020 */
1019#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48) 1021#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
1020 1022
1023/*
1024 * A fake block group type that is used to communicate global block reserve
1025 * size to userspace via the SPACE_INFO ioctl.
1026 */
1027#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49)
1028
1021#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \ 1029#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
1022 BTRFS_AVAIL_ALLOC_BIT_SINGLE) 1030 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
1023 1031
@@ -1440,7 +1448,7 @@ struct btrfs_fs_info {
1440 */ 1448 */
1441 struct mutex ordered_extent_flush_mutex; 1449 struct mutex ordered_extent_flush_mutex;
1442 1450
1443 struct rw_semaphore extent_commit_sem; 1451 struct rw_semaphore commit_root_sem;
1444 1452
1445 struct rw_semaphore cleanup_work_sem; 1453 struct rw_semaphore cleanup_work_sem;
1446 1454
@@ -1711,7 +1719,6 @@ struct btrfs_root {
1711 struct btrfs_block_rsv *block_rsv; 1719 struct btrfs_block_rsv *block_rsv;
1712 1720
1713 /* free ino cache stuff */ 1721 /* free ino cache stuff */
1714 struct mutex fs_commit_mutex;
1715 struct btrfs_free_space_ctl *free_ino_ctl; 1722 struct btrfs_free_space_ctl *free_ino_ctl;
1716 enum btrfs_caching_type cached; 1723 enum btrfs_caching_type cached;
1717 spinlock_t cache_lock; 1724 spinlock_t cache_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index bd0f752b797b..029d46c2e170 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -329,6 +329,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
329{ 329{
330 struct extent_state *cached_state = NULL; 330 struct extent_state *cached_state = NULL;
331 int ret; 331 int ret;
332 bool need_lock = (current->journal_info ==
333 (void *)BTRFS_SEND_TRANS_STUB);
332 334
333 if (!parent_transid || btrfs_header_generation(eb) == parent_transid) 335 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
334 return 0; 336 return 0;
@@ -336,6 +338,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
336 if (atomic) 338 if (atomic)
337 return -EAGAIN; 339 return -EAGAIN;
338 340
341 if (need_lock) {
342 btrfs_tree_read_lock(eb);
343 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
344 }
345
339 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, 346 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
340 0, &cached_state); 347 0, &cached_state);
341 if (extent_buffer_uptodate(eb) && 348 if (extent_buffer_uptodate(eb) &&
@@ -347,10 +354,21 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
347 "found %llu\n", 354 "found %llu\n",
348 eb->start, parent_transid, btrfs_header_generation(eb)); 355 eb->start, parent_transid, btrfs_header_generation(eb));
349 ret = 1; 356 ret = 1;
350 clear_extent_buffer_uptodate(eb); 357
358 /*
359 * Things reading via commit roots that don't have normal protection,
360 * like send, can have a really old block in cache that may point at a
361 * block that has been free'd and re-allocated. So don't clear uptodate
362 * if we find an eb that is under IO (dirty/writeback) because we could
363 * end up reading in the stale data and then writing it back out and
364 * making everybody very sad.
365 */
366 if (!extent_buffer_under_io(eb))
367 clear_extent_buffer_uptodate(eb);
351out: 368out:
352 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, 369 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
353 &cached_state, GFP_NOFS); 370 &cached_state, GFP_NOFS);
371 btrfs_tree_read_unlock_blocking(eb);
354 return ret; 372 return ret;
355} 373}
356 374
@@ -1546,7 +1564,6 @@ int btrfs_init_fs_root(struct btrfs_root *root)
1546 root->subv_writers = writers; 1564 root->subv_writers = writers;
1547 1565
1548 btrfs_init_free_ino_ctl(root); 1566 btrfs_init_free_ino_ctl(root);
1549 mutex_init(&root->fs_commit_mutex);
1550 spin_lock_init(&root->cache_lock); 1567 spin_lock_init(&root->cache_lock);
1551 init_waitqueue_head(&root->cache_wait); 1568 init_waitqueue_head(&root->cache_wait);
1552 1569
@@ -2324,7 +2341,7 @@ int open_ctree(struct super_block *sb,
2324 mutex_init(&fs_info->transaction_kthread_mutex); 2341 mutex_init(&fs_info->transaction_kthread_mutex);
2325 mutex_init(&fs_info->cleaner_mutex); 2342 mutex_init(&fs_info->cleaner_mutex);
2326 mutex_init(&fs_info->volume_mutex); 2343 mutex_init(&fs_info->volume_mutex);
2327 init_rwsem(&fs_info->extent_commit_sem); 2344 init_rwsem(&fs_info->commit_root_sem);
2328 init_rwsem(&fs_info->cleanup_work_sem); 2345 init_rwsem(&fs_info->cleanup_work_sem);
2329 init_rwsem(&fs_info->subvol_sem); 2346 init_rwsem(&fs_info->subvol_sem);
2330 sema_init(&fs_info->uuid_tree_rescan_sem, 1); 2347 sema_init(&fs_info->uuid_tree_rescan_sem, 1);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index c6b6a6e3e735..1306487c82cf 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -419,7 +419,7 @@ static noinline void caching_thread(struct btrfs_work *work)
419again: 419again:
420 mutex_lock(&caching_ctl->mutex); 420 mutex_lock(&caching_ctl->mutex);
421 /* need to make sure the commit_root doesn't disappear */ 421 /* need to make sure the commit_root doesn't disappear */
422 down_read(&fs_info->extent_commit_sem); 422 down_read(&fs_info->commit_root_sem);
423 423
424next: 424next:
425 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 425 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
@@ -443,10 +443,10 @@ next:
443 break; 443 break;
444 444
445 if (need_resched() || 445 if (need_resched() ||
446 rwsem_is_contended(&fs_info->extent_commit_sem)) { 446 rwsem_is_contended(&fs_info->commit_root_sem)) {
447 caching_ctl->progress = last; 447 caching_ctl->progress = last;
448 btrfs_release_path(path); 448 btrfs_release_path(path);
449 up_read(&fs_info->extent_commit_sem); 449 up_read(&fs_info->commit_root_sem);
450 mutex_unlock(&caching_ctl->mutex); 450 mutex_unlock(&caching_ctl->mutex);
451 cond_resched(); 451 cond_resched();
452 goto again; 452 goto again;
@@ -513,7 +513,7 @@ next:
513 513
514err: 514err:
515 btrfs_free_path(path); 515 btrfs_free_path(path);
516 up_read(&fs_info->extent_commit_sem); 516 up_read(&fs_info->commit_root_sem);
517 517
518 free_excluded_extents(extent_root, block_group); 518 free_excluded_extents(extent_root, block_group);
519 519
@@ -633,10 +633,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
633 return 0; 633 return 0;
634 } 634 }
635 635
636 down_write(&fs_info->extent_commit_sem); 636 down_write(&fs_info->commit_root_sem);
637 atomic_inc(&caching_ctl->count); 637 atomic_inc(&caching_ctl->count);
638 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 638 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
639 up_write(&fs_info->extent_commit_sem); 639 up_write(&fs_info->commit_root_sem);
640 640
641 btrfs_get_block_group(cache); 641 btrfs_get_block_group(cache);
642 642
@@ -2444,7 +2444,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2444 spin_unlock(&locked_ref->lock); 2444 spin_unlock(&locked_ref->lock);
2445 spin_lock(&delayed_refs->lock); 2445 spin_lock(&delayed_refs->lock);
2446 spin_lock(&locked_ref->lock); 2446 spin_lock(&locked_ref->lock);
2447 if (rb_first(&locked_ref->ref_root)) { 2447 if (rb_first(&locked_ref->ref_root) ||
2448 locked_ref->extent_op) {
2448 spin_unlock(&locked_ref->lock); 2449 spin_unlock(&locked_ref->lock);
2449 spin_unlock(&delayed_refs->lock); 2450 spin_unlock(&delayed_refs->lock);
2450 continue; 2451 continue;
@@ -5470,7 +5471,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
5470 struct btrfs_block_group_cache *cache; 5471 struct btrfs_block_group_cache *cache;
5471 struct btrfs_space_info *space_info; 5472 struct btrfs_space_info *space_info;
5472 5473
5473 down_write(&fs_info->extent_commit_sem); 5474 down_write(&fs_info->commit_root_sem);
5474 5475
5475 list_for_each_entry_safe(caching_ctl, next, 5476 list_for_each_entry_safe(caching_ctl, next,
5476 &fs_info->caching_block_groups, list) { 5477 &fs_info->caching_block_groups, list) {
@@ -5489,7 +5490,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
5489 else 5490 else
5490 fs_info->pinned_extents = &fs_info->freed_extents[0]; 5491 fs_info->pinned_extents = &fs_info->freed_extents[0];
5491 5492
5492 up_write(&fs_info->extent_commit_sem); 5493 up_write(&fs_info->commit_root_sem);
5493 5494
5494 list_for_each_entry_rcu(space_info, &fs_info->space_info, list) 5495 list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
5495 percpu_counter_set(&space_info->total_bytes_pinned, 0); 5496 percpu_counter_set(&space_info->total_bytes_pinned, 0);
@@ -5744,6 +5745,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5744 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", 5745 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
5745 bytenr, parent, root_objectid, owner_objectid, 5746 bytenr, parent, root_objectid, owner_objectid,
5746 owner_offset); 5747 owner_offset);
5748 btrfs_abort_transaction(trans, extent_root, ret);
5749 goto out;
5747 } else { 5750 } else {
5748 btrfs_abort_transaction(trans, extent_root, ret); 5751 btrfs_abort_transaction(trans, extent_root, ret);
5749 goto out; 5752 goto out;
@@ -8255,14 +8258,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8255 struct btrfs_caching_control *caching_ctl; 8258 struct btrfs_caching_control *caching_ctl;
8256 struct rb_node *n; 8259 struct rb_node *n;
8257 8260
8258 down_write(&info->extent_commit_sem); 8261 down_write(&info->commit_root_sem);
8259 while (!list_empty(&info->caching_block_groups)) { 8262 while (!list_empty(&info->caching_block_groups)) {
8260 caching_ctl = list_entry(info->caching_block_groups.next, 8263 caching_ctl = list_entry(info->caching_block_groups.next,
8261 struct btrfs_caching_control, list); 8264 struct btrfs_caching_control, list);
8262 list_del(&caching_ctl->list); 8265 list_del(&caching_ctl->list);
8263 put_caching_control(caching_ctl); 8266 put_caching_control(caching_ctl);
8264 } 8267 }
8265 up_write(&info->extent_commit_sem); 8268 up_write(&info->commit_root_sem);
8266 8269
8267 spin_lock(&info->block_group_cache_lock); 8270 spin_lock(&info->block_group_cache_lock);
8268 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 8271 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
@@ -8336,9 +8339,15 @@ static void __link_block_group(struct btrfs_space_info *space_info,
8336 struct btrfs_block_group_cache *cache) 8339 struct btrfs_block_group_cache *cache)
8337{ 8340{
8338 int index = get_block_group_index(cache); 8341 int index = get_block_group_index(cache);
8342 bool first = false;
8339 8343
8340 down_write(&space_info->groups_sem); 8344 down_write(&space_info->groups_sem);
8341 if (list_empty(&space_info->block_groups[index])) { 8345 if (list_empty(&space_info->block_groups[index]))
8346 first = true;
8347 list_add_tail(&cache->list, &space_info->block_groups[index]);
8348 up_write(&space_info->groups_sem);
8349
8350 if (first) {
8342 struct kobject *kobj = &space_info->block_group_kobjs[index]; 8351 struct kobject *kobj = &space_info->block_group_kobjs[index];
8343 int ret; 8352 int ret;
8344 8353
@@ -8350,8 +8359,6 @@ static void __link_block_group(struct btrfs_space_info *space_info,
8350 kobject_put(&space_info->kobj); 8359 kobject_put(&space_info->kobj);
8351 } 8360 }
8352 } 8361 }
8353 list_add_tail(&cache->list, &space_info->block_groups[index]);
8354 up_write(&space_info->groups_sem);
8355} 8362}
8356 8363
8357static struct btrfs_block_group_cache * 8364static struct btrfs_block_group_cache *
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ae69a00387e7..3955e475ceec 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -749,6 +749,7 @@ again:
749 * our range starts 749 * our range starts
750 */ 750 */
751 node = tree_search(tree, start); 751 node = tree_search(tree, start);
752process_node:
752 if (!node) 753 if (!node)
753 break; 754 break;
754 755
@@ -769,7 +770,10 @@ again:
769 if (start > end) 770 if (start > end)
770 break; 771 break;
771 772
772 cond_resched_lock(&tree->lock); 773 if (!cond_resched_lock(&tree->lock)) {
774 node = rb_next(node);
775 goto process_node;
776 }
773 } 777 }
774out: 778out:
775 spin_unlock(&tree->lock); 779 spin_unlock(&tree->lock);
@@ -4306,7 +4310,7 @@ static void __free_extent_buffer(struct extent_buffer *eb)
4306 kmem_cache_free(extent_buffer_cache, eb); 4310 kmem_cache_free(extent_buffer_cache, eb);
4307} 4311}
4308 4312
4309static int extent_buffer_under_io(struct extent_buffer *eb) 4313int extent_buffer_under_io(struct extent_buffer *eb)
4310{ 4314{
4311 return (atomic_read(&eb->io_pages) || 4315 return (atomic_read(&eb->io_pages) ||
4312 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) || 4316 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 58b27e5ab521..c488b45237bf 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -320,6 +320,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb);
320int set_extent_buffer_uptodate(struct extent_buffer *eb); 320int set_extent_buffer_uptodate(struct extent_buffer *eb);
321int clear_extent_buffer_uptodate(struct extent_buffer *eb); 321int clear_extent_buffer_uptodate(struct extent_buffer *eb);
322int extent_buffer_uptodate(struct extent_buffer *eb); 322int extent_buffer_uptodate(struct extent_buffer *eb);
323int extent_buffer_under_io(struct extent_buffer *eb);
323int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, 324int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
324 unsigned long min_len, char **map, 325 unsigned long min_len, char **map,
325 unsigned long *map_start, 326 unsigned long *map_start,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c660527af838..c5998477fe60 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1727,6 +1727,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1727 struct btrfs_root *root = BTRFS_I(inode)->root; 1727 struct btrfs_root *root = BTRFS_I(inode)->root;
1728 loff_t *ppos = &iocb->ki_pos; 1728 loff_t *ppos = &iocb->ki_pos;
1729 u64 start_pos; 1729 u64 start_pos;
1730 u64 end_pos;
1730 ssize_t num_written = 0; 1731 ssize_t num_written = 0;
1731 ssize_t err = 0; 1732 ssize_t err = 0;
1732 size_t count, ocount; 1733 size_t count, ocount;
@@ -1781,7 +1782,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1781 1782
1782 start_pos = round_down(pos, root->sectorsize); 1783 start_pos = round_down(pos, root->sectorsize);
1783 if (start_pos > i_size_read(inode)) { 1784 if (start_pos > i_size_read(inode)) {
1784 err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); 1785 /* Expand hole size to cover write data, preventing empty gap */
1786 end_pos = round_up(pos + iov->iov_len, root->sectorsize);
1787 err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
1785 if (err) { 1788 if (err) {
1786 mutex_unlock(&inode->i_mutex); 1789 mutex_unlock(&inode->i_mutex);
1787 goto out; 1790 goto out;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index ab485e57b6fe..cc8ca193d830 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -55,7 +55,7 @@ static int caching_kthread(void *data)
55 key.type = BTRFS_INODE_ITEM_KEY; 55 key.type = BTRFS_INODE_ITEM_KEY;
56again: 56again:
57 /* need to make sure the commit_root doesn't disappear */ 57 /* need to make sure the commit_root doesn't disappear */
58 mutex_lock(&root->fs_commit_mutex); 58 down_read(&fs_info->commit_root_sem);
59 59
60 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 60 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
61 if (ret < 0) 61 if (ret < 0)
@@ -88,7 +88,7 @@ again:
88 btrfs_item_key_to_cpu(leaf, &key, 0); 88 btrfs_item_key_to_cpu(leaf, &key, 0);
89 btrfs_release_path(path); 89 btrfs_release_path(path);
90 root->cache_progress = last; 90 root->cache_progress = last;
91 mutex_unlock(&root->fs_commit_mutex); 91 up_read(&fs_info->commit_root_sem);
92 schedule_timeout(1); 92 schedule_timeout(1);
93 goto again; 93 goto again;
94 } else 94 } else
@@ -127,7 +127,7 @@ next:
127 btrfs_unpin_free_ino(root); 127 btrfs_unpin_free_ino(root);
128out: 128out:
129 wake_up(&root->cache_wait); 129 wake_up(&root->cache_wait);
130 mutex_unlock(&root->fs_commit_mutex); 130 up_read(&fs_info->commit_root_sem);
131 131
132 btrfs_free_path(path); 132 btrfs_free_path(path);
133 133
@@ -223,11 +223,11 @@ again:
223 * or the caching work is done. 223 * or the caching work is done.
224 */ 224 */
225 225
226 mutex_lock(&root->fs_commit_mutex); 226 down_write(&root->fs_info->commit_root_sem);
227 spin_lock(&root->cache_lock); 227 spin_lock(&root->cache_lock);
228 if (root->cached == BTRFS_CACHE_FINISHED) { 228 if (root->cached == BTRFS_CACHE_FINISHED) {
229 spin_unlock(&root->cache_lock); 229 spin_unlock(&root->cache_lock);
230 mutex_unlock(&root->fs_commit_mutex); 230 up_write(&root->fs_info->commit_root_sem);
231 goto again; 231 goto again;
232 } 232 }
233 spin_unlock(&root->cache_lock); 233 spin_unlock(&root->cache_lock);
@@ -240,7 +240,7 @@ again:
240 else 240 else
241 __btrfs_add_free_space(pinned, objectid, 1); 241 __btrfs_add_free_space(pinned, objectid, 1);
242 242
243 mutex_unlock(&root->fs_commit_mutex); 243 up_write(&root->fs_info->commit_root_sem);
244 } 244 }
245} 245}
246 246
@@ -250,7 +250,7 @@ again:
250 * and others will just be dropped, because the commit root we were 250 * and others will just be dropped, because the commit root we were
251 * searching has changed. 251 * searching has changed.
252 * 252 *
253 * Must be called with root->fs_commit_mutex held 253 * Must be called with root->fs_info->commit_root_sem held
254 */ 254 */
255void btrfs_unpin_free_ino(struct btrfs_root *root) 255void btrfs_unpin_free_ino(struct btrfs_root *root)
256{ 256{
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 06e9a4152b14..5f805bc944fa 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -394,6 +394,14 @@ static noinline int compress_file_range(struct inode *inode,
394 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) 394 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
395 btrfs_add_inode_defrag(NULL, inode); 395 btrfs_add_inode_defrag(NULL, inode);
396 396
397 /*
398 * skip compression for a small file range(<=blocksize) that
399 * isn't an inline extent, since it doesn't save disk space at all.
400 */
401 if ((end - start + 1) <= blocksize &&
402 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
403 goto cleanup_and_bail_uncompressed;
404
397 actual_end = min_t(u64, isize, end + 1); 405 actual_end = min_t(u64, isize, end + 1);
398again: 406again:
399 will_compress = 0; 407 will_compress = 0;
@@ -1271,6 +1279,15 @@ next_slot:
1271 disk_bytenr += cur_offset - found_key.offset; 1279 disk_bytenr += cur_offset - found_key.offset;
1272 num_bytes = min(end + 1, extent_end) - cur_offset; 1280 num_bytes = min(end + 1, extent_end) - cur_offset;
1273 /* 1281 /*
1282 * if there are pending snapshots for this root,
1283 * we fall into common COW way.
1284 */
1285 if (!nolock) {
1286 err = btrfs_start_nocow_write(root);
1287 if (!err)
1288 goto out_check;
1289 }
1290 /*
1274 * force cow if csum exists in the range. 1291 * force cow if csum exists in the range.
1275 * this ensure that csum for a given extent are 1292 * this ensure that csum for a given extent are
1276 * either valid or do not exist. 1293 * either valid or do not exist.
@@ -1289,6 +1306,8 @@ next_slot:
1289out_check: 1306out_check:
1290 if (extent_end <= start) { 1307 if (extent_end <= start) {
1291 path->slots[0]++; 1308 path->slots[0]++;
1309 if (!nolock && nocow)
1310 btrfs_end_nocow_write(root);
1292 goto next_slot; 1311 goto next_slot;
1293 } 1312 }
1294 if (!nocow) { 1313 if (!nocow) {
@@ -1306,8 +1325,11 @@ out_check:
1306 ret = cow_file_range(inode, locked_page, 1325 ret = cow_file_range(inode, locked_page,
1307 cow_start, found_key.offset - 1, 1326 cow_start, found_key.offset - 1,
1308 page_started, nr_written, 1); 1327 page_started, nr_written, 1);
1309 if (ret) 1328 if (ret) {
1329 if (!nolock && nocow)
1330 btrfs_end_nocow_write(root);
1310 goto error; 1331 goto error;
1332 }
1311 cow_start = (u64)-1; 1333 cow_start = (u64)-1;
1312 } 1334 }
1313 1335
@@ -1354,8 +1376,11 @@ out_check:
1354 BTRFS_DATA_RELOC_TREE_OBJECTID) { 1376 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1355 ret = btrfs_reloc_clone_csums(inode, cur_offset, 1377 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1356 num_bytes); 1378 num_bytes);
1357 if (ret) 1379 if (ret) {
1380 if (!nolock && nocow)
1381 btrfs_end_nocow_write(root);
1358 goto error; 1382 goto error;
1383 }
1359 } 1384 }
1360 1385
1361 extent_clear_unlock_delalloc(inode, cur_offset, 1386 extent_clear_unlock_delalloc(inode, cur_offset,
@@ -1363,6 +1388,8 @@ out_check:
1363 locked_page, EXTENT_LOCKED | 1388 locked_page, EXTENT_LOCKED |
1364 EXTENT_DELALLOC, PAGE_UNLOCK | 1389 EXTENT_DELALLOC, PAGE_UNLOCK |
1365 PAGE_SET_PRIVATE2); 1390 PAGE_SET_PRIVATE2);
1391 if (!nolock && nocow)
1392 btrfs_end_nocow_write(root);
1366 cur_offset = extent_end; 1393 cur_offset = extent_end;
1367 if (cur_offset > end) 1394 if (cur_offset > end)
1368 break; 1395 break;
@@ -8476,19 +8503,20 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
8476 else 8503 else
8477 iput(inode); 8504 iput(inode);
8478 ret = -ENOMEM; 8505 ret = -ENOMEM;
8479 break; 8506 goto out;
8480 } 8507 }
8481 list_add_tail(&work->list, &works); 8508 list_add_tail(&work->list, &works);
8482 btrfs_queue_work(root->fs_info->flush_workers, 8509 btrfs_queue_work(root->fs_info->flush_workers,
8483 &work->work); 8510 &work->work);
8484 ret++; 8511 ret++;
8485 if (nr != -1 && ret >= nr) 8512 if (nr != -1 && ret >= nr)
8486 break; 8513 goto out;
8487 cond_resched(); 8514 cond_resched();
8488 spin_lock(&root->delalloc_lock); 8515 spin_lock(&root->delalloc_lock);
8489 } 8516 }
8490 spin_unlock(&root->delalloc_lock); 8517 spin_unlock(&root->delalloc_lock);
8491 8518
8519out:
8492 list_for_each_entry_safe(work, next, &works, list) { 8520 list_for_each_entry_safe(work, next, &works, list) {
8493 list_del_init(&work->list); 8521 list_del_init(&work->list);
8494 btrfs_wait_and_free_delalloc_work(work); 8522 btrfs_wait_and_free_delalloc_work(work);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0401397b5c92..e79ff6b90cb7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1472,6 +1472,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1472 struct btrfs_trans_handle *trans; 1472 struct btrfs_trans_handle *trans;
1473 struct btrfs_device *device = NULL; 1473 struct btrfs_device *device = NULL;
1474 char *sizestr; 1474 char *sizestr;
1475 char *retptr;
1475 char *devstr = NULL; 1476 char *devstr = NULL;
1476 int ret = 0; 1477 int ret = 0;
1477 int mod = 0; 1478 int mod = 0;
@@ -1539,8 +1540,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1539 mod = 1; 1540 mod = 1;
1540 sizestr++; 1541 sizestr++;
1541 } 1542 }
1542 new_size = memparse(sizestr, NULL); 1543 new_size = memparse(sizestr, &retptr);
1543 if (new_size == 0) { 1544 if (*retptr != '\0' || new_size == 0) {
1544 ret = -EINVAL; 1545 ret = -EINVAL;
1545 goto out_free; 1546 goto out_free;
1546 } 1547 }
@@ -3140,8 +3141,9 @@ process_slot:
3140 new_key.offset + datal, 3141 new_key.offset + datal,
3141 1); 3142 1);
3142 if (ret) { 3143 if (ret) {
3143 btrfs_abort_transaction(trans, root, 3144 if (ret != -EINVAL)
3144 ret); 3145 btrfs_abort_transaction(trans,
3146 root, ret);
3145 btrfs_end_transaction(trans, root); 3147 btrfs_end_transaction(trans, root);
3146 goto out; 3148 goto out;
3147 } 3149 }
@@ -3538,6 +3540,11 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
3538 up_read(&info->groups_sem); 3540 up_read(&info->groups_sem);
3539 } 3541 }
3540 3542
3543 /*
3544 * Global block reserve, exported as a space_info
3545 */
3546 slot_count++;
3547
3541 /* space_slots == 0 means they are asking for a count */ 3548 /* space_slots == 0 means they are asking for a count */
3542 if (space_args.space_slots == 0) { 3549 if (space_args.space_slots == 0) {
3543 space_args.total_spaces = slot_count; 3550 space_args.total_spaces = slot_count;
@@ -3596,6 +3603,21 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
3596 up_read(&info->groups_sem); 3603 up_read(&info->groups_sem);
3597 } 3604 }
3598 3605
3606 /*
3607 * Add global block reserve
3608 */
3609 if (slot_count) {
3610 struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv;
3611
3612 spin_lock(&block_rsv->lock);
3613 space.total_bytes = block_rsv->size;
3614 space.used_bytes = block_rsv->size - block_rsv->reserved;
3615 spin_unlock(&block_rsv->lock);
3616 space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
3617 memcpy(dest, &space, sizeof(space));
3618 space_args.total_spaces++;
3619 }
3620
3599 user_dest = (struct btrfs_ioctl_space_info __user *) 3621 user_dest = (struct btrfs_ioctl_space_info __user *)
3600 (arg + sizeof(struct btrfs_ioctl_space_args)); 3622 (arg + sizeof(struct btrfs_ioctl_space_args));
3601 3623
@@ -4531,9 +4553,8 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
4531 } 4553 }
4532 4554
4533 args64 = kmalloc(sizeof(*args64), GFP_NOFS); 4555 args64 = kmalloc(sizeof(*args64), GFP_NOFS);
4534 if (IS_ERR(args64)) { 4556 if (!args64) {
4535 ret = PTR_ERR(args64); 4557 ret = -ENOMEM;
4536 args64 = NULL;
4537 goto out; 4558 goto out;
4538 } 4559 }
4539 4560
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index def428a25b2a..7f92ab1daa87 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2317,7 +2317,6 @@ void free_reloc_roots(struct list_head *list)
2317static noinline_for_stack 2317static noinline_for_stack
2318int merge_reloc_roots(struct reloc_control *rc) 2318int merge_reloc_roots(struct reloc_control *rc)
2319{ 2319{
2320 struct btrfs_trans_handle *trans;
2321 struct btrfs_root *root; 2320 struct btrfs_root *root;
2322 struct btrfs_root *reloc_root; 2321 struct btrfs_root *reloc_root;
2323 u64 last_snap; 2322 u64 last_snap;
@@ -2375,26 +2374,6 @@ again:
2375 list_add_tail(&reloc_root->root_list, 2374 list_add_tail(&reloc_root->root_list,
2376 &reloc_roots); 2375 &reloc_roots);
2377 goto out; 2376 goto out;
2378 } else if (!ret) {
2379 /*
2380 * recover the last snapshot tranid to avoid
2381 * the space balance break NOCOW.
2382 */
2383 root = read_fs_root(rc->extent_root->fs_info,
2384 objectid);
2385 if (IS_ERR(root))
2386 continue;
2387
2388 trans = btrfs_join_transaction(root);
2389 BUG_ON(IS_ERR(trans));
2390
2391 /* Check if the fs/file tree was snapshoted or not. */
2392 if (btrfs_root_last_snapshot(&root->root_item) ==
2393 otransid - 1)
2394 btrfs_set_root_last_snapshot(&root->root_item,
2395 last_snap);
2396
2397 btrfs_end_transaction(trans, root);
2398 } 2377 }
2399 } 2378 }
2400 2379
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 93e6d7172844..0be77993378e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2235,6 +2235,47 @@ behind_scrub_pages:
2235 return 0; 2235 return 0;
2236} 2236}
2237 2237
2238/*
2239 * Given a physical address, this will calculate its
2240 * logical offset. If this is a parity stripe, it will return
2241 * the leftmost data stripe's logical offset.
2242 *
2243 * return 0 if it is a data stripe, 1 means parity stripe.
2244 */
2245static int get_raid56_logic_offset(u64 physical, int num,
2246 struct map_lookup *map, u64 *offset)
2247{
2248 int i;
2249 int j = 0;
2250 u64 stripe_nr;
2251 u64 last_offset;
2252 int stripe_index;
2253 int rot;
2254
2255 last_offset = (physical - map->stripes[num].physical) *
2256 nr_data_stripes(map);
2257 *offset = last_offset;
2258 for (i = 0; i < nr_data_stripes(map); i++) {
2259 *offset = last_offset + i * map->stripe_len;
2260
2261 stripe_nr = *offset;
2262 do_div(stripe_nr, map->stripe_len);
2263 do_div(stripe_nr, nr_data_stripes(map));
2264
2265 /* Work out the disk rotation on this stripe-set */
2266 rot = do_div(stripe_nr, map->num_stripes);
2267 /* calculate which stripe this data is located on */
2268 rot += i;
2269 stripe_index = rot % map->num_stripes;
2270 if (stripe_index == num)
2271 return 0;
2272 if (stripe_index < num)
2273 j++;
2274 }
2275 *offset = last_offset + j * map->stripe_len;
2276 return 1;
2277}
2278
2238static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, 2279static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2239 struct map_lookup *map, 2280 struct map_lookup *map,
2240 struct btrfs_device *scrub_dev, 2281 struct btrfs_device *scrub_dev,
@@ -2256,6 +2297,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2256 u64 physical; 2297 u64 physical;
2257 u64 logical; 2298 u64 logical;
2258 u64 logic_end; 2299 u64 logic_end;
2300 u64 physical_end;
2259 u64 generation; 2301 u64 generation;
2260 int mirror_num; 2302 int mirror_num;
2261 struct reada_control *reada1; 2303 struct reada_control *reada1;
@@ -2269,16 +2311,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2269 u64 extent_len; 2311 u64 extent_len;
2270 struct btrfs_device *extent_dev; 2312 struct btrfs_device *extent_dev;
2271 int extent_mirror_num; 2313 int extent_mirror_num;
2272 int stop_loop; 2314 int stop_loop = 0;
2273
2274 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2275 BTRFS_BLOCK_GROUP_RAID6)) {
2276 if (num >= nr_data_stripes(map)) {
2277 return 0;
2278 }
2279 }
2280 2315
2281 nstripes = length; 2316 nstripes = length;
2317 physical = map->stripes[num].physical;
2282 offset = 0; 2318 offset = 0;
2283 do_div(nstripes, map->stripe_len); 2319 do_div(nstripes, map->stripe_len);
2284 if (map->type & BTRFS_BLOCK_GROUP_RAID0) { 2320 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
@@ -2296,6 +2332,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2296 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 2332 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2297 increment = map->stripe_len; 2333 increment = map->stripe_len;
2298 mirror_num = num % map->num_stripes + 1; 2334 mirror_num = num % map->num_stripes + 1;
2335 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2336 BTRFS_BLOCK_GROUP_RAID6)) {
2337 get_raid56_logic_offset(physical, num, map, &offset);
2338 increment = map->stripe_len * nr_data_stripes(map);
2339 mirror_num = 1;
2299 } else { 2340 } else {
2300 increment = map->stripe_len; 2341 increment = map->stripe_len;
2301 mirror_num = 1; 2342 mirror_num = 1;
@@ -2319,7 +2360,15 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2319 * to not hold off transaction commits 2360 * to not hold off transaction commits
2320 */ 2361 */
2321 logical = base + offset; 2362 logical = base + offset;
2322 2363 physical_end = physical + nstripes * map->stripe_len;
2364 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2365 BTRFS_BLOCK_GROUP_RAID6)) {
2366 get_raid56_logic_offset(physical_end, num,
2367 map, &logic_end);
2368 logic_end += base;
2369 } else {
2370 logic_end = logical + increment * nstripes;
2371 }
2323 wait_event(sctx->list_wait, 2372 wait_event(sctx->list_wait,
2324 atomic_read(&sctx->bios_in_flight) == 0); 2373 atomic_read(&sctx->bios_in_flight) == 0);
2325 scrub_blocked_if_needed(fs_info); 2374 scrub_blocked_if_needed(fs_info);
@@ -2328,7 +2377,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2328 key_start.objectid = logical; 2377 key_start.objectid = logical;
2329 key_start.type = BTRFS_EXTENT_ITEM_KEY; 2378 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2330 key_start.offset = (u64)0; 2379 key_start.offset = (u64)0;
2331 key_end.objectid = base + offset + nstripes * increment; 2380 key_end.objectid = logic_end;
2332 key_end.type = BTRFS_METADATA_ITEM_KEY; 2381 key_end.type = BTRFS_METADATA_ITEM_KEY;
2333 key_end.offset = (u64)-1; 2382 key_end.offset = (u64)-1;
2334 reada1 = btrfs_reada_add(root, &key_start, &key_end); 2383 reada1 = btrfs_reada_add(root, &key_start, &key_end);
@@ -2338,7 +2387,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2338 key_start.offset = logical; 2387 key_start.offset = logical;
2339 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 2388 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2340 key_end.type = BTRFS_EXTENT_CSUM_KEY; 2389 key_end.type = BTRFS_EXTENT_CSUM_KEY;
2341 key_end.offset = base + offset + nstripes * increment; 2390 key_end.offset = logic_end;
2342 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end); 2391 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
2343 2392
2344 if (!IS_ERR(reada1)) 2393 if (!IS_ERR(reada1))
@@ -2356,11 +2405,17 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2356 /* 2405 /*
2357 * now find all extents for each stripe and scrub them 2406 * now find all extents for each stripe and scrub them
2358 */ 2407 */
2359 logical = base + offset;
2360 physical = map->stripes[num].physical;
2361 logic_end = logical + increment * nstripes;
2362 ret = 0; 2408 ret = 0;
2363 while (logical < logic_end) { 2409 while (physical < physical_end) {
2410 /* for raid56, we skip parity stripe */
2411 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2412 BTRFS_BLOCK_GROUP_RAID6)) {
2413 ret = get_raid56_logic_offset(physical, num,
2414 map, &logical);
2415 logical += base;
2416 if (ret)
2417 goto skip;
2418 }
2364 /* 2419 /*
2365 * canceled? 2420 * canceled?
2366 */ 2421 */
@@ -2504,15 +2559,29 @@ again:
2504 scrub_free_csums(sctx); 2559 scrub_free_csums(sctx);
2505 if (extent_logical + extent_len < 2560 if (extent_logical + extent_len <
2506 key.objectid + bytes) { 2561 key.objectid + bytes) {
2507 logical += increment; 2562 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2508 physical += map->stripe_len; 2563 BTRFS_BLOCK_GROUP_RAID6)) {
2509 2564 /*
2565 * loop until we find next data stripe
2566 * or we have finished all stripes.
2567 */
2568 do {
2569 physical += map->stripe_len;
2570 ret = get_raid56_logic_offset(
2571 physical, num,
2572 map, &logical);
2573 logical += base;
2574 } while (physical < physical_end && ret);
2575 } else {
2576 physical += map->stripe_len;
2577 logical += increment;
2578 }
2510 if (logical < key.objectid + bytes) { 2579 if (logical < key.objectid + bytes) {
2511 cond_resched(); 2580 cond_resched();
2512 goto again; 2581 goto again;
2513 } 2582 }
2514 2583
2515 if (logical >= logic_end) { 2584 if (physical >= physical_end) {
2516 stop_loop = 1; 2585 stop_loop = 1;
2517 break; 2586 break;
2518 } 2587 }
@@ -2521,6 +2590,7 @@ next:
2521 path->slots[0]++; 2590 path->slots[0]++;
2522 } 2591 }
2523 btrfs_release_path(path); 2592 btrfs_release_path(path);
2593skip:
2524 logical += increment; 2594 logical += increment;
2525 physical += map->stripe_len; 2595 physical += map->stripe_len;
2526 spin_lock(&sctx->stat_lock); 2596 spin_lock(&sctx->stat_lock);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 9b6da9d55f9a..1ac3ca98c429 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -493,6 +493,7 @@ static struct btrfs_path *alloc_path_for_send(void)
493 return NULL; 493 return NULL;
494 path->search_commit_root = 1; 494 path->search_commit_root = 1;
495 path->skip_locking = 1; 495 path->skip_locking = 1;
496 path->need_commit_sem = 1;
496 return path; 497 return path;
497} 498}
498 499
@@ -771,29 +772,22 @@ out:
771/* 772/*
772 * Helper function to retrieve some fields from an inode item. 773 * Helper function to retrieve some fields from an inode item.
773 */ 774 */
774static int get_inode_info(struct btrfs_root *root, 775static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
775 u64 ino, u64 *size, u64 *gen, 776 u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
776 u64 *mode, u64 *uid, u64 *gid, 777 u64 *gid, u64 *rdev)
777 u64 *rdev)
778{ 778{
779 int ret; 779 int ret;
780 struct btrfs_inode_item *ii; 780 struct btrfs_inode_item *ii;
781 struct btrfs_key key; 781 struct btrfs_key key;
782 struct btrfs_path *path;
783
784 path = alloc_path_for_send();
785 if (!path)
786 return -ENOMEM;
787 782
788 key.objectid = ino; 783 key.objectid = ino;
789 key.type = BTRFS_INODE_ITEM_KEY; 784 key.type = BTRFS_INODE_ITEM_KEY;
790 key.offset = 0; 785 key.offset = 0;
791 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 786 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
792 if (ret < 0)
793 goto out;
794 if (ret) { 787 if (ret) {
795 ret = -ENOENT; 788 if (ret > 0)
796 goto out; 789 ret = -ENOENT;
790 return ret;
797 } 791 }
798 792
799 ii = btrfs_item_ptr(path->nodes[0], path->slots[0], 793 ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -811,7 +805,22 @@ static int get_inode_info(struct btrfs_root *root,
811 if (rdev) 805 if (rdev)
812 *rdev = btrfs_inode_rdev(path->nodes[0], ii); 806 *rdev = btrfs_inode_rdev(path->nodes[0], ii);
813 807
814out: 808 return ret;
809}
810
811static int get_inode_info(struct btrfs_root *root,
812 u64 ino, u64 *size, u64 *gen,
813 u64 *mode, u64 *uid, u64 *gid,
814 u64 *rdev)
815{
816 struct btrfs_path *path;
817 int ret;
818
819 path = alloc_path_for_send();
820 if (!path)
821 return -ENOMEM;
822 ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
823 rdev);
815 btrfs_free_path(path); 824 btrfs_free_path(path);
816 return ret; 825 return ret;
817} 826}
@@ -1085,6 +1094,7 @@ out:
1085struct backref_ctx { 1094struct backref_ctx {
1086 struct send_ctx *sctx; 1095 struct send_ctx *sctx;
1087 1096
1097 struct btrfs_path *path;
1088 /* number of total found references */ 1098 /* number of total found references */
1089 u64 found; 1099 u64 found;
1090 1100
@@ -1155,8 +1165,9 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1155 * There are inodes that have extents that lie behind its i_size. Don't 1165 * There are inodes that have extents that lie behind its i_size. Don't
1156 * accept clones from these extents. 1166 * accept clones from these extents.
1157 */ 1167 */
1158 ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL, 1168 ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
1159 NULL); 1169 NULL, NULL, NULL);
1170 btrfs_release_path(bctx->path);
1160 if (ret < 0) 1171 if (ret < 0)
1161 return ret; 1172 return ret;
1162 1173
@@ -1235,12 +1246,17 @@ static int find_extent_clone(struct send_ctx *sctx,
1235 if (!tmp_path) 1246 if (!tmp_path)
1236 return -ENOMEM; 1247 return -ENOMEM;
1237 1248
1249 /* We only use this path under the commit sem */
1250 tmp_path->need_commit_sem = 0;
1251
1238 backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); 1252 backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
1239 if (!backref_ctx) { 1253 if (!backref_ctx) {
1240 ret = -ENOMEM; 1254 ret = -ENOMEM;
1241 goto out; 1255 goto out;
1242 } 1256 }
1243 1257
1258 backref_ctx->path = tmp_path;
1259
1244 if (data_offset >= ino_size) { 1260 if (data_offset >= ino_size) {
1245 /* 1261 /*
1246 * There may be extents that lie behind the file's size. 1262 * There may be extents that lie behind the file's size.
@@ -1268,8 +1284,10 @@ static int find_extent_clone(struct send_ctx *sctx,
1268 } 1284 }
1269 logical = disk_byte + btrfs_file_extent_offset(eb, fi); 1285 logical = disk_byte + btrfs_file_extent_offset(eb, fi);
1270 1286
1287 down_read(&sctx->send_root->fs_info->commit_root_sem);
1271 ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path, 1288 ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
1272 &found_key, &flags); 1289 &found_key, &flags);
1290 up_read(&sctx->send_root->fs_info->commit_root_sem);
1273 btrfs_release_path(tmp_path); 1291 btrfs_release_path(tmp_path);
1274 1292
1275 if (ret < 0) 1293 if (ret < 0)
@@ -4418,6 +4436,9 @@ static int send_hole(struct send_ctx *sctx, u64 end)
4418 p = fs_path_alloc(); 4436 p = fs_path_alloc();
4419 if (!p) 4437 if (!p)
4420 return -ENOMEM; 4438 return -ENOMEM;
4439 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4440 if (ret < 0)
4441 goto tlv_put_failure;
4421 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); 4442 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
4422 while (offset < end) { 4443 while (offset < end) {
4423 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); 4444 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
@@ -4425,9 +4446,6 @@ static int send_hole(struct send_ctx *sctx, u64 end)
4425 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4446 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4426 if (ret < 0) 4447 if (ret < 0)
4427 break; 4448 break;
4428 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4429 if (ret < 0)
4430 break;
4431 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4449 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4432 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4450 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4433 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); 4451 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
@@ -4968,7 +4986,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4968 4986
4969 if (S_ISREG(sctx->cur_inode_mode)) { 4987 if (S_ISREG(sctx->cur_inode_mode)) {
4970 if (need_send_hole(sctx)) { 4988 if (need_send_hole(sctx)) {
4971 if (sctx->cur_inode_last_extent == (u64)-1) { 4989 if (sctx->cur_inode_last_extent == (u64)-1 ||
4990 sctx->cur_inode_last_extent <
4991 sctx->cur_inode_size) {
4972 ret = get_last_extent(sctx, (u64)-1); 4992 ret = get_last_extent(sctx, (u64)-1);
4973 if (ret) 4993 if (ret)
4974 goto out; 4994 goto out;
@@ -5367,57 +5387,21 @@ out:
5367static int full_send_tree(struct send_ctx *sctx) 5387static int full_send_tree(struct send_ctx *sctx)
5368{ 5388{
5369 int ret; 5389 int ret;
5370 struct btrfs_trans_handle *trans = NULL;
5371 struct btrfs_root *send_root = sctx->send_root; 5390 struct btrfs_root *send_root = sctx->send_root;
5372 struct btrfs_key key; 5391 struct btrfs_key key;
5373 struct btrfs_key found_key; 5392 struct btrfs_key found_key;
5374 struct btrfs_path *path; 5393 struct btrfs_path *path;
5375 struct extent_buffer *eb; 5394 struct extent_buffer *eb;
5376 int slot; 5395 int slot;
5377 u64 start_ctransid;
5378 u64 ctransid;
5379 5396
5380 path = alloc_path_for_send(); 5397 path = alloc_path_for_send();
5381 if (!path) 5398 if (!path)
5382 return -ENOMEM; 5399 return -ENOMEM;
5383 5400
5384 spin_lock(&send_root->root_item_lock);
5385 start_ctransid = btrfs_root_ctransid(&send_root->root_item);
5386 spin_unlock(&send_root->root_item_lock);
5387
5388 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 5401 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
5389 key.type = BTRFS_INODE_ITEM_KEY; 5402 key.type = BTRFS_INODE_ITEM_KEY;
5390 key.offset = 0; 5403 key.offset = 0;
5391 5404
5392join_trans:
5393 /*
5394 * We need to make sure the transaction does not get committed
5395 * while we do anything on commit roots. Join a transaction to prevent
5396 * this.
5397 */
5398 trans = btrfs_join_transaction(send_root);
5399 if (IS_ERR(trans)) {
5400 ret = PTR_ERR(trans);
5401 trans = NULL;
5402 goto out;
5403 }
5404
5405 /*
5406 * Make sure the tree has not changed after re-joining. We detect this
5407 * by comparing start_ctransid and ctransid. They should always match.
5408 */
5409 spin_lock(&send_root->root_item_lock);
5410 ctransid = btrfs_root_ctransid(&send_root->root_item);
5411 spin_unlock(&send_root->root_item_lock);
5412
5413 if (ctransid != start_ctransid) {
5414 WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
5415 "send was modified in between. This is "
5416 "probably a bug.\n");
5417 ret = -EIO;
5418 goto out;
5419 }
5420
5421 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 5405 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
5422 if (ret < 0) 5406 if (ret < 0)
5423 goto out; 5407 goto out;
@@ -5425,19 +5409,6 @@ join_trans:
5425 goto out_finish; 5409 goto out_finish;
5426 5410
5427 while (1) { 5411 while (1) {
5428 /*
5429 * When someone want to commit while we iterate, end the
5430 * joined transaction and rejoin.
5431 */
5432 if (btrfs_should_end_transaction(trans, send_root)) {
5433 ret = btrfs_end_transaction(trans, send_root);
5434 trans = NULL;
5435 if (ret < 0)
5436 goto out;
5437 btrfs_release_path(path);
5438 goto join_trans;
5439 }
5440
5441 eb = path->nodes[0]; 5412 eb = path->nodes[0];
5442 slot = path->slots[0]; 5413 slot = path->slots[0];
5443 btrfs_item_key_to_cpu(eb, &found_key, slot); 5414 btrfs_item_key_to_cpu(eb, &found_key, slot);
@@ -5465,12 +5436,6 @@ out_finish:
5465 5436
5466out: 5437out:
5467 btrfs_free_path(path); 5438 btrfs_free_path(path);
5468 if (trans) {
5469 if (!ret)
5470 ret = btrfs_end_transaction(trans, send_root);
5471 else
5472 btrfs_end_transaction(trans, send_root);
5473 }
5474 return ret; 5439 return ret;
5475} 5440}
5476 5441
@@ -5718,7 +5683,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5718 NULL); 5683 NULL);
5719 sort_clone_roots = 1; 5684 sort_clone_roots = 1;
5720 5685
5686 current->journal_info = (void *)BTRFS_SEND_TRANS_STUB;
5721 ret = send_subvol(sctx); 5687 ret = send_subvol(sctx);
5688 current->journal_info = NULL;
5722 if (ret < 0) 5689 if (ret < 0)
5723 goto out; 5690 goto out;
5724 5691
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9dbf42395153..5011aadacab8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,6 +66,8 @@
66static const struct super_operations btrfs_super_ops; 66static const struct super_operations btrfs_super_ops;
67static struct file_system_type btrfs_fs_type; 67static struct file_system_type btrfs_fs_type;
68 68
69static int btrfs_remount(struct super_block *sb, int *flags, char *data);
70
69static const char *btrfs_decode_error(int errno) 71static const char *btrfs_decode_error(int errno)
70{ 72{
71 char *errstr = "unknown"; 73 char *errstr = "unknown";
@@ -1185,6 +1187,26 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
1185 mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, 1187 mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
1186 newargs); 1188 newargs);
1187 kfree(newargs); 1189 kfree(newargs);
1190
1191 if (PTR_RET(mnt) == -EBUSY) {
1192 if (flags & MS_RDONLY) {
1193 mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name,
1194 newargs);
1195 } else {
1196 int r;
1197 mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
1198 newargs);
1199 if (IS_ERR(mnt))
1200 return ERR_CAST(mnt);
1201
1202 r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
1203 if (r < 0) {
1204 /* FIXME: release vfsmount mnt ??*/
1205 return ERR_PTR(r);
1206 }
1207 }
1208 }
1209
1188 if (IS_ERR(mnt)) 1210 if (IS_ERR(mnt))
1189 return ERR_CAST(mnt); 1211 return ERR_CAST(mnt);
1190 1212
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a04707f740d6..7579f6d0b854 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -75,10 +75,21 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
75 } 75 }
76} 76}
77 77
78static noinline void switch_commit_root(struct btrfs_root *root) 78static noinline void switch_commit_roots(struct btrfs_transaction *trans,
79 struct btrfs_fs_info *fs_info)
79{ 80{
80 free_extent_buffer(root->commit_root); 81 struct btrfs_root *root, *tmp;
81 root->commit_root = btrfs_root_node(root); 82
83 down_write(&fs_info->commit_root_sem);
84 list_for_each_entry_safe(root, tmp, &trans->switch_commits,
85 dirty_list) {
86 list_del_init(&root->dirty_list);
87 free_extent_buffer(root->commit_root);
88 root->commit_root = btrfs_root_node(root);
89 if (is_fstree(root->objectid))
90 btrfs_unpin_free_ino(root);
91 }
92 up_write(&fs_info->commit_root_sem);
82} 93}
83 94
84static inline void extwriter_counter_inc(struct btrfs_transaction *trans, 95static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
@@ -208,6 +219,7 @@ loop:
208 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 219 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
209 INIT_LIST_HEAD(&cur_trans->ordered_operations); 220 INIT_LIST_HEAD(&cur_trans->ordered_operations);
210 INIT_LIST_HEAD(&cur_trans->pending_chunks); 221 INIT_LIST_HEAD(&cur_trans->pending_chunks);
222 INIT_LIST_HEAD(&cur_trans->switch_commits);
211 list_add_tail(&cur_trans->list, &fs_info->trans_list); 223 list_add_tail(&cur_trans->list, &fs_info->trans_list);
212 extent_io_tree_init(&cur_trans->dirty_pages, 224 extent_io_tree_init(&cur_trans->dirty_pages,
213 fs_info->btree_inode->i_mapping); 225 fs_info->btree_inode->i_mapping);
@@ -375,7 +387,8 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
375 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) 387 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
376 return ERR_PTR(-EROFS); 388 return ERR_PTR(-EROFS);
377 389
378 if (current->journal_info) { 390 if (current->journal_info &&
391 current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
379 WARN_ON(type & TRANS_EXTWRITERS); 392 WARN_ON(type & TRANS_EXTWRITERS);
380 h = current->journal_info; 393 h = current->journal_info;
381 h->use_count++; 394 h->use_count++;
@@ -919,9 +932,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
919 return ret; 932 return ret;
920 } 933 }
921 934
922 if (root != root->fs_info->extent_root)
923 switch_commit_root(root);
924
925 return 0; 935 return 0;
926} 936}
927 937
@@ -977,15 +987,16 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
977 list_del_init(next); 987 list_del_init(next);
978 root = list_entry(next, struct btrfs_root, dirty_list); 988 root = list_entry(next, struct btrfs_root, dirty_list);
979 989
990 if (root != fs_info->extent_root)
991 list_add_tail(&root->dirty_list,
992 &trans->transaction->switch_commits);
980 ret = update_cowonly_root(trans, root); 993 ret = update_cowonly_root(trans, root);
981 if (ret) 994 if (ret)
982 return ret; 995 return ret;
983 } 996 }
984 997
985 down_write(&fs_info->extent_commit_sem); 998 list_add_tail(&fs_info->extent_root->dirty_list,
986 switch_commit_root(fs_info->extent_root); 999 &trans->transaction->switch_commits);
987 up_write(&fs_info->extent_commit_sem);
988
989 btrfs_after_dev_replace_commit(fs_info); 1000 btrfs_after_dev_replace_commit(fs_info);
990 1001
991 return 0; 1002 return 0;
@@ -1042,11 +1053,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
1042 smp_wmb(); 1053 smp_wmb();
1043 1054
1044 if (root->commit_root != root->node) { 1055 if (root->commit_root != root->node) {
1045 mutex_lock(&root->fs_commit_mutex); 1056 list_add_tail(&root->dirty_list,
1046 switch_commit_root(root); 1057 &trans->transaction->switch_commits);
1047 btrfs_unpin_free_ino(root);
1048 mutex_unlock(&root->fs_commit_mutex);
1049
1050 btrfs_set_root_node(&root->root_item, 1058 btrfs_set_root_node(&root->root_item,
1051 root->node); 1059 root->node);
1052 } 1060 }
@@ -1857,11 +1865,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1857 1865
1858 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1866 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1859 root->fs_info->tree_root->node); 1867 root->fs_info->tree_root->node);
1860 switch_commit_root(root->fs_info->tree_root); 1868 list_add_tail(&root->fs_info->tree_root->dirty_list,
1869 &cur_trans->switch_commits);
1861 1870
1862 btrfs_set_root_node(&root->fs_info->chunk_root->root_item, 1871 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1863 root->fs_info->chunk_root->node); 1872 root->fs_info->chunk_root->node);
1864 switch_commit_root(root->fs_info->chunk_root); 1873 list_add_tail(&root->fs_info->chunk_root->dirty_list,
1874 &cur_trans->switch_commits);
1875
1876 switch_commit_roots(cur_trans, root->fs_info);
1865 1877
1866 assert_qgroups_uptodate(trans); 1878 assert_qgroups_uptodate(trans);
1867 update_super_roots(root); 1879 update_super_roots(root);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 6ac037e9f9f0..b57b924e8e03 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -57,6 +57,7 @@ struct btrfs_transaction {
57 struct list_head pending_snapshots; 57 struct list_head pending_snapshots;
58 struct list_head ordered_operations; 58 struct list_head ordered_operations;
59 struct list_head pending_chunks; 59 struct list_head pending_chunks;
60 struct list_head switch_commits;
60 struct btrfs_delayed_ref_root delayed_refs; 61 struct btrfs_delayed_ref_root delayed_refs;
61 int aborted; 62 int aborted;
62}; 63};
@@ -78,6 +79,8 @@ struct btrfs_transaction {
78#define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \ 79#define TRANS_EXTWRITERS (__TRANS_USERSPACE | __TRANS_START | \
79 __TRANS_ATTACH) 80 __TRANS_ATTACH)
80 81
82#define BTRFS_SEND_TRANS_STUB 1
83
81struct btrfs_trans_handle { 84struct btrfs_trans_handle {
82 u64 transid; 85 u64 transid;
83 u64 bytes_reserved; 86 u64 bytes_reserved;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d241130a32fd..49d7fab73360 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -448,6 +448,14 @@ static void pending_bios_fn(struct btrfs_work *work)
448 run_scheduled_bios(device); 448 run_scheduled_bios(device);
449} 449}
450 450
451/*
452 * Add new device to list of registered devices
453 *
454 * Returns:
455 * 1 - first time device is seen
456 * 0 - device already known
457 * < 0 - error
458 */
451static noinline int device_list_add(const char *path, 459static noinline int device_list_add(const char *path,
452 struct btrfs_super_block *disk_super, 460 struct btrfs_super_block *disk_super,
453 u64 devid, struct btrfs_fs_devices **fs_devices_ret) 461 u64 devid, struct btrfs_fs_devices **fs_devices_ret)
@@ -455,6 +463,7 @@ static noinline int device_list_add(const char *path,
455 struct btrfs_device *device; 463 struct btrfs_device *device;
456 struct btrfs_fs_devices *fs_devices; 464 struct btrfs_fs_devices *fs_devices;
457 struct rcu_string *name; 465 struct rcu_string *name;
466 int ret = 0;
458 u64 found_transid = btrfs_super_generation(disk_super); 467 u64 found_transid = btrfs_super_generation(disk_super);
459 468
460 fs_devices = find_fsid(disk_super->fsid); 469 fs_devices = find_fsid(disk_super->fsid);
@@ -495,6 +504,7 @@ static noinline int device_list_add(const char *path,
495 fs_devices->num_devices++; 504 fs_devices->num_devices++;
496 mutex_unlock(&fs_devices->device_list_mutex); 505 mutex_unlock(&fs_devices->device_list_mutex);
497 506
507 ret = 1;
498 device->fs_devices = fs_devices; 508 device->fs_devices = fs_devices;
499 } else if (!device->name || strcmp(device->name->str, path)) { 509 } else if (!device->name || strcmp(device->name->str, path)) {
500 name = rcu_string_strdup(path, GFP_NOFS); 510 name = rcu_string_strdup(path, GFP_NOFS);
@@ -513,7 +523,8 @@ static noinline int device_list_add(const char *path,
513 fs_devices->latest_trans = found_transid; 523 fs_devices->latest_trans = found_transid;
514 } 524 }
515 *fs_devices_ret = fs_devices; 525 *fs_devices_ret = fs_devices;
516 return 0; 526
527 return ret;
517} 528}
518 529
519static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) 530static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
@@ -910,17 +921,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
910 transid = btrfs_super_generation(disk_super); 921 transid = btrfs_super_generation(disk_super);
911 total_devices = btrfs_super_num_devices(disk_super); 922 total_devices = btrfs_super_num_devices(disk_super);
912 923
913 if (disk_super->label[0]) {
914 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
915 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
916 printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
917 } else {
918 printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
919 }
920
921 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
922
923 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 924 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
925 if (ret > 0) {
926 if (disk_super->label[0]) {
927 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
928 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
929 printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
930 } else {
931 printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
932 }
933
934 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
935 ret = 0;
936 }
924 if (!ret && fs_devices_ret) 937 if (!ret && fs_devices_ret)
925 (*fs_devices_ret)->total_devices = total_devices; 938 (*fs_devices_ret)->total_devices = total_devices;
926 939