aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-07-02 19:52:38 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-07-02 19:52:38 -0400
commit 5291a12f0503e31e0b8e90ee8e4997d59c1c3aad (patch)
tree 63606ab8991540c5cb963e079be92a05fd829791 /fs
parent c7cba0623fc17fe766063d492810632366790763 (diff)
parent 68f5a38c3ea4ae9cc7a40f86ff6d6d031583d93a (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: fix error message formatting
  Btrfs: fix use after free in btrfs_start_workers fail path
  Btrfs: honor nodatacow/sum mount options for new files
  Btrfs: update backrefs while dropping snapshot
  Btrfs: account for space we may use in fallocate
  Btrfs: fix the file clone ioctl for preallocated extents
  Btrfs: don't log the inode in file_write while growing the file
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/async-thread.c 2
-rw-r--r-- fs/btrfs/ctree.h 3
-rw-r--r-- fs/btrfs/extent-tree.c 566
-rw-r--r-- fs/btrfs/file.c 5
-rw-r--r-- fs/btrfs/inode.c 25
-rw-r--r-- fs/btrfs/ioctl.c 6
-rw-r--r-- fs/btrfs/relocation.c 5
-rw-r--r-- fs/btrfs/transaction.c 4
8 files changed, 423 insertions, 193 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7f88628a1a72..6e4f6c50a120 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
299 "btrfs-%s-%d", workers->name, 299 "btrfs-%s-%d", workers->name,
300 workers->num_workers + i); 300 workers->num_workers + i);
301 if (IS_ERR(worker->task)) { 301 if (IS_ERR(worker->task)) {
302 kfree(worker);
303 ret = PTR_ERR(worker->task); 302 ret = PTR_ERR(worker->task);
303 kfree(worker);
304 goto fail; 304 goto fail;
305 } 305 }
306 306
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2779c2f5360a..98a873838717 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2074,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2074int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 2074int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
2075int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); 2075int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
2076int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 2076int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
2077int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 2077int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
2078 *root);
2079int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 2078int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2080 struct btrfs_root *root, 2079 struct btrfs_root *root,
2081 struct extent_buffer *node, 2080 struct extent_buffer *node,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5ce..a5aca3997d42 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
990 return type; 990 return type;
991} 991}
992 992
993static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) 993static int find_next_key(struct btrfs_path *path, int level,
994 struct btrfs_key *key)
994 995
995{ 996{
996 int level; 997 for (; level < BTRFS_MAX_LEVEL; level++) {
997 BUG_ON(!path->keep_locks);
998 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
999 if (!path->nodes[level]) 998 if (!path->nodes[level])
1000 break; 999 break;
1001 btrfs_assert_tree_locked(path->nodes[level]);
1002 if (path->slots[level] + 1 >= 1000 if (path->slots[level] + 1 >=
1003 btrfs_header_nritems(path->nodes[level])) 1001 btrfs_header_nritems(path->nodes[level]))
1004 continue; 1002 continue;
@@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1158 * For simplicity, we just do not add new inline back 1156 * For simplicity, we just do not add new inline back
1159 * ref if there is any kind of item for this block 1157 * ref if there is any kind of item for this block
1160 */ 1158 */
1161 if (find_next_key(path, &key) == 0 && key.objectid == bytenr && 1159 if (find_next_key(path, 0, &key) == 0 &&
1160 key.objectid == bytenr &&
1162 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1161 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1163 err = -EAGAIN; 1162 err = -EAGAIN;
1164 goto out; 1163 goto out;
@@ -2697,7 +2696,7 @@ again:
2697 2696
2698 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 2697 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
2699 ", %llu bytes_used, %llu bytes_reserved, " 2698 ", %llu bytes_used, %llu bytes_reserved, "
2700 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 2699 "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
2701 "%llu total\n", (unsigned long long)bytes, 2700 "%llu total\n", (unsigned long long)bytes,
2702 (unsigned long long)data_sinfo->bytes_delalloc, 2701 (unsigned long long)data_sinfo->bytes_delalloc,
2703 (unsigned long long)data_sinfo->bytes_used, 2702 (unsigned long long)data_sinfo->bytes_used,
@@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4128 return buf; 4127 return buf;
4129} 4128}
4130 4129
4130#if 0
4131int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4131int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4132 struct btrfs_root *root, struct extent_buffer *leaf) 4132 struct btrfs_root *root, struct extent_buffer *leaf)
4133{ 4133{
@@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4171 return 0; 4171 return 0;
4172} 4172}
4173 4173
4174#if 0
4175
4176static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4174static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
4177 struct btrfs_root *root, 4175 struct btrfs_root *root,
4178 struct btrfs_leaf_ref *ref) 4176 struct btrfs_leaf_ref *ref)
@@ -4553,262 +4551,471 @@ out:
4553} 4551}
4554#endif 4552#endif
4555 4553
4554struct walk_control {
4555 u64 refs[BTRFS_MAX_LEVEL];
4556 u64 flags[BTRFS_MAX_LEVEL];
4557 struct btrfs_key update_progress;
4558 int stage;
4559 int level;
4560 int shared_level;
4561 int update_ref;
4562 int keep_locks;
4563};
4564
4565#define DROP_REFERENCE 1
4566#define UPDATE_BACKREF 2
4567
4556/* 4568/*
4557 * helper function for drop_subtree, this function is similar to 4569 * helper to process tree block while walking down the tree.
4558 * walk_down_tree. The main difference is that it checks reference 4570 *
4559 * counts while tree blocks are locked. 4571 * when wc->stage == DROP_REFERENCE, this function checks
4572 * reference count of the block. if the block is shared and
4573 * we need update back refs for the subtree rooted at the
4574 * block, this function changes wc->stage to UPDATE_BACKREF
4575 *
4576 * when wc->stage == UPDATE_BACKREF, this function updates
4577 * back refs for pointers in the block.
4578 *
4579 * NOTE: return value 1 means we should stop walking down.
4560 */ 4580 */
4561static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4581static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4562 struct btrfs_root *root, 4582 struct btrfs_root *root,
4563 struct btrfs_path *path, int *level) 4583 struct btrfs_path *path,
4584 struct walk_control *wc)
4564{ 4585{
4565 struct extent_buffer *next; 4586 int level = wc->level;
4566 struct extent_buffer *cur; 4587 struct extent_buffer *eb = path->nodes[level];
4567 struct extent_buffer *parent; 4588 struct btrfs_key key;
4568 u64 bytenr; 4589 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4569 u64 ptr_gen;
4570 u64 refs;
4571 u64 flags;
4572 u32 blocksize;
4573 int ret; 4590 int ret;
4574 4591
4575 cur = path->nodes[*level]; 4592 if (wc->stage == UPDATE_BACKREF &&
4576 ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, 4593 btrfs_header_owner(eb) != root->root_key.objectid)
4577 &refs, &flags); 4594 return 1;
4578 BUG_ON(ret);
4579 if (refs > 1)
4580 goto out;
4581 4595
4582 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4596 /*
4597 * when reference count of tree block is 1, it won't increase
4598 * again. once full backref flag is set, we never clear it.
4599 */
4600 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4601 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
4602 BUG_ON(!path->locks[level]);
4603 ret = btrfs_lookup_extent_info(trans, root,
4604 eb->start, eb->len,
4605 &wc->refs[level],
4606 &wc->flags[level]);
4607 BUG_ON(ret);
4608 BUG_ON(wc->refs[level] == 0);
4609 }
4583 4610
4584 while (*level >= 0) { 4611 if (wc->stage == DROP_REFERENCE &&
4585 cur = path->nodes[*level]; 4612 wc->update_ref && wc->refs[level] > 1) {
4586 if (*level == 0) { 4613 BUG_ON(eb == root->node);
4587 ret = btrfs_drop_leaf_ref(trans, root, cur); 4614 BUG_ON(path->slots[level] > 0);
4588 BUG_ON(ret); 4615 if (level == 0)
4589 clean_tree_block(trans, root, cur); 4616 btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
4590 break; 4617 else
4591 } 4618 btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
4592 if (path->slots[*level] >= btrfs_header_nritems(cur)) { 4619 if (btrfs_header_owner(eb) == root->root_key.objectid &&
4593 clean_tree_block(trans, root, cur); 4620 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
4594 break; 4621 wc->stage = UPDATE_BACKREF;
4622 wc->shared_level = level;
4595 } 4623 }
4624 }
4596 4625
4597 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4626 if (wc->stage == DROP_REFERENCE) {
4598 blocksize = btrfs_level_size(root, *level - 1); 4627 if (wc->refs[level] > 1)
4599 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4628 return 1;
4600 4629
4601 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4630 if (path->locks[level] && !wc->keep_locks) {
4602 btrfs_tree_lock(next); 4631 btrfs_tree_unlock(eb);
4603 btrfs_set_lock_blocking(next); 4632 path->locks[level] = 0;
4633 }
4634 return 0;
4635 }
4604 4636
4605 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 4637 /* wc->stage == UPDATE_BACKREF */
4606 &refs, &flags); 4638 if (!(wc->flags[level] & flag)) {
4639 BUG_ON(!path->locks[level]);
4640 ret = btrfs_inc_ref(trans, root, eb, 1);
4607 BUG_ON(ret); 4641 BUG_ON(ret);
4608 if (refs > 1) { 4642 ret = btrfs_dec_ref(trans, root, eb, 0);
4609 parent = path->nodes[*level]; 4643 BUG_ON(ret);
4610 ret = btrfs_free_extent(trans, root, bytenr, 4644 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
4611 blocksize, parent->start, 4645 eb->len, flag, 0);
4612 btrfs_header_owner(parent), 4646 BUG_ON(ret);
4613 *level - 1, 0); 4647 wc->flags[level] |= flag;
4648 }
4649
4650 /*
4651 * the block is shared by multiple trees, so it's not good to
4652 * keep the tree lock
4653 */
4654 if (path->locks[level] && level > 0) {
4655 btrfs_tree_unlock(eb);
4656 path->locks[level] = 0;
4657 }
4658 return 0;
4659}
4660
4661/*
4662 * helper to process tree block while walking up the tree.
4663 *
4664 * when wc->stage == DROP_REFERENCE, this function drops
4665 * reference count on the block.
4666 *
4667 * when wc->stage == UPDATE_BACKREF, this function changes
4668 * wc->stage back to DROP_REFERENCE if we changed wc->stage
4669 * to UPDATE_BACKREF previously while processing the block.
4670 *
4671 * NOTE: return value 1 means we should stop walking up.
4672 */
4673static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4674 struct btrfs_root *root,
4675 struct btrfs_path *path,
4676 struct walk_control *wc)
4677{
4678 int ret = 0;
4679 int level = wc->level;
4680 struct extent_buffer *eb = path->nodes[level];
4681 u64 parent = 0;
4682
4683 if (wc->stage == UPDATE_BACKREF) {
4684 BUG_ON(wc->shared_level < level);
4685 if (level < wc->shared_level)
4686 goto out;
4687
4688 BUG_ON(wc->refs[level] <= 1);
4689 ret = find_next_key(path, level + 1, &wc->update_progress);
4690 if (ret > 0)
4691 wc->update_ref = 0;
4692
4693 wc->stage = DROP_REFERENCE;
4694 wc->shared_level = -1;
4695 path->slots[level] = 0;
4696
4697 /*
4698 * check reference count again if the block isn't locked.
4699 * we should start walking down the tree again if reference
4700 * count is one.
4701 */
4702 if (!path->locks[level]) {
4703 BUG_ON(level == 0);
4704 btrfs_tree_lock(eb);
4705 btrfs_set_lock_blocking(eb);
4706 path->locks[level] = 1;
4707
4708 ret = btrfs_lookup_extent_info(trans, root,
4709 eb->start, eb->len,
4710 &wc->refs[level],
4711 &wc->flags[level]);
4614 BUG_ON(ret); 4712 BUG_ON(ret);
4615 path->slots[*level]++; 4713 BUG_ON(wc->refs[level] == 0);
4616 btrfs_tree_unlock(next); 4714 if (wc->refs[level] == 1) {
4617 free_extent_buffer(next); 4715 btrfs_tree_unlock(eb);
4618 continue; 4716 path->locks[level] = 0;
4717 return 1;
4718 }
4719 } else {
4720 BUG_ON(level != 0);
4619 } 4721 }
4722 }
4620 4723
4621 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4724 /* wc->stage == DROP_REFERENCE */
4725 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
4622 4726
4623 *level = btrfs_header_level(next); 4727 if (wc->refs[level] == 1) {
4624 path->nodes[*level] = next; 4728 if (level == 0) {
4625 path->slots[*level] = 0; 4729 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4626 path->locks[*level] = 1; 4730 ret = btrfs_dec_ref(trans, root, eb, 1);
4627 cond_resched(); 4731 else
4732 ret = btrfs_dec_ref(trans, root, eb, 0);
4733 BUG_ON(ret);
4734 }
4735 /* make block locked assertion in clean_tree_block happy */
4736 if (!path->locks[level] &&
4737 btrfs_header_generation(eb) == trans->transid) {
4738 btrfs_tree_lock(eb);
4739 btrfs_set_lock_blocking(eb);
4740 path->locks[level] = 1;
4741 }
4742 clean_tree_block(trans, root, eb);
4743 }
4744
4745 if (eb == root->node) {
4746 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4747 parent = eb->start;
4748 else
4749 BUG_ON(root->root_key.objectid !=
4750 btrfs_header_owner(eb));
4751 } else {
4752 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4753 parent = path->nodes[level + 1]->start;
4754 else
4755 BUG_ON(root->root_key.objectid !=
4756 btrfs_header_owner(path->nodes[level + 1]));
4628 } 4757 }
4629out:
4630 if (path->nodes[*level] == root->node)
4631 parent = path->nodes[*level];
4632 else
4633 parent = path->nodes[*level + 1];
4634 bytenr = path->nodes[*level]->start;
4635 blocksize = path->nodes[*level]->len;
4636 4758
4637 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, 4759 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
4638 btrfs_header_owner(parent), *level, 0); 4760 root->root_key.objectid, level, 0);
4639 BUG_ON(ret); 4761 BUG_ON(ret);
4762out:
4763 wc->refs[level] = 0;
4764 wc->flags[level] = 0;
4765 return ret;
4766}
4767
4768static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4769 struct btrfs_root *root,
4770 struct btrfs_path *path,
4771 struct walk_control *wc)
4772{
4773 struct extent_buffer *next;
4774 struct extent_buffer *cur;
4775 u64 bytenr;
4776 u64 ptr_gen;
4777 u32 blocksize;
4778 int level = wc->level;
4779 int ret;
4780
4781 while (level >= 0) {
4782 cur = path->nodes[level];
4783 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
4640 4784
4641 if (path->locks[*level]) { 4785 ret = walk_down_proc(trans, root, path, wc);
4642 btrfs_tree_unlock(path->nodes[*level]); 4786 if (ret > 0)
4643 path->locks[*level] = 0; 4787 break;
4788
4789 if (level == 0)
4790 break;
4791
4792 bytenr = btrfs_node_blockptr(cur, path->slots[level]);
4793 blocksize = btrfs_level_size(root, level - 1);
4794 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
4795
4796 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
4797 btrfs_tree_lock(next);
4798 btrfs_set_lock_blocking(next);
4799
4800 level--;
4801 BUG_ON(level != btrfs_header_level(next));
4802 path->nodes[level] = next;
4803 path->slots[level] = 0;
4804 path->locks[level] = 1;
4805 wc->level = level;
4644 } 4806 }
4645 free_extent_buffer(path->nodes[*level]);
4646 path->nodes[*level] = NULL;
4647 *level += 1;
4648 cond_resched();
4649 return 0; 4807 return 0;
4650} 4808}
4651 4809
4652/*
4653 * helper for dropping snapshots. This walks back up the tree in the path
4654 * to find the first node higher up where we haven't yet gone through
4655 * all the slots
4656 */
4657static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 4810static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
4658 struct btrfs_root *root, 4811 struct btrfs_root *root,
4659 struct btrfs_path *path, 4812 struct btrfs_path *path,
4660 int *level, int max_level) 4813 struct walk_control *wc, int max_level)
4661{ 4814{
4662 struct btrfs_root_item *root_item = &root->root_item; 4815 int level = wc->level;
4663 int i;
4664 int slot;
4665 int ret; 4816 int ret;
4666 4817
4667 for (i = *level; i < max_level && path->nodes[i]; i++) { 4818 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
4668 slot = path->slots[i]; 4819 while (level < max_level && path->nodes[level]) {
4669 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 4820 wc->level = level;
4670 /* 4821 if (path->slots[level] + 1 <
4671 * there is more work to do in this level. 4822 btrfs_header_nritems(path->nodes[level])) {
4672 * Update the drop_progress marker to reflect 4823 path->slots[level]++;
4673 * the work we've done so far, and then bump
4674 * the slot number
4675 */
4676 path->slots[i]++;
4677 WARN_ON(*level == 0);
4678 if (max_level == BTRFS_MAX_LEVEL) {
4679 btrfs_node_key(path->nodes[i],
4680 &root_item->drop_progress,
4681 path->slots[i]);
4682 root_item->drop_level = i;
4683 }
4684 *level = i;
4685 return 0; 4824 return 0;
4686 } else { 4825 } else {
4687 struct extent_buffer *parent; 4826 ret = walk_up_proc(trans, root, path, wc);
4688 4827 if (ret > 0)
4689 /* 4828 return 0;
4690 * this whole node is done, free our reference
4691 * on it and go up one level
4692 */
4693 if (path->nodes[*level] == root->node)
4694 parent = path->nodes[*level];
4695 else
4696 parent = path->nodes[*level + 1];
4697 4829
4698 clean_tree_block(trans, root, path->nodes[i]); 4830 if (path->locks[level]) {
4699 ret = btrfs_free_extent(trans, root, 4831 btrfs_tree_unlock(path->nodes[level]);
4700 path->nodes[i]->start, 4832 path->locks[level] = 0;
4701 path->nodes[i]->len,
4702 parent->start,
4703 btrfs_header_owner(parent),
4704 *level, 0);
4705 BUG_ON(ret);
4706 if (path->locks[*level]) {
4707 btrfs_tree_unlock(path->nodes[i]);
4708 path->locks[i] = 0;
4709 } 4833 }
4710 free_extent_buffer(path->nodes[i]); 4834 free_extent_buffer(path->nodes[level]);
4711 path->nodes[i] = NULL; 4835 path->nodes[level] = NULL;
4712 *level = i + 1; 4836 level++;
4713 } 4837 }
4714 } 4838 }
4715 return 1; 4839 return 1;
4716} 4840}
4717 4841
4718/* 4842/*
4719 * drop the reference count on the tree rooted at 'snap'. This traverses 4843 * drop a subvolume tree.
4720 * the tree freeing any blocks that have a ref count of zero after being 4844 *
4721 * decremented. 4845 * this function traverses the tree freeing any blocks that are only
4846 * referenced by the tree.
4847 *
4848 * when a shared tree block is found, this function decreases its
4849 * reference count by one. if update_ref is true, this function
4850 * also makes sure backrefs for the shared block and all lower level
4851 * blocks are properly updated.
4722 */ 4852 */
4723int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 4853int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
4724 *root)
4725{ 4854{
4726 int ret = 0;
4727 int wret;
4728 int level;
4729 struct btrfs_path *path; 4855 struct btrfs_path *path;
4730 int update_count; 4856 struct btrfs_trans_handle *trans;
4857 struct btrfs_root *tree_root = root->fs_info->tree_root;
4731 struct btrfs_root_item *root_item = &root->root_item; 4858 struct btrfs_root_item *root_item = &root->root_item;
4859 struct walk_control *wc;
4860 struct btrfs_key key;
4861 int err = 0;
4862 int ret;
4863 int level;
4732 4864
4733 path = btrfs_alloc_path(); 4865 path = btrfs_alloc_path();
4734 BUG_ON(!path); 4866 BUG_ON(!path);
4735 4867
4736 level = btrfs_header_level(root->node); 4868 wc = kzalloc(sizeof(*wc), GFP_NOFS);
4869 BUG_ON(!wc);
4870
4871 trans = btrfs_start_transaction(tree_root, 1);
4872
4737 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4873 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
4874 level = btrfs_header_level(root->node);
4738 path->nodes[level] = btrfs_lock_root_node(root); 4875 path->nodes[level] = btrfs_lock_root_node(root);
4739 btrfs_set_lock_blocking(path->nodes[level]); 4876 btrfs_set_lock_blocking(path->nodes[level]);
4740 path->slots[level] = 0; 4877 path->slots[level] = 0;
4741 path->locks[level] = 1; 4878 path->locks[level] = 1;
4879 memset(&wc->update_progress, 0,
4880 sizeof(wc->update_progress));
4742 } else { 4881 } else {
4743 struct btrfs_key key;
4744 struct btrfs_disk_key found_key;
4745 struct extent_buffer *node;
4746
4747 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 4882 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
4883 memcpy(&wc->update_progress, &key,
4884 sizeof(wc->update_progress));
4885
4748 level = root_item->drop_level; 4886 level = root_item->drop_level;
4887 BUG_ON(level == 0);
4749 path->lowest_level = level; 4888 path->lowest_level = level;
4750 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4889 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4751 if (wret < 0) { 4890 path->lowest_level = 0;
4752 ret = wret; 4891 if (ret < 0) {
4892 err = ret;
4753 goto out; 4893 goto out;
4754 } 4894 }
4755 node = path->nodes[level]; 4895 btrfs_node_key_to_cpu(path->nodes[level], &key,
4756 btrfs_node_key(node, &found_key, path->slots[level]); 4896 path->slots[level]);
4757 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 4897 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
4758 sizeof(found_key))); 4898
4759 /* 4899 /*
4760 * unlock our path, this is safe because only this 4900 * unlock our path, this is safe because only this
4761 * function is allowed to delete this snapshot 4901 * function is allowed to delete this snapshot
4762 */ 4902 */
4763 btrfs_unlock_up_safe(path, 0); 4903 btrfs_unlock_up_safe(path, 0);
4904
4905 level = btrfs_header_level(root->node);
4906 while (1) {
4907 btrfs_tree_lock(path->nodes[level]);
4908 btrfs_set_lock_blocking(path->nodes[level]);
4909
4910 ret = btrfs_lookup_extent_info(trans, root,
4911 path->nodes[level]->start,
4912 path->nodes[level]->len,
4913 &wc->refs[level],
4914 &wc->flags[level]);
4915 BUG_ON(ret);
4916 BUG_ON(wc->refs[level] == 0);
4917
4918 if (level == root_item->drop_level)
4919 break;
4920
4921 btrfs_tree_unlock(path->nodes[level]);
4922 WARN_ON(wc->refs[level] != 1);
4923 level--;
4924 }
4764 } 4925 }
4926
4927 wc->level = level;
4928 wc->shared_level = -1;
4929 wc->stage = DROP_REFERENCE;
4930 wc->update_ref = update_ref;
4931 wc->keep_locks = 0;
4932
4765 while (1) { 4933 while (1) {
4766 unsigned long update; 4934 ret = walk_down_tree(trans, root, path, wc);
4767 wret = walk_down_tree(trans, root, path, &level); 4935 if (ret < 0) {
4768 if (wret > 0) 4936 err = ret;
4769 break; 4937 break;
4770 if (wret < 0) 4938 }
4771 ret = wret;
4772 4939
4773 wret = walk_up_tree(trans, root, path, &level, 4940 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
4774 BTRFS_MAX_LEVEL); 4941 if (ret < 0) {
4775 if (wret > 0) 4942 err = ret;
4776 break; 4943 break;
4777 if (wret < 0) 4944 }
4778 ret = wret; 4945
4779 if (trans->transaction->in_commit || 4946 if (ret > 0) {
4780 trans->transaction->delayed_refs.flushing) { 4947 BUG_ON(wc->stage != DROP_REFERENCE);
4781 ret = -EAGAIN;
4782 break; 4948 break;
4783 } 4949 }
4784 for (update_count = 0; update_count < 16; update_count++) { 4950
4951 if (wc->stage == DROP_REFERENCE) {
4952 level = wc->level;
4953 btrfs_node_key(path->nodes[level],
4954 &root_item->drop_progress,
4955 path->slots[level]);
4956 root_item->drop_level = level;
4957 }
4958
4959 BUG_ON(wc->level == 0);
4960 if (trans->transaction->in_commit ||
4961 trans->transaction->delayed_refs.flushing) {
4962 ret = btrfs_update_root(trans, tree_root,
4963 &root->root_key,
4964 root_item);
4965 BUG_ON(ret);
4966
4967 btrfs_end_transaction(trans, tree_root);
4968 trans = btrfs_start_transaction(tree_root, 1);
4969 } else {
4970 unsigned long update;
4785 update = trans->delayed_ref_updates; 4971 update = trans->delayed_ref_updates;
4786 trans->delayed_ref_updates = 0; 4972 trans->delayed_ref_updates = 0;
4787 if (update) 4973 if (update)
4788 btrfs_run_delayed_refs(trans, root, update); 4974 btrfs_run_delayed_refs(trans, tree_root,
4789 else 4975 update);
4790 break;
4791 } 4976 }
4792 } 4977 }
4978 btrfs_release_path(root, path);
4979 BUG_ON(err);
4980
4981 ret = btrfs_del_root(trans, tree_root, &root->root_key);
4982 BUG_ON(ret);
4983
4984 free_extent_buffer(root->node);
4985 free_extent_buffer(root->commit_root);
4986 kfree(root);
4793out: 4987out:
4988 btrfs_end_transaction(trans, tree_root);
4989 kfree(wc);
4794 btrfs_free_path(path); 4990 btrfs_free_path(path);
4795 return ret; 4991 return err;
4796} 4992}
4797 4993
4994/*
4995 * drop subtree rooted at tree block 'node'.
4996 *
4997 * NOTE: this function will unlock and release tree block 'node'
4998 */
4798int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 4999int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4799 struct btrfs_root *root, 5000 struct btrfs_root *root,
4800 struct extent_buffer *node, 5001 struct extent_buffer *node,
4801 struct extent_buffer *parent) 5002 struct extent_buffer *parent)
4802{ 5003{
4803 struct btrfs_path *path; 5004 struct btrfs_path *path;
5005 struct walk_control *wc;
4804 int level; 5006 int level;
4805 int parent_level; 5007 int parent_level;
4806 int ret = 0; 5008 int ret = 0;
4807 int wret; 5009 int wret;
4808 5010
5011 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5012
4809 path = btrfs_alloc_path(); 5013 path = btrfs_alloc_path();
4810 BUG_ON(!path); 5014 BUG_ON(!path);
4811 5015
5016 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5017 BUG_ON(!wc);
5018
4812 btrfs_assert_tree_locked(parent); 5019 btrfs_assert_tree_locked(parent);
4813 parent_level = btrfs_header_level(parent); 5020 parent_level = btrfs_header_level(parent);
4814 extent_buffer_get(parent); 5021 extent_buffer_get(parent);
@@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4817 5024
4818 btrfs_assert_tree_locked(node); 5025 btrfs_assert_tree_locked(node);
4819 level = btrfs_header_level(node); 5026 level = btrfs_header_level(node);
4820 extent_buffer_get(node);
4821 path->nodes[level] = node; 5027 path->nodes[level] = node;
4822 path->slots[level] = 0; 5028 path->slots[level] = 0;
5029 path->locks[level] = 1;
5030
5031 wc->refs[parent_level] = 1;
5032 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5033 wc->level = level;
5034 wc->shared_level = -1;
5035 wc->stage = DROP_REFERENCE;
5036 wc->update_ref = 0;
5037 wc->keep_locks = 1;
4823 5038
4824 while (1) { 5039 while (1) {
4825 wret = walk_down_tree(trans, root, path, &level); 5040 wret = walk_down_tree(trans, root, path, wc);
4826 if (wret < 0) 5041 if (wret < 0) {
4827 ret = wret; 5042 ret = wret;
4828 if (wret != 0)
4829 break; 5043 break;
5044 }
4830 5045
4831 wret = walk_up_tree(trans, root, path, &level, parent_level); 5046 wret = walk_up_tree(trans, root, path, wc, parent_level);
4832 if (wret < 0) 5047 if (wret < 0)
4833 ret = wret; 5048 ret = wret;
4834 if (wret != 0) 5049 if (wret != 0)
4835 break; 5050 break;
4836 } 5051 }
4837 5052
5053 kfree(wc);
4838 btrfs_free_path(path); 5054 btrfs_free_path(path);
4839 return ret; 5055 return ret;
4840} 5056}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 126477eaecf5..7c3cd248d8d6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
151 } 151 }
152 if (end_pos > isize) { 152 if (end_pos > isize) {
153 i_size_write(inode, end_pos); 153 i_size_write(inode, end_pos);
154 btrfs_update_inode(trans, root, inode); 154 /* we've only changed i_size in ram, and we haven't updated
155 * the disk i_size. There is no need to log the inode
156 * at this time.
157 */
155 } 158 }
156 err = btrfs_end_transaction(trans, root); 159 err = btrfs_end_transaction(trans, root);
157out_unlock: 160out_unlock:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dbe1aabf96cd..7ffa3d34ea19 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3580,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3580 owner = 1; 3580 owner = 1;
3581 BTRFS_I(inode)->block_group = 3581 BTRFS_I(inode)->block_group =
3582 btrfs_find_block_group(root, 0, alloc_hint, owner); 3582 btrfs_find_block_group(root, 0, alloc_hint, owner);
3583 if ((mode & S_IFREG)) {
3584 if (btrfs_test_opt(root, NODATASUM))
3585 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3586 if (btrfs_test_opt(root, NODATACOW))
3587 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3588 }
3589 3583
3590 key[0].objectid = objectid; 3584 key[0].objectid = objectid;
3591 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 3585 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -3640,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3640 3634
3641 btrfs_inherit_iflags(inode, dir); 3635 btrfs_inherit_iflags(inode, dir);
3642 3636
3637 if ((mode & S_IFREG)) {
3638 if (btrfs_test_opt(root, NODATASUM))
3639 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3640 if (btrfs_test_opt(root, NODATACOW))
3641 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3642 }
3643
3643 insert_inode_hash(inode); 3644 insert_inode_hash(inode);
3644 inode_tree_add(inode); 3645 inode_tree_add(inode);
3645 return inode; 3646 return inode;
@@ -5082,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5082 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5083 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5083 struct extent_map *em; 5084 struct extent_map *em;
5084 struct btrfs_trans_handle *trans; 5085 struct btrfs_trans_handle *trans;
5086 struct btrfs_root *root;
5085 int ret; 5087 int ret;
5086 5088
5087 alloc_start = offset & ~mask; 5089 alloc_start = offset & ~mask;
@@ -5100,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5100 goto out; 5102 goto out;
5101 } 5103 }
5102 5104
5105 root = BTRFS_I(inode)->root;
5106
5107 ret = btrfs_check_data_free_space(root, inode,
5108 alloc_end - alloc_start);
5109 if (ret)
5110 goto out;
5111
5103 locked_end = alloc_end - 1; 5112 locked_end = alloc_end - 1;
5104 while (1) { 5113 while (1) {
5105 struct btrfs_ordered_extent *ordered; 5114 struct btrfs_ordered_extent *ordered;
@@ -5107,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5107 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); 5116 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5108 if (!trans) { 5117 if (!trans) {
5109 ret = -EIO; 5118 ret = -EIO;
5110 goto out; 5119 goto out_free;
5111 } 5120 }
5112 5121
5113 /* the extent lock is ordered inside the running 5122 /* the extent lock is ordered inside the running
@@ -5168,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5168 GFP_NOFS); 5177 GFP_NOFS);
5169 5178
5170 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5179 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5180out_free:
5181 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5171out: 5182out:
5172 mutex_unlock(&inode->i_mutex); 5183 mutex_unlock(&inode->i_mutex);
5173 return ret; 5184 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index eff18f5b5362..9f4db848db10 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1028 struct btrfs_file_extent_item); 1028 struct btrfs_file_extent_item);
1029 comp = btrfs_file_extent_compression(leaf, extent); 1029 comp = btrfs_file_extent_compression(leaf, extent);
1030 type = btrfs_file_extent_type(leaf, extent); 1030 type = btrfs_file_extent_type(leaf, extent);
1031 if (type == BTRFS_FILE_EXTENT_REG) { 1031 if (type == BTRFS_FILE_EXTENT_REG ||
1032 type == BTRFS_FILE_EXTENT_PREALLOC) {
1032 disko = btrfs_file_extent_disk_bytenr(leaf, 1033 disko = btrfs_file_extent_disk_bytenr(leaf,
1033 extent); 1034 extent);
1034 diskl = btrfs_file_extent_disk_num_bytes(leaf, 1035 diskl = btrfs_file_extent_disk_num_bytes(leaf,
@@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1051 new_key.objectid = inode->i_ino; 1052 new_key.objectid = inode->i_ino;
1052 new_key.offset = key.offset + destoff - off; 1053 new_key.offset = key.offset + destoff - off;
1053 1054
1054 if (type == BTRFS_FILE_EXTENT_REG) { 1055 if (type == BTRFS_FILE_EXTENT_REG ||
1056 type == BTRFS_FILE_EXTENT_PREALLOC) {
1055 ret = btrfs_insert_empty_item(trans, root, path, 1057 ret = btrfs_insert_empty_item(trans, root, path,
1056 &new_key, size); 1058 &new_key, size);
1057 if (ret) 1059 if (ret)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b23dc209ae10..008397934778 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work)
1788 btrfs_end_transaction(trans, root); 1788 btrfs_end_transaction(trans, root);
1789 } 1789 }
1790 1790
1791 btrfs_drop_dead_root(reloc_root); 1791 btrfs_drop_snapshot(reloc_root, 0);
1792 1792
1793 if (atomic_dec_and_test(async->num_pending)) 1793 if (atomic_dec_and_test(async->num_pending))
1794 complete(async->done); 1794 complete(async->done);
@@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2075 2075
2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2077 BUG_ON(ret); 2077 BUG_ON(ret);
2078
2079 btrfs_tree_unlock(eb);
2080 free_extent_buffer(eb);
2081 } 2078 }
2082 if (!lowest) { 2079 if (!lowest) {
2083 btrfs_tree_unlock(upper->eb); 2080 btrfs_tree_unlock(upper->eb);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4e83457ea253..2dbf1c1f56ee 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
593 return 0; 593 return 0;
594} 594}
595 595
596#if 0
596/* 597/*
597 * when dropping snapshots, we generate a ton of delayed refs, and it makes 598 * when dropping snapshots, we generate a ton of delayed refs, and it makes
598 * sense not to join the transaction while it is trying to flush the current 599 * sense not to join the transaction while it is trying to flush the current
@@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root)
681 btrfs_btree_balance_dirty(tree_root, nr); 682 btrfs_btree_balance_dirty(tree_root, nr);
682 return ret; 683 return ret;
683} 684}
685#endif
684 686
685/* 687/*
686 * new snapshots need to be created at a very specific time in the 688 * new snapshots need to be created at a very specific time in the
@@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1081 while (!list_empty(&list)) { 1083 while (!list_empty(&list)) {
1082 root = list_entry(list.next, struct btrfs_root, root_list); 1084 root = list_entry(list.next, struct btrfs_root, root_list);
1083 list_del_init(&root->root_list); 1085 list_del_init(&root->root_list);
1084 btrfs_drop_dead_root(root); 1086 btrfs_drop_snapshot(root, 0);
1085 } 1087 }
1086 return 0; 1088 return 0;
1087} 1089}