aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fusionio.com>2012-08-17 13:14:17 -0400
committerChris Mason <chris.mason@fusionio.com>2012-10-01 15:19:03 -0400
commit5dc562c541e1026df9d43913c2f6b91156e22d32 (patch)
treea7768100e81b756f2a3edbfcaf99ad77ca7ed605 /fs
parent224ecce517af3a952321202cdf304c12e138caca (diff)
Btrfs: turbo charge fsync
At least for the vm workload. Currently on fsync we will 1) Truncate all items in the log tree for the given inode if they exist and 2) Copy all items for a given inode into the log The problem with this is that for things like VMs you can have lots of extents from the fragmented writing behavior, and worse yet, you may have only modified a few extents, not the entire thing. This patch fixes this problem by tracking which transid modified our extent, and then when we do the tree logging we find all of the extents we've modified in our current transaction, sort them and commit them. We also only truncate up to the xattrs of the inode and copy that stuff in normally, and then just drop any extents in the range we have that exist in the log already. Here are some numbers of a 50 meg fio job that does random writes and fsync()s after every write Original Patched SATA drive 82KB/s 140KB/s Fusion drive 431KB/s 2532KB/s So around 2-6 times faster depending on your hardware. There are a few corner cases, for example if you truncate at all we have to do it the old way since there is no way to be sure what is in the log is ok. This probably could be done smarter, but if you write-fsync-truncate-write-fsync you deserve what you get. All this work is in RAM of course so if your inode gets evicted from cache and you read it in and fsync it we'll do it the slow way if we are still in the same transaction that we last modified the inode in. The biggest cool part of this is that it requires no changes to the recovery code, so if you fsync with this patch and crash and load an old kernel, it will run the recovery and be a-ok. I have tested this pretty thoroughly with an fsync tester and everything comes back fine, as well as xfstests. Thanks, Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/btrfs_inode.h1
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/extent_map.c34
-rw-r--r--fs/btrfs/extent_map.h5
-rw-r--r--fs/btrfs/file.c62
-rw-r--r--fs/btrfs/inode.c120
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/tree-log.c220
8 files changed, 416 insertions, 42 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 5b2ad6bc4fe7..7c7bf818f3c1 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -38,6 +38,7 @@
38#define BTRFS_INODE_DELALLOC_META_RESERVED 4 38#define BTRFS_INODE_DELALLOC_META_RESERVED 4
39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5 39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
40#define BTRFS_INODE_HAS_ASYNC_EXTENT 6 40#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
41#define BTRFS_INODE_NEEDS_FULL_SYNC 7
41 42
42/* in memory btrfs inode */ 43/* in memory btrfs inode */
43struct btrfs_inode { 44struct btrfs_inode {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d195b507660..4b81ea3fa1b2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3315,9 +3315,17 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
3315int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); 3315int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
3316int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 3316int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
3317 int skip_pinned); 3317 int skip_pinned);
3318int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace,
3319 u64 start, u64 end, int skip_pinned,
3320 int modified);
3318extern const struct file_operations btrfs_file_operations; 3321extern const struct file_operations btrfs_file_operations;
3319int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, 3322int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
3320 u64 start, u64 end, u64 *hint_byte, int drop_cache); 3323 struct btrfs_root *root, struct inode *inode,
3324 struct btrfs_path *path, u64 start, u64 end,
3325 u64 *hint_byte, int drop_cache);
3326int btrfs_drop_extents(struct btrfs_trans_handle *trans,
3327 struct btrfs_root *root, struct inode *inode, u64 start,
3328 u64 end, u64 *hint_byte, int drop_cache);
3321int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 3329int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
3322 struct inode *inode, u64 start, u64 end); 3330 struct inode *inode, u64 start, u64 end);
3323int btrfs_release_file(struct inode *inode, struct file *file); 3331int btrfs_release_file(struct inode *inode, struct file *file);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 7c97b3301459..1fe82cfc1d93 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -35,6 +35,7 @@ void extent_map_exit(void)
35void extent_map_tree_init(struct extent_map_tree *tree) 35void extent_map_tree_init(struct extent_map_tree *tree)
36{ 36{
37 tree->map = RB_ROOT; 37 tree->map = RB_ROOT;
38 INIT_LIST_HEAD(&tree->modified_extents);
38 rwlock_init(&tree->lock); 39 rwlock_init(&tree->lock);
39} 40}
40 41
@@ -54,7 +55,9 @@ struct extent_map *alloc_extent_map(void)
54 em->in_tree = 0; 55 em->in_tree = 0;
55 em->flags = 0; 56 em->flags = 0;
56 em->compress_type = BTRFS_COMPRESS_NONE; 57 em->compress_type = BTRFS_COMPRESS_NONE;
58 em->generation = 0;
57 atomic_set(&em->refs, 1); 59 atomic_set(&em->refs, 1);
60 INIT_LIST_HEAD(&em->list);
58 return em; 61 return em;
59} 62}
60 63
@@ -72,6 +75,7 @@ void free_extent_map(struct extent_map *em)
72 WARN_ON(atomic_read(&em->refs) == 0); 75 WARN_ON(atomic_read(&em->refs) == 0);
73 if (atomic_dec_and_test(&em->refs)) { 76 if (atomic_dec_and_test(&em->refs)) {
74 WARN_ON(em->in_tree); 77 WARN_ON(em->in_tree);
78 WARN_ON(!list_empty(&em->list));
75 kmem_cache_free(extent_map_cache, em); 79 kmem_cache_free(extent_map_cache, em);
76 } 80 }
77} 81}
@@ -198,6 +202,12 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
198 em->block_len += merge->block_len; 202 em->block_len += merge->block_len;
199 em->block_start = merge->block_start; 203 em->block_start = merge->block_start;
200 merge->in_tree = 0; 204 merge->in_tree = 0;
205 if (merge->generation > em->generation) {
206 em->generation = merge->generation;
207 list_move(&em->list, &tree->modified_extents);
208 }
209
210 list_del_init(&merge->list);
201 rb_erase(&merge->rb_node, &tree->map); 211 rb_erase(&merge->rb_node, &tree->map);
202 free_extent_map(merge); 212 free_extent_map(merge);
203 } 213 }
@@ -211,11 +221,29 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
211 em->block_len += merge->len; 221 em->block_len += merge->len;
212 rb_erase(&merge->rb_node, &tree->map); 222 rb_erase(&merge->rb_node, &tree->map);
213 merge->in_tree = 0; 223 merge->in_tree = 0;
224 if (merge->generation > em->generation) {
225 em->generation = merge->generation;
226 list_move(&em->list, &tree->modified_extents);
227 }
228 list_del_init(&merge->list);
214 free_extent_map(merge); 229 free_extent_map(merge);
215 } 230 }
216} 231}
217 232
218int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) 233/**
 234 * unpin_extent_cache - unpin an extent from the cache
235 * @tree: tree to unpin the extent in
236 * @start: logical offset in the file
237 * @len: length of the extent
238 * @gen: generation that this extent has been modified in
239 * @prealloc: if this is set we need to clear the prealloc flag
240 *
241 * Called after an extent has been written to disk properly. Set the generation
242 * to the generation that actually added the file item to the inode so we know
243 * we need to sync this extent when we call fsync().
244 */
245int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
246 u64 gen)
219{ 247{
220 int ret = 0; 248 int ret = 0;
221 struct extent_map *em; 249 struct extent_map *em;
@@ -228,10 +256,11 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
228 if (!em) 256 if (!em)
229 goto out; 257 goto out;
230 258
259 list_move(&em->list, &tree->modified_extents);
260 em->generation = gen;
231 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 261 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
232 262
233 try_merge_map(tree, em); 263 try_merge_map(tree, em);
234
235 free_extent_map(em); 264 free_extent_map(em);
236out: 265out:
237 write_unlock(&tree->lock); 266 write_unlock(&tree->lock);
@@ -358,6 +387,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
358 387
359 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); 388 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
360 rb_erase(&em->rb_node, &tree->map); 389 rb_erase(&em->rb_node, &tree->map);
390 list_del_init(&em->list);
361 em->in_tree = 0; 391 em->in_tree = 0;
362 return ret; 392 return ret;
363} 393}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 1195f09761fe..2388a60bd6e3 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -23,15 +23,18 @@ struct extent_map {
23 u64 orig_start; 23 u64 orig_start;
24 u64 block_start; 24 u64 block_start;
25 u64 block_len; 25 u64 block_len;
26 u64 generation;
26 unsigned long flags; 27 unsigned long flags;
27 struct block_device *bdev; 28 struct block_device *bdev;
28 atomic_t refs; 29 atomic_t refs;
29 unsigned int in_tree; 30 unsigned int in_tree;
30 unsigned int compress_type; 31 unsigned int compress_type;
32 struct list_head list;
31}; 33};
32 34
33struct extent_map_tree { 35struct extent_map_tree {
34 struct rb_root map; 36 struct rb_root map;
37 struct list_head modified_extents;
35 rwlock_t lock; 38 rwlock_t lock;
36}; 39};
37 40
@@ -60,7 +63,7 @@ struct extent_map *alloc_extent_map(void);
60void free_extent_map(struct extent_map *em); 63void free_extent_map(struct extent_map *em);
61int __init extent_map_init(void); 64int __init extent_map_init(void);
62void extent_map_exit(void); 65void extent_map_exit(void);
63int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); 66int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
64struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 67struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
65 u64 start, u64 len); 68 u64 start, u64 len);
66#endif 69#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b7c885c8423f..399f9d71a926 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -459,13 +459,14 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
459 * [start, end]. Existing extents are split as required. 459 * [start, end]. Existing extents are split as required.
460 */ 460 */
461int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 461int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
462 int skip_pinned) 462 int skip_pinned)
463{ 463{
464 struct extent_map *em; 464 struct extent_map *em;
465 struct extent_map *split = NULL; 465 struct extent_map *split = NULL;
466 struct extent_map *split2 = NULL; 466 struct extent_map *split2 = NULL;
467 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 467 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
468 u64 len = end - start + 1; 468 u64 len = end - start + 1;
469 u64 gen;
469 int ret; 470 int ret;
470 int testend = 1; 471 int testend = 1;
471 unsigned long flags; 472 unsigned long flags;
@@ -490,6 +491,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
490 break; 491 break;
491 } 492 }
492 flags = em->flags; 493 flags = em->flags;
494 gen = em->generation;
493 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 495 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
494 if (testend && em->start + em->len >= start + len) { 496 if (testend && em->start + em->len >= start + len) {
495 free_extent_map(em); 497 free_extent_map(em);
@@ -518,12 +520,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
518 split->block_len = em->block_len; 520 split->block_len = em->block_len;
519 else 521 else
520 split->block_len = split->len; 522 split->block_len = split->len;
521 523 split->generation = gen;
522 split->bdev = em->bdev; 524 split->bdev = em->bdev;
523 split->flags = flags; 525 split->flags = flags;
524 split->compress_type = em->compress_type; 526 split->compress_type = em->compress_type;
525 ret = add_extent_mapping(em_tree, split); 527 ret = add_extent_mapping(em_tree, split);
526 BUG_ON(ret); /* Logic error */ 528 BUG_ON(ret); /* Logic error */
529 list_move(&split->list, &em_tree->modified_extents);
527 free_extent_map(split); 530 free_extent_map(split);
528 split = split2; 531 split = split2;
529 split2 = NULL; 532 split2 = NULL;
@@ -537,6 +540,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
537 split->bdev = em->bdev; 540 split->bdev = em->bdev;
538 split->flags = flags; 541 split->flags = flags;
539 split->compress_type = em->compress_type; 542 split->compress_type = em->compress_type;
543 split->generation = gen;
540 544
541 if (compressed) { 545 if (compressed) {
542 split->block_len = em->block_len; 546 split->block_len = em->block_len;
@@ -550,6 +554,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
550 554
551 ret = add_extent_mapping(em_tree, split); 555 ret = add_extent_mapping(em_tree, split);
552 BUG_ON(ret); /* Logic error */ 556 BUG_ON(ret); /* Logic error */
557 list_move(&split->list, &em_tree->modified_extents);
553 free_extent_map(split); 558 free_extent_map(split);
554 split = NULL; 559 split = NULL;
555 } 560 }
@@ -576,13 +581,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
576 * it is either truncated or split. Anything entirely inside the range 581 * it is either truncated or split. Anything entirely inside the range
577 * is deleted from the tree. 582 * is deleted from the tree.
578 */ 583 */
579int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, 584int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
580 u64 start, u64 end, u64 *hint_byte, int drop_cache) 585 struct btrfs_root *root, struct inode *inode,
586 struct btrfs_path *path, u64 start, u64 end,
587 u64 *hint_byte, int drop_cache)
581{ 588{
582 struct btrfs_root *root = BTRFS_I(inode)->root;
583 struct extent_buffer *leaf; 589 struct extent_buffer *leaf;
584 struct btrfs_file_extent_item *fi; 590 struct btrfs_file_extent_item *fi;
585 struct btrfs_path *path;
586 struct btrfs_key key; 591 struct btrfs_key key;
587 struct btrfs_key new_key; 592 struct btrfs_key new_key;
588 u64 ino = btrfs_ino(inode); 593 u64 ino = btrfs_ino(inode);
@@ -597,14 +602,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
597 int recow; 602 int recow;
598 int ret; 603 int ret;
599 int modify_tree = -1; 604 int modify_tree = -1;
605 int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
600 606
601 if (drop_cache) 607 if (drop_cache)
602 btrfs_drop_extent_cache(inode, start, end - 1, 0); 608 btrfs_drop_extent_cache(inode, start, end - 1, 0);
603 609
604 path = btrfs_alloc_path();
605 if (!path)
606 return -ENOMEM;
607
608 if (start >= BTRFS_I(inode)->disk_i_size) 610 if (start >= BTRFS_I(inode)->disk_i_size)
609 modify_tree = 0; 611 modify_tree = 0;
610 612
@@ -707,7 +709,7 @@ next_slot:
707 extent_end - start); 709 extent_end - start);
708 btrfs_mark_buffer_dirty(leaf); 710 btrfs_mark_buffer_dirty(leaf);
709 711
710 if (disk_bytenr > 0) { 712 if (update_refs && disk_bytenr > 0) {
711 ret = btrfs_inc_extent_ref(trans, root, 713 ret = btrfs_inc_extent_ref(trans, root,
712 disk_bytenr, num_bytes, 0, 714 disk_bytenr, num_bytes, 0,
713 root->root_key.objectid, 715 root->root_key.objectid,
@@ -734,7 +736,7 @@ next_slot:
734 btrfs_set_file_extent_num_bytes(leaf, fi, 736 btrfs_set_file_extent_num_bytes(leaf, fi,
735 extent_end - end); 737 extent_end - end);
736 btrfs_mark_buffer_dirty(leaf); 738 btrfs_mark_buffer_dirty(leaf);
737 if (disk_bytenr > 0) { 739 if (update_refs && disk_bytenr > 0) {
738 inode_sub_bytes(inode, end - key.offset); 740 inode_sub_bytes(inode, end - key.offset);
739 *hint_byte = disk_bytenr; 741 *hint_byte = disk_bytenr;
740 } 742 }
@@ -753,7 +755,7 @@ next_slot:
753 btrfs_set_file_extent_num_bytes(leaf, fi, 755 btrfs_set_file_extent_num_bytes(leaf, fi,
754 start - key.offset); 756 start - key.offset);
755 btrfs_mark_buffer_dirty(leaf); 757 btrfs_mark_buffer_dirty(leaf);
756 if (disk_bytenr > 0) { 758 if (update_refs && disk_bytenr > 0) {
757 inode_sub_bytes(inode, extent_end - start); 759 inode_sub_bytes(inode, extent_end - start);
758 *hint_byte = disk_bytenr; 760 *hint_byte = disk_bytenr;
759 } 761 }
@@ -777,12 +779,13 @@ next_slot:
777 del_nr++; 779 del_nr++;
778 } 780 }
779 781
780 if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 782 if (update_refs &&
783 extent_type == BTRFS_FILE_EXTENT_INLINE) {
781 inode_sub_bytes(inode, 784 inode_sub_bytes(inode,
782 extent_end - key.offset); 785 extent_end - key.offset);
783 extent_end = ALIGN(extent_end, 786 extent_end = ALIGN(extent_end,
784 root->sectorsize); 787 root->sectorsize);
785 } else if (disk_bytenr > 0) { 788 } else if (update_refs && disk_bytenr > 0) {
786 ret = btrfs_free_extent(trans, root, 789 ret = btrfs_free_extent(trans, root,
787 disk_bytenr, num_bytes, 0, 790 disk_bytenr, num_bytes, 0,
788 root->root_key.objectid, 791 root->root_key.objectid,
@@ -806,7 +809,7 @@ next_slot:
806 del_nr); 809 del_nr);
807 if (ret) { 810 if (ret) {
808 btrfs_abort_transaction(trans, root, ret); 811 btrfs_abort_transaction(trans, root, ret);
809 goto out; 812 break;
810 } 813 }
811 814
812 del_nr = 0; 815 del_nr = 0;
@@ -825,7 +828,22 @@ next_slot:
825 btrfs_abort_transaction(trans, root, ret); 828 btrfs_abort_transaction(trans, root, ret);
826 } 829 }
827 830
828out: 831 btrfs_release_path(path);
832 return ret;
833}
834
835int btrfs_drop_extents(struct btrfs_trans_handle *trans,
836 struct btrfs_root *root, struct inode *inode, u64 start,
837 u64 end, u64 *hint_byte, int drop_cache)
838{
839 struct btrfs_path *path;
840 int ret;
841
842 path = btrfs_alloc_path();
843 if (!path)
844 return -ENOMEM;
845 ret = __btrfs_drop_extents(trans, root, inode, path, start, end,
846 hint_byte, drop_cache);
829 btrfs_free_path(path); 847 btrfs_free_path(path);
830 return ret; 848 return ret;
831} 849}
@@ -892,8 +910,6 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
892 int ret; 910 int ret;
893 u64 ino = btrfs_ino(inode); 911 u64 ino = btrfs_ino(inode);
894 912
895 btrfs_drop_extent_cache(inode, start, end - 1, 0);
896
897 path = btrfs_alloc_path(); 913 path = btrfs_alloc_path();
898 if (!path) 914 if (!path)
899 return -ENOMEM; 915 return -ENOMEM;
@@ -1556,6 +1572,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1556 BTRFS_I(inode)->last_trans <= 1572 BTRFS_I(inode)->last_trans <=
1557 root->fs_info->last_trans_committed) { 1573 root->fs_info->last_trans_committed) {
1558 BTRFS_I(inode)->last_trans = 0; 1574 BTRFS_I(inode)->last_trans = 0;
1575
1576 /*
 1577 * We've had everything committed since the last time we were
1578 * modified so clear this flag in case it was set for whatever
1579 * reason, it's no longer relevant.
1580 */
1581 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1582 &BTRFS_I(inode)->runtime_flags);
1559 mutex_unlock(&inode->i_mutex); 1583 mutex_unlock(&inode->i_mutex);
1560 goto out; 1584 goto out;
1561 } 1585 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6971bac66d9d..1b99fe8a129d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -247,7 +247,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
247 return 1; 247 return 1;
248 } 248 }
249 249
250 ret = btrfs_drop_extents(trans, inode, start, aligned_end, 250 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end,
251 &hint_byte, 1); 251 &hint_byte, 1);
252 if (ret) 252 if (ret)
253 return ret; 253 return ret;
@@ -1803,7 +1803,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1803 * the caller is expected to unpin it and allow it to be merged 1803 * the caller is expected to unpin it and allow it to be merged
1804 * with the others. 1804 * with the others.
1805 */ 1805 */
1806 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, 1806 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1807 file_pos + num_bytes,
1807 &hint, 0); 1808 &hint, 0);
1808 if (ret) 1809 if (ret)
1809 goto out; 1810 goto out;
@@ -1929,11 +1930,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1929 ordered_extent->len, 1930 ordered_extent->len,
1930 compress_type, 0, 0, 1931 compress_type, 0, 0,
1931 BTRFS_FILE_EXTENT_REG); 1932 BTRFS_FILE_EXTENT_REG);
1932 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1933 ordered_extent->file_offset,
1934 ordered_extent->len);
1935 } 1933 }
1936 1934 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1935 ordered_extent->file_offset, ordered_extent->len,
1936 trans->transid);
1937 if (ret < 0) { 1937 if (ret < 0) {
1938 btrfs_abort_transaction(trans, root, ret); 1938 btrfs_abort_transaction(trans, root, ret);
1939 goto out_unlock; 1939 goto out_unlock;
@@ -2592,6 +2592,18 @@ static void btrfs_read_locked_inode(struct inode *inode)
2592 2592
2593 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); 2593 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
2594 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); 2594 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
2595 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
2596
2597 /*
2598 * If we were modified in the current generation and evicted from memory
2599 * and then re-read we need to do a full sync since we don't have any
2600 * idea about which extents were modified before we were evicted from
2601 * cache.
2602 */
2603 if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
2604 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2605 &BTRFS_I(inode)->runtime_flags);
2606
2595 inode->i_version = btrfs_inode_sequence(leaf, inode_item); 2607 inode->i_version = btrfs_inode_sequence(leaf, inode_item);
2596 inode->i_generation = BTRFS_I(inode)->generation; 2608 inode->i_generation = BTRFS_I(inode)->generation;
2597 inode->i_rdev = 0; 2609 inode->i_rdev = 0;
@@ -3269,8 +3281,13 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3269 return -ENOMEM; 3281 return -ENOMEM;
3270 path->reada = -1; 3282 path->reada = -1;
3271 3283
3284 /*
3285 * We want to drop from the next block forward in case this new size is
3286 * not block aligned since we will be keeping the last block of the
3287 * extent just the way it is.
3288 */
3272 if (root->ref_cows || root == root->fs_info->tree_root) 3289 if (root->ref_cows || root == root->fs_info->tree_root)
3273 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3290 btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0);
3274 3291
3275 /* 3292 /*
3276 * This function is also used to drop the items in the log tree before 3293 * This function is also used to drop the items in the log tree before
@@ -3579,6 +3596,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3579 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3596 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3580 struct extent_map *em = NULL; 3597 struct extent_map *em = NULL;
3581 struct extent_state *cached_state = NULL; 3598 struct extent_state *cached_state = NULL;
3599 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3582 u64 mask = root->sectorsize - 1; 3600 u64 mask = root->sectorsize - 1;
3583 u64 hole_start = (oldsize + mask) & ~mask; 3601 u64 hole_start = (oldsize + mask) & ~mask;
3584 u64 block_end = (size + mask) & ~mask; 3602 u64 block_end = (size + mask) & ~mask;
@@ -3615,6 +3633,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3615 last_byte = min(extent_map_end(em), block_end); 3633 last_byte = min(extent_map_end(em), block_end);
3616 last_byte = (last_byte + mask) & ~mask; 3634 last_byte = (last_byte + mask) & ~mask;
3617 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3635 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3636 struct extent_map *hole_em;
3618 u64 hint_byte = 0; 3637 u64 hint_byte = 0;
3619 hole_size = last_byte - cur_offset; 3638 hole_size = last_byte - cur_offset;
3620 3639
@@ -3624,7 +3643,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3624 break; 3643 break;
3625 } 3644 }
3626 3645
3627 err = btrfs_drop_extents(trans, inode, cur_offset, 3646 err = btrfs_drop_extents(trans, root, inode,
3647 cur_offset,
3628 cur_offset + hole_size, 3648 cur_offset + hole_size,
3629 &hint_byte, 1); 3649 &hint_byte, 1);
3630 if (err) { 3650 if (err) {
@@ -3643,9 +3663,39 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3643 break; 3663 break;
3644 } 3664 }
3645 3665
3646 btrfs_drop_extent_cache(inode, hole_start, 3666 btrfs_drop_extent_cache(inode, cur_offset,
3647 last_byte - 1, 0); 3667 cur_offset + hole_size - 1, 0);
3668 hole_em = alloc_extent_map();
3669 if (!hole_em) {
3670 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3671 &BTRFS_I(inode)->runtime_flags);
3672 goto next;
3673 }
3674 hole_em->start = cur_offset;
3675 hole_em->len = hole_size;
3676 hole_em->orig_start = cur_offset;
3677
3678 hole_em->block_start = EXTENT_MAP_HOLE;
3679 hole_em->block_len = 0;
3680 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
3681 hole_em->compress_type = BTRFS_COMPRESS_NONE;
3682 hole_em->generation = trans->transid;
3648 3683
3684 while (1) {
3685 write_lock(&em_tree->lock);
3686 err = add_extent_mapping(em_tree, hole_em);
3687 if (!err)
3688 list_move(&hole_em->list,
3689 &em_tree->modified_extents);
3690 write_unlock(&em_tree->lock);
3691 if (err != -EEXIST)
3692 break;
3693 btrfs_drop_extent_cache(inode, cur_offset,
3694 cur_offset +
3695 hole_size - 1, 0);
3696 }
3697 free_extent_map(hole_em);
3698next:
3649 btrfs_update_inode(trans, root, inode); 3699 btrfs_update_inode(trans, root, inode);
3650 btrfs_end_transaction(trans, root); 3700 btrfs_end_transaction(trans, root);
3651 } 3701 }
@@ -4673,6 +4723,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4673 BTRFS_I(inode)->generation = trans->transid; 4723 BTRFS_I(inode)->generation = trans->transid;
4674 inode->i_generation = BTRFS_I(inode)->generation; 4724 inode->i_generation = BTRFS_I(inode)->generation;
4675 4725
4726 /*
4727 * We could have gotten an inode number from somebody who was fsynced
4728 * and then removed in this same transaction, so let's just set full
4729 * sync since it will be a full sync anyway and this will blow away the
4730 * old info in the log.
4731 */
4732 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
4733
4676 if (S_ISDIR(mode)) 4734 if (S_ISDIR(mode))
4677 owner = 0; 4735 owner = 0;
4678 else 4736 else
@@ -6839,6 +6897,15 @@ static int btrfs_truncate(struct inode *inode)
6839 &BTRFS_I(inode)->runtime_flags)) 6897 &BTRFS_I(inode)->runtime_flags))
6840 btrfs_add_ordered_operation(trans, root, inode); 6898 btrfs_add_ordered_operation(trans, root, inode);
6841 6899
6900 /*
6901 * So if we truncate and then write and fsync we normally would just
6902 * write the extents that changed, which is a problem if we need to
6903 * first truncate that entire inode. So set this flag so we write out
6904 * all of the extents in the inode to the sync log so we're completely
6905 * safe.
6906 */
6907 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
6908
6842 while (1) { 6909 while (1) {
6843 ret = btrfs_block_rsv_refill(root, rsv, min_size); 6910 ret = btrfs_block_rsv_refill(root, rsv, min_size);
6844 if (ret) { 6911 if (ret) {
@@ -7510,6 +7577,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7510 loff_t actual_len, u64 *alloc_hint, 7577 loff_t actual_len, u64 *alloc_hint,
7511 struct btrfs_trans_handle *trans) 7578 struct btrfs_trans_handle *trans)
7512{ 7579{
7580 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
7581 struct extent_map *em;
7513 struct btrfs_root *root = BTRFS_I(inode)->root; 7582 struct btrfs_root *root = BTRFS_I(inode)->root;
7514 struct btrfs_key ins; 7583 struct btrfs_key ins;
7515 u64 cur_offset = start; 7584 u64 cur_offset = start;
@@ -7550,6 +7619,37 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7550 btrfs_drop_extent_cache(inode, cur_offset, 7619 btrfs_drop_extent_cache(inode, cur_offset,
7551 cur_offset + ins.offset -1, 0); 7620 cur_offset + ins.offset -1, 0);
7552 7621
7622 em = alloc_extent_map();
7623 if (!em) {
7624 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
7625 &BTRFS_I(inode)->runtime_flags);
7626 goto next;
7627 }
7628
7629 em->start = cur_offset;
7630 em->orig_start = cur_offset;
7631 em->len = ins.offset;
7632 em->block_start = ins.objectid;
7633 em->block_len = ins.offset;
7634 em->bdev = root->fs_info->fs_devices->latest_bdev;
7635 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
7636 em->generation = trans->transid;
7637
7638 while (1) {
7639 write_lock(&em_tree->lock);
7640 ret = add_extent_mapping(em_tree, em);
7641 if (!ret)
7642 list_move(&em->list,
7643 &em_tree->modified_extents);
7644 write_unlock(&em_tree->lock);
7645 if (ret != -EEXIST)
7646 break;
7647 btrfs_drop_extent_cache(inode, cur_offset,
7648 cur_offset + ins.offset - 1,
7649 0);
7650 }
7651 free_extent_map(em);
7652next:
7553 num_bytes -= ins.offset; 7653 num_bytes -= ins.offset;
7554 cur_offset += ins.offset; 7654 cur_offset += ins.offset;
7555 *alloc_hint = ins.objectid + ins.offset; 7655 *alloc_hint = ins.objectid + ins.offset;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9df50fa8a078..95223222d5ad 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2576,7 +2576,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2576 datal -= off - key.offset; 2576 datal -= off - key.offset;
2577 } 2577 }
2578 2578
2579 ret = btrfs_drop_extents(trans, inode, 2579 ret = btrfs_drop_extents(trans, root, inode,
2580 new_key.offset, 2580 new_key.offset,
2581 new_key.offset + datal, 2581 new_key.offset + datal,
2582 &hint_byte, 1); 2582 &hint_byte, 1);
@@ -2650,7 +2650,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2650 size -= skip + trim; 2650 size -= skip + trim;
2651 datal -= skip + trim; 2651 datal -= skip + trim;
2652 2652
2653 ret = btrfs_drop_extents(trans, inode, 2653 ret = btrfs_drop_extents(trans, root, inode,
2654 new_key.offset, 2654 new_key.offset,
2655 new_key.offset + datal, 2655 new_key.offset + datal,
2656 &hint_byte, 1); 2656 &hint_byte, 1);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c86670f4f285..f2ff02c55130 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/list_sort.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "transaction.h" 23#include "transaction.h"
23#include "disk-io.h" 24#include "disk-io.h"
@@ -550,7 +551,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
550 551
551 saved_nbytes = inode_get_bytes(inode); 552 saved_nbytes = inode_get_bytes(inode);
552 /* drop any overlapping extents */ 553 /* drop any overlapping extents */
553 ret = btrfs_drop_extents(trans, inode, start, extent_end, 554 ret = btrfs_drop_extents(trans, root, inode, start, extent_end,
554 &alloc_hint, 1); 555 &alloc_hint, 1);
555 BUG_ON(ret); 556 BUG_ON(ret);
556 557
@@ -2803,6 +2804,194 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2803 return ret; 2804 return ret;
2804} 2805}
2805 2806
/*
 * list_sort() comparator: orders extent_map entries by ascending
 * em->start so modified extents are logged in file-offset order.
 * 'priv' is unused; it is required by the list_sort callback signature.
 */
2807static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
2808{
2809	struct extent_map *em1, *em2;
2810
2811	em1 = list_entry(a, struct extent_map, list);
2812	em2 = list_entry(b, struct extent_map, list);
2813
2814	if (em1->start < em2->start)
2815		return -1;
2816	else if (em1->start > em2->start)
2817		return 1;
2818	return 0;
2819}
2820
/*
 * Batching state carried across log_one_extent() calls so that runs of
 * adjacent file extent items in the same source leaf can be copied into
 * the log with a single copy_items() call instead of one call per item.
 *
 *   src:         leaf currently being copied from
 *   next_offset: file offset where the previous extent left off
 *   start_slot:  first slot of the pending run in 'src'
 *   nr:          number of items in the pending run (0 = none pending)
 */
2821struct log_args {
2822	struct extent_buffer *src;
2823	u64 next_offset;
2824	int start_slot;
2825	int nr;
2826};
2827
/*
 * Copy the file extent items covering [em->start, em->start + em->len)
 * from the fs tree into the log tree.  Items found in consecutive slots
 * of the same leaf are accumulated in *args and flushed in one
 * copy_items() call; a partially accumulated run may be left in *args
 * for the caller (btrfs_log_changed_extents) to flush or extend.
 *
 * If this inode was already logged in the current transaction, extents
 * previously written to the log that overlap this range are dropped
 * first so the log ends up with the latest version only.
 *
 * Returns 0 on success or a negative errno (-ENOENT if the extent item
 * that the extent_map describes cannot be found, which should not
 * happen and triggers a WARN).
 */
2828static int log_one_extent(struct btrfs_trans_handle *trans,
2829			  struct inode *inode, struct btrfs_root *root,
2830			  struct extent_map *em, struct btrfs_path *path,
2831			  struct btrfs_path *dst_path, struct log_args *args)
2832{
2833	struct btrfs_root *log = root->log_root;
2834	struct btrfs_file_extent_item *fi;
2835	struct btrfs_key key;
2836	u64 start = em->start;
2837	u64 len = em->len;
2838	u64 num_bytes;
2839	int nritems;
2840	int ret;
2841
	/*
	 * Inode already logged this transaction: remove any stale,
	 * overlapping extents from the log before re-copying.
	 */
2842	if (BTRFS_I(inode)->logged_trans == trans->transid) {
2843		u64 tmp;
2844		ret = __btrfs_drop_extents(trans, log, inode, dst_path, start,
2845					   start + len, &tmp, 0);
2846		if (ret)
2847			return ret;
2848	}
2849
	/* Walk fs-tree leaves until the whole [start, start+len) range is covered. */
2850	while (len) {
2851		if (args->nr)
2852			goto next_slot;
2853		key.objectid = btrfs_ino(inode);
2854		key.type = BTRFS_EXTENT_DATA_KEY;
2855		key.offset = start;
2856
2857		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2858		if (ret < 0)
2859			return ret;
2860		if (ret) {
2861			/*
2862			 * This shouldn't happen, but it might so warn and
2863			 * return an error.
2864			 */
2865			WARN_ON(1);
2866			return -ENOENT;
2867		}
2868		args->src = path->nodes[0];
2869next_slot:
2870		fi = btrfs_item_ptr(args->src, path->slots[0],
2871				    struct btrfs_file_extent_item);
		/*
		 * Extend the pending run if this slot is contiguous with
		 * it; otherwise flush the run and start a new one here.
		 */
2872		if (args->nr &&
2873		    args->start_slot + args->nr == path->slots[0]) {
2874			args->nr++;
2875		} else if (args->nr) {
2876			ret = copy_items(trans, log, dst_path, args->src,
2877					 args->start_slot, args->nr,
2878					 LOG_INODE_ALL);
2879			if (ret)
2880				return ret;
2881			args->nr = 1;
2882			args->start_slot = path->slots[0];
2883		} else if (!args->nr) {
2884			args->nr = 1;
2885			args->start_slot = path->slots[0];
2886		}
2887		nritems = btrfs_header_nritems(path->nodes[0]);
2888		path->slots[0]++;
2889		num_bytes = btrfs_file_extent_num_bytes(args->src, fi);
2890		if (len < num_bytes) {
2891			/* I _think_ this is ok, envision we write to a
2892			 * preallocated space that is adjacent to a previously
2893			 * written preallocated space that gets merged when we
2894			 * mark this preallocated space written.  If we do not
2895			 * have the adjacent extent in cache then when we copy
2896			 * this extent it could end up being larger than our EM
2897			 * thinks it is, which is a-ok, so just set len to 0.
2898			 */
2899			len = 0;
2900		} else {
2901			len -= num_bytes;
2902		}
2903		start += btrfs_file_extent_num_bytes(args->src, fi);
2904		args->next_offset = start;
2905
2906		if (path->slots[0] < nritems) {
2907			if (len)
2908				goto next_slot;
2909			break;
2910		}
2911
		/*
		 * Ran off the end of the leaf: flush the pending run and
		 * release the path so the next iteration re-searches.
		 */
2912		if (args->nr) {
2913			ret = copy_items(trans, log, dst_path, args->src,
2914					 args->start_slot, args->nr,
2915					 LOG_INODE_ALL);
2916			if (ret)
2917				return ret;
2918			args->nr = 0;
2919			btrfs_release_path(path);
2920		}
2921	}
2922
2923	return 0;
2924}
2925
/*
 * Log every extent_map on the inode's modified_extents list whose
 * generation is newer than the last committed transaction.  Candidates
 * are moved onto a private list under tree->lock, sorted by file offset
 * with list_sort(), and handed to log_one_extent(); the shared 'args'
 * lets offset-sequential extents share a single copy_items() batch.
 *
 * On the first error, the remaining entries are still drained off the
 * private list (so no extent_map is left on a dead local list) and the
 * first error is returned; 0 on success.
 *
 * NOTE(review): the write side of tree->lock is held across the tree
 * searches in log_one_extent() and the extent maps are not separately
 * referenced here -- confirm locking/lifetime expectations against
 * extent_map.c before relying on this in a backport.
 */
2926static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
2927				     struct btrfs_root *root,
2928				     struct inode *inode,
2929				     struct btrfs_path *path,
2930				     struct btrfs_path *dst_path)
2931{
2932	struct log_args args;
2933	struct btrfs_root *log = root->log_root;
2934	struct extent_map *em, *n;
2935	struct list_head extents;
2936	struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
2937	u64 test_gen;
2938	int ret = 0;
2939
2940	INIT_LIST_HEAD(&extents);
2941
2942	memset(&args, 0, sizeof(args));
2943
2944	write_lock(&tree->lock);
	/* Only extents modified after the last commit need logging. */
2945	test_gen = root->fs_info->last_trans_committed;
2946
2947	list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
2948		list_del_init(&em->list);
2949		if (em->generation <= test_gen)
2950			continue;
2951		list_add_tail(&em->list, &extents);
2952	}
2953
2954	list_sort(NULL, &extents, extent_cmp);
2955
2956	while (!list_empty(&extents)) {
2957		em = list_entry(extents.next, struct extent_map, list);
2958
2959		list_del_init(&em->list);
2960
2961		/*
2962		 * If we had an error we just need to delete everybody from our
2963		 * private list.
2964		 */
2965		if (ret)
2966			continue;
2967
2968		/*
2969		 * If the previous EM and the last extent we left off on aren't
2970		 * sequential then we need to copy the items we have and redo
2971		 * our search
2972		 */
2973		if (args.nr && em->start != args.next_offset) {
2974			ret = copy_items(trans, log, dst_path, args.src,
2975					 args.start_slot, args.nr,
2976					 LOG_INODE_ALL);
2977			if (ret)
2978				continue;
2979			btrfs_release_path(path);
2980			args.nr = 0;
2981		}
2982
2983		ret = log_one_extent(trans, inode, root, em, path, dst_path, &args);
2984	}
2985
	/* Flush any run left pending by the final log_one_extent() call. */
2986	if (!ret && args.nr)
2987		ret = copy_items(trans, log, dst_path, args.src,
2988				 args.start_slot, args.nr, LOG_INODE_ALL);
2989	btrfs_release_path(path);
2990	WARN_ON(!list_empty(&extents));
2991	write_unlock(&tree->lock);
2992	return ret;
2993}
2994
2806/* log a single inode in the tree log. 2995/* log a single inode in the tree log.
2807 * At least one parent directory for this inode must exist in the tree 2996 * At least one parent directory for this inode must exist in the tree
2808 * or be logged already. 2997 * or be logged already.
@@ -2832,6 +3021,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2832 int nritems; 3021 int nritems;
2833 int ins_start_slot = 0; 3022 int ins_start_slot = 0;
2834 int ins_nr; 3023 int ins_nr;
3024 bool fast_search = false;
2835 u64 ino = btrfs_ino(inode); 3025 u64 ino = btrfs_ino(inode);
2836 3026
2837 log = root->log_root; 3027 log = root->log_root;
@@ -2851,10 +3041,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2851 3041
2852 max_key.objectid = ino; 3042 max_key.objectid = ino;
2853 3043
2854 /* today the code can only do partial logging of directories */
2855 if (!S_ISDIR(inode->i_mode))
2856 inode_only = LOG_INODE_ALL;
2857 3044
3045 /* today the code can only do partial logging of directories */
2858 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) 3046 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
2859 max_key.type = BTRFS_XATTR_ITEM_KEY; 3047 max_key.type = BTRFS_XATTR_ITEM_KEY;
2860 else 3048 else
@@ -2881,7 +3069,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2881 max_key_type = BTRFS_XATTR_ITEM_KEY; 3069 max_key_type = BTRFS_XATTR_ITEM_KEY;
2882 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 3070 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
2883 } else { 3071 } else {
2884 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); 3072 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3073 &BTRFS_I(inode)->runtime_flags)) {
3074 ret = btrfs_truncate_inode_items(trans, log,
3075 inode, 0, 0);
3076 } else {
3077 fast_search = true;
3078 max_key.type = BTRFS_XATTR_ITEM_KEY;
3079 ret = drop_objectid_items(trans, log, path, ino,
3080 BTRFS_XATTR_ITEM_KEY);
3081 }
2885 } 3082 }
2886 if (ret) { 3083 if (ret) {
2887 err = ret; 3084 err = ret;
@@ -2960,7 +3157,18 @@ next_slot:
2960 } 3157 }
2961 ins_nr = 0; 3158 ins_nr = 0;
2962 } 3159 }
2963 WARN_ON(ins_nr); 3160
3161 if (fast_search) {
3162 btrfs_release_path(path);
3163 btrfs_release_path(dst_path);
3164 ret = btrfs_log_changed_extents(trans, root, inode, path,
3165 dst_path);
3166 if (ret) {
3167 err = ret;
3168 goto out_unlock;
3169 }
3170 }
3171
2964 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 3172 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2965 btrfs_release_path(path); 3173 btrfs_release_path(path);
2966 btrfs_release_path(dst_path); 3174 btrfs_release_path(dst_path);