aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/btrfs_inode.h1
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/extent_map.c34
-rw-r--r--fs/btrfs/extent_map.h5
-rw-r--r--fs/btrfs/file.c62
-rw-r--r--fs/btrfs/inode.c120
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/tree-log.c220
8 files changed, 416 insertions, 42 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 5b2ad6bc4fe7..7c7bf818f3c1 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -38,6 +38,7 @@
38#define BTRFS_INODE_DELALLOC_META_RESERVED 4 38#define BTRFS_INODE_DELALLOC_META_RESERVED 4
39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5 39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
40#define BTRFS_INODE_HAS_ASYNC_EXTENT 6 40#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
41#define BTRFS_INODE_NEEDS_FULL_SYNC 7
41 42
42/* in memory btrfs inode */ 43/* in memory btrfs inode */
43struct btrfs_inode { 44struct btrfs_inode {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d195b507660..4b81ea3fa1b2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3315,9 +3315,17 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
3315int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); 3315int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
3316int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 3316int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
3317 int skip_pinned); 3317 int skip_pinned);
3318int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace,
3319 u64 start, u64 end, int skip_pinned,
3320 int modified);
3318extern const struct file_operations btrfs_file_operations; 3321extern const struct file_operations btrfs_file_operations;
3319int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, 3322int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
3320 u64 start, u64 end, u64 *hint_byte, int drop_cache); 3323 struct btrfs_root *root, struct inode *inode,
3324 struct btrfs_path *path, u64 start, u64 end,
3325 u64 *hint_byte, int drop_cache);
3326int btrfs_drop_extents(struct btrfs_trans_handle *trans,
3327 struct btrfs_root *root, struct inode *inode, u64 start,
3328 u64 end, u64 *hint_byte, int drop_cache);
3321int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 3329int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
3322 struct inode *inode, u64 start, u64 end); 3330 struct inode *inode, u64 start, u64 end);
3323int btrfs_release_file(struct inode *inode, struct file *file); 3331int btrfs_release_file(struct inode *inode, struct file *file);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 7c97b3301459..1fe82cfc1d93 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -35,6 +35,7 @@ void extent_map_exit(void)
35void extent_map_tree_init(struct extent_map_tree *tree) 35void extent_map_tree_init(struct extent_map_tree *tree)
36{ 36{
37 tree->map = RB_ROOT; 37 tree->map = RB_ROOT;
38 INIT_LIST_HEAD(&tree->modified_extents);
38 rwlock_init(&tree->lock); 39 rwlock_init(&tree->lock);
39} 40}
40 41
@@ -54,7 +55,9 @@ struct extent_map *alloc_extent_map(void)
54 em->in_tree = 0; 55 em->in_tree = 0;
55 em->flags = 0; 56 em->flags = 0;
56 em->compress_type = BTRFS_COMPRESS_NONE; 57 em->compress_type = BTRFS_COMPRESS_NONE;
58 em->generation = 0;
57 atomic_set(&em->refs, 1); 59 atomic_set(&em->refs, 1);
60 INIT_LIST_HEAD(&em->list);
58 return em; 61 return em;
59} 62}
60 63
@@ -72,6 +75,7 @@ void free_extent_map(struct extent_map *em)
72 WARN_ON(atomic_read(&em->refs) == 0); 75 WARN_ON(atomic_read(&em->refs) == 0);
73 if (atomic_dec_and_test(&em->refs)) { 76 if (atomic_dec_and_test(&em->refs)) {
74 WARN_ON(em->in_tree); 77 WARN_ON(em->in_tree);
78 WARN_ON(!list_empty(&em->list));
75 kmem_cache_free(extent_map_cache, em); 79 kmem_cache_free(extent_map_cache, em);
76 } 80 }
77} 81}
@@ -198,6 +202,12 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
198 em->block_len += merge->block_len; 202 em->block_len += merge->block_len;
199 em->block_start = merge->block_start; 203 em->block_start = merge->block_start;
200 merge->in_tree = 0; 204 merge->in_tree = 0;
205 if (merge->generation > em->generation) {
206 em->generation = merge->generation;
207 list_move(&em->list, &tree->modified_extents);
208 }
209
210 list_del_init(&merge->list);
201 rb_erase(&merge->rb_node, &tree->map); 211 rb_erase(&merge->rb_node, &tree->map);
202 free_extent_map(merge); 212 free_extent_map(merge);
203 } 213 }
@@ -211,11 +221,29 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
211 em->block_len += merge->len; 221 em->block_len += merge->len;
212 rb_erase(&merge->rb_node, &tree->map); 222 rb_erase(&merge->rb_node, &tree->map);
213 merge->in_tree = 0; 223 merge->in_tree = 0;
224 if (merge->generation > em->generation) {
225 em->generation = merge->generation;
226 list_move(&em->list, &tree->modified_extents);
227 }
228 list_del_init(&merge->list);
214 free_extent_map(merge); 229 free_extent_map(merge);
215 } 230 }
216} 231}
217 232
218int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) 233/**
234 * unpin_extent_cache - unpin an extent from the cache
235 * @tree: tree to unpin the extent in
236 * @start: logical offset in the file
237 * @len: length of the extent
238 * @gen: generation that this extent has been modified in
239 * @prealloc: if this is set we need to clear the prealloc flag
240 *
241 * Called after an extent has been written to disk properly. Set the generation
242 * to the generation that actually added the file item to the inode so we know
243 * we need to sync this extent when we call fsync().
244 */
245int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
246 u64 gen)
219{ 247{
220 int ret = 0; 248 int ret = 0;
221 struct extent_map *em; 249 struct extent_map *em;
@@ -228,10 +256,11 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
228 if (!em) 256 if (!em)
229 goto out; 257 goto out;
230 258
259 list_move(&em->list, &tree->modified_extents);
260 em->generation = gen;
231 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 261 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
232 262
233 try_merge_map(tree, em); 263 try_merge_map(tree, em);
234
235 free_extent_map(em); 264 free_extent_map(em);
236out: 265out:
237 write_unlock(&tree->lock); 266 write_unlock(&tree->lock);
@@ -358,6 +387,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
358 387
359 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); 388 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
360 rb_erase(&em->rb_node, &tree->map); 389 rb_erase(&em->rb_node, &tree->map);
390 list_del_init(&em->list);
361 em->in_tree = 0; 391 em->in_tree = 0;
362 return ret; 392 return ret;
363} 393}
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 1195f09761fe..2388a60bd6e3 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -23,15 +23,18 @@ struct extent_map {
23 u64 orig_start; 23 u64 orig_start;
24 u64 block_start; 24 u64 block_start;
25 u64 block_len; 25 u64 block_len;
26 u64 generation;
26 unsigned long flags; 27 unsigned long flags;
27 struct block_device *bdev; 28 struct block_device *bdev;
28 atomic_t refs; 29 atomic_t refs;
29 unsigned int in_tree; 30 unsigned int in_tree;
30 unsigned int compress_type; 31 unsigned int compress_type;
32 struct list_head list;
31}; 33};
32 34
33struct extent_map_tree { 35struct extent_map_tree {
34 struct rb_root map; 36 struct rb_root map;
37 struct list_head modified_extents;
35 rwlock_t lock; 38 rwlock_t lock;
36}; 39};
37 40
@@ -60,7 +63,7 @@ struct extent_map *alloc_extent_map(void);
60void free_extent_map(struct extent_map *em); 63void free_extent_map(struct extent_map *em);
61int __init extent_map_init(void); 64int __init extent_map_init(void);
62void extent_map_exit(void); 65void extent_map_exit(void);
63int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); 66int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
64struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 67struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
65 u64 start, u64 len); 68 u64 start, u64 len);
66#endif 69#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b7c885c8423f..399f9d71a926 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -459,13 +459,14 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
459 * [start, end]. Existing extents are split as required. 459 * [start, end]. Existing extents are split as required.
460 */ 460 */
461int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 461int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
462 int skip_pinned) 462 int skip_pinned)
463{ 463{
464 struct extent_map *em; 464 struct extent_map *em;
465 struct extent_map *split = NULL; 465 struct extent_map *split = NULL;
466 struct extent_map *split2 = NULL; 466 struct extent_map *split2 = NULL;
467 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 467 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
468 u64 len = end - start + 1; 468 u64 len = end - start + 1;
469 u64 gen;
469 int ret; 470 int ret;
470 int testend = 1; 471 int testend = 1;
471 unsigned long flags; 472 unsigned long flags;
@@ -490,6 +491,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
490 break; 491 break;
491 } 492 }
492 flags = em->flags; 493 flags = em->flags;
494 gen = em->generation;
493 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 495 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
494 if (testend && em->start + em->len >= start + len) { 496 if (testend && em->start + em->len >= start + len) {
495 free_extent_map(em); 497 free_extent_map(em);
@@ -518,12 +520,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
518 split->block_len = em->block_len; 520 split->block_len = em->block_len;
519 else 521 else
520 split->block_len = split->len; 522 split->block_len = split->len;
521 523 split->generation = gen;
522 split->bdev = em->bdev; 524 split->bdev = em->bdev;
523 split->flags = flags; 525 split->flags = flags;
524 split->compress_type = em->compress_type; 526 split->compress_type = em->compress_type;
525 ret = add_extent_mapping(em_tree, split); 527 ret = add_extent_mapping(em_tree, split);
526 BUG_ON(ret); /* Logic error */ 528 BUG_ON(ret); /* Logic error */
529 list_move(&split->list, &em_tree->modified_extents);
527 free_extent_map(split); 530 free_extent_map(split);
528 split = split2; 531 split = split2;
529 split2 = NULL; 532 split2 = NULL;
@@ -537,6 +540,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
537 split->bdev = em->bdev; 540 split->bdev = em->bdev;
538 split->flags = flags; 541 split->flags = flags;
539 split->compress_type = em->compress_type; 542 split->compress_type = em->compress_type;
543 split->generation = gen;
540 544
541 if (compressed) { 545 if (compressed) {
542 split->block_len = em->block_len; 546 split->block_len = em->block_len;
@@ -550,6 +554,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
550 554
551 ret = add_extent_mapping(em_tree, split); 555 ret = add_extent_mapping(em_tree, split);
552 BUG_ON(ret); /* Logic error */ 556 BUG_ON(ret); /* Logic error */
557 list_move(&split->list, &em_tree->modified_extents);
553 free_extent_map(split); 558 free_extent_map(split);
554 split = NULL; 559 split = NULL;
555 } 560 }
@@ -576,13 +581,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
576 * it is either truncated or split. Anything entirely inside the range 581 * it is either truncated or split. Anything entirely inside the range
577 * is deleted from the tree. 582 * is deleted from the tree.
578 */ 583 */
579int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, 584int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
580 u64 start, u64 end, u64 *hint_byte, int drop_cache) 585 struct btrfs_root *root, struct inode *inode,
586 struct btrfs_path *path, u64 start, u64 end,
587 u64 *hint_byte, int drop_cache)
581{ 588{
582 struct btrfs_root *root = BTRFS_I(inode)->root;
583 struct extent_buffer *leaf; 589 struct extent_buffer *leaf;
584 struct btrfs_file_extent_item *fi; 590 struct btrfs_file_extent_item *fi;
585 struct btrfs_path *path;
586 struct btrfs_key key; 591 struct btrfs_key key;
587 struct btrfs_key new_key; 592 struct btrfs_key new_key;
588 u64 ino = btrfs_ino(inode); 593 u64 ino = btrfs_ino(inode);
@@ -597,14 +602,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
597 int recow; 602 int recow;
598 int ret; 603 int ret;
599 int modify_tree = -1; 604 int modify_tree = -1;
605 int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
600 606
601 if (drop_cache) 607 if (drop_cache)
602 btrfs_drop_extent_cache(inode, start, end - 1, 0); 608 btrfs_drop_extent_cache(inode, start, end - 1, 0);
603 609
604 path = btrfs_alloc_path();
605 if (!path)
606 return -ENOMEM;
607
608 if (start >= BTRFS_I(inode)->disk_i_size) 610 if (start >= BTRFS_I(inode)->disk_i_size)
609 modify_tree = 0; 611 modify_tree = 0;
610 612
@@ -707,7 +709,7 @@ next_slot:
707 extent_end - start); 709 extent_end - start);
708 btrfs_mark_buffer_dirty(leaf); 710 btrfs_mark_buffer_dirty(leaf);
709 711
710 if (disk_bytenr > 0) { 712 if (update_refs && disk_bytenr > 0) {
711 ret = btrfs_inc_extent_ref(trans, root, 713 ret = btrfs_inc_extent_ref(trans, root,
712 disk_bytenr, num_bytes, 0, 714 disk_bytenr, num_bytes, 0,
713 root->root_key.objectid, 715 root->root_key.objectid,
@@ -734,7 +736,7 @@ next_slot:
734 btrfs_set_file_extent_num_bytes(leaf, fi, 736 btrfs_set_file_extent_num_bytes(leaf, fi,
735 extent_end - end); 737 extent_end - end);
736 btrfs_mark_buffer_dirty(leaf); 738 btrfs_mark_buffer_dirty(leaf);
737 if (disk_bytenr > 0) { 739 if (update_refs && disk_bytenr > 0) {
738 inode_sub_bytes(inode, end - key.offset); 740 inode_sub_bytes(inode, end - key.offset);
739 *hint_byte = disk_bytenr; 741 *hint_byte = disk_bytenr;
740 } 742 }
@@ -753,7 +755,7 @@ next_slot:
753 btrfs_set_file_extent_num_bytes(leaf, fi, 755 btrfs_set_file_extent_num_bytes(leaf, fi,
754 start - key.offset); 756 start - key.offset);
755 btrfs_mark_buffer_dirty(leaf); 757 btrfs_mark_buffer_dirty(leaf);
756 if (disk_bytenr > 0) { 758 if (update_refs && disk_bytenr > 0) {
757 inode_sub_bytes(inode, extent_end - start); 759 inode_sub_bytes(inode, extent_end - start);
758 *hint_byte = disk_bytenr; 760 *hint_byte = disk_bytenr;
759 } 761 }
@@ -777,12 +779,13 @@ next_slot:
777 del_nr++; 779 del_nr++;
778 } 780 }
779 781
780 if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 782 if (update_refs &&
783 extent_type == BTRFS_FILE_EXTENT_INLINE) {
781 inode_sub_bytes(inode, 784 inode_sub_bytes(inode,
782 extent_end - key.offset); 785 extent_end - key.offset);
783 extent_end = ALIGN(extent_end, 786 extent_end = ALIGN(extent_end,
784 root->sectorsize); 787 root->sectorsize);
785 } else if (disk_bytenr > 0) { 788 } else if (update_refs && disk_bytenr > 0) {
786 ret = btrfs_free_extent(trans, root, 789 ret = btrfs_free_extent(trans, root,
787 disk_bytenr, num_bytes, 0, 790 disk_bytenr, num_bytes, 0,
788 root->root_key.objectid, 791 root->root_key.objectid,
@@ -806,7 +809,7 @@ next_slot:
806 del_nr); 809 del_nr);
807 if (ret) { 810 if (ret) {
808 btrfs_abort_transaction(trans, root, ret); 811 btrfs_abort_transaction(trans, root, ret);
809 goto out; 812 break;
810 } 813 }
811 814
812 del_nr = 0; 815 del_nr = 0;
@@ -825,7 +828,22 @@ next_slot:
825 btrfs_abort_transaction(trans, root, ret); 828 btrfs_abort_transaction(trans, root, ret);
826 } 829 }
827 830
828out: 831 btrfs_release_path(path);
832 return ret;
833}
834
835int btrfs_drop_extents(struct btrfs_trans_handle *trans,
836 struct btrfs_root *root, struct inode *inode, u64 start,
837 u64 end, u64 *hint_byte, int drop_cache)
838{
839 struct btrfs_path *path;
840 int ret;
841
842 path = btrfs_alloc_path();
843 if (!path)
844 return -ENOMEM;
845 ret = __btrfs_drop_extents(trans, root, inode, path, start, end,
846 hint_byte, drop_cache);
829 btrfs_free_path(path); 847 btrfs_free_path(path);
830 return ret; 848 return ret;
831} 849}
@@ -892,8 +910,6 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
892 int ret; 910 int ret;
893 u64 ino = btrfs_ino(inode); 911 u64 ino = btrfs_ino(inode);
894 912
895 btrfs_drop_extent_cache(inode, start, end - 1, 0);
896
897 path = btrfs_alloc_path(); 913 path = btrfs_alloc_path();
898 if (!path) 914 if (!path)
899 return -ENOMEM; 915 return -ENOMEM;
@@ -1556,6 +1572,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1556 BTRFS_I(inode)->last_trans <= 1572 BTRFS_I(inode)->last_trans <=
1557 root->fs_info->last_trans_committed) { 1573 root->fs_info->last_trans_committed) {
1558 BTRFS_I(inode)->last_trans = 0; 1574 BTRFS_I(inode)->last_trans = 0;
1575
1576 /*
1577 * We've had everything committed since the last time we were
1578 * modified so clear this flag in case it was set for whatever
1579 * reason, it's no longer relevant.
1580 */
1581 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1582 &BTRFS_I(inode)->runtime_flags);
1559 mutex_unlock(&inode->i_mutex); 1583 mutex_unlock(&inode->i_mutex);
1560 goto out; 1584 goto out;
1561 } 1585 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6971bac66d9d..1b99fe8a129d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -247,7 +247,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
247 return 1; 247 return 1;
248 } 248 }
249 249
250 ret = btrfs_drop_extents(trans, inode, start, aligned_end, 250 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end,
251 &hint_byte, 1); 251 &hint_byte, 1);
252 if (ret) 252 if (ret)
253 return ret; 253 return ret;
@@ -1803,7 +1803,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1803 * the caller is expected to unpin it and allow it to be merged 1803 * the caller is expected to unpin it and allow it to be merged
1804 * with the others. 1804 * with the others.
1805 */ 1805 */
1806 ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, 1806 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1807 file_pos + num_bytes,
1807 &hint, 0); 1808 &hint, 0);
1808 if (ret) 1809 if (ret)
1809 goto out; 1810 goto out;
@@ -1929,11 +1930,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1929 ordered_extent->len, 1930 ordered_extent->len,
1930 compress_type, 0, 0, 1931 compress_type, 0, 0,
1931 BTRFS_FILE_EXTENT_REG); 1932 BTRFS_FILE_EXTENT_REG);
1932 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1933 ordered_extent->file_offset,
1934 ordered_extent->len);
1935 } 1933 }
1936 1934 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1935 ordered_extent->file_offset, ordered_extent->len,
1936 trans->transid);
1937 if (ret < 0) { 1937 if (ret < 0) {
1938 btrfs_abort_transaction(trans, root, ret); 1938 btrfs_abort_transaction(trans, root, ret);
1939 goto out_unlock; 1939 goto out_unlock;
@@ -2592,6 +2592,18 @@ static void btrfs_read_locked_inode(struct inode *inode)
2592 2592
2593 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item)); 2593 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
2594 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); 2594 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
2595 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
2596
2597 /*
2598 * If we were modified in the current generation and evicted from memory
2599 * and then re-read we need to do a full sync since we don't have any
2600 * idea about which extents were modified before we were evicted from
2601 * cache.
2602 */
2603 if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
2604 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2605 &BTRFS_I(inode)->runtime_flags);
2606
2595 inode->i_version = btrfs_inode_sequence(leaf, inode_item); 2607 inode->i_version = btrfs_inode_sequence(leaf, inode_item);
2596 inode->i_generation = BTRFS_I(inode)->generation; 2608 inode->i_generation = BTRFS_I(inode)->generation;
2597 inode->i_rdev = 0; 2609 inode->i_rdev = 0;
@@ -3269,8 +3281,13 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3269 return -ENOMEM; 3281 return -ENOMEM;
3270 path->reada = -1; 3282 path->reada = -1;
3271 3283
3284 /*
3285 * We want to drop from the next block forward in case this new size is
3286 * not block aligned since we will be keeping the last block of the
3287 * extent just the way it is.
3288 */
3272 if (root->ref_cows || root == root->fs_info->tree_root) 3289 if (root->ref_cows || root == root->fs_info->tree_root)
3273 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3290 btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0);
3274 3291
3275 /* 3292 /*
3276 * This function is also used to drop the items in the log tree before 3293 * This function is also used to drop the items in the log tree before
@@ -3579,6 +3596,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3579 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3596 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3580 struct extent_map *em = NULL; 3597 struct extent_map *em = NULL;
3581 struct extent_state *cached_state = NULL; 3598 struct extent_state *cached_state = NULL;
3599 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3582 u64 mask = root->sectorsize - 1; 3600 u64 mask = root->sectorsize - 1;
3583 u64 hole_start = (oldsize + mask) & ~mask; 3601 u64 hole_start = (oldsize + mask) & ~mask;
3584 u64 block_end = (size + mask) & ~mask; 3602 u64 block_end = (size + mask) & ~mask;
@@ -3615,6 +3633,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3615 last_byte = min(extent_map_end(em), block_end); 3633 last_byte = min(extent_map_end(em), block_end);
3616 last_byte = (last_byte + mask) & ~mask; 3634 last_byte = (last_byte + mask) & ~mask;
3617 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3635 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
3636 struct extent_map *hole_em;
3618 u64 hint_byte = 0; 3637 u64 hint_byte = 0;
3619 hole_size = last_byte - cur_offset; 3638 hole_size = last_byte - cur_offset;
3620 3639
@@ -3624,7 +3643,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3624 break; 3643 break;
3625 } 3644 }
3626 3645
3627 err = btrfs_drop_extents(trans, inode, cur_offset, 3646 err = btrfs_drop_extents(trans, root, inode,
3647 cur_offset,
3628 cur_offset + hole_size, 3648 cur_offset + hole_size,
3629 &hint_byte, 1); 3649 &hint_byte, 1);
3630 if (err) { 3650 if (err) {
@@ -3643,9 +3663,39 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3643 break; 3663 break;
3644 } 3664 }
3645 3665
3646 btrfs_drop_extent_cache(inode, hole_start, 3666 btrfs_drop_extent_cache(inode, cur_offset,
3647 last_byte - 1, 0); 3667 cur_offset + hole_size - 1, 0);
3668 hole_em = alloc_extent_map();
3669 if (!hole_em) {
3670 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3671 &BTRFS_I(inode)->runtime_flags);
3672 goto next;
3673 }
3674 hole_em->start = cur_offset;
3675 hole_em->len = hole_size;
3676 hole_em->orig_start = cur_offset;
3677
3678 hole_em->block_start = EXTENT_MAP_HOLE;
3679 hole_em->block_len = 0;
3680 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
3681 hole_em->compress_type = BTRFS_COMPRESS_NONE;
3682 hole_em->generation = trans->transid;
3648 3683
3684 while (1) {
3685 write_lock(&em_tree->lock);
3686 err = add_extent_mapping(em_tree, hole_em);
3687 if (!err)
3688 list_move(&hole_em->list,
3689 &em_tree->modified_extents);
3690 write_unlock(&em_tree->lock);
3691 if (err != -EEXIST)
3692 break;
3693 btrfs_drop_extent_cache(inode, cur_offset,
3694 cur_offset +
3695 hole_size - 1, 0);
3696 }
3697 free_extent_map(hole_em);
3698next:
3649 btrfs_update_inode(trans, root, inode); 3699 btrfs_update_inode(trans, root, inode);
3650 btrfs_end_transaction(trans, root); 3700 btrfs_end_transaction(trans, root);
3651 } 3701 }
@@ -4673,6 +4723,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4673 BTRFS_I(inode)->generation = trans->transid; 4723 BTRFS_I(inode)->generation = trans->transid;
4674 inode->i_generation = BTRFS_I(inode)->generation; 4724 inode->i_generation = BTRFS_I(inode)->generation;
4675 4725
4726 /*
4727 * We could have gotten an inode number from somebody who was fsynced
4728 * and then removed in this same transaction, so let's just set full
4729 * sync since it will be a full sync anyway and this will blow away the
4730 * old info in the log.
4731 */
4732 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
4733
4676 if (S_ISDIR(mode)) 4734 if (S_ISDIR(mode))
4677 owner = 0; 4735 owner = 0;
4678 else 4736 else
@@ -6839,6 +6897,15 @@ static int btrfs_truncate(struct inode *inode)
6839 &BTRFS_I(inode)->runtime_flags)) 6897 &BTRFS_I(inode)->runtime_flags))
6840 btrfs_add_ordered_operation(trans, root, inode); 6898 btrfs_add_ordered_operation(trans, root, inode);
6841 6899
6900 /*
6901 * So if we truncate and then write and fsync we normally would just
6902 * write the extents that changed, which is a problem if we need to
6903 * first truncate that entire inode. So set this flag so we write out
6904 * all of the extents in the inode to the sync log so we're completely
6905 * safe.
6906 */
6907 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
6908
6842 while (1) { 6909 while (1) {
6843 ret = btrfs_block_rsv_refill(root, rsv, min_size); 6910 ret = btrfs_block_rsv_refill(root, rsv, min_size);
6844 if (ret) { 6911 if (ret) {
@@ -7510,6 +7577,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7510 loff_t actual_len, u64 *alloc_hint, 7577 loff_t actual_len, u64 *alloc_hint,
7511 struct btrfs_trans_handle *trans) 7578 struct btrfs_trans_handle *trans)
7512{ 7579{
7580 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
7581 struct extent_map *em;
7513 struct btrfs_root *root = BTRFS_I(inode)->root; 7582 struct btrfs_root *root = BTRFS_I(inode)->root;
7514 struct btrfs_key ins; 7583 struct btrfs_key ins;
7515 u64 cur_offset = start; 7584 u64 cur_offset = start;
@@ -7550,6 +7619,37 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7550 btrfs_drop_extent_cache(inode, cur_offset, 7619 btrfs_drop_extent_cache(inode, cur_offset,
7551 cur_offset + ins.offset -1, 0); 7620 cur_offset + ins.offset -1, 0);
7552 7621
7622 em = alloc_extent_map();
7623 if (!em) {
7624 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
7625 &BTRFS_I(inode)->runtime_flags);
7626 goto next;
7627 }
7628
7629 em->start = cur_offset;
7630 em->orig_start = cur_offset;
7631 em->len = ins.offset;
7632 em->block_start = ins.objectid;
7633 em->block_len = ins.offset;
7634 em->bdev = root->fs_info->fs_devices->latest_bdev;
7635 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
7636 em->generation = trans->transid;
7637
7638 while (1) {
7639 write_lock(&em_tree->lock);
7640 ret = add_extent_mapping(em_tree, em);
7641 if (!ret)
7642 list_move(&em->list,
7643 &em_tree->modified_extents);
7644 write_unlock(&em_tree->lock);
7645 if (ret != -EEXIST)
7646 break;
7647 btrfs_drop_extent_cache(inode, cur_offset,
7648 cur_offset + ins.offset - 1,
7649 0);
7650 }
7651 free_extent_map(em);
7652next:
7553 num_bytes -= ins.offset; 7653 num_bytes -= ins.offset;
7554 cur_offset += ins.offset; 7654 cur_offset += ins.offset;
7555 *alloc_hint = ins.objectid + ins.offset; 7655 *alloc_hint = ins.objectid + ins.offset;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9df50fa8a078..95223222d5ad 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2576,7 +2576,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2576 datal -= off - key.offset; 2576 datal -= off - key.offset;
2577 } 2577 }
2578 2578
2579 ret = btrfs_drop_extents(trans, inode, 2579 ret = btrfs_drop_extents(trans, root, inode,
2580 new_key.offset, 2580 new_key.offset,
2581 new_key.offset + datal, 2581 new_key.offset + datal,
2582 &hint_byte, 1); 2582 &hint_byte, 1);
@@ -2650,7 +2650,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2650 size -= skip + trim; 2650 size -= skip + trim;
2651 datal -= skip + trim; 2651 datal -= skip + trim;
2652 2652
2653 ret = btrfs_drop_extents(trans, inode, 2653 ret = btrfs_drop_extents(trans, root, inode,
2654 new_key.offset, 2654 new_key.offset,
2655 new_key.offset + datal, 2655 new_key.offset + datal,
2656 &hint_byte, 1); 2656 &hint_byte, 1);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c86670f4f285..f2ff02c55130 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/list_sort.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "transaction.h" 23#include "transaction.h"
23#include "disk-io.h" 24#include "disk-io.h"
@@ -550,7 +551,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
550 551
551 saved_nbytes = inode_get_bytes(inode); 552 saved_nbytes = inode_get_bytes(inode);
552 /* drop any overlapping extents */ 553 /* drop any overlapping extents */
553 ret = btrfs_drop_extents(trans, inode, start, extent_end, 554 ret = btrfs_drop_extents(trans, root, inode, start, extent_end,
554 &alloc_hint, 1); 555 &alloc_hint, 1);
555 BUG_ON(ret); 556 BUG_ON(ret);
556 557
@@ -2803,6 +2804,194 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2803 return ret; 2804 return ret;
2804} 2805}
2805 2806
/*
 * Comparison callback passed to list_sort() by btrfs_log_changed_extents().
 *
 * @priv: unused.
 * @a, @b: list nodes embedded in struct extent_map (member "list").
 *
 * Orders extent maps by ascending ->start: returns -1 when a starts before b,
 * 1 when it starts after, and 0 when both start at the same offset.
 */
2807static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
2808{
2809 struct extent_map *em1, *em2;
2810
2811 em1 = list_entry(a, struct extent_map, list);
2812 em2 = list_entry(b, struct extent_map, list);
2813
2814 if (em1->start < em2->start)
2815 return -1;
2816 else if (em1->start > em2->start)
2817 return 1;
2818 return 0;
2819}
2820
/*
 * Batching state shared between btrfs_log_changed_extents() and
 * log_one_extent() so that runs of adjacent leaf slots can be handed to
 * copy_items() in one call.
 *
 * src:         leaf (path->nodes[0]) the pending slots live in.
 * next_offset: file offset just past the last extent item accounted for;
 *              the caller compares it with the next em->start to decide
 *              whether the pending batch can be extended.
 * start_slot:  first slot of the pending batch in @src.
 * nr:          number of slots in the pending batch; 0 means nothing pending.
 */
2821struct log_args {
2822 struct extent_buffer *src;
2823 u64 next_offset;
2824 int start_slot;
2825 int nr;
2826};
2827
/*
 * Queue (and, when needed, copy) the file extent items of @root that cover
 * the range described by @em into the log tree root->log_root.
 *
 * @trans:    running transaction handle.
 * @inode:    inode whose extents are being logged.
 * @root:     tree the extent items are looked up in; its ->log_root is the
 *            copy destination.
 * @em:       extent map giving the [start, start + len) range to log.
 * @path:     path used for the lookup in @root. It may be left pointing into
 *            a leaf on return so the next call can continue from it.
 * @dst_path: path handed to __btrfs_drop_extents() and copy_items().
 * @args:     batching state carried across calls (see struct log_args). On
 *            return args->nr may be non-zero, in which case the caller still
 *            has to flush the batch with copy_items().
 *
 * Returns 0 on success, -ENOENT if no extent item starts exactly at the
 * searched offset, or the error returned by __btrfs_drop_extents(),
 * btrfs_search_slot() or copy_items().
 */
2828static int log_one_extent(struct btrfs_trans_handle *trans,
2829 struct inode *inode, struct btrfs_root *root,
2830 struct extent_map *em, struct btrfs_path *path,
2831 struct btrfs_path *dst_path, struct log_args *args)
2832{
2833 struct btrfs_root *log = root->log_root;
2834 struct btrfs_file_extent_item *fi;
2835 struct btrfs_key key;
2836 u64 start = em->start;
2837 u64 len = em->len;
2838 u64 num_bytes;
2839 int nritems;
2840 int ret;
2841
/*
 * The inode was already logged in this transaction: drop whatever the log
 * tree currently holds for this range before copying the new items in.
 * The hint value written to tmp is not used.
 */
2842 if (BTRFS_I(inode)->logged_trans == trans->transid) {
2843 u64 tmp;
2844 ret = __btrfs_drop_extents(trans, log, inode, dst_path, start,
2845 start + len, &tmp, 0);
2846 if (ret)
2847 return ret;
2848 }
2849
2850 while (len) {
/*
 * A batch is pending, so @path still points into args->src from an earlier
 * pass or call; continue from the current slot instead of searching again.
 */
2851 if (args->nr)
2852 goto next_slot;
2853 key.objectid = btrfs_ino(inode);
2854 key.type = BTRFS_EXTENT_DATA_KEY;
2855 key.offset = start;
2856
/* Lookup only: no transaction handle, ins_len 0, cow 0. */
2857 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2858 if (ret < 0)
2859 return ret;
2860 if (ret) {
2861 /*
2862 * This shouldn't happen, but it might so warn and
2863 * return an error.
2864 */
2865 WARN_ON(1);
2866 return -ENOENT;
2867 }
2868 args->src = path->nodes[0];
2869next_slot:
2870 fi = btrfs_item_ptr(args->src, path->slots[0],
2871 struct btrfs_file_extent_item);
/*
 * Three cases: the current slot directly follows the pending batch (extend
 * it), a batch is pending but not adjacent (flush it and start a new one at
 * this slot), or nothing is pending (start a batch at this slot).
 * NOTE(review): the final "else if (!args->nr)" is always true when reached
 * and could be a plain "else".
 */
2872 if (args->nr &&
2873 args->start_slot + args->nr == path->slots[0]) {
2874 args->nr++;
2875 } else if (args->nr) {
2876 ret = copy_items(trans, log, dst_path, args->src,
2877 args->start_slot, args->nr,
2878 LOG_INODE_ALL);
2879 if (ret)
2880 return ret;
2881 args->nr = 1;
2882 args->start_slot = path->slots[0];
2883 } else if (!args->nr) {
2884 args->nr = 1;
2885 args->start_slot = path->slots[0];
2886 }
/* Step to the next slot and account for the bytes this item covers. */
2887 nritems = btrfs_header_nritems(path->nodes[0]);
2888 path->slots[0]++;
2889 num_bytes = btrfs_file_extent_num_bytes(args->src, fi);
2890 if (len < num_bytes) {
2891 /* I _think_ this is ok, envision we write to a
2892 * preallocated space that is adjacent to a previously
2893 * written preallocated space that gets merged when we
2894 * mark this preallocated space written. If we do not
2895 * have the adjacent extent in cache then when we copy
2896 * this extent it could end up being larger than our EM
2897 * thinks it is, which is a-ok, so just set len to 0.
2898 */
2899 len = 0;
2900 } else {
2901 len -= num_bytes;
2902 }
2903 start += btrfs_file_extent_num_bytes(args->src, fi);
2904 args->next_offset = start;
2905
/*
 * More slots remain in this leaf: keep walking it while the range is not
 * fully covered, otherwise stop and leave the batch pending for the caller.
 * NOTE(review): the key of the next slot is not checked here, so this
 * presumably relies on the following item being the next extent of the same
 * inode -- confirm.
 */
2906 if (path->slots[0] < nritems) {
2907 if (len)
2908 goto next_slot;
2909 break;
2910 }
2911
/*
 * Ran off the end of the leaf: flush the batch and release the path so the
 * next loop iteration (if len is still non-zero) searches again at start.
 */
2912 if (args->nr) {
2913 ret = copy_items(trans, log, dst_path, args->src,
2914 args->start_slot, args->nr,
2915 LOG_INODE_ALL);
2916 if (ret)
2917 return ret;
2918 args->nr = 0;
2919 btrfs_release_path(path);
2920 }
2921 }
2922
2923 return 0;
2924}
2925
/*
 * Log the extents of @inode that are on its extent tree's modified_extents
 * list and are newer than the last committed transaction.
 *
 * @trans:    running transaction handle.
 * @root:     tree the extent items are read from; root->log_root receives
 *            the copies.
 * @inode:    inode being logged.
 * @path:     path used for lookups in @root; released before returning.
 * @dst_path: path passed through to log_one_extent() and copy_items().
 *
 * Every entry is removed from modified_extents. Entries whose generation is
 * not newer than fs_info->last_trans_committed are simply dropped from the
 * list; the rest are sorted by start offset and fed to log_one_extent().
 *
 * Returns 0 on success or the first error from log_one_extent() or
 * copy_items().
 *
 * NOTE(review): the whole operation, including the tree searches and item
 * copies done by log_one_extent()/copy_items(), runs with tree->lock
 * write-held -- confirm that none of those calls can sleep.
 */
2926static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
2927 struct btrfs_root *root,
2928 struct inode *inode,
2929 struct btrfs_path *path,
2930 struct btrfs_path *dst_path)
2931{
2932 struct log_args args;
2933 struct btrfs_root *log = root->log_root;
2934 struct extent_map *em, *n;
2935 struct list_head extents;
2936 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
2937 u64 test_gen;
2938 int ret = 0;
2939
2940 INIT_LIST_HEAD(&extents);
2941
/* Start with no pending batch (args.nr == 0). */
2942 memset(&args, 0, sizeof(args));
2943
2944 write_lock(&tree->lock);
2945 test_gen = root->fs_info->last_trans_committed;
2946
/*
 * Empty modified_extents, moving onto the private list only the entries
 * whose generation is newer than the last committed transaction.
 */
2947 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
2948 list_del_init(&em->list);
2949 if (em->generation <= test_gen)
2950 continue;
2951 list_add_tail(&em->list, &extents);
2952 }
2953
/* Ascending start offset, so adjacent extents can share one copy batch. */
2954 list_sort(NULL, &extents, extent_cmp);
2955
2956 while (!list_empty(&extents)) {
2957 em = list_entry(extents.next, struct extent_map, list);
2958
2959 list_del_init(&em->list);
2960
2961 /*
2962 * If we had an error we just need to delete everybody from our
2963 * private list.
2964 */
2965 if (ret)
2966 continue;
2967
2968 /*
2969 * If the previous EM and the last extent we left off on aren't
2970 * sequential then we need to copy the items we have and redo
2971 * our search
2972 */
2973 if (args.nr && em->start != args.next_offset) {
2974 ret = copy_items(trans, log, dst_path, args.src,
2975 args.start_slot, args.nr,
2976 LOG_INODE_ALL);
2977 if (ret)
2978 continue;
2979 btrfs_release_path(path);
2980 args.nr = 0;
2981 }
2982
2983 ret = log_one_extent(trans, inode, root, em, path, dst_path, &args);
2984 }
2985
/* Flush the batch left pending by the last log_one_extent() call. */
2986 if (!ret && args.nr)
2987 ret = copy_items(trans, log, dst_path, args.src,
2988 args.start_slot, args.nr, LOG_INODE_ALL);
2989 btrfs_release_path(path);
2990 WARN_ON(!list_empty(&extents));
2991 write_unlock(&tree->lock);
2992 return ret;
2993}
2994
2806/* log a single inode in the tree log. 2995/* log a single inode in the tree log.
2807 * At least one parent directory for this inode must exist in the tree 2996 * At least one parent directory for this inode must exist in the tree
2808 * or be logged already. 2997 * or be logged already.
@@ -2832,6 +3021,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2832 int nritems; 3021 int nritems;
2833 int ins_start_slot = 0; 3022 int ins_start_slot = 0;
2834 int ins_nr; 3023 int ins_nr;
3024 bool fast_search = false;
2835 u64 ino = btrfs_ino(inode); 3025 u64 ino = btrfs_ino(inode);
2836 3026
2837 log = root->log_root; 3027 log = root->log_root;
@@ -2851,10 +3041,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2851 3041
2852 max_key.objectid = ino; 3042 max_key.objectid = ino;
2853 3043
2854 /* today the code can only do partial logging of directories */
2855 if (!S_ISDIR(inode->i_mode))
2856 inode_only = LOG_INODE_ALL;
2857 3044
3045 /* today the code can only do partial logging of directories */
2858 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) 3046 if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
2859 max_key.type = BTRFS_XATTR_ITEM_KEY; 3047 max_key.type = BTRFS_XATTR_ITEM_KEY;
2860 else 3048 else
@@ -2881,7 +3069,16 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2881 max_key_type = BTRFS_XATTR_ITEM_KEY; 3069 max_key_type = BTRFS_XATTR_ITEM_KEY;
2882 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 3070 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
2883 } else { 3071 } else {
2884 ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); 3072 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3073 &BTRFS_I(inode)->runtime_flags)) {
3074 ret = btrfs_truncate_inode_items(trans, log,
3075 inode, 0, 0);
3076 } else {
3077 fast_search = true;
3078 max_key.type = BTRFS_XATTR_ITEM_KEY;
3079 ret = drop_objectid_items(trans, log, path, ino,
3080 BTRFS_XATTR_ITEM_KEY);
3081 }
2885 } 3082 }
2886 if (ret) { 3083 if (ret) {
2887 err = ret; 3084 err = ret;
@@ -2960,7 +3157,18 @@ next_slot:
2960 } 3157 }
2961 ins_nr = 0; 3158 ins_nr = 0;
2962 } 3159 }
2963 WARN_ON(ins_nr); 3160
3161 if (fast_search) {
3162 btrfs_release_path(path);
3163 btrfs_release_path(dst_path);
3164 ret = btrfs_log_changed_extents(trans, root, inode, path,
3165 dst_path);
3166 if (ret) {
3167 err = ret;
3168 goto out_unlock;
3169 }
3170 }
3171
2964 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 3172 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
2965 btrfs_release_path(path); 3173 btrfs_release_path(path);
2966 btrfs_release_path(dst_path); 3174 btrfs_release_path(dst_path);