aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2013-09-12 12:58:51 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-09-12 12:58:51 -0400
commit b7c09ad4014e3678e8cc01fdf663c9f43b272dc6 (patch)
tree 1edb073b0a76ce1530cb31c113f9e741e33ece0e /fs/btrfs/inode.c
parent 1812997720ab90d029548778c55d7315555e1fef (diff)
parent d7396f07358a7c6e22c238d36d1d85f9d652a414 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason: "This is against 3.11-rc7, but was pulled and tested against your tree as of yesterday. We do have two small incrementals queued up, but I wanted to get this bunch out the door before I hop on an airplane. This is a fairly large batch of fixes, performance improvements, and cleanups from the usual Btrfs suspects. We've included Stefan Behrens' work to index subvolume UUIDs, which is targeted at speeding up send/receive with many subvolumes or snapshots in place. It closes a long standing performance issue that was built into the disk format. Mark Fasheh's offline dedup work is also here. In this case offline means the FS is mounted and active, but the dedup work is not done inline during file IO. This is a building block where utilities are able to ask the FS to dedup a series of extents. The kernel takes care of verifying the data involved really is the same. Today this involves reading both extents, but we'll continue to evolve the patches." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits) Btrfs: optimize key searches in btrfs_search_slot Btrfs: don't use an async starter for most of our workers Btrfs: only update disk_i_size as we remove extents Btrfs: fix deadlock in uuid scan kthread Btrfs: stop refusing the relocation of chunk 0 Btrfs: fix memory leak of uuid_root in free_fs_info btrfs: reuse kbasename helper btrfs: return btrfs error code for dev excl ops err Btrfs: allow partial ordered extent completion Btrfs: convert all bug_ons in free-space-cache.c Btrfs: add support for asserts Btrfs: adjust the fs_devices->missing count on unmount Btrf: cleanup: don't check for root_refs == 0 twice Btrfs: fix for patch "cleanup: don't check the same thing twice" Btrfs: get rid of one BUG() in write_all_supers() Btrfs: allocate prelim_ref with a slab allocater Btrfs: pass gfp_t to __add_prelim_ref() to avoid always using GFP_ATOMIC Btrfs: fix race conditions in BTRFS_IOC_FS_INFO ioctl 
Btrfs: fix race between removing a dev and writing sbs Btrfs: remove ourselves from the cluster list under lock ...
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 615
1 files changed, 281 insertions, 334 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bdc83d04d54..db1e43948579 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -230,12 +230,13 @@ fail:
230 * does the checks required to make sure the data is small enough 230 * does the checks required to make sure the data is small enough
231 * to fit as an inline extent. 231 * to fit as an inline extent.
232 */ 232 */
233static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, 233static noinline int cow_file_range_inline(struct btrfs_root *root,
234 struct btrfs_root *root, 234 struct inode *inode, u64 start,
235 struct inode *inode, u64 start, u64 end, 235 u64 end, size_t compressed_size,
236 size_t compressed_size, int compress_type, 236 int compress_type,
237 struct page **compressed_pages) 237 struct page **compressed_pages)
238{ 238{
239 struct btrfs_trans_handle *trans;
239 u64 isize = i_size_read(inode); 240 u64 isize = i_size_read(inode);
240 u64 actual_end = min(end + 1, isize); 241 u64 actual_end = min(end + 1, isize);
241 u64 inline_len = actual_end - start; 242 u64 inline_len = actual_end - start;
@@ -256,9 +257,16 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
256 return 1; 257 return 1;
257 } 258 }
258 259
260 trans = btrfs_join_transaction(root);
261 if (IS_ERR(trans))
262 return PTR_ERR(trans);
263 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
264
259 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); 265 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
260 if (ret) 266 if (ret) {
261 return ret; 267 btrfs_abort_transaction(trans, root, ret);
268 goto out;
269 }
262 270
263 if (isize > actual_end) 271 if (isize > actual_end)
264 inline_len = min_t(u64, isize, actual_end); 272 inline_len = min_t(u64, isize, actual_end);
@@ -267,15 +275,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
267 compress_type, compressed_pages); 275 compress_type, compressed_pages);
268 if (ret && ret != -ENOSPC) { 276 if (ret && ret != -ENOSPC) {
269 btrfs_abort_transaction(trans, root, ret); 277 btrfs_abort_transaction(trans, root, ret);
270 return ret; 278 goto out;
271 } else if (ret == -ENOSPC) { 279 } else if (ret == -ENOSPC) {
272 return 1; 280 ret = 1;
281 goto out;
273 } 282 }
274 283
275 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); 284 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
276 btrfs_delalloc_release_metadata(inode, end + 1 - start); 285 btrfs_delalloc_release_metadata(inode, end + 1 - start);
277 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
278 return 0; 287out:
288 btrfs_end_transaction(trans, root);
289 return ret;
279} 290}
280 291
281struct async_extent { 292struct async_extent {
@@ -343,7 +354,6 @@ static noinline int compress_file_range(struct inode *inode,
343 int *num_added) 354 int *num_added)
344{ 355{
345 struct btrfs_root *root = BTRFS_I(inode)->root; 356 struct btrfs_root *root = BTRFS_I(inode)->root;
346 struct btrfs_trans_handle *trans;
347 u64 num_bytes; 357 u64 num_bytes;
348 u64 blocksize = root->sectorsize; 358 u64 blocksize = root->sectorsize;
349 u64 actual_end; 359 u64 actual_end;
@@ -461,45 +471,36 @@ again:
461 } 471 }
462cont: 472cont:
463 if (start == 0) { 473 if (start == 0) {
464 trans = btrfs_join_transaction(root);
465 if (IS_ERR(trans)) {
466 ret = PTR_ERR(trans);
467 trans = NULL;
468 goto cleanup_and_out;
469 }
470 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
471
472 /* lets try to make an inline extent */ 474 /* lets try to make an inline extent */
473 if (ret || total_in < (actual_end - start)) { 475 if (ret || total_in < (actual_end - start)) {
474 /* we didn't compress the entire range, try 476 /* we didn't compress the entire range, try
475 * to make an uncompressed inline extent. 477 * to make an uncompressed inline extent.
476 */ 478 */
477 ret = cow_file_range_inline(trans, root, inode, 479 ret = cow_file_range_inline(root, inode, start, end,
478 start, end, 0, 0, NULL); 480 0, 0, NULL);
479 } else { 481 } else {
480 /* try making a compressed inline extent */ 482 /* try making a compressed inline extent */
481 ret = cow_file_range_inline(trans, root, inode, 483 ret = cow_file_range_inline(root, inode, start, end,
482 start, end,
483 total_compressed, 484 total_compressed,
484 compress_type, pages); 485 compress_type, pages);
485 } 486 }
486 if (ret <= 0) { 487 if (ret <= 0) {
488 unsigned long clear_flags = EXTENT_DELALLOC |
489 EXTENT_DEFRAG;
490 clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
491
487 /* 492 /*
488 * inline extent creation worked or returned error, 493 * inline extent creation worked or returned error,
489 * we don't need to create any more async work items. 494 * we don't need to create any more async work items.
490 * Unlock and free up our temp pages. 495 * Unlock and free up our temp pages.
491 */ 496 */
492 extent_clear_unlock_delalloc(inode, 497 extent_clear_unlock_delalloc(inode, start, end, NULL,
493 &BTRFS_I(inode)->io_tree, 498 clear_flags, PAGE_UNLOCK |
494 start, end, NULL, 499 PAGE_CLEAR_DIRTY |
495 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 500 PAGE_SET_WRITEBACK |
496 EXTENT_CLEAR_DELALLOC | 501 PAGE_END_WRITEBACK);
497 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
498
499 btrfs_end_transaction(trans, root);
500 goto free_pages_out; 502 goto free_pages_out;
501 } 503 }
502 btrfs_end_transaction(trans, root);
503 } 504 }
504 505
505 if (will_compress) { 506 if (will_compress) {
@@ -590,20 +591,6 @@ free_pages_out:
590 kfree(pages); 591 kfree(pages);
591 592
592 goto out; 593 goto out;
593
594cleanup_and_out:
595 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
596 start, end, NULL,
597 EXTENT_CLEAR_UNLOCK_PAGE |
598 EXTENT_CLEAR_DIRTY |
599 EXTENT_CLEAR_DELALLOC |
600 EXTENT_SET_WRITEBACK |
601 EXTENT_END_WRITEBACK);
602 if (!trans || IS_ERR(trans))
603 btrfs_error(root->fs_info, ret, "Failed to join transaction");
604 else
605 btrfs_abort_transaction(trans, root, ret);
606 goto free_pages_out;
607} 594}
608 595
609/* 596/*
@@ -617,7 +604,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
617{ 604{
618 struct async_extent *async_extent; 605 struct async_extent *async_extent;
619 u64 alloc_hint = 0; 606 u64 alloc_hint = 0;
620 struct btrfs_trans_handle *trans;
621 struct btrfs_key ins; 607 struct btrfs_key ins;
622 struct extent_map *em; 608 struct extent_map *em;
623 struct btrfs_root *root = BTRFS_I(inode)->root; 609 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -678,20 +664,10 @@ retry:
678 lock_extent(io_tree, async_extent->start, 664 lock_extent(io_tree, async_extent->start,
679 async_extent->start + async_extent->ram_size - 1); 665 async_extent->start + async_extent->ram_size - 1);
680 666
681 trans = btrfs_join_transaction(root); 667 ret = btrfs_reserve_extent(root,
682 if (IS_ERR(trans)) {
683 ret = PTR_ERR(trans);
684 } else {
685 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
686 ret = btrfs_reserve_extent(trans, root,
687 async_extent->compressed_size, 668 async_extent->compressed_size,
688 async_extent->compressed_size, 669 async_extent->compressed_size,
689 0, alloc_hint, &ins, 1); 670 0, alloc_hint, &ins, 1);
690 if (ret && ret != -ENOSPC)
691 btrfs_abort_transaction(trans, root, ret);
692 btrfs_end_transaction(trans, root);
693 }
694
695 if (ret) { 671 if (ret) {
696 int i; 672 int i;
697 673
@@ -770,16 +746,12 @@ retry:
770 /* 746 /*
771 * clear dirty, set writeback and unlock the pages. 747 * clear dirty, set writeback and unlock the pages.
772 */ 748 */
773 extent_clear_unlock_delalloc(inode, 749 extent_clear_unlock_delalloc(inode, async_extent->start,
774 &BTRFS_I(inode)->io_tree,
775 async_extent->start,
776 async_extent->start + 750 async_extent->start +
777 async_extent->ram_size - 1, 751 async_extent->ram_size - 1,
778 NULL, EXTENT_CLEAR_UNLOCK_PAGE | 752 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
779 EXTENT_CLEAR_UNLOCK | 753 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
780 EXTENT_CLEAR_DELALLOC | 754 PAGE_SET_WRITEBACK);
781 EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
782
783 ret = btrfs_submit_compressed_write(inode, 755 ret = btrfs_submit_compressed_write(inode,
784 async_extent->start, 756 async_extent->start,
785 async_extent->ram_size, 757 async_extent->ram_size,
@@ -798,16 +770,13 @@ out:
798out_free_reserve: 770out_free_reserve:
799 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 771 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
800out_free: 772out_free:
801 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 773 extent_clear_unlock_delalloc(inode, async_extent->start,
802 async_extent->start,
803 async_extent->start + 774 async_extent->start +
804 async_extent->ram_size - 1, 775 async_extent->ram_size - 1,
805 NULL, EXTENT_CLEAR_UNLOCK_PAGE | 776 NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
806 EXTENT_CLEAR_UNLOCK | 777 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
807 EXTENT_CLEAR_DELALLOC | 778 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
808 EXTENT_CLEAR_DIRTY | 779 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
809 EXTENT_SET_WRITEBACK |
810 EXTENT_END_WRITEBACK);
811 kfree(async_extent); 780 kfree(async_extent);
812 goto again; 781 goto again;
813} 782}
@@ -857,14 +826,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
857 * required to start IO on it. It may be clean and already done with 826 * required to start IO on it. It may be clean and already done with
858 * IO when we return. 827 * IO when we return.
859 */ 828 */
860static noinline int __cow_file_range(struct btrfs_trans_handle *trans, 829static noinline int cow_file_range(struct inode *inode,
861 struct inode *inode, 830 struct page *locked_page,
862 struct btrfs_root *root, 831 u64 start, u64 end, int *page_started,
863 struct page *locked_page, 832 unsigned long *nr_written,
864 u64 start, u64 end, int *page_started, 833 int unlock)
865 unsigned long *nr_written,
866 int unlock)
867{ 834{
835 struct btrfs_root *root = BTRFS_I(inode)->root;
868 u64 alloc_hint = 0; 836 u64 alloc_hint = 0;
869 u64 num_bytes; 837 u64 num_bytes;
870 unsigned long ram_size; 838 unsigned long ram_size;
@@ -885,29 +853,24 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
885 /* if this is a small write inside eof, kick off defrag */ 853 /* if this is a small write inside eof, kick off defrag */
886 if (num_bytes < 64 * 1024 && 854 if (num_bytes < 64 * 1024 &&
887 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) 855 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
888 btrfs_add_inode_defrag(trans, inode); 856 btrfs_add_inode_defrag(NULL, inode);
889 857
890 if (start == 0) { 858 if (start == 0) {
891 /* lets try to make an inline extent */ 859 /* lets try to make an inline extent */
892 ret = cow_file_range_inline(trans, root, inode, 860 ret = cow_file_range_inline(root, inode, start, end, 0, 0,
893 start, end, 0, 0, NULL); 861 NULL);
894 if (ret == 0) { 862 if (ret == 0) {
895 extent_clear_unlock_delalloc(inode, 863 extent_clear_unlock_delalloc(inode, start, end, NULL,
896 &BTRFS_I(inode)->io_tree, 864 EXTENT_LOCKED | EXTENT_DELALLOC |
897 start, end, NULL, 865 EXTENT_DEFRAG, PAGE_UNLOCK |
898 EXTENT_CLEAR_UNLOCK_PAGE | 866 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
899 EXTENT_CLEAR_UNLOCK | 867 PAGE_END_WRITEBACK);
900 EXTENT_CLEAR_DELALLOC |
901 EXTENT_CLEAR_DIRTY |
902 EXTENT_SET_WRITEBACK |
903 EXTENT_END_WRITEBACK);
904 868
905 *nr_written = *nr_written + 869 *nr_written = *nr_written +
906 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 870 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
907 *page_started = 1; 871 *page_started = 1;
908 goto out; 872 goto out;
909 } else if (ret < 0) { 873 } else if (ret < 0) {
910 btrfs_abort_transaction(trans, root, ret);
911 goto out_unlock; 874 goto out_unlock;
912 } 875 }
913 } 876 }
@@ -922,13 +885,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
922 unsigned long op; 885 unsigned long op;
923 886
924 cur_alloc_size = disk_num_bytes; 887 cur_alloc_size = disk_num_bytes;
925 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 888 ret = btrfs_reserve_extent(root, cur_alloc_size,
926 root->sectorsize, 0, alloc_hint, 889 root->sectorsize, 0, alloc_hint,
927 &ins, 1); 890 &ins, 1);
928 if (ret < 0) { 891 if (ret < 0)
929 btrfs_abort_transaction(trans, root, ret);
930 goto out_unlock; 892 goto out_unlock;
931 }
932 893
933 em = alloc_extent_map(); 894 em = alloc_extent_map();
934 if (!em) { 895 if (!em) {
@@ -974,10 +935,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
974 BTRFS_DATA_RELOC_TREE_OBJECTID) { 935 BTRFS_DATA_RELOC_TREE_OBJECTID) {
975 ret = btrfs_reloc_clone_csums(inode, start, 936 ret = btrfs_reloc_clone_csums(inode, start,
976 cur_alloc_size); 937 cur_alloc_size);
977 if (ret) { 938 if (ret)
978 btrfs_abort_transaction(trans, root, ret);
979 goto out_reserve; 939 goto out_reserve;
980 }
981 } 940 }
982 941
983 if (disk_num_bytes < cur_alloc_size) 942 if (disk_num_bytes < cur_alloc_size)
@@ -990,13 +949,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
990 * Do set the Private2 bit so we know this page was properly 949 * Do set the Private2 bit so we know this page was properly
991 * setup for writepage 950 * setup for writepage
992 */ 951 */
993 op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; 952 op = unlock ? PAGE_UNLOCK : 0;
994 op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | 953 op |= PAGE_SET_PRIVATE2;
995 EXTENT_SET_PRIVATE2;
996 954
997 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 955 extent_clear_unlock_delalloc(inode, start,
998 start, start + ram_size - 1, 956 start + ram_size - 1, locked_page,
999 locked_page, op); 957 EXTENT_LOCKED | EXTENT_DELALLOC,
958 op);
1000 disk_num_bytes -= cur_alloc_size; 959 disk_num_bytes -= cur_alloc_size;
1001 num_bytes -= cur_alloc_size; 960 num_bytes -= cur_alloc_size;
1002 alloc_hint = ins.objectid + ins.offset; 961 alloc_hint = ins.objectid + ins.offset;
@@ -1008,52 +967,14 @@ out:
1008out_reserve: 967out_reserve:
1009 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 968 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
1010out_unlock: 969out_unlock:
1011 extent_clear_unlock_delalloc(inode, 970 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1012 &BTRFS_I(inode)->io_tree, 971 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
1013 start, end, locked_page, 972 EXTENT_DELALLOC | EXTENT_DEFRAG,
1014 EXTENT_CLEAR_UNLOCK_PAGE | 973 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1015 EXTENT_CLEAR_UNLOCK | 974 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
1016 EXTENT_CLEAR_DELALLOC |
1017 EXTENT_CLEAR_DIRTY |
1018 EXTENT_SET_WRITEBACK |
1019 EXTENT_END_WRITEBACK);
1020
1021 goto out; 975 goto out;
1022} 976}
1023 977
1024static noinline int cow_file_range(struct inode *inode,
1025 struct page *locked_page,
1026 u64 start, u64 end, int *page_started,
1027 unsigned long *nr_written,
1028 int unlock)
1029{
1030 struct btrfs_trans_handle *trans;
1031 struct btrfs_root *root = BTRFS_I(inode)->root;
1032 int ret;
1033
1034 trans = btrfs_join_transaction(root);
1035 if (IS_ERR(trans)) {
1036 extent_clear_unlock_delalloc(inode,
1037 &BTRFS_I(inode)->io_tree,
1038 start, end, locked_page,
1039 EXTENT_CLEAR_UNLOCK_PAGE |
1040 EXTENT_CLEAR_UNLOCK |
1041 EXTENT_CLEAR_DELALLOC |
1042 EXTENT_CLEAR_DIRTY |
1043 EXTENT_SET_WRITEBACK |
1044 EXTENT_END_WRITEBACK);
1045 return PTR_ERR(trans);
1046 }
1047 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1048
1049 ret = __cow_file_range(trans, inode, root, locked_page, start, end,
1050 page_started, nr_written, unlock);
1051
1052 btrfs_end_transaction(trans, root);
1053
1054 return ret;
1055}
1056
1057/* 978/*
1058 * work queue call back to started compression on a file and pages 979 * work queue call back to started compression on a file and pages
1059 */ 980 */
@@ -1221,15 +1142,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1221 1142
1222 path = btrfs_alloc_path(); 1143 path = btrfs_alloc_path();
1223 if (!path) { 1144 if (!path) {
1224 extent_clear_unlock_delalloc(inode, 1145 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1225 &BTRFS_I(inode)->io_tree, 1146 EXTENT_LOCKED | EXTENT_DELALLOC |
1226 start, end, locked_page, 1147 EXTENT_DO_ACCOUNTING |
1227 EXTENT_CLEAR_UNLOCK_PAGE | 1148 EXTENT_DEFRAG, PAGE_UNLOCK |
1228 EXTENT_CLEAR_UNLOCK | 1149 PAGE_CLEAR_DIRTY |
1229 EXTENT_CLEAR_DELALLOC | 1150 PAGE_SET_WRITEBACK |
1230 EXTENT_CLEAR_DIRTY | 1151 PAGE_END_WRITEBACK);
1231 EXTENT_SET_WRITEBACK |
1232 EXTENT_END_WRITEBACK);
1233 return -ENOMEM; 1152 return -ENOMEM;
1234 } 1153 }
1235 1154
@@ -1241,15 +1160,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1241 trans = btrfs_join_transaction(root); 1160 trans = btrfs_join_transaction(root);
1242 1161
1243 if (IS_ERR(trans)) { 1162 if (IS_ERR(trans)) {
1244 extent_clear_unlock_delalloc(inode, 1163 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1245 &BTRFS_I(inode)->io_tree, 1164 EXTENT_LOCKED | EXTENT_DELALLOC |
1246 start, end, locked_page, 1165 EXTENT_DO_ACCOUNTING |
1247 EXTENT_CLEAR_UNLOCK_PAGE | 1166 EXTENT_DEFRAG, PAGE_UNLOCK |
1248 EXTENT_CLEAR_UNLOCK | 1167 PAGE_CLEAR_DIRTY |
1249 EXTENT_CLEAR_DELALLOC | 1168 PAGE_SET_WRITEBACK |
1250 EXTENT_CLEAR_DIRTY | 1169 PAGE_END_WRITEBACK);
1251 EXTENT_SET_WRITEBACK |
1252 EXTENT_END_WRITEBACK);
1253 btrfs_free_path(path); 1170 btrfs_free_path(path);
1254 return PTR_ERR(trans); 1171 return PTR_ERR(trans);
1255 } 1172 }
@@ -1369,9 +1286,9 @@ out_check:
1369 1286
1370 btrfs_release_path(path); 1287 btrfs_release_path(path);
1371 if (cow_start != (u64)-1) { 1288 if (cow_start != (u64)-1) {
1372 ret = __cow_file_range(trans, inode, root, locked_page, 1289 ret = cow_file_range(inode, locked_page,
1373 cow_start, found_key.offset - 1, 1290 cow_start, found_key.offset - 1,
1374 page_started, nr_written, 1); 1291 page_started, nr_written, 1);
1375 if (ret) { 1292 if (ret) {
1376 btrfs_abort_transaction(trans, root, ret); 1293 btrfs_abort_transaction(trans, root, ret);
1377 goto error; 1294 goto error;
@@ -1428,11 +1345,11 @@ out_check:
1428 } 1345 }
1429 } 1346 }
1430 1347
1431 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1348 extent_clear_unlock_delalloc(inode, cur_offset,
1432 cur_offset, cur_offset + num_bytes - 1, 1349 cur_offset + num_bytes - 1,
1433 locked_page, EXTENT_CLEAR_UNLOCK_PAGE | 1350 locked_page, EXTENT_LOCKED |
1434 EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | 1351 EXTENT_DELALLOC, PAGE_UNLOCK |
1435 EXTENT_SET_PRIVATE2); 1352 PAGE_SET_PRIVATE2);
1436 cur_offset = extent_end; 1353 cur_offset = extent_end;
1437 if (cur_offset > end) 1354 if (cur_offset > end)
1438 break; 1355 break;
@@ -1445,9 +1362,8 @@ out_check:
1445 } 1362 }
1446 1363
1447 if (cow_start != (u64)-1) { 1364 if (cow_start != (u64)-1) {
1448 ret = __cow_file_range(trans, inode, root, locked_page, 1365 ret = cow_file_range(inode, locked_page, cow_start, end,
1449 cow_start, end, 1366 page_started, nr_written, 1);
1450 page_started, nr_written, 1);
1451 if (ret) { 1367 if (ret) {
1452 btrfs_abort_transaction(trans, root, ret); 1368 btrfs_abort_transaction(trans, root, ret);
1453 goto error; 1369 goto error;
@@ -1460,16 +1376,13 @@ error:
1460 ret = err; 1376 ret = err;
1461 1377
1462 if (ret && cur_offset < end) 1378 if (ret && cur_offset < end)
1463 extent_clear_unlock_delalloc(inode, 1379 extent_clear_unlock_delalloc(inode, cur_offset, end,
1464 &BTRFS_I(inode)->io_tree, 1380 locked_page, EXTENT_LOCKED |
1465 cur_offset, end, locked_page, 1381 EXTENT_DELALLOC | EXTENT_DEFRAG |
1466 EXTENT_CLEAR_UNLOCK_PAGE | 1382 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1467 EXTENT_CLEAR_UNLOCK | 1383 PAGE_CLEAR_DIRTY |
1468 EXTENT_CLEAR_DELALLOC | 1384 PAGE_SET_WRITEBACK |
1469 EXTENT_CLEAR_DIRTY | 1385 PAGE_END_WRITEBACK);
1470 EXTENT_SET_WRITEBACK |
1471 EXTENT_END_WRITEBACK);
1472
1473 btrfs_free_path(path); 1386 btrfs_free_path(path);
1474 return ret; 1387 return ret;
1475} 1388}
@@ -2132,6 +2045,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2132 WARN_ON(1); 2045 WARN_ON(1);
2133 return ret; 2046 return ret;
2134 } 2047 }
2048 ret = 0;
2135 2049
2136 while (1) { 2050 while (1) {
2137 cond_resched(); 2051 cond_resched();
@@ -2181,8 +2095,6 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2181 old->len || extent_offset + num_bytes <= 2095 old->len || extent_offset + num_bytes <=
2182 old->extent_offset + old->offset) 2096 old->extent_offset + old->offset)
2183 continue; 2097 continue;
2184
2185 ret = 0;
2186 break; 2098 break;
2187 } 2099 }
2188 2100
@@ -2238,16 +2150,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
2238 2150
2239static int relink_is_mergable(struct extent_buffer *leaf, 2151static int relink_is_mergable(struct extent_buffer *leaf,
2240 struct btrfs_file_extent_item *fi, 2152 struct btrfs_file_extent_item *fi,
2241 u64 disk_bytenr) 2153 struct new_sa_defrag_extent *new)
2242{ 2154{
2243 if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr) 2155 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2244 return 0; 2156 return 0;
2245 2157
2246 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) 2158 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2247 return 0; 2159 return 0;
2248 2160
2249 if (btrfs_file_extent_compression(leaf, fi) || 2161 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2250 btrfs_file_extent_encryption(leaf, fi) || 2162 return 0;
2163
2164 if (btrfs_file_extent_encryption(leaf, fi) ||
2251 btrfs_file_extent_other_encoding(leaf, fi)) 2165 btrfs_file_extent_other_encoding(leaf, fi))
2252 return 0; 2166 return 0;
2253 2167
@@ -2391,8 +2305,8 @@ again:
2391 struct btrfs_file_extent_item); 2305 struct btrfs_file_extent_item);
2392 extent_len = btrfs_file_extent_num_bytes(leaf, fi); 2306 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2393 2307
2394 if (relink_is_mergable(leaf, fi, new->bytenr) && 2308 if (extent_len + found_key.offset == start &&
2395 extent_len + found_key.offset == start) { 2309 relink_is_mergable(leaf, fi, new)) {
2396 btrfs_set_file_extent_num_bytes(leaf, fi, 2310 btrfs_set_file_extent_num_bytes(leaf, fi,
2397 extent_len + len); 2311 extent_len + len);
2398 btrfs_mark_buffer_dirty(leaf); 2312 btrfs_mark_buffer_dirty(leaf);
@@ -2648,8 +2562,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2648 struct extent_state *cached_state = NULL; 2562 struct extent_state *cached_state = NULL;
2649 struct new_sa_defrag_extent *new = NULL; 2563 struct new_sa_defrag_extent *new = NULL;
2650 int compress_type = 0; 2564 int compress_type = 0;
2651 int ret; 2565 int ret = 0;
2566 u64 logical_len = ordered_extent->len;
2652 bool nolock; 2567 bool nolock;
2568 bool truncated = false;
2653 2569
2654 nolock = btrfs_is_free_space_inode(inode); 2570 nolock = btrfs_is_free_space_inode(inode);
2655 2571
@@ -2658,6 +2574,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2658 goto out; 2574 goto out;
2659 } 2575 }
2660 2576
2577 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
2578 truncated = true;
2579 logical_len = ordered_extent->truncated_len;
2580 /* Truncated the entire extent, don't bother adding */
2581 if (!logical_len)
2582 goto out;
2583 }
2584
2661 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 2585 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
2662 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ 2586 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
2663 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 2587 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -2713,15 +2637,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2713 ret = btrfs_mark_extent_written(trans, inode, 2637 ret = btrfs_mark_extent_written(trans, inode,
2714 ordered_extent->file_offset, 2638 ordered_extent->file_offset,
2715 ordered_extent->file_offset + 2639 ordered_extent->file_offset +
2716 ordered_extent->len); 2640 logical_len);
2717 } else { 2641 } else {
2718 BUG_ON(root == root->fs_info->tree_root); 2642 BUG_ON(root == root->fs_info->tree_root);
2719 ret = insert_reserved_file_extent(trans, inode, 2643 ret = insert_reserved_file_extent(trans, inode,
2720 ordered_extent->file_offset, 2644 ordered_extent->file_offset,
2721 ordered_extent->start, 2645 ordered_extent->start,
2722 ordered_extent->disk_len, 2646 ordered_extent->disk_len,
2723 ordered_extent->len, 2647 logical_len, logical_len,
2724 ordered_extent->len,
2725 compress_type, 0, 0, 2648 compress_type, 0, 0,
2726 BTRFS_FILE_EXTENT_REG); 2649 BTRFS_FILE_EXTENT_REG);
2727 } 2650 }
@@ -2753,17 +2676,27 @@ out:
2753 if (trans) 2676 if (trans)
2754 btrfs_end_transaction(trans, root); 2677 btrfs_end_transaction(trans, root);
2755 2678
2756 if (ret) { 2679 if (ret || truncated) {
2757 clear_extent_uptodate(io_tree, ordered_extent->file_offset, 2680 u64 start, end;
2758 ordered_extent->file_offset + 2681
2759 ordered_extent->len - 1, NULL, GFP_NOFS); 2682 if (truncated)
2683 start = ordered_extent->file_offset + logical_len;
2684 else
2685 start = ordered_extent->file_offset;
2686 end = ordered_extent->file_offset + ordered_extent->len - 1;
2687 clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
2688
2689 /* Drop the cache for the part of the extent we didn't write. */
2690 btrfs_drop_extent_cache(inode, start, end, 0);
2760 2691
2761 /* 2692 /*
2762 * If the ordered extent had an IOERR or something else went 2693 * If the ordered extent had an IOERR or something else went
2763 * wrong we need to return the space for this ordered extent 2694 * wrong we need to return the space for this ordered extent
2764 * back to the allocator. 2695 * back to the allocator. We only free the extent in the
2696 * truncated case if we didn't write out the extent at all.
2765 */ 2697 */
2766 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && 2698 if ((ret || !logical_len) &&
2699 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2767 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) 2700 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2768 btrfs_free_reserved_extent(root, ordered_extent->start, 2701 btrfs_free_reserved_extent(root, ordered_extent->start,
2769 ordered_extent->disk_len); 2702 ordered_extent->disk_len);
@@ -2827,16 +2760,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2827 * if there's a match, we allow the bio to finish. If not, the code in 2760 * if there's a match, we allow the bio to finish. If not, the code in
2828 * extent_io.c will try to find good copies for us. 2761 * extent_io.c will try to find good copies for us.
2829 */ 2762 */
2830static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, 2763static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
2831 struct extent_state *state, int mirror) 2764 u64 phy_offset, struct page *page,
2765 u64 start, u64 end, int mirror)
2832{ 2766{
2833 size_t offset = start - page_offset(page); 2767 size_t offset = start - page_offset(page);
2834 struct inode *inode = page->mapping->host; 2768 struct inode *inode = page->mapping->host;
2835 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2769 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2836 char *kaddr; 2770 char *kaddr;
2837 u64 private = ~(u32)0;
2838 int ret;
2839 struct btrfs_root *root = BTRFS_I(inode)->root; 2771 struct btrfs_root *root = BTRFS_I(inode)->root;
2772 u32 csum_expected;
2840 u32 csum = ~(u32)0; 2773 u32 csum = ~(u32)0;
2841 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 2774 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
2842 DEFAULT_RATELIMIT_BURST); 2775 DEFAULT_RATELIMIT_BURST);
@@ -2856,19 +2789,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2856 return 0; 2789 return 0;
2857 } 2790 }
2858 2791
2859 if (state && state->start == start) { 2792 phy_offset >>= inode->i_sb->s_blocksize_bits;
2860 private = state->private; 2793 csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
2861 ret = 0;
2862 } else {
2863 ret = get_state_private(io_tree, start, &private);
2864 }
2865 kaddr = kmap_atomic(page);
2866 if (ret)
2867 goto zeroit;
2868 2794
2795 kaddr = kmap_atomic(page);
2869 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); 2796 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
2870 btrfs_csum_final(csum, (char *)&csum); 2797 btrfs_csum_final(csum, (char *)&csum);
2871 if (csum != private) 2798 if (csum != csum_expected)
2872 goto zeroit; 2799 goto zeroit;
2873 2800
2874 kunmap_atomic(kaddr); 2801 kunmap_atomic(kaddr);
@@ -2877,14 +2804,12 @@ good:
2877 2804
2878zeroit: 2805zeroit:
2879 if (__ratelimit(&_rs)) 2806 if (__ratelimit(&_rs))
2880 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu", 2807 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
2881 (unsigned long long)btrfs_ino(page->mapping->host), 2808 btrfs_ino(page->mapping->host), start, csum, csum_expected);
2882 (unsigned long long)start, csum,
2883 (unsigned long long)private);
2884 memset(kaddr + offset, 1, end - start + 1); 2809 memset(kaddr + offset, 1, end - start + 1);
2885 flush_dcache_page(page); 2810 flush_dcache_page(page);
2886 kunmap_atomic(kaddr); 2811 kunmap_atomic(kaddr);
2887 if (private == 0) 2812 if (csum_expected == 0)
2888 return 0; 2813 return 0;
2889 return -EIO; 2814 return -EIO;
2890} 2815}
@@ -2971,8 +2896,10 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2971 btrfs_root_refs(&root->root_item) > 0) { 2896 btrfs_root_refs(&root->root_item) > 0) {
2972 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, 2897 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
2973 root->root_key.objectid); 2898 root->root_key.objectid);
2974 BUG_ON(ret); 2899 if (ret)
2975 root->orphan_item_inserted = 0; 2900 btrfs_abort_transaction(trans, root, ret);
2901 else
2902 root->orphan_item_inserted = 0;
2976 } 2903 }
2977 2904
2978 if (block_rsv) { 2905 if (block_rsv) {
@@ -3041,11 +2968,18 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
3041 /* insert an orphan item to track this unlinked/truncated file */ 2968 /* insert an orphan item to track this unlinked/truncated file */
3042 if (insert >= 1) { 2969 if (insert >= 1) {
3043 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2970 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
3044 if (ret && ret != -EEXIST) { 2971 if (ret) {
3045 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 2972 if (reserve) {
3046 &BTRFS_I(inode)->runtime_flags); 2973 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3047 btrfs_abort_transaction(trans, root, ret); 2974 &BTRFS_I(inode)->runtime_flags);
3048 return ret; 2975 btrfs_orphan_release_metadata(inode);
2976 }
2977 if (ret != -EEXIST) {
2978 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2979 &BTRFS_I(inode)->runtime_flags);
2980 btrfs_abort_transaction(trans, root, ret);
2981 return ret;
2982 }
3049 } 2983 }
3050 ret = 0; 2984 ret = 0;
3051 } 2985 }
@@ -3084,17 +3018,15 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3084 release_rsv = 1; 3018 release_rsv = 1;
3085 spin_unlock(&root->orphan_lock); 3019 spin_unlock(&root->orphan_lock);
3086 3020
3087 if (trans && delete_item) { 3021 if (trans && delete_item)
3088 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); 3022 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
3089 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
3090 }
3091 3023
3092 if (release_rsv) { 3024 if (release_rsv) {
3093 btrfs_orphan_release_metadata(inode); 3025 btrfs_orphan_release_metadata(inode);
3094 atomic_dec(&root->orphan_inodes); 3026 atomic_dec(&root->orphan_inodes);
3095 } 3027 }
3096 3028
3097 return 0; 3029 return ret;
3098} 3030}
3099 3031
3100/* 3032/*
@@ -3224,8 +3156,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3224 found_key.objectid); 3156 found_key.objectid);
3225 ret = btrfs_del_orphan_item(trans, root, 3157 ret = btrfs_del_orphan_item(trans, root,
3226 found_key.objectid); 3158 found_key.objectid);
3227 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
3228 btrfs_end_transaction(trans, root); 3159 btrfs_end_transaction(trans, root);
3160 if (ret)
3161 goto out;
3229 continue; 3162 continue;
3230 } 3163 }
3231 3164
@@ -3657,8 +3590,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3657 if (ret) { 3590 if (ret) {
3658 btrfs_info(root->fs_info, 3591 btrfs_info(root->fs_info,
3659 "failed to delete reference to %.*s, inode %llu parent %llu", 3592 "failed to delete reference to %.*s, inode %llu parent %llu",
3660 name_len, name, 3593 name_len, name, ino, dir_ino);
3661 (unsigned long long)ino, (unsigned long long)dir_ino);
3662 btrfs_abort_transaction(trans, root, ret); 3594 btrfs_abort_transaction(trans, root, ret);
3663 goto err; 3595 goto err;
3664 } 3596 }
@@ -3929,6 +3861,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3929 u64 extent_num_bytes = 0; 3861 u64 extent_num_bytes = 0;
3930 u64 extent_offset = 0; 3862 u64 extent_offset = 0;
3931 u64 item_end = 0; 3863 u64 item_end = 0;
3864 u64 last_size = (u64)-1;
3932 u32 found_type = (u8)-1; 3865 u32 found_type = (u8)-1;
3933 int found_extent; 3866 int found_extent;
3934 int del_item; 3867 int del_item;
@@ -4026,6 +3959,11 @@ search_again:
4026 if (found_type != BTRFS_EXTENT_DATA_KEY) 3959 if (found_type != BTRFS_EXTENT_DATA_KEY)
4027 goto delete; 3960 goto delete;
4028 3961
3962 if (del_item)
3963 last_size = found_key.offset;
3964 else
3965 last_size = new_size;
3966
4029 if (extent_type != BTRFS_FILE_EXTENT_INLINE) { 3967 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4030 u64 num_dec; 3968 u64 num_dec;
4031 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); 3969 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -4137,6 +4075,8 @@ out:
4137 btrfs_abort_transaction(trans, root, ret); 4075 btrfs_abort_transaction(trans, root, ret);
4138 } 4076 }
4139error: 4077error:
4078 if (last_size != (u64)-1)
4079 btrfs_ordered_update_i_size(inode, last_size, NULL);
4140 btrfs_free_path(path); 4080 btrfs_free_path(path);
4141 return err; 4081 return err;
4142} 4082}
@@ -4465,8 +4405,26 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4465 btrfs_inode_resume_unlocked_dio(inode); 4405 btrfs_inode_resume_unlocked_dio(inode);
4466 4406
4467 ret = btrfs_truncate(inode); 4407 ret = btrfs_truncate(inode);
4468 if (ret && inode->i_nlink) 4408 if (ret && inode->i_nlink) {
4469 btrfs_orphan_del(NULL, inode); 4409 int err;
4410
4411 /*
4412 * failed to truncate, disk_i_size is only adjusted down
4413 * as we remove extents, so it should represent the true
4414 * size of the inode, so reset the in memory size and
4415 * delete our orphan entry.
4416 */
4417 trans = btrfs_join_transaction(root);
4418 if (IS_ERR(trans)) {
4419 btrfs_orphan_del(NULL, inode);
4420 return ret;
4421 }
4422 i_size_write(inode, BTRFS_I(inode)->disk_i_size);
4423 err = btrfs_orphan_del(trans, inode);
4424 if (err)
4425 btrfs_abort_transaction(trans, root, err);
4426 btrfs_end_transaction(trans, root);
4427 }
4470 } 4428 }
4471 4429
4472 return ret; 4430 return ret;
@@ -4601,10 +4559,15 @@ void btrfs_evict_inode(struct inode *inode)
4601 4559
4602 btrfs_free_block_rsv(root, rsv); 4560 btrfs_free_block_rsv(root, rsv);
4603 4561
4562 /*
4563 * Errors here aren't a big deal, it just means we leave orphan items
4564 * in the tree. They will be cleaned up on the next mount.
4565 */
4604 if (ret == 0) { 4566 if (ret == 0) {
4605 trans->block_rsv = root->orphan_block_rsv; 4567 trans->block_rsv = root->orphan_block_rsv;
4606 ret = btrfs_orphan_del(trans, inode); 4568 btrfs_orphan_del(trans, inode);
4607 BUG_ON(ret); 4569 } else {
4570 btrfs_orphan_del(NULL, inode);
4608 } 4571 }
4609 4572
4610 trans->block_rsv = &root->fs_info->trans_block_rsv; 4573 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -6161,10 +6124,7 @@ insert:
6161 btrfs_release_path(path); 6124 btrfs_release_path(path);
6162 if (em->start > start || extent_map_end(em) <= start) { 6125 if (em->start > start || extent_map_end(em) <= start) {
6163 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]", 6126 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
6164 (unsigned long long)em->start, 6127 em->start, em->len, start, len);
6165 (unsigned long long)em->len,
6166 (unsigned long long)start,
6167 (unsigned long long)len);
6168 err = -EIO; 6128 err = -EIO;
6169 goto out; 6129 goto out;
6170 } 6130 }
@@ -6362,39 +6322,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6362 u64 start, u64 len) 6322 u64 start, u64 len)
6363{ 6323{
6364 struct btrfs_root *root = BTRFS_I(inode)->root; 6324 struct btrfs_root *root = BTRFS_I(inode)->root;
6365 struct btrfs_trans_handle *trans;
6366 struct extent_map *em; 6325 struct extent_map *em;
6367 struct btrfs_key ins; 6326 struct btrfs_key ins;
6368 u64 alloc_hint; 6327 u64 alloc_hint;
6369 int ret; 6328 int ret;
6370 6329
6371 trans = btrfs_join_transaction(root);
6372 if (IS_ERR(trans))
6373 return ERR_CAST(trans);
6374
6375 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
6376
6377 alloc_hint = get_extent_allocation_hint(inode, start, len); 6330 alloc_hint = get_extent_allocation_hint(inode, start, len);
6378 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, 6331 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
6379 alloc_hint, &ins, 1); 6332 alloc_hint, &ins, 1);
6380 if (ret) { 6333 if (ret)
6381 em = ERR_PTR(ret); 6334 return ERR_PTR(ret);
6382 goto out;
6383 }
6384 6335
6385 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 6336 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6386 ins.offset, ins.offset, ins.offset, 0); 6337 ins.offset, ins.offset, ins.offset, 0);
6387 if (IS_ERR(em)) 6338 if (IS_ERR(em)) {
6388 goto out; 6339 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
6340 return em;
6341 }
6389 6342
6390 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, 6343 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
6391 ins.offset, ins.offset, 0); 6344 ins.offset, ins.offset, 0);
6392 if (ret) { 6345 if (ret) {
6393 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6346 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
6394 em = ERR_PTR(ret); 6347 free_extent_map(em);
6348 return ERR_PTR(ret);
6395 } 6349 }
6396out: 6350
6397 btrfs_end_transaction(trans, root);
6398 return em; 6351 return em;
6399} 6352}
6400 6353
@@ -6402,11 +6355,11 @@ out:
6402 * returns 1 when the nocow is safe, < 1 on error, 0 if the 6355 * returns 1 when the nocow is safe, < 1 on error, 0 if the
6403 * block must be cow'd 6356 * block must be cow'd
6404 */ 6357 */
6405noinline int can_nocow_extent(struct btrfs_trans_handle *trans, 6358noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6406 struct inode *inode, u64 offset, u64 *len,
6407 u64 *orig_start, u64 *orig_block_len, 6359 u64 *orig_start, u64 *orig_block_len,
6408 u64 *ram_bytes) 6360 u64 *ram_bytes)
6409{ 6361{
6362 struct btrfs_trans_handle *trans;
6410 struct btrfs_path *path; 6363 struct btrfs_path *path;
6411 int ret; 6364 int ret;
6412 struct extent_buffer *leaf; 6365 struct extent_buffer *leaf;
@@ -6424,7 +6377,7 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6424 if (!path) 6377 if (!path)
6425 return -ENOMEM; 6378 return -ENOMEM;
6426 6379
6427 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), 6380 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
6428 offset, 0); 6381 offset, 0);
6429 if (ret < 0) 6382 if (ret < 0)
6430 goto out; 6383 goto out;
@@ -6489,9 +6442,19 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6489 * look for other files referencing this extent, if we 6442 * look for other files referencing this extent, if we
6490 * find any we must cow 6443 * find any we must cow
6491 */ 6444 */
6492 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), 6445 trans = btrfs_join_transaction(root);
6493 key.offset - backref_offset, disk_bytenr)) 6446 if (IS_ERR(trans)) {
6447 ret = 0;
6494 goto out; 6448 goto out;
6449 }
6450
6451 ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
6452 key.offset - backref_offset, disk_bytenr);
6453 btrfs_end_transaction(trans, root);
6454 if (ret) {
6455 ret = 0;
6456 goto out;
6457 }
6495 6458
6496 /* 6459 /*
6497 * adjust disk_bytenr and num_bytes to cover just the bytes 6460 * adjust disk_bytenr and num_bytes to cover just the bytes
@@ -6633,7 +6596,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6633 u64 start = iblock << inode->i_blkbits; 6596 u64 start = iblock << inode->i_blkbits;
6634 u64 lockstart, lockend; 6597 u64 lockstart, lockend;
6635 u64 len = bh_result->b_size; 6598 u64 len = bh_result->b_size;
6636 struct btrfs_trans_handle *trans;
6637 int unlock_bits = EXTENT_LOCKED; 6599 int unlock_bits = EXTENT_LOCKED;
6638 int ret = 0; 6600 int ret = 0;
6639 6601
@@ -6715,16 +6677,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6715 len = min(len, em->len - (start - em->start)); 6677 len = min(len, em->len - (start - em->start));
6716 block_start = em->block_start + (start - em->start); 6678 block_start = em->block_start + (start - em->start);
6717 6679
6718 /* 6680 if (can_nocow_extent(inode, start, &len, &orig_start,
6719 * we're not going to log anything, but we do need
6720 * to make sure the current transaction stays open
6721 * while we look for nocow cross refs
6722 */
6723 trans = btrfs_join_transaction(root);
6724 if (IS_ERR(trans))
6725 goto must_cow;
6726
6727 if (can_nocow_extent(trans, inode, start, &len, &orig_start,
6728 &orig_block_len, &ram_bytes) == 1) { 6681 &orig_block_len, &ram_bytes) == 1) {
6729 if (type == BTRFS_ORDERED_PREALLOC) { 6682 if (type == BTRFS_ORDERED_PREALLOC) {
6730 free_extent_map(em); 6683 free_extent_map(em);
@@ -6733,24 +6686,20 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6733 block_start, len, 6686 block_start, len,
6734 orig_block_len, 6687 orig_block_len,
6735 ram_bytes, type); 6688 ram_bytes, type);
6736 if (IS_ERR(em)) { 6689 if (IS_ERR(em))
6737 btrfs_end_transaction(trans, root);
6738 goto unlock_err; 6690 goto unlock_err;
6739 }
6740 } 6691 }
6741 6692
6742 ret = btrfs_add_ordered_extent_dio(inode, start, 6693 ret = btrfs_add_ordered_extent_dio(inode, start,
6743 block_start, len, len, type); 6694 block_start, len, len, type);
6744 btrfs_end_transaction(trans, root);
6745 if (ret) { 6695 if (ret) {
6746 free_extent_map(em); 6696 free_extent_map(em);
6747 goto unlock_err; 6697 goto unlock_err;
6748 } 6698 }
6749 goto unlock; 6699 goto unlock;
6750 } 6700 }
6751 btrfs_end_transaction(trans, root);
6752 } 6701 }
6753must_cow: 6702
6754 /* 6703 /*
6755 * this will cow the extent, reset the len in case we changed 6704 * this will cow the extent, reset the len in case we changed
6756 * it above 6705 * it above
@@ -6813,26 +6762,6 @@ unlock_err:
6813 return ret; 6762 return ret;
6814} 6763}
6815 6764
6816struct btrfs_dio_private {
6817 struct inode *inode;
6818 u64 logical_offset;
6819 u64 disk_bytenr;
6820 u64 bytes;
6821 void *private;
6822
6823 /* number of bios pending for this dio */
6824 atomic_t pending_bios;
6825
6826 /* IO errors */
6827 int errors;
6828
6829 /* orig_bio is our btrfs_io_bio */
6830 struct bio *orig_bio;
6831
6832 /* dio_bio came from fs/direct-io.c */
6833 struct bio *dio_bio;
6834};
6835
6836static void btrfs_endio_direct_read(struct bio *bio, int err) 6765static void btrfs_endio_direct_read(struct bio *bio, int err)
6837{ 6766{
6838 struct btrfs_dio_private *dip = bio->bi_private; 6767 struct btrfs_dio_private *dip = bio->bi_private;
@@ -6841,6 +6770,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6841 struct inode *inode = dip->inode; 6770 struct inode *inode = dip->inode;
6842 struct btrfs_root *root = BTRFS_I(inode)->root; 6771 struct btrfs_root *root = BTRFS_I(inode)->root;
6843 struct bio *dio_bio; 6772 struct bio *dio_bio;
6773 u32 *csums = (u32 *)dip->csum;
6774 int index = 0;
6844 u64 start; 6775 u64 start;
6845 6776
6846 start = dip->logical_offset; 6777 start = dip->logical_offset;
@@ -6849,12 +6780,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6849 struct page *page = bvec->bv_page; 6780 struct page *page = bvec->bv_page;
6850 char *kaddr; 6781 char *kaddr;
6851 u32 csum = ~(u32)0; 6782 u32 csum = ~(u32)0;
6852 u64 private = ~(u32)0;
6853 unsigned long flags; 6783 unsigned long flags;
6854 6784
6855 if (get_state_private(&BTRFS_I(inode)->io_tree,
6856 start, &private))
6857 goto failed;
6858 local_irq_save(flags); 6785 local_irq_save(flags);
6859 kaddr = kmap_atomic(page); 6786 kaddr = kmap_atomic(page);
6860 csum = btrfs_csum_data(kaddr + bvec->bv_offset, 6787 csum = btrfs_csum_data(kaddr + bvec->bv_offset,
@@ -6864,18 +6791,17 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6864 local_irq_restore(flags); 6791 local_irq_restore(flags);
6865 6792
6866 flush_dcache_page(bvec->bv_page); 6793 flush_dcache_page(bvec->bv_page);
6867 if (csum != private) { 6794 if (csum != csums[index]) {
6868failed: 6795 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
6869 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u", 6796 btrfs_ino(inode), start, csum,
6870 (unsigned long long)btrfs_ino(inode), 6797 csums[index]);
6871 (unsigned long long)start,
6872 csum, (unsigned)private);
6873 err = -EIO; 6798 err = -EIO;
6874 } 6799 }
6875 } 6800 }
6876 6801
6877 start += bvec->bv_len; 6802 start += bvec->bv_len;
6878 bvec++; 6803 bvec++;
6804 index++;
6879 } while (bvec <= bvec_end); 6805 } while (bvec <= bvec_end);
6880 6806
6881 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6807 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@ -6956,7 +6882,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
6956 if (err) { 6882 if (err) {
6957 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " 6883 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
6958 "sector %#Lx len %u err no %d\n", 6884 "sector %#Lx len %u err no %d\n",
6959 (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw, 6885 btrfs_ino(dip->inode), bio->bi_rw,
6960 (unsigned long long)bio->bi_sector, bio->bi_size, err); 6886 (unsigned long long)bio->bi_sector, bio->bi_size, err);
6961 dip->errors = 1; 6887 dip->errors = 1;
6962 6888
@@ -6992,6 +6918,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6992 int rw, u64 file_offset, int skip_sum, 6918 int rw, u64 file_offset, int skip_sum,
6993 int async_submit) 6919 int async_submit)
6994{ 6920{
6921 struct btrfs_dio_private *dip = bio->bi_private;
6995 int write = rw & REQ_WRITE; 6922 int write = rw & REQ_WRITE;
6996 struct btrfs_root *root = BTRFS_I(inode)->root; 6923 struct btrfs_root *root = BTRFS_I(inode)->root;
6997 int ret; 6924 int ret;
@@ -7026,7 +6953,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
7026 if (ret) 6953 if (ret)
7027 goto err; 6954 goto err;
7028 } else if (!skip_sum) { 6955 } else if (!skip_sum) {
7029 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); 6956 ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
6957 file_offset);
7030 if (ret) 6958 if (ret)
7031 goto err; 6959 goto err;
7032 } 6960 }
@@ -7061,6 +6989,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7061 bio_put(orig_bio); 6989 bio_put(orig_bio);
7062 return -EIO; 6990 return -EIO;
7063 } 6991 }
6992
7064 if (map_length >= orig_bio->bi_size) { 6993 if (map_length >= orig_bio->bi_size) {
7065 bio = orig_bio; 6994 bio = orig_bio;
7066 goto submit; 6995 goto submit;
@@ -7156,19 +7085,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7156 struct btrfs_dio_private *dip; 7085 struct btrfs_dio_private *dip;
7157 struct bio *io_bio; 7086 struct bio *io_bio;
7158 int skip_sum; 7087 int skip_sum;
7088 int sum_len;
7159 int write = rw & REQ_WRITE; 7089 int write = rw & REQ_WRITE;
7160 int ret = 0; 7090 int ret = 0;
7091 u16 csum_size;
7161 7092
7162 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 7093 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7163 7094
7164 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); 7095 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
7165
7166 if (!io_bio) { 7096 if (!io_bio) {
7167 ret = -ENOMEM; 7097 ret = -ENOMEM;
7168 goto free_ordered; 7098 goto free_ordered;
7169 } 7099 }
7170 7100
7171 dip = kmalloc(sizeof(*dip), GFP_NOFS); 7101 if (!skip_sum && !write) {
7102 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7103 sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
7104 sum_len *= csum_size;
7105 } else {
7106 sum_len = 0;
7107 }
7108
7109 dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
7172 if (!dip) { 7110 if (!dip) {
7173 ret = -ENOMEM; 7111 ret = -ENOMEM;
7174 goto free_io_bio; 7112 goto free_io_bio;
@@ -7443,10 +7381,23 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7443 * whoever cleared the private bit is responsible 7381 * whoever cleared the private bit is responsible
7444 * for the finish_ordered_io 7382 * for the finish_ordered_io
7445 */ 7383 */
7446 if (TestClearPagePrivate2(page) && 7384 if (TestClearPagePrivate2(page)) {
7447 btrfs_dec_test_ordered_pending(inode, &ordered, page_start, 7385 struct btrfs_ordered_inode_tree *tree;
7448 PAGE_CACHE_SIZE, 1)) { 7386 u64 new_len;
7449 btrfs_finish_ordered_io(ordered); 7387
7388 tree = &BTRFS_I(inode)->ordered_tree;
7389
7390 spin_lock_irq(&tree->lock);
7391 set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
7392 new_len = page_start - ordered->file_offset;
7393 if (new_len < ordered->truncated_len)
7394 ordered->truncated_len = new_len;
7395 spin_unlock_irq(&tree->lock);
7396
7397 if (btrfs_dec_test_ordered_pending(inode, &ordered,
7398 page_start,
7399 PAGE_CACHE_SIZE, 1))
7400 btrfs_finish_ordered_io(ordered);
7450 } 7401 }
7451 btrfs_put_ordered_extent(ordered); 7402 btrfs_put_ordered_extent(ordered);
7452 cached_state = NULL; 7403 cached_state = NULL;
@@ -7612,7 +7563,6 @@ static int btrfs_truncate(struct inode *inode)
7612 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 7563 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
7613 7564
7614 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 7565 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
7615 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
7616 7566
7617 /* 7567 /*
7618 * Yes ladies and gentelment, this is indeed ugly. The fact is we have 7568 * Yes ladies and gentelment, this is indeed ugly. The fact is we have
@@ -7876,7 +7826,7 @@ void btrfs_destroy_inode(struct inode *inode)
7876 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 7826 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
7877 &BTRFS_I(inode)->runtime_flags)) { 7827 &BTRFS_I(inode)->runtime_flags)) {
7878 btrfs_info(root->fs_info, "inode %llu still on the orphan list", 7828 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
7879 (unsigned long long)btrfs_ino(inode)); 7829 btrfs_ino(inode));
7880 atomic_dec(&root->orphan_inodes); 7830 atomic_dec(&root->orphan_inodes);
7881 } 7831 }
7882 7832
@@ -7886,8 +7836,7 @@ void btrfs_destroy_inode(struct inode *inode)
7886 break; 7836 break;
7887 else { 7837 else {
7888 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup", 7838 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
7889 (unsigned long long)ordered->file_offset, 7839 ordered->file_offset, ordered->len);
7890 (unsigned long long)ordered->len);
7891 btrfs_remove_ordered_extent(inode, ordered); 7840 btrfs_remove_ordered_extent(inode, ordered);
7892 btrfs_put_ordered_extent(ordered); 7841 btrfs_put_ordered_extent(ordered);
7893 btrfs_put_ordered_extent(ordered); 7842 btrfs_put_ordered_extent(ordered);
@@ -8161,10 +8110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8161 new_dentry->d_name.name, 8110 new_dentry->d_name.name,
8162 new_dentry->d_name.len); 8111 new_dentry->d_name.len);
8163 } 8112 }
8164 if (!ret && new_inode->i_nlink == 0) { 8113 if (!ret && new_inode->i_nlink == 0)
8165 ret = btrfs_orphan_add(trans, new_dentry->d_inode); 8114 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
8166 BUG_ON(ret);
8167 }
8168 if (ret) { 8115 if (ret) {
8169 btrfs_abort_transaction(trans, root, ret); 8116 btrfs_abort_transaction(trans, root, ret);
8170 goto out_fail; 8117 goto out_fail;
@@ -8525,8 +8472,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8525 8472
8526 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); 8473 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
8527 cur_bytes = max(cur_bytes, min_size); 8474 cur_bytes = max(cur_bytes, min_size);
8528 ret = btrfs_reserve_extent(trans, root, cur_bytes, 8475 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
8529 min_size, 0, *alloc_hint, &ins, 1); 8476 *alloc_hint, &ins, 1);
8530 if (ret) { 8477 if (ret) {
8531 if (own_trans) 8478 if (own_trans)
8532 btrfs_end_transaction(trans, root); 8479 btrfs_end_transaction(trans, root);