aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/Kconfig9
-rw-r--r--fs/btrfs/Makefile5
-rw-r--r--fs/btrfs/backref.c93
-rw-r--r--fs/btrfs/backref.h2
-rw-r--r--fs/btrfs/btrfs_inode.h21
-rw-r--r--fs/btrfs/check-integrity.c422
-rw-r--r--fs/btrfs/compression.c11
-rw-r--r--fs/btrfs/ctree.c289
-rw-r--r--fs/btrfs/ctree.h161
-rw-r--r--fs/btrfs/delayed-inode.c46
-rw-r--r--fs/btrfs/delayed-ref.c8
-rw-r--r--fs/btrfs/dev-replace.c4
-rw-r--r--fs/btrfs/disk-io.c174
-rw-r--r--fs/btrfs/extent-tree.c184
-rw-r--r--fs/btrfs/extent_io.c664
-rw-r--r--fs/btrfs/extent_io.h35
-rw-r--r--fs/btrfs/file-item.c85
-rw-r--r--fs/btrfs/file.c11
-rw-r--r--fs/btrfs/free-space-cache.c525
-rw-r--r--fs/btrfs/free-space-cache.h11
-rw-r--r--fs/btrfs/inode.c615
-rw-r--r--fs/btrfs/ioctl.c745
-rw-r--r--fs/btrfs/lzo.c4
-rw-r--r--fs/btrfs/ordered-data.c28
-rw-r--r--fs/btrfs/ordered-data.h7
-rw-r--r--fs/btrfs/print-tree.c107
-rw-r--r--fs/btrfs/qgroup.c69
-rw-r--r--fs/btrfs/raid56.c14
-rw-r--r--fs/btrfs/relocation.c43
-rw-r--r--fs/btrfs/root-tree.c21
-rw-r--r--fs/btrfs/scrub.c42
-rw-r--r--fs/btrfs/send.c240
-rw-r--r--fs/btrfs/super.c145
-rw-r--r--fs/btrfs/tests/btrfs-tests.h34
-rw-r--r--fs/btrfs/tests/free-space-tests.c395
-rw-r--r--fs/btrfs/transaction.c34
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-log.c19
-rw-r--r--fs/btrfs/uuid-tree.c358
-rw-r--r--fs/btrfs/volumes.c613
-rw-r--r--fs/btrfs/volumes.h12
-rw-r--r--include/trace/events/btrfs.h60
-rw-r--r--include/uapi/linux/btrfs.h30
43 files changed, 4045 insertions, 2352 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 2b3b83296977..398cbd517be2 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -72,3 +72,12 @@ config BTRFS_DEBUG
72 performance, or export extra information via sysfs. 72 performance, or export extra information via sysfs.
73 73
74 If unsure, say N. 74 If unsure, say N.
75
76config BTRFS_ASSERT
77 bool "Btrfs assert support"
78 depends on BTRFS_FS
79 help
80 Enable run-time assertion checking. This will result in panics if
81 any of the assertions trip. This is meant for btrfs developers only.
82
83 If unsure, say N.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 3932224f99e9..a91a6a355cc5 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,10 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 export.o tree-log.o free-space-cache.o zlib.o lzo.o \ 9 export.o tree-log.o free-space-cache.o zlib.o lzo.o \
10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ 10 compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
11 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o 11 reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
12 uuid-tree.o
12 13
13btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o 14btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
14btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o 15btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
16
17btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8bc5e8ccb091..0552a599b28f 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -119,6 +119,26 @@ struct __prelim_ref {
119 u64 wanted_disk_byte; 119 u64 wanted_disk_byte;
120}; 120};
121 121
122static struct kmem_cache *btrfs_prelim_ref_cache;
123
124int __init btrfs_prelim_ref_init(void)
125{
126 btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
127 sizeof(struct __prelim_ref),
128 0,
129 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
130 NULL);
131 if (!btrfs_prelim_ref_cache)
132 return -ENOMEM;
133 return 0;
134}
135
136void btrfs_prelim_ref_exit(void)
137{
138 if (btrfs_prelim_ref_cache)
139 kmem_cache_destroy(btrfs_prelim_ref_cache);
140}
141
122/* 142/*
123 * the rules for all callers of this function are: 143 * the rules for all callers of this function are:
124 * - obtaining the parent is the goal 144 * - obtaining the parent is the goal
@@ -160,12 +180,12 @@ struct __prelim_ref {
160 180
161static int __add_prelim_ref(struct list_head *head, u64 root_id, 181static int __add_prelim_ref(struct list_head *head, u64 root_id,
162 struct btrfs_key *key, int level, 182 struct btrfs_key *key, int level,
163 u64 parent, u64 wanted_disk_byte, int count) 183 u64 parent, u64 wanted_disk_byte, int count,
184 gfp_t gfp_mask)
164{ 185{
165 struct __prelim_ref *ref; 186 struct __prelim_ref *ref;
166 187
167 /* in case we're adding delayed refs, we're holding the refs spinlock */ 188 ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
168 ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
169 if (!ref) 189 if (!ref)
170 return -ENOMEM; 190 return -ENOMEM;
171 191
@@ -295,10 +315,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
295 ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq); 315 ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
296 pr_debug("search slot in root %llu (level %d, ref count %d) returned " 316 pr_debug("search slot in root %llu (level %d, ref count %d) returned "
297 "%d for key (%llu %u %llu)\n", 317 "%d for key (%llu %u %llu)\n",
298 (unsigned long long)ref->root_id, level, ref->count, ret, 318 ref->root_id, level, ref->count, ret,
299 (unsigned long long)ref->key_for_search.objectid, 319 ref->key_for_search.objectid, ref->key_for_search.type,
300 ref->key_for_search.type, 320 ref->key_for_search.offset);
301 (unsigned long long)ref->key_for_search.offset);
302 if (ret < 0) 321 if (ret < 0)
303 goto out; 322 goto out;
304 323
@@ -365,11 +384,12 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
365 node = ulist_next(parents, &uiter); 384 node = ulist_next(parents, &uiter);
366 ref->parent = node ? node->val : 0; 385 ref->parent = node ? node->val : 0;
367 ref->inode_list = node ? 386 ref->inode_list = node ?
368 (struct extent_inode_elem *)(uintptr_t)node->aux : 0; 387 (struct extent_inode_elem *)(uintptr_t)node->aux : NULL;
369 388
370 /* additional parents require new refs being added here */ 389 /* additional parents require new refs being added here */
371 while ((node = ulist_next(parents, &uiter))) { 390 while ((node = ulist_next(parents, &uiter))) {
372 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); 391 new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
392 GFP_NOFS);
373 if (!new_ref) { 393 if (!new_ref) {
374 ret = -ENOMEM; 394 ret = -ENOMEM;
375 goto out; 395 goto out;
@@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode)
493 ref1->count += ref2->count; 513 ref1->count += ref2->count;
494 514
495 list_del(&ref2->list); 515 list_del(&ref2->list);
496 kfree(ref2); 516 kmem_cache_free(btrfs_prelim_ref_cache, ref2);
497 } 517 }
498 518
499 } 519 }
@@ -548,7 +568,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
548 ref = btrfs_delayed_node_to_tree_ref(node); 568 ref = btrfs_delayed_node_to_tree_ref(node);
549 ret = __add_prelim_ref(prefs, ref->root, &op_key, 569 ret = __add_prelim_ref(prefs, ref->root, &op_key,
550 ref->level + 1, 0, node->bytenr, 570 ref->level + 1, 0, node->bytenr,
551 node->ref_mod * sgn); 571 node->ref_mod * sgn, GFP_ATOMIC);
552 break; 572 break;
553 } 573 }
554 case BTRFS_SHARED_BLOCK_REF_KEY: { 574 case BTRFS_SHARED_BLOCK_REF_KEY: {
@@ -558,7 +578,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
558 ret = __add_prelim_ref(prefs, ref->root, NULL, 578 ret = __add_prelim_ref(prefs, ref->root, NULL,
559 ref->level + 1, ref->parent, 579 ref->level + 1, ref->parent,
560 node->bytenr, 580 node->bytenr,
561 node->ref_mod * sgn); 581 node->ref_mod * sgn, GFP_ATOMIC);
562 break; 582 break;
563 } 583 }
564 case BTRFS_EXTENT_DATA_REF_KEY: { 584 case BTRFS_EXTENT_DATA_REF_KEY: {
@@ -570,7 +590,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
570 key.offset = ref->offset; 590 key.offset = ref->offset;
571 ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0, 591 ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
572 node->bytenr, 592 node->bytenr,
573 node->ref_mod * sgn); 593 node->ref_mod * sgn, GFP_ATOMIC);
574 break; 594 break;
575 } 595 }
576 case BTRFS_SHARED_DATA_REF_KEY: { 596 case BTRFS_SHARED_DATA_REF_KEY: {
@@ -583,7 +603,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
583 key.offset = ref->offset; 603 key.offset = ref->offset;
584 ret = __add_prelim_ref(prefs, ref->root, &key, 0, 604 ret = __add_prelim_ref(prefs, ref->root, &key, 0,
585 ref->parent, node->bytenr, 605 ref->parent, node->bytenr,
586 node->ref_mod * sgn); 606 node->ref_mod * sgn, GFP_ATOMIC);
587 break; 607 break;
588 } 608 }
589 default: 609 default:
@@ -657,7 +677,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
657 case BTRFS_SHARED_BLOCK_REF_KEY: 677 case BTRFS_SHARED_BLOCK_REF_KEY:
658 ret = __add_prelim_ref(prefs, 0, NULL, 678 ret = __add_prelim_ref(prefs, 0, NULL,
659 *info_level + 1, offset, 679 *info_level + 1, offset,
660 bytenr, 1); 680 bytenr, 1, GFP_NOFS);
661 break; 681 break;
662 case BTRFS_SHARED_DATA_REF_KEY: { 682 case BTRFS_SHARED_DATA_REF_KEY: {
663 struct btrfs_shared_data_ref *sdref; 683 struct btrfs_shared_data_ref *sdref;
@@ -666,13 +686,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
666 sdref = (struct btrfs_shared_data_ref *)(iref + 1); 686 sdref = (struct btrfs_shared_data_ref *)(iref + 1);
667 count = btrfs_shared_data_ref_count(leaf, sdref); 687 count = btrfs_shared_data_ref_count(leaf, sdref);
668 ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, 688 ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
669 bytenr, count); 689 bytenr, count, GFP_NOFS);
670 break; 690 break;
671 } 691 }
672 case BTRFS_TREE_BLOCK_REF_KEY: 692 case BTRFS_TREE_BLOCK_REF_KEY:
673 ret = __add_prelim_ref(prefs, offset, NULL, 693 ret = __add_prelim_ref(prefs, offset, NULL,
674 *info_level + 1, 0, 694 *info_level + 1, 0,
675 bytenr, 1); 695 bytenr, 1, GFP_NOFS);
676 break; 696 break;
677 case BTRFS_EXTENT_DATA_REF_KEY: { 697 case BTRFS_EXTENT_DATA_REF_KEY: {
678 struct btrfs_extent_data_ref *dref; 698 struct btrfs_extent_data_ref *dref;
@@ -687,7 +707,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
687 key.offset = btrfs_extent_data_ref_offset(leaf, dref); 707 key.offset = btrfs_extent_data_ref_offset(leaf, dref);
688 root = btrfs_extent_data_ref_root(leaf, dref); 708 root = btrfs_extent_data_ref_root(leaf, dref);
689 ret = __add_prelim_ref(prefs, root, &key, 0, 0, 709 ret = __add_prelim_ref(prefs, root, &key, 0, 0,
690 bytenr, count); 710 bytenr, count, GFP_NOFS);
691 break; 711 break;
692 } 712 }
693 default: 713 default:
@@ -738,7 +758,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
738 case BTRFS_SHARED_BLOCK_REF_KEY: 758 case BTRFS_SHARED_BLOCK_REF_KEY:
739 ret = __add_prelim_ref(prefs, 0, NULL, 759 ret = __add_prelim_ref(prefs, 0, NULL,
740 info_level + 1, key.offset, 760 info_level + 1, key.offset,
741 bytenr, 1); 761 bytenr, 1, GFP_NOFS);
742 break; 762 break;
743 case BTRFS_SHARED_DATA_REF_KEY: { 763 case BTRFS_SHARED_DATA_REF_KEY: {
744 struct btrfs_shared_data_ref *sdref; 764 struct btrfs_shared_data_ref *sdref;
@@ -748,13 +768,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
748 struct btrfs_shared_data_ref); 768 struct btrfs_shared_data_ref);
749 count = btrfs_shared_data_ref_count(leaf, sdref); 769 count = btrfs_shared_data_ref_count(leaf, sdref);
750 ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset, 770 ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
751 bytenr, count); 771 bytenr, count, GFP_NOFS);
752 break; 772 break;
753 } 773 }
754 case BTRFS_TREE_BLOCK_REF_KEY: 774 case BTRFS_TREE_BLOCK_REF_KEY:
755 ret = __add_prelim_ref(prefs, key.offset, NULL, 775 ret = __add_prelim_ref(prefs, key.offset, NULL,
756 info_level + 1, 0, 776 info_level + 1, 0,
757 bytenr, 1); 777 bytenr, 1, GFP_NOFS);
758 break; 778 break;
759 case BTRFS_EXTENT_DATA_REF_KEY: { 779 case BTRFS_EXTENT_DATA_REF_KEY: {
760 struct btrfs_extent_data_ref *dref; 780 struct btrfs_extent_data_ref *dref;
@@ -770,7 +790,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
770 key.offset = btrfs_extent_data_ref_offset(leaf, dref); 790 key.offset = btrfs_extent_data_ref_offset(leaf, dref);
771 root = btrfs_extent_data_ref_root(leaf, dref); 791 root = btrfs_extent_data_ref_root(leaf, dref);
772 ret = __add_prelim_ref(prefs, root, &key, 0, 0, 792 ret = __add_prelim_ref(prefs, root, &key, 0, 0,
773 bytenr, count); 793 bytenr, count, GFP_NOFS);
774 break; 794 break;
775 } 795 }
776 default: 796 default:
@@ -911,7 +931,6 @@ again:
911 931
912 while (!list_empty(&prefs)) { 932 while (!list_empty(&prefs)) {
913 ref = list_first_entry(&prefs, struct __prelim_ref, list); 933 ref = list_first_entry(&prefs, struct __prelim_ref, list);
914 list_del(&ref->list);
915 WARN_ON(ref->count < 0); 934 WARN_ON(ref->count < 0);
916 if (ref->count && ref->root_id && ref->parent == 0) { 935 if (ref->count && ref->root_id && ref->parent == 0) {
917 /* no parent == root of tree */ 936 /* no parent == root of tree */
@@ -935,8 +954,10 @@ again:
935 } 954 }
936 ret = find_extent_in_eb(eb, bytenr, 955 ret = find_extent_in_eb(eb, bytenr,
937 *extent_item_pos, &eie); 956 *extent_item_pos, &eie);
938 ref->inode_list = eie;
939 free_extent_buffer(eb); 957 free_extent_buffer(eb);
958 if (ret < 0)
959 goto out;
960 ref->inode_list = eie;
940 } 961 }
941 ret = ulist_add_merge(refs, ref->parent, 962 ret = ulist_add_merge(refs, ref->parent,
942 (uintptr_t)ref->inode_list, 963 (uintptr_t)ref->inode_list,
@@ -954,7 +975,8 @@ again:
954 eie->next = ref->inode_list; 975 eie->next = ref->inode_list;
955 } 976 }
956 } 977 }
957 kfree(ref); 978 list_del(&ref->list);
979 kmem_cache_free(btrfs_prelim_ref_cache, ref);
958 } 980 }
959 981
960out: 982out:
@@ -962,13 +984,13 @@ out:
962 while (!list_empty(&prefs)) { 984 while (!list_empty(&prefs)) {
963 ref = list_first_entry(&prefs, struct __prelim_ref, list); 985 ref = list_first_entry(&prefs, struct __prelim_ref, list);
964 list_del(&ref->list); 986 list_del(&ref->list);
965 kfree(ref); 987 kmem_cache_free(btrfs_prelim_ref_cache, ref);
966 } 988 }
967 while (!list_empty(&prefs_delayed)) { 989 while (!list_empty(&prefs_delayed)) {
968 ref = list_first_entry(&prefs_delayed, struct __prelim_ref, 990 ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
969 list); 991 list);
970 list_del(&ref->list); 992 list_del(&ref->list);
971 kfree(ref); 993 kmem_cache_free(btrfs_prelim_ref_cache, ref);
972 } 994 }
973 995
974 return ret; 996 return ret;
@@ -1326,8 +1348,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
1326 found_key->type != BTRFS_METADATA_ITEM_KEY) || 1348 found_key->type != BTRFS_METADATA_ITEM_KEY) ||
1327 found_key->objectid > logical || 1349 found_key->objectid > logical ||
1328 found_key->objectid + size <= logical) { 1350 found_key->objectid + size <= logical) {
1329 pr_debug("logical %llu is not within any extent\n", 1351 pr_debug("logical %llu is not within any extent\n", logical);
1330 (unsigned long long)logical);
1331 return -ENOENT; 1352 return -ENOENT;
1332 } 1353 }
1333 1354
@@ -1340,11 +1361,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
1340 1361
1341 pr_debug("logical %llu is at position %llu within the extent (%llu " 1362 pr_debug("logical %llu is at position %llu within the extent (%llu "
1342 "EXTENT_ITEM %llu) flags %#llx size %u\n", 1363 "EXTENT_ITEM %llu) flags %#llx size %u\n",
1343 (unsigned long long)logical, 1364 logical, logical - found_key->objectid, found_key->objectid,
1344 (unsigned long long)(logical - found_key->objectid), 1365 found_key->offset, flags, item_size);
1345 (unsigned long long)found_key->objectid,
1346 (unsigned long long)found_key->offset,
1347 (unsigned long long)flags, item_size);
1348 1366
1349 WARN_ON(!flags_ret); 1367 WARN_ON(!flags_ret);
1350 if (flags_ret) { 1368 if (flags_ret) {
@@ -1516,7 +1534,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1516 while (!ret && (root_node = ulist_next(roots, &root_uiter))) { 1534 while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
1517 pr_debug("root %llu references leaf %llu, data list " 1535 pr_debug("root %llu references leaf %llu, data list "
1518 "%#llx\n", root_node->val, ref_node->val, 1536 "%#llx\n", root_node->val, ref_node->val,
1519 (long long)ref_node->aux); 1537 ref_node->aux);
1520 ret = iterate_leaf_refs((struct extent_inode_elem *) 1538 ret = iterate_leaf_refs((struct extent_inode_elem *)
1521 (uintptr_t)ref_node->aux, 1539 (uintptr_t)ref_node->aux,
1522 root_node->val, 1540 root_node->val,
@@ -1608,9 +1626,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
1608 name_len = btrfs_inode_ref_name_len(eb, iref); 1626 name_len = btrfs_inode_ref_name_len(eb, iref);
1609 /* path must be released before calling iterate()! */ 1627 /* path must be released before calling iterate()! */
1610 pr_debug("following ref at offset %u for inode %llu in " 1628 pr_debug("following ref at offset %u for inode %llu in "
1611 "tree %llu\n", cur, 1629 "tree %llu\n", cur, found_key.objectid,
1612 (unsigned long long)found_key.objectid, 1630 fs_root->objectid);
1613 (unsigned long long)fs_root->objectid);
1614 ret = iterate(parent, name_len, 1631 ret = iterate(parent, name_len,
1615 (unsigned long)(iref + 1), eb, ctx); 1632 (unsigned long)(iref + 1), eb, ctx);
1616 if (ret) 1633 if (ret)
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 8f2e76702932..a910b27a8ad9 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
72 struct btrfs_inode_extref **ret_extref, 72 struct btrfs_inode_extref **ret_extref,
73 u64 *found_off); 73 u64 *found_off);
74 74
75int __init btrfs_prelim_ref_init(void);
76void btrfs_prelim_ref_exit(void);
75#endif 77#endif
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 08b286b2a2c5..d0ae226926ee 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -218,6 +218,27 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
218 return 0; 218 return 0;
219} 219}
220 220
221struct btrfs_dio_private {
222 struct inode *inode;
223 u64 logical_offset;
224 u64 disk_bytenr;
225 u64 bytes;
226 void *private;
227
228 /* number of bios pending for this dio */
229 atomic_t pending_bios;
230
231 /* IO errors */
232 int errors;
233
234 /* orig_bio is our btrfs_io_bio */
235 struct bio *orig_bio;
236
237 /* dio_bio came from fs/direct-io.c */
238 struct bio *dio_bio;
239 u8 csum[0];
240};
241
221/* 242/*
222 * Disable DIO read nolock optimization, so new dio readers will be forced 243 * Disable DIO read nolock optimization, so new dio readers will be forced
223 * to grab i_mutex. It is used to avoid the endless truncate due to 244 * to grab i_mutex. It is used to avoid the endless truncate due to
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 1431a6965017..1c47be187240 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -701,15 +701,13 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
701 next_bytenr = btrfs_super_root(selected_super); 701 next_bytenr = btrfs_super_root(selected_super);
702 if (state->print_mask & 702 if (state->print_mask &
703 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 703 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
704 printk(KERN_INFO "root@%llu\n", 704 printk(KERN_INFO "root@%llu\n", next_bytenr);
705 (unsigned long long)next_bytenr);
706 break; 705 break;
707 case 1: 706 case 1:
708 next_bytenr = btrfs_super_chunk_root(selected_super); 707 next_bytenr = btrfs_super_chunk_root(selected_super);
709 if (state->print_mask & 708 if (state->print_mask &
710 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 709 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
711 printk(KERN_INFO "chunk@%llu\n", 710 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
712 (unsigned long long)next_bytenr);
713 break; 711 break;
714 case 2: 712 case 2:
715 next_bytenr = btrfs_super_log_root(selected_super); 713 next_bytenr = btrfs_super_log_root(selected_super);
@@ -717,8 +715,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
717 continue; 715 continue;
718 if (state->print_mask & 716 if (state->print_mask &
719 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 717 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
720 printk(KERN_INFO "log@%llu\n", 718 printk(KERN_INFO "log@%llu\n", next_bytenr);
721 (unsigned long long)next_bytenr);
722 break; 719 break;
723 } 720 }
724 721
@@ -727,7 +724,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
727 next_bytenr, state->metablock_size); 724 next_bytenr, state->metablock_size);
728 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 725 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
729 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 726 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
730 (unsigned long long)next_bytenr, num_copies); 727 next_bytenr, num_copies);
731 728
732 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 729 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
733 struct btrfsic_block *next_block; 730 struct btrfsic_block *next_block;
@@ -742,8 +739,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
742 printk(KERN_INFO "btrfsic:" 739 printk(KERN_INFO "btrfsic:"
743 " btrfsic_map_block(root @%llu," 740 " btrfsic_map_block(root @%llu,"
744 " mirror %d) failed!\n", 741 " mirror %d) failed!\n",
745 (unsigned long long)next_bytenr, 742 next_bytenr, mirror_num);
746 mirror_num);
747 kfree(selected_super); 743 kfree(selected_super);
748 return -1; 744 return -1;
749 } 745 }
@@ -767,7 +763,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
767 if (ret < (int)PAGE_CACHE_SIZE) { 763 if (ret < (int)PAGE_CACHE_SIZE) {
768 printk(KERN_INFO 764 printk(KERN_INFO
769 "btrfsic: read @logical %llu failed!\n", 765 "btrfsic: read @logical %llu failed!\n",
770 (unsigned long long)
771 tmp_next_block_ctx.start); 766 tmp_next_block_ctx.start);
772 btrfsic_release_block_ctx(&tmp_next_block_ctx); 767 btrfsic_release_block_ctx(&tmp_next_block_ctx);
773 kfree(selected_super); 768 kfree(selected_super);
@@ -813,7 +808,7 @@ static int btrfsic_process_superblock_dev_mirror(
813 (bh->b_data + (dev_bytenr & 4095)); 808 (bh->b_data + (dev_bytenr & 4095));
814 809
815 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 810 if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
816 super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) || 811 btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
817 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || 812 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
818 btrfs_super_nodesize(super_tmp) != state->metablock_size || 813 btrfs_super_nodesize(super_tmp) != state->metablock_size ||
819 btrfs_super_leafsize(super_tmp) != state->metablock_size || 814 btrfs_super_leafsize(super_tmp) != state->metablock_size ||
@@ -847,10 +842,8 @@ static int btrfsic_process_superblock_dev_mirror(
847 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" 842 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
848 " @%llu (%s/%llu/%d)\n", 843 " @%llu (%s/%llu/%d)\n",
849 superblock_bdev, 844 superblock_bdev,
850 rcu_str_deref(device->name), 845 rcu_str_deref(device->name), dev_bytenr,
851 (unsigned long long)dev_bytenr, 846 dev_state->name, dev_bytenr,
852 dev_state->name,
853 (unsigned long long)dev_bytenr,
854 superblock_mirror_num); 847 superblock_mirror_num);
855 list_add(&superblock_tmp->all_blocks_node, 848 list_add(&superblock_tmp->all_blocks_node,
856 &state->all_blocks_list); 849 &state->all_blocks_list);
@@ -880,20 +873,20 @@ static int btrfsic_process_superblock_dev_mirror(
880 tmp_disk_key.offset = 0; 873 tmp_disk_key.offset = 0;
881 switch (pass) { 874 switch (pass) {
882 case 0: 875 case 0:
883 tmp_disk_key.objectid = 876 btrfs_set_disk_key_objectid(&tmp_disk_key,
884 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 877 BTRFS_ROOT_TREE_OBJECTID);
885 additional_string = "initial root "; 878 additional_string = "initial root ";
886 next_bytenr = btrfs_super_root(super_tmp); 879 next_bytenr = btrfs_super_root(super_tmp);
887 break; 880 break;
888 case 1: 881 case 1:
889 tmp_disk_key.objectid = 882 btrfs_set_disk_key_objectid(&tmp_disk_key,
890 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 883 BTRFS_CHUNK_TREE_OBJECTID);
891 additional_string = "initial chunk "; 884 additional_string = "initial chunk ";
892 next_bytenr = btrfs_super_chunk_root(super_tmp); 885 next_bytenr = btrfs_super_chunk_root(super_tmp);
893 break; 886 break;
894 case 2: 887 case 2:
895 tmp_disk_key.objectid = 888 btrfs_set_disk_key_objectid(&tmp_disk_key,
896 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 889 BTRFS_TREE_LOG_OBJECTID);
897 additional_string = "initial log "; 890 additional_string = "initial log ";
898 next_bytenr = btrfs_super_log_root(super_tmp); 891 next_bytenr = btrfs_super_log_root(super_tmp);
899 if (0 == next_bytenr) 892 if (0 == next_bytenr)
@@ -906,7 +899,7 @@ static int btrfsic_process_superblock_dev_mirror(
906 next_bytenr, state->metablock_size); 899 next_bytenr, state->metablock_size);
907 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 900 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
908 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 901 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
909 (unsigned long long)next_bytenr, num_copies); 902 next_bytenr, num_copies);
910 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 903 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
911 struct btrfsic_block *next_block; 904 struct btrfsic_block *next_block;
912 struct btrfsic_block_data_ctx tmp_next_block_ctx; 905 struct btrfsic_block_data_ctx tmp_next_block_ctx;
@@ -918,8 +911,7 @@ static int btrfsic_process_superblock_dev_mirror(
918 mirror_num)) { 911 mirror_num)) {
919 printk(KERN_INFO "btrfsic: btrfsic_map_block(" 912 printk(KERN_INFO "btrfsic: btrfsic_map_block("
920 "bytenr @%llu, mirror %d) failed!\n", 913 "bytenr @%llu, mirror %d) failed!\n",
921 (unsigned long long)next_bytenr, 914 next_bytenr, mirror_num);
922 mirror_num);
923 brelse(bh); 915 brelse(bh);
924 return -1; 916 return -1;
925 } 917 }
@@ -1003,19 +995,17 @@ continue_with_new_stack_frame:
1003 (struct btrfs_leaf *)sf->hdr; 995 (struct btrfs_leaf *)sf->hdr;
1004 996
1005 if (-1 == sf->i) { 997 if (-1 == sf->i) {
1006 sf->nr = le32_to_cpu(leafhdr->header.nritems); 998 sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
1007 999
1008 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1000 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1009 printk(KERN_INFO 1001 printk(KERN_INFO
1010 "leaf %llu items %d generation %llu" 1002 "leaf %llu items %d generation %llu"
1011 " owner %llu\n", 1003 " owner %llu\n",
1012 (unsigned long long) 1004 sf->block_ctx->start, sf->nr,
1013 sf->block_ctx->start, 1005 btrfs_stack_header_generation(
1014 sf->nr, 1006 &leafhdr->header),
1015 (unsigned long long) 1007 btrfs_stack_header_owner(
1016 le64_to_cpu(leafhdr->header.generation), 1008 &leafhdr->header));
1017 (unsigned long long)
1018 le64_to_cpu(leafhdr->header.owner));
1019 } 1009 }
1020 1010
1021continue_with_current_leaf_stack_frame: 1011continue_with_current_leaf_stack_frame:
@@ -1047,10 +1037,10 @@ leaf_item_out_of_bounce_error:
1047 &disk_item, 1037 &disk_item,
1048 disk_item_offset, 1038 disk_item_offset,
1049 sizeof(struct btrfs_item)); 1039 sizeof(struct btrfs_item));
1050 item_offset = le32_to_cpu(disk_item.offset); 1040 item_offset = btrfs_stack_item_offset(&disk_item);
1051 item_size = le32_to_cpu(disk_item.size); 1041 item_size = btrfs_stack_item_offset(&disk_item);
1052 disk_key = &disk_item.key; 1042 disk_key = &disk_item.key;
1053 type = disk_key->type; 1043 type = btrfs_disk_key_type(disk_key);
1054 1044
1055 if (BTRFS_ROOT_ITEM_KEY == type) { 1045 if (BTRFS_ROOT_ITEM_KEY == type) {
1056 struct btrfs_root_item root_item; 1046 struct btrfs_root_item root_item;
@@ -1066,7 +1056,7 @@ leaf_item_out_of_bounce_error:
1066 sf->block_ctx, &root_item, 1056 sf->block_ctx, &root_item,
1067 root_item_offset, 1057 root_item_offset,
1068 item_size); 1058 item_size);
1069 next_bytenr = le64_to_cpu(root_item.bytenr); 1059 next_bytenr = btrfs_root_bytenr(&root_item);
1070 1060
1071 sf->error = 1061 sf->error =
1072 btrfsic_create_link_to_next_block( 1062 btrfsic_create_link_to_next_block(
@@ -1081,8 +1071,8 @@ leaf_item_out_of_bounce_error:
1081 &sf->num_copies, 1071 &sf->num_copies,
1082 &sf->mirror_num, 1072 &sf->mirror_num,
1083 disk_key, 1073 disk_key,
1084 le64_to_cpu(root_item. 1074 btrfs_root_generation(
1085 generation)); 1075 &root_item));
1086 if (sf->error) 1076 if (sf->error)
1087 goto one_stack_frame_backwards; 1077 goto one_stack_frame_backwards;
1088 1078
@@ -1130,18 +1120,17 @@ leaf_item_out_of_bounce_error:
1130 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; 1120 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1131 1121
1132 if (-1 == sf->i) { 1122 if (-1 == sf->i) {
1133 sf->nr = le32_to_cpu(nodehdr->header.nritems); 1123 sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
1134 1124
1135 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1125 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1136 printk(KERN_INFO "node %llu level %d items %d" 1126 printk(KERN_INFO "node %llu level %d items %d"
1137 " generation %llu owner %llu\n", 1127 " generation %llu owner %llu\n",
1138 (unsigned long long)
1139 sf->block_ctx->start, 1128 sf->block_ctx->start,
1140 nodehdr->header.level, sf->nr, 1129 nodehdr->header.level, sf->nr,
1141 (unsigned long long) 1130 btrfs_stack_header_generation(
1142 le64_to_cpu(nodehdr->header.generation), 1131 &nodehdr->header),
1143 (unsigned long long) 1132 btrfs_stack_header_owner(
1144 le64_to_cpu(nodehdr->header.owner)); 1133 &nodehdr->header));
1145 } 1134 }
1146 1135
1147continue_with_current_node_stack_frame: 1136continue_with_current_node_stack_frame:
@@ -1168,7 +1157,7 @@ continue_with_current_node_stack_frame:
1168 btrfsic_read_from_block_data( 1157 btrfsic_read_from_block_data(
1169 sf->block_ctx, &key_ptr, key_ptr_offset, 1158 sf->block_ctx, &key_ptr, key_ptr_offset,
1170 sizeof(struct btrfs_key_ptr)); 1159 sizeof(struct btrfs_key_ptr));
1171 next_bytenr = le64_to_cpu(key_ptr.blockptr); 1160 next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
1172 1161
1173 sf->error = btrfsic_create_link_to_next_block( 1162 sf->error = btrfsic_create_link_to_next_block(
1174 state, 1163 state,
@@ -1182,7 +1171,7 @@ continue_with_current_node_stack_frame:
1182 &sf->num_copies, 1171 &sf->num_copies,
1183 &sf->mirror_num, 1172 &sf->mirror_num,
1184 &key_ptr.key, 1173 &key_ptr.key,
1185 le64_to_cpu(key_ptr.generation)); 1174 btrfs_stack_key_generation(&key_ptr));
1186 if (sf->error) 1175 if (sf->error)
1187 goto one_stack_frame_backwards; 1176 goto one_stack_frame_backwards;
1188 1177
@@ -1247,8 +1236,7 @@ static void btrfsic_read_from_block_data(
1247 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; 1236 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1248 1237
1249 WARN_ON(offset + len > block_ctx->len); 1238 WARN_ON(offset + len > block_ctx->len);
1250 offset_in_page = (start_offset + offset) & 1239 offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
1251 ((unsigned long)PAGE_CACHE_SIZE - 1);
1252 1240
1253 while (len > 0) { 1241 while (len > 0) {
1254 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); 1242 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
@@ -1290,7 +1278,7 @@ static int btrfsic_create_link_to_next_block(
1290 next_bytenr, state->metablock_size); 1278 next_bytenr, state->metablock_size);
1291 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1279 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1292 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1280 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1293 (unsigned long long)next_bytenr, *num_copiesp); 1281 next_bytenr, *num_copiesp);
1294 *mirror_nump = 1; 1282 *mirror_nump = 1;
1295 } 1283 }
1296 1284
@@ -1307,7 +1295,7 @@ static int btrfsic_create_link_to_next_block(
1307 if (ret) { 1295 if (ret) {
1308 printk(KERN_INFO 1296 printk(KERN_INFO
1309 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 1297 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1310 (unsigned long long)next_bytenr, *mirror_nump); 1298 next_bytenr, *mirror_nump);
1311 btrfsic_release_block_ctx(next_block_ctx); 1299 btrfsic_release_block_ctx(next_block_ctx);
1312 *next_blockp = NULL; 1300 *next_blockp = NULL;
1313 return -1; 1301 return -1;
@@ -1335,20 +1323,16 @@ static int btrfsic_create_link_to_next_block(
1335 "Referenced block @%llu (%s/%llu/%d)" 1323 "Referenced block @%llu (%s/%llu/%d)"
1336 " found in hash table, %c," 1324 " found in hash table, %c,"
1337 " bytenr mismatch (!= stored %llu).\n", 1325 " bytenr mismatch (!= stored %llu).\n",
1338 (unsigned long long)next_bytenr, 1326 next_bytenr, next_block_ctx->dev->name,
1339 next_block_ctx->dev->name, 1327 next_block_ctx->dev_bytenr, *mirror_nump,
1340 (unsigned long long)next_block_ctx->dev_bytenr,
1341 *mirror_nump,
1342 btrfsic_get_block_type(state, next_block), 1328 btrfsic_get_block_type(state, next_block),
1343 (unsigned long long)next_block->logical_bytenr); 1329 next_block->logical_bytenr);
1344 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1330 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1345 printk(KERN_INFO 1331 printk(KERN_INFO
1346 "Referenced block @%llu (%s/%llu/%d)" 1332 "Referenced block @%llu (%s/%llu/%d)"
1347 " found in hash table, %c.\n", 1333 " found in hash table, %c.\n",
1348 (unsigned long long)next_bytenr, 1334 next_bytenr, next_block_ctx->dev->name,
1349 next_block_ctx->dev->name, 1335 next_block_ctx->dev_bytenr, *mirror_nump,
1350 (unsigned long long)next_block_ctx->dev_bytenr,
1351 *mirror_nump,
1352 btrfsic_get_block_type(state, next_block)); 1336 btrfsic_get_block_type(state, next_block));
1353 next_block->logical_bytenr = next_bytenr; 1337 next_block->logical_bytenr = next_bytenr;
1354 1338
@@ -1400,7 +1384,7 @@ static int btrfsic_create_link_to_next_block(
1400 if (ret < (int)next_block_ctx->len) { 1384 if (ret < (int)next_block_ctx->len) {
1401 printk(KERN_INFO 1385 printk(KERN_INFO
1402 "btrfsic: read block @logical %llu failed!\n", 1386 "btrfsic: read block @logical %llu failed!\n",
1403 (unsigned long long)next_bytenr); 1387 next_bytenr);
1404 btrfsic_release_block_ctx(next_block_ctx); 1388 btrfsic_release_block_ctx(next_block_ctx);
1405 *next_blockp = NULL; 1389 *next_blockp = NULL;
1406 return -1; 1390 return -1;
@@ -1444,12 +1428,12 @@ static int btrfsic_handle_extent_data(
1444 file_extent_item_offset, 1428 file_extent_item_offset,
1445 offsetof(struct btrfs_file_extent_item, disk_num_bytes)); 1429 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1446 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || 1430 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1447 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { 1431 btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
1448 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1432 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1449 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", 1433 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1450 file_extent_item.type, 1434 file_extent_item.type,
1451 (unsigned long long) 1435 btrfs_stack_file_extent_disk_bytenr(
1452 le64_to_cpu(file_extent_item.disk_bytenr)); 1436 &file_extent_item));
1453 return 0; 1437 return 0;
1454 } 1438 }
1455 1439
@@ -1463,20 +1447,19 @@ static int btrfsic_handle_extent_data(
1463 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1447 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1464 file_extent_item_offset, 1448 file_extent_item_offset,
1465 sizeof(struct btrfs_file_extent_item)); 1449 sizeof(struct btrfs_file_extent_item));
1466 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + 1450 next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
1467 le64_to_cpu(file_extent_item.offset); 1451 btrfs_stack_file_extent_offset(&file_extent_item);
1468 generation = le64_to_cpu(file_extent_item.generation); 1452 generation = btrfs_stack_file_extent_generation(&file_extent_item);
1469 num_bytes = le64_to_cpu(file_extent_item.num_bytes); 1453 num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
1470 generation = le64_to_cpu(file_extent_item.generation); 1454 generation = btrfs_stack_file_extent_generation(&file_extent_item);
1471 1455
1472 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1456 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1473 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," 1457 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1474 " offset = %llu, num_bytes = %llu\n", 1458 " offset = %llu, num_bytes = %llu\n",
1475 file_extent_item.type, 1459 file_extent_item.type,
1476 (unsigned long long) 1460 btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
1477 le64_to_cpu(file_extent_item.disk_bytenr), 1461 btrfs_stack_file_extent_offset(&file_extent_item),
1478 (unsigned long long)le64_to_cpu(file_extent_item.offset), 1462 num_bytes);
1479 (unsigned long long)num_bytes);
1480 while (num_bytes > 0) { 1463 while (num_bytes > 0) {
1481 u32 chunk_len; 1464 u32 chunk_len;
1482 int num_copies; 1465 int num_copies;
@@ -1492,7 +1475,7 @@ static int btrfsic_handle_extent_data(
1492 next_bytenr, state->datablock_size); 1475 next_bytenr, state->datablock_size);
1493 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1476 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1494 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1477 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1495 (unsigned long long)next_bytenr, num_copies); 1478 next_bytenr, num_copies);
1496 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 1479 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1497 struct btrfsic_block_data_ctx next_block_ctx; 1480 struct btrfsic_block_data_ctx next_block_ctx;
1498 struct btrfsic_block *next_block; 1481 struct btrfsic_block *next_block;
@@ -1504,8 +1487,7 @@ static int btrfsic_handle_extent_data(
1504 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1487 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1505 printk(KERN_INFO 1488 printk(KERN_INFO
1506 "\tdisk_bytenr = %llu, num_bytes %u\n", 1489 "\tdisk_bytenr = %llu, num_bytes %u\n",
1507 (unsigned long long)next_bytenr, 1490 next_bytenr, chunk_len);
1508 chunk_len);
1509 ret = btrfsic_map_block(state, next_bytenr, 1491 ret = btrfsic_map_block(state, next_bytenr,
1510 chunk_len, &next_block_ctx, 1492 chunk_len, &next_block_ctx,
1511 mirror_num); 1493 mirror_num);
@@ -1513,8 +1495,7 @@ static int btrfsic_handle_extent_data(
1513 printk(KERN_INFO 1495 printk(KERN_INFO
1514 "btrfsic: btrfsic_map_block(@%llu," 1496 "btrfsic: btrfsic_map_block(@%llu,"
1515 " mirror=%d) failed!\n", 1497 " mirror=%d) failed!\n",
1516 (unsigned long long)next_bytenr, 1498 next_bytenr, mirror_num);
1517 mirror_num);
1518 return -1; 1499 return -1;
1519 } 1500 }
1520 1501
@@ -1543,12 +1524,10 @@ static int btrfsic_handle_extent_data(
1543 " found in hash table, D," 1524 " found in hash table, D,"
1544 " bytenr mismatch" 1525 " bytenr mismatch"
1545 " (!= stored %llu).\n", 1526 " (!= stored %llu).\n",
1546 (unsigned long long)next_bytenr, 1527 next_bytenr,
1547 next_block_ctx.dev->name, 1528 next_block_ctx.dev->name,
1548 (unsigned long long)
1549 next_block_ctx.dev_bytenr, 1529 next_block_ctx.dev_bytenr,
1550 mirror_num, 1530 mirror_num,
1551 (unsigned long long)
1552 next_block->logical_bytenr); 1531 next_block->logical_bytenr);
1553 } 1532 }
1554 next_block->logical_bytenr = next_bytenr; 1533 next_block->logical_bytenr = next_bytenr;
@@ -1675,7 +1654,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
1675 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { 1654 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1676 printk(KERN_INFO 1655 printk(KERN_INFO
1677 "btrfsic: read_block() with unaligned bytenr %llu\n", 1656 "btrfsic: read_block() with unaligned bytenr %llu\n",
1678 (unsigned long long)block_ctx->dev_bytenr); 1657 block_ctx->dev_bytenr);
1679 return -1; 1658 return -1;
1680 } 1659 }
1681 1660
@@ -1772,10 +1751,8 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
1772 1751
1773 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", 1752 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1774 btrfsic_get_block_type(state, b_all), 1753 btrfsic_get_block_type(state, b_all),
1775 (unsigned long long)b_all->logical_bytenr, 1754 b_all->logical_bytenr, b_all->dev_state->name,
1776 b_all->dev_state->name, 1755 b_all->dev_bytenr, b_all->mirror_num);
1777 (unsigned long long)b_all->dev_bytenr,
1778 b_all->mirror_num);
1779 1756
1780 list_for_each(elem_ref_to, &b_all->ref_to_list) { 1757 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1781 const struct btrfsic_block_link *const l = 1758 const struct btrfsic_block_link *const l =
@@ -1787,16 +1764,13 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
1787 " refers %u* to" 1764 " refers %u* to"
1788 " %c @%llu (%s/%llu/%d)\n", 1765 " %c @%llu (%s/%llu/%d)\n",
1789 btrfsic_get_block_type(state, b_all), 1766 btrfsic_get_block_type(state, b_all),
1790 (unsigned long long)b_all->logical_bytenr, 1767 b_all->logical_bytenr, b_all->dev_state->name,
1791 b_all->dev_state->name, 1768 b_all->dev_bytenr, b_all->mirror_num,
1792 (unsigned long long)b_all->dev_bytenr,
1793 b_all->mirror_num,
1794 l->ref_cnt, 1769 l->ref_cnt,
1795 btrfsic_get_block_type(state, l->block_ref_to), 1770 btrfsic_get_block_type(state, l->block_ref_to),
1796 (unsigned long long)
1797 l->block_ref_to->logical_bytenr, 1771 l->block_ref_to->logical_bytenr,
1798 l->block_ref_to->dev_state->name, 1772 l->block_ref_to->dev_state->name,
1799 (unsigned long long)l->block_ref_to->dev_bytenr, 1773 l->block_ref_to->dev_bytenr,
1800 l->block_ref_to->mirror_num); 1774 l->block_ref_to->mirror_num);
1801 } 1775 }
1802 1776
@@ -1810,16 +1784,12 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
1810 " is ref %u* from" 1784 " is ref %u* from"
1811 " %c @%llu (%s/%llu/%d)\n", 1785 " %c @%llu (%s/%llu/%d)\n",
1812 btrfsic_get_block_type(state, b_all), 1786 btrfsic_get_block_type(state, b_all),
1813 (unsigned long long)b_all->logical_bytenr, 1787 b_all->logical_bytenr, b_all->dev_state->name,
1814 b_all->dev_state->name, 1788 b_all->dev_bytenr, b_all->mirror_num,
1815 (unsigned long long)b_all->dev_bytenr,
1816 b_all->mirror_num,
1817 l->ref_cnt, 1789 l->ref_cnt,
1818 btrfsic_get_block_type(state, l->block_ref_from), 1790 btrfsic_get_block_type(state, l->block_ref_from),
1819 (unsigned long long)
1820 l->block_ref_from->logical_bytenr, 1791 l->block_ref_from->logical_bytenr,
1821 l->block_ref_from->dev_state->name, 1792 l->block_ref_from->dev_state->name,
1822 (unsigned long long)
1823 l->block_ref_from->dev_bytenr, 1793 l->block_ref_from->dev_bytenr,
1824 l->block_ref_from->mirror_num); 1794 l->block_ref_from->mirror_num);
1825 } 1795 }
@@ -1896,8 +1866,8 @@ again:
1896 struct list_head *tmp_ref_to; 1866 struct list_head *tmp_ref_to;
1897 1867
1898 if (block->is_superblock) { 1868 if (block->is_superblock) {
1899 bytenr = le64_to_cpu(((struct btrfs_super_block *) 1869 bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
1900 mapped_datav[0])->bytenr); 1870 mapped_datav[0]);
1901 if (num_pages * PAGE_CACHE_SIZE < 1871 if (num_pages * PAGE_CACHE_SIZE <
1902 BTRFS_SUPER_INFO_SIZE) { 1872 BTRFS_SUPER_INFO_SIZE) {
1903 printk(KERN_INFO 1873 printk(KERN_INFO
@@ -1923,8 +1893,9 @@ again:
1923 return; 1893 return;
1924 } 1894 }
1925 processed_len = state->metablock_size; 1895 processed_len = state->metablock_size;
1926 bytenr = le64_to_cpu(((struct btrfs_header *) 1896 bytenr = btrfs_stack_header_bytenr(
1927 mapped_datav[0])->bytenr); 1897 (struct btrfs_header *)
1898 mapped_datav[0]);
1928 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1899 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1929 dev_state, 1900 dev_state,
1930 dev_bytenr); 1901 dev_bytenr);
@@ -1935,12 +1906,9 @@ again:
1935 " found in hash table, %c," 1906 " found in hash table, %c,"
1936 " bytenr mismatch" 1907 " bytenr mismatch"
1937 " (!= stored %llu).\n", 1908 " (!= stored %llu).\n",
1938 (unsigned long long)bytenr, 1909 bytenr, dev_state->name, dev_bytenr,
1939 dev_state->name,
1940 (unsigned long long)dev_bytenr,
1941 block->mirror_num, 1910 block->mirror_num,
1942 btrfsic_get_block_type(state, block), 1911 btrfsic_get_block_type(state, block),
1943 (unsigned long long)
1944 block->logical_bytenr); 1912 block->logical_bytenr);
1945 block->logical_bytenr = bytenr; 1913 block->logical_bytenr = bytenr;
1946 } else if (state->print_mask & 1914 } else if (state->print_mask &
@@ -1948,9 +1916,7 @@ again:
1948 printk(KERN_INFO 1916 printk(KERN_INFO
1949 "Written block @%llu (%s/%llu/%d)" 1917 "Written block @%llu (%s/%llu/%d)"
1950 " found in hash table, %c.\n", 1918 " found in hash table, %c.\n",
1951 (unsigned long long)bytenr, 1919 bytenr, dev_state->name, dev_bytenr,
1952 dev_state->name,
1953 (unsigned long long)dev_bytenr,
1954 block->mirror_num, 1920 block->mirror_num,
1955 btrfsic_get_block_type(state, block)); 1921 btrfsic_get_block_type(state, block));
1956 } else { 1922 } else {
@@ -1966,9 +1932,7 @@ again:
1966 printk(KERN_INFO 1932 printk(KERN_INFO
1967 "Written block @%llu (%s/%llu/%d)" 1933 "Written block @%llu (%s/%llu/%d)"
1968 " found in hash table, %c.\n", 1934 " found in hash table, %c.\n",
1969 (unsigned long long)bytenr, 1935 bytenr, dev_state->name, dev_bytenr,
1970 dev_state->name,
1971 (unsigned long long)dev_bytenr,
1972 block->mirror_num, 1936 block->mirror_num,
1973 btrfsic_get_block_type(state, block)); 1937 btrfsic_get_block_type(state, block));
1974 } 1938 }
@@ -1985,21 +1949,14 @@ again:
1985 " new(gen=%llu)," 1949 " new(gen=%llu),"
1986 " which is referenced by most recent superblock" 1950 " which is referenced by most recent superblock"
1987 " (superblockgen=%llu)!\n", 1951 " (superblockgen=%llu)!\n",
1988 btrfsic_get_block_type(state, block), 1952 btrfsic_get_block_type(state, block), bytenr,
1989 (unsigned long long)bytenr, 1953 dev_state->name, dev_bytenr, block->mirror_num,
1990 dev_state->name, 1954 block->generation,
1991 (unsigned long long)dev_bytenr, 1955 btrfs_disk_key_objectid(&block->disk_key),
1992 block->mirror_num,
1993 (unsigned long long)block->generation,
1994 (unsigned long long)
1995 le64_to_cpu(block->disk_key.objectid),
1996 block->disk_key.type, 1956 block->disk_key.type,
1997 (unsigned long long) 1957 btrfs_disk_key_offset(&block->disk_key),
1998 le64_to_cpu(block->disk_key.offset), 1958 btrfs_stack_header_generation(
1999 (unsigned long long) 1959 (struct btrfs_header *) mapped_datav[0]),
2000 le64_to_cpu(((struct btrfs_header *)
2001 mapped_datav[0])->generation),
2002 (unsigned long long)
2003 state->max_superblock_generation); 1960 state->max_superblock_generation);
2004 btrfsic_dump_tree(state); 1961 btrfsic_dump_tree(state);
2005 } 1962 }
@@ -2008,15 +1965,12 @@ again:
2008 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 1965 printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
2009 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," 1966 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
2010 " which is not yet iodone!\n", 1967 " which is not yet iodone!\n",
2011 btrfsic_get_block_type(state, block), 1968 btrfsic_get_block_type(state, block), bytenr,
2012 (unsigned long long)bytenr, 1969 dev_state->name, dev_bytenr, block->mirror_num,
2013 dev_state->name, 1970 block->generation,
2014 (unsigned long long)dev_bytenr, 1971 btrfs_stack_header_generation(
2015 block->mirror_num, 1972 (struct btrfs_header *)
2016 (unsigned long long)block->generation, 1973 mapped_datav[0]));
2017 (unsigned long long)
2018 le64_to_cpu(((struct btrfs_header *)
2019 mapped_datav[0])->generation));
2020 /* it would not be safe to go on */ 1974 /* it would not be safe to go on */
2021 btrfsic_dump_tree(state); 1975 btrfsic_dump_tree(state);
2022 goto continue_loop; 1976 goto continue_loop;
@@ -2056,7 +2010,7 @@ again:
2056 if (ret) { 2010 if (ret) {
2057 printk(KERN_INFO 2011 printk(KERN_INFO
2058 "btrfsic: btrfsic_map_block(root @%llu)" 2012 "btrfsic: btrfsic_map_block(root @%llu)"
2059 " failed!\n", (unsigned long long)bytenr); 2013 " failed!\n", bytenr);
2060 goto continue_loop; 2014 goto continue_loop;
2061 } 2015 }
2062 block_ctx.datav = mapped_datav; 2016 block_ctx.datav = mapped_datav;
@@ -2140,7 +2094,7 @@ again:
2140 printk(KERN_INFO 2094 printk(KERN_INFO
2141 "btrfsic: btrfsic_process_metablock" 2095 "btrfsic: btrfsic_process_metablock"
2142 "(root @%llu) failed!\n", 2096 "(root @%llu) failed!\n",
2143 (unsigned long long)dev_bytenr); 2097 dev_bytenr);
2144 } else { 2098 } else {
2145 block->is_metadata = 0; 2099 block->is_metadata = 0;
2146 block->mirror_num = 0; /* unknown */ 2100 block->mirror_num = 0; /* unknown */
@@ -2168,8 +2122,7 @@ again:
2168 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2122 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2169 printk(KERN_INFO "Written block (%s/%llu/?)" 2123 printk(KERN_INFO "Written block (%s/%llu/?)"
2170 " !found in hash table, D.\n", 2124 " !found in hash table, D.\n",
2171 dev_state->name, 2125 dev_state->name, dev_bytenr);
2172 (unsigned long long)dev_bytenr);
2173 if (!state->include_extent_data) { 2126 if (!state->include_extent_data) {
2174 /* ignore that written D block */ 2127 /* ignore that written D block */
2175 goto continue_loop; 2128 goto continue_loop;
@@ -2184,17 +2137,16 @@ again:
2184 block_ctx.pagev = NULL; 2137 block_ctx.pagev = NULL;
2185 } else { 2138 } else {
2186 processed_len = state->metablock_size; 2139 processed_len = state->metablock_size;
2187 bytenr = le64_to_cpu(((struct btrfs_header *) 2140 bytenr = btrfs_stack_header_bytenr(
2188 mapped_datav[0])->bytenr); 2141 (struct btrfs_header *)
2142 mapped_datav[0]);
2189 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 2143 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2190 dev_bytenr); 2144 dev_bytenr);
2191 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2145 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2192 printk(KERN_INFO 2146 printk(KERN_INFO
2193 "Written block @%llu (%s/%llu/?)" 2147 "Written block @%llu (%s/%llu/?)"
2194 " !found in hash table, M.\n", 2148 " !found in hash table, M.\n",
2195 (unsigned long long)bytenr, 2149 bytenr, dev_state->name, dev_bytenr);
2196 dev_state->name,
2197 (unsigned long long)dev_bytenr);
2198 2150
2199 ret = btrfsic_map_block(state, bytenr, processed_len, 2151 ret = btrfsic_map_block(state, bytenr, processed_len,
2200 &block_ctx, 0); 2152 &block_ctx, 0);
@@ -2202,7 +2154,7 @@ again:
2202 printk(KERN_INFO 2154 printk(KERN_INFO
2203 "btrfsic: btrfsic_map_block(root @%llu)" 2155 "btrfsic: btrfsic_map_block(root @%llu)"
2204 " failed!\n", 2156 " failed!\n",
2205 (unsigned long long)dev_bytenr); 2157 dev_bytenr);
2206 goto continue_loop; 2158 goto continue_loop;
2207 } 2159 }
2208 } 2160 }
@@ -2267,10 +2219,8 @@ again:
2267 printk(KERN_INFO 2219 printk(KERN_INFO
2268 "New written %c-block @%llu (%s/%llu/%d)\n", 2220 "New written %c-block @%llu (%s/%llu/%d)\n",
2269 is_metadata ? 'M' : 'D', 2221 is_metadata ? 'M' : 'D',
2270 (unsigned long long)block->logical_bytenr, 2222 block->logical_bytenr, block->dev_state->name,
2271 block->dev_state->name, 2223 block->dev_bytenr, block->mirror_num);
2272 (unsigned long long)block->dev_bytenr,
2273 block->mirror_num);
2274 list_add(&block->all_blocks_node, &state->all_blocks_list); 2224 list_add(&block->all_blocks_node, &state->all_blocks_list);
2275 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2225 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2276 2226
@@ -2281,7 +2231,7 @@ again:
2281 printk(KERN_INFO 2231 printk(KERN_INFO
2282 "btrfsic: process_metablock(root @%llu)" 2232 "btrfsic: process_metablock(root @%llu)"
2283 " failed!\n", 2233 " failed!\n",
2284 (unsigned long long)dev_bytenr); 2234 dev_bytenr);
2285 } 2235 }
2286 btrfsic_release_block_ctx(&block_ctx); 2236 btrfsic_release_block_ctx(&block_ctx);
2287 } 2237 }
@@ -2319,10 +2269,8 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2319 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2269 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2320 bio_error_status, 2270 bio_error_status,
2321 btrfsic_get_block_type(dev_state->state, block), 2271 btrfsic_get_block_type(dev_state->state, block),
2322 (unsigned long long)block->logical_bytenr, 2272 block->logical_bytenr, dev_state->name,
2323 dev_state->name, 2273 block->dev_bytenr, block->mirror_num);
2324 (unsigned long long)block->dev_bytenr,
2325 block->mirror_num);
2326 next_block = block->next_in_same_bio; 2274 next_block = block->next_in_same_bio;
2327 block->iodone_w_error = iodone_w_error; 2275 block->iodone_w_error = iodone_w_error;
2328 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2276 if (block->submit_bio_bh_rw & REQ_FLUSH) {
@@ -2332,7 +2280,6 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2332 printk(KERN_INFO 2280 printk(KERN_INFO
2333 "bio_end_io() new %s flush_gen=%llu\n", 2281 "bio_end_io() new %s flush_gen=%llu\n",
2334 dev_state->name, 2282 dev_state->name,
2335 (unsigned long long)
2336 dev_state->last_flush_gen); 2283 dev_state->last_flush_gen);
2337 } 2284 }
2338 if (block->submit_bio_bh_rw & REQ_FUA) 2285 if (block->submit_bio_bh_rw & REQ_FUA)
@@ -2358,10 +2305,8 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2358 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", 2305 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2359 iodone_w_error, 2306 iodone_w_error,
2360 btrfsic_get_block_type(dev_state->state, block), 2307 btrfsic_get_block_type(dev_state->state, block),
2361 (unsigned long long)block->logical_bytenr, 2308 block->logical_bytenr, block->dev_state->name,
2362 block->dev_state->name, 2309 block->dev_bytenr, block->mirror_num);
2363 (unsigned long long)block->dev_bytenr,
2364 block->mirror_num);
2365 2310
2366 block->iodone_w_error = iodone_w_error; 2311 block->iodone_w_error = iodone_w_error;
2367 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2312 if (block->submit_bio_bh_rw & REQ_FLUSH) {
@@ -2370,8 +2315,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2370 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2315 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2371 printk(KERN_INFO 2316 printk(KERN_INFO
2372 "bh_end_io() new %s flush_gen=%llu\n", 2317 "bh_end_io() new %s flush_gen=%llu\n",
2373 dev_state->name, 2318 dev_state->name, dev_state->last_flush_gen);
2374 (unsigned long long)dev_state->last_flush_gen);
2375 } 2319 }
2376 if (block->submit_bio_bh_rw & REQ_FUA) 2320 if (block->submit_bio_bh_rw & REQ_FUA)
2377 block->flush_gen = 0; /* FUA completed means block is on disk */ 2321 block->flush_gen = 0; /* FUA completed means block is on disk */
@@ -2396,26 +2340,20 @@ static int btrfsic_process_written_superblock(
2396 printk(KERN_INFO 2340 printk(KERN_INFO
2397 "btrfsic: superblock @%llu (%s/%llu/%d)" 2341 "btrfsic: superblock @%llu (%s/%llu/%d)"
2398 " with old gen %llu <= %llu\n", 2342 " with old gen %llu <= %llu\n",
2399 (unsigned long long)superblock->logical_bytenr, 2343 superblock->logical_bytenr,
2400 superblock->dev_state->name, 2344 superblock->dev_state->name,
2401 (unsigned long long)superblock->dev_bytenr, 2345 superblock->dev_bytenr, superblock->mirror_num,
2402 superblock->mirror_num,
2403 (unsigned long long)
2404 btrfs_super_generation(super_hdr), 2346 btrfs_super_generation(super_hdr),
2405 (unsigned long long)
2406 state->max_superblock_generation); 2347 state->max_superblock_generation);
2407 } else { 2348 } else {
2408 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2349 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2409 printk(KERN_INFO 2350 printk(KERN_INFO
2410 "btrfsic: got new superblock @%llu (%s/%llu/%d)" 2351 "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2411 " with new gen %llu > %llu\n", 2352 " with new gen %llu > %llu\n",
2412 (unsigned long long)superblock->logical_bytenr, 2353 superblock->logical_bytenr,
2413 superblock->dev_state->name, 2354 superblock->dev_state->name,
2414 (unsigned long long)superblock->dev_bytenr, 2355 superblock->dev_bytenr, superblock->mirror_num,
2415 superblock->mirror_num,
2416 (unsigned long long)
2417 btrfs_super_generation(super_hdr), 2356 btrfs_super_generation(super_hdr),
2418 (unsigned long long)
2419 state->max_superblock_generation); 2357 state->max_superblock_generation);
2420 2358
2421 state->max_superblock_generation = 2359 state->max_superblock_generation =
@@ -2432,43 +2370,41 @@ static int btrfsic_process_written_superblock(
2432 int num_copies; 2370 int num_copies;
2433 int mirror_num; 2371 int mirror_num;
2434 const char *additional_string = NULL; 2372 const char *additional_string = NULL;
2435 struct btrfs_disk_key tmp_disk_key; 2373 struct btrfs_disk_key tmp_disk_key = {0};
2436 2374
2437 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 2375 btrfs_set_disk_key_objectid(&tmp_disk_key,
2438 tmp_disk_key.offset = 0; 2376 BTRFS_ROOT_ITEM_KEY);
2377 btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
2439 2378
2440 switch (pass) { 2379 switch (pass) {
2441 case 0: 2380 case 0:
2442 tmp_disk_key.objectid = 2381 btrfs_set_disk_key_objectid(&tmp_disk_key,
2443 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 2382 BTRFS_ROOT_TREE_OBJECTID);
2444 additional_string = "root "; 2383 additional_string = "root ";
2445 next_bytenr = btrfs_super_root(super_hdr); 2384 next_bytenr = btrfs_super_root(super_hdr);
2446 if (state->print_mask & 2385 if (state->print_mask &
2447 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2386 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2448 printk(KERN_INFO "root@%llu\n", 2387 printk(KERN_INFO "root@%llu\n", next_bytenr);
2449 (unsigned long long)next_bytenr);
2450 break; 2388 break;
2451 case 1: 2389 case 1:
2452 tmp_disk_key.objectid = 2390 btrfs_set_disk_key_objectid(&tmp_disk_key,
2453 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 2391 BTRFS_CHUNK_TREE_OBJECTID);
2454 additional_string = "chunk "; 2392 additional_string = "chunk ";
2455 next_bytenr = btrfs_super_chunk_root(super_hdr); 2393 next_bytenr = btrfs_super_chunk_root(super_hdr);
2456 if (state->print_mask & 2394 if (state->print_mask &
2457 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2395 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2458 printk(KERN_INFO "chunk@%llu\n", 2396 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
2459 (unsigned long long)next_bytenr);
2460 break; 2397 break;
2461 case 2: 2398 case 2:
2462 tmp_disk_key.objectid = 2399 btrfs_set_disk_key_objectid(&tmp_disk_key,
2463 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 2400 BTRFS_TREE_LOG_OBJECTID);
2464 additional_string = "log "; 2401 additional_string = "log ";
2465 next_bytenr = btrfs_super_log_root(super_hdr); 2402 next_bytenr = btrfs_super_log_root(super_hdr);
2466 if (0 == next_bytenr) 2403 if (0 == next_bytenr)
2467 continue; 2404 continue;
2468 if (state->print_mask & 2405 if (state->print_mask &
2469 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2406 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2470 printk(KERN_INFO "log@%llu\n", 2407 printk(KERN_INFO "log@%llu\n", next_bytenr);
2471 (unsigned long long)next_bytenr);
2472 break; 2408 break;
2473 } 2409 }
2474 2410
@@ -2477,7 +2413,7 @@ static int btrfsic_process_written_superblock(
2477 next_bytenr, BTRFS_SUPER_INFO_SIZE); 2413 next_bytenr, BTRFS_SUPER_INFO_SIZE);
2478 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2414 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2479 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2415 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2480 (unsigned long long)next_bytenr, num_copies); 2416 next_bytenr, num_copies);
2481 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2417 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2482 int was_created; 2418 int was_created;
2483 2419
@@ -2493,8 +2429,7 @@ static int btrfsic_process_written_superblock(
2493 printk(KERN_INFO 2429 printk(KERN_INFO
2494 "btrfsic: btrfsic_map_block(@%llu," 2430 "btrfsic: btrfsic_map_block(@%llu,"
2495 " mirror=%d) failed!\n", 2431 " mirror=%d) failed!\n",
2496 (unsigned long long)next_bytenr, 2432 next_bytenr, mirror_num);
2497 mirror_num);
2498 return -1; 2433 return -1;
2499 } 2434 }
2500 2435
@@ -2579,26 +2514,22 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2579 " %u* refers to %c @%llu (%s/%llu/%d)\n", 2514 " %u* refers to %c @%llu (%s/%llu/%d)\n",
2580 recursion_level, 2515 recursion_level,
2581 btrfsic_get_block_type(state, block), 2516 btrfsic_get_block_type(state, block),
2582 (unsigned long long)block->logical_bytenr, 2517 block->logical_bytenr, block->dev_state->name,
2583 block->dev_state->name, 2518 block->dev_bytenr, block->mirror_num,
2584 (unsigned long long)block->dev_bytenr,
2585 block->mirror_num,
2586 l->ref_cnt, 2519 l->ref_cnt,
2587 btrfsic_get_block_type(state, l->block_ref_to), 2520 btrfsic_get_block_type(state, l->block_ref_to),
2588 (unsigned long long)
2589 l->block_ref_to->logical_bytenr, 2521 l->block_ref_to->logical_bytenr,
2590 l->block_ref_to->dev_state->name, 2522 l->block_ref_to->dev_state->name,
2591 (unsigned long long)l->block_ref_to->dev_bytenr, 2523 l->block_ref_to->dev_bytenr,
2592 l->block_ref_to->mirror_num); 2524 l->block_ref_to->mirror_num);
2593 if (l->block_ref_to->never_written) { 2525 if (l->block_ref_to->never_written) {
2594 printk(KERN_INFO "btrfs: attempt to write superblock" 2526 printk(KERN_INFO "btrfs: attempt to write superblock"
2595 " which references block %c @%llu (%s/%llu/%d)" 2527 " which references block %c @%llu (%s/%llu/%d)"
2596 " which is never written!\n", 2528 " which is never written!\n",
2597 btrfsic_get_block_type(state, l->block_ref_to), 2529 btrfsic_get_block_type(state, l->block_ref_to),
2598 (unsigned long long)
2599 l->block_ref_to->logical_bytenr, 2530 l->block_ref_to->logical_bytenr,
2600 l->block_ref_to->dev_state->name, 2531 l->block_ref_to->dev_state->name,
2601 (unsigned long long)l->block_ref_to->dev_bytenr, 2532 l->block_ref_to->dev_bytenr,
2602 l->block_ref_to->mirror_num); 2533 l->block_ref_to->mirror_num);
2603 ret = -1; 2534 ret = -1;
2604 } else if (!l->block_ref_to->is_iodone) { 2535 } else if (!l->block_ref_to->is_iodone) {
@@ -2606,10 +2537,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2606 " which references block %c @%llu (%s/%llu/%d)" 2537 " which references block %c @%llu (%s/%llu/%d)"
2607 " which is not yet iodone!\n", 2538 " which is not yet iodone!\n",
2608 btrfsic_get_block_type(state, l->block_ref_to), 2539 btrfsic_get_block_type(state, l->block_ref_to),
2609 (unsigned long long)
2610 l->block_ref_to->logical_bytenr, 2540 l->block_ref_to->logical_bytenr,
2611 l->block_ref_to->dev_state->name, 2541 l->block_ref_to->dev_state->name,
2612 (unsigned long long)l->block_ref_to->dev_bytenr, 2542 l->block_ref_to->dev_bytenr,
2613 l->block_ref_to->mirror_num); 2543 l->block_ref_to->mirror_num);
2614 ret = -1; 2544 ret = -1;
2615 } else if (l->block_ref_to->iodone_w_error) { 2545 } else if (l->block_ref_to->iodone_w_error) {
@@ -2617,10 +2547,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2617 " which references block %c @%llu (%s/%llu/%d)" 2547 " which references block %c @%llu (%s/%llu/%d)"
2618 " which has write error!\n", 2548 " which has write error!\n",
2619 btrfsic_get_block_type(state, l->block_ref_to), 2549 btrfsic_get_block_type(state, l->block_ref_to),
2620 (unsigned long long)
2621 l->block_ref_to->logical_bytenr, 2550 l->block_ref_to->logical_bytenr,
2622 l->block_ref_to->dev_state->name, 2551 l->block_ref_to->dev_state->name,
2623 (unsigned long long)l->block_ref_to->dev_bytenr, 2552 l->block_ref_to->dev_bytenr,
2624 l->block_ref_to->mirror_num); 2553 l->block_ref_to->mirror_num);
2625 ret = -1; 2554 ret = -1;
2626 } else if (l->parent_generation != 2555 } else if (l->parent_generation !=
@@ -2634,13 +2563,12 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2634 " with generation %llu !=" 2563 " with generation %llu !="
2635 " parent generation %llu!\n", 2564 " parent generation %llu!\n",
2636 btrfsic_get_block_type(state, l->block_ref_to), 2565 btrfsic_get_block_type(state, l->block_ref_to),
2637 (unsigned long long)
2638 l->block_ref_to->logical_bytenr, 2566 l->block_ref_to->logical_bytenr,
2639 l->block_ref_to->dev_state->name, 2567 l->block_ref_to->dev_state->name,
2640 (unsigned long long)l->block_ref_to->dev_bytenr, 2568 l->block_ref_to->dev_bytenr,
2641 l->block_ref_to->mirror_num, 2569 l->block_ref_to->mirror_num,
2642 (unsigned long long)l->block_ref_to->generation, 2570 l->block_ref_to->generation,
2643 (unsigned long long)l->parent_generation); 2571 l->parent_generation);
2644 ret = -1; 2572 ret = -1;
2645 } else if (l->block_ref_to->flush_gen > 2573 } else if (l->block_ref_to->flush_gen >
2646 l->block_ref_to->dev_state->last_flush_gen) { 2574 l->block_ref_to->dev_state->last_flush_gen) {
@@ -2650,13 +2578,10 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2650 " (block flush_gen=%llu," 2578 " (block flush_gen=%llu,"
2651 " dev->flush_gen=%llu)!\n", 2579 " dev->flush_gen=%llu)!\n",
2652 btrfsic_get_block_type(state, l->block_ref_to), 2580 btrfsic_get_block_type(state, l->block_ref_to),
2653 (unsigned long long)
2654 l->block_ref_to->logical_bytenr, 2581 l->block_ref_to->logical_bytenr,
2655 l->block_ref_to->dev_state->name, 2582 l->block_ref_to->dev_state->name,
2656 (unsigned long long)l->block_ref_to->dev_bytenr, 2583 l->block_ref_to->dev_bytenr,
2657 l->block_ref_to->mirror_num, 2584 l->block_ref_to->mirror_num, block->flush_gen,
2658 (unsigned long long)block->flush_gen,
2659 (unsigned long long)
2660 l->block_ref_to->dev_state->last_flush_gen); 2585 l->block_ref_to->dev_state->last_flush_gen);
2661 ret = -1; 2586 ret = -1;
2662 } else if (-1 == btrfsic_check_all_ref_blocks(state, 2587 } else if (-1 == btrfsic_check_all_ref_blocks(state,
@@ -2701,16 +2626,12 @@ static int btrfsic_is_block_ref_by_superblock(
2701 " is ref %u* from %c @%llu (%s/%llu/%d)\n", 2626 " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2702 recursion_level, 2627 recursion_level,
2703 btrfsic_get_block_type(state, block), 2628 btrfsic_get_block_type(state, block),
2704 (unsigned long long)block->logical_bytenr, 2629 block->logical_bytenr, block->dev_state->name,
2705 block->dev_state->name, 2630 block->dev_bytenr, block->mirror_num,
2706 (unsigned long long)block->dev_bytenr,
2707 block->mirror_num,
2708 l->ref_cnt, 2631 l->ref_cnt,
2709 btrfsic_get_block_type(state, l->block_ref_from), 2632 btrfsic_get_block_type(state, l->block_ref_from),
2710 (unsigned long long)
2711 l->block_ref_from->logical_bytenr, 2633 l->block_ref_from->logical_bytenr,
2712 l->block_ref_from->dev_state->name, 2634 l->block_ref_from->dev_state->name,
2713 (unsigned long long)
2714 l->block_ref_from->dev_bytenr, 2635 l->block_ref_from->dev_bytenr,
2715 l->block_ref_from->mirror_num); 2636 l->block_ref_from->mirror_num);
2716 if (l->block_ref_from->is_superblock && 2637 if (l->block_ref_from->is_superblock &&
@@ -2737,14 +2658,12 @@ static void btrfsic_print_add_link(const struct btrfsic_state *state,
2737 " to %c @%llu (%s/%llu/%d).\n", 2658 " to %c @%llu (%s/%llu/%d).\n",
2738 l->ref_cnt, 2659 l->ref_cnt,
2739 btrfsic_get_block_type(state, l->block_ref_from), 2660 btrfsic_get_block_type(state, l->block_ref_from),
2740 (unsigned long long)l->block_ref_from->logical_bytenr, 2661 l->block_ref_from->logical_bytenr,
2741 l->block_ref_from->dev_state->name, 2662 l->block_ref_from->dev_state->name,
2742 (unsigned long long)l->block_ref_from->dev_bytenr, 2663 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2743 l->block_ref_from->mirror_num,
2744 btrfsic_get_block_type(state, l->block_ref_to), 2664 btrfsic_get_block_type(state, l->block_ref_to),
2745 (unsigned long long)l->block_ref_to->logical_bytenr, 2665 l->block_ref_to->logical_bytenr,
2746 l->block_ref_to->dev_state->name, 2666 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2747 (unsigned long long)l->block_ref_to->dev_bytenr,
2748 l->block_ref_to->mirror_num); 2667 l->block_ref_to->mirror_num);
2749} 2668}
2750 2669
@@ -2756,14 +2675,12 @@ static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2756 " to %c @%llu (%s/%llu/%d).\n", 2675 " to %c @%llu (%s/%llu/%d).\n",
2757 l->ref_cnt, 2676 l->ref_cnt,
2758 btrfsic_get_block_type(state, l->block_ref_from), 2677 btrfsic_get_block_type(state, l->block_ref_from),
2759 (unsigned long long)l->block_ref_from->logical_bytenr, 2678 l->block_ref_from->logical_bytenr,
2760 l->block_ref_from->dev_state->name, 2679 l->block_ref_from->dev_state->name,
2761 (unsigned long long)l->block_ref_from->dev_bytenr, 2680 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2762 l->block_ref_from->mirror_num,
2763 btrfsic_get_block_type(state, l->block_ref_to), 2681 btrfsic_get_block_type(state, l->block_ref_to),
2764 (unsigned long long)l->block_ref_to->logical_bytenr, 2682 l->block_ref_to->logical_bytenr,
2765 l->block_ref_to->dev_state->name, 2683 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2766 (unsigned long long)l->block_ref_to->dev_bytenr,
2767 l->block_ref_to->mirror_num); 2684 l->block_ref_to->mirror_num);
2768} 2685}
2769 2686
@@ -2807,10 +2724,8 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2807 */ 2724 */
2808 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", 2725 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2809 btrfsic_get_block_type(state, block), 2726 btrfsic_get_block_type(state, block),
2810 (unsigned long long)block->logical_bytenr, 2727 block->logical_bytenr, block->dev_state->name,
2811 block->dev_state->name, 2728 block->dev_bytenr, block->mirror_num);
2812 (unsigned long long)block->dev_bytenr,
2813 block->mirror_num);
2814 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2729 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2815 printk("[...]\n"); 2730 printk("[...]\n");
2816 return; 2731 return;
@@ -2943,10 +2858,8 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
2943 "New %s%c-block @%llu (%s/%llu/%d)\n", 2858 "New %s%c-block @%llu (%s/%llu/%d)\n",
2944 additional_string, 2859 additional_string,
2945 btrfsic_get_block_type(state, block), 2860 btrfsic_get_block_type(state, block),
2946 (unsigned long long)block->logical_bytenr, 2861 block->logical_bytenr, dev_state->name,
2947 dev_state->name, 2862 block->dev_bytenr, mirror_num);
2948 (unsigned long long)block->dev_bytenr,
2949 mirror_num);
2950 list_add(&block->all_blocks_node, &state->all_blocks_list); 2863 list_add(&block->all_blocks_node, &state->all_blocks_list);
2951 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2864 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2952 if (NULL != was_created) 2865 if (NULL != was_created)
@@ -2980,7 +2893,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2980 printk(KERN_INFO "btrfsic:" 2893 printk(KERN_INFO "btrfsic:"
2981 " btrfsic_map_block(logical @%llu," 2894 " btrfsic_map_block(logical @%llu,"
2982 " mirror %d) failed!\n", 2895 " mirror %d) failed!\n",
2983 (unsigned long long)bytenr, mirror_num); 2896 bytenr, mirror_num);
2984 continue; 2897 continue;
2985 } 2898 }
2986 2899
@@ -2997,8 +2910,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2997 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," 2910 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2998 " buffer->log_bytenr=%llu, submit_bio(bdev=%s," 2911 " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2999 " phys_bytenr=%llu)!\n", 2912 " phys_bytenr=%llu)!\n",
3000 (unsigned long long)bytenr, dev_state->name, 2913 bytenr, dev_state->name, dev_bytenr);
3001 (unsigned long long)dev_bytenr);
3002 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2914 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
3003 ret = btrfsic_map_block(state, bytenr, 2915 ret = btrfsic_map_block(state, bytenr,
3004 state->metablock_size, 2916 state->metablock_size,
@@ -3008,10 +2920,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
3008 2920
3009 printk(KERN_INFO "Read logical bytenr @%llu maps to" 2921 printk(KERN_INFO "Read logical bytenr @%llu maps to"
3010 " (%s/%llu/%d)\n", 2922 " (%s/%llu/%d)\n",
3011 (unsigned long long)bytenr, 2923 bytenr, block_ctx.dev->name,
3012 block_ctx.dev->name, 2924 block_ctx.dev_bytenr, mirror_num);
3013 (unsigned long long)block_ctx.dev_bytenr,
3014 mirror_num);
3015 } 2925 }
3016 WARN_ON(1); 2926 WARN_ON(1);
3017 } 2927 }
@@ -3048,12 +2958,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
3048 if (dev_state->state->print_mask & 2958 if (dev_state->state->print_mask &
3049 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 2959 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3050 printk(KERN_INFO 2960 printk(KERN_INFO
3051 "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," 2961 "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
3052 " size=%lu, data=%p, bdev=%p)\n", 2962 " size=%zu, data=%p, bdev=%p)\n",
3053 rw, (unsigned long)bh->b_blocknr, 2963 rw, (unsigned long long)bh->b_blocknr,
3054 (unsigned long long)dev_bytenr, 2964 dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
3055 (unsigned long)bh->b_size, bh->b_data,
3056 bh->b_bdev);
3057 btrfsic_process_written_block(dev_state, dev_bytenr, 2965 btrfsic_process_written_block(dev_state, dev_bytenr,
3058 &bh->b_data, 1, NULL, 2966 &bh->b_data, 1, NULL,
3059 NULL, bh, rw); 2967 NULL, bh, rw);
@@ -3118,9 +3026,9 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
3118 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3026 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3119 printk(KERN_INFO 3027 printk(KERN_INFO
3120 "submit_bio(rw=0x%x, bi_vcnt=%u," 3028 "submit_bio(rw=0x%x, bi_vcnt=%u,"
3121 " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", 3029 " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
3122 rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, 3030 rw, bio->bi_vcnt,
3123 (unsigned long long)dev_bytenr, 3031 (unsigned long long)bio->bi_sector, dev_bytenr,
3124 bio->bi_bdev); 3032 bio->bi_bdev);
3125 3033
3126 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 3034 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
@@ -3213,19 +3121,19 @@ int btrfsic_mount(struct btrfs_root *root,
3213 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { 3121 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3214 printk(KERN_INFO 3122 printk(KERN_INFO
3215 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3123 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3216 root->nodesize, (unsigned long)PAGE_CACHE_SIZE); 3124 root->nodesize, PAGE_CACHE_SIZE);
3217 return -1; 3125 return -1;
3218 } 3126 }
3219 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3127 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3220 printk(KERN_INFO 3128 printk(KERN_INFO
3221 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3129 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3222 root->leafsize, (unsigned long)PAGE_CACHE_SIZE); 3130 root->leafsize, PAGE_CACHE_SIZE);
3223 return -1; 3131 return -1;
3224 } 3132 }
3225 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3133 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3226 printk(KERN_INFO 3134 printk(KERN_INFO
3227 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3135 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3228 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); 3136 root->sectorsize, PAGE_CACHE_SIZE);
3229 return -1; 3137 return -1;
3230 } 3138 }
3231 state = kzalloc(sizeof(*state), GFP_NOFS); 3139 state = kzalloc(sizeof(*state), GFP_NOFS);
@@ -3369,10 +3277,8 @@ void btrfsic_unmount(struct btrfs_root *root,
3369 " @%llu (%s/%llu/%d) on umount which is" 3277 " @%llu (%s/%llu/%d) on umount which is"
3370 " not yet iodone!\n", 3278 " not yet iodone!\n",
3371 btrfsic_get_block_type(state, b_all), 3279 btrfsic_get_block_type(state, b_all),
3372 (unsigned long long)b_all->logical_bytenr, 3280 b_all->logical_bytenr, b_all->dev_state->name,
3373 b_all->dev_state->name, 3281 b_all->dev_bytenr, b_all->mirror_num);
3374 (unsigned long long)b_all->dev_bytenr,
3375 b_all->mirror_num);
3376 } 3282 }
3377 3283
3378 mutex_unlock(&btrfsic_mutex); 3284 mutex_unlock(&btrfsic_mutex);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b189bd1e7a3e..6aad98cb343f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -132,9 +132,8 @@ static int check_compressed_csum(struct inode *inode,
132 printk(KERN_INFO "btrfs csum failed ino %llu " 132 printk(KERN_INFO "btrfs csum failed ino %llu "
133 "extent %llu csum %u " 133 "extent %llu csum %u "
134 "wanted %u mirror %d\n", 134 "wanted %u mirror %d\n",
135 (unsigned long long)btrfs_ino(inode), 135 btrfs_ino(inode), disk_start, csum, *cb_sum,
136 (unsigned long long)disk_start, 136 cb->mirror_num);
137 csum, *cb_sum, cb->mirror_num);
138 ret = -EIO; 137 ret = -EIO;
139 goto fail; 138 goto fail;
140 } 139 }
@@ -639,7 +638,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
639 faili = nr_pages - 1; 638 faili = nr_pages - 1;
640 cb->nr_pages = nr_pages; 639 cb->nr_pages = nr_pages;
641 640
642 add_ra_bio_pages(inode, em_start + em_len, cb); 641 /* In the parent-locked case, we only locked the range we are
642 * interested in. In all other cases, we can opportunistically
643 * cache decompressed data that goes beyond the requested range. */
644 if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
645 add_ra_bio_pages(inode, em_start + em_len, cb);
643 646
644 /* include any pages we added in add_ra-bio_pages */ 647 /* include any pages we added in add_ra-bio_pages */
645 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; 648 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ed504607d8ec..64346721173f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
274 else 274 else
275 btrfs_set_header_owner(cow, new_root_objectid); 275 btrfs_set_header_owner(cow, new_root_objectid);
276 276
277 write_extent_buffer(cow, root->fs_info->fsid, 277 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
278 (unsigned long)btrfs_header_fsid(cow),
279 BTRFS_FSID_SIZE); 278 BTRFS_FSID_SIZE);
280 279
281 WARN_ON(btrfs_header_generation(buf) > trans->transid); 280 WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -484,8 +483,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
484 struct rb_node **new; 483 struct rb_node **new;
485 struct rb_node *parent = NULL; 484 struct rb_node *parent = NULL;
486 struct tree_mod_elem *cur; 485 struct tree_mod_elem *cur;
486 int ret = 0;
487
488 BUG_ON(!tm);
489
490 tree_mod_log_write_lock(fs_info);
491 if (list_empty(&fs_info->tree_mod_seq_list)) {
492 tree_mod_log_write_unlock(fs_info);
493 /*
494 * Ok we no longer care about logging modifications, free up tm
495 * and return 0. Any callers shouldn't be using tm after
496 * calling tree_mod_log_insert, but if they do we can just
497 * change this to return a special error code to let the callers
498 * do their own thing.
499 */
500 kfree(tm);
501 return 0;
502 }
487 503
488 BUG_ON(!tm || !tm->seq); 504 spin_lock(&fs_info->tree_mod_seq_lock);
505 tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
506 spin_unlock(&fs_info->tree_mod_seq_lock);
489 507
490 tm_root = &fs_info->tree_mod_log; 508 tm_root = &fs_info->tree_mod_log;
491 new = &tm_root->rb_node; 509 new = &tm_root->rb_node;
@@ -501,14 +519,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
501 else if (cur->seq > tm->seq) 519 else if (cur->seq > tm->seq)
502 new = &((*new)->rb_right); 520 new = &((*new)->rb_right);
503 else { 521 else {
522 ret = -EEXIST;
504 kfree(tm); 523 kfree(tm);
505 return -EEXIST; 524 goto out;
506 } 525 }
507 } 526 }
508 527
509 rb_link_node(&tm->node, parent, new); 528 rb_link_node(&tm->node, parent, new);
510 rb_insert_color(&tm->node, tm_root); 529 rb_insert_color(&tm->node, tm_root);
511 return 0; 530out:
531 tree_mod_log_write_unlock(fs_info);
532 return ret;
512} 533}
513 534
514/* 535/*
@@ -524,57 +545,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
524 return 1; 545 return 1;
525 if (eb && btrfs_header_level(eb) == 0) 546 if (eb && btrfs_header_level(eb) == 0)
526 return 1; 547 return 1;
527
528 tree_mod_log_write_lock(fs_info);
529 if (list_empty(&fs_info->tree_mod_seq_list)) {
530 /*
531 * someone emptied the list while we were waiting for the lock.
532 * we must not add to the list when no blocker exists.
533 */
534 tree_mod_log_write_unlock(fs_info);
535 return 1;
536 }
537
538 return 0; 548 return 0;
539} 549}
540 550
541/*
542 * This allocates memory and gets a tree modification sequence number.
543 *
544 * Returns <0 on error.
545 * Returns >0 (the added sequence number) on success.
546 */
547static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
548 struct tree_mod_elem **tm_ret)
549{
550 struct tree_mod_elem *tm;
551
552 /*
553 * once we switch from spin locks to something different, we should
554 * honor the flags parameter here.
555 */
556 tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
557 if (!tm)
558 return -ENOMEM;
559
560 spin_lock(&fs_info->tree_mod_seq_lock);
561 tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
562 spin_unlock(&fs_info->tree_mod_seq_lock);
563
564 return tm->seq;
565}
566
567static inline int 551static inline int
568__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, 552__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
569 struct extent_buffer *eb, int slot, 553 struct extent_buffer *eb, int slot,
570 enum mod_log_op op, gfp_t flags) 554 enum mod_log_op op, gfp_t flags)
571{ 555{
572 int ret;
573 struct tree_mod_elem *tm; 556 struct tree_mod_elem *tm;
574 557
575 ret = tree_mod_alloc(fs_info, flags, &tm); 558 tm = kzalloc(sizeof(*tm), flags);
576 if (ret < 0) 559 if (!tm)
577 return ret; 560 return -ENOMEM;
578 561
579 tm->index = eb->start >> PAGE_CACHE_SHIFT; 562 tm->index = eb->start >> PAGE_CACHE_SHIFT;
580 if (op != MOD_LOG_KEY_ADD) { 563 if (op != MOD_LOG_KEY_ADD) {
@@ -589,34 +572,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
589} 572}
590 573
591static noinline int 574static noinline int
592tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, 575tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
593 struct extent_buffer *eb, int slot, 576 struct extent_buffer *eb, int slot,
594 enum mod_log_op op, gfp_t flags) 577 enum mod_log_op op, gfp_t flags)
595{ 578{
596 int ret;
597
598 if (tree_mod_dont_log(fs_info, eb)) 579 if (tree_mod_dont_log(fs_info, eb))
599 return 0; 580 return 0;
600 581
601 ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags); 582 return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
602
603 tree_mod_log_write_unlock(fs_info);
604 return ret;
605}
606
607static noinline int
608tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
609 int slot, enum mod_log_op op)
610{
611 return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
612}
613
614static noinline int
615tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
616 struct extent_buffer *eb, int slot,
617 enum mod_log_op op)
618{
619 return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
620} 583}
621 584
622static noinline int 585static noinline int
@@ -637,14 +600,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
637 * buffer, i.e. dst_slot < src_slot. 600 * buffer, i.e. dst_slot < src_slot.
638 */ 601 */
639 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { 602 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
640 ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot, 603 ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
641 MOD_LOG_KEY_REMOVE_WHILE_MOVING); 604 MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
642 BUG_ON(ret < 0); 605 BUG_ON(ret < 0);
643 } 606 }
644 607
645 ret = tree_mod_alloc(fs_info, flags, &tm); 608 tm = kzalloc(sizeof(*tm), flags);
646 if (ret < 0) 609 if (!tm)
647 goto out; 610 return -ENOMEM;
648 611
649 tm->index = eb->start >> PAGE_CACHE_SHIFT; 612 tm->index = eb->start >> PAGE_CACHE_SHIFT;
650 tm->slot = src_slot; 613 tm->slot = src_slot;
@@ -652,10 +615,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
652 tm->move.nr_items = nr_items; 615 tm->move.nr_items = nr_items;
653 tm->op = MOD_LOG_MOVE_KEYS; 616 tm->op = MOD_LOG_MOVE_KEYS;
654 617
655 ret = __tree_mod_log_insert(fs_info, tm); 618 return __tree_mod_log_insert(fs_info, tm);
656out:
657 tree_mod_log_write_unlock(fs_info);
658 return ret;
659} 619}
660 620
661static inline void 621static inline void
@@ -670,8 +630,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
670 630
671 nritems = btrfs_header_nritems(eb); 631 nritems = btrfs_header_nritems(eb);
672 for (i = nritems - 1; i >= 0; i--) { 632 for (i = nritems - 1; i >= 0; i--) {
673 ret = tree_mod_log_insert_key_locked(fs_info, eb, i, 633 ret = __tree_mod_log_insert_key(fs_info, eb, i,
674 MOD_LOG_KEY_REMOVE_WHILE_FREEING); 634 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
675 BUG_ON(ret < 0); 635 BUG_ON(ret < 0);
676 } 636 }
677} 637}
@@ -683,7 +643,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
683 int log_removal) 643 int log_removal)
684{ 644{
685 struct tree_mod_elem *tm; 645 struct tree_mod_elem *tm;
686 int ret;
687 646
688 if (tree_mod_dont_log(fs_info, NULL)) 647 if (tree_mod_dont_log(fs_info, NULL))
689 return 0; 648 return 0;
@@ -691,9 +650,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
691 if (log_removal) 650 if (log_removal)
692 __tree_mod_log_free_eb(fs_info, old_root); 651 __tree_mod_log_free_eb(fs_info, old_root);
693 652
694 ret = tree_mod_alloc(fs_info, flags, &tm); 653 tm = kzalloc(sizeof(*tm), flags);
695 if (ret < 0) 654 if (!tm)
696 goto out; 655 return -ENOMEM;
697 656
698 tm->index = new_root->start >> PAGE_CACHE_SHIFT; 657 tm->index = new_root->start >> PAGE_CACHE_SHIFT;
699 tm->old_root.logical = old_root->start; 658 tm->old_root.logical = old_root->start;
@@ -701,10 +660,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
701 tm->generation = btrfs_header_generation(old_root); 660 tm->generation = btrfs_header_generation(old_root);
702 tm->op = MOD_LOG_ROOT_REPLACE; 661 tm->op = MOD_LOG_ROOT_REPLACE;
703 662
704 ret = __tree_mod_log_insert(fs_info, tm); 663 return __tree_mod_log_insert(fs_info, tm);
705out:
706 tree_mod_log_write_unlock(fs_info);
707 return ret;
708} 664}
709 665
710static struct tree_mod_elem * 666static struct tree_mod_elem *
@@ -784,23 +740,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
784 if (tree_mod_dont_log(fs_info, NULL)) 740 if (tree_mod_dont_log(fs_info, NULL))
785 return; 741 return;
786 742
787 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) { 743 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
788 tree_mod_log_write_unlock(fs_info);
789 return; 744 return;
790 }
791 745
792 for (i = 0; i < nr_items; i++) { 746 for (i = 0; i < nr_items; i++) {
793 ret = tree_mod_log_insert_key_locked(fs_info, src, 747 ret = __tree_mod_log_insert_key(fs_info, src,
794 i + src_offset, 748 i + src_offset,
795 MOD_LOG_KEY_REMOVE); 749 MOD_LOG_KEY_REMOVE, GFP_NOFS);
796 BUG_ON(ret < 0); 750 BUG_ON(ret < 0);
797 ret = tree_mod_log_insert_key_locked(fs_info, dst, 751 ret = __tree_mod_log_insert_key(fs_info, dst,
798 i + dst_offset, 752 i + dst_offset,
799 MOD_LOG_KEY_ADD); 753 MOD_LOG_KEY_ADD,
754 GFP_NOFS);
800 BUG_ON(ret < 0); 755 BUG_ON(ret < 0);
801 } 756 }
802
803 tree_mod_log_write_unlock(fs_info);
804} 757}
805 758
806static inline void 759static inline void
@@ -819,9 +772,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
819{ 772{
820 int ret; 773 int ret;
821 774
822 ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, 775 ret = __tree_mod_log_insert_key(fs_info, eb, slot,
823 MOD_LOG_KEY_REPLACE, 776 MOD_LOG_KEY_REPLACE,
824 atomic ? GFP_ATOMIC : GFP_NOFS); 777 atomic ? GFP_ATOMIC : GFP_NOFS);
825 BUG_ON(ret < 0); 778 BUG_ON(ret < 0);
826} 779}
827 780
@@ -830,10 +783,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
830{ 783{
831 if (tree_mod_dont_log(fs_info, eb)) 784 if (tree_mod_dont_log(fs_info, eb))
832 return; 785 return;
833
834 __tree_mod_log_free_eb(fs_info, eb); 786 __tree_mod_log_free_eb(fs_info, eb);
835
836 tree_mod_log_write_unlock(fs_info);
837} 787}
838 788
839static noinline void 789static noinline void
@@ -1046,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1046 else 996 else
1047 btrfs_set_header_owner(cow, root->root_key.objectid); 997 btrfs_set_header_owner(cow, root->root_key.objectid);
1048 998
1049 write_extent_buffer(cow, root->fs_info->fsid, 999 write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
1050 (unsigned long)btrfs_header_fsid(cow),
1051 BTRFS_FSID_SIZE); 1000 BTRFS_FSID_SIZE);
1052 1001
1053 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); 1002 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1083,7 +1032,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1083 1032
1084 WARN_ON(trans->transid != btrfs_header_generation(parent)); 1033 WARN_ON(trans->transid != btrfs_header_generation(parent));
1085 tree_mod_log_insert_key(root->fs_info, parent, parent_slot, 1034 tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
1086 MOD_LOG_KEY_REPLACE); 1035 MOD_LOG_KEY_REPLACE, GFP_NOFS);
1087 btrfs_set_node_blockptr(parent, parent_slot, 1036 btrfs_set_node_blockptr(parent, parent_slot,
1088 cow->start); 1037 cow->start);
1089 btrfs_set_node_ptr_generation(parent, parent_slot, 1038 btrfs_set_node_ptr_generation(parent, parent_slot,
@@ -1116,7 +1065,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1116 int looped = 0; 1065 int looped = 0;
1117 1066
1118 if (!time_seq) 1067 if (!time_seq)
1119 return 0; 1068 return NULL;
1120 1069
1121 /* 1070 /*
1122 * the very last operation that's logged for a root is the replacement 1071 * the very last operation that's logged for a root is the replacement
@@ -1127,7 +1076,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1127 tm = tree_mod_log_search_oldest(fs_info, root_logical, 1076 tm = tree_mod_log_search_oldest(fs_info, root_logical,
1128 time_seq); 1077 time_seq);
1129 if (!looped && !tm) 1078 if (!looped && !tm)
1130 return 0; 1079 return NULL;
1131 /* 1080 /*
1132 * if there are no tree operation for the oldest root, we simply 1081 * if there are no tree operation for the oldest root, we simply
1133 * return it. this should only happen if that (old) root is at 1082 * return it. this should only happen if that (old) root is at
@@ -1240,8 +1189,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1240 * is freed (its refcount is decremented). 1189 * is freed (its refcount is decremented).
1241 */ 1190 */
1242static struct extent_buffer * 1191static struct extent_buffer *
1243tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, 1192tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1244 u64 time_seq) 1193 struct extent_buffer *eb, u64 time_seq)
1245{ 1194{
1246 struct extent_buffer *eb_rewin; 1195 struct extent_buffer *eb_rewin;
1247 struct tree_mod_elem *tm; 1196 struct tree_mod_elem *tm;
@@ -1256,11 +1205,18 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1256 if (!tm) 1205 if (!tm)
1257 return eb; 1206 return eb;
1258 1207
1208 btrfs_set_path_blocking(path);
1209 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1210
1259 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 1211 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1260 BUG_ON(tm->slot != 0); 1212 BUG_ON(tm->slot != 0);
1261 eb_rewin = alloc_dummy_extent_buffer(eb->start, 1213 eb_rewin = alloc_dummy_extent_buffer(eb->start,
1262 fs_info->tree_root->nodesize); 1214 fs_info->tree_root->nodesize);
1263 BUG_ON(!eb_rewin); 1215 if (!eb_rewin) {
1216 btrfs_tree_read_unlock_blocking(eb);
1217 free_extent_buffer(eb);
1218 return NULL;
1219 }
1264 btrfs_set_header_bytenr(eb_rewin, eb->start); 1220 btrfs_set_header_bytenr(eb_rewin, eb->start);
1265 btrfs_set_header_backref_rev(eb_rewin, 1221 btrfs_set_header_backref_rev(eb_rewin,
1266 btrfs_header_backref_rev(eb)); 1222 btrfs_header_backref_rev(eb));
@@ -1268,10 +1224,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1268 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); 1224 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
1269 } else { 1225 } else {
1270 eb_rewin = btrfs_clone_extent_buffer(eb); 1226 eb_rewin = btrfs_clone_extent_buffer(eb);
1271 BUG_ON(!eb_rewin); 1227 if (!eb_rewin) {
1228 btrfs_tree_read_unlock_blocking(eb);
1229 free_extent_buffer(eb);
1230 return NULL;
1231 }
1272 } 1232 }
1273 1233
1274 btrfs_tree_read_unlock(eb); 1234 btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
1235 btrfs_tree_read_unlock_blocking(eb);
1275 free_extent_buffer(eb); 1236 free_extent_buffer(eb);
1276 1237
1277 extent_buffer_get(eb_rewin); 1238 extent_buffer_get(eb_rewin);
@@ -1335,8 +1296,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1335 free_extent_buffer(eb_root); 1296 free_extent_buffer(eb_root);
1336 eb = alloc_dummy_extent_buffer(logical, root->nodesize); 1297 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1337 } else { 1298 } else {
1299 btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
1338 eb = btrfs_clone_extent_buffer(eb_root); 1300 eb = btrfs_clone_extent_buffer(eb_root);
1339 btrfs_tree_read_unlock(eb_root); 1301 btrfs_tree_read_unlock_blocking(eb_root);
1340 free_extent_buffer(eb_root); 1302 free_extent_buffer(eb_root);
1341 } 1303 }
1342 1304
@@ -1419,14 +1381,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
1419 1381
1420 if (trans->transaction != root->fs_info->running_transaction) 1382 if (trans->transaction != root->fs_info->running_transaction)
1421 WARN(1, KERN_CRIT "trans %llu running %llu\n", 1383 WARN(1, KERN_CRIT "trans %llu running %llu\n",
1422 (unsigned long long)trans->transid, 1384 trans->transid,
1423 (unsigned long long)
1424 root->fs_info->running_transaction->transid); 1385 root->fs_info->running_transaction->transid);
1425 1386
1426 if (trans->transid != root->fs_info->generation) 1387 if (trans->transid != root->fs_info->generation)
1427 WARN(1, KERN_CRIT "trans %llu running %llu\n", 1388 WARN(1, KERN_CRIT "trans %llu running %llu\n",
1428 (unsigned long long)trans->transid, 1389 trans->transid, root->fs_info->generation);
1429 (unsigned long long)root->fs_info->generation);
1430 1390
1431 if (!should_cow_block(trans, root, buf)) { 1391 if (!should_cow_block(trans, root, buf)) {
1432 *cow_ret = buf; 1392 *cow_ret = buf;
@@ -2466,6 +2426,40 @@ done:
2466 return ret; 2426 return ret;
2467} 2427}
2468 2428
2429static void key_search_validate(struct extent_buffer *b,
2430 struct btrfs_key *key,
2431 int level)
2432{
2433#ifdef CONFIG_BTRFS_ASSERT
2434 struct btrfs_disk_key disk_key;
2435
2436 btrfs_cpu_key_to_disk(&disk_key, key);
2437
2438 if (level == 0)
2439 ASSERT(!memcmp_extent_buffer(b, &disk_key,
2440 offsetof(struct btrfs_leaf, items[0].key),
2441 sizeof(disk_key)));
2442 else
2443 ASSERT(!memcmp_extent_buffer(b, &disk_key,
2444 offsetof(struct btrfs_node, ptrs[0].key),
2445 sizeof(disk_key)));
2446#endif
2447}
2448
2449static int key_search(struct extent_buffer *b, struct btrfs_key *key,
2450 int level, int *prev_cmp, int *slot)
2451{
2452 if (*prev_cmp != 0) {
2453 *prev_cmp = bin_search(b, key, level, slot);
2454 return *prev_cmp;
2455 }
2456
2457 key_search_validate(b, key, level);
2458 *slot = 0;
2459
2460 return 0;
2461}
2462
2469/* 2463/*
2470 * look for key in the tree. path is filled in with nodes along the way 2464 * look for key in the tree. path is filled in with nodes along the way
2471 * if key is found, we return zero and you can find the item in the leaf 2465 * if key is found, we return zero and you can find the item in the leaf
@@ -2494,6 +2488,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2494 int write_lock_level = 0; 2488 int write_lock_level = 0;
2495 u8 lowest_level = 0; 2489 u8 lowest_level = 0;
2496 int min_write_lock_level; 2490 int min_write_lock_level;
2491 int prev_cmp;
2497 2492
2498 lowest_level = p->lowest_level; 2493 lowest_level = p->lowest_level;
2499 WARN_ON(lowest_level && ins_len > 0); 2494 WARN_ON(lowest_level && ins_len > 0);
@@ -2524,6 +2519,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2524 min_write_lock_level = write_lock_level; 2519 min_write_lock_level = write_lock_level;
2525 2520
2526again: 2521again:
2522 prev_cmp = -1;
2527 /* 2523 /*
2528 * we try very hard to do read locks on the root 2524 * we try very hard to do read locks on the root
2529 */ 2525 */
@@ -2624,7 +2620,7 @@ cow_done:
2624 if (!cow) 2620 if (!cow)
2625 btrfs_unlock_up_safe(p, level + 1); 2621 btrfs_unlock_up_safe(p, level + 1);
2626 2622
2627 ret = bin_search(b, key, level, &slot); 2623 ret = key_search(b, key, level, &prev_cmp, &slot);
2628 2624
2629 if (level != 0) { 2625 if (level != 0) {
2630 int dec = 0; 2626 int dec = 0;
@@ -2759,6 +2755,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2759 int level; 2755 int level;
2760 int lowest_unlock = 1; 2756 int lowest_unlock = 1;
2761 u8 lowest_level = 0; 2757 u8 lowest_level = 0;
2758 int prev_cmp;
2762 2759
2763 lowest_level = p->lowest_level; 2760 lowest_level = p->lowest_level;
2764 WARN_ON(p->nodes[0] != NULL); 2761 WARN_ON(p->nodes[0] != NULL);
@@ -2769,6 +2766,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2769 } 2766 }
2770 2767
2771again: 2768again:
2769 prev_cmp = -1;
2772 b = get_old_root(root, time_seq); 2770 b = get_old_root(root, time_seq);
2773 level = btrfs_header_level(b); 2771 level = btrfs_header_level(b);
2774 p->locks[level] = BTRFS_READ_LOCK; 2772 p->locks[level] = BTRFS_READ_LOCK;
@@ -2786,7 +2784,7 @@ again:
2786 */ 2784 */
2787 btrfs_unlock_up_safe(p, level + 1); 2785 btrfs_unlock_up_safe(p, level + 1);
2788 2786
2789 ret = bin_search(b, key, level, &slot); 2787 ret = key_search(b, key, level, &prev_cmp, &slot);
2790 2788
2791 if (level != 0) { 2789 if (level != 0) {
2792 int dec = 0; 2790 int dec = 0;
@@ -2820,7 +2818,11 @@ again:
2820 btrfs_clear_path_blocking(p, b, 2818 btrfs_clear_path_blocking(p, b,
2821 BTRFS_READ_LOCK); 2819 BTRFS_READ_LOCK);
2822 } 2820 }
2823 b = tree_mod_log_rewind(root->fs_info, b, time_seq); 2821 b = tree_mod_log_rewind(root->fs_info, p, b, time_seq);
2822 if (!b) {
2823 ret = -ENOMEM;
2824 goto done;
2825 }
2824 p->locks[level] = BTRFS_READ_LOCK; 2826 p->locks[level] = BTRFS_READ_LOCK;
2825 p->nodes[level] = b; 2827 p->nodes[level] = b;
2826 } else { 2828 } else {
@@ -3143,13 +3145,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3143 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); 3145 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
3144 btrfs_set_header_owner(c, root->root_key.objectid); 3146 btrfs_set_header_owner(c, root->root_key.objectid);
3145 3147
3146 write_extent_buffer(c, root->fs_info->fsid, 3148 write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c),
3147 (unsigned long)btrfs_header_fsid(c),
3148 BTRFS_FSID_SIZE); 3149 BTRFS_FSID_SIZE);
3149 3150
3150 write_extent_buffer(c, root->fs_info->chunk_tree_uuid, 3151 write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
3151 (unsigned long)btrfs_header_chunk_tree_uuid(c), 3152 btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE);
3152 BTRFS_UUID_SIZE);
3153 3153
3154 btrfs_set_node_key(c, &lower_key, 0); 3154 btrfs_set_node_key(c, &lower_key, 0);
3155 btrfs_set_node_blockptr(c, 0, lower->start); 3155 btrfs_set_node_blockptr(c, 0, lower->start);
@@ -3208,7 +3208,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
3208 } 3208 }
3209 if (level) { 3209 if (level) {
3210 ret = tree_mod_log_insert_key(root->fs_info, lower, slot, 3210 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
3211 MOD_LOG_KEY_ADD); 3211 MOD_LOG_KEY_ADD, GFP_NOFS);
3212 BUG_ON(ret < 0); 3212 BUG_ON(ret < 0);
3213 } 3213 }
3214 btrfs_set_node_key(lower, key, slot); 3214 btrfs_set_node_key(lower, key, slot);
@@ -3284,10 +3284,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3284 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); 3284 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
3285 btrfs_set_header_owner(split, root->root_key.objectid); 3285 btrfs_set_header_owner(split, root->root_key.objectid);
3286 write_extent_buffer(split, root->fs_info->fsid, 3286 write_extent_buffer(split, root->fs_info->fsid,
3287 (unsigned long)btrfs_header_fsid(split), 3287 btrfs_header_fsid(split), BTRFS_FSID_SIZE);
3288 BTRFS_FSID_SIZE);
3289 write_extent_buffer(split, root->fs_info->chunk_tree_uuid, 3288 write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
3290 (unsigned long)btrfs_header_chunk_tree_uuid(split), 3289 btrfs_header_chunk_tree_uuid(split),
3291 BTRFS_UUID_SIZE); 3290 BTRFS_UUID_SIZE);
3292 3291
3293 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); 3292 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
@@ -4040,11 +4039,10 @@ again:
4040 btrfs_set_header_owner(right, root->root_key.objectid); 4039 btrfs_set_header_owner(right, root->root_key.objectid);
4041 btrfs_set_header_level(right, 0); 4040 btrfs_set_header_level(right, 0);
4042 write_extent_buffer(right, root->fs_info->fsid, 4041 write_extent_buffer(right, root->fs_info->fsid,
4043 (unsigned long)btrfs_header_fsid(right), 4042 btrfs_header_fsid(right), BTRFS_FSID_SIZE);
4044 BTRFS_FSID_SIZE);
4045 4043
4046 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 4044 write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
4047 (unsigned long)btrfs_header_chunk_tree_uuid(right), 4045 btrfs_header_chunk_tree_uuid(right),
4048 BTRFS_UUID_SIZE); 4046 BTRFS_UUID_SIZE);
4049 4047
4050 if (split == 0) { 4048 if (split == 0) {
@@ -4642,7 +4640,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4642 (nritems - slot - 1)); 4640 (nritems - slot - 1));
4643 } else if (level) { 4641 } else if (level) {
4644 ret = tree_mod_log_insert_key(root->fs_info, parent, slot, 4642 ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
4645 MOD_LOG_KEY_REMOVE); 4643 MOD_LOG_KEY_REMOVE, GFP_NOFS);
4646 BUG_ON(ret < 0); 4644 BUG_ON(ret < 0);
4647 } 4645 }
4648 4646
@@ -4814,7 +4812,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4814 * This may release the path, and so you may lose any locks held at the 4812 * This may release the path, and so you may lose any locks held at the
4815 * time you call it. 4813 * time you call it.
4816 */ 4814 */
4817int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 4815static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4818{ 4816{
4819 struct btrfs_key key; 4817 struct btrfs_key key;
4820 struct btrfs_disk_key found_key; 4818 struct btrfs_disk_key found_key;
@@ -5329,19 +5327,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5329 goto out; 5327 goto out;
5330 advance_right = ADVANCE; 5328 advance_right = ADVANCE;
5331 } else { 5329 } else {
5330 enum btrfs_compare_tree_result cmp;
5331
5332 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); 5332 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
5333 ret = tree_compare_item(left_root, left_path, 5333 ret = tree_compare_item(left_root, left_path,
5334 right_path, tmp_buf); 5334 right_path, tmp_buf);
5335 if (ret) { 5335 if (ret)
5336 WARN_ON(!extent_buffer_uptodate(left_path->nodes[0])); 5336 cmp = BTRFS_COMPARE_TREE_CHANGED;
5337 ret = changed_cb(left_root, right_root, 5337 else
5338 left_path, right_path, 5338 cmp = BTRFS_COMPARE_TREE_SAME;
5339 &left_key, 5339 ret = changed_cb(left_root, right_root,
5340 BTRFS_COMPARE_TREE_CHANGED, 5340 left_path, right_path,
5341 ctx); 5341 &left_key, cmp, ctx);
5342 if (ret < 0) 5342 if (ret < 0)
5343 goto out; 5343 goto out;
5344 }
5345 advance_left = ADVANCE; 5344 advance_left = ADVANCE;
5346 advance_right = ADVANCE; 5345 advance_right = ADVANCE;
5347 } 5346 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e795bf135e80..3c1da6f98a4d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -23,6 +23,7 @@
23#include <linux/highmem.h> 23#include <linux/highmem.h>
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/rwsem.h> 25#include <linux/rwsem.h>
26#include <linux/semaphore.h>
26#include <linux/completion.h> 27#include <linux/completion.h>
27#include <linux/backing-dev.h> 28#include <linux/backing-dev.h>
28#include <linux/wait.h> 29#include <linux/wait.h>
@@ -91,6 +92,9 @@ struct btrfs_ordered_sum;
91/* holds quota configuration and tracking */ 92/* holds quota configuration and tracking */
92#define BTRFS_QUOTA_TREE_OBJECTID 8ULL 93#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
93 94
95/* for storing items that use the BTRFS_UUID_KEY* types */
96#define BTRFS_UUID_TREE_OBJECTID 9ULL
97
94/* for storing balance parameters in the root tree */ 98/* for storing balance parameters in the root tree */
95#define BTRFS_BALANCE_OBJECTID -4ULL 99#define BTRFS_BALANCE_OBJECTID -4ULL
96 100
@@ -142,7 +146,7 @@ struct btrfs_ordered_sum;
142 146
143#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 147#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
144 148
145#define BTRFS_DEV_REPLACE_DEVID 0 149#define BTRFS_DEV_REPLACE_DEVID 0ULL
146 150
147/* 151/*
148 * the max metadata block size. This limit is somewhat artificial, 152 * the max metadata block size. This limit is somewhat artificial,
@@ -478,9 +482,10 @@ struct btrfs_super_block {
478 char label[BTRFS_LABEL_SIZE]; 482 char label[BTRFS_LABEL_SIZE];
479 483
480 __le64 cache_generation; 484 __le64 cache_generation;
485 __le64 uuid_tree_generation;
481 486
482 /* future expansion */ 487 /* future expansion */
483 __le64 reserved[31]; 488 __le64 reserved[30];
484 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; 489 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
485 struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS]; 490 struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
486} __attribute__ ((__packed__)); 491} __attribute__ ((__packed__));
@@ -1188,6 +1193,7 @@ enum btrfs_caching_type {
1188 BTRFS_CACHE_STARTED = 1, 1193 BTRFS_CACHE_STARTED = 1,
1189 BTRFS_CACHE_FAST = 2, 1194 BTRFS_CACHE_FAST = 2,
1190 BTRFS_CACHE_FINISHED = 3, 1195 BTRFS_CACHE_FINISHED = 3,
1196 BTRFS_CACHE_ERROR = 4,
1191}; 1197};
1192 1198
1193enum btrfs_disk_cache_state { 1199enum btrfs_disk_cache_state {
@@ -1302,6 +1308,7 @@ struct btrfs_fs_info {
1302 struct btrfs_root *fs_root; 1308 struct btrfs_root *fs_root;
1303 struct btrfs_root *csum_root; 1309 struct btrfs_root *csum_root;
1304 struct btrfs_root *quota_root; 1310 struct btrfs_root *quota_root;
1311 struct btrfs_root *uuid_root;
1305 1312
1306 /* the log root tree is a directory of all the other log roots */ 1313 /* the log root tree is a directory of all the other log roots */
1307 struct btrfs_root *log_root_tree; 1314 struct btrfs_root *log_root_tree;
@@ -1350,6 +1357,7 @@ struct btrfs_fs_info {
1350 u64 last_trans_log_full_commit; 1357 u64 last_trans_log_full_commit;
1351 unsigned long mount_opt; 1358 unsigned long mount_opt;
1352 unsigned long compress_type:4; 1359 unsigned long compress_type:4;
1360 int commit_interval;
1353 /* 1361 /*
1354 * It is a suggestive number, the read side is safe even it gets a 1362 * It is a suggestive number, the read side is safe even it gets a
1355 * wrong number because we will write out the data into a regular 1363 * wrong number because we will write out the data into a regular
@@ -1411,6 +1419,13 @@ struct btrfs_fs_info {
1411 * before jumping into the main commit. 1419 * before jumping into the main commit.
1412 */ 1420 */
1413 struct mutex ordered_operations_mutex; 1421 struct mutex ordered_operations_mutex;
1422
1423 /*
1424 * Same as ordered_operations_mutex except this is for ordered extents
1425 * and not the operations.
1426 */
1427 struct mutex ordered_extent_flush_mutex;
1428
1414 struct rw_semaphore extent_commit_sem; 1429 struct rw_semaphore extent_commit_sem;
1415 1430
1416 struct rw_semaphore cleanup_work_sem; 1431 struct rw_semaphore cleanup_work_sem;
@@ -1641,6 +1656,9 @@ struct btrfs_fs_info {
1641 struct btrfs_dev_replace dev_replace; 1656 struct btrfs_dev_replace dev_replace;
1642 1657
1643 atomic_t mutually_exclusive_operation_running; 1658 atomic_t mutually_exclusive_operation_running;
1659
1660 struct semaphore uuid_tree_rescan_sem;
1661 unsigned int update_uuid_tree_gen:1;
1644}; 1662};
1645 1663
1646/* 1664/*
@@ -1934,6 +1952,19 @@ struct btrfs_ioctl_defrag_range_args {
1934#define BTRFS_DEV_REPLACE_KEY 250 1952#define BTRFS_DEV_REPLACE_KEY 250
1935 1953
1936/* 1954/*
1955 * Stores items that allow to quickly map UUIDs to something else.
1956 * These items are part of the filesystem UUID tree.
1957 * The key is built like this:
1958 * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
1959 */
1960#if BTRFS_UUID_SIZE != 16
1961#error "UUID items require BTRFS_UUID_SIZE == 16!"
1962#endif
1963#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
1964#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
1965 * received subvols */
1966
1967/*
1937 * string items are for debugging. They just store a short string of 1968 * string items are for debugging. They just store a short string of
1938 * data in the FS 1969 * data in the FS
1939 */ 1970 */
@@ -1967,6 +1998,9 @@ struct btrfs_ioctl_defrag_range_args {
1967#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20) 1998#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
1968#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) 1999#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
1969#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) 2000#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
2001#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
2002
2003#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
1970 2004
1971#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 2005#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1972#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 2006#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2130,14 +2164,14 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
2130BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, 2164BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
2131 generation, 64); 2165 generation, 64);
2132 2166
2133static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) 2167static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d)
2134{ 2168{
2135 return (char *)d + offsetof(struct btrfs_dev_item, uuid); 2169 return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid);
2136} 2170}
2137 2171
2138static inline char *btrfs_device_fsid(struct btrfs_dev_item *d) 2172static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d)
2139{ 2173{
2140 return (char *)d + offsetof(struct btrfs_dev_item, fsid); 2174 return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid);
2141} 2175}
2142 2176
2143BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); 2177BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
@@ -2240,6 +2274,23 @@ BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
2240BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); 2274BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
2241BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); 2275BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
2242BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); 2276BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
2277BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
2278 generation, 64);
2279BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
2280 sequence, 64);
2281BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
2282 transid, 64);
2283BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
2284BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
2285 nbytes, 64);
2286BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
2287 block_group, 64);
2288BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
2289BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
2290BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
2291BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
2292BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
2293BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
2243 2294
2244static inline struct btrfs_timespec * 2295static inline struct btrfs_timespec *
2245btrfs_inode_atime(struct btrfs_inode_item *inode_item) 2296btrfs_inode_atime(struct btrfs_inode_item *inode_item)
@@ -2267,6 +2318,8 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
2267 2318
2268BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); 2319BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
2269BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); 2320BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
2321BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
2322BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
2270 2323
2271/* struct btrfs_dev_extent */ 2324/* struct btrfs_dev_extent */
2272BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, 2325BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
@@ -2277,10 +2330,10 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
2277 chunk_offset, 64); 2330 chunk_offset, 64);
2278BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); 2331BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
2279 2332
2280static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev) 2333static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
2281{ 2334{
2282 unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid); 2335 unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
2283 return (u8 *)((unsigned long)dev + ptr); 2336 return (unsigned long)dev + ptr;
2284} 2337}
2285 2338
2286BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); 2339BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
@@ -2348,6 +2401,10 @@ BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
2348/* struct btrfs_node */ 2401/* struct btrfs_node */
2349BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); 2402BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
2350BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); 2403BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
2404BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr,
2405 blockptr, 64);
2406BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr,
2407 generation, 64);
2351 2408
2352static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) 2409static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
2353{ 2410{
@@ -2404,6 +2461,8 @@ static inline void btrfs_set_node_key(struct extent_buffer *eb,
2404/* struct btrfs_item */ 2461/* struct btrfs_item */
2405BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); 2462BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
2406BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); 2463BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
2464BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32);
2465BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32);
2407 2466
2408static inline unsigned long btrfs_item_nr_offset(int nr) 2467static inline unsigned long btrfs_item_nr_offset(int nr)
2409{ 2468{
@@ -2466,6 +2525,13 @@ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
2466BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); 2525BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
2467BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); 2526BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
2468BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); 2527BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
2528BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8);
2529BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item,
2530 data_len, 16);
2531BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item,
2532 name_len, 16);
2533BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item,
2534 transid, 64);
2469 2535
2470static inline void btrfs_dir_item_key(struct extent_buffer *eb, 2536static inline void btrfs_dir_item_key(struct extent_buffer *eb,
2471 struct btrfs_dir_item *item, 2537 struct btrfs_dir_item *item,
@@ -2568,6 +2634,12 @@ BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
2568BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); 2634BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
2569BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); 2635BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
2570BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); 2636BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
2637BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
2638 generation, 64);
2639BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
2640BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header,
2641 nritems, 32);
2642BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
2571 2643
2572static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag) 2644static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
2573{ 2645{
@@ -2603,16 +2675,14 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
2603 btrfs_set_header_flags(eb, flags); 2675 btrfs_set_header_flags(eb, flags);
2604} 2676}
2605 2677
2606static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) 2678static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb)
2607{ 2679{
2608 unsigned long ptr = offsetof(struct btrfs_header, fsid); 2680 return offsetof(struct btrfs_header, fsid);
2609 return (u8 *)ptr;
2610} 2681}
2611 2682
2612static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb) 2683static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
2613{ 2684{
2614 unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid); 2685 return offsetof(struct btrfs_header, chunk_tree_uuid);
2615 return (u8 *)ptr;
2616} 2686}
2617 2687
2618static inline int btrfs_is_leaf(struct extent_buffer *eb) 2688static inline int btrfs_is_leaf(struct extent_buffer *eb)
@@ -2830,6 +2900,9 @@ BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
2830 csum_type, 16); 2900 csum_type, 16);
2831BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, 2901BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
2832 cache_generation, 64); 2902 cache_generation, 64);
2903BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
2904BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
2905 uuid_tree_generation, 64);
2833 2906
2834static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 2907static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
2835{ 2908{
@@ -2847,6 +2920,14 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
2847 2920
2848/* struct btrfs_file_extent_item */ 2921/* struct btrfs_file_extent_item */
2849BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); 2922BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
2923BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
2924 struct btrfs_file_extent_item, disk_bytenr, 64);
2925BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset,
2926 struct btrfs_file_extent_item, offset, 64);
2927BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
2928 struct btrfs_file_extent_item, generation, 64);
2929BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
2930 struct btrfs_file_extent_item, num_bytes, 64);
2850 2931
2851static inline unsigned long 2932static inline unsigned long
2852btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) 2933btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@ -3107,11 +3188,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
3107 struct btrfs_root *root, 3188 struct btrfs_root *root,
3108 u64 root_objectid, u64 owner, u64 offset, 3189 u64 root_objectid, u64 owner, u64 offset,
3109 struct btrfs_key *ins); 3190 struct btrfs_key *ins);
3110int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3191int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3111 struct btrfs_root *root, 3192 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3112 u64 num_bytes, u64 min_alloc_size, 3193 struct btrfs_key *ins, int is_data);
3113 u64 empty_size, u64 hint_byte,
3114 struct btrfs_key *ins, int is_data);
3115int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3194int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3116 struct extent_buffer *buf, int full_backref, int for_cow); 3195 struct extent_buffer *buf, int full_backref, int for_cow);
3117int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3196int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3175,7 +3254,7 @@ void btrfs_orphan_release_metadata(struct inode *inode);
3175int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, 3254int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
3176 struct btrfs_block_rsv *rsv, 3255 struct btrfs_block_rsv *rsv,
3177 int nitems, 3256 int nitems,
3178 u64 *qgroup_reserved); 3257 u64 *qgroup_reserved, bool use_global_rsv);
3179void btrfs_subvolume_release_metadata(struct btrfs_root *root, 3258void btrfs_subvolume_release_metadata(struct btrfs_root *root,
3180 struct btrfs_block_rsv *rsv, 3259 struct btrfs_block_rsv *rsv,
3181 u64 qgroup_reserved); 3260 u64 qgroup_reserved);
@@ -3245,6 +3324,7 @@ enum btrfs_compare_tree_result {
3245 BTRFS_COMPARE_TREE_NEW, 3324 BTRFS_COMPARE_TREE_NEW,
3246 BTRFS_COMPARE_TREE_DELETED, 3325 BTRFS_COMPARE_TREE_DELETED,
3247 BTRFS_COMPARE_TREE_CHANGED, 3326 BTRFS_COMPARE_TREE_CHANGED,
3327 BTRFS_COMPARE_TREE_SAME,
3248}; 3328};
3249typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root, 3329typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
3250 struct btrfs_root *right_root, 3330 struct btrfs_root *right_root,
@@ -3380,6 +3460,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
3380 kfree(fs_info->dev_root); 3460 kfree(fs_info->dev_root);
3381 kfree(fs_info->csum_root); 3461 kfree(fs_info->csum_root);
3382 kfree(fs_info->quota_root); 3462 kfree(fs_info->quota_root);
3463 kfree(fs_info->uuid_root);
3383 kfree(fs_info->super_copy); 3464 kfree(fs_info->super_copy);
3384 kfree(fs_info->super_for_commit); 3465 kfree(fs_info->super_for_commit);
3385 kfree(fs_info); 3466 kfree(fs_info);
@@ -3414,8 +3495,6 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
3414 struct btrfs_root *root, 3495 struct btrfs_root *root,
3415 struct btrfs_key *key, 3496 struct btrfs_key *key,
3416 struct btrfs_root_item *item); 3497 struct btrfs_root_item *item);
3417void btrfs_read_root_item(struct extent_buffer *eb, int slot,
3418 struct btrfs_root_item *item);
3419int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key, 3498int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
3420 struct btrfs_path *path, struct btrfs_root_item *root_item, 3499 struct btrfs_path *path, struct btrfs_root_item *root_item,
3421 struct btrfs_key *root_key); 3500 struct btrfs_key *root_key);
@@ -3426,6 +3505,17 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
3426void btrfs_update_root_times(struct btrfs_trans_handle *trans, 3505void btrfs_update_root_times(struct btrfs_trans_handle *trans,
3427 struct btrfs_root *root); 3506 struct btrfs_root *root);
3428 3507
3508/* uuid-tree.c */
3509int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
3510 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
3511 u64 subid);
3512int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
3513 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
3514 u64 subid);
3515int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
3516 int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
3517 u64));
3518
3429/* dir-item.c */ 3519/* dir-item.c */
3430int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, 3520int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
3431 const char *name, int name_len); 3521 const char *name, int name_len);
@@ -3509,12 +3599,14 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
3509 struct btrfs_inode_extref **extref_ret); 3599 struct btrfs_inode_extref **extref_ret);
3510 3600
3511/* file-item.c */ 3601/* file-item.c */
3602struct btrfs_dio_private;
3512int btrfs_del_csums(struct btrfs_trans_handle *trans, 3603int btrfs_del_csums(struct btrfs_trans_handle *trans,
3513 struct btrfs_root *root, u64 bytenr, u64 len); 3604 struct btrfs_root *root, u64 bytenr, u64 len);
3514int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 3605int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
3515 struct bio *bio, u32 *dst); 3606 struct bio *bio, u32 *dst);
3516int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 3607int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
3517 struct bio *bio, u64 logical_offset); 3608 struct btrfs_dio_private *dip, struct bio *bio,
3609 u64 logical_offset);
3518int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 3610int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
3519 struct btrfs_root *root, 3611 struct btrfs_root *root,
3520 u64 objectid, u64 pos, 3612 u64 objectid, u64 pos,
@@ -3552,8 +3644,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
3552struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, 3644struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
3553 size_t pg_offset, u64 start, u64 len, 3645 size_t pg_offset, u64 start, u64 len,
3554 int create); 3646 int create);
3555noinline int can_nocow_extent(struct btrfs_trans_handle *trans, 3647noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
3556 struct inode *inode, u64 offset, u64 *len,
3557 u64 *orig_start, u64 *orig_block_len, 3648 u64 *orig_start, u64 *orig_block_len,
3558 u64 *ram_bytes); 3649 u64 *ram_bytes);
3559 3650
@@ -3643,11 +3734,15 @@ extern const struct dentry_operations btrfs_dentry_operations;
3643long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 3734long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
3644void btrfs_update_iflags(struct inode *inode); 3735void btrfs_update_iflags(struct inode *inode);
3645void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); 3736void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
3737int btrfs_is_empty_uuid(u8 *uuid);
3646int btrfs_defrag_file(struct inode *inode, struct file *file, 3738int btrfs_defrag_file(struct inode *inode, struct file *file,
3647 struct btrfs_ioctl_defrag_range_args *range, 3739 struct btrfs_ioctl_defrag_range_args *range,
3648 u64 newer_than, unsigned long max_pages); 3740 u64 newer_than, unsigned long max_pages);
3649void btrfs_get_block_group_info(struct list_head *groups_list, 3741void btrfs_get_block_group_info(struct list_head *groups_list,
3650 struct btrfs_ioctl_space_info *space); 3742 struct btrfs_ioctl_space_info *space);
3743void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3744 struct btrfs_ioctl_balance_args *bargs);
3745
3651 3746
3652/* file.c */ 3747/* file.c */
3653int btrfs_auto_defrag_init(void); 3748int btrfs_auto_defrag_init(void);
@@ -3720,6 +3815,22 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
3720#define btrfs_debug(fs_info, fmt, args...) \ 3815#define btrfs_debug(fs_info, fmt, args...) \
3721 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) 3816 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
3722 3817
3818#ifdef CONFIG_BTRFS_ASSERT
3819
3820static inline void assfail(char *expr, char *file, int line)
3821{
3822 printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d",
3823 expr, file, line);
3824 BUG();
3825}
3826
3827#define ASSERT(expr) \
3828 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
3829#else
3830#define ASSERT(expr) ((void)0)
3831#endif
3832
3833#define btrfs_assert()
3723__printf(5, 6) 3834__printf(5, 6)
3724void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 3835void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
3725 unsigned int line, int errno, const char *fmt, ...); 3836 unsigned int line, int errno, const char *fmt, ...);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 375510913fe7..cbd9523ad09c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -21,6 +21,7 @@
21#include "delayed-inode.h" 21#include "delayed-inode.h"
22#include "disk-io.h" 22#include "disk-io.h"
23#include "transaction.h" 23#include "transaction.h"
24#include "ctree.h"
24 25
25#define BTRFS_DELAYED_WRITEBACK 512 26#define BTRFS_DELAYED_WRITEBACK 512
26#define BTRFS_DELAYED_BACKGROUND 128 27#define BTRFS_DELAYED_BACKGROUND 128
@@ -1453,10 +1454,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1453 1454
1454 dir_item = (struct btrfs_dir_item *)delayed_item->data; 1455 dir_item = (struct btrfs_dir_item *)delayed_item->data;
1455 dir_item->location = *disk_key; 1456 dir_item->location = *disk_key;
1456 dir_item->transid = cpu_to_le64(trans->transid); 1457 btrfs_set_stack_dir_transid(dir_item, trans->transid);
1457 dir_item->data_len = 0; 1458 btrfs_set_stack_dir_data_len(dir_item, 0);
1458 dir_item->name_len = cpu_to_le16(name_len); 1459 btrfs_set_stack_dir_name_len(dir_item, name_len);
1459 dir_item->type = type; 1460 btrfs_set_stack_dir_type(dir_item, type);
1460 memcpy((char *)(dir_item + 1), name, name_len); 1461 memcpy((char *)(dir_item + 1), name, name_len);
1461 1462
1462 ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item); 1463 ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
@@ -1470,13 +1471,11 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1470 mutex_lock(&delayed_node->mutex); 1471 mutex_lock(&delayed_node->mutex);
1471 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); 1472 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1472 if (unlikely(ret)) { 1473 if (unlikely(ret)) {
1473 printk(KERN_ERR "err add delayed dir index item(name: %s) into " 1474 printk(KERN_ERR "err add delayed dir index item(name: %.*s) "
1474 "the insertion tree of the delayed node" 1475 "into the insertion tree of the delayed node"
1475 "(root id: %llu, inode id: %llu, errno: %d)\n", 1476 "(root id: %llu, inode id: %llu, errno: %d)\n",
1476 name, 1477 name_len, name, delayed_node->root->objectid,
1477 (unsigned long long)delayed_node->root->objectid, 1478 delayed_node->inode_id, ret);
1478 (unsigned long long)delayed_node->inode_id,
1479 ret);
1480 BUG(); 1479 BUG();
1481 } 1480 }
1482 mutex_unlock(&delayed_node->mutex); 1481 mutex_unlock(&delayed_node->mutex);
@@ -1547,9 +1546,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1547 printk(KERN_ERR "err add delayed dir index item(index: %llu) " 1546 printk(KERN_ERR "err add delayed dir index item(index: %llu) "
1548 "into the deletion tree of the delayed node" 1547 "into the deletion tree of the delayed node"
1549 "(root id: %llu, inode id: %llu, errno: %d)\n", 1548 "(root id: %llu, inode id: %llu, errno: %d)\n",
1550 (unsigned long long)index, 1549 index, node->root->objectid, node->inode_id,
1551 (unsigned long long)node->root->objectid,
1552 (unsigned long long)node->inode_id,
1553 ret); 1550 ret);
1554 BUG(); 1551 BUG();
1555 } 1552 }
@@ -1699,7 +1696,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1699 1696
1700 di = (struct btrfs_dir_item *)curr->data; 1697 di = (struct btrfs_dir_item *)curr->data;
1701 name = (char *)(di + 1); 1698 name = (char *)(di + 1);
1702 name_len = le16_to_cpu(di->name_len); 1699 name_len = btrfs_stack_dir_name_len(di);
1703 1700
1704 d_type = btrfs_filetype_table[di->type]; 1701 d_type = btrfs_filetype_table[di->type];
1705 btrfs_disk_key_to_cpu(&location, &di->location); 1702 btrfs_disk_key_to_cpu(&location, &di->location);
@@ -1716,27 +1713,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1716 return 0; 1713 return 0;
1717} 1714}
1718 1715
1719BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1720 generation, 64);
1721BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1722 sequence, 64);
1723BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1724 transid, 64);
1725BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1726BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1727 nbytes, 64);
1728BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1729 block_group, 64);
1730BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1731BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1732BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1733BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1734BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1735BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1736
1737BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1738BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1739
1740static void fill_stack_inode_item(struct btrfs_trans_handle *trans, 1716static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1741 struct btrfs_inode_item *inode_item, 1717 struct btrfs_inode_item *inode_item,
1742 struct inode *inode) 1718 struct inode *inode)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index c219463fb1fd..e4d467be2dd4 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -241,7 +241,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
241 return 0; 241 return 0;
242} 242}
243 243
244static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, 244static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
245 struct btrfs_delayed_ref_root *delayed_refs, 245 struct btrfs_delayed_ref_root *delayed_refs,
246 struct btrfs_delayed_ref_node *ref) 246 struct btrfs_delayed_ref_node *ref)
247{ 247{
@@ -600,7 +600,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
600 INIT_LIST_HEAD(&head_ref->cluster); 600 INIT_LIST_HEAD(&head_ref->cluster);
601 mutex_init(&head_ref->mutex); 601 mutex_init(&head_ref->mutex);
602 602
603 trace_btrfs_delayed_ref_head(ref, head_ref, action); 603 trace_add_delayed_ref_head(ref, head_ref, action);
604 604
605 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 605 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
606 606
@@ -661,7 +661,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
661 ref->type = BTRFS_TREE_BLOCK_REF_KEY; 661 ref->type = BTRFS_TREE_BLOCK_REF_KEY;
662 full_ref->level = level; 662 full_ref->level = level;
663 663
664 trace_btrfs_delayed_tree_ref(ref, full_ref, action); 664 trace_add_delayed_tree_ref(ref, full_ref, action);
665 665
666 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 666 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
667 667
@@ -722,7 +722,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
722 full_ref->objectid = owner; 722 full_ref->objectid = owner;
723 full_ref->offset = offset; 723 full_ref->offset = offset;
724 724
725 trace_btrfs_delayed_data_ref(ref, full_ref, action); 725 trace_add_delayed_data_ref(ref, full_ref, action);
726 726
727 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 727 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
728 728
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5f8f3341c099..a64435359385 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -148,13 +148,13 @@ no_valid_dev_replace_entry_found:
148 !btrfs_test_opt(dev_root, DEGRADED)) { 148 !btrfs_test_opt(dev_root, DEGRADED)) {
149 ret = -EIO; 149 ret = -EIO;
150 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", 150 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
151 (unsigned long long)src_devid); 151 src_devid);
152 } 152 }
153 if (!dev_replace->tgtdev && 153 if (!dev_replace->tgtdev &&
154 !btrfs_test_opt(dev_root, DEGRADED)) { 154 !btrfs_test_opt(dev_root, DEGRADED)) {
155 ret = -EIO; 155 ret = -EIO;
156 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", 156 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n",
157 (unsigned long long)BTRFS_DEV_REPLACE_DEVID); 157 BTRFS_DEV_REPLACE_DEVID);
158 } 158 }
159 if (dev_replace->tgtdev) { 159 if (dev_replace->tgtdev) {
160 if (dev_replace->srcdev) { 160 if (dev_replace->srcdev) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6b092a1c4e37..4cbb00af92ff 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -31,6 +31,7 @@
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <linux/ratelimit.h> 32#include <linux/ratelimit.h>
33#include <linux/uuid.h> 33#include <linux/uuid.h>
34#include <linux/semaphore.h>
34#include <asm/unaligned.h> 35#include <asm/unaligned.h>
35#include "compat.h" 36#include "compat.h"
36#include "ctree.h" 37#include "ctree.h"
@@ -302,9 +303,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
302 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " 303 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
303 "failed on %llu wanted %X found %X " 304 "failed on %llu wanted %X found %X "
304 "level %d\n", 305 "level %d\n",
305 root->fs_info->sb->s_id, 306 root->fs_info->sb->s_id, buf->start,
306 (unsigned long long)buf->start, val, found, 307 val, found, btrfs_header_level(buf));
307 btrfs_header_level(buf));
308 if (result != (char *)&inline_result) 308 if (result != (char *)&inline_result)
309 kfree(result); 309 kfree(result);
310 return 1; 310 return 1;
@@ -345,9 +345,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
345 } 345 }
346 printk_ratelimited("parent transid verify failed on %llu wanted %llu " 346 printk_ratelimited("parent transid verify failed on %llu wanted %llu "
347 "found %llu\n", 347 "found %llu\n",
348 (unsigned long long)eb->start, 348 eb->start, parent_transid, btrfs_header_generation(eb));
349 (unsigned long long)parent_transid,
350 (unsigned long long)btrfs_header_generation(eb));
351 ret = 1; 349 ret = 1;
352 clear_extent_buffer_uptodate(eb); 350 clear_extent_buffer_uptodate(eb);
353out: 351out:
@@ -497,8 +495,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
497 u8 fsid[BTRFS_UUID_SIZE]; 495 u8 fsid[BTRFS_UUID_SIZE];
498 int ret = 1; 496 int ret = 1;
499 497
500 read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), 498 read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE);
501 BTRFS_FSID_SIZE);
502 while (fs_devices) { 499 while (fs_devices) {
503 if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { 500 if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
504 ret = 0; 501 ret = 0;
@@ -512,8 +509,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
512#define CORRUPT(reason, eb, root, slot) \ 509#define CORRUPT(reason, eb, root, slot) \
513 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ 510 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
514 "root=%llu, slot=%d\n", reason, \ 511 "root=%llu, slot=%d\n", reason, \
515 (unsigned long long)btrfs_header_bytenr(eb), \ 512 btrfs_header_bytenr(eb), root->objectid, slot)
516 (unsigned long long)root->objectid, slot)
517 513
518static noinline int check_leaf(struct btrfs_root *root, 514static noinline int check_leaf(struct btrfs_root *root,
519 struct extent_buffer *leaf) 515 struct extent_buffer *leaf)
@@ -576,8 +572,9 @@ static noinline int check_leaf(struct btrfs_root *root,
576 return 0; 572 return 0;
577} 573}
578 574
579static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 575static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
580 struct extent_state *state, int mirror) 576 u64 phy_offset, struct page *page,
577 u64 start, u64 end, int mirror)
581{ 578{
582 struct extent_io_tree *tree; 579 struct extent_io_tree *tree;
583 u64 found_start; 580 u64 found_start;
@@ -612,14 +609,13 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
612 if (found_start != eb->start) { 609 if (found_start != eb->start) {
613 printk_ratelimited(KERN_INFO "btrfs bad tree block start " 610 printk_ratelimited(KERN_INFO "btrfs bad tree block start "
614 "%llu %llu\n", 611 "%llu %llu\n",
615 (unsigned long long)found_start, 612 found_start, eb->start);
616 (unsigned long long)eb->start);
617 ret = -EIO; 613 ret = -EIO;
618 goto err; 614 goto err;
619 } 615 }
620 if (check_tree_block_fsid(root, eb)) { 616 if (check_tree_block_fsid(root, eb)) {
621 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 617 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
622 (unsigned long long)eb->start); 618 eb->start);
623 ret = -EIO; 619 ret = -EIO;
624 goto err; 620 goto err;
625 } 621 }
@@ -1148,6 +1144,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
1148 return NULL; 1144 return NULL;
1149 1145
1150 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 1146 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
1147 if (ret) {
1148 free_extent_buffer(buf);
1149 return NULL;
1150 }
1151 return buf; 1151 return buf;
1152 1152
1153} 1153}
@@ -1291,11 +1291,10 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1291 btrfs_set_header_owner(leaf, objectid); 1291 btrfs_set_header_owner(leaf, objectid);
1292 root->node = leaf; 1292 root->node = leaf;
1293 1293
1294 write_extent_buffer(leaf, fs_info->fsid, 1294 write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf),
1295 (unsigned long)btrfs_header_fsid(leaf),
1296 BTRFS_FSID_SIZE); 1295 BTRFS_FSID_SIZE);
1297 write_extent_buffer(leaf, fs_info->chunk_tree_uuid, 1296 write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
1298 (unsigned long)btrfs_header_chunk_tree_uuid(leaf), 1297 btrfs_header_chunk_tree_uuid(leaf),
1299 BTRFS_UUID_SIZE); 1298 BTRFS_UUID_SIZE);
1300 btrfs_mark_buffer_dirty(leaf); 1299 btrfs_mark_buffer_dirty(leaf);
1301 1300
@@ -1379,8 +1378,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1379 root->node = leaf; 1378 root->node = leaf;
1380 1379
1381 write_extent_buffer(root->node, root->fs_info->fsid, 1380 write_extent_buffer(root->node, root->fs_info->fsid,
1382 (unsigned long)btrfs_header_fsid(root->node), 1381 btrfs_header_fsid(root->node), BTRFS_FSID_SIZE);
1383 BTRFS_FSID_SIZE);
1384 btrfs_mark_buffer_dirty(root->node); 1382 btrfs_mark_buffer_dirty(root->node);
1385 btrfs_tree_unlock(root->node); 1383 btrfs_tree_unlock(root->node);
1386 return root; 1384 return root;
@@ -1413,11 +1411,11 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1413 log_root->root_key.offset = root->root_key.objectid; 1411 log_root->root_key.offset = root->root_key.objectid;
1414 1412
1415 inode_item = &log_root->root_item.inode; 1413 inode_item = &log_root->root_item.inode;
1416 inode_item->generation = cpu_to_le64(1); 1414 btrfs_set_stack_inode_generation(inode_item, 1);
1417 inode_item->size = cpu_to_le64(3); 1415 btrfs_set_stack_inode_size(inode_item, 3);
1418 inode_item->nlink = cpu_to_le32(1); 1416 btrfs_set_stack_inode_nlink(inode_item, 1);
1419 inode_item->nbytes = cpu_to_le64(root->leafsize); 1417 btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
1420 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 1418 btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
1421 1419
1422 btrfs_set_root_node(&log_root->root_item, log_root->node); 1420 btrfs_set_root_node(&log_root->root_item, log_root->node);
1423 1421
@@ -1428,8 +1426,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1428 return 0; 1426 return 0;
1429} 1427}
1430 1428
1431struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, 1429static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1432 struct btrfs_key *key) 1430 struct btrfs_key *key)
1433{ 1431{
1434 struct btrfs_root *root; 1432 struct btrfs_root *root;
1435 struct btrfs_fs_info *fs_info = tree_root->fs_info; 1433 struct btrfs_fs_info *fs_info = tree_root->fs_info;
@@ -1529,8 +1527,8 @@ fail:
1529 return ret; 1527 return ret;
1530} 1528}
1531 1529
1532struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, 1530static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
1533 u64 root_id) 1531 u64 root_id)
1534{ 1532{
1535 struct btrfs_root *root; 1533 struct btrfs_root *root;
1536 1534
@@ -1581,10 +1579,16 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1581 if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID) 1579 if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
1582 return fs_info->quota_root ? fs_info->quota_root : 1580 return fs_info->quota_root ? fs_info->quota_root :
1583 ERR_PTR(-ENOENT); 1581 ERR_PTR(-ENOENT);
1582 if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
1583 return fs_info->uuid_root ? fs_info->uuid_root :
1584 ERR_PTR(-ENOENT);
1584again: 1585again:
1585 root = btrfs_lookup_fs_root(fs_info, location->objectid); 1586 root = btrfs_lookup_fs_root(fs_info, location->objectid);
1586 if (root) 1587 if (root) {
1588 if (btrfs_root_refs(&root->root_item) == 0)
1589 return ERR_PTR(-ENOENT);
1587 return root; 1590 return root;
1591 }
1588 1592
1589 root = btrfs_read_fs_root(fs_info->tree_root, location); 1593 root = btrfs_read_fs_root(fs_info->tree_root, location);
1590 if (IS_ERR(root)) 1594 if (IS_ERR(root))
@@ -1737,7 +1741,7 @@ static int transaction_kthread(void *arg)
1737 1741
1738 do { 1742 do {
1739 cannot_commit = false; 1743 cannot_commit = false;
1740 delay = HZ * 30; 1744 delay = HZ * root->fs_info->commit_interval;
1741 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1745 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1742 1746
1743 spin_lock(&root->fs_info->trans_lock); 1747 spin_lock(&root->fs_info->trans_lock);
@@ -1749,7 +1753,8 @@ static int transaction_kthread(void *arg)
1749 1753
1750 now = get_seconds(); 1754 now = get_seconds();
1751 if (cur->state < TRANS_STATE_BLOCKED && 1755 if (cur->state < TRANS_STATE_BLOCKED &&
1752 (now < cur->start_time || now - cur->start_time < 30)) { 1756 (now < cur->start_time ||
1757 now - cur->start_time < root->fs_info->commit_interval)) {
1753 spin_unlock(&root->fs_info->trans_lock); 1758 spin_unlock(&root->fs_info->trans_lock);
1754 delay = HZ * 5; 1759 delay = HZ * 5;
1755 goto sleep; 1760 goto sleep;
@@ -2038,6 +2043,12 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
2038 info->quota_root->node = NULL; 2043 info->quota_root->node = NULL;
2039 info->quota_root->commit_root = NULL; 2044 info->quota_root->commit_root = NULL;
2040 } 2045 }
2046 if (info->uuid_root) {
2047 free_extent_buffer(info->uuid_root->node);
2048 free_extent_buffer(info->uuid_root->commit_root);
2049 info->uuid_root->node = NULL;
2050 info->uuid_root->commit_root = NULL;
2051 }
2041 if (chunk_root) { 2052 if (chunk_root) {
2042 free_extent_buffer(info->chunk_root->node); 2053 free_extent_buffer(info->chunk_root->node);
2043 free_extent_buffer(info->chunk_root->commit_root); 2054 free_extent_buffer(info->chunk_root->commit_root);
@@ -2098,11 +2109,14 @@ int open_ctree(struct super_block *sb,
2098 struct btrfs_root *chunk_root; 2109 struct btrfs_root *chunk_root;
2099 struct btrfs_root *dev_root; 2110 struct btrfs_root *dev_root;
2100 struct btrfs_root *quota_root; 2111 struct btrfs_root *quota_root;
2112 struct btrfs_root *uuid_root;
2101 struct btrfs_root *log_tree_root; 2113 struct btrfs_root *log_tree_root;
2102 int ret; 2114 int ret;
2103 int err = -EINVAL; 2115 int err = -EINVAL;
2104 int num_backups_tried = 0; 2116 int num_backups_tried = 0;
2105 int backup_index = 0; 2117 int backup_index = 0;
2118 bool create_uuid_tree;
2119 bool check_uuid_tree;
2106 2120
2107 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); 2121 tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
2108 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); 2122 chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2189,6 +2203,7 @@ int open_ctree(struct super_block *sb,
2189 fs_info->defrag_inodes = RB_ROOT; 2203 fs_info->defrag_inodes = RB_ROOT;
2190 fs_info->free_chunk_space = 0; 2204 fs_info->free_chunk_space = 0;
2191 fs_info->tree_mod_log = RB_ROOT; 2205 fs_info->tree_mod_log = RB_ROOT;
2206 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
2192 2207
2193 /* readahead state */ 2208 /* readahead state */
2194 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2209 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
@@ -2270,6 +2285,7 @@ int open_ctree(struct super_block *sb,
2270 2285
2271 2286
2272 mutex_init(&fs_info->ordered_operations_mutex); 2287 mutex_init(&fs_info->ordered_operations_mutex);
2288 mutex_init(&fs_info->ordered_extent_flush_mutex);
2273 mutex_init(&fs_info->tree_log_mutex); 2289 mutex_init(&fs_info->tree_log_mutex);
2274 mutex_init(&fs_info->chunk_mutex); 2290 mutex_init(&fs_info->chunk_mutex);
2275 mutex_init(&fs_info->transaction_kthread_mutex); 2291 mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2278,6 +2294,7 @@ int open_ctree(struct super_block *sb,
2278 init_rwsem(&fs_info->extent_commit_sem); 2294 init_rwsem(&fs_info->extent_commit_sem);
2279 init_rwsem(&fs_info->cleanup_work_sem); 2295 init_rwsem(&fs_info->cleanup_work_sem);
2280 init_rwsem(&fs_info->subvol_sem); 2296 init_rwsem(&fs_info->subvol_sem);
2297 sema_init(&fs_info->uuid_tree_rescan_sem, 1);
2281 fs_info->dev_replace.lock_owner = 0; 2298 fs_info->dev_replace.lock_owner = 0;
2282 atomic_set(&fs_info->dev_replace.nesting_level, 0); 2299 atomic_set(&fs_info->dev_replace.nesting_level, 0);
2283 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); 2300 mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
@@ -2383,7 +2400,7 @@ int open_ctree(struct super_block *sb,
2383 if (features) { 2400 if (features) {
2384 printk(KERN_ERR "BTRFS: couldn't mount because of " 2401 printk(KERN_ERR "BTRFS: couldn't mount because of "
2385 "unsupported optional features (%Lx).\n", 2402 "unsupported optional features (%Lx).\n",
2386 (unsigned long long)features); 2403 features);
2387 err = -EINVAL; 2404 err = -EINVAL;
2388 goto fail_alloc; 2405 goto fail_alloc;
2389 } 2406 }
@@ -2453,7 +2470,7 @@ int open_ctree(struct super_block *sb,
2453 if (!(sb->s_flags & MS_RDONLY) && features) { 2470 if (!(sb->s_flags & MS_RDONLY) && features) {
2454 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " 2471 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
2455 "unsupported option features (%Lx).\n", 2472 "unsupported option features (%Lx).\n",
2456 (unsigned long long)features); 2473 features);
2457 err = -EINVAL; 2474 err = -EINVAL;
2458 goto fail_alloc; 2475 goto fail_alloc;
2459 } 2476 }
@@ -2466,20 +2483,17 @@ int open_ctree(struct super_block *sb,
2466 &fs_info->generic_worker); 2483 &fs_info->generic_worker);
2467 2484
2468 btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", 2485 btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
2469 fs_info->thread_pool_size, 2486 fs_info->thread_pool_size, NULL);
2470 &fs_info->generic_worker);
2471 2487
2472 btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", 2488 btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
2473 fs_info->thread_pool_size, 2489 fs_info->thread_pool_size, NULL);
2474 &fs_info->generic_worker);
2475 2490
2476 btrfs_init_workers(&fs_info->submit_workers, "submit", 2491 btrfs_init_workers(&fs_info->submit_workers, "submit",
2477 min_t(u64, fs_devices->num_devices, 2492 min_t(u64, fs_devices->num_devices,
2478 fs_info->thread_pool_size), 2493 fs_info->thread_pool_size), NULL);
2479 &fs_info->generic_worker);
2480 2494
2481 btrfs_init_workers(&fs_info->caching_workers, "cache", 2495 btrfs_init_workers(&fs_info->caching_workers, "cache",
2482 2, &fs_info->generic_worker); 2496 fs_info->thread_pool_size, NULL);
2483 2497
2484 /* a higher idle thresh on the submit workers makes it much more 2498 /* a higher idle thresh on the submit workers makes it much more
2485 * likely that bios will be send down in a sane order to the 2499 * likely that bios will be send down in a sane order to the
@@ -2575,7 +2589,7 @@ int open_ctree(struct super_block *sb,
2575 sb->s_blocksize = sectorsize; 2589 sb->s_blocksize = sectorsize;
2576 sb->s_blocksize_bits = blksize_bits(sectorsize); 2590 sb->s_blocksize_bits = blksize_bits(sectorsize);
2577 2591
2578 if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) { 2592 if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
2579 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); 2593 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
2580 goto fail_sb_buffer; 2594 goto fail_sb_buffer;
2581 } 2595 }
@@ -2615,8 +2629,7 @@ int open_ctree(struct super_block *sb,
2615 chunk_root->commit_root = btrfs_root_node(chunk_root); 2629 chunk_root->commit_root = btrfs_root_node(chunk_root);
2616 2630
2617 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, 2631 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
2618 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), 2632 btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE);
2619 BTRFS_UUID_SIZE);
2620 2633
2621 ret = btrfs_read_chunk_tree(chunk_root); 2634 ret = btrfs_read_chunk_tree(chunk_root);
2622 if (ret) { 2635 if (ret) {
@@ -2696,6 +2709,22 @@ retry_root_backup:
2696 fs_info->quota_root = quota_root; 2709 fs_info->quota_root = quota_root;
2697 } 2710 }
2698 2711
2712 location.objectid = BTRFS_UUID_TREE_OBJECTID;
2713 uuid_root = btrfs_read_tree_root(tree_root, &location);
2714 if (IS_ERR(uuid_root)) {
2715 ret = PTR_ERR(uuid_root);
2716 if (ret != -ENOENT)
2717 goto recovery_tree_root;
2718 create_uuid_tree = true;
2719 check_uuid_tree = false;
2720 } else {
2721 uuid_root->track_dirty = 1;
2722 fs_info->uuid_root = uuid_root;
2723 create_uuid_tree = false;
2724 check_uuid_tree =
2725 generation != btrfs_super_uuid_tree_generation(disk_super);
2726 }
2727
2699 fs_info->generation = generation; 2728 fs_info->generation = generation;
2700 fs_info->last_trans_committed = generation; 2729 fs_info->last_trans_committed = generation;
2701 2730
@@ -2882,6 +2911,29 @@ retry_root_backup:
2882 2911
2883 btrfs_qgroup_rescan_resume(fs_info); 2912 btrfs_qgroup_rescan_resume(fs_info);
2884 2913
2914 if (create_uuid_tree) {
2915 pr_info("btrfs: creating UUID tree\n");
2916 ret = btrfs_create_uuid_tree(fs_info);
2917 if (ret) {
2918 pr_warn("btrfs: failed to create the UUID tree %d\n",
2919 ret);
2920 close_ctree(tree_root);
2921 return ret;
2922 }
2923 } else if (check_uuid_tree ||
2924 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
2925 pr_info("btrfs: checking UUID tree\n");
2926 ret = btrfs_check_uuid_tree(fs_info);
2927 if (ret) {
2928 pr_warn("btrfs: failed to check the UUID tree %d\n",
2929 ret);
2930 close_ctree(tree_root);
2931 return ret;
2932 }
2933 } else {
2934 fs_info->update_uuid_tree_gen = 1;
2935 }
2936
2885 return 0; 2937 return 0;
2886 2938
2887fail_qgroup: 2939fail_qgroup:
@@ -2983,15 +3035,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
2983 */ 3035 */
2984 for (i = 0; i < 1; i++) { 3036 for (i = 0; i < 1; i++) {
2985 bytenr = btrfs_sb_offset(i); 3037 bytenr = btrfs_sb_offset(i);
2986 if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) 3038 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
3039 i_size_read(bdev->bd_inode))
2987 break; 3040 break;
2988 bh = __bread(bdev, bytenr / 4096, 4096); 3041 bh = __bread(bdev, bytenr / 4096,
3042 BTRFS_SUPER_INFO_SIZE);
2989 if (!bh) 3043 if (!bh)
2990 continue; 3044 continue;
2991 3045
2992 super = (struct btrfs_super_block *)bh->b_data; 3046 super = (struct btrfs_super_block *)bh->b_data;
2993 if (btrfs_super_bytenr(super) != bytenr || 3047 if (btrfs_super_bytenr(super) != bytenr ||
2994 super->magic != cpu_to_le64(BTRFS_MAGIC)) { 3048 btrfs_super_magic(super) != BTRFS_MAGIC) {
2995 brelse(bh); 3049 brelse(bh);
2996 continue; 3050 continue;
2997 } 3051 }
@@ -3311,7 +3365,6 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3311 int total_errors = 0; 3365 int total_errors = 0;
3312 u64 flags; 3366 u64 flags;
3313 3367
3314 max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
3315 do_barriers = !btrfs_test_opt(root, NOBARRIER); 3368 do_barriers = !btrfs_test_opt(root, NOBARRIER);
3316 backup_super_roots(root->fs_info); 3369 backup_super_roots(root->fs_info);
3317 3370
@@ -3320,6 +3373,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3320 3373
3321 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 3374 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
3322 head = &root->fs_info->fs_devices->devices; 3375 head = &root->fs_info->fs_devices->devices;
3376 max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
3323 3377
3324 if (do_barriers) { 3378 if (do_barriers) {
3325 ret = barrier_all_devices(root->fs_info); 3379 ret = barrier_all_devices(root->fs_info);
@@ -3362,8 +3416,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3362 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 3416 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
3363 total_errors); 3417 total_errors);
3364 3418
3365 /* This shouldn't happen. FUA is masked off if unsupported */ 3419 /* FUA is masked off if unsupported and can't be the reason */
3366 BUG(); 3420 btrfs_error(root->fs_info, -EIO,
3421 "%d errors while writing supers", total_errors);
3422 return -EIO;
3367 } 3423 }
3368 3424
3369 total_errors = 0; 3425 total_errors = 0;
@@ -3421,6 +3477,8 @@ static void free_fs_root(struct btrfs_root *root)
3421{ 3477{
3422 iput(root->cache_inode); 3478 iput(root->cache_inode);
3423 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 3479 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
3480 btrfs_free_block_rsv(root, root->orphan_block_rsv);
3481 root->orphan_block_rsv = NULL;
3424 if (root->anon_dev) 3482 if (root->anon_dev)
3425 free_anon_bdev(root->anon_dev); 3483 free_anon_bdev(root->anon_dev);
3426 free_extent_buffer(root->node); 3484 free_extent_buffer(root->node);
@@ -3510,6 +3568,11 @@ int close_ctree(struct btrfs_root *root)
3510 fs_info->closing = 1; 3568 fs_info->closing = 1;
3511 smp_mb(); 3569 smp_mb();
3512 3570
3571 /* wait for the uuid_scan task to finish */
3572 down(&fs_info->uuid_tree_rescan_sem);
3573 /* avoid complains from lockdep et al., set sem back to initial state */
3574 up(&fs_info->uuid_tree_rescan_sem);
3575
3513 /* pause restriper - we want to resume on mount */ 3576 /* pause restriper - we want to resume on mount */
3514 btrfs_pause_balance(fs_info); 3577 btrfs_pause_balance(fs_info);
3515 3578
@@ -3573,6 +3636,9 @@ int close_ctree(struct btrfs_root *root)
3573 3636
3574 btrfs_free_stripe_hash_table(fs_info); 3637 btrfs_free_stripe_hash_table(fs_info);
3575 3638
3639 btrfs_free_block_rsv(root, root->orphan_block_rsv);
3640 root->orphan_block_rsv = NULL;
3641
3576 return 0; 3642 return 0;
3577} 3643}
3578 3644
@@ -3608,9 +3674,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3608 if (transid != root->fs_info->generation) 3674 if (transid != root->fs_info->generation)
3609 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " 3675 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
3610 "found %llu running %llu\n", 3676 "found %llu running %llu\n",
3611 (unsigned long long)buf->start, 3677 buf->start, transid, root->fs_info->generation);
3612 (unsigned long long)transid,
3613 (unsigned long long)root->fs_info->generation);
3614 was_dirty = set_extent_buffer_dirty(buf); 3678 was_dirty = set_extent_buffer_dirty(buf);
3615 if (!was_dirty) 3679 if (!was_dirty)
3616 __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, 3680 __percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
@@ -3744,8 +3808,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
3744 spin_unlock(&fs_info->ordered_root_lock); 3808 spin_unlock(&fs_info->ordered_root_lock);
3745} 3809}
3746 3810
3747int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 3811static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3748 struct btrfs_root *root) 3812 struct btrfs_root *root)
3749{ 3813{
3750 struct rb_node *node; 3814 struct rb_node *node;
3751 struct btrfs_delayed_ref_root *delayed_refs; 3815 struct btrfs_delayed_ref_root *delayed_refs;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1204c8ef6f32..cfb3cf711b34 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -113,7 +113,8 @@ static noinline int
113block_group_cache_done(struct btrfs_block_group_cache *cache) 113block_group_cache_done(struct btrfs_block_group_cache *cache)
114{ 114{
115 smp_mb(); 115 smp_mb();
116 return cache->cached == BTRFS_CACHE_FINISHED; 116 return cache->cached == BTRFS_CACHE_FINISHED ||
117 cache->cached == BTRFS_CACHE_ERROR;
117} 118}
118 119
119static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 120static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
@@ -389,7 +390,7 @@ static noinline void caching_thread(struct btrfs_work *work)
389 u64 total_found = 0; 390 u64 total_found = 0;
390 u64 last = 0; 391 u64 last = 0;
391 u32 nritems; 392 u32 nritems;
392 int ret = 0; 393 int ret = -ENOMEM;
393 394
394 caching_ctl = container_of(work, struct btrfs_caching_control, work); 395 caching_ctl = container_of(work, struct btrfs_caching_control, work);
395 block_group = caching_ctl->block_group; 396 block_group = caching_ctl->block_group;
@@ -420,6 +421,7 @@ again:
420 /* need to make sure the commit_root doesn't disappear */ 421 /* need to make sure the commit_root doesn't disappear */
421 down_read(&fs_info->extent_commit_sem); 422 down_read(&fs_info->extent_commit_sem);
422 423
424next:
423 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 425 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
424 if (ret < 0) 426 if (ret < 0)
425 goto err; 427 goto err;
@@ -459,6 +461,16 @@ again:
459 continue; 461 continue;
460 } 462 }
461 463
464 if (key.objectid < last) {
465 key.objectid = last;
466 key.offset = 0;
467 key.type = BTRFS_EXTENT_ITEM_KEY;
468
469 caching_ctl->progress = last;
470 btrfs_release_path(path);
471 goto next;
472 }
473
462 if (key.objectid < block_group->key.objectid) { 474 if (key.objectid < block_group->key.objectid) {
463 path->slots[0]++; 475 path->slots[0]++;
464 continue; 476 continue;
@@ -506,6 +518,12 @@ err:
506 518
507 mutex_unlock(&caching_ctl->mutex); 519 mutex_unlock(&caching_ctl->mutex);
508out: 520out:
521 if (ret) {
522 spin_lock(&block_group->lock);
523 block_group->caching_ctl = NULL;
524 block_group->cached = BTRFS_CACHE_ERROR;
525 spin_unlock(&block_group->lock);
526 }
509 wake_up(&caching_ctl->wait); 527 wake_up(&caching_ctl->wait);
510 528
511 put_caching_control(caching_ctl); 529 put_caching_control(caching_ctl);
@@ -771,10 +789,23 @@ again:
771 goto out_free; 789 goto out_free;
772 790
773 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { 791 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
774 key.type = BTRFS_EXTENT_ITEM_KEY; 792 metadata = 0;
775 key.offset = root->leafsize; 793 if (path->slots[0]) {
776 btrfs_release_path(path); 794 path->slots[0]--;
777 goto again; 795 btrfs_item_key_to_cpu(path->nodes[0], &key,
796 path->slots[0]);
797 if (key.objectid == bytenr &&
798 key.type == BTRFS_EXTENT_ITEM_KEY &&
799 key.offset == root->leafsize)
800 ret = 0;
801 }
802 if (ret) {
803 key.objectid = bytenr;
804 key.type = BTRFS_EXTENT_ITEM_KEY;
805 key.offset = root->leafsize;
806 btrfs_release_path(path);
807 goto again;
808 }
778 } 809 }
779 810
780 if (ret == 0) { 811 if (ret == 0) {
@@ -2011,6 +2042,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
2011 ins.type = BTRFS_EXTENT_ITEM_KEY; 2042 ins.type = BTRFS_EXTENT_ITEM_KEY;
2012 2043
2013 ref = btrfs_delayed_node_to_data_ref(node); 2044 ref = btrfs_delayed_node_to_data_ref(node);
2045 trace_run_delayed_data_ref(node, ref, node->action);
2046
2014 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 2047 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
2015 parent = ref->parent; 2048 parent = ref->parent;
2016 else 2049 else
@@ -2154,6 +2187,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2154 SKINNY_METADATA); 2187 SKINNY_METADATA);
2155 2188
2156 ref = btrfs_delayed_node_to_tree_ref(node); 2189 ref = btrfs_delayed_node_to_tree_ref(node);
2190 trace_run_delayed_tree_ref(node, ref, node->action);
2191
2157 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2192 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2158 parent = ref->parent; 2193 parent = ref->parent;
2159 else 2194 else
@@ -2212,6 +2247,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2212 */ 2247 */
2213 BUG_ON(extent_op); 2248 BUG_ON(extent_op);
2214 head = btrfs_delayed_node_to_head(node); 2249 head = btrfs_delayed_node_to_head(node);
2250 trace_run_delayed_ref_head(node, head, node->action);
2251
2215 if (insert_reserved) { 2252 if (insert_reserved) {
2216 btrfs_pin_extent(root, node->bytenr, 2253 btrfs_pin_extent(root, node->bytenr,
2217 node->num_bytes, 1); 2254 node->num_bytes, 1);
@@ -2403,6 +2440,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2403 default: 2440 default:
2404 WARN_ON(1); 2441 WARN_ON(1);
2405 } 2442 }
2443 } else {
2444 list_del_init(&locked_ref->cluster);
2406 } 2445 }
2407 spin_unlock(&delayed_refs->lock); 2446 spin_unlock(&delayed_refs->lock);
2408 2447
@@ -2425,7 +2464,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2425 * list before we release it. 2464 * list before we release it.
2426 */ 2465 */
2427 if (btrfs_delayed_ref_is_head(ref)) { 2466 if (btrfs_delayed_ref_is_head(ref)) {
2428 list_del_init(&locked_ref->cluster);
2429 btrfs_delayed_ref_unlock(locked_ref); 2467 btrfs_delayed_ref_unlock(locked_ref);
2430 locked_ref = NULL; 2468 locked_ref = NULL;
2431 } 2469 }
@@ -3799,8 +3837,12 @@ again:
3799 if (force < space_info->force_alloc) 3837 if (force < space_info->force_alloc)
3800 force = space_info->force_alloc; 3838 force = space_info->force_alloc;
3801 if (space_info->full) { 3839 if (space_info->full) {
3840 if (should_alloc_chunk(extent_root, space_info, force))
3841 ret = -ENOSPC;
3842 else
3843 ret = 0;
3802 spin_unlock(&space_info->lock); 3844 spin_unlock(&space_info->lock);
3803 return 0; 3845 return ret;
3804 } 3846 }
3805 3847
3806 if (!should_alloc_chunk(extent_root, space_info, force)) { 3848 if (!should_alloc_chunk(extent_root, space_info, force)) {
@@ -4320,6 +4362,9 @@ static struct btrfs_block_rsv *get_block_rsv(
4320 if (root == root->fs_info->csum_root && trans->adding_csums) 4362 if (root == root->fs_info->csum_root && trans->adding_csums)
4321 block_rsv = trans->block_rsv; 4363 block_rsv = trans->block_rsv;
4322 4364
4365 if (root == root->fs_info->uuid_root)
4366 block_rsv = trans->block_rsv;
4367
4323 if (!block_rsv) 4368 if (!block_rsv)
4324 block_rsv = root->block_rsv; 4369 block_rsv = root->block_rsv;
4325 4370
@@ -4729,10 +4774,12 @@ void btrfs_orphan_release_metadata(struct inode *inode)
4729int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, 4774int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
4730 struct btrfs_block_rsv *rsv, 4775 struct btrfs_block_rsv *rsv,
4731 int items, 4776 int items,
4732 u64 *qgroup_reserved) 4777 u64 *qgroup_reserved,
4778 bool use_global_rsv)
4733{ 4779{
4734 u64 num_bytes; 4780 u64 num_bytes;
4735 int ret; 4781 int ret;
4782 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4736 4783
4737 if (root->fs_info->quota_enabled) { 4784 if (root->fs_info->quota_enabled) {
4738 /* One for parent inode, two for dir entries */ 4785 /* One for parent inode, two for dir entries */
@@ -4751,6 +4798,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
4751 BTRFS_BLOCK_GROUP_METADATA); 4798 BTRFS_BLOCK_GROUP_METADATA);
4752 ret = btrfs_block_rsv_add(root, rsv, num_bytes, 4799 ret = btrfs_block_rsv_add(root, rsv, num_bytes,
4753 BTRFS_RESERVE_FLUSH_ALL); 4800 BTRFS_RESERVE_FLUSH_ALL);
4801
4802 if (ret == -ENOSPC && use_global_rsv)
4803 ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
4804
4754 if (ret) { 4805 if (ret) {
4755 if (*qgroup_reserved) 4806 if (*qgroup_reserved)
4756 btrfs_qgroup_free(root, *qgroup_reserved); 4807 btrfs_qgroup_free(root, *qgroup_reserved);
@@ -5668,7 +5719,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5668 5719
5669 if (ret) { 5720 if (ret) {
5670 btrfs_err(info, "umm, got %d back from search, was looking for %llu", 5721 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5671 ret, (unsigned long long)bytenr); 5722 ret, bytenr);
5672 if (ret > 0) 5723 if (ret > 0)
5673 btrfs_print_leaf(extent_root, 5724 btrfs_print_leaf(extent_root,
5674 path->nodes[0]); 5725 path->nodes[0]);
@@ -5684,11 +5735,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5684 WARN_ON(1); 5735 WARN_ON(1);
5685 btrfs_err(info, 5736 btrfs_err(info,
5686 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", 5737 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
5687 (unsigned long long)bytenr, 5738 bytenr, parent, root_objectid, owner_objectid,
5688 (unsigned long long)parent, 5739 owner_offset);
5689 (unsigned long long)root_objectid,
5690 (unsigned long long)owner_objectid,
5691 (unsigned long long)owner_offset);
5692 } else { 5740 } else {
5693 btrfs_abort_transaction(trans, extent_root, ret); 5741 btrfs_abort_transaction(trans, extent_root, ret);
5694 goto out; 5742 goto out;
@@ -5717,7 +5765,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5717 -1, 1); 5765 -1, 1);
5718 if (ret) { 5766 if (ret) {
5719 btrfs_err(info, "umm, got %d back from search, was looking for %llu", 5767 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5720 ret, (unsigned long long)bytenr); 5768 ret, bytenr);
5721 btrfs_print_leaf(extent_root, path->nodes[0]); 5769 btrfs_print_leaf(extent_root, path->nodes[0]);
5722 } 5770 }
5723 if (ret < 0) { 5771 if (ret < 0) {
@@ -5999,8 +6047,11 @@ static u64 stripe_align(struct btrfs_root *root,
5999 * for our min num_bytes. Another option is to have it go ahead 6047 * for our min num_bytes. Another option is to have it go ahead
6000 * and look in the rbtree for a free extent of a given size, but this 6048 * and look in the rbtree for a free extent of a given size, but this
6001 * is a good start. 6049 * is a good start.
6050 *
6051 * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
6052 * any of the information in this block group.
6002 */ 6053 */
6003static noinline int 6054static noinline void
6004wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 6055wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
6005 u64 num_bytes) 6056 u64 num_bytes)
6006{ 6057{
@@ -6008,28 +6059,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
6008 6059
6009 caching_ctl = get_caching_control(cache); 6060 caching_ctl = get_caching_control(cache);
6010 if (!caching_ctl) 6061 if (!caching_ctl)
6011 return 0; 6062 return;
6012 6063
6013 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 6064 wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
6014 (cache->free_space_ctl->free_space >= num_bytes)); 6065 (cache->free_space_ctl->free_space >= num_bytes));
6015 6066
6016 put_caching_control(caching_ctl); 6067 put_caching_control(caching_ctl);
6017 return 0;
6018} 6068}
6019 6069
6020static noinline int 6070static noinline int
6021wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 6071wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
6022{ 6072{
6023 struct btrfs_caching_control *caching_ctl; 6073 struct btrfs_caching_control *caching_ctl;
6074 int ret = 0;
6024 6075
6025 caching_ctl = get_caching_control(cache); 6076 caching_ctl = get_caching_control(cache);
6026 if (!caching_ctl) 6077 if (!caching_ctl)
6027 return 0; 6078 return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
6028 6079
6029 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 6080 wait_event(caching_ctl->wait, block_group_cache_done(cache));
6030 6081 if (cache->cached == BTRFS_CACHE_ERROR)
6082 ret = -EIO;
6031 put_caching_control(caching_ctl); 6083 put_caching_control(caching_ctl);
6032 return 0; 6084 return ret;
6033} 6085}
6034 6086
6035int __get_raid_index(u64 flags) 6087int __get_raid_index(u64 flags)
@@ -6070,8 +6122,7 @@ enum btrfs_loop_type {
6070 * ins->offset == number of blocks 6122 * ins->offset == number of blocks
6071 * Any available blocks before search_start are skipped. 6123 * Any available blocks before search_start are skipped.
6072 */ 6124 */
6073static noinline int find_free_extent(struct btrfs_trans_handle *trans, 6125static noinline int find_free_extent(struct btrfs_root *orig_root,
6074 struct btrfs_root *orig_root,
6075 u64 num_bytes, u64 empty_size, 6126 u64 num_bytes, u64 empty_size,
6076 u64 hint_byte, struct btrfs_key *ins, 6127 u64 hint_byte, struct btrfs_key *ins,
6077 u64 flags) 6128 u64 flags)
@@ -6212,6 +6263,8 @@ have_block_group:
6212 ret = 0; 6263 ret = 0;
6213 } 6264 }
6214 6265
6266 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
6267 goto loop;
6215 if (unlikely(block_group->ro)) 6268 if (unlikely(block_group->ro))
6216 goto loop; 6269 goto loop;
6217 6270
@@ -6292,10 +6345,10 @@ refill_cluster:
6292 block_group->full_stripe_len); 6345 block_group->full_stripe_len);
6293 6346
6294 /* allocate a cluster in this block group */ 6347 /* allocate a cluster in this block group */
6295 ret = btrfs_find_space_cluster(trans, root, 6348 ret = btrfs_find_space_cluster(root, block_group,
6296 block_group, last_ptr, 6349 last_ptr, search_start,
6297 search_start, num_bytes, 6350 num_bytes,
6298 aligned_cluster); 6351 aligned_cluster);
6299 if (ret == 0) { 6352 if (ret == 0) {
6300 /* 6353 /*
6301 * now pull our allocation out of this 6354 * now pull our allocation out of this
@@ -6426,17 +6479,28 @@ loop:
6426 index = 0; 6479 index = 0;
6427 loop++; 6480 loop++;
6428 if (loop == LOOP_ALLOC_CHUNK) { 6481 if (loop == LOOP_ALLOC_CHUNK) {
6482 struct btrfs_trans_handle *trans;
6483
6484 trans = btrfs_join_transaction(root);
6485 if (IS_ERR(trans)) {
6486 ret = PTR_ERR(trans);
6487 goto out;
6488 }
6489
6429 ret = do_chunk_alloc(trans, root, flags, 6490 ret = do_chunk_alloc(trans, root, flags,
6430 CHUNK_ALLOC_FORCE); 6491 CHUNK_ALLOC_FORCE);
6431 /* 6492 /*
6432 * Do not bail out on ENOSPC since we 6493 * Do not bail out on ENOSPC since we
6433 * can do more things. 6494 * can do more things.
6434 */ 6495 */
6435 if (ret < 0 && ret != -ENOSPC) { 6496 if (ret < 0 && ret != -ENOSPC)
6436 btrfs_abort_transaction(trans, 6497 btrfs_abort_transaction(trans,
6437 root, ret); 6498 root, ret);
6499 else
6500 ret = 0;
6501 btrfs_end_transaction(trans, root);
6502 if (ret)
6438 goto out; 6503 goto out;
6439 }
6440 } 6504 }
6441 6505
6442 if (loop == LOOP_NO_EMPTY_SIZE) { 6506 if (loop == LOOP_NO_EMPTY_SIZE) {
@@ -6463,19 +6527,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6463 6527
6464 spin_lock(&info->lock); 6528 spin_lock(&info->lock);
6465 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", 6529 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
6466 (unsigned long long)info->flags, 6530 info->flags,
6467 (unsigned long long)(info->total_bytes - info->bytes_used - 6531 info->total_bytes - info->bytes_used - info->bytes_pinned -
6468 info->bytes_pinned - info->bytes_reserved - 6532 info->bytes_reserved - info->bytes_readonly,
6469 info->bytes_readonly),
6470 (info->full) ? "" : "not "); 6533 (info->full) ? "" : "not ");
6471 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 6534 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
6472 "reserved=%llu, may_use=%llu, readonly=%llu\n", 6535 "reserved=%llu, may_use=%llu, readonly=%llu\n",
6473 (unsigned long long)info->total_bytes, 6536 info->total_bytes, info->bytes_used, info->bytes_pinned,
6474 (unsigned long long)info->bytes_used, 6537 info->bytes_reserved, info->bytes_may_use,
6475 (unsigned long long)info->bytes_pinned, 6538 info->bytes_readonly);
6476 (unsigned long long)info->bytes_reserved,
6477 (unsigned long long)info->bytes_may_use,
6478 (unsigned long long)info->bytes_readonly);
6479 spin_unlock(&info->lock); 6539 spin_unlock(&info->lock);
6480 6540
6481 if (!dump_block_groups) 6541 if (!dump_block_groups)
@@ -6486,12 +6546,9 @@ again:
6486 list_for_each_entry(cache, &info->block_groups[index], list) { 6546 list_for_each_entry(cache, &info->block_groups[index], list) {
6487 spin_lock(&cache->lock); 6547 spin_lock(&cache->lock);
6488 printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", 6548 printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
6489 (unsigned long long)cache->key.objectid, 6549 cache->key.objectid, cache->key.offset,
6490 (unsigned long long)cache->key.offset, 6550 btrfs_block_group_used(&cache->item), cache->pinned,
6491 (unsigned long long)btrfs_block_group_used(&cache->item), 6551 cache->reserved, cache->ro ? "[readonly]" : "");
6492 (unsigned long long)cache->pinned,
6493 (unsigned long long)cache->reserved,
6494 cache->ro ? "[readonly]" : "");
6495 btrfs_dump_free_space(cache, bytes); 6552 btrfs_dump_free_space(cache, bytes);
6496 spin_unlock(&cache->lock); 6553 spin_unlock(&cache->lock);
6497 } 6554 }
@@ -6500,8 +6557,7 @@ again:
6500 up_read(&info->groups_sem); 6557 up_read(&info->groups_sem);
6501} 6558}
6502 6559
6503int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 6560int btrfs_reserve_extent(struct btrfs_root *root,
6504 struct btrfs_root *root,
6505 u64 num_bytes, u64 min_alloc_size, 6561 u64 num_bytes, u64 min_alloc_size,
6506 u64 empty_size, u64 hint_byte, 6562 u64 empty_size, u64 hint_byte,
6507 struct btrfs_key *ins, int is_data) 6563 struct btrfs_key *ins, int is_data)
@@ -6513,8 +6569,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
6513 flags = btrfs_get_alloc_profile(root, is_data); 6569 flags = btrfs_get_alloc_profile(root, is_data);
6514again: 6570again:
6515 WARN_ON(num_bytes < root->sectorsize); 6571 WARN_ON(num_bytes < root->sectorsize);
6516 ret = find_free_extent(trans, root, num_bytes, empty_size, 6572 ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
6517 hint_byte, ins, flags); 6573 flags);
6518 6574
6519 if (ret == -ENOSPC) { 6575 if (ret == -ENOSPC) {
6520 if (!final_tried) { 6576 if (!final_tried) {
@@ -6529,8 +6585,7 @@ again:
6529 6585
6530 sinfo = __find_space_info(root->fs_info, flags); 6586 sinfo = __find_space_info(root->fs_info, flags);
6531 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu", 6587 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
6532 (unsigned long long)flags, 6588 flags, num_bytes);
6533 (unsigned long long)num_bytes);
6534 if (sinfo) 6589 if (sinfo)
6535 dump_space_info(sinfo, num_bytes, 1); 6590 dump_space_info(sinfo, num_bytes, 1);
6536 } 6591 }
@@ -6550,7 +6605,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6550 cache = btrfs_lookup_block_group(root->fs_info, start); 6605 cache = btrfs_lookup_block_group(root->fs_info, start);
6551 if (!cache) { 6606 if (!cache) {
6552 btrfs_err(root->fs_info, "Unable to find block group for %llu", 6607 btrfs_err(root->fs_info, "Unable to find block group for %llu",
6553 (unsigned long long)start); 6608 start);
6554 return -ENOSPC; 6609 return -ENOSPC;
6555 } 6610 }
6556 6611
@@ -6646,8 +6701,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6646 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6701 ret = update_block_group(root, ins->objectid, ins->offset, 1);
6647 if (ret) { /* -ENOENT, logic error */ 6702 if (ret) { /* -ENOENT, logic error */
6648 btrfs_err(fs_info, "update block group failed for %llu %llu", 6703 btrfs_err(fs_info, "update block group failed for %llu %llu",
6649 (unsigned long long)ins->objectid, 6704 ins->objectid, ins->offset);
6650 (unsigned long long)ins->offset);
6651 BUG(); 6705 BUG();
6652 } 6706 }
6653 return ret; 6707 return ret;
@@ -6719,8 +6773,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6719 ret = update_block_group(root, ins->objectid, root->leafsize, 1); 6773 ret = update_block_group(root, ins->objectid, root->leafsize, 1);
6720 if (ret) { /* -ENOENT, logic error */ 6774 if (ret) { /* -ENOENT, logic error */
6721 btrfs_err(fs_info, "update block group failed for %llu %llu", 6775 btrfs_err(fs_info, "update block group failed for %llu %llu",
6722 (unsigned long long)ins->objectid, 6776 ins->objectid, ins->offset);
6723 (unsigned long long)ins->offset);
6724 BUG(); 6777 BUG();
6725 } 6778 }
6726 return ret; 6779 return ret;
@@ -6902,7 +6955,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6902 if (IS_ERR(block_rsv)) 6955 if (IS_ERR(block_rsv))
6903 return ERR_CAST(block_rsv); 6956 return ERR_CAST(block_rsv);
6904 6957
6905 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, 6958 ret = btrfs_reserve_extent(root, blocksize, blocksize,
6906 empty_size, hint, &ins, 0); 6959 empty_size, hint, &ins, 0);
6907 if (ret) { 6960 if (ret) {
6908 unuse_block_rsv(root->fs_info, block_rsv, blocksize); 6961 unuse_block_rsv(root->fs_info, block_rsv, blocksize);
@@ -7173,6 +7226,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7173 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 7226 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
7174 if (!next) 7227 if (!next)
7175 return -ENOMEM; 7228 return -ENOMEM;
7229 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
7230 level - 1);
7176 reada = 1; 7231 reada = 1;
7177 } 7232 }
7178 btrfs_tree_lock(next); 7233 btrfs_tree_lock(next);
@@ -7658,7 +7713,7 @@ out:
7658 * don't have it in the radix (like when we recover after a power fail 7713 * don't have it in the radix (like when we recover after a power fail
7659 * or unmount) so we don't leak memory. 7714 * or unmount) so we don't leak memory.
7660 */ 7715 */
7661 if (root_dropped == false) 7716 if (!for_reloc && root_dropped == false)
7662 btrfs_add_dead_root(root); 7717 btrfs_add_dead_root(root);
7663 if (err) 7718 if (err)
7664 btrfs_std_error(root->fs_info, err); 7719 btrfs_std_error(root->fs_info, err);
@@ -8192,7 +8247,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8192 * We haven't cached this block group, which means we could 8247 * We haven't cached this block group, which means we could
8193 * possibly have excluded extents on this block group. 8248 * possibly have excluded extents on this block group.
8194 */ 8249 */
8195 if (block_group->cached == BTRFS_CACHE_NO) 8250 if (block_group->cached == BTRFS_CACHE_NO ||
8251 block_group->cached == BTRFS_CACHE_ERROR)
8196 free_excluded_extents(info->extent_root, block_group); 8252 free_excluded_extents(info->extent_root, block_group);
8197 8253
8198 btrfs_remove_free_space_cache(block_group); 8254 btrfs_remove_free_space_cache(block_group);
@@ -8409,9 +8465,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8409 * avoid allocating from un-mirrored block group if there are 8465 * avoid allocating from un-mirrored block group if there are
8410 * mirrored block groups. 8466 * mirrored block groups.
8411 */ 8467 */
8412 list_for_each_entry(cache, &space_info->block_groups[3], list) 8468 list_for_each_entry(cache,
8469 &space_info->block_groups[BTRFS_RAID_RAID0],
8470 list)
8413 set_block_group_ro(cache, 1); 8471 set_block_group_ro(cache, 1);
8414 list_for_each_entry(cache, &space_info->block_groups[4], list) 8472 list_for_each_entry(cache,
8473 &space_info->block_groups[BTRFS_RAID_SINGLE],
8474 list)
8415 set_block_group_ro(cache, 1); 8475 set_block_group_ro(cache, 1);
8416 } 8476 }
8417 8477
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe443fece851..09582b81640c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void)
61 state = list_entry(states.next, struct extent_state, leak_list); 61 state = list_entry(states.next, struct extent_state, leak_list);
62 printk(KERN_ERR "btrfs state leak: start %llu end %llu " 62 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
63 "state %lu in tree %p refs %d\n", 63 "state %lu in tree %p refs %d\n",
64 (unsigned long long)state->start, 64 state->start, state->end, state->state, state->tree,
65 (unsigned long long)state->end, 65 atomic_read(&state->refs));
66 state->state, state->tree, atomic_read(&state->refs));
67 list_del(&state->leak_list); 66 list_del(&state->leak_list);
68 kmem_cache_free(extent_state_cache, state); 67 kmem_cache_free(extent_state_cache, state);
69 } 68 }
@@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void)
71 while (!list_empty(&buffers)) { 70 while (!list_empty(&buffers)) {
72 eb = list_entry(buffers.next, struct extent_buffer, leak_list); 71 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
73 printk(KERN_ERR "btrfs buffer leak start %llu len %lu " 72 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
74 "refs %d\n", (unsigned long long)eb->start, 73 "refs %d\n",
75 eb->len, atomic_read(&eb->refs)); 74 eb->start, eb->len, atomic_read(&eb->refs));
76 list_del(&eb->leak_list); 75 list_del(&eb->leak_list);
77 kmem_cache_free(extent_buffer_cache, eb); 76 kmem_cache_free(extent_buffer_cache, eb);
78 } 77 }
@@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
88 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { 87 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
89 printk_ratelimited(KERN_DEBUG 88 printk_ratelimited(KERN_DEBUG
90 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", 89 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
91 caller, 90 caller, btrfs_ino(inode), isize, start, end);
92 (unsigned long long)btrfs_ino(inode),
93 (unsigned long long)isize,
94 (unsigned long long)start,
95 (unsigned long long)end);
96 } 91 }
97} 92}
98#else 93#else
@@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree,
388 383
389 if (end < start) 384 if (end < start)
390 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", 385 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
391 (unsigned long long)end, 386 end, start);
392 (unsigned long long)start);
393 state->start = start; 387 state->start = start;
394 state->end = end; 388 state->end = end;
395 389
@@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree,
400 struct extent_state *found; 394 struct extent_state *found;
401 found = rb_entry(node, struct extent_state, rb_node); 395 found = rb_entry(node, struct extent_state, rb_node);
402 printk(KERN_ERR "btrfs found node %llu %llu on insert of " 396 printk(KERN_ERR "btrfs found node %llu %llu on insert of "
403 "%llu %llu\n", (unsigned long long)found->start, 397 "%llu %llu\n",
404 (unsigned long long)found->end, 398 found->start, found->end, start, end);
405 (unsigned long long)start, (unsigned long long)end);
406 return -EEXIST; 399 return -EEXIST;
407 } 400 }
408 state->tree = tree; 401 state->tree = tree;
@@ -762,15 +755,6 @@ static void cache_state(struct extent_state *state,
762 } 755 }
763} 756}
764 757
765static void uncache_state(struct extent_state **cached_ptr)
766{
767 if (cached_ptr && (*cached_ptr)) {
768 struct extent_state *state = *cached_ptr;
769 *cached_ptr = NULL;
770 free_extent_state(state);
771 }
772}
773
774/* 758/*
775 * set some bits on a range in the tree. This may require allocations or 759 * set some bits on a range in the tree. This may require allocations or
776 * sleeping, so the gfp mask is used to indicate what is allowed. 760 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -1687,31 +1671,21 @@ out_failed:
1687 return found; 1671 return found;
1688} 1672}
1689 1673
1690int extent_clear_unlock_delalloc(struct inode *inode, 1674int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
1691 struct extent_io_tree *tree, 1675 struct page *locked_page,
1692 u64 start, u64 end, struct page *locked_page, 1676 unsigned long clear_bits,
1693 unsigned long op) 1677 unsigned long page_ops)
1694{ 1678{
1679 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
1695 int ret; 1680 int ret;
1696 struct page *pages[16]; 1681 struct page *pages[16];
1697 unsigned long index = start >> PAGE_CACHE_SHIFT; 1682 unsigned long index = start >> PAGE_CACHE_SHIFT;
1698 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1683 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1699 unsigned long nr_pages = end_index - index + 1; 1684 unsigned long nr_pages = end_index - index + 1;
1700 int i; 1685 int i;
1701 unsigned long clear_bits = 0;
1702
1703 if (op & EXTENT_CLEAR_UNLOCK)
1704 clear_bits |= EXTENT_LOCKED;
1705 if (op & EXTENT_CLEAR_DIRTY)
1706 clear_bits |= EXTENT_DIRTY;
1707
1708 if (op & EXTENT_CLEAR_DELALLOC)
1709 clear_bits |= EXTENT_DELALLOC;
1710 1686
1711 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); 1687 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1712 if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 1688 if (page_ops == 0)
1713 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
1714 EXTENT_SET_PRIVATE2)))
1715 return 0; 1689 return 0;
1716 1690
1717 while (nr_pages > 0) { 1691 while (nr_pages > 0) {
@@ -1720,20 +1694,20 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1720 nr_pages, ARRAY_SIZE(pages)), pages); 1694 nr_pages, ARRAY_SIZE(pages)), pages);
1721 for (i = 0; i < ret; i++) { 1695 for (i = 0; i < ret; i++) {
1722 1696
1723 if (op & EXTENT_SET_PRIVATE2) 1697 if (page_ops & PAGE_SET_PRIVATE2)
1724 SetPagePrivate2(pages[i]); 1698 SetPagePrivate2(pages[i]);
1725 1699
1726 if (pages[i] == locked_page) { 1700 if (pages[i] == locked_page) {
1727 page_cache_release(pages[i]); 1701 page_cache_release(pages[i]);
1728 continue; 1702 continue;
1729 } 1703 }
1730 if (op & EXTENT_CLEAR_DIRTY) 1704 if (page_ops & PAGE_CLEAR_DIRTY)
1731 clear_page_dirty_for_io(pages[i]); 1705 clear_page_dirty_for_io(pages[i]);
1732 if (op & EXTENT_SET_WRITEBACK) 1706 if (page_ops & PAGE_SET_WRITEBACK)
1733 set_page_writeback(pages[i]); 1707 set_page_writeback(pages[i]);
1734 if (op & EXTENT_END_WRITEBACK) 1708 if (page_ops & PAGE_END_WRITEBACK)
1735 end_page_writeback(pages[i]); 1709 end_page_writeback(pages[i]);
1736 if (op & EXTENT_CLEAR_UNLOCK_PAGE) 1710 if (page_ops & PAGE_UNLOCK)
1737 unlock_page(pages[i]); 1711 unlock_page(pages[i]);
1738 page_cache_release(pages[i]); 1712 page_cache_release(pages[i]);
1739 } 1713 }
@@ -1810,7 +1784,7 @@ out:
1810 * set the private field for a given byte offset in the tree. If there isn't 1784 * set the private field for a given byte offset in the tree. If there isn't
1811 * an extent_state there already, this does nothing. 1785 * an extent_state there already, this does nothing.
1812 */ 1786 */
1813int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) 1787static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1814{ 1788{
1815 struct rb_node *node; 1789 struct rb_node *node;
1816 struct extent_state *state; 1790 struct extent_state *state;
@@ -1837,64 +1811,6 @@ out:
1837 return ret; 1811 return ret;
1838} 1812}
1839 1813
1840void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
1841 int count)
1842{
1843 struct rb_node *node;
1844 struct extent_state *state;
1845
1846 spin_lock(&tree->lock);
1847 /*
1848 * this search will find all the extents that end after
1849 * our range starts.
1850 */
1851 node = tree_search(tree, start);
1852 BUG_ON(!node);
1853
1854 state = rb_entry(node, struct extent_state, rb_node);
1855 BUG_ON(state->start != start);
1856
1857 while (count) {
1858 state->private = *csums++;
1859 count--;
1860 state = next_state(state);
1861 }
1862 spin_unlock(&tree->lock);
1863}
1864
1865static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
1866{
1867 struct bio_vec *bvec = bio->bi_io_vec + bio_index;
1868
1869 return page_offset(bvec->bv_page) + bvec->bv_offset;
1870}
1871
1872void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
1873 u32 csums[], int count)
1874{
1875 struct rb_node *node;
1876 struct extent_state *state = NULL;
1877 u64 start;
1878
1879 spin_lock(&tree->lock);
1880 do {
1881 start = __btrfs_get_bio_offset(bio, bio_index);
1882 if (state == NULL || state->start != start) {
1883 node = tree_search(tree, start);
1884 BUG_ON(!node);
1885
1886 state = rb_entry(node, struct extent_state, rb_node);
1887 BUG_ON(state->start != start);
1888 }
1889 state->private = *csums++;
1890 count--;
1891 bio_index++;
1892
1893 state = next_state(state);
1894 } while (count);
1895 spin_unlock(&tree->lock);
1896}
1897
1898int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1814int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1899{ 1815{
1900 struct rb_node *node; 1816 struct rb_node *node;
@@ -2173,7 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
2173 EXTENT_LOCKED); 2089 EXTENT_LOCKED);
2174 spin_unlock(&BTRFS_I(inode)->io_tree.lock); 2090 spin_unlock(&BTRFS_I(inode)->io_tree.lock);
2175 2091
2176 if (state && state->start == failrec->start) { 2092 if (state && state->start <= failrec->start &&
2093 state->end >= failrec->start + failrec->len - 1) {
2177 fs_info = BTRFS_I(inode)->root->fs_info; 2094 fs_info = BTRFS_I(inode)->root->fs_info;
2178 num_copies = btrfs_num_copies(fs_info, failrec->logical, 2095 num_copies = btrfs_num_copies(fs_info, failrec->logical,
2179 failrec->len); 2096 failrec->len);
@@ -2201,9 +2118,9 @@ out:
2201 * needed 2118 * needed
2202 */ 2119 */
2203 2120
2204static int bio_readpage_error(struct bio *failed_bio, struct page *page, 2121static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2205 u64 start, u64 end, int failed_mirror, 2122 struct page *page, u64 start, u64 end,
2206 struct extent_state *state) 2123 int failed_mirror)
2207{ 2124{
2208 struct io_failure_record *failrec = NULL; 2125 struct io_failure_record *failrec = NULL;
2209 u64 private; 2126 u64 private;
@@ -2213,6 +2130,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2213 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2130 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2214 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2131 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2215 struct bio *bio; 2132 struct bio *bio;
2133 struct btrfs_io_bio *btrfs_failed_bio;
2134 struct btrfs_io_bio *btrfs_bio;
2216 int num_copies; 2135 int num_copies;
2217 int ret; 2136 int ret;
2218 int read_mode; 2137 int read_mode;
@@ -2296,23 +2215,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2296 * all the retry and error correction code that follows. no 2215 * all the retry and error correction code that follows. no
2297 * matter what the error is, it is very likely to persist. 2216 * matter what the error is, it is very likely to persist.
2298 */ 2217 */
2299 pr_debug("bio_readpage_error: cannot repair, num_copies == 1. " 2218 pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
2300 "state=%p, num_copies=%d, next_mirror %d, " 2219 num_copies, failrec->this_mirror, failed_mirror);
2301 "failed_mirror %d\n", state, num_copies,
2302 failrec->this_mirror, failed_mirror);
2303 free_io_failure(inode, failrec, 0); 2220 free_io_failure(inode, failrec, 0);
2304 return -EIO; 2221 return -EIO;
2305 } 2222 }
2306 2223
2307 if (!state) {
2308 spin_lock(&tree->lock);
2309 state = find_first_extent_bit_state(tree, failrec->start,
2310 EXTENT_LOCKED);
2311 if (state && state->start != failrec->start)
2312 state = NULL;
2313 spin_unlock(&tree->lock);
2314 }
2315
2316 /* 2224 /*
2317 * there are two premises: 2225 * there are two premises:
2318 * a) deliver good data to the caller 2226 * a) deliver good data to the caller
@@ -2349,9 +2257,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2349 read_mode = READ_SYNC; 2257 read_mode = READ_SYNC;
2350 } 2258 }
2351 2259
2352 if (!state || failrec->this_mirror > num_copies) { 2260 if (failrec->this_mirror > num_copies) {
2353 pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, " 2261 pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
2354 "next_mirror %d, failed_mirror %d\n", state,
2355 num_copies, failrec->this_mirror, failed_mirror); 2262 num_copies, failrec->this_mirror, failed_mirror);
2356 free_io_failure(inode, failrec, 0); 2263 free_io_failure(inode, failrec, 0);
2357 return -EIO; 2264 return -EIO;
@@ -2362,12 +2269,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
2362 free_io_failure(inode, failrec, 0); 2269 free_io_failure(inode, failrec, 0);
2363 return -EIO; 2270 return -EIO;
2364 } 2271 }
2365 bio->bi_private = state;
2366 bio->bi_end_io = failed_bio->bi_end_io; 2272 bio->bi_end_io = failed_bio->bi_end_io;
2367 bio->bi_sector = failrec->logical >> 9; 2273 bio->bi_sector = failrec->logical >> 9;
2368 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2274 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2369 bio->bi_size = 0; 2275 bio->bi_size = 0;
2370 2276
2277 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2278 if (btrfs_failed_bio->csum) {
2279 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2280 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2281
2282 btrfs_bio = btrfs_io_bio(bio);
2283 btrfs_bio->csum = btrfs_bio->csum_inline;
2284 phy_offset >>= inode->i_sb->s_blocksize_bits;
2285 phy_offset *= csum_size;
2286 memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
2287 csum_size);
2288 }
2289
2371 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 2290 bio_add_page(bio, page, failrec->len, start - page_offset(page));
2372 2291
2373 pr_debug("bio_readpage_error: submitting new read[%#x] to " 2292 pr_debug("bio_readpage_error: submitting new read[%#x] to "
@@ -2450,6 +2369,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2450 bio_put(bio); 2369 bio_put(bio);
2451} 2370}
2452 2371
2372static void
2373endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2374 int uptodate)
2375{
2376 struct extent_state *cached = NULL;
2377 u64 end = start + len - 1;
2378
2379 if (uptodate && tree->track_uptodate)
2380 set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
2381 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2382}
2383
2453/* 2384/*
2454 * after a readpage IO is done, we need to: 2385 * after a readpage IO is done, we need to:
2455 * clear the uptodate bits on error 2386 * clear the uptodate bits on error
@@ -2466,9 +2397,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2466 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 2397 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
2467 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 2398 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
2468 struct bio_vec *bvec = bio->bi_io_vec; 2399 struct bio_vec *bvec = bio->bi_io_vec;
2400 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2469 struct extent_io_tree *tree; 2401 struct extent_io_tree *tree;
2402 u64 offset = 0;
2470 u64 start; 2403 u64 start;
2471 u64 end; 2404 u64 end;
2405 u64 len;
2406 u64 extent_start = 0;
2407 u64 extent_len = 0;
2472 int mirror; 2408 int mirror;
2473 int ret; 2409 int ret;
2474 2410
@@ -2477,9 +2413,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2477 2413
2478 do { 2414 do {
2479 struct page *page = bvec->bv_page; 2415 struct page *page = bvec->bv_page;
2480 struct extent_state *cached = NULL;
2481 struct extent_state *state;
2482 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2483 struct inode *inode = page->mapping->host; 2416 struct inode *inode = page->mapping->host;
2484 2417
2485 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2418 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2500,37 +2433,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2500 2433
2501 start = page_offset(page); 2434 start = page_offset(page);
2502 end = start + bvec->bv_offset + bvec->bv_len - 1; 2435 end = start + bvec->bv_offset + bvec->bv_len - 1;
2436 len = bvec->bv_len;
2503 2437
2504 if (++bvec <= bvec_end) 2438 if (++bvec <= bvec_end)
2505 prefetchw(&bvec->bv_page->flags); 2439 prefetchw(&bvec->bv_page->flags);
2506 2440
2507 spin_lock(&tree->lock);
2508 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
2509 if (state && state->start == start) {
2510 /*
2511 * take a reference on the state, unlock will drop
2512 * the ref
2513 */
2514 cache_state(state, &cached);
2515 }
2516 spin_unlock(&tree->lock);
2517
2518 mirror = io_bio->mirror_num; 2441 mirror = io_bio->mirror_num;
2519 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 2442 if (likely(uptodate && tree->ops &&
2520 ret = tree->ops->readpage_end_io_hook(page, start, end, 2443 tree->ops->readpage_end_io_hook)) {
2521 state, mirror); 2444 ret = tree->ops->readpage_end_io_hook(io_bio, offset,
2445 page, start, end,
2446 mirror);
2522 if (ret) 2447 if (ret)
2523 uptodate = 0; 2448 uptodate = 0;
2524 else 2449 else
2525 clean_io_failure(start, page); 2450 clean_io_failure(start, page);
2526 } 2451 }
2527 2452
2528 if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { 2453 if (likely(uptodate))
2454 goto readpage_ok;
2455
2456 if (tree->ops && tree->ops->readpage_io_failed_hook) {
2529 ret = tree->ops->readpage_io_failed_hook(page, mirror); 2457 ret = tree->ops->readpage_io_failed_hook(page, mirror);
2530 if (!ret && !err && 2458 if (!ret && !err &&
2531 test_bit(BIO_UPTODATE, &bio->bi_flags)) 2459 test_bit(BIO_UPTODATE, &bio->bi_flags))
2532 uptodate = 1; 2460 uptodate = 1;
2533 } else if (!uptodate) { 2461 } else {
2534 /* 2462 /*
2535 * The generic bio_readpage_error handles errors the 2463 * The generic bio_readpage_error handles errors the
2536 * following way: If possible, new read requests are 2464 * following way: If possible, new read requests are
@@ -2541,24 +2469,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2541 * can't handle the error it will return -EIO and we 2469 * can't handle the error it will return -EIO and we
2542 * remain responsible for that page. 2470 * remain responsible for that page.
2543 */ 2471 */
2544 ret = bio_readpage_error(bio, page, start, end, mirror, NULL); 2472 ret = bio_readpage_error(bio, offset, page, start, end,
2473 mirror);
2545 if (ret == 0) { 2474 if (ret == 0) {
2546 uptodate = 2475 uptodate =
2547 test_bit(BIO_UPTODATE, &bio->bi_flags); 2476 test_bit(BIO_UPTODATE, &bio->bi_flags);
2548 if (err) 2477 if (err)
2549 uptodate = 0; 2478 uptodate = 0;
2550 uncache_state(&cached);
2551 continue; 2479 continue;
2552 } 2480 }
2553 } 2481 }
2554 2482readpage_ok:
2555 if (uptodate && tree->track_uptodate) { 2483 if (likely(uptodate)) {
2556 set_extent_uptodate(tree, start, end, &cached,
2557 GFP_ATOMIC);
2558 }
2559 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
2560
2561 if (uptodate) {
2562 loff_t i_size = i_size_read(inode); 2484 loff_t i_size = i_size_read(inode);
2563 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; 2485 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
2564 unsigned offset; 2486 unsigned offset;
@@ -2573,8 +2495,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2573 SetPageError(page); 2495 SetPageError(page);
2574 } 2496 }
2575 unlock_page(page); 2497 unlock_page(page);
2498 offset += len;
2499
2500 if (unlikely(!uptodate)) {
2501 if (extent_len) {
2502 endio_readpage_release_extent(tree,
2503 extent_start,
2504 extent_len, 1);
2505 extent_start = 0;
2506 extent_len = 0;
2507 }
2508 endio_readpage_release_extent(tree, start,
2509 end - start + 1, 0);
2510 } else if (!extent_len) {
2511 extent_start = start;
2512 extent_len = end + 1 - start;
2513 } else if (extent_start + extent_len == start) {
2514 extent_len += end + 1 - start;
2515 } else {
2516 endio_readpage_release_extent(tree, extent_start,
2517 extent_len, uptodate);
2518 extent_start = start;
2519 extent_len = end + 1 - start;
2520 }
2576 } while (bvec <= bvec_end); 2521 } while (bvec <= bvec_end);
2577 2522
2523 if (extent_len)
2524 endio_readpage_release_extent(tree, extent_start, extent_len,
2525 uptodate);
2526 if (io_bio->end_io)
2527 io_bio->end_io(io_bio, err);
2578 bio_put(bio); 2528 bio_put(bio);
2579} 2529}
2580 2530
@@ -2586,6 +2536,7 @@ struct bio *
2586btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 2536btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2587 gfp_t gfp_flags) 2537 gfp_t gfp_flags)
2588{ 2538{
2539 struct btrfs_io_bio *btrfs_bio;
2589 struct bio *bio; 2540 struct bio *bio;
2590 2541
2591 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset); 2542 bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2552,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2601 bio->bi_size = 0; 2552 bio->bi_size = 0;
2602 bio->bi_bdev = bdev; 2553 bio->bi_bdev = bdev;
2603 bio->bi_sector = first_sector; 2554 bio->bi_sector = first_sector;
2555 btrfs_bio = btrfs_io_bio(bio);
2556 btrfs_bio->csum = NULL;
2557 btrfs_bio->csum_allocated = NULL;
2558 btrfs_bio->end_io = NULL;
2604 } 2559 }
2605 return bio; 2560 return bio;
2606} 2561}
@@ -2614,7 +2569,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
2614/* this also allocates from the btrfs_bioset */ 2569/* this also allocates from the btrfs_bioset */
2615struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) 2570struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
2616{ 2571{
2617 return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset); 2572 struct btrfs_io_bio *btrfs_bio;
2573 struct bio *bio;
2574
2575 bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
2576 if (bio) {
2577 btrfs_bio = btrfs_io_bio(bio);
2578 btrfs_bio->csum = NULL;
2579 btrfs_bio->csum_allocated = NULL;
2580 btrfs_bio->end_io = NULL;
2581 }
2582 return bio;
2618} 2583}
2619 2584
2620 2585
@@ -2738,17 +2703,45 @@ void set_page_extent_mapped(struct page *page)
2738 } 2703 }
2739} 2704}
2740 2705
2706static struct extent_map *
2707__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
2708 u64 start, u64 len, get_extent_t *get_extent,
2709 struct extent_map **em_cached)
2710{
2711 struct extent_map *em;
2712
2713 if (em_cached && *em_cached) {
2714 em = *em_cached;
2715 if (em->in_tree && start >= em->start &&
2716 start < extent_map_end(em)) {
2717 atomic_inc(&em->refs);
2718 return em;
2719 }
2720
2721 free_extent_map(em);
2722 *em_cached = NULL;
2723 }
2724
2725 em = get_extent(inode, page, pg_offset, start, len, 0);
2726 if (em_cached && !IS_ERR_OR_NULL(em)) {
2727 BUG_ON(*em_cached);
2728 atomic_inc(&em->refs);
2729 *em_cached = em;
2730 }
2731 return em;
2732}
2741/* 2733/*
2742 * basic readpage implementation. Locked extent state structs are inserted 2734 * basic readpage implementation. Locked extent state structs are inserted
2743 * into the tree that are removed when the IO is done (by the end_io 2735 * into the tree that are removed when the IO is done (by the end_io
2744 * handlers) 2736 * handlers)
2745 * XXX JDM: This needs looking at to ensure proper page locking 2737 * XXX JDM: This needs looking at to ensure proper page locking
2746 */ 2738 */
2747static int __extent_read_full_page(struct extent_io_tree *tree, 2739static int __do_readpage(struct extent_io_tree *tree,
2748 struct page *page, 2740 struct page *page,
2749 get_extent_t *get_extent, 2741 get_extent_t *get_extent,
2750 struct bio **bio, int mirror_num, 2742 struct extent_map **em_cached,
2751 unsigned long *bio_flags, int rw) 2743 struct bio **bio, int mirror_num,
2744 unsigned long *bio_flags, int rw)
2752{ 2745{
2753 struct inode *inode = page->mapping->host; 2746 struct inode *inode = page->mapping->host;
2754 u64 start = page_offset(page); 2747 u64 start = page_offset(page);
@@ -2762,35 +2755,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2762 sector_t sector; 2755 sector_t sector;
2763 struct extent_map *em; 2756 struct extent_map *em;
2764 struct block_device *bdev; 2757 struct block_device *bdev;
2765 struct btrfs_ordered_extent *ordered;
2766 int ret; 2758 int ret;
2767 int nr = 0; 2759 int nr = 0;
2760 int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2768 size_t pg_offset = 0; 2761 size_t pg_offset = 0;
2769 size_t iosize; 2762 size_t iosize;
2770 size_t disk_io_size; 2763 size_t disk_io_size;
2771 size_t blocksize = inode->i_sb->s_blocksize; 2764 size_t blocksize = inode->i_sb->s_blocksize;
2772 unsigned long this_bio_flag = 0; 2765 unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2773 2766
2774 set_page_extent_mapped(page); 2767 set_page_extent_mapped(page);
2775 2768
2769 end = page_end;
2776 if (!PageUptodate(page)) { 2770 if (!PageUptodate(page)) {
2777 if (cleancache_get_page(page) == 0) { 2771 if (cleancache_get_page(page) == 0) {
2778 BUG_ON(blocksize != PAGE_SIZE); 2772 BUG_ON(blocksize != PAGE_SIZE);
2773 unlock_extent(tree, start, end);
2779 goto out; 2774 goto out;
2780 } 2775 }
2781 } 2776 }
2782 2777
2783 end = page_end;
2784 while (1) {
2785 lock_extent(tree, start, end);
2786 ordered = btrfs_lookup_ordered_extent(inode, start);
2787 if (!ordered)
2788 break;
2789 unlock_extent(tree, start, end);
2790 btrfs_start_ordered_extent(inode, ordered, 1);
2791 btrfs_put_ordered_extent(ordered);
2792 }
2793
2794 if (page->index == last_byte >> PAGE_CACHE_SHIFT) { 2778 if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
2795 char *userpage; 2779 char *userpage;
2796 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); 2780 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
@@ -2817,15 +2801,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2817 kunmap_atomic(userpage); 2801 kunmap_atomic(userpage);
2818 set_extent_uptodate(tree, cur, cur + iosize - 1, 2802 set_extent_uptodate(tree, cur, cur + iosize - 1,
2819 &cached, GFP_NOFS); 2803 &cached, GFP_NOFS);
2820 unlock_extent_cached(tree, cur, cur + iosize - 1, 2804 if (!parent_locked)
2821 &cached, GFP_NOFS); 2805 unlock_extent_cached(tree, cur,
2806 cur + iosize - 1,
2807 &cached, GFP_NOFS);
2822 break; 2808 break;
2823 } 2809 }
2824 em = get_extent(inode, page, pg_offset, cur, 2810 em = __get_extent_map(inode, page, pg_offset, cur,
2825 end - cur + 1, 0); 2811 end - cur + 1, get_extent, em_cached);
2826 if (IS_ERR_OR_NULL(em)) { 2812 if (IS_ERR_OR_NULL(em)) {
2827 SetPageError(page); 2813 SetPageError(page);
2828 unlock_extent(tree, cur, end); 2814 if (!parent_locked)
2815 unlock_extent(tree, cur, end);
2829 break; 2816 break;
2830 } 2817 }
2831 extent_offset = cur - em->start; 2818 extent_offset = cur - em->start;
@@ -2833,7 +2820,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2833 BUG_ON(end < cur); 2820 BUG_ON(end < cur);
2834 2821
2835 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 2822 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
2836 this_bio_flag = EXTENT_BIO_COMPRESSED; 2823 this_bio_flag |= EXTENT_BIO_COMPRESSED;
2837 extent_set_compress_type(&this_bio_flag, 2824 extent_set_compress_type(&this_bio_flag,
2838 em->compress_type); 2825 em->compress_type);
2839 } 2826 }
@@ -2877,7 +2864,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2877 if (test_range_bit(tree, cur, cur_end, 2864 if (test_range_bit(tree, cur, cur_end,
2878 EXTENT_UPTODATE, 1, NULL)) { 2865 EXTENT_UPTODATE, 1, NULL)) {
2879 check_page_uptodate(tree, page); 2866 check_page_uptodate(tree, page);
2880 unlock_extent(tree, cur, cur + iosize - 1); 2867 if (!parent_locked)
2868 unlock_extent(tree, cur, cur + iosize - 1);
2881 cur = cur + iosize; 2869 cur = cur + iosize;
2882 pg_offset += iosize; 2870 pg_offset += iosize;
2883 continue; 2871 continue;
@@ -2887,7 +2875,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2887 */ 2875 */
2888 if (block_start == EXTENT_MAP_INLINE) { 2876 if (block_start == EXTENT_MAP_INLINE) {
2889 SetPageError(page); 2877 SetPageError(page);
2890 unlock_extent(tree, cur, cur + iosize - 1); 2878 if (!parent_locked)
2879 unlock_extent(tree, cur, cur + iosize - 1);
2891 cur = cur + iosize; 2880 cur = cur + iosize;
2892 pg_offset += iosize; 2881 pg_offset += iosize;
2893 continue; 2882 continue;
@@ -2905,7 +2894,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2905 *bio_flags = this_bio_flag; 2894 *bio_flags = this_bio_flag;
2906 } else { 2895 } else {
2907 SetPageError(page); 2896 SetPageError(page);
2908 unlock_extent(tree, cur, cur + iosize - 1); 2897 if (!parent_locked)
2898 unlock_extent(tree, cur, cur + iosize - 1);
2909 } 2899 }
2910 cur = cur + iosize; 2900 cur = cur + iosize;
2911 pg_offset += iosize; 2901 pg_offset += iosize;
@@ -2919,6 +2909,104 @@ out:
2919 return 0; 2909 return 0;
2920} 2910}
2921 2911
2912static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
2913 struct page *pages[], int nr_pages,
2914 u64 start, u64 end,
2915 get_extent_t *get_extent,
2916 struct extent_map **em_cached,
2917 struct bio **bio, int mirror_num,
2918 unsigned long *bio_flags, int rw)
2919{
2920 struct inode *inode;
2921 struct btrfs_ordered_extent *ordered;
2922 int index;
2923
2924 inode = pages[0]->mapping->host;
2925 while (1) {
2926 lock_extent(tree, start, end);
2927 ordered = btrfs_lookup_ordered_range(inode, start,
2928 end - start + 1);
2929 if (!ordered)
2930 break;
2931 unlock_extent(tree, start, end);
2932 btrfs_start_ordered_extent(inode, ordered, 1);
2933 btrfs_put_ordered_extent(ordered);
2934 }
2935
2936 for (index = 0; index < nr_pages; index++) {
2937 __do_readpage(tree, pages[index], get_extent, em_cached, bio,
2938 mirror_num, bio_flags, rw);
2939 page_cache_release(pages[index]);
2940 }
2941}
2942
2943static void __extent_readpages(struct extent_io_tree *tree,
2944 struct page *pages[],
2945 int nr_pages, get_extent_t *get_extent,
2946 struct extent_map **em_cached,
2947 struct bio **bio, int mirror_num,
2948 unsigned long *bio_flags, int rw)
2949{
2950 u64 start = 0;
2951 u64 end = 0;
2952 u64 page_start;
2953 int index;
2954 int first_index = 0;
2955
2956 for (index = 0; index < nr_pages; index++) {
2957 page_start = page_offset(pages[index]);
2958 if (!end) {
2959 start = page_start;
2960 end = start + PAGE_CACHE_SIZE - 1;
2961 first_index = index;
2962 } else if (end + 1 == page_start) {
2963 end += PAGE_CACHE_SIZE;
2964 } else {
2965 __do_contiguous_readpages(tree, &pages[first_index],
2966 index - first_index, start,
2967 end, get_extent, em_cached,
2968 bio, mirror_num, bio_flags,
2969 rw);
2970 start = page_start;
2971 end = start + PAGE_CACHE_SIZE - 1;
2972 first_index = index;
2973 }
2974 }
2975
2976 if (end)
2977 __do_contiguous_readpages(tree, &pages[first_index],
2978 index - first_index, start,
2979 end, get_extent, em_cached, bio,
2980 mirror_num, bio_flags, rw);
2981}
2982
2983static int __extent_read_full_page(struct extent_io_tree *tree,
2984 struct page *page,
2985 get_extent_t *get_extent,
2986 struct bio **bio, int mirror_num,
2987 unsigned long *bio_flags, int rw)
2988{
2989 struct inode *inode = page->mapping->host;
2990 struct btrfs_ordered_extent *ordered;
2991 u64 start = page_offset(page);
2992 u64 end = start + PAGE_CACHE_SIZE - 1;
2993 int ret;
2994
2995 while (1) {
2996 lock_extent(tree, start, end);
2997 ordered = btrfs_lookup_ordered_extent(inode, start);
2998 if (!ordered)
2999 break;
3000 unlock_extent(tree, start, end);
3001 btrfs_start_ordered_extent(inode, ordered, 1);
3002 btrfs_put_ordered_extent(ordered);
3003 }
3004
3005 ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
3006 bio_flags, rw);
3007 return ret;
3008}
3009
2922int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 3010int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2923 get_extent_t *get_extent, int mirror_num) 3011 get_extent_t *get_extent, int mirror_num)
2924{ 3012{
@@ -2933,6 +3021,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2933 return ret; 3021 return ret;
2934} 3022}
2935 3023
3024int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
3025 get_extent_t *get_extent, int mirror_num)
3026{
3027 struct bio *bio = NULL;
3028 unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
3029 int ret;
3030
3031 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
3032 &bio_flags, READ);
3033 if (bio)
3034 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3035 return ret;
3036}
3037
2936static noinline void update_nr_written(struct page *page, 3038static noinline void update_nr_written(struct page *page,
2937 struct writeback_control *wbc, 3039 struct writeback_control *wbc,
2938 unsigned long nr_written) 3040 unsigned long nr_written)
@@ -3189,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3189 if (!PageWriteback(page)) { 3291 if (!PageWriteback(page)) {
3190 printk(KERN_ERR "btrfs warning page %lu not " 3292 printk(KERN_ERR "btrfs warning page %lu not "
3191 "writeback, cur %llu end %llu\n", 3293 "writeback, cur %llu end %llu\n",
3192 page->index, (unsigned long long)cur, 3294 page->index, cur, end);
3193 (unsigned long long)end);
3194 } 3295 }
3195 3296
3196 ret = submit_extent_page(write_flags, tree, page, 3297 ret = submit_extent_page(write_flags, tree, page,
@@ -3769,7 +3870,7 @@ int extent_readpages(struct extent_io_tree *tree,
3769 unsigned long bio_flags = 0; 3870 unsigned long bio_flags = 0;
3770 struct page *pagepool[16]; 3871 struct page *pagepool[16];
3771 struct page *page; 3872 struct page *page;
3772 int i = 0; 3873 struct extent_map *em_cached = NULL;
3773 int nr = 0; 3874 int nr = 0;
3774 3875
3775 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 3876 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -3786,18 +3887,16 @@ int extent_readpages(struct extent_io_tree *tree,
3786 pagepool[nr++] = page; 3887 pagepool[nr++] = page;
3787 if (nr < ARRAY_SIZE(pagepool)) 3888 if (nr < ARRAY_SIZE(pagepool))
3788 continue; 3889 continue;
3789 for (i = 0; i < nr; i++) { 3890 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
3790 __extent_read_full_page(tree, pagepool[i], get_extent, 3891 &bio, 0, &bio_flags, READ);
3791 &bio, 0, &bio_flags, READ);
3792 page_cache_release(pagepool[i]);
3793 }
3794 nr = 0; 3892 nr = 0;
3795 } 3893 }
3796 for (i = 0; i < nr; i++) { 3894 if (nr)
3797 __extent_read_full_page(tree, pagepool[i], get_extent, 3895 __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
3798 &bio, 0, &bio_flags, READ); 3896 &bio, 0, &bio_flags, READ);
3799 page_cache_release(pagepool[i]); 3897
3800 } 3898 if (em_cached)
3899 free_extent_map(em_cached);
3801 3900
3802 BUG_ON(!list_empty(pages)); 3901 BUG_ON(!list_empty(pages));
3803 if (bio) 3902 if (bio)
@@ -4136,6 +4235,76 @@ static void __free_extent_buffer(struct extent_buffer *eb)
4136 kmem_cache_free(extent_buffer_cache, eb); 4235 kmem_cache_free(extent_buffer_cache, eb);
4137} 4236}
4138 4237
4238static int extent_buffer_under_io(struct extent_buffer *eb)
4239{
4240 return (atomic_read(&eb->io_pages) ||
4241 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4242 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4243}
4244
4245/*
4246 * Helper for releasing extent buffer page.
4247 */
4248static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4249 unsigned long start_idx)
4250{
4251 unsigned long index;
4252 unsigned long num_pages;
4253 struct page *page;
4254 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4255
4256 BUG_ON(extent_buffer_under_io(eb));
4257
4258 num_pages = num_extent_pages(eb->start, eb->len);
4259 index = start_idx + num_pages;
4260 if (start_idx >= index)
4261 return;
4262
4263 do {
4264 index--;
4265 page = extent_buffer_page(eb, index);
4266 if (page && mapped) {
4267 spin_lock(&page->mapping->private_lock);
4268 /*
4269 * We do this since we'll remove the pages after we've
4270 * removed the eb from the radix tree, so we could race
4271 * and have this page now attached to the new eb. So
4272 * only clear page_private if it's still connected to
4273 * this eb.
4274 */
4275 if (PagePrivate(page) &&
4276 page->private == (unsigned long)eb) {
4277 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4278 BUG_ON(PageDirty(page));
4279 BUG_ON(PageWriteback(page));
4280 /*
4281 * We need to make sure we haven't be attached
4282 * to a new eb.
4283 */
4284 ClearPagePrivate(page);
4285 set_page_private(page, 0);
4286 /* One for the page private */
4287 page_cache_release(page);
4288 }
4289 spin_unlock(&page->mapping->private_lock);
4290
4291 }
4292 if (page) {
4293 /* One for when we alloced the page */
4294 page_cache_release(page);
4295 }
4296 } while (index != start_idx);
4297}
4298
4299/*
4300 * Helper for releasing the extent buffer.
4301 */
4302static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4303{
4304 btrfs_release_extent_buffer_page(eb, 0);
4305 __free_extent_buffer(eb);
4306}
4307
4139static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 4308static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
4140 u64 start, 4309 u64 start,
4141 unsigned long len, 4310 unsigned long len,
@@ -4184,13 +4353,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
4184 struct extent_buffer *new; 4353 struct extent_buffer *new;
4185 unsigned long num_pages = num_extent_pages(src->start, src->len); 4354 unsigned long num_pages = num_extent_pages(src->start, src->len);
4186 4355
4187 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC); 4356 new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
4188 if (new == NULL) 4357 if (new == NULL)
4189 return NULL; 4358 return NULL;
4190 4359
4191 for (i = 0; i < num_pages; i++) { 4360 for (i = 0; i < num_pages; i++) {
4192 p = alloc_page(GFP_ATOMIC); 4361 p = alloc_page(GFP_NOFS);
4193 BUG_ON(!p); 4362 if (!p) {
4363 btrfs_release_extent_buffer(new);
4364 return NULL;
4365 }
4194 attach_extent_buffer_page(new, p); 4366 attach_extent_buffer_page(new, p);
4195 WARN_ON(PageDirty(p)); 4367 WARN_ON(PageDirty(p));
4196 SetPageUptodate(p); 4368 SetPageUptodate(p);
@@ -4210,12 +4382,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
4210 unsigned long num_pages = num_extent_pages(0, len); 4382 unsigned long num_pages = num_extent_pages(0, len);
4211 unsigned long i; 4383 unsigned long i;
4212 4384
4213 eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); 4385 eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
4214 if (!eb) 4386 if (!eb)
4215 return NULL; 4387 return NULL;
4216 4388
4217 for (i = 0; i < num_pages; i++) { 4389 for (i = 0; i < num_pages; i++) {
4218 eb->pages[i] = alloc_page(GFP_ATOMIC); 4390 eb->pages[i] = alloc_page(GFP_NOFS);
4219 if (!eb->pages[i]) 4391 if (!eb->pages[i])
4220 goto err; 4392 goto err;
4221 } 4393 }
@@ -4231,76 +4403,6 @@ err:
4231 return NULL; 4403 return NULL;
4232} 4404}
4233 4405
4234static int extent_buffer_under_io(struct extent_buffer *eb)
4235{
4236 return (atomic_read(&eb->io_pages) ||
4237 test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
4238 test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4239}
4240
4241/*
4242 * Helper for releasing extent buffer page.
4243 */
4244static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
4245 unsigned long start_idx)
4246{
4247 unsigned long index;
4248 unsigned long num_pages;
4249 struct page *page;
4250 int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
4251
4252 BUG_ON(extent_buffer_under_io(eb));
4253
4254 num_pages = num_extent_pages(eb->start, eb->len);
4255 index = start_idx + num_pages;
4256 if (start_idx >= index)
4257 return;
4258
4259 do {
4260 index--;
4261 page = extent_buffer_page(eb, index);
4262 if (page && mapped) {
4263 spin_lock(&page->mapping->private_lock);
4264 /*
4265 * We do this since we'll remove the pages after we've
4266 * removed the eb from the radix tree, so we could race
4267 * and have this page now attached to the new eb. So
4268 * only clear page_private if it's still connected to
4269 * this eb.
4270 */
4271 if (PagePrivate(page) &&
4272 page->private == (unsigned long)eb) {
4273 BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
4274 BUG_ON(PageDirty(page));
4275 BUG_ON(PageWriteback(page));
4276 /*
4277 * We need to make sure we haven't be attached
4278 * to a new eb.
4279 */
4280 ClearPagePrivate(page);
4281 set_page_private(page, 0);
4282 /* One for the page private */
4283 page_cache_release(page);
4284 }
4285 spin_unlock(&page->mapping->private_lock);
4286
4287 }
4288 if (page) {
4289 /* One for when we alloced the page */
4290 page_cache_release(page);
4291 }
4292 } while (index != start_idx);
4293}
4294
4295/*
4296 * Helper for releasing the extent buffer.
4297 */
4298static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4299{
4300 btrfs_release_extent_buffer_page(eb, 0);
4301 __free_extent_buffer(eb);
4302}
4303
4304static void check_buffer_tree_ref(struct extent_buffer *eb) 4406static void check_buffer_tree_ref(struct extent_buffer *eb)
4305{ 4407{
4306 int refs; 4408 int refs;
@@ -4771,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
4771 WARN_ON(start > eb->len); 4873 WARN_ON(start > eb->len);
4772 WARN_ON(start + len > eb->start + eb->len); 4874 WARN_ON(start + len > eb->start + eb->len);
4773 4875
4774 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4876 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4775 4877
4776 while (len > 0) { 4878 while (len > 0) {
4777 page = extent_buffer_page(eb, i); 4879 page = extent_buffer_page(eb, i);
@@ -4813,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
4813 4915
4814 if (start + min_len > eb->len) { 4916 if (start + min_len > eb->len) {
4815 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " 4917 WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
4816 "wanted %lu %lu\n", (unsigned long long)eb->start, 4918 "wanted %lu %lu\n",
4817 eb->len, start, min_len); 4919 eb->start, eb->len, start, min_len);
4818 return -EINVAL; 4920 return -EINVAL;
4819 } 4921 }
4820 4922
@@ -4841,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
4841 WARN_ON(start > eb->len); 4943 WARN_ON(start > eb->len);
4842 WARN_ON(start + len > eb->start + eb->len); 4944 WARN_ON(start + len > eb->start + eb->len);
4843 4945
4844 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4946 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4845 4947
4846 while (len > 0) { 4948 while (len > 0) {
4847 page = extent_buffer_page(eb, i); 4949 page = extent_buffer_page(eb, i);
@@ -4875,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
4875 WARN_ON(start > eb->len); 4977 WARN_ON(start > eb->len);
4876 WARN_ON(start + len > eb->start + eb->len); 4978 WARN_ON(start + len > eb->start + eb->len);
4877 4979
4878 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 4980 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4879 4981
4880 while (len > 0) { 4982 while (len > 0) {
4881 page = extent_buffer_page(eb, i); 4983 page = extent_buffer_page(eb, i);
@@ -4905,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
4905 WARN_ON(start > eb->len); 5007 WARN_ON(start > eb->len);
4906 WARN_ON(start + len > eb->start + eb->len); 5008 WARN_ON(start + len > eb->start + eb->len);
4907 5009
4908 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 5010 offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
4909 5011
4910 while (len > 0) { 5012 while (len > 0) {
4911 page = extent_buffer_page(eb, i); 5013 page = extent_buffer_page(eb, i);
@@ -4936,7 +5038,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
4936 WARN_ON(src->len != dst_len); 5038 WARN_ON(src->len != dst_len);
4937 5039
4938 offset = (start_offset + dst_offset) & 5040 offset = (start_offset + dst_offset) &
4939 ((unsigned long)PAGE_CACHE_SIZE - 1); 5041 (PAGE_CACHE_SIZE - 1);
4940 5042
4941 while (len > 0) { 5043 while (len > 0) {
4942 page = extent_buffer_page(dst, i); 5044 page = extent_buffer_page(dst, i);
@@ -5022,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5022 5124
5023 while (len > 0) { 5125 while (len > 0) {
5024 dst_off_in_page = (start_offset + dst_offset) & 5126 dst_off_in_page = (start_offset + dst_offset) &
5025 ((unsigned long)PAGE_CACHE_SIZE - 1); 5127 (PAGE_CACHE_SIZE - 1);
5026 src_off_in_page = (start_offset + src_offset) & 5128 src_off_in_page = (start_offset + src_offset) &
5027 ((unsigned long)PAGE_CACHE_SIZE - 1); 5129 (PAGE_CACHE_SIZE - 1);
5028 5130
5029 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 5131 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
5030 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; 5132 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
@@ -5075,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5075 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; 5177 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
5076 5178
5077 dst_off_in_page = (start_offset + dst_end) & 5179 dst_off_in_page = (start_offset + dst_end) &
5078 ((unsigned long)PAGE_CACHE_SIZE - 1); 5180 (PAGE_CACHE_SIZE - 1);
5079 src_off_in_page = (start_offset + src_end) & 5181 src_off_in_page = (start_offset + src_end) &
5080 ((unsigned long)PAGE_CACHE_SIZE - 1); 5182 (PAGE_CACHE_SIZE - 1);
5081 5183
5082 cur = min_t(unsigned long, len, src_off_in_page + 1); 5184 cur = min_t(unsigned long, len, src_off_in_page + 1);
5083 cur = min(cur, dst_off_in_page + 1); 5185 cur = min(cur, dst_off_in_page + 1);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3b8c4e26e1da..6dbc645f1f3d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,6 +29,7 @@
29 */ 29 */
30#define EXTENT_BIO_COMPRESSED 1 30#define EXTENT_BIO_COMPRESSED 1
31#define EXTENT_BIO_TREE_LOG 2 31#define EXTENT_BIO_TREE_LOG 2
32#define EXTENT_BIO_PARENT_LOCKED 4
32#define EXTENT_BIO_FLAG_SHIFT 16 33#define EXTENT_BIO_FLAG_SHIFT 16
33 34
34/* these are bit numbers for test/set bit */ 35/* these are bit numbers for test/set bit */
@@ -44,14 +45,11 @@
44#define EXTENT_BUFFER_DUMMY 9 45#define EXTENT_BUFFER_DUMMY 9
45 46
46/* these are flags for extent_clear_unlock_delalloc */ 47/* these are flags for extent_clear_unlock_delalloc */
47#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 48#define PAGE_UNLOCK (1 << 0)
48#define EXTENT_CLEAR_UNLOCK 0x2 49#define PAGE_CLEAR_DIRTY (1 << 1)
49#define EXTENT_CLEAR_DELALLOC 0x4 50#define PAGE_SET_WRITEBACK (1 << 2)
50#define EXTENT_CLEAR_DIRTY 0x8 51#define PAGE_END_WRITEBACK (1 << 3)
51#define EXTENT_SET_WRITEBACK 0x10 52#define PAGE_SET_PRIVATE2 (1 << 4)
52#define EXTENT_END_WRITEBACK 0x20
53#define EXTENT_SET_PRIVATE2 0x40
54#define EXTENT_CLEAR_ACCOUNTING 0x80
55 53
56/* 54/*
57 * page->private values. Every page that is controlled by the extent 55 * page->private values. Every page that is controlled by the extent
@@ -62,6 +60,7 @@
62 60
63struct extent_state; 61struct extent_state;
64struct btrfs_root; 62struct btrfs_root;
63struct btrfs_io_bio;
65 64
66typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, 65typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
67 struct bio *bio, int mirror_num, 66 struct bio *bio, int mirror_num,
@@ -77,8 +76,9 @@ struct extent_io_ops {
77 size_t size, struct bio *bio, 76 size_t size, struct bio *bio,
78 unsigned long bio_flags); 77 unsigned long bio_flags);
79 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); 78 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
80 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, 79 int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
81 struct extent_state *state, int mirror); 80 struct page *page, u64 start, u64 end,
81 int mirror);
82 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 82 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
83 struct extent_state *state, int uptodate); 83 struct extent_state *state, int uptodate);
84 void (*set_bit_hook)(struct inode *inode, struct extent_state *state, 84 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -200,6 +200,8 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
200int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); 200int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
201int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 201int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
202 get_extent_t *get_extent, int mirror_num); 202 get_extent_t *get_extent, int mirror_num);
203int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
204 get_extent_t *get_extent, int mirror_num);
203int __init extent_io_init(void); 205int __init extent_io_init(void);
204void extent_io_exit(void); 206void extent_io_exit(void);
205 207
@@ -261,11 +263,6 @@ int extent_readpages(struct extent_io_tree *tree,
261 get_extent_t get_extent); 263 get_extent_t get_extent);
262int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 264int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
263 __u64 start, __u64 len, get_extent_t *get_extent); 265 __u64 start, __u64 len, get_extent_t *get_extent);
264int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
265void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
266 int count);
267void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
268 int bvec_index, u32 csums[], int count);
269int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 266int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
270void set_page_extent_mapped(struct page *page); 267void set_page_extent_mapped(struct page *page);
271 268
@@ -330,10 +327,10 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
330 unsigned long *map_len); 327 unsigned long *map_len);
331int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); 328int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
332int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); 329int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
333int extent_clear_unlock_delalloc(struct inode *inode, 330int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
334 struct extent_io_tree *tree, 331 struct page *locked_page,
335 u64 start, u64 end, struct page *locked_page, 332 unsigned long bits_to_clear,
336 unsigned long op); 333 unsigned long page_ops);
337struct bio * 334struct bio *
338btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 335btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
339 gfp_t gfp_flags); 336 gfp_t gfp_flags);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a7bfc9541803..4f53159bdb9d 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -23,6 +23,7 @@
23#include "ctree.h" 23#include "ctree.h"
24#include "disk-io.h" 24#include "disk-io.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "volumes.h"
26#include "print-tree.h" 27#include "print-tree.h"
27 28
28#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ 29#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
@@ -152,28 +153,54 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
152 return ret; 153 return ret;
153} 154}
154 155
156static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
157{
158 kfree(bio->csum_allocated);
159}
160
155static int __btrfs_lookup_bio_sums(struct btrfs_root *root, 161static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
156 struct inode *inode, struct bio *bio, 162 struct inode *inode, struct bio *bio,
157 u64 logical_offset, u32 *dst, int dio) 163 u64 logical_offset, u32 *dst, int dio)
158{ 164{
159 u32 sum[16];
160 int len;
161 struct bio_vec *bvec = bio->bi_io_vec; 165 struct bio_vec *bvec = bio->bi_io_vec;
162 int bio_index = 0; 166 struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
167 struct btrfs_csum_item *item = NULL;
168 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
169 struct btrfs_path *path;
170 u8 *csum;
163 u64 offset = 0; 171 u64 offset = 0;
164 u64 item_start_offset = 0; 172 u64 item_start_offset = 0;
165 u64 item_last_offset = 0; 173 u64 item_last_offset = 0;
166 u64 disk_bytenr; 174 u64 disk_bytenr;
167 u32 diff; 175 u32 diff;
168 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 176 int nblocks;
177 int bio_index = 0;
169 int count; 178 int count;
170 struct btrfs_path *path; 179 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
171 struct btrfs_csum_item *item = NULL;
172 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
173 180
174 path = btrfs_alloc_path(); 181 path = btrfs_alloc_path();
175 if (!path) 182 if (!path)
176 return -ENOMEM; 183 return -ENOMEM;
184
185 nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits;
186 if (!dst) {
187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
189 GFP_NOFS);
190 if (!btrfs_bio->csum_allocated) {
191 btrfs_free_path(path);
192 return -ENOMEM;
193 }
194 btrfs_bio->csum = btrfs_bio->csum_allocated;
195 btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
196 } else {
197 btrfs_bio->csum = btrfs_bio->csum_inline;
198 }
199 csum = btrfs_bio->csum;
200 } else {
201 csum = (u8 *)dst;
202 }
203
177 if (bio->bi_size > PAGE_CACHE_SIZE * 8) 204 if (bio->bi_size > PAGE_CACHE_SIZE * 8)
178 path->reada = 2; 205 path->reada = 2;
179 206
@@ -194,11 +221,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
194 if (dio) 221 if (dio)
195 offset = logical_offset; 222 offset = logical_offset;
196 while (bio_index < bio->bi_vcnt) { 223 while (bio_index < bio->bi_vcnt) {
197 len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
198 if (!dio) 224 if (!dio)
199 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 225 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
200 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum, 226 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
201 len); 227 (u32 *)csum, nblocks);
202 if (count) 228 if (count)
203 goto found; 229 goto found;
204 230
@@ -213,7 +239,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
213 path, disk_bytenr, 0); 239 path, disk_bytenr, 0);
214 if (IS_ERR(item)) { 240 if (IS_ERR(item)) {
215 count = 1; 241 count = 1;
216 sum[0] = 0; 242 memset(csum, 0, csum_size);
217 if (BTRFS_I(inode)->root->root_key.objectid == 243 if (BTRFS_I(inode)->root->root_key.objectid ==
218 BTRFS_DATA_RELOC_TREE_OBJECTID) { 244 BTRFS_DATA_RELOC_TREE_OBJECTID) {
219 set_extent_bits(io_tree, offset, 245 set_extent_bits(io_tree, offset,
@@ -222,9 +248,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
222 } else { 248 } else {
223 printk(KERN_INFO "btrfs no csum found " 249 printk(KERN_INFO "btrfs no csum found "
224 "for inode %llu start %llu\n", 250 "for inode %llu start %llu\n",
225 (unsigned long long) 251 btrfs_ino(inode), offset);
226 btrfs_ino(inode),
227 (unsigned long long)offset);
228 } 252 }
229 item = NULL; 253 item = NULL;
230 btrfs_release_path(path); 254 btrfs_release_path(path);
@@ -249,23 +273,14 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
249 diff = disk_bytenr - item_start_offset; 273 diff = disk_bytenr - item_start_offset;
250 diff = diff / root->sectorsize; 274 diff = diff / root->sectorsize;
251 diff = diff * csum_size; 275 diff = diff * csum_size;
252 count = min_t(int, len, (item_last_offset - disk_bytenr) >> 276 count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
253 inode->i_sb->s_blocksize_bits); 277 inode->i_sb->s_blocksize_bits);
254 read_extent_buffer(path->nodes[0], sum, 278 read_extent_buffer(path->nodes[0], csum,
255 ((unsigned long)item) + diff, 279 ((unsigned long)item) + diff,
256 csum_size * count); 280 csum_size * count);
257found: 281found:
258 if (dst) { 282 csum += count * csum_size;
259 memcpy(dst, sum, count * csum_size); 283 nblocks -= count;
260 dst += count;
261 } else {
262 if (dio)
263 extent_cache_csums_dio(io_tree, offset, sum,
264 count);
265 else
266 extent_cache_csums(io_tree, bio, bio_index, sum,
267 count);
268 }
269 while (count--) { 284 while (count--) {
270 disk_bytenr += bvec->bv_len; 285 disk_bytenr += bvec->bv_len;
271 offset += bvec->bv_len; 286 offset += bvec->bv_len;
@@ -284,9 +299,19 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
284} 299}
285 300
286int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, 301int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
287 struct bio *bio, u64 offset) 302 struct btrfs_dio_private *dip, struct bio *bio,
303 u64 offset)
288{ 304{
289 return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1); 305 int len = (bio->bi_sector << 9) - dip->disk_bytenr;
306 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
307 int ret;
308
309 len >>= inode->i_sb->s_blocksize_bits;
310 len *= csum_size;
311
312 ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
313 (u32 *)(dip->csum + len), 1);
314 return ret;
290} 315}
291 316
292int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 317int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4d2eb6417145..bc5072b2db53 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ fail:
1334static noinline int check_can_nocow(struct inode *inode, loff_t pos, 1334static noinline int check_can_nocow(struct inode *inode, loff_t pos,
1335 size_t *write_bytes) 1335 size_t *write_bytes)
1336{ 1336{
1337 struct btrfs_trans_handle *trans;
1338 struct btrfs_root *root = BTRFS_I(inode)->root; 1337 struct btrfs_root *root = BTRFS_I(inode)->root;
1339 struct btrfs_ordered_extent *ordered; 1338 struct btrfs_ordered_extent *ordered;
1340 u64 lockstart, lockend; 1339 u64 lockstart, lockend;
@@ -1356,16 +1355,8 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
1356 btrfs_put_ordered_extent(ordered); 1355 btrfs_put_ordered_extent(ordered);
1357 } 1356 }
1358 1357
1359 trans = btrfs_join_transaction(root);
1360 if (IS_ERR(trans)) {
1361 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1362 return PTR_ERR(trans);
1363 }
1364
1365 num_bytes = lockend - lockstart + 1; 1358 num_bytes = lockend - lockstart + 1;
1366 ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL, 1359 ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
1367 NULL);
1368 btrfs_end_transaction(trans, root);
1369 if (ret <= 0) { 1360 if (ret <= 0) {
1370 ret = 0; 1361 ret = 0;
1371 } else { 1362 } else {
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index b21a3cd667d8..ef3bea7bb257 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -308,7 +308,7 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl)
308 308
309static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) 309static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
310{ 310{
311 BUG_ON(io_ctl->index >= io_ctl->num_pages); 311 ASSERT(io_ctl->index < io_ctl->num_pages);
312 io_ctl->page = io_ctl->pages[io_ctl->index++]; 312 io_ctl->page = io_ctl->pages[io_ctl->index++];
313 io_ctl->cur = kmap(io_ctl->page); 313 io_ctl->cur = kmap(io_ctl->page);
314 io_ctl->orig = io_ctl->cur; 314 io_ctl->orig = io_ctl->cur;
@@ -673,8 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
673 btrfs_err(root->fs_info, 673 btrfs_err(root->fs_info,
674 "free space inode generation (%llu) " 674 "free space inode generation (%llu) "
675 "did not match free space cache generation (%llu)", 675 "did not match free space cache generation (%llu)",
676 (unsigned long long)BTRFS_I(inode)->generation, 676 BTRFS_I(inode)->generation, generation);
677 (unsigned long long)generation);
678 return 0; 677 return 0;
679 } 678 }
680 679
@@ -729,7 +728,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
729 goto free_cache; 728 goto free_cache;
730 } 729 }
731 } else { 730 } else {
732 BUG_ON(!num_bitmaps); 731 ASSERT(num_bitmaps);
733 num_bitmaps--; 732 num_bitmaps--;
734 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 733 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
735 if (!e->bitmap) { 734 if (!e->bitmap) {
@@ -1029,7 +1028,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
1029 leaf = path->nodes[0]; 1028 leaf = path->nodes[0];
1030 if (ret > 0) { 1029 if (ret > 0) {
1031 struct btrfs_key found_key; 1030 struct btrfs_key found_key;
1032 BUG_ON(!path->slots[0]); 1031 ASSERT(path->slots[0]);
1033 path->slots[0]--; 1032 path->slots[0]--;
1034 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1033 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1035 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1034 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
@@ -1117,7 +1116,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1117static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1116static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1118 u64 offset) 1117 u64 offset)
1119{ 1118{
1120 BUG_ON(offset < bitmap_start); 1119 ASSERT(offset >= bitmap_start);
1121 offset -= bitmap_start; 1120 offset -= bitmap_start;
1122 return (unsigned long)(div_u64(offset, unit)); 1121 return (unsigned long)(div_u64(offset, unit));
1123} 1122}
@@ -1272,7 +1271,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
1272 if (n) { 1271 if (n) {
1273 entry = rb_entry(n, struct btrfs_free_space, 1272 entry = rb_entry(n, struct btrfs_free_space,
1274 offset_index); 1273 offset_index);
1275 BUG_ON(entry->offset > offset); 1274 ASSERT(entry->offset <= offset);
1276 } else { 1275 } else {
1277 if (fuzzy) 1276 if (fuzzy)
1278 return entry; 1277 return entry;
@@ -1336,7 +1335,7 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
1336{ 1335{
1337 int ret = 0; 1336 int ret = 0;
1338 1337
1339 BUG_ON(!info->bitmap && !info->bytes); 1338 ASSERT(info->bytes || info->bitmap);
1340 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1339 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1341 &info->offset_index, (info->bitmap != NULL)); 1340 &info->offset_index, (info->bitmap != NULL));
1342 if (ret) 1341 if (ret)
@@ -1359,7 +1358,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1359 1358
1360 max_bitmaps = max(max_bitmaps, 1); 1359 max_bitmaps = max(max_bitmaps, 1);
1361 1360
1362 BUG_ON(ctl->total_bitmaps > max_bitmaps); 1361 ASSERT(ctl->total_bitmaps <= max_bitmaps);
1363 1362
1364 /* 1363 /*
1365 * The goal is to keep the total amount of memory used per 1gb of space 1364 * The goal is to keep the total amount of memory used per 1gb of space
@@ -1403,7 +1402,7 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1403 1402
1404 start = offset_to_bit(info->offset, ctl->unit, offset); 1403 start = offset_to_bit(info->offset, ctl->unit, offset);
1405 count = bytes_to_bits(bytes, ctl->unit); 1404 count = bytes_to_bits(bytes, ctl->unit);
1406 BUG_ON(start + count > BITS_PER_BITMAP); 1405 ASSERT(start + count <= BITS_PER_BITMAP);
1407 1406
1408 bitmap_clear(info->bitmap, start, count); 1407 bitmap_clear(info->bitmap, start, count);
1409 1408
@@ -1426,7 +1425,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1426 1425
1427 start = offset_to_bit(info->offset, ctl->unit, offset); 1426 start = offset_to_bit(info->offset, ctl->unit, offset);
1428 count = bytes_to_bits(bytes, ctl->unit); 1427 count = bytes_to_bits(bytes, ctl->unit);
1429 BUG_ON(start + count > BITS_PER_BITMAP); 1428 ASSERT(start + count <= BITS_PER_BITMAP);
1430 1429
1431 bitmap_set(info->bitmap, start, count); 1430 bitmap_set(info->bitmap, start, count);
1432 1431
@@ -1742,7 +1741,7 @@ no_cluster_bitmap:
1742 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1741 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1743 1, 0); 1742 1, 0);
1744 if (!bitmap_info) { 1743 if (!bitmap_info) {
1745 BUG_ON(added); 1744 ASSERT(added == 0);
1746 goto new_bitmap; 1745 goto new_bitmap;
1747 } 1746 }
1748 1747
@@ -1882,7 +1881,7 @@ out:
1882 1881
1883 if (ret) { 1882 if (ret) {
1884 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1883 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
1885 BUG_ON(ret == -EEXIST); 1884 ASSERT(ret != -EEXIST);
1886 } 1885 }
1887 1886
1888 return ret; 1887 return ret;
@@ -1991,8 +1990,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1991 if (info->bytes >= bytes && !block_group->ro) 1990 if (info->bytes >= bytes && !block_group->ro)
1992 count++; 1991 count++;
1993 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 1992 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1994 (unsigned long long)info->offset, 1993 info->offset, info->bytes,
1995 (unsigned long long)info->bytes,
1996 (info->bitmap) ? "yes" : "no"); 1994 (info->bitmap) ? "yes" : "no");
1997 } 1995 }
1998 printk(KERN_INFO "block group has cluster?: %s\n", 1996 printk(KERN_INFO "block group has cluster?: %s\n",
@@ -2371,7 +2369,7 @@ again:
2371 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2369 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2372 ret = tree_insert_offset(&cluster->root, entry->offset, 2370 ret = tree_insert_offset(&cluster->root, entry->offset,
2373 &entry->offset_index, 1); 2371 &entry->offset_index, 1);
2374 BUG_ON(ret); /* -EEXIST; Logic error */ 2372 ASSERT(!ret); /* -EEXIST; Logic error */
2375 2373
2376 trace_btrfs_setup_cluster(block_group, cluster, 2374 trace_btrfs_setup_cluster(block_group, cluster,
2377 total_found * ctl->unit, 1); 2375 total_found * ctl->unit, 1);
@@ -2464,7 +2462,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2464 ret = tree_insert_offset(&cluster->root, entry->offset, 2462 ret = tree_insert_offset(&cluster->root, entry->offset,
2465 &entry->offset_index, 0); 2463 &entry->offset_index, 0);
2466 total_size += entry->bytes; 2464 total_size += entry->bytes;
2467 BUG_ON(ret); /* -EEXIST; Logic error */ 2465 ASSERT(!ret); /* -EEXIST; Logic error */
2468 } while (node && entry != last); 2466 } while (node && entry != last);
2469 2467
2470 cluster->max_size = max_extent; 2468 cluster->max_size = max_extent;
@@ -2525,8 +2523,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2525 * returns zero and sets up cluster if things worked out, otherwise 2523 * returns zero and sets up cluster if things worked out, otherwise
2526 * it returns -enospc 2524 * it returns -enospc
2527 */ 2525 */
2528int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 2526int btrfs_find_space_cluster(struct btrfs_root *root,
2529 struct btrfs_root *root,
2530 struct btrfs_block_group_cache *block_group, 2527 struct btrfs_block_group_cache *block_group,
2531 struct btrfs_free_cluster *cluster, 2528 struct btrfs_free_cluster *cluster,
2532 u64 offset, u64 bytes, u64 empty_size) 2529 u64 offset, u64 bytes, u64 empty_size)
@@ -2856,7 +2853,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2856 2853
2857 ret = search_bitmap(ctl, entry, &offset, &count); 2854 ret = search_bitmap(ctl, entry, &offset, &count);
2858 /* Logic error; Should be empty if it can't find anything */ 2855 /* Logic error; Should be empty if it can't find anything */
2859 BUG_ON(ret); 2856 ASSERT(!ret);
2860 2857
2861 ino = offset; 2858 ino = offset;
2862 bitmap_clear_bits(ctl, entry, offset, 1); 2859 bitmap_clear_bits(ctl, entry, offset, 1);
@@ -2973,33 +2970,68 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2973} 2970}
2974 2971
2975#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 2972#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
2976static struct btrfs_block_group_cache *init_test_block_group(void) 2973/*
2974 * Use this if you need to make a bitmap or extent entry specifically, it
2975 * doesn't do any of the merging that add_free_space does, this acts a lot like
2976 * how the free space cache loading stuff works, so you can get really weird
2977 * configurations.
2978 */
2979int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
2980 u64 offset, u64 bytes, bool bitmap)
2977{ 2981{
2978 struct btrfs_block_group_cache *cache; 2982 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
2983 struct btrfs_free_space *info = NULL, *bitmap_info;
2984 void *map = NULL;
2985 u64 bytes_added;
2986 int ret;
2979 2987
2980 cache = kzalloc(sizeof(*cache), GFP_NOFS); 2988again:
2981 if (!cache) 2989 if (!info) {
2982 return NULL; 2990 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
2983 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 2991 if (!info)
2984 GFP_NOFS); 2992 return -ENOMEM;
2985 if (!cache->free_space_ctl) {
2986 kfree(cache);
2987 return NULL;
2988 } 2993 }
2989 2994
2990 cache->key.objectid = 0; 2995 if (!bitmap) {
2991 cache->key.offset = 1024 * 1024 * 1024; 2996 spin_lock(&ctl->tree_lock);
2992 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 2997 info->offset = offset;
2993 cache->sectorsize = 4096; 2998 info->bytes = bytes;
2999 ret = link_free_space(ctl, info);
3000 spin_unlock(&ctl->tree_lock);
3001 if (ret)
3002 kmem_cache_free(btrfs_free_space_cachep, info);
3003 return ret;
3004 }
3005
3006 if (!map) {
3007 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
3008 if (!map) {
3009 kmem_cache_free(btrfs_free_space_cachep, info);
3010 return -ENOMEM;
3011 }
3012 }
3013
3014 spin_lock(&ctl->tree_lock);
3015 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3016 1, 0);
3017 if (!bitmap_info) {
3018 info->bitmap = map;
3019 map = NULL;
3020 add_new_bitmap(ctl, info, offset);
3021 bitmap_info = info;
3022 }
2994 3023
2995 spin_lock_init(&cache->lock); 3024 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
2996 INIT_LIST_HEAD(&cache->list); 3025 bytes -= bytes_added;
2997 INIT_LIST_HEAD(&cache->cluster_list); 3026 offset += bytes_added;
2998 INIT_LIST_HEAD(&cache->new_bg_list); 3027 spin_unlock(&ctl->tree_lock);
2999 3028
3000 btrfs_init_free_space_ctl(cache); 3029 if (bytes)
3030 goto again;
3001 3031
3002 return cache; 3032 if (map)
3033 kfree(map);
3034 return 0;
3003} 3035}
3004 3036
3005/* 3037/*
@@ -3007,8 +3039,8 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
3007 * just used to check the absence of space, so if there is free space in the 3039 * just used to check the absence of space, so if there is free space in the
3008 * range at all we will return 1. 3040 * range at all we will return 1.
3009 */ 3041 */
3010static int check_exists(struct btrfs_block_group_cache *cache, u64 offset, 3042int test_check_exists(struct btrfs_block_group_cache *cache,
3011 u64 bytes) 3043 u64 offset, u64 bytes)
3012{ 3044{
3013 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3045 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3014 struct btrfs_free_space *info; 3046 struct btrfs_free_space *info;
@@ -3085,411 +3117,4 @@ out:
3085 spin_unlock(&ctl->tree_lock); 3117 spin_unlock(&ctl->tree_lock);
3086 return ret; 3118 return ret;
3087} 3119}
3088 3120#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
3089/*
3090 * Use this if you need to make a bitmap or extent entry specifically, it
3091 * doesn't do any of the merging that add_free_space does, this acts a lot like
3092 * how the free space cache loading stuff works, so you can get really weird
3093 * configurations.
3094 */
3095static int add_free_space_entry(struct btrfs_block_group_cache *cache,
3096 u64 offset, u64 bytes, bool bitmap)
3097{
3098 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3099 struct btrfs_free_space *info = NULL, *bitmap_info;
3100 void *map = NULL;
3101 u64 bytes_added;
3102 int ret;
3103
3104again:
3105 if (!info) {
3106 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
3107 if (!info)
3108 return -ENOMEM;
3109 }
3110
3111 if (!bitmap) {
3112 spin_lock(&ctl->tree_lock);
3113 info->offset = offset;
3114 info->bytes = bytes;
3115 ret = link_free_space(ctl, info);
3116 spin_unlock(&ctl->tree_lock);
3117 if (ret)
3118 kmem_cache_free(btrfs_free_space_cachep, info);
3119 return ret;
3120 }
3121
3122 if (!map) {
3123 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
3124 if (!map) {
3125 kmem_cache_free(btrfs_free_space_cachep, info);
3126 return -ENOMEM;
3127 }
3128 }
3129
3130 spin_lock(&ctl->tree_lock);
3131 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3132 1, 0);
3133 if (!bitmap_info) {
3134 info->bitmap = map;
3135 map = NULL;
3136 add_new_bitmap(ctl, info, offset);
3137 bitmap_info = info;
3138 }
3139
3140 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
3141 bytes -= bytes_added;
3142 offset += bytes_added;
3143 spin_unlock(&ctl->tree_lock);
3144
3145 if (bytes)
3146 goto again;
3147
3148 if (map)
3149 kfree(map);
3150 return 0;
3151}
3152
3153#define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__)
3154
3155/*
3156 * This test just does basic sanity checking, making sure we can add an exten
3157 * entry and remove space from either end and the middle, and make sure we can
3158 * remove space that covers adjacent extent entries.
3159 */
3160static int test_extents(struct btrfs_block_group_cache *cache)
3161{
3162 int ret = 0;
3163
3164 test_msg("Running extent only tests\n");
3165
3166 /* First just make sure we can remove an entire entry */
3167 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
3168 if (ret) {
3169 test_msg("Error adding initial extents %d\n", ret);
3170 return ret;
3171 }
3172
3173 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
3174 if (ret) {
3175 test_msg("Error removing extent %d\n", ret);
3176 return ret;
3177 }
3178
3179 if (check_exists(cache, 0, 4 * 1024 * 1024)) {
3180 test_msg("Full remove left some lingering space\n");
3181 return -1;
3182 }
3183
3184 /* Ok edge and middle cases now */
3185 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
3186 if (ret) {
3187 test_msg("Error adding half extent %d\n", ret);
3188 return ret;
3189 }
3190
3191 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
3192 if (ret) {
3193 test_msg("Error removing tail end %d\n", ret);
3194 return ret;
3195 }
3196
3197 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
3198 if (ret) {
3199 test_msg("Error removing front end %d\n", ret);
3200 return ret;
3201 }
3202
3203 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
3204 if (ret) {
3205 test_msg("Error removing middle piece %d\n", ret);
3206 return ret;
3207 }
3208
3209 if (check_exists(cache, 0, 1 * 1024 * 1024)) {
3210 test_msg("Still have space at the front\n");
3211 return -1;
3212 }
3213
3214 if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
3215 test_msg("Still have space in the middle\n");
3216 return -1;
3217 }
3218
3219 if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
3220 test_msg("Still have space at the end\n");
3221 return -1;
3222 }
3223
3224 /* Cleanup */
3225 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3226
3227 return 0;
3228}
3229
3230static int test_bitmaps(struct btrfs_block_group_cache *cache)
3231{
3232 u64 next_bitmap_offset;
3233 int ret;
3234
3235 test_msg("Running bitmap only tests\n");
3236
3237 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
3238 if (ret) {
3239 test_msg("Couldn't create a bitmap entry %d\n", ret);
3240 return ret;
3241 }
3242
3243 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
3244 if (ret) {
3245 test_msg("Error removing bitmap full range %d\n", ret);
3246 return ret;
3247 }
3248
3249 if (check_exists(cache, 0, 4 * 1024 * 1024)) {
3250 test_msg("Left some space in bitmap\n");
3251 return -1;
3252 }
3253
3254 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
3255 if (ret) {
3256 test_msg("Couldn't add to our bitmap entry %d\n", ret);
3257 return ret;
3258 }
3259
3260 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
3261 if (ret) {
3262 test_msg("Couldn't remove middle chunk %d\n", ret);
3263 return ret;
3264 }
3265
3266 /*
3267 * The first bitmap we have starts at offset 0 so the next one is just
3268 * at the end of the first bitmap.
3269 */
3270 next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
3271
3272 /* Test a bit straddling two bitmaps */
3273 ret = add_free_space_entry(cache, next_bitmap_offset -
3274 (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
3275 if (ret) {
3276 test_msg("Couldn't add space that straddles two bitmaps %d\n",
3277 ret);
3278 return ret;
3279 }
3280
3281 ret = btrfs_remove_free_space(cache, next_bitmap_offset -
3282 (1 * 1024 * 1024), 2 * 1024 * 1024);
3283 if (ret) {
3284 test_msg("Couldn't remove overlapping space %d\n", ret);
3285 return ret;
3286 }
3287
3288 if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
3289 2 * 1024 * 1024)) {
3290 test_msg("Left some space when removing overlapping\n");
3291 return -1;
3292 }
3293
3294 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3295
3296 return 0;
3297}
3298
3299/* This is the high grade jackassery */
3300static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
3301{
3302 u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
3303 int ret;
3304
3305 test_msg("Running bitmap and extent tests\n");
3306
3307 /*
3308 * First let's do something simple, an extent at the same offset as the
3309 * bitmap, but the free space completely in the extent and then
3310 * completely in the bitmap.
3311 */
3312 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
3313 if (ret) {
3314 test_msg("Couldn't create bitmap entry %d\n", ret);
3315 return ret;
3316 }
3317
3318 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
3319 if (ret) {
3320 test_msg("Couldn't add extent entry %d\n", ret);
3321 return ret;
3322 }
3323
3324 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
3325 if (ret) {
3326 test_msg("Couldn't remove extent entry %d\n", ret);
3327 return ret;
3328 }
3329
3330 if (check_exists(cache, 0, 1 * 1024 * 1024)) {
3331 test_msg("Left remnants after our remove\n");
3332 return -1;
3333 }
3334
3335 /* Now to add back the extent entry and remove from the bitmap */
3336 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
3337 if (ret) {
3338 test_msg("Couldn't re-add extent entry %d\n", ret);
3339 return ret;
3340 }
3341
3342 ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
3343 if (ret) {
3344 test_msg("Couldn't remove from bitmap %d\n", ret);
3345 return ret;
3346 }
3347
3348 if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
3349 test_msg("Left remnants in the bitmap\n");
3350 return -1;
3351 }
3352
3353 /*
3354 * Ok so a little more evil, extent entry and bitmap at the same offset,
3355 * removing an overlapping chunk.
3356 */
3357 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
3358 if (ret) {
3359 test_msg("Couldn't add to a bitmap %d\n", ret);
3360 return ret;
3361 }
3362
3363 ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
3364 if (ret) {
3365 test_msg("Couldn't remove overlapping space %d\n", ret);
3366 return ret;
3367 }
3368
3369 if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
3370 test_msg("Left over peices after removing overlapping\n");
3371 return -1;
3372 }
3373
3374 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3375
3376 /* Now with the extent entry offset into the bitmap */
3377 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
3378 if (ret) {
3379 test_msg("Couldn't add space to the bitmap %d\n", ret);
3380 return ret;
3381 }
3382
3383 ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
3384 if (ret) {
3385 test_msg("Couldn't add extent to the cache %d\n", ret);
3386 return ret;
3387 }
3388
3389 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
3390 if (ret) {
3391 test_msg("Problem removing overlapping space %d\n", ret);
3392 return ret;
3393 }
3394
3395 if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
3396 test_msg("Left something behind when removing space");
3397 return -1;
3398 }
3399
3400 /*
3401 * This has blown up in the past, the extent entry starts before the
3402 * bitmap entry, but we're trying to remove an offset that falls
3403 * completely within the bitmap range and is in both the extent entry
3404 * and the bitmap entry, looks like this
3405 *
3406 * [ extent ]
3407 * [ bitmap ]
3408 * [ del ]
3409 */
3410 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3411 ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
3412 4 * 1024 * 1024, 1);
3413 if (ret) {
3414 test_msg("Couldn't add bitmap %d\n", ret);
3415 return ret;
3416 }
3417
3418 ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
3419 5 * 1024 * 1024, 0);
3420 if (ret) {
3421 test_msg("Couldn't add extent entry %d\n", ret);
3422 return ret;
3423 }
3424
3425 ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
3426 5 * 1024 * 1024);
3427 if (ret) {
3428 test_msg("Failed to free our space %d\n", ret);
3429 return ret;
3430 }
3431
3432 if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
3433 5 * 1024 * 1024)) {
3434 test_msg("Left stuff over\n");
3435 return -1;
3436 }
3437
3438 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3439
3440 /*
3441 * This blew up before, we have part of the free space in a bitmap and
3442 * then the entirety of the rest of the space in an extent. This used
3443 * to return -EAGAIN back from btrfs_remove_extent, make sure this
3444 * doesn't happen.
3445 */
3446 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
3447 if (ret) {
3448 test_msg("Couldn't add bitmap entry %d\n", ret);
3449 return ret;
3450 }
3451
3452 ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
3453 if (ret) {
3454 test_msg("Couldn't add extent entry %d\n", ret);
3455 return ret;
3456 }
3457
3458 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
3459 if (ret) {
3460 test_msg("Error removing bitmap and extent overlapping %d\n", ret);
3461 return ret;
3462 }
3463
3464 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3465 return 0;
3466}
3467
3468void btrfs_test_free_space_cache(void)
3469{
3470 struct btrfs_block_group_cache *cache;
3471
3472 test_msg("Running btrfs free space cache tests\n");
3473
3474 cache = init_test_block_group();
3475 if (!cache) {
3476 test_msg("Couldn't run the tests\n");
3477 return;
3478 }
3479
3480 if (test_extents(cache))
3481 goto out;
3482 if (test_bitmaps(cache))
3483 goto out;
3484 if (test_bitmaps_and_extents(cache))
3485 goto out;
3486out:
3487 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3488 kfree(cache->free_space_ctl);
3489 kfree(cache);
3490 test_msg("Free space cache tests finished\n");
3491}
3492#undef test_msg
3493#else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
3494void btrfs_test_free_space_cache(void) {}
3495#endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 894116b71304..c74904167476 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -98,8 +98,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
98u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); 98u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
99void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 99void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
100 u64 bytes); 100 u64 bytes);
101int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 101int btrfs_find_space_cluster(struct btrfs_root *root,
102 struct btrfs_root *root,
103 struct btrfs_block_group_cache *block_group, 102 struct btrfs_block_group_cache *block_group,
104 struct btrfs_free_cluster *cluster, 103 struct btrfs_free_cluster *cluster,
105 u64 offset, u64 bytes, u64 empty_size); 104 u64 offset, u64 bytes, u64 empty_size);
@@ -113,6 +112,12 @@ int btrfs_return_cluster_to_free_space(
113int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 112int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
114 u64 *trimmed, u64 start, u64 end, u64 minlen); 113 u64 *trimmed, u64 start, u64 end, u64 minlen);
115 114
116void btrfs_test_free_space_cache(void); 115/* Support functions for runnint our sanity tests */
116#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
117int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
118 u64 offset, u64 bytes, bool bitmap);
119int test_check_exists(struct btrfs_block_group_cache *cache,
120 u64 offset, u64 bytes);
121#endif
117 122
118#endif 123#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bdc83d04d54..db1e43948579 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -230,12 +230,13 @@ fail:
230 * does the checks required to make sure the data is small enough 230 * does the checks required to make sure the data is small enough
231 * to fit as an inline extent. 231 * to fit as an inline extent.
232 */ 232 */
233static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, 233static noinline int cow_file_range_inline(struct btrfs_root *root,
234 struct btrfs_root *root, 234 struct inode *inode, u64 start,
235 struct inode *inode, u64 start, u64 end, 235 u64 end, size_t compressed_size,
236 size_t compressed_size, int compress_type, 236 int compress_type,
237 struct page **compressed_pages) 237 struct page **compressed_pages)
238{ 238{
239 struct btrfs_trans_handle *trans;
239 u64 isize = i_size_read(inode); 240 u64 isize = i_size_read(inode);
240 u64 actual_end = min(end + 1, isize); 241 u64 actual_end = min(end + 1, isize);
241 u64 inline_len = actual_end - start; 242 u64 inline_len = actual_end - start;
@@ -256,9 +257,16 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
256 return 1; 257 return 1;
257 } 258 }
258 259
260 trans = btrfs_join_transaction(root);
261 if (IS_ERR(trans))
262 return PTR_ERR(trans);
263 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
264
259 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); 265 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
260 if (ret) 266 if (ret) {
261 return ret; 267 btrfs_abort_transaction(trans, root, ret);
268 goto out;
269 }
262 270
263 if (isize > actual_end) 271 if (isize > actual_end)
264 inline_len = min_t(u64, isize, actual_end); 272 inline_len = min_t(u64, isize, actual_end);
@@ -267,15 +275,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
267 compress_type, compressed_pages); 275 compress_type, compressed_pages);
268 if (ret && ret != -ENOSPC) { 276 if (ret && ret != -ENOSPC) {
269 btrfs_abort_transaction(trans, root, ret); 277 btrfs_abort_transaction(trans, root, ret);
270 return ret; 278 goto out;
271 } else if (ret == -ENOSPC) { 279 } else if (ret == -ENOSPC) {
272 return 1; 280 ret = 1;
281 goto out;
273 } 282 }
274 283
275 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); 284 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
276 btrfs_delalloc_release_metadata(inode, end + 1 - start); 285 btrfs_delalloc_release_metadata(inode, end + 1 - start);
277 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 286 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
278 return 0; 287out:
288 btrfs_end_transaction(trans, root);
289 return ret;
279} 290}
280 291
281struct async_extent { 292struct async_extent {
@@ -343,7 +354,6 @@ static noinline int compress_file_range(struct inode *inode,
343 int *num_added) 354 int *num_added)
344{ 355{
345 struct btrfs_root *root = BTRFS_I(inode)->root; 356 struct btrfs_root *root = BTRFS_I(inode)->root;
346 struct btrfs_trans_handle *trans;
347 u64 num_bytes; 357 u64 num_bytes;
348 u64 blocksize = root->sectorsize; 358 u64 blocksize = root->sectorsize;
349 u64 actual_end; 359 u64 actual_end;
@@ -461,45 +471,36 @@ again:
461 } 471 }
462cont: 472cont:
463 if (start == 0) { 473 if (start == 0) {
464 trans = btrfs_join_transaction(root);
465 if (IS_ERR(trans)) {
466 ret = PTR_ERR(trans);
467 trans = NULL;
468 goto cleanup_and_out;
469 }
470 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
471
472 /* lets try to make an inline extent */ 474 /* lets try to make an inline extent */
473 if (ret || total_in < (actual_end - start)) { 475 if (ret || total_in < (actual_end - start)) {
474 /* we didn't compress the entire range, try 476 /* we didn't compress the entire range, try
475 * to make an uncompressed inline extent. 477 * to make an uncompressed inline extent.
476 */ 478 */
477 ret = cow_file_range_inline(trans, root, inode, 479 ret = cow_file_range_inline(root, inode, start, end,
478 start, end, 0, 0, NULL); 480 0, 0, NULL);
479 } else { 481 } else {
480 /* try making a compressed inline extent */ 482 /* try making a compressed inline extent */
481 ret = cow_file_range_inline(trans, root, inode, 483 ret = cow_file_range_inline(root, inode, start, end,
482 start, end,
483 total_compressed, 484 total_compressed,
484 compress_type, pages); 485 compress_type, pages);
485 } 486 }
486 if (ret <= 0) { 487 if (ret <= 0) {
488 unsigned long clear_flags = EXTENT_DELALLOC |
489 EXTENT_DEFRAG;
490 clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
491
487 /* 492 /*
488 * inline extent creation worked or returned error, 493 * inline extent creation worked or returned error,
489 * we don't need to create any more async work items. 494 * we don't need to create any more async work items.
490 * Unlock and free up our temp pages. 495 * Unlock and free up our temp pages.
491 */ 496 */
492 extent_clear_unlock_delalloc(inode, 497 extent_clear_unlock_delalloc(inode, start, end, NULL,
493 &BTRFS_I(inode)->io_tree, 498 clear_flags, PAGE_UNLOCK |
494 start, end, NULL, 499 PAGE_CLEAR_DIRTY |
495 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 500 PAGE_SET_WRITEBACK |
496 EXTENT_CLEAR_DELALLOC | 501 PAGE_END_WRITEBACK);
497 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
498
499 btrfs_end_transaction(trans, root);
500 goto free_pages_out; 502 goto free_pages_out;
501 } 503 }
502 btrfs_end_transaction(trans, root);
503 } 504 }
504 505
505 if (will_compress) { 506 if (will_compress) {
@@ -590,20 +591,6 @@ free_pages_out:
590 kfree(pages); 591 kfree(pages);
591 592
592 goto out; 593 goto out;
593
594cleanup_and_out:
595 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
596 start, end, NULL,
597 EXTENT_CLEAR_UNLOCK_PAGE |
598 EXTENT_CLEAR_DIRTY |
599 EXTENT_CLEAR_DELALLOC |
600 EXTENT_SET_WRITEBACK |
601 EXTENT_END_WRITEBACK);
602 if (!trans || IS_ERR(trans))
603 btrfs_error(root->fs_info, ret, "Failed to join transaction");
604 else
605 btrfs_abort_transaction(trans, root, ret);
606 goto free_pages_out;
607} 594}
608 595
609/* 596/*
@@ -617,7 +604,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
617{ 604{
618 struct async_extent *async_extent; 605 struct async_extent *async_extent;
619 u64 alloc_hint = 0; 606 u64 alloc_hint = 0;
620 struct btrfs_trans_handle *trans;
621 struct btrfs_key ins; 607 struct btrfs_key ins;
622 struct extent_map *em; 608 struct extent_map *em;
623 struct btrfs_root *root = BTRFS_I(inode)->root; 609 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -678,20 +664,10 @@ retry:
678 lock_extent(io_tree, async_extent->start, 664 lock_extent(io_tree, async_extent->start,
679 async_extent->start + async_extent->ram_size - 1); 665 async_extent->start + async_extent->ram_size - 1);
680 666
681 trans = btrfs_join_transaction(root); 667 ret = btrfs_reserve_extent(root,
682 if (IS_ERR(trans)) {
683 ret = PTR_ERR(trans);
684 } else {
685 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
686 ret = btrfs_reserve_extent(trans, root,
687 async_extent->compressed_size, 668 async_extent->compressed_size,
688 async_extent->compressed_size, 669 async_extent->compressed_size,
689 0, alloc_hint, &ins, 1); 670 0, alloc_hint, &ins, 1);
690 if (ret && ret != -ENOSPC)
691 btrfs_abort_transaction(trans, root, ret);
692 btrfs_end_transaction(trans, root);
693 }
694
695 if (ret) { 671 if (ret) {
696 int i; 672 int i;
697 673
@@ -770,16 +746,12 @@ retry:
770 /* 746 /*
771 * clear dirty, set writeback and unlock the pages. 747 * clear dirty, set writeback and unlock the pages.
772 */ 748 */
773 extent_clear_unlock_delalloc(inode, 749 extent_clear_unlock_delalloc(inode, async_extent->start,
774 &BTRFS_I(inode)->io_tree,
775 async_extent->start,
776 async_extent->start + 750 async_extent->start +
777 async_extent->ram_size - 1, 751 async_extent->ram_size - 1,
778 NULL, EXTENT_CLEAR_UNLOCK_PAGE | 752 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
779 EXTENT_CLEAR_UNLOCK | 753 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
780 EXTENT_CLEAR_DELALLOC | 754 PAGE_SET_WRITEBACK);
781 EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
782
783 ret = btrfs_submit_compressed_write(inode, 755 ret = btrfs_submit_compressed_write(inode,
784 async_extent->start, 756 async_extent->start,
785 async_extent->ram_size, 757 async_extent->ram_size,
@@ -798,16 +770,13 @@ out:
798out_free_reserve: 770out_free_reserve:
799 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 771 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
800out_free: 772out_free:
801 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 773 extent_clear_unlock_delalloc(inode, async_extent->start,
802 async_extent->start,
803 async_extent->start + 774 async_extent->start +
804 async_extent->ram_size - 1, 775 async_extent->ram_size - 1,
805 NULL, EXTENT_CLEAR_UNLOCK_PAGE | 776 NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
806 EXTENT_CLEAR_UNLOCK | 777 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
807 EXTENT_CLEAR_DELALLOC | 778 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
808 EXTENT_CLEAR_DIRTY | 779 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
809 EXTENT_SET_WRITEBACK |
810 EXTENT_END_WRITEBACK);
811 kfree(async_extent); 780 kfree(async_extent);
812 goto again; 781 goto again;
813} 782}
@@ -857,14 +826,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
857 * required to start IO on it. It may be clean and already done with 826 * required to start IO on it. It may be clean and already done with
858 * IO when we return. 827 * IO when we return.
859 */ 828 */
860static noinline int __cow_file_range(struct btrfs_trans_handle *trans, 829static noinline int cow_file_range(struct inode *inode,
861 struct inode *inode, 830 struct page *locked_page,
862 struct btrfs_root *root, 831 u64 start, u64 end, int *page_started,
863 struct page *locked_page, 832 unsigned long *nr_written,
864 u64 start, u64 end, int *page_started, 833 int unlock)
865 unsigned long *nr_written,
866 int unlock)
867{ 834{
835 struct btrfs_root *root = BTRFS_I(inode)->root;
868 u64 alloc_hint = 0; 836 u64 alloc_hint = 0;
869 u64 num_bytes; 837 u64 num_bytes;
870 unsigned long ram_size; 838 unsigned long ram_size;
@@ -885,29 +853,24 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
885 /* if this is a small write inside eof, kick off defrag */ 853 /* if this is a small write inside eof, kick off defrag */
886 if (num_bytes < 64 * 1024 && 854 if (num_bytes < 64 * 1024 &&
887 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) 855 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
888 btrfs_add_inode_defrag(trans, inode); 856 btrfs_add_inode_defrag(NULL, inode);
889 857
890 if (start == 0) { 858 if (start == 0) {
891 /* lets try to make an inline extent */ 859 /* lets try to make an inline extent */
892 ret = cow_file_range_inline(trans, root, inode, 860 ret = cow_file_range_inline(root, inode, start, end, 0, 0,
893 start, end, 0, 0, NULL); 861 NULL);
894 if (ret == 0) { 862 if (ret == 0) {
895 extent_clear_unlock_delalloc(inode, 863 extent_clear_unlock_delalloc(inode, start, end, NULL,
896 &BTRFS_I(inode)->io_tree, 864 EXTENT_LOCKED | EXTENT_DELALLOC |
897 start, end, NULL, 865 EXTENT_DEFRAG, PAGE_UNLOCK |
898 EXTENT_CLEAR_UNLOCK_PAGE | 866 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
899 EXTENT_CLEAR_UNLOCK | 867 PAGE_END_WRITEBACK);
900 EXTENT_CLEAR_DELALLOC |
901 EXTENT_CLEAR_DIRTY |
902 EXTENT_SET_WRITEBACK |
903 EXTENT_END_WRITEBACK);
904 868
905 *nr_written = *nr_written + 869 *nr_written = *nr_written +
906 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 870 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
907 *page_started = 1; 871 *page_started = 1;
908 goto out; 872 goto out;
909 } else if (ret < 0) { 873 } else if (ret < 0) {
910 btrfs_abort_transaction(trans, root, ret);
911 goto out_unlock; 874 goto out_unlock;
912 } 875 }
913 } 876 }
@@ -922,13 +885,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
922 unsigned long op; 885 unsigned long op;
923 886
924 cur_alloc_size = disk_num_bytes; 887 cur_alloc_size = disk_num_bytes;
925 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 888 ret = btrfs_reserve_extent(root, cur_alloc_size,
926 root->sectorsize, 0, alloc_hint, 889 root->sectorsize, 0, alloc_hint,
927 &ins, 1); 890 &ins, 1);
928 if (ret < 0) { 891 if (ret < 0)
929 btrfs_abort_transaction(trans, root, ret);
930 goto out_unlock; 892 goto out_unlock;
931 }
932 893
933 em = alloc_extent_map(); 894 em = alloc_extent_map();
934 if (!em) { 895 if (!em) {
@@ -974,10 +935,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
974 BTRFS_DATA_RELOC_TREE_OBJECTID) { 935 BTRFS_DATA_RELOC_TREE_OBJECTID) {
975 ret = btrfs_reloc_clone_csums(inode, start, 936 ret = btrfs_reloc_clone_csums(inode, start,
976 cur_alloc_size); 937 cur_alloc_size);
977 if (ret) { 938 if (ret)
978 btrfs_abort_transaction(trans, root, ret);
979 goto out_reserve; 939 goto out_reserve;
980 }
981 } 940 }
982 941
983 if (disk_num_bytes < cur_alloc_size) 942 if (disk_num_bytes < cur_alloc_size)
@@ -990,13 +949,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
990 * Do set the Private2 bit so we know this page was properly 949 * Do set the Private2 bit so we know this page was properly
991 * setup for writepage 950 * setup for writepage
992 */ 951 */
993 op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; 952 op = unlock ? PAGE_UNLOCK : 0;
994 op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | 953 op |= PAGE_SET_PRIVATE2;
995 EXTENT_SET_PRIVATE2;
996 954
997 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 955 extent_clear_unlock_delalloc(inode, start,
998 start, start + ram_size - 1, 956 start + ram_size - 1, locked_page,
999 locked_page, op); 957 EXTENT_LOCKED | EXTENT_DELALLOC,
958 op);
1000 disk_num_bytes -= cur_alloc_size; 959 disk_num_bytes -= cur_alloc_size;
1001 num_bytes -= cur_alloc_size; 960 num_bytes -= cur_alloc_size;
1002 alloc_hint = ins.objectid + ins.offset; 961 alloc_hint = ins.objectid + ins.offset;
@@ -1008,52 +967,14 @@ out:
1008out_reserve: 967out_reserve:
1009 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 968 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
1010out_unlock: 969out_unlock:
1011 extent_clear_unlock_delalloc(inode, 970 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1012 &BTRFS_I(inode)->io_tree, 971 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
1013 start, end, locked_page, 972 EXTENT_DELALLOC | EXTENT_DEFRAG,
1014 EXTENT_CLEAR_UNLOCK_PAGE | 973 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1015 EXTENT_CLEAR_UNLOCK | 974 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
1016 EXTENT_CLEAR_DELALLOC |
1017 EXTENT_CLEAR_DIRTY |
1018 EXTENT_SET_WRITEBACK |
1019 EXTENT_END_WRITEBACK);
1020
1021 goto out; 975 goto out;
1022} 976}
1023 977
1024static noinline int cow_file_range(struct inode *inode,
1025 struct page *locked_page,
1026 u64 start, u64 end, int *page_started,
1027 unsigned long *nr_written,
1028 int unlock)
1029{
1030 struct btrfs_trans_handle *trans;
1031 struct btrfs_root *root = BTRFS_I(inode)->root;
1032 int ret;
1033
1034 trans = btrfs_join_transaction(root);
1035 if (IS_ERR(trans)) {
1036 extent_clear_unlock_delalloc(inode,
1037 &BTRFS_I(inode)->io_tree,
1038 start, end, locked_page,
1039 EXTENT_CLEAR_UNLOCK_PAGE |
1040 EXTENT_CLEAR_UNLOCK |
1041 EXTENT_CLEAR_DELALLOC |
1042 EXTENT_CLEAR_DIRTY |
1043 EXTENT_SET_WRITEBACK |
1044 EXTENT_END_WRITEBACK);
1045 return PTR_ERR(trans);
1046 }
1047 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1048
1049 ret = __cow_file_range(trans, inode, root, locked_page, start, end,
1050 page_started, nr_written, unlock);
1051
1052 btrfs_end_transaction(trans, root);
1053
1054 return ret;
1055}
1056
1057/* 978/*
1058 * work queue call back to started compression on a file and pages 979 * work queue call back to started compression on a file and pages
1059 */ 980 */
@@ -1221,15 +1142,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1221 1142
1222 path = btrfs_alloc_path(); 1143 path = btrfs_alloc_path();
1223 if (!path) { 1144 if (!path) {
1224 extent_clear_unlock_delalloc(inode, 1145 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1225 &BTRFS_I(inode)->io_tree, 1146 EXTENT_LOCKED | EXTENT_DELALLOC |
1226 start, end, locked_page, 1147 EXTENT_DO_ACCOUNTING |
1227 EXTENT_CLEAR_UNLOCK_PAGE | 1148 EXTENT_DEFRAG, PAGE_UNLOCK |
1228 EXTENT_CLEAR_UNLOCK | 1149 PAGE_CLEAR_DIRTY |
1229 EXTENT_CLEAR_DELALLOC | 1150 PAGE_SET_WRITEBACK |
1230 EXTENT_CLEAR_DIRTY | 1151 PAGE_END_WRITEBACK);
1231 EXTENT_SET_WRITEBACK |
1232 EXTENT_END_WRITEBACK);
1233 return -ENOMEM; 1152 return -ENOMEM;
1234 } 1153 }
1235 1154
@@ -1241,15 +1160,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1241 trans = btrfs_join_transaction(root); 1160 trans = btrfs_join_transaction(root);
1242 1161
1243 if (IS_ERR(trans)) { 1162 if (IS_ERR(trans)) {
1244 extent_clear_unlock_delalloc(inode, 1163 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1245 &BTRFS_I(inode)->io_tree, 1164 EXTENT_LOCKED | EXTENT_DELALLOC |
1246 start, end, locked_page, 1165 EXTENT_DO_ACCOUNTING |
1247 EXTENT_CLEAR_UNLOCK_PAGE | 1166 EXTENT_DEFRAG, PAGE_UNLOCK |
1248 EXTENT_CLEAR_UNLOCK | 1167 PAGE_CLEAR_DIRTY |
1249 EXTENT_CLEAR_DELALLOC | 1168 PAGE_SET_WRITEBACK |
1250 EXTENT_CLEAR_DIRTY | 1169 PAGE_END_WRITEBACK);
1251 EXTENT_SET_WRITEBACK |
1252 EXTENT_END_WRITEBACK);
1253 btrfs_free_path(path); 1170 btrfs_free_path(path);
1254 return PTR_ERR(trans); 1171 return PTR_ERR(trans);
1255 } 1172 }
@@ -1369,9 +1286,9 @@ out_check:
1369 1286
1370 btrfs_release_path(path); 1287 btrfs_release_path(path);
1371 if (cow_start != (u64)-1) { 1288 if (cow_start != (u64)-1) {
1372 ret = __cow_file_range(trans, inode, root, locked_page, 1289 ret = cow_file_range(inode, locked_page,
1373 cow_start, found_key.offset - 1, 1290 cow_start, found_key.offset - 1,
1374 page_started, nr_written, 1); 1291 page_started, nr_written, 1);
1375 if (ret) { 1292 if (ret) {
1376 btrfs_abort_transaction(trans, root, ret); 1293 btrfs_abort_transaction(trans, root, ret);
1377 goto error; 1294 goto error;
@@ -1428,11 +1345,11 @@ out_check:
1428 } 1345 }
1429 } 1346 }
1430 1347
1431 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1348 extent_clear_unlock_delalloc(inode, cur_offset,
1432 cur_offset, cur_offset + num_bytes - 1, 1349 cur_offset + num_bytes - 1,
1433 locked_page, EXTENT_CLEAR_UNLOCK_PAGE | 1350 locked_page, EXTENT_LOCKED |
1434 EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | 1351 EXTENT_DELALLOC, PAGE_UNLOCK |
1435 EXTENT_SET_PRIVATE2); 1352 PAGE_SET_PRIVATE2);
1436 cur_offset = extent_end; 1353 cur_offset = extent_end;
1437 if (cur_offset > end) 1354 if (cur_offset > end)
1438 break; 1355 break;
@@ -1445,9 +1362,8 @@ out_check:
1445 } 1362 }
1446 1363
1447 if (cow_start != (u64)-1) { 1364 if (cow_start != (u64)-1) {
1448 ret = __cow_file_range(trans, inode, root, locked_page, 1365 ret = cow_file_range(inode, locked_page, cow_start, end,
1449 cow_start, end, 1366 page_started, nr_written, 1);
1450 page_started, nr_written, 1);
1451 if (ret) { 1367 if (ret) {
1452 btrfs_abort_transaction(trans, root, ret); 1368 btrfs_abort_transaction(trans, root, ret);
1453 goto error; 1369 goto error;
@@ -1460,16 +1376,13 @@ error:
1460 ret = err; 1376 ret = err;
1461 1377
1462 if (ret && cur_offset < end) 1378 if (ret && cur_offset < end)
1463 extent_clear_unlock_delalloc(inode, 1379 extent_clear_unlock_delalloc(inode, cur_offset, end,
1464 &BTRFS_I(inode)->io_tree, 1380 locked_page, EXTENT_LOCKED |
1465 cur_offset, end, locked_page, 1381 EXTENT_DELALLOC | EXTENT_DEFRAG |
1466 EXTENT_CLEAR_UNLOCK_PAGE | 1382 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1467 EXTENT_CLEAR_UNLOCK | 1383 PAGE_CLEAR_DIRTY |
1468 EXTENT_CLEAR_DELALLOC | 1384 PAGE_SET_WRITEBACK |
1469 EXTENT_CLEAR_DIRTY | 1385 PAGE_END_WRITEBACK);
1470 EXTENT_SET_WRITEBACK |
1471 EXTENT_END_WRITEBACK);
1472
1473 btrfs_free_path(path); 1386 btrfs_free_path(path);
1474 return ret; 1387 return ret;
1475} 1388}
@@ -2132,6 +2045,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2132 WARN_ON(1); 2045 WARN_ON(1);
2133 return ret; 2046 return ret;
2134 } 2047 }
2048 ret = 0;
2135 2049
2136 while (1) { 2050 while (1) {
2137 cond_resched(); 2051 cond_resched();
@@ -2181,8 +2095,6 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2181 old->len || extent_offset + num_bytes <= 2095 old->len || extent_offset + num_bytes <=
2182 old->extent_offset + old->offset) 2096 old->extent_offset + old->offset)
2183 continue; 2097 continue;
2184
2185 ret = 0;
2186 break; 2098 break;
2187 } 2099 }
2188 2100
@@ -2238,16 +2150,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
2238 2150
2239static int relink_is_mergable(struct extent_buffer *leaf, 2151static int relink_is_mergable(struct extent_buffer *leaf,
2240 struct btrfs_file_extent_item *fi, 2152 struct btrfs_file_extent_item *fi,
2241 u64 disk_bytenr) 2153 struct new_sa_defrag_extent *new)
2242{ 2154{
2243 if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr) 2155 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2244 return 0; 2156 return 0;
2245 2157
2246 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG) 2158 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2247 return 0; 2159 return 0;
2248 2160
2249 if (btrfs_file_extent_compression(leaf, fi) || 2161 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2250 btrfs_file_extent_encryption(leaf, fi) || 2162 return 0;
2163
2164 if (btrfs_file_extent_encryption(leaf, fi) ||
2251 btrfs_file_extent_other_encoding(leaf, fi)) 2165 btrfs_file_extent_other_encoding(leaf, fi))
2252 return 0; 2166 return 0;
2253 2167
@@ -2391,8 +2305,8 @@ again:
2391 struct btrfs_file_extent_item); 2305 struct btrfs_file_extent_item);
2392 extent_len = btrfs_file_extent_num_bytes(leaf, fi); 2306 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2393 2307
2394 if (relink_is_mergable(leaf, fi, new->bytenr) && 2308 if (extent_len + found_key.offset == start &&
2395 extent_len + found_key.offset == start) { 2309 relink_is_mergable(leaf, fi, new)) {
2396 btrfs_set_file_extent_num_bytes(leaf, fi, 2310 btrfs_set_file_extent_num_bytes(leaf, fi,
2397 extent_len + len); 2311 extent_len + len);
2398 btrfs_mark_buffer_dirty(leaf); 2312 btrfs_mark_buffer_dirty(leaf);
@@ -2648,8 +2562,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2648 struct extent_state *cached_state = NULL; 2562 struct extent_state *cached_state = NULL;
2649 struct new_sa_defrag_extent *new = NULL; 2563 struct new_sa_defrag_extent *new = NULL;
2650 int compress_type = 0; 2564 int compress_type = 0;
2651 int ret; 2565 int ret = 0;
2566 u64 logical_len = ordered_extent->len;
2652 bool nolock; 2567 bool nolock;
2568 bool truncated = false;
2653 2569
2654 nolock = btrfs_is_free_space_inode(inode); 2570 nolock = btrfs_is_free_space_inode(inode);
2655 2571
@@ -2658,6 +2574,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2658 goto out; 2574 goto out;
2659 } 2575 }
2660 2576
2577 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
2578 truncated = true;
2579 logical_len = ordered_extent->truncated_len;
2580 /* Truncated the entire extent, don't bother adding */
2581 if (!logical_len)
2582 goto out;
2583 }
2584
2661 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 2585 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
2662 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ 2586 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
2663 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 2587 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -2713,15 +2637,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2713 ret = btrfs_mark_extent_written(trans, inode, 2637 ret = btrfs_mark_extent_written(trans, inode,
2714 ordered_extent->file_offset, 2638 ordered_extent->file_offset,
2715 ordered_extent->file_offset + 2639 ordered_extent->file_offset +
2716 ordered_extent->len); 2640 logical_len);
2717 } else { 2641 } else {
2718 BUG_ON(root == root->fs_info->tree_root); 2642 BUG_ON(root == root->fs_info->tree_root);
2719 ret = insert_reserved_file_extent(trans, inode, 2643 ret = insert_reserved_file_extent(trans, inode,
2720 ordered_extent->file_offset, 2644 ordered_extent->file_offset,
2721 ordered_extent->start, 2645 ordered_extent->start,
2722 ordered_extent->disk_len, 2646 ordered_extent->disk_len,
2723 ordered_extent->len, 2647 logical_len, logical_len,
2724 ordered_extent->len,
2725 compress_type, 0, 0, 2648 compress_type, 0, 0,
2726 BTRFS_FILE_EXTENT_REG); 2649 BTRFS_FILE_EXTENT_REG);
2727 } 2650 }
@@ -2753,17 +2676,27 @@ out:
2753 if (trans) 2676 if (trans)
2754 btrfs_end_transaction(trans, root); 2677 btrfs_end_transaction(trans, root);
2755 2678
2756 if (ret) { 2679 if (ret || truncated) {
2757 clear_extent_uptodate(io_tree, ordered_extent->file_offset, 2680 u64 start, end;
2758 ordered_extent->file_offset + 2681
2759 ordered_extent->len - 1, NULL, GFP_NOFS); 2682 if (truncated)
2683 start = ordered_extent->file_offset + logical_len;
2684 else
2685 start = ordered_extent->file_offset;
2686 end = ordered_extent->file_offset + ordered_extent->len - 1;
2687 clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
2688
2689 /* Drop the cache for the part of the extent we didn't write. */
2690 btrfs_drop_extent_cache(inode, start, end, 0);
2760 2691
2761 /* 2692 /*
2762 * If the ordered extent had an IOERR or something else went 2693 * If the ordered extent had an IOERR or something else went
2763 * wrong we need to return the space for this ordered extent 2694 * wrong we need to return the space for this ordered extent
2764 * back to the allocator. 2695 * back to the allocator. We only free the extent in the
2696 * truncated case if we didn't write out the extent at all.
2765 */ 2697 */
2766 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && 2698 if ((ret || !logical_len) &&
2699 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2767 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) 2700 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2768 btrfs_free_reserved_extent(root, ordered_extent->start, 2701 btrfs_free_reserved_extent(root, ordered_extent->start,
2769 ordered_extent->disk_len); 2702 ordered_extent->disk_len);
@@ -2827,16 +2760,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2827 * if there's a match, we allow the bio to finish. If not, the code in 2760 * if there's a match, we allow the bio to finish. If not, the code in
2828 * extent_io.c will try to find good copies for us. 2761 * extent_io.c will try to find good copies for us.
2829 */ 2762 */
2830static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, 2763static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
2831 struct extent_state *state, int mirror) 2764 u64 phy_offset, struct page *page,
2765 u64 start, u64 end, int mirror)
2832{ 2766{
2833 size_t offset = start - page_offset(page); 2767 size_t offset = start - page_offset(page);
2834 struct inode *inode = page->mapping->host; 2768 struct inode *inode = page->mapping->host;
2835 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2769 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2836 char *kaddr; 2770 char *kaddr;
2837 u64 private = ~(u32)0;
2838 int ret;
2839 struct btrfs_root *root = BTRFS_I(inode)->root; 2771 struct btrfs_root *root = BTRFS_I(inode)->root;
2772 u32 csum_expected;
2840 u32 csum = ~(u32)0; 2773 u32 csum = ~(u32)0;
2841 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, 2774 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
2842 DEFAULT_RATELIMIT_BURST); 2775 DEFAULT_RATELIMIT_BURST);
@@ -2856,19 +2789,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2856 return 0; 2789 return 0;
2857 } 2790 }
2858 2791
2859 if (state && state->start == start) { 2792 phy_offset >>= inode->i_sb->s_blocksize_bits;
2860 private = state->private; 2793 csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
2861 ret = 0;
2862 } else {
2863 ret = get_state_private(io_tree, start, &private);
2864 }
2865 kaddr = kmap_atomic(page);
2866 if (ret)
2867 goto zeroit;
2868 2794
2795 kaddr = kmap_atomic(page);
2869 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1); 2796 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
2870 btrfs_csum_final(csum, (char *)&csum); 2797 btrfs_csum_final(csum, (char *)&csum);
2871 if (csum != private) 2798 if (csum != csum_expected)
2872 goto zeroit; 2799 goto zeroit;
2873 2800
2874 kunmap_atomic(kaddr); 2801 kunmap_atomic(kaddr);
@@ -2877,14 +2804,12 @@ good:
2877 2804
2878zeroit: 2805zeroit:
2879 if (__ratelimit(&_rs)) 2806 if (__ratelimit(&_rs))
2880 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu", 2807 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
2881 (unsigned long long)btrfs_ino(page->mapping->host), 2808 btrfs_ino(page->mapping->host), start, csum, csum_expected);
2882 (unsigned long long)start, csum,
2883 (unsigned long long)private);
2884 memset(kaddr + offset, 1, end - start + 1); 2809 memset(kaddr + offset, 1, end - start + 1);
2885 flush_dcache_page(page); 2810 flush_dcache_page(page);
2886 kunmap_atomic(kaddr); 2811 kunmap_atomic(kaddr);
2887 if (private == 0) 2812 if (csum_expected == 0)
2888 return 0; 2813 return 0;
2889 return -EIO; 2814 return -EIO;
2890} 2815}
@@ -2971,8 +2896,10 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2971 btrfs_root_refs(&root->root_item) > 0) { 2896 btrfs_root_refs(&root->root_item) > 0) {
2972 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, 2897 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
2973 root->root_key.objectid); 2898 root->root_key.objectid);
2974 BUG_ON(ret); 2899 if (ret)
2975 root->orphan_item_inserted = 0; 2900 btrfs_abort_transaction(trans, root, ret);
2901 else
2902 root->orphan_item_inserted = 0;
2976 } 2903 }
2977 2904
2978 if (block_rsv) { 2905 if (block_rsv) {
@@ -3041,11 +2968,18 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
3041 /* insert an orphan item to track this unlinked/truncated file */ 2968 /* insert an orphan item to track this unlinked/truncated file */
3042 if (insert >= 1) { 2969 if (insert >= 1) {
3043 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2970 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
3044 if (ret && ret != -EEXIST) { 2971 if (ret) {
3045 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 2972 if (reserve) {
3046 &BTRFS_I(inode)->runtime_flags); 2973 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3047 btrfs_abort_transaction(trans, root, ret); 2974 &BTRFS_I(inode)->runtime_flags);
3048 return ret; 2975 btrfs_orphan_release_metadata(inode);
2976 }
2977 if (ret != -EEXIST) {
2978 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2979 &BTRFS_I(inode)->runtime_flags);
2980 btrfs_abort_transaction(trans, root, ret);
2981 return ret;
2982 }
3049 } 2983 }
3050 ret = 0; 2984 ret = 0;
3051 } 2985 }
@@ -3084,17 +3018,15 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3084 release_rsv = 1; 3018 release_rsv = 1;
3085 spin_unlock(&root->orphan_lock); 3019 spin_unlock(&root->orphan_lock);
3086 3020
3087 if (trans && delete_item) { 3021 if (trans && delete_item)
3088 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); 3022 ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
3089 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
3090 }
3091 3023
3092 if (release_rsv) { 3024 if (release_rsv) {
3093 btrfs_orphan_release_metadata(inode); 3025 btrfs_orphan_release_metadata(inode);
3094 atomic_dec(&root->orphan_inodes); 3026 atomic_dec(&root->orphan_inodes);
3095 } 3027 }
3096 3028
3097 return 0; 3029 return ret;
3098} 3030}
3099 3031
3100/* 3032/*
@@ -3224,8 +3156,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3224 found_key.objectid); 3156 found_key.objectid);
3225 ret = btrfs_del_orphan_item(trans, root, 3157 ret = btrfs_del_orphan_item(trans, root,
3226 found_key.objectid); 3158 found_key.objectid);
3227 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
3228 btrfs_end_transaction(trans, root); 3159 btrfs_end_transaction(trans, root);
3160 if (ret)
3161 goto out;
3229 continue; 3162 continue;
3230 } 3163 }
3231 3164
@@ -3657,8 +3590,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3657 if (ret) { 3590 if (ret) {
3658 btrfs_info(root->fs_info, 3591 btrfs_info(root->fs_info,
3659 "failed to delete reference to %.*s, inode %llu parent %llu", 3592 "failed to delete reference to %.*s, inode %llu parent %llu",
3660 name_len, name, 3593 name_len, name, ino, dir_ino);
3661 (unsigned long long)ino, (unsigned long long)dir_ino);
3662 btrfs_abort_transaction(trans, root, ret); 3594 btrfs_abort_transaction(trans, root, ret);
3663 goto err; 3595 goto err;
3664 } 3596 }
@@ -3929,6 +3861,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3929 u64 extent_num_bytes = 0; 3861 u64 extent_num_bytes = 0;
3930 u64 extent_offset = 0; 3862 u64 extent_offset = 0;
3931 u64 item_end = 0; 3863 u64 item_end = 0;
3864 u64 last_size = (u64)-1;
3932 u32 found_type = (u8)-1; 3865 u32 found_type = (u8)-1;
3933 int found_extent; 3866 int found_extent;
3934 int del_item; 3867 int del_item;
@@ -4026,6 +3959,11 @@ search_again:
4026 if (found_type != BTRFS_EXTENT_DATA_KEY) 3959 if (found_type != BTRFS_EXTENT_DATA_KEY)
4027 goto delete; 3960 goto delete;
4028 3961
3962 if (del_item)
3963 last_size = found_key.offset;
3964 else
3965 last_size = new_size;
3966
4029 if (extent_type != BTRFS_FILE_EXTENT_INLINE) { 3967 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4030 u64 num_dec; 3968 u64 num_dec;
4031 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); 3969 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -4137,6 +4075,8 @@ out:
4137 btrfs_abort_transaction(trans, root, ret); 4075 btrfs_abort_transaction(trans, root, ret);
4138 } 4076 }
4139error: 4077error:
4078 if (last_size != (u64)-1)
4079 btrfs_ordered_update_i_size(inode, last_size, NULL);
4140 btrfs_free_path(path); 4080 btrfs_free_path(path);
4141 return err; 4081 return err;
4142} 4082}
@@ -4465,8 +4405,26 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4465 btrfs_inode_resume_unlocked_dio(inode); 4405 btrfs_inode_resume_unlocked_dio(inode);
4466 4406
4467 ret = btrfs_truncate(inode); 4407 ret = btrfs_truncate(inode);
4468 if (ret && inode->i_nlink) 4408 if (ret && inode->i_nlink) {
4469 btrfs_orphan_del(NULL, inode); 4409 int err;
4410
4411 /*
4412 * failed to truncate, disk_i_size is only adjusted down
4413 * as we remove extents, so it should represent the true
4414 * size of the inode, so reset the in memory size and
4415 * delete our orphan entry.
4416 */
4417 trans = btrfs_join_transaction(root);
4418 if (IS_ERR(trans)) {
4419 btrfs_orphan_del(NULL, inode);
4420 return ret;
4421 }
4422 i_size_write(inode, BTRFS_I(inode)->disk_i_size);
4423 err = btrfs_orphan_del(trans, inode);
4424 if (err)
4425 btrfs_abort_transaction(trans, root, err);
4426 btrfs_end_transaction(trans, root);
4427 }
4470 } 4428 }
4471 4429
4472 return ret; 4430 return ret;
@@ -4601,10 +4559,15 @@ void btrfs_evict_inode(struct inode *inode)
4601 4559
4602 btrfs_free_block_rsv(root, rsv); 4560 btrfs_free_block_rsv(root, rsv);
4603 4561
4562 /*
4563 * Errors here aren't a big deal, it just means we leave orphan items
4564 * in the tree. They will be cleaned up on the next mount.
4565 */
4604 if (ret == 0) { 4566 if (ret == 0) {
4605 trans->block_rsv = root->orphan_block_rsv; 4567 trans->block_rsv = root->orphan_block_rsv;
4606 ret = btrfs_orphan_del(trans, inode); 4568 btrfs_orphan_del(trans, inode);
4607 BUG_ON(ret); 4569 } else {
4570 btrfs_orphan_del(NULL, inode);
4608 } 4571 }
4609 4572
4610 trans->block_rsv = &root->fs_info->trans_block_rsv; 4573 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -6161,10 +6124,7 @@ insert:
6161 btrfs_release_path(path); 6124 btrfs_release_path(path);
6162 if (em->start > start || extent_map_end(em) <= start) { 6125 if (em->start > start || extent_map_end(em) <= start) {
6163 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]", 6126 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
6164 (unsigned long long)em->start, 6127 em->start, em->len, start, len);
6165 (unsigned long long)em->len,
6166 (unsigned long long)start,
6167 (unsigned long long)len);
6168 err = -EIO; 6128 err = -EIO;
6169 goto out; 6129 goto out;
6170 } 6130 }
@@ -6362,39 +6322,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6362 u64 start, u64 len) 6322 u64 start, u64 len)
6363{ 6323{
6364 struct btrfs_root *root = BTRFS_I(inode)->root; 6324 struct btrfs_root *root = BTRFS_I(inode)->root;
6365 struct btrfs_trans_handle *trans;
6366 struct extent_map *em; 6325 struct extent_map *em;
6367 struct btrfs_key ins; 6326 struct btrfs_key ins;
6368 u64 alloc_hint; 6327 u64 alloc_hint;
6369 int ret; 6328 int ret;
6370 6329
6371 trans = btrfs_join_transaction(root);
6372 if (IS_ERR(trans))
6373 return ERR_CAST(trans);
6374
6375 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
6376
6377 alloc_hint = get_extent_allocation_hint(inode, start, len); 6330 alloc_hint = get_extent_allocation_hint(inode, start, len);
6378 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, 6331 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
6379 alloc_hint, &ins, 1); 6332 alloc_hint, &ins, 1);
6380 if (ret) { 6333 if (ret)
6381 em = ERR_PTR(ret); 6334 return ERR_PTR(ret);
6382 goto out;
6383 }
6384 6335
6385 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 6336 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6386 ins.offset, ins.offset, ins.offset, 0); 6337 ins.offset, ins.offset, ins.offset, 0);
6387 if (IS_ERR(em)) 6338 if (IS_ERR(em)) {
6388 goto out; 6339 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
6340 return em;
6341 }
6389 6342
6390 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, 6343 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
6391 ins.offset, ins.offset, 0); 6344 ins.offset, ins.offset, 0);
6392 if (ret) { 6345 if (ret) {
6393 btrfs_free_reserved_extent(root, ins.objectid, ins.offset); 6346 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
6394 em = ERR_PTR(ret); 6347 free_extent_map(em);
6348 return ERR_PTR(ret);
6395 } 6349 }
6396out: 6350
6397 btrfs_end_transaction(trans, root);
6398 return em; 6351 return em;
6399} 6352}
6400 6353
@@ -6402,11 +6355,11 @@ out:
6402 * returns 1 when the nocow is safe, < 1 on error, 0 if the 6355 * returns 1 when the nocow is safe, < 1 on error, 0 if the
6403 * block must be cow'd 6356 * block must be cow'd
6404 */ 6357 */
6405noinline int can_nocow_extent(struct btrfs_trans_handle *trans, 6358noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6406 struct inode *inode, u64 offset, u64 *len,
6407 u64 *orig_start, u64 *orig_block_len, 6359 u64 *orig_start, u64 *orig_block_len,
6408 u64 *ram_bytes) 6360 u64 *ram_bytes)
6409{ 6361{
6362 struct btrfs_trans_handle *trans;
6410 struct btrfs_path *path; 6363 struct btrfs_path *path;
6411 int ret; 6364 int ret;
6412 struct extent_buffer *leaf; 6365 struct extent_buffer *leaf;
@@ -6424,7 +6377,7 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6424 if (!path) 6377 if (!path)
6425 return -ENOMEM; 6378 return -ENOMEM;
6426 6379
6427 ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), 6380 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
6428 offset, 0); 6381 offset, 0);
6429 if (ret < 0) 6382 if (ret < 0)
6430 goto out; 6383 goto out;
@@ -6489,9 +6442,19 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
6489 * look for other files referencing this extent, if we 6442 * look for other files referencing this extent, if we
6490 * find any we must cow 6443 * find any we must cow
6491 */ 6444 */
6492 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode), 6445 trans = btrfs_join_transaction(root);
6493 key.offset - backref_offset, disk_bytenr)) 6446 if (IS_ERR(trans)) {
6447 ret = 0;
6494 goto out; 6448 goto out;
6449 }
6450
6451 ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
6452 key.offset - backref_offset, disk_bytenr);
6453 btrfs_end_transaction(trans, root);
6454 if (ret) {
6455 ret = 0;
6456 goto out;
6457 }
6495 6458
6496 /* 6459 /*
6497 * adjust disk_bytenr and num_bytes to cover just the bytes 6460 * adjust disk_bytenr and num_bytes to cover just the bytes
@@ -6633,7 +6596,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6633 u64 start = iblock << inode->i_blkbits; 6596 u64 start = iblock << inode->i_blkbits;
6634 u64 lockstart, lockend; 6597 u64 lockstart, lockend;
6635 u64 len = bh_result->b_size; 6598 u64 len = bh_result->b_size;
6636 struct btrfs_trans_handle *trans;
6637 int unlock_bits = EXTENT_LOCKED; 6599 int unlock_bits = EXTENT_LOCKED;
6638 int ret = 0; 6600 int ret = 0;
6639 6601
@@ -6715,16 +6677,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6715 len = min(len, em->len - (start - em->start)); 6677 len = min(len, em->len - (start - em->start));
6716 block_start = em->block_start + (start - em->start); 6678 block_start = em->block_start + (start - em->start);
6717 6679
6718 /* 6680 if (can_nocow_extent(inode, start, &len, &orig_start,
6719 * we're not going to log anything, but we do need
6720 * to make sure the current transaction stays open
6721 * while we look for nocow cross refs
6722 */
6723 trans = btrfs_join_transaction(root);
6724 if (IS_ERR(trans))
6725 goto must_cow;
6726
6727 if (can_nocow_extent(trans, inode, start, &len, &orig_start,
6728 &orig_block_len, &ram_bytes) == 1) { 6681 &orig_block_len, &ram_bytes) == 1) {
6729 if (type == BTRFS_ORDERED_PREALLOC) { 6682 if (type == BTRFS_ORDERED_PREALLOC) {
6730 free_extent_map(em); 6683 free_extent_map(em);
@@ -6733,24 +6686,20 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6733 block_start, len, 6686 block_start, len,
6734 orig_block_len, 6687 orig_block_len,
6735 ram_bytes, type); 6688 ram_bytes, type);
6736 if (IS_ERR(em)) { 6689 if (IS_ERR(em))
6737 btrfs_end_transaction(trans, root);
6738 goto unlock_err; 6690 goto unlock_err;
6739 }
6740 } 6691 }
6741 6692
6742 ret = btrfs_add_ordered_extent_dio(inode, start, 6693 ret = btrfs_add_ordered_extent_dio(inode, start,
6743 block_start, len, len, type); 6694 block_start, len, len, type);
6744 btrfs_end_transaction(trans, root);
6745 if (ret) { 6695 if (ret) {
6746 free_extent_map(em); 6696 free_extent_map(em);
6747 goto unlock_err; 6697 goto unlock_err;
6748 } 6698 }
6749 goto unlock; 6699 goto unlock;
6750 } 6700 }
6751 btrfs_end_transaction(trans, root);
6752 } 6701 }
6753must_cow: 6702
6754 /* 6703 /*
6755 * this will cow the extent, reset the len in case we changed 6704 * this will cow the extent, reset the len in case we changed
6756 * it above 6705 * it above
@@ -6813,26 +6762,6 @@ unlock_err:
6813 return ret; 6762 return ret;
6814} 6763}
6815 6764
6816struct btrfs_dio_private {
6817 struct inode *inode;
6818 u64 logical_offset;
6819 u64 disk_bytenr;
6820 u64 bytes;
6821 void *private;
6822
6823 /* number of bios pending for this dio */
6824 atomic_t pending_bios;
6825
6826 /* IO errors */
6827 int errors;
6828
6829 /* orig_bio is our btrfs_io_bio */
6830 struct bio *orig_bio;
6831
6832 /* dio_bio came from fs/direct-io.c */
6833 struct bio *dio_bio;
6834};
6835
6836static void btrfs_endio_direct_read(struct bio *bio, int err) 6765static void btrfs_endio_direct_read(struct bio *bio, int err)
6837{ 6766{
6838 struct btrfs_dio_private *dip = bio->bi_private; 6767 struct btrfs_dio_private *dip = bio->bi_private;
@@ -6841,6 +6770,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6841 struct inode *inode = dip->inode; 6770 struct inode *inode = dip->inode;
6842 struct btrfs_root *root = BTRFS_I(inode)->root; 6771 struct btrfs_root *root = BTRFS_I(inode)->root;
6843 struct bio *dio_bio; 6772 struct bio *dio_bio;
6773 u32 *csums = (u32 *)dip->csum;
6774 int index = 0;
6844 u64 start; 6775 u64 start;
6845 6776
6846 start = dip->logical_offset; 6777 start = dip->logical_offset;
@@ -6849,12 +6780,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6849 struct page *page = bvec->bv_page; 6780 struct page *page = bvec->bv_page;
6850 char *kaddr; 6781 char *kaddr;
6851 u32 csum = ~(u32)0; 6782 u32 csum = ~(u32)0;
6852 u64 private = ~(u32)0;
6853 unsigned long flags; 6783 unsigned long flags;
6854 6784
6855 if (get_state_private(&BTRFS_I(inode)->io_tree,
6856 start, &private))
6857 goto failed;
6858 local_irq_save(flags); 6785 local_irq_save(flags);
6859 kaddr = kmap_atomic(page); 6786 kaddr = kmap_atomic(page);
6860 csum = btrfs_csum_data(kaddr + bvec->bv_offset, 6787 csum = btrfs_csum_data(kaddr + bvec->bv_offset,
@@ -6864,18 +6791,17 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6864 local_irq_restore(flags); 6791 local_irq_restore(flags);
6865 6792
6866 flush_dcache_page(bvec->bv_page); 6793 flush_dcache_page(bvec->bv_page);
6867 if (csum != private) { 6794 if (csum != csums[index]) {
6868failed: 6795 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
6869 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u", 6796 btrfs_ino(inode), start, csum,
6870 (unsigned long long)btrfs_ino(inode), 6797 csums[index]);
6871 (unsigned long long)start,
6872 csum, (unsigned)private);
6873 err = -EIO; 6798 err = -EIO;
6874 } 6799 }
6875 } 6800 }
6876 6801
6877 start += bvec->bv_len; 6802 start += bvec->bv_len;
6878 bvec++; 6803 bvec++;
6804 index++;
6879 } while (bvec <= bvec_end); 6805 } while (bvec <= bvec_end);
6880 6806
6881 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6807 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@ -6956,7 +6882,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
6956 if (err) { 6882 if (err) {
6957 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " 6883 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
6958 "sector %#Lx len %u err no %d\n", 6884 "sector %#Lx len %u err no %d\n",
6959 (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw, 6885 btrfs_ino(dip->inode), bio->bi_rw,
6960 (unsigned long long)bio->bi_sector, bio->bi_size, err); 6886 (unsigned long long)bio->bi_sector, bio->bi_size, err);
6961 dip->errors = 1; 6887 dip->errors = 1;
6962 6888
@@ -6992,6 +6918,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6992 int rw, u64 file_offset, int skip_sum, 6918 int rw, u64 file_offset, int skip_sum,
6993 int async_submit) 6919 int async_submit)
6994{ 6920{
6921 struct btrfs_dio_private *dip = bio->bi_private;
6995 int write = rw & REQ_WRITE; 6922 int write = rw & REQ_WRITE;
6996 struct btrfs_root *root = BTRFS_I(inode)->root; 6923 struct btrfs_root *root = BTRFS_I(inode)->root;
6997 int ret; 6924 int ret;
@@ -7026,7 +6953,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
7026 if (ret) 6953 if (ret)
7027 goto err; 6954 goto err;
7028 } else if (!skip_sum) { 6955 } else if (!skip_sum) {
7029 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset); 6956 ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
6957 file_offset);
7030 if (ret) 6958 if (ret)
7031 goto err; 6959 goto err;
7032 } 6960 }
@@ -7061,6 +6989,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7061 bio_put(orig_bio); 6989 bio_put(orig_bio);
7062 return -EIO; 6990 return -EIO;
7063 } 6991 }
6992
7064 if (map_length >= orig_bio->bi_size) { 6993 if (map_length >= orig_bio->bi_size) {
7065 bio = orig_bio; 6994 bio = orig_bio;
7066 goto submit; 6995 goto submit;
@@ -7156,19 +7085,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7156 struct btrfs_dio_private *dip; 7085 struct btrfs_dio_private *dip;
7157 struct bio *io_bio; 7086 struct bio *io_bio;
7158 int skip_sum; 7087 int skip_sum;
7088 int sum_len;
7159 int write = rw & REQ_WRITE; 7089 int write = rw & REQ_WRITE;
7160 int ret = 0; 7090 int ret = 0;
7091 u16 csum_size;
7161 7092
7162 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 7093 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7163 7094
7164 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS); 7095 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
7165
7166 if (!io_bio) { 7096 if (!io_bio) {
7167 ret = -ENOMEM; 7097 ret = -ENOMEM;
7168 goto free_ordered; 7098 goto free_ordered;
7169 } 7099 }
7170 7100
7171 dip = kmalloc(sizeof(*dip), GFP_NOFS); 7101 if (!skip_sum && !write) {
7102 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7103 sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
7104 sum_len *= csum_size;
7105 } else {
7106 sum_len = 0;
7107 }
7108
7109 dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
7172 if (!dip) { 7110 if (!dip) {
7173 ret = -ENOMEM; 7111 ret = -ENOMEM;
7174 goto free_io_bio; 7112 goto free_io_bio;
@@ -7443,10 +7381,23 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7443 * whoever cleared the private bit is responsible 7381 * whoever cleared the private bit is responsible
7444 * for the finish_ordered_io 7382 * for the finish_ordered_io
7445 */ 7383 */
7446 if (TestClearPagePrivate2(page) && 7384 if (TestClearPagePrivate2(page)) {
7447 btrfs_dec_test_ordered_pending(inode, &ordered, page_start, 7385 struct btrfs_ordered_inode_tree *tree;
7448 PAGE_CACHE_SIZE, 1)) { 7386 u64 new_len;
7449 btrfs_finish_ordered_io(ordered); 7387
7388 tree = &BTRFS_I(inode)->ordered_tree;
7389
7390 spin_lock_irq(&tree->lock);
7391 set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
7392 new_len = page_start - ordered->file_offset;
7393 if (new_len < ordered->truncated_len)
7394 ordered->truncated_len = new_len;
7395 spin_unlock_irq(&tree->lock);
7396
7397 if (btrfs_dec_test_ordered_pending(inode, &ordered,
7398 page_start,
7399 PAGE_CACHE_SIZE, 1))
7400 btrfs_finish_ordered_io(ordered);
7450 } 7401 }
7451 btrfs_put_ordered_extent(ordered); 7402 btrfs_put_ordered_extent(ordered);
7452 cached_state = NULL; 7403 cached_state = NULL;
@@ -7612,7 +7563,6 @@ static int btrfs_truncate(struct inode *inode)
7612 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); 7563 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
7613 7564
7614 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 7565 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
7615 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
7616 7566
7617 /* 7567 /*
7618 * Yes ladies and gentelment, this is indeed ugly. The fact is we have 7568 * Yes ladies and gentelment, this is indeed ugly. The fact is we have
@@ -7876,7 +7826,7 @@ void btrfs_destroy_inode(struct inode *inode)
7876 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 7826 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
7877 &BTRFS_I(inode)->runtime_flags)) { 7827 &BTRFS_I(inode)->runtime_flags)) {
7878 btrfs_info(root->fs_info, "inode %llu still on the orphan list", 7828 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
7879 (unsigned long long)btrfs_ino(inode)); 7829 btrfs_ino(inode));
7880 atomic_dec(&root->orphan_inodes); 7830 atomic_dec(&root->orphan_inodes);
7881 } 7831 }
7882 7832
@@ -7886,8 +7836,7 @@ void btrfs_destroy_inode(struct inode *inode)
7886 break; 7836 break;
7887 else { 7837 else {
7888 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup", 7838 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
7889 (unsigned long long)ordered->file_offset, 7839 ordered->file_offset, ordered->len);
7890 (unsigned long long)ordered->len);
7891 btrfs_remove_ordered_extent(inode, ordered); 7840 btrfs_remove_ordered_extent(inode, ordered);
7892 btrfs_put_ordered_extent(ordered); 7841 btrfs_put_ordered_extent(ordered);
7893 btrfs_put_ordered_extent(ordered); 7842 btrfs_put_ordered_extent(ordered);
@@ -8161,10 +8110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8161 new_dentry->d_name.name, 8110 new_dentry->d_name.name,
8162 new_dentry->d_name.len); 8111 new_dentry->d_name.len);
8163 } 8112 }
8164 if (!ret && new_inode->i_nlink == 0) { 8113 if (!ret && new_inode->i_nlink == 0)
8165 ret = btrfs_orphan_add(trans, new_dentry->d_inode); 8114 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
8166 BUG_ON(ret);
8167 }
8168 if (ret) { 8115 if (ret) {
8169 btrfs_abort_transaction(trans, root, ret); 8116 btrfs_abort_transaction(trans, root, ret);
8170 goto out_fail; 8117 goto out_fail;
@@ -8525,8 +8472,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8525 8472
8526 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); 8473 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
8527 cur_bytes = max(cur_bytes, min_size); 8474 cur_bytes = max(cur_bytes, min_size);
8528 ret = btrfs_reserve_extent(trans, root, cur_bytes, 8475 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
8529 min_size, 0, *alloc_hint, &ins, 1); 8476 *alloc_hint, &ins, 1);
8530 if (ret) { 8477 if (ret) {
8531 if (own_trans) 8478 if (own_trans)
8532 btrfs_end_transaction(trans, root); 8479 btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 238a05545ee2..1a5b9462dd9a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -43,6 +43,7 @@
43#include <linux/blkdev.h> 43#include <linux/blkdev.h>
44#include <linux/uuid.h> 44#include <linux/uuid.h>
45#include <linux/btrfs.h> 45#include <linux/btrfs.h>
46#include <linux/uaccess.h>
46#include "compat.h" 47#include "compat.h"
47#include "ctree.h" 48#include "ctree.h"
48#include "disk-io.h" 49#include "disk-io.h"
@@ -57,6 +58,9 @@
57#include "send.h" 58#include "send.h"
58#include "dev-replace.h" 59#include "dev-replace.h"
59 60
61static int btrfs_clone(struct inode *src, struct inode *inode,
62 u64 off, u64 olen, u64 olen_aligned, u64 destoff);
63
60/* Mask out flags that are inappropriate for the given type of inode. */ 64/* Mask out flags that are inappropriate for the given type of inode. */
61static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 65static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
62{ 66{
@@ -363,6 +367,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
363 return 0; 367 return 0;
364} 368}
365 369
370int btrfs_is_empty_uuid(u8 *uuid)
371{
372 static char empty_uuid[BTRFS_UUID_SIZE] = {0};
373
374 return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE);
375}
376
366static noinline int create_subvol(struct inode *dir, 377static noinline int create_subvol(struct inode *dir,
367 struct dentry *dentry, 378 struct dentry *dentry,
368 char *name, int namelen, 379 char *name, int namelen,
@@ -396,7 +407,7 @@ static noinline int create_subvol(struct inode *dir,
396 * of create_snapshot(). 407 * of create_snapshot().
397 */ 408 */
398 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 409 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
399 7, &qgroup_reserved); 410 8, &qgroup_reserved, false);
400 if (ret) 411 if (ret)
401 return ret; 412 return ret;
402 413
@@ -425,26 +436,25 @@ static noinline int create_subvol(struct inode *dir,
425 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 436 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
426 btrfs_set_header_owner(leaf, objectid); 437 btrfs_set_header_owner(leaf, objectid);
427 438
428 write_extent_buffer(leaf, root->fs_info->fsid, 439 write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf),
429 (unsigned long)btrfs_header_fsid(leaf),
430 BTRFS_FSID_SIZE); 440 BTRFS_FSID_SIZE);
431 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 441 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
432 (unsigned long)btrfs_header_chunk_tree_uuid(leaf), 442 btrfs_header_chunk_tree_uuid(leaf),
433 BTRFS_UUID_SIZE); 443 BTRFS_UUID_SIZE);
434 btrfs_mark_buffer_dirty(leaf); 444 btrfs_mark_buffer_dirty(leaf);
435 445
436 memset(&root_item, 0, sizeof(root_item)); 446 memset(&root_item, 0, sizeof(root_item));
437 447
438 inode_item = &root_item.inode; 448 inode_item = &root_item.inode;
439 inode_item->generation = cpu_to_le64(1); 449 btrfs_set_stack_inode_generation(inode_item, 1);
440 inode_item->size = cpu_to_le64(3); 450 btrfs_set_stack_inode_size(inode_item, 3);
441 inode_item->nlink = cpu_to_le32(1); 451 btrfs_set_stack_inode_nlink(inode_item, 1);
442 inode_item->nbytes = cpu_to_le64(root->leafsize); 452 btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
443 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 453 btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
444 454
445 root_item.flags = 0; 455 btrfs_set_root_flags(&root_item, 0);
446 root_item.byte_limit = 0; 456 btrfs_set_root_limit(&root_item, 0);
447 inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT); 457 btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
448 458
449 btrfs_set_root_bytenr(&root_item, leaf->start); 459 btrfs_set_root_bytenr(&root_item, leaf->start);
450 btrfs_set_root_generation(&root_item, trans->transid); 460 btrfs_set_root_generation(&root_item, trans->transid);
@@ -457,8 +467,8 @@ static noinline int create_subvol(struct inode *dir,
457 btrfs_root_generation(&root_item)); 467 btrfs_root_generation(&root_item));
458 uuid_le_gen(&new_uuid); 468 uuid_le_gen(&new_uuid);
459 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); 469 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
460 root_item.otime.sec = cpu_to_le64(cur_time.tv_sec); 470 btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec);
461 root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec); 471 btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec);
462 root_item.ctime = root_item.otime; 472 root_item.ctime = root_item.otime;
463 btrfs_set_root_ctransid(&root_item, trans->transid); 473 btrfs_set_root_ctransid(&root_item, trans->transid);
464 btrfs_set_root_otransid(&root_item, trans->transid); 474 btrfs_set_root_otransid(&root_item, trans->transid);
@@ -518,9 +528,14 @@ static noinline int create_subvol(struct inode *dir,
518 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 528 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
519 objectid, root->root_key.objectid, 529 objectid, root->root_key.objectid,
520 btrfs_ino(dir), index, name, namelen); 530 btrfs_ino(dir), index, name, namelen);
521
522 BUG_ON(ret); 531 BUG_ON(ret);
523 532
533 ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
534 root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
535 objectid);
536 if (ret)
537 btrfs_abort_transaction(trans, root, ret);
538
524fail: 539fail:
525 trans->block_rsv = NULL; 540 trans->block_rsv = NULL;
526 trans->bytes_reserved = 0; 541 trans->bytes_reserved = 0;
@@ -573,10 +588,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
573 * 1 - root item 588 * 1 - root item
574 * 2 - root ref/backref 589 * 2 - root ref/backref
575 * 1 - root of snapshot 590 * 1 - root of snapshot
591 * 1 - UUID item
576 */ 592 */
577 ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, 593 ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
578 &pending_snapshot->block_rsv, 7, 594 &pending_snapshot->block_rsv, 8,
579 &pending_snapshot->qgroup_reserved); 595 &pending_snapshot->qgroup_reserved,
596 false);
580 if (ret) 597 if (ret)
581 goto out; 598 goto out;
582 599
@@ -1267,9 +1284,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1267 cluster = max_cluster; 1284 cluster = max_cluster;
1268 } 1285 }
1269 1286
1270 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
1271 BTRFS_I(inode)->force_compress = compress_type;
1272
1273 if (i + cluster > ra_index) { 1287 if (i + cluster > ra_index) {
1274 ra_index = max(i, ra_index); 1288 ra_index = max(i, ra_index);
1275 btrfs_force_ra(inode->i_mapping, ra, file, ra_index, 1289 btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
@@ -1278,6 +1292,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1278 } 1292 }
1279 1293
1280 mutex_lock(&inode->i_mutex); 1294 mutex_lock(&inode->i_mutex);
1295 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
1296 BTRFS_I(inode)->force_compress = compress_type;
1281 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1297 ret = cluster_pages_for_defrag(inode, pages, i, cluster);
1282 if (ret < 0) { 1298 if (ret < 0) {
1283 mutex_unlock(&inode->i_mutex); 1299 mutex_unlock(&inode->i_mutex);
@@ -1334,10 +1350,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1334 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 1350 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
1335 } 1351 }
1336 atomic_dec(&root->fs_info->async_submit_draining); 1352 atomic_dec(&root->fs_info->async_submit_draining);
1337
1338 mutex_lock(&inode->i_mutex);
1339 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
1340 mutex_unlock(&inode->i_mutex);
1341 } 1353 }
1342 1354
1343 if (range->compress_type == BTRFS_COMPRESS_LZO) { 1355 if (range->compress_type == BTRFS_COMPRESS_LZO) {
@@ -1347,6 +1359,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1347 ret = defrag_count; 1359 ret = defrag_count;
1348 1360
1349out_ra: 1361out_ra:
1362 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
1363 mutex_lock(&inode->i_mutex);
1364 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
1365 mutex_unlock(&inode->i_mutex);
1366 }
1350 if (!file) 1367 if (!file)
1351 kfree(ra); 1368 kfree(ra);
1352 kfree(pages); 1369 kfree(pages);
@@ -1377,9 +1394,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1377 1394
1378 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1395 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1379 1)) { 1396 1)) {
1380 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
1381 mnt_drop_write_file(file); 1397 mnt_drop_write_file(file);
1382 return -EINVAL; 1398 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
1383 } 1399 }
1384 1400
1385 mutex_lock(&root->fs_info->volume_mutex); 1401 mutex_lock(&root->fs_info->volume_mutex);
@@ -1403,14 +1419,13 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1403 ret = -EINVAL; 1419 ret = -EINVAL;
1404 goto out_free; 1420 goto out_free;
1405 } 1421 }
1406 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1422 printk(KERN_INFO "btrfs: resizing devid %llu\n", devid);
1407 (unsigned long long)devid);
1408 } 1423 }
1409 1424
1410 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1425 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1411 if (!device) { 1426 if (!device) {
1412 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1427 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1413 (unsigned long long)devid); 1428 devid);
1414 ret = -ENODEV; 1429 ret = -ENODEV;
1415 goto out_free; 1430 goto out_free;
1416 } 1431 }
@@ -1418,7 +1433,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1418 if (!device->writeable) { 1433 if (!device->writeable) {
1419 printk(KERN_INFO "btrfs: resizer unable to apply on " 1434 printk(KERN_INFO "btrfs: resizer unable to apply on "
1420 "readonly device %llu\n", 1435 "readonly device %llu\n",
1421 (unsigned long long)devid); 1436 devid);
1422 ret = -EPERM; 1437 ret = -EPERM;
1423 goto out_free; 1438 goto out_free;
1424 } 1439 }
@@ -1470,8 +1485,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1470 new_size *= root->sectorsize; 1485 new_size *= root->sectorsize;
1471 1486
1472 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", 1487 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
1473 rcu_str_deref(device->name), 1488 rcu_str_deref(device->name), new_size);
1474 (unsigned long long)new_size);
1475 1489
1476 if (new_size > old_size) { 1490 if (new_size > old_size) {
1477 trans = btrfs_start_transaction(root, 0); 1491 trans = btrfs_start_transaction(root, 0);
@@ -1721,13 +1735,28 @@ out:
1721static noinline int may_destroy_subvol(struct btrfs_root *root) 1735static noinline int may_destroy_subvol(struct btrfs_root *root)
1722{ 1736{
1723 struct btrfs_path *path; 1737 struct btrfs_path *path;
1738 struct btrfs_dir_item *di;
1724 struct btrfs_key key; 1739 struct btrfs_key key;
1740 u64 dir_id;
1725 int ret; 1741 int ret;
1726 1742
1727 path = btrfs_alloc_path(); 1743 path = btrfs_alloc_path();
1728 if (!path) 1744 if (!path)
1729 return -ENOMEM; 1745 return -ENOMEM;
1730 1746
1747 /* Make sure this root isn't set as the default subvol */
1748 dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
1749 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path,
1750 dir_id, "default", 7, 0);
1751 if (di && !IS_ERR(di)) {
1752 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1753 if (key.objectid == root->root_key.objectid) {
1754 ret = -ENOTEMPTY;
1755 goto out;
1756 }
1757 btrfs_release_path(path);
1758 }
1759
1731 key.objectid = root->root_key.objectid; 1760 key.objectid = root->root_key.objectid;
1732 key.type = BTRFS_ROOT_REF_KEY; 1761 key.type = BTRFS_ROOT_REF_KEY;
1733 key.offset = (u64)-1; 1762 key.offset = (u64)-1;
@@ -1993,25 +2022,29 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1993 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2022 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1994 if (ret < 0) 2023 if (ret < 0)
1995 goto out; 2024 goto out;
2025 else if (ret > 0) {
2026 ret = btrfs_previous_item(root, path, dirid,
2027 BTRFS_INODE_REF_KEY);
2028 if (ret < 0)
2029 goto out;
2030 else if (ret > 0) {
2031 ret = -ENOENT;
2032 goto out;
2033 }
2034 }
1996 2035
1997 l = path->nodes[0]; 2036 l = path->nodes[0];
1998 slot = path->slots[0]; 2037 slot = path->slots[0];
1999 if (ret > 0 && slot > 0)
2000 slot--;
2001 btrfs_item_key_to_cpu(l, &key, slot); 2038 btrfs_item_key_to_cpu(l, &key, slot);
2002 2039
2003 if (ret > 0 && (key.objectid != dirid ||
2004 key.type != BTRFS_INODE_REF_KEY)) {
2005 ret = -ENOENT;
2006 goto out;
2007 }
2008
2009 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 2040 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
2010 len = btrfs_inode_ref_name_len(l, iref); 2041 len = btrfs_inode_ref_name_len(l, iref);
2011 ptr -= len + 1; 2042 ptr -= len + 1;
2012 total_len += len + 1; 2043 total_len += len + 1;
2013 if (ptr < name) 2044 if (ptr < name) {
2045 ret = -ENAMETOOLONG;
2014 goto out; 2046 goto out;
2047 }
2015 2048
2016 *(ptr + len) = '/'; 2049 *(ptr + len) = '/';
2017 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); 2050 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
@@ -2024,8 +2057,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
2024 key.offset = (u64)-1; 2057 key.offset = (u64)-1;
2025 dirid = key.objectid; 2058 dirid = key.objectid;
2026 } 2059 }
2027 if (ptr < name)
2028 goto out;
2029 memmove(name, ptr, total_len); 2060 memmove(name, ptr, total_len);
2030 name[total_len]='\0'; 2061 name[total_len]='\0';
2031 ret = 0; 2062 ret = 0;
@@ -2174,7 +2205,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2174 * ref/backref. 2205 * ref/backref.
2175 */ 2206 */
2176 err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 2207 err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
2177 5, &qgroup_reserved); 2208 5, &qgroup_reserved, true);
2178 if (err) 2209 if (err)
2179 goto out_up_write; 2210 goto out_up_write;
2180 2211
@@ -2213,6 +2244,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2213 goto out_end_trans; 2244 goto out_end_trans;
2214 } 2245 }
2215 } 2246 }
2247
2248 ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
2249 dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
2250 dest->root_key.objectid);
2251 if (ret && ret != -ENOENT) {
2252 btrfs_abort_transaction(trans, root, ret);
2253 err = ret;
2254 goto out_end_trans;
2255 }
2256 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
2257 ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
2258 dest->root_item.received_uuid,
2259 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
2260 dest->root_key.objectid);
2261 if (ret && ret != -ENOENT) {
2262 btrfs_abort_transaction(trans, root, ret);
2263 err = ret;
2264 goto out_end_trans;
2265 }
2266 }
2267
2216out_end_trans: 2268out_end_trans:
2217 trans->block_rsv = NULL; 2269 trans->block_rsv = NULL;
2218 trans->bytes_reserved = 0; 2270 trans->bytes_reserved = 0;
@@ -2326,8 +2378,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2326 2378
2327 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2379 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2328 1)) { 2380 1)) {
2329 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2381 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
2330 return -EINVAL;
2331 } 2382 }
2332 2383
2333 mutex_lock(&root->fs_info->volume_mutex); 2384 mutex_lock(&root->fs_info->volume_mutex);
@@ -2400,10 +2451,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2400 if (!fi_args) 2451 if (!fi_args)
2401 return -ENOMEM; 2452 return -ENOMEM;
2402 2453
2454 mutex_lock(&fs_devices->device_list_mutex);
2403 fi_args->num_devices = fs_devices->num_devices; 2455 fi_args->num_devices = fs_devices->num_devices;
2404 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); 2456 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2405 2457
2406 mutex_lock(&fs_devices->device_list_mutex);
2407 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2458 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2408 if (device->devid > fi_args->max_id) 2459 if (device->devid > fi_args->max_id)
2409 fi_args->max_id = device->devid; 2460 fi_args->max_id = device->devid;
@@ -2424,7 +2475,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2424 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2475 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2425 int ret = 0; 2476 int ret = 0;
2426 char *s_uuid = NULL; 2477 char *s_uuid = NULL;
2427 char empty_uuid[BTRFS_UUID_SIZE] = {0};
2428 2478
2429 if (!capable(CAP_SYS_ADMIN)) 2479 if (!capable(CAP_SYS_ADMIN))
2430 return -EPERM; 2480 return -EPERM;
@@ -2433,7 +2483,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2433 if (IS_ERR(di_args)) 2483 if (IS_ERR(di_args))
2434 return PTR_ERR(di_args); 2484 return PTR_ERR(di_args);
2435 2485
2436 if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0) 2486 if (!btrfs_is_empty_uuid(di_args->uuid))
2437 s_uuid = di_args->uuid; 2487 s_uuid = di_args->uuid;
2438 2488
2439 mutex_lock(&fs_devices->device_list_mutex); 2489 mutex_lock(&fs_devices->device_list_mutex);
@@ -2469,150 +2519,336 @@ out:
2469 return ret; 2519 return ret;
2470} 2520}
2471 2521
2472static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 2522static struct page *extent_same_get_page(struct inode *inode, u64 off)
2473 u64 off, u64 olen, u64 destoff) 2523{
2524 struct page *page;
2525 pgoff_t index;
2526 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2527
2528 index = off >> PAGE_CACHE_SHIFT;
2529
2530 page = grab_cache_page(inode->i_mapping, index);
2531 if (!page)
2532 return NULL;
2533
2534 if (!PageUptodate(page)) {
2535 if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
2536 0))
2537 return NULL;
2538 lock_page(page);
2539 if (!PageUptodate(page)) {
2540 unlock_page(page);
2541 page_cache_release(page);
2542 return NULL;
2543 }
2544 }
2545 unlock_page(page);
2546
2547 return page;
2548}
2549
2550static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
2551{
2552 /* do any pending delalloc/csum calc on src, one way or
2553 another, and lock file content */
2554 while (1) {
2555 struct btrfs_ordered_extent *ordered;
2556 lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
2557 ordered = btrfs_lookup_first_ordered_extent(inode,
2558 off + len - 1);
2559 if (!ordered &&
2560 !test_range_bit(&BTRFS_I(inode)->io_tree, off,
2561 off + len - 1, EXTENT_DELALLOC, 0, NULL))
2562 break;
2563 unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
2564 if (ordered)
2565 btrfs_put_ordered_extent(ordered);
2566 btrfs_wait_ordered_range(inode, off, len);
2567 }
2568}
2569
2570static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
2571 struct inode *inode2, u64 loff2, u64 len)
2572{
2573 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
2574 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2575
2576 mutex_unlock(&inode1->i_mutex);
2577 mutex_unlock(&inode2->i_mutex);
2578}
2579
2580static void btrfs_double_lock(struct inode *inode1, u64 loff1,
2581 struct inode *inode2, u64 loff2, u64 len)
2582{
2583 if (inode1 < inode2) {
2584 swap(inode1, inode2);
2585 swap(loff1, loff2);
2586 }
2587
2588 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
2589 lock_extent_range(inode1, loff1, len);
2590 if (inode1 != inode2) {
2591 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
2592 lock_extent_range(inode2, loff2, len);
2593 }
2594}
2595
2596static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2597 u64 dst_loff, u64 len)
2598{
2599 int ret = 0;
2600 struct page *src_page, *dst_page;
2601 unsigned int cmp_len = PAGE_CACHE_SIZE;
2602 void *addr, *dst_addr;
2603
2604 while (len) {
2605 if (len < PAGE_CACHE_SIZE)
2606 cmp_len = len;
2607
2608 src_page = extent_same_get_page(src, loff);
2609 if (!src_page)
2610 return -EINVAL;
2611 dst_page = extent_same_get_page(dst, dst_loff);
2612 if (!dst_page) {
2613 page_cache_release(src_page);
2614 return -EINVAL;
2615 }
2616 addr = kmap_atomic(src_page);
2617 dst_addr = kmap_atomic(dst_page);
2618
2619 flush_dcache_page(src_page);
2620 flush_dcache_page(dst_page);
2621
2622 if (memcmp(addr, dst_addr, cmp_len))
2623 ret = BTRFS_SAME_DATA_DIFFERS;
2624
2625 kunmap_atomic(addr);
2626 kunmap_atomic(dst_addr);
2627 page_cache_release(src_page);
2628 page_cache_release(dst_page);
2629
2630 if (ret)
2631 break;
2632
2633 loff += cmp_len;
2634 dst_loff += cmp_len;
2635 len -= cmp_len;
2636 }
2637
2638 return ret;
2639}
2640
2641static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
2642{
2643 u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
2644
2645 if (off + len > inode->i_size || off + len < off)
2646 return -EINVAL;
2647 /* Check that we are block aligned - btrfs_clone() requires this */
2648 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
2649 return -EINVAL;
2650
2651 return 0;
2652}
2653
2654static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
2655 struct inode *dst, u64 dst_loff)
2474{ 2656{
2475 struct inode *inode = file_inode(file);
2476 struct btrfs_root *root = BTRFS_I(inode)->root;
2477 struct fd src_file;
2478 struct inode *src;
2479 struct btrfs_trans_handle *trans;
2480 struct btrfs_path *path;
2481 struct extent_buffer *leaf;
2482 char *buf;
2483 struct btrfs_key key;
2484 u32 nritems;
2485 int slot;
2486 int ret; 2657 int ret;
2487 u64 len = olen;
2488 u64 bs = root->fs_info->sb->s_blocksize;
2489 int same_inode = 0;
2490 2658
2491 /* 2659 /*
2492 * TODO: 2660 * btrfs_clone() can't handle extents in the same file
2493 * - split compressed inline extents. annoying: we need to 2661 * yet. Once that works, we can drop this check and replace it
2494 * decompress into destination's address_space (the file offset 2662 * with a check for the same inode, but overlapping extents.
2495 * may change, so source mapping won't do), then recompress (or
2496 * otherwise reinsert) a subrange.
2497 * - allow ranges within the same file to be cloned (provided
2498 * they don't overlap)?
2499 */ 2663 */
2500 2664 if (src == dst)
2501 /* the destination must be opened for writing */
2502 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
2503 return -EINVAL; 2665 return -EINVAL;
2504 2666
2505 if (btrfs_root_readonly(root)) 2667 btrfs_double_lock(src, loff, dst, dst_loff, len);
2506 return -EROFS; 2668
2669 ret = extent_same_check_offsets(src, loff, len);
2670 if (ret)
2671 goto out_unlock;
2672
2673 ret = extent_same_check_offsets(dst, dst_loff, len);
2674 if (ret)
2675 goto out_unlock;
2676
2677 /* don't make the dst file partly checksummed */
2678 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
2679 (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
2680 ret = -EINVAL;
2681 goto out_unlock;
2682 }
2683
2684 ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
2685 if (ret == 0)
2686 ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
2687
2688out_unlock:
2689 btrfs_double_unlock(src, loff, dst, dst_loff, len);
2690
2691 return ret;
2692}
2693
2694#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
2695
2696static long btrfs_ioctl_file_extent_same(struct file *file,
2697 void __user *argp)
2698{
2699 struct btrfs_ioctl_same_args *args = argp;
2700 struct btrfs_ioctl_same_args same;
2701 struct btrfs_ioctl_same_extent_info info;
2702 struct inode *src = file->f_dentry->d_inode;
2703 struct file *dst_file = NULL;
2704 struct inode *dst;
2705 u64 off;
2706 u64 len;
2707 int i;
2708 int ret;
2709 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
2710 bool is_admin = capable(CAP_SYS_ADMIN);
2711
2712 if (!(file->f_mode & FMODE_READ))
2713 return -EINVAL;
2507 2714
2508 ret = mnt_want_write_file(file); 2715 ret = mnt_want_write_file(file);
2509 if (ret) 2716 if (ret)
2510 return ret; 2717 return ret;
2511 2718
2512 src_file = fdget(srcfd); 2719 if (copy_from_user(&same,
2513 if (!src_file.file) { 2720 (struct btrfs_ioctl_same_args __user *)argp,
2514 ret = -EBADF; 2721 sizeof(same))) {
2515 goto out_drop_write; 2722 ret = -EFAULT;
2723 goto out;
2516 } 2724 }
2517 2725
2518 ret = -EXDEV; 2726 off = same.logical_offset;
2519 if (src_file.file->f_path.mnt != file->f_path.mnt) 2727 len = same.length;
2520 goto out_fput;
2521 2728
2522 src = file_inode(src_file.file); 2729 /*
2730 * Limit the total length we will dedupe for each operation.
2731 * This is intended to bound the total time spent in this
2732 * ioctl to something sane.
2733 */
2734 if (len > BTRFS_MAX_DEDUPE_LEN)
2735 len = BTRFS_MAX_DEDUPE_LEN;
2523 2736
2524 ret = -EINVAL; 2737 if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
2525 if (src == inode) 2738 /*
2526 same_inode = 1; 2739 * Btrfs does not support blocksize < page_size. As a
2740 * result, btrfs_cmp_data() won't correctly handle
2741 * this situation without an update.
2742 */
2743 ret = -EINVAL;
2744 goto out;
2745 }
2527 2746
2528 /* the src must be open for reading */ 2747 ret = -EISDIR;
2529 if (!(src_file.file->f_mode & FMODE_READ)) 2748 if (S_ISDIR(src->i_mode))
2530 goto out_fput; 2749 goto out;
2531 2750
2532 /* don't make the dst file partly checksummed */ 2751 ret = -EACCES;
2533 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 2752 if (!S_ISREG(src->i_mode))
2534 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 2753 goto out;
2535 goto out_fput;
2536 2754
2537 ret = -EISDIR; 2755 ret = 0;
2538 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 2756 for (i = 0; i < same.dest_count; i++) {
2539 goto out_fput; 2757 if (copy_from_user(&info, &args->info[i], sizeof(info))) {
2758 ret = -EFAULT;
2759 goto out;
2760 }
2540 2761
2541 ret = -EXDEV; 2762 info.bytes_deduped = 0;
2542 if (src->i_sb != inode->i_sb)
2543 goto out_fput;
2544 2763
2545 ret = -ENOMEM; 2764 dst_file = fget(info.fd);
2546 buf = vmalloc(btrfs_level_size(root, 0)); 2765 if (!dst_file) {
2547 if (!buf) 2766 info.status = -EBADF;
2548 goto out_fput; 2767 goto next;
2768 }
2549 2769
2550 path = btrfs_alloc_path(); 2770 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
2551 if (!path) { 2771 info.status = -EINVAL;
2552 vfree(buf); 2772 goto next;
2553 goto out_fput; 2773 }
2554 }
2555 path->reada = 2;
2556 2774
2557 if (!same_inode) { 2775 info.status = -EXDEV;
2558 if (inode < src) { 2776 if (file->f_path.mnt != dst_file->f_path.mnt)
2559 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 2777 goto next;
2560 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); 2778
2561 } else { 2779 dst = dst_file->f_dentry->d_inode;
2562 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); 2780 if (src->i_sb != dst->i_sb)
2563 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 2781 goto next;
2782
2783 if (S_ISDIR(dst->i_mode)) {
2784 info.status = -EISDIR;
2785 goto next;
2564 } 2786 }
2565 } else {
2566 mutex_lock(&src->i_mutex);
2567 }
2568 2787
2569 /* determine range to clone */ 2788 if (!S_ISREG(dst->i_mode)) {
2570 ret = -EINVAL; 2789 info.status = -EACCES;
2571 if (off + len > src->i_size || off + len < off) 2790 goto next;
2572 goto out_unlock; 2791 }
2573 if (len == 0)
2574 olen = len = src->i_size - off;
2575 /* if we extend to eof, continue to block boundary */
2576 if (off + len == src->i_size)
2577 len = ALIGN(src->i_size, bs) - off;
2578 2792
2579 /* verify the end result is block aligned */ 2793 info.status = btrfs_extent_same(src, off, len, dst,
2580 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 2794 info.logical_offset);
2581 !IS_ALIGNED(destoff, bs)) 2795 if (info.status == 0)
2582 goto out_unlock; 2796 info.bytes_deduped += len;
2583 2797
2584 /* verify if ranges are overlapped within the same file */ 2798next:
2585 if (same_inode) { 2799 if (dst_file)
2586 if (destoff + len > off && destoff < off + len) 2800 fput(dst_file);
2587 goto out_unlock;
2588 }
2589 2801
2590 if (destoff > inode->i_size) { 2802 if (__put_user_unaligned(info.status, &args->info[i].status) ||
2591 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 2803 __put_user_unaligned(info.bytes_deduped,
2592 if (ret) 2804 &args->info[i].bytes_deduped)) {
2593 goto out_unlock; 2805 ret = -EFAULT;
2806 goto out;
2807 }
2594 } 2808 }
2595 2809
2596 /* truncate page cache pages from target inode range */ 2810out:
2597 truncate_inode_pages_range(&inode->i_data, destoff, 2811 mnt_drop_write_file(file);
2598 PAGE_CACHE_ALIGN(destoff + len) - 1); 2812 return ret;
2813}
2599 2814
2600 /* do any pending delalloc/csum calc on src, one way or 2815/**
2601 another, and lock file content */ 2816 * btrfs_clone() - clone a range from inode file to another
2602 while (1) { 2817 *
2603 struct btrfs_ordered_extent *ordered; 2818 * @src: Inode to clone from
2604 lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 2819 * @inode: Inode to clone to
2605 ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1); 2820 * @off: Offset within source to start clone from
2606 if (!ordered && 2821 * @olen: Original length, passed by user, of range to clone
2607 !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1, 2822 * @olen_aligned: Block-aligned value of olen, extent_same uses
2608 EXTENT_DELALLOC, 0, NULL)) 2823 * identical values here
2609 break; 2824 * @destoff: Offset within @inode to start clone
2610 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 2825 */
2611 if (ordered) 2826static int btrfs_clone(struct inode *src, struct inode *inode,
2612 btrfs_put_ordered_extent(ordered); 2827 u64 off, u64 olen, u64 olen_aligned, u64 destoff)
2613 btrfs_wait_ordered_range(src, off, len); 2828{
2829 struct btrfs_root *root = BTRFS_I(inode)->root;
2830 struct btrfs_path *path = NULL;
2831 struct extent_buffer *leaf;
2832 struct btrfs_trans_handle *trans;
2833 char *buf = NULL;
2834 struct btrfs_key key;
2835 u32 nritems;
2836 int slot;
2837 int ret;
2838 u64 len = olen_aligned;
2839
2840 ret = -ENOMEM;
2841 buf = vmalloc(btrfs_level_size(root, 0));
2842 if (!buf)
2843 return ret;
2844
2845 path = btrfs_alloc_path();
2846 if (!path) {
2847 vfree(buf);
2848 return ret;
2614 } 2849 }
2615 2850
2851 path->reada = 2;
2616 /* clone data */ 2852 /* clone data */
2617 key.objectid = btrfs_ino(src); 2853 key.objectid = btrfs_ino(src);
2618 key.type = BTRFS_EXTENT_DATA_KEY; 2854 key.type = BTRFS_EXTENT_DATA_KEY;
@@ -2858,15 +3094,132 @@ next:
2858 key.offset++; 3094 key.offset++;
2859 } 3095 }
2860 ret = 0; 3096 ret = 0;
3097
2861out: 3098out:
2862 btrfs_release_path(path); 3099 btrfs_release_path(path);
3100 btrfs_free_path(path);
3101 vfree(buf);
3102 return ret;
3103}
3104
3105static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3106 u64 off, u64 olen, u64 destoff)
3107{
3108 struct inode *inode = fdentry(file)->d_inode;
3109 struct btrfs_root *root = BTRFS_I(inode)->root;
3110 struct fd src_file;
3111 struct inode *src;
3112 int ret;
3113 u64 len = olen;
3114 u64 bs = root->fs_info->sb->s_blocksize;
3115 int same_inode = 0;
3116
3117 /*
3118 * TODO:
3119 * - split compressed inline extents. annoying: we need to
3120 * decompress into destination's address_space (the file offset
3121 * may change, so source mapping won't do), then recompress (or
3122 * otherwise reinsert) a subrange.
3123 * - allow ranges within the same file to be cloned (provided
3124 * they don't overlap)?
3125 */
3126
3127 /* the destination must be opened for writing */
3128 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
3129 return -EINVAL;
3130
3131 if (btrfs_root_readonly(root))
3132 return -EROFS;
3133
3134 ret = mnt_want_write_file(file);
3135 if (ret)
3136 return ret;
3137
3138 src_file = fdget(srcfd);
3139 if (!src_file.file) {
3140 ret = -EBADF;
3141 goto out_drop_write;
3142 }
3143
3144 ret = -EXDEV;
3145 if (src_file.file->f_path.mnt != file->f_path.mnt)
3146 goto out_fput;
3147
3148 src = file_inode(src_file.file);
3149
3150 ret = -EINVAL;
3151 if (src == inode)
3152 same_inode = 1;
3153
3154 /* the src must be open for reading */
3155 if (!(src_file.file->f_mode & FMODE_READ))
3156 goto out_fput;
3157
3158 /* don't make the dst file partly checksummed */
3159 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
3160 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
3161 goto out_fput;
3162
3163 ret = -EISDIR;
3164 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
3165 goto out_fput;
3166
3167 ret = -EXDEV;
3168 if (src->i_sb != inode->i_sb)
3169 goto out_fput;
3170
3171 if (!same_inode) {
3172 if (inode < src) {
3173 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
3174 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
3175 } else {
3176 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
3177 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
3178 }
3179 } else {
3180 mutex_lock(&src->i_mutex);
3181 }
3182
3183 /* determine range to clone */
3184 ret = -EINVAL;
3185 if (off + len > src->i_size || off + len < off)
3186 goto out_unlock;
3187 if (len == 0)
3188 olen = len = src->i_size - off;
3189 /* if we extend to eof, continue to block boundary */
3190 if (off + len == src->i_size)
3191 len = ALIGN(src->i_size, bs) - off;
3192
3193 /* verify the end result is block aligned */
3194 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
3195 !IS_ALIGNED(destoff, bs))
3196 goto out_unlock;
3197
3198 /* verify if ranges are overlapped within the same file */
3199 if (same_inode) {
3200 if (destoff + len > off && destoff < off + len)
3201 goto out_unlock;
3202 }
3203
3204 if (destoff > inode->i_size) {
3205 ret = btrfs_cont_expand(inode, inode->i_size, destoff);
3206 if (ret)
3207 goto out_unlock;
3208 }
3209
3210 /* truncate page cache pages from target inode range */
3211 truncate_inode_pages_range(&inode->i_data, destoff,
3212 PAGE_CACHE_ALIGN(destoff + len) - 1);
3213
3214 lock_extent_range(src, off, len);
3215
3216 ret = btrfs_clone(src, inode, off, olen, len, destoff);
3217
2863 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 3218 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
2864out_unlock: 3219out_unlock:
2865 mutex_unlock(&src->i_mutex); 3220 mutex_unlock(&src->i_mutex);
2866 if (!same_inode) 3221 if (!same_inode)
2867 mutex_unlock(&inode->i_mutex); 3222 mutex_unlock(&inode->i_mutex);
2868 vfree(buf);
2869 btrfs_free_path(path);
2870out_fput: 3223out_fput:
2871 fdput(src_file); 3224 fdput(src_file);
2872out_drop_write: 3225out_drop_write:
@@ -3312,11 +3665,13 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
3312 3665
3313 switch (p->cmd) { 3666 switch (p->cmd) {
3314 case BTRFS_IOCTL_DEV_REPLACE_CMD_START: 3667 case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
3668 if (root->fs_info->sb->s_flags & MS_RDONLY)
3669 return -EROFS;
3670
3315 if (atomic_xchg( 3671 if (atomic_xchg(
3316 &root->fs_info->mutually_exclusive_operation_running, 3672 &root->fs_info->mutually_exclusive_operation_running,
3317 1)) { 3673 1)) {
3318 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 3674 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3319 ret = -EINPROGRESS;
3320 } else { 3675 } else {
3321 ret = btrfs_dev_replace_start(root, p); 3676 ret = btrfs_dev_replace_start(root, p);
3322 atomic_set( 3677 atomic_set(
@@ -3560,8 +3915,7 @@ again:
3560 } else { 3915 } else {
3561 /* this is (1) */ 3916 /* this is (1) */
3562 mutex_unlock(&fs_info->balance_mutex); 3917 mutex_unlock(&fs_info->balance_mutex);
3563 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 3918 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
3564 ret = -EINVAL;
3565 goto out; 3919 goto out;
3566 } 3920 }
3567 3921
@@ -3967,6 +4321,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
3967 struct btrfs_trans_handle *trans; 4321 struct btrfs_trans_handle *trans;
3968 struct timespec ct = CURRENT_TIME; 4322 struct timespec ct = CURRENT_TIME;
3969 int ret = 0; 4323 int ret = 0;
4324 int received_uuid_changed;
3970 4325
3971 ret = mnt_want_write_file(file); 4326 ret = mnt_want_write_file(file);
3972 if (ret < 0) 4327 if (ret < 0)
@@ -3996,7 +4351,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
3996 goto out; 4351 goto out;
3997 } 4352 }
3998 4353
3999 trans = btrfs_start_transaction(root, 1); 4354 /*
4355 * 1 - root item
4356 * 2 - uuid items (received uuid + subvol uuid)
4357 */
4358 trans = btrfs_start_transaction(root, 3);
4000 if (IS_ERR(trans)) { 4359 if (IS_ERR(trans)) {
4001 ret = PTR_ERR(trans); 4360 ret = PTR_ERR(trans);
4002 trans = NULL; 4361 trans = NULL;
@@ -4007,24 +4366,42 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
4007 sa->rtime.sec = ct.tv_sec; 4366 sa->rtime.sec = ct.tv_sec;
4008 sa->rtime.nsec = ct.tv_nsec; 4367 sa->rtime.nsec = ct.tv_nsec;
4009 4368
4369 received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
4370 BTRFS_UUID_SIZE);
4371 if (received_uuid_changed &&
4372 !btrfs_is_empty_uuid(root_item->received_uuid))
4373 btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
4374 root_item->received_uuid,
4375 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4376 root->root_key.objectid);
4010 memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); 4377 memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
4011 btrfs_set_root_stransid(root_item, sa->stransid); 4378 btrfs_set_root_stransid(root_item, sa->stransid);
4012 btrfs_set_root_rtransid(root_item, sa->rtransid); 4379 btrfs_set_root_rtransid(root_item, sa->rtransid);
4013 root_item->stime.sec = cpu_to_le64(sa->stime.sec); 4380 btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
4014 root_item->stime.nsec = cpu_to_le32(sa->stime.nsec); 4381 btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
4015 root_item->rtime.sec = cpu_to_le64(sa->rtime.sec); 4382 btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
4016 root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec); 4383 btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);
4017 4384
4018 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4385 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4019 &root->root_key, &root->root_item); 4386 &root->root_key, &root->root_item);
4020 if (ret < 0) { 4387 if (ret < 0) {
4021 btrfs_end_transaction(trans, root); 4388 btrfs_end_transaction(trans, root);
4022 trans = NULL;
4023 goto out; 4389 goto out;
4024 } else { 4390 }
4025 ret = btrfs_commit_transaction(trans, root); 4391 if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
4026 if (ret < 0) 4392 ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
4393 sa->uuid,
4394 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4395 root->root_key.objectid);
4396 if (ret < 0 && ret != -EEXIST) {
4397 btrfs_abort_transaction(trans, root, ret);
4027 goto out; 4398 goto out;
4399 }
4400 }
4401 ret = btrfs_commit_transaction(trans, root);
4402 if (ret < 0) {
4403 btrfs_abort_transaction(trans, root, ret);
4404 goto out;
4028 } 4405 }
4029 4406
4030 ret = copy_to_user(arg, sa, sizeof(*sa)); 4407 ret = copy_to_user(arg, sa, sizeof(*sa));
@@ -4041,18 +4418,22 @@ out:
4041static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) 4418static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
4042{ 4419{
4043 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4420 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
4044 const char *label = root->fs_info->super_copy->label; 4421 size_t len;
4045 size_t len = strnlen(label, BTRFS_LABEL_SIZE);
4046 int ret; 4422 int ret;
4423 char label[BTRFS_LABEL_SIZE];
4424
4425 spin_lock(&root->fs_info->super_lock);
4426 memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE);
4427 spin_unlock(&root->fs_info->super_lock);
4428
4429 len = strnlen(label, BTRFS_LABEL_SIZE);
4047 4430
4048 if (len == BTRFS_LABEL_SIZE) { 4431 if (len == BTRFS_LABEL_SIZE) {
4049 pr_warn("btrfs: label is too long, return the first %zu bytes\n", 4432 pr_warn("btrfs: label is too long, return the first %zu bytes\n",
4050 --len); 4433 --len);
4051 } 4434 }
4052 4435
4053 mutex_lock(&root->fs_info->volume_mutex);
4054 ret = copy_to_user(arg, label, len); 4436 ret = copy_to_user(arg, label, len);
4055 mutex_unlock(&root->fs_info->volume_mutex);
4056 4437
4057 return ret ? -EFAULT : 0; 4438 return ret ? -EFAULT : 0;
4058} 4439}
@@ -4081,18 +4462,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
4081 if (ret) 4462 if (ret)
4082 return ret; 4463 return ret;
4083 4464
4084 mutex_lock(&root->fs_info->volume_mutex);
4085 trans = btrfs_start_transaction(root, 0); 4465 trans = btrfs_start_transaction(root, 0);
4086 if (IS_ERR(trans)) { 4466 if (IS_ERR(trans)) {
4087 ret = PTR_ERR(trans); 4467 ret = PTR_ERR(trans);
4088 goto out_unlock; 4468 goto out_unlock;
4089 } 4469 }
4090 4470
4471 spin_lock(&root->fs_info->super_lock);
4091 strcpy(super_block->label, label); 4472 strcpy(super_block->label, label);
4473 spin_unlock(&root->fs_info->super_lock);
4092 ret = btrfs_end_transaction(trans, root); 4474 ret = btrfs_end_transaction(trans, root);
4093 4475
4094out_unlock: 4476out_unlock:
4095 mutex_unlock(&root->fs_info->volume_mutex);
4096 mnt_drop_write_file(file); 4477 mnt_drop_write_file(file);
4097 return ret; 4478 return ret;
4098} 4479}
@@ -4207,6 +4588,8 @@ long btrfs_ioctl(struct file *file, unsigned int
4207 return btrfs_ioctl_get_fslabel(file, argp); 4588 return btrfs_ioctl_get_fslabel(file, argp);
4208 case BTRFS_IOC_SET_FSLABEL: 4589 case BTRFS_IOC_SET_FSLABEL:
4209 return btrfs_ioctl_set_fslabel(file, argp); 4590 return btrfs_ioctl_set_fslabel(file, argp);
4591 case BTRFS_IOC_FILE_EXTENT_SAME:
4592 return btrfs_ioctl_file_extent_same(file, argp);
4210 } 4593 }
4211 4594
4212 return -ENOTTY; 4595 return -ENOTTY;
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index f93151a98886..b6a6f07c5ce2 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -207,8 +207,10 @@ static int lzo_compress_pages(struct list_head *ws,
207 } 207 }
208 208
209 /* we're making it bigger, give up */ 209 /* we're making it bigger, give up */
210 if (tot_in > 8192 && tot_in < tot_out) 210 if (tot_in > 8192 && tot_in < tot_out) {
211 ret = -1;
211 goto out; 212 goto out;
213 }
212 214
213 /* we're all done */ 215 /* we're all done */
214 if (tot_in >= len) 216 if (tot_in >= len)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 81369827e514..966b413a33b8 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno,
67{ 67{
68 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 68 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
69 btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset " 69 btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
70 "%llu\n", (unsigned long long)offset); 70 "%llu\n", offset);
71} 71}
72 72
73/* 73/*
@@ -205,6 +205,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
205 entry->bytes_left = len; 205 entry->bytes_left = len;
206 entry->inode = igrab(inode); 206 entry->inode = igrab(inode);
207 entry->compress_type = compress_type; 207 entry->compress_type = compress_type;
208 entry->truncated_len = (u64)-1;
208 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 209 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
209 set_bit(type, &entry->flags); 210 set_bit(type, &entry->flags);
210 211
@@ -336,14 +337,12 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
336 *file_offset = dec_end; 337 *file_offset = dec_end;
337 if (dec_start > dec_end) { 338 if (dec_start > dec_end) {
338 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", 339 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
339 (unsigned long long)dec_start, 340 dec_start, dec_end);
340 (unsigned long long)dec_end);
341 } 341 }
342 to_dec = dec_end - dec_start; 342 to_dec = dec_end - dec_start;
343 if (to_dec > entry->bytes_left) { 343 if (to_dec > entry->bytes_left) {
344 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", 344 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
345 (unsigned long long)entry->bytes_left, 345 entry->bytes_left, to_dec);
346 (unsigned long long)to_dec);
347 } 346 }
348 entry->bytes_left -= to_dec; 347 entry->bytes_left -= to_dec;
349 if (!uptodate) 348 if (!uptodate)
@@ -403,8 +402,7 @@ have_entry:
403 402
404 if (io_size > entry->bytes_left) { 403 if (io_size > entry->bytes_left) {
405 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", 404 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
406 (unsigned long long)entry->bytes_left, 405 entry->bytes_left, io_size);
407 (unsigned long long)io_size);
408 } 406 }
409 entry->bytes_left -= io_size; 407 entry->bytes_left -= io_size;
410 if (!uptodate) 408 if (!uptodate)
@@ -671,7 +669,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
671 INIT_LIST_HEAD(&splice); 669 INIT_LIST_HEAD(&splice);
672 INIT_LIST_HEAD(&works); 670 INIT_LIST_HEAD(&works);
673 671
674 mutex_lock(&root->fs_info->ordered_operations_mutex); 672 mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
675 spin_lock(&root->fs_info->ordered_root_lock); 673 spin_lock(&root->fs_info->ordered_root_lock);
676 list_splice_init(&cur_trans->ordered_operations, &splice); 674 list_splice_init(&cur_trans->ordered_operations, &splice);
677 while (!list_empty(&splice)) { 675 while (!list_empty(&splice)) {
@@ -718,7 +716,7 @@ out:
718 list_del_init(&work->list); 716 list_del_init(&work->list);
719 btrfs_wait_and_free_delalloc_work(work); 717 btrfs_wait_and_free_delalloc_work(work);
720 } 718 }
721 mutex_unlock(&root->fs_info->ordered_operations_mutex); 719 mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
722 return ret; 720 return ret;
723} 721}
724 722
@@ -923,12 +921,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
923 struct btrfs_ordered_extent *test; 921 struct btrfs_ordered_extent *test;
924 int ret = 1; 922 int ret = 1;
925 923
926 if (ordered) 924 spin_lock_irq(&tree->lock);
925 if (ordered) {
927 offset = entry_end(ordered); 926 offset = entry_end(ordered);
928 else 927 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
928 offset = min(offset,
929 ordered->file_offset +
930 ordered->truncated_len);
931 } else {
929 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); 932 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
930 933 }
931 spin_lock_irq(&tree->lock);
932 disk_i_size = BTRFS_I(inode)->disk_i_size; 934 disk_i_size = BTRFS_I(inode)->disk_i_size;
933 935
934 /* truncate file */ 936 /* truncate file */
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 68844d59ee6f..d9a5aa097b4f 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -69,6 +69,7 @@ struct btrfs_ordered_sum {
69 * the isize. */ 69 * the isize. */
70#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered 70#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
71 ordered extent */ 71 ordered extent */
72#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
72 73
73struct btrfs_ordered_extent { 74struct btrfs_ordered_extent {
74 /* logical offset in the file */ 75 /* logical offset in the file */
@@ -96,6 +97,12 @@ struct btrfs_ordered_extent {
96 */ 97 */
97 u64 outstanding_isize; 98 u64 outstanding_isize;
98 99
100 /*
101 * If we get truncated we need to adjust the file extent we enter for
102 * this ordered extent so that we do not expose stale data.
103 */
104 u64 truncated_len;
105
99 /* flags (described above) */ 106 /* flags (described above) */
100 unsigned long flags; 107 unsigned long flags;
101 108
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index dc0024f17c1f..0088bedc8631 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -26,14 +26,12 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
26 int i; 26 int i;
27 printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu " 27 printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu "
28 "num_stripes %d\n", 28 "num_stripes %d\n",
29 (unsigned long long)btrfs_chunk_length(eb, chunk), 29 btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk),
30 (unsigned long long)btrfs_chunk_owner(eb, chunk), 30 btrfs_chunk_type(eb, chunk), num_stripes);
31 (unsigned long long)btrfs_chunk_type(eb, chunk),
32 num_stripes);
33 for (i = 0 ; i < num_stripes ; i++) { 31 for (i = 0 ; i < num_stripes ; i++) {
34 printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i, 32 printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i,
35 (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i), 33 btrfs_stripe_devid_nr(eb, chunk, i),
36 (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i)); 34 btrfs_stripe_offset_nr(eb, chunk, i));
37 } 35 }
38} 36}
39static void print_dev_item(struct extent_buffer *eb, 37static void print_dev_item(struct extent_buffer *eb,
@@ -41,18 +39,18 @@ static void print_dev_item(struct extent_buffer *eb,
41{ 39{
42 printk(KERN_INFO "\t\tdev item devid %llu " 40 printk(KERN_INFO "\t\tdev item devid %llu "
43 "total_bytes %llu bytes used %llu\n", 41 "total_bytes %llu bytes used %llu\n",
44 (unsigned long long)btrfs_device_id(eb, dev_item), 42 btrfs_device_id(eb, dev_item),
45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item), 43 btrfs_device_total_bytes(eb, dev_item),
46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); 44 btrfs_device_bytes_used(eb, dev_item));
47} 45}
48static void print_extent_data_ref(struct extent_buffer *eb, 46static void print_extent_data_ref(struct extent_buffer *eb,
49 struct btrfs_extent_data_ref *ref) 47 struct btrfs_extent_data_ref *ref)
50{ 48{
51 printk(KERN_INFO "\t\textent data backref root %llu " 49 printk(KERN_INFO "\t\textent data backref root %llu "
52 "objectid %llu offset %llu count %u\n", 50 "objectid %llu offset %llu count %u\n",
53 (unsigned long long)btrfs_extent_data_ref_root(eb, ref), 51 btrfs_extent_data_ref_root(eb, ref),
54 (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref), 52 btrfs_extent_data_ref_objectid(eb, ref),
55 (unsigned long long)btrfs_extent_data_ref_offset(eb, ref), 53 btrfs_extent_data_ref_offset(eb, ref),
56 btrfs_extent_data_ref_count(eb, ref)); 54 btrfs_extent_data_ref_count(eb, ref));
57} 55}
58 56
@@ -87,19 +85,17 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
87 flags = btrfs_extent_flags(eb, ei); 85 flags = btrfs_extent_flags(eb, ei);
88 86
89 printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n", 87 printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
90 (unsigned long long)btrfs_extent_refs(eb, ei), 88 btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
91 (unsigned long long)btrfs_extent_generation(eb, ei), 89 flags);
92 (unsigned long long)flags);
93 90
94 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 91 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
95 struct btrfs_tree_block_info *info; 92 struct btrfs_tree_block_info *info;
96 info = (struct btrfs_tree_block_info *)(ei + 1); 93 info = (struct btrfs_tree_block_info *)(ei + 1);
97 btrfs_tree_block_key(eb, info, &key); 94 btrfs_tree_block_key(eb, info, &key);
98 printk(KERN_INFO "\t\ttree block key (%llu %x %llu) " 95 printk(KERN_INFO "\t\ttree block key (%llu %u %llu) "
99 "level %d\n", 96 "level %d\n",
100 (unsigned long long)btrfs_disk_key_objectid(&key), 97 btrfs_disk_key_objectid(&key), key.type,
101 key.type, 98 btrfs_disk_key_offset(&key),
102 (unsigned long long)btrfs_disk_key_offset(&key),
103 btrfs_tree_block_level(eb, info)); 99 btrfs_tree_block_level(eb, info));
104 iref = (struct btrfs_extent_inline_ref *)(info + 1); 100 iref = (struct btrfs_extent_inline_ref *)(info + 1);
105 } else { 101 } else {
@@ -115,11 +111,11 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
115 switch (type) { 111 switch (type) {
116 case BTRFS_TREE_BLOCK_REF_KEY: 112 case BTRFS_TREE_BLOCK_REF_KEY:
117 printk(KERN_INFO "\t\ttree block backref " 113 printk(KERN_INFO "\t\ttree block backref "
118 "root %llu\n", (unsigned long long)offset); 114 "root %llu\n", offset);
119 break; 115 break;
120 case BTRFS_SHARED_BLOCK_REF_KEY: 116 case BTRFS_SHARED_BLOCK_REF_KEY:
121 printk(KERN_INFO "\t\tshared block backref " 117 printk(KERN_INFO "\t\tshared block backref "
122 "parent %llu\n", (unsigned long long)offset); 118 "parent %llu\n", offset);
123 break; 119 break;
124 case BTRFS_EXTENT_DATA_REF_KEY: 120 case BTRFS_EXTENT_DATA_REF_KEY:
125 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 121 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -129,8 +125,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
129 sref = (struct btrfs_shared_data_ref *)(iref + 1); 125 sref = (struct btrfs_shared_data_ref *)(iref + 1);
130 printk(KERN_INFO "\t\tshared data backref " 126 printk(KERN_INFO "\t\tshared data backref "
131 "parent %llu count %u\n", 127 "parent %llu count %u\n",
132 (unsigned long long)offset, 128 offset, btrfs_shared_data_ref_count(eb, sref));
133 btrfs_shared_data_ref_count(eb, sref));
134 break; 129 break;
135 default: 130 default:
136 BUG(); 131 BUG();
@@ -148,13 +143,32 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
148 ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0); 143 ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
149 printk("\t\textent back ref root %llu gen %llu " 144 printk("\t\textent back ref root %llu gen %llu "
150 "owner %llu num_refs %lu\n", 145 "owner %llu num_refs %lu\n",
151 (unsigned long long)btrfs_ref_root_v0(eb, ref0), 146 btrfs_ref_root_v0(eb, ref0),
152 (unsigned long long)btrfs_ref_generation_v0(eb, ref0), 147 btrfs_ref_generation_v0(eb, ref0),
153 (unsigned long long)btrfs_ref_objectid_v0(eb, ref0), 148 btrfs_ref_objectid_v0(eb, ref0),
154 (unsigned long)btrfs_ref_count_v0(eb, ref0)); 149 (unsigned long)btrfs_ref_count_v0(eb, ref0));
155} 150}
156#endif 151#endif
157 152
153static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
154 u32 item_size)
155{
156 if (!IS_ALIGNED(item_size, sizeof(u64))) {
157 pr_warn("btrfs: uuid item with illegal size %lu!\n",
158 (unsigned long)item_size);
159 return;
160 }
161 while (item_size) {
162 __le64 subvol_id;
163
164 read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id));
165 printk(KERN_INFO "\t\tsubvol_id %llu\n",
166 (unsigned long long)le64_to_cpu(subvol_id));
167 item_size -= sizeof(u64);
168 offset += sizeof(u64);
169 }
170}
171
158void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) 172void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
159{ 173{
160 int i; 174 int i;
@@ -177,39 +191,34 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
177 nr = btrfs_header_nritems(l); 191 nr = btrfs_header_nritems(l);
178 192
179 btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d", 193 btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
180 (unsigned long long)btrfs_header_bytenr(l), nr, 194 btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l));
181 btrfs_leaf_free_space(root, l));
182 for (i = 0 ; i < nr ; i++) { 195 for (i = 0 ; i < nr ; i++) {
183 item = btrfs_item_nr(l, i); 196 item = btrfs_item_nr(l, i);
184 btrfs_item_key_to_cpu(l, &key, i); 197 btrfs_item_key_to_cpu(l, &key, i);
185 type = btrfs_key_type(&key); 198 type = btrfs_key_type(&key);
186 printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d " 199 printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d "
187 "itemsize %d\n", 200 "itemsize %d\n",
188 i, 201 i, key.objectid, type, key.offset,
189 (unsigned long long)key.objectid, type,
190 (unsigned long long)key.offset,
191 btrfs_item_offset(l, item), btrfs_item_size(l, item)); 202 btrfs_item_offset(l, item), btrfs_item_size(l, item));
192 switch (type) { 203 switch (type) {
193 case BTRFS_INODE_ITEM_KEY: 204 case BTRFS_INODE_ITEM_KEY:
194 ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); 205 ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
195 printk(KERN_INFO "\t\tinode generation %llu size %llu " 206 printk(KERN_INFO "\t\tinode generation %llu size %llu "
196 "mode %o\n", 207 "mode %o\n",
197 (unsigned long long)
198 btrfs_inode_generation(l, ii), 208 btrfs_inode_generation(l, ii),
199 (unsigned long long)btrfs_inode_size(l, ii), 209 btrfs_inode_size(l, ii),
200 btrfs_inode_mode(l, ii)); 210 btrfs_inode_mode(l, ii));
201 break; 211 break;
202 case BTRFS_DIR_ITEM_KEY: 212 case BTRFS_DIR_ITEM_KEY:
203 di = btrfs_item_ptr(l, i, struct btrfs_dir_item); 213 di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
204 btrfs_dir_item_key_to_cpu(l, di, &found_key); 214 btrfs_dir_item_key_to_cpu(l, di, &found_key);
205 printk(KERN_INFO "\t\tdir oid %llu type %u\n", 215 printk(KERN_INFO "\t\tdir oid %llu type %u\n",
206 (unsigned long long)found_key.objectid, 216 found_key.objectid,
207 btrfs_dir_type(l, di)); 217 btrfs_dir_type(l, di));
208 break; 218 break;
209 case BTRFS_ROOT_ITEM_KEY: 219 case BTRFS_ROOT_ITEM_KEY:
210 ri = btrfs_item_ptr(l, i, struct btrfs_root_item); 220 ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
211 printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n", 221 printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n",
212 (unsigned long long)
213 btrfs_disk_root_bytenr(l, ri), 222 btrfs_disk_root_bytenr(l, ri),
214 btrfs_disk_root_refs(l, ri)); 223 btrfs_disk_root_refs(l, ri));
215 break; 224 break;
@@ -245,17 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
245 } 254 }
246 printk(KERN_INFO "\t\textent data disk bytenr %llu " 255 printk(KERN_INFO "\t\textent data disk bytenr %llu "
247 "nr %llu\n", 256 "nr %llu\n",
248 (unsigned long long)
249 btrfs_file_extent_disk_bytenr(l, fi), 257 btrfs_file_extent_disk_bytenr(l, fi),
250 (unsigned long long)
251 btrfs_file_extent_disk_num_bytes(l, fi)); 258 btrfs_file_extent_disk_num_bytes(l, fi));
252 printk(KERN_INFO "\t\textent data offset %llu " 259 printk(KERN_INFO "\t\textent data offset %llu "
253 "nr %llu ram %llu\n", 260 "nr %llu ram %llu\n",
254 (unsigned long long)
255 btrfs_file_extent_offset(l, fi), 261 btrfs_file_extent_offset(l, fi),
256 (unsigned long long)
257 btrfs_file_extent_num_bytes(l, fi), 262 btrfs_file_extent_num_bytes(l, fi),
258 (unsigned long long)
259 btrfs_file_extent_ram_bytes(l, fi)); 263 btrfs_file_extent_ram_bytes(l, fi));
260 break; 264 break;
261 case BTRFS_EXTENT_REF_V0_KEY: 265 case BTRFS_EXTENT_REF_V0_KEY:
@@ -269,7 +273,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
269 bi = btrfs_item_ptr(l, i, 273 bi = btrfs_item_ptr(l, i,
270 struct btrfs_block_group_item); 274 struct btrfs_block_group_item);
271 printk(KERN_INFO "\t\tblock group used %llu\n", 275 printk(KERN_INFO "\t\tblock group used %llu\n",
272 (unsigned long long)
273 btrfs_disk_block_group_used(l, bi)); 276 btrfs_disk_block_group_used(l, bi));
274 break; 277 break;
275 case BTRFS_CHUNK_ITEM_KEY: 278 case BTRFS_CHUNK_ITEM_KEY:
@@ -286,13 +289,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
286 printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n" 289 printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n"
287 "\t\tchunk objectid %llu chunk offset %llu " 290 "\t\tchunk objectid %llu chunk offset %llu "
288 "length %llu\n", 291 "length %llu\n",
289 (unsigned long long)
290 btrfs_dev_extent_chunk_tree(l, dev_extent), 292 btrfs_dev_extent_chunk_tree(l, dev_extent),
291 (unsigned long long)
292 btrfs_dev_extent_chunk_objectid(l, dev_extent), 293 btrfs_dev_extent_chunk_objectid(l, dev_extent),
293 (unsigned long long)
294 btrfs_dev_extent_chunk_offset(l, dev_extent), 294 btrfs_dev_extent_chunk_offset(l, dev_extent),
295 (unsigned long long)
296 btrfs_dev_extent_length(l, dev_extent)); 295 btrfs_dev_extent_length(l, dev_extent));
297 break; 296 break;
298 case BTRFS_DEV_STATS_KEY: 297 case BTRFS_DEV_STATS_KEY:
@@ -301,6 +300,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
301 case BTRFS_DEV_REPLACE_KEY: 300 case BTRFS_DEV_REPLACE_KEY:
302 printk(KERN_INFO "\t\tdev replace\n"); 301 printk(KERN_INFO "\t\tdev replace\n");
303 break; 302 break;
303 case BTRFS_UUID_KEY_SUBVOL:
304 case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
305 print_uuid_item(l, btrfs_item_ptr_offset(l, i),
306 btrfs_item_size_nr(l, i));
307 break;
304 }; 308 };
305 } 309 }
306} 310}
@@ -320,16 +324,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
320 return; 324 return;
321 } 325 }
322 btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u", 326 btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
323 (unsigned long long)btrfs_header_bytenr(c), 327 btrfs_header_bytenr(c), level, nr,
324 level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); 328 (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
325 for (i = 0; i < nr; i++) { 329 for (i = 0; i < nr; i++) {
326 btrfs_node_key_to_cpu(c, &key, i); 330 btrfs_node_key_to_cpu(c, &key, i);
327 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", 331 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
328 i, 332 i, key.objectid, key.type, key.offset,
329 (unsigned long long)key.objectid, 333 btrfs_node_blockptr(c, i));
330 key.type,
331 (unsigned long long)key.offset,
332 (unsigned long long)btrfs_node_blockptr(c, i));
333 } 334 }
334 for (i = 0; i < nr; i++) { 335 for (i = 0; i < nr; i++) {
335 struct extent_buffer *next = read_tree_block(root, 336 struct extent_buffer *next = read_tree_block(root,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1280eff8af56..4e6ef490619e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -157,18 +157,11 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
157 return qgroup; 157 return qgroup;
158} 158}
159 159
160/* must be called with qgroup_lock held */ 160static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
161static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
162{ 161{
163 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
164 struct btrfs_qgroup_list *list; 162 struct btrfs_qgroup_list *list;
165 163
166 if (!qgroup)
167 return -ENOENT;
168
169 rb_erase(&qgroup->node, &fs_info->qgroup_tree);
170 list_del(&qgroup->dirty); 164 list_del(&qgroup->dirty);
171
172 while (!list_empty(&qgroup->groups)) { 165 while (!list_empty(&qgroup->groups)) {
173 list = list_first_entry(&qgroup->groups, 166 list = list_first_entry(&qgroup->groups,
174 struct btrfs_qgroup_list, next_group); 167 struct btrfs_qgroup_list, next_group);
@@ -185,7 +178,18 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185 kfree(list); 178 kfree(list);
186 } 179 }
187 kfree(qgroup); 180 kfree(qgroup);
181}
188 182
183/* must be called with qgroup_lock held */
184static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185{
186 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
187
188 if (!qgroup)
189 return -ENOENT;
190
191 rb_erase(&qgroup->node, &fs_info->qgroup_tree);
192 __del_qgroup_rb(qgroup);
189 return 0; 193 return 0;
190} 194}
191 195
@@ -394,8 +398,7 @@ next1:
394 if (ret == -ENOENT) { 398 if (ret == -ENOENT) {
395 printk(KERN_WARNING 399 printk(KERN_WARNING
396 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", 400 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
397 (unsigned long long)found_key.objectid, 401 found_key.objectid, found_key.offset);
398 (unsigned long long)found_key.offset);
399 ret = 0; /* ignore the error */ 402 ret = 0; /* ignore the error */
400 } 403 }
401 if (ret) 404 if (ret)
@@ -428,39 +431,28 @@ out:
428} 431}
429 432
430/* 433/*
431 * This is only called from close_ctree() or open_ctree(), both in single- 434 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
432 * treaded paths. Clean up the in-memory structures. No locking needed. 435 * first two are in single-threaded paths. And for the third one, we have set
436 * quota_root to be null with qgroup_lock held before, so it is safe to clean
437 * up the in-memory structures without qgroup_lock held.
433 */ 438 */
434void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 439void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
435{ 440{
436 struct rb_node *n; 441 struct rb_node *n;
437 struct btrfs_qgroup *qgroup; 442 struct btrfs_qgroup *qgroup;
438 struct btrfs_qgroup_list *list;
439 443
440 while ((n = rb_first(&fs_info->qgroup_tree))) { 444 while ((n = rb_first(&fs_info->qgroup_tree))) {
441 qgroup = rb_entry(n, struct btrfs_qgroup, node); 445 qgroup = rb_entry(n, struct btrfs_qgroup, node);
442 rb_erase(n, &fs_info->qgroup_tree); 446 rb_erase(n, &fs_info->qgroup_tree);
443 447 __del_qgroup_rb(qgroup);
444 while (!list_empty(&qgroup->groups)) {
445 list = list_first_entry(&qgroup->groups,
446 struct btrfs_qgroup_list,
447 next_group);
448 list_del(&list->next_group);
449 list_del(&list->next_member);
450 kfree(list);
451 }
452
453 while (!list_empty(&qgroup->members)) {
454 list = list_first_entry(&qgroup->members,
455 struct btrfs_qgroup_list,
456 next_member);
457 list_del(&list->next_group);
458 list_del(&list->next_member);
459 kfree(list);
460 }
461 kfree(qgroup);
462 } 448 }
449 /*
450 * we call btrfs_free_qgroup_config() when umounting
451 * filesystem and disabling quota, so we set qgroup_ulist
452 * to be null here to avoid double free.
453 */
463 ulist_free(fs_info->qgroup_ulist); 454 ulist_free(fs_info->qgroup_ulist);
455 fs_info->qgroup_ulist = NULL;
464} 456}
465 457
466static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 458static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
@@ -946,13 +938,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
946 fs_info->pending_quota_state = 0; 938 fs_info->pending_quota_state = 0;
947 quota_root = fs_info->quota_root; 939 quota_root = fs_info->quota_root;
948 fs_info->quota_root = NULL; 940 fs_info->quota_root = NULL;
949 btrfs_free_qgroup_config(fs_info);
950 spin_unlock(&fs_info->qgroup_lock); 941 spin_unlock(&fs_info->qgroup_lock);
951 942
952 if (!quota_root) { 943 btrfs_free_qgroup_config(fs_info);
953 ret = -EINVAL;
954 goto out;
955 }
956 944
957 ret = btrfs_clean_quota_tree(trans, quota_root); 945 ret = btrfs_clean_quota_tree(trans, quota_root);
958 if (ret) 946 if (ret)
@@ -1174,7 +1162,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1174 if (ret) { 1162 if (ret) {
1175 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1163 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1176 printk(KERN_INFO "unable to update quota limit for %llu\n", 1164 printk(KERN_INFO "unable to update quota limit for %llu\n",
1177 (unsigned long long)qgroupid); 1165 qgroupid);
1178 } 1166 }
1179 1167
1180 spin_lock(&fs_info->qgroup_lock); 1168 spin_lock(&fs_info->qgroup_lock);
@@ -1884,10 +1872,9 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1884 path, 1, 0); 1872 path, 1, 0);
1885 1873
1886 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", 1874 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1887 (unsigned long long)fs_info->qgroup_rescan_progress.objectid, 1875 fs_info->qgroup_rescan_progress.objectid,
1888 fs_info->qgroup_rescan_progress.type, 1876 fs_info->qgroup_rescan_progress.type,
1889 (unsigned long long)fs_info->qgroup_rescan_progress.offset, 1877 fs_info->qgroup_rescan_progress.offset, ret);
1890 ret);
1891 1878
1892 if (ret) { 1879 if (ret) {
1893 /* 1880 /*
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0525e1389f5b..d0ecfbd9cc9f 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1540,8 +1540,10 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio)
1540 int ret; 1540 int ret;
1541 1541
1542 ret = alloc_rbio_parity_pages(rbio); 1542 ret = alloc_rbio_parity_pages(rbio);
1543 if (ret) 1543 if (ret) {
1544 __free_raid_bio(rbio);
1544 return ret; 1545 return ret;
1546 }
1545 1547
1546 ret = lock_stripe_add(rbio); 1548 ret = lock_stripe_add(rbio);
1547 if (ret == 0) 1549 if (ret == 0)
@@ -1687,11 +1689,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1687 struct blk_plug_cb *cb; 1689 struct blk_plug_cb *cb;
1688 1690
1689 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 1691 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
1690 if (IS_ERR(rbio)) { 1692 if (IS_ERR(rbio))
1691 kfree(raid_map);
1692 kfree(bbio);
1693 return PTR_ERR(rbio); 1693 return PTR_ERR(rbio);
1694 }
1695 bio_list_add(&rbio->bio_list, bio); 1694 bio_list_add(&rbio->bio_list, bio);
1696 rbio->bio_list_bytes = bio->bi_size; 1695 rbio->bio_list_bytes = bio->bi_size;
1697 1696
@@ -2041,9 +2040,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2041 int ret; 2040 int ret;
2042 2041
2043 rbio = alloc_rbio(root, bbio, raid_map, stripe_len); 2042 rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
2044 if (IS_ERR(rbio)) { 2043 if (IS_ERR(rbio))
2045 return PTR_ERR(rbio); 2044 return PTR_ERR(rbio);
2046 }
2047 2045
2048 rbio->read_rebuild = 1; 2046 rbio->read_rebuild = 1;
2049 bio_list_add(&rbio->bio_list, bio); 2047 bio_list_add(&rbio->bio_list, bio);
@@ -2052,6 +2050,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2052 rbio->faila = find_logical_bio_stripe(rbio, bio); 2050 rbio->faila = find_logical_bio_stripe(rbio, bio);
2053 if (rbio->faila == -1) { 2051 if (rbio->faila == -1) {
2054 BUG(); 2052 BUG();
2053 kfree(raid_map);
2054 kfree(bbio);
2055 kfree(rbio); 2055 kfree(rbio);
2056 return -EIO; 2056 return -EIO;
2057 } 2057 }
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 12096496cc99..aacc2121e87c 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -335,7 +335,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
335 if (bnode->root) 335 if (bnode->root)
336 fs_info = bnode->root->fs_info; 336 fs_info = bnode->root->fs_info;
337 btrfs_panic(fs_info, errno, "Inconsistency in backref cache " 337 btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
338 "found at offset %llu\n", (unsigned long long)bytenr); 338 "found at offset %llu\n", bytenr);
339} 339}
340 340
341/* 341/*
@@ -641,6 +641,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
641 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 641 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
642 return 1; 642 return 1;
643 } 643 }
644 if (key.type == BTRFS_METADATA_ITEM_KEY &&
645 item_size <= sizeof(*ei)) {
646 WARN_ON(item_size < sizeof(*ei));
647 return 1;
648 }
644 649
645 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 650 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
646 bi = (struct btrfs_tree_block_info *)(ei + 1); 651 bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -691,6 +696,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
691 int cowonly; 696 int cowonly;
692 int ret; 697 int ret;
693 int err = 0; 698 int err = 0;
699 bool need_check = true;
694 700
695 path1 = btrfs_alloc_path(); 701 path1 = btrfs_alloc_path();
696 path2 = btrfs_alloc_path(); 702 path2 = btrfs_alloc_path();
@@ -914,6 +920,7 @@ again:
914 cur->bytenr); 920 cur->bytenr);
915 921
916 lower = cur; 922 lower = cur;
923 need_check = true;
917 for (; level < BTRFS_MAX_LEVEL; level++) { 924 for (; level < BTRFS_MAX_LEVEL; level++) {
918 if (!path2->nodes[level]) { 925 if (!path2->nodes[level]) {
919 BUG_ON(btrfs_root_bytenr(&root->root_item) != 926 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
@@ -957,14 +964,12 @@ again:
957 964
958 /* 965 /*
959 * add the block to pending list if we 966 * add the block to pending list if we
960 * need check its backrefs. only block 967 * need check its backrefs, we only do this once
961 * at 'cur->level + 1' is added to the 968 * while walking up a tree as we will catch
962 * tail of pending list. this guarantees 969 * anything else later on.
963 * we check backrefs from lower level
964 * blocks to upper level blocks.
965 */ 970 */
966 if (!upper->checked && 971 if (!upper->checked && need_check) {
967 level == cur->level + 1) { 972 need_check = false;
968 list_add_tail(&edge->list[UPPER], 973 list_add_tail(&edge->list[UPPER],
969 &list); 974 &list);
970 } else 975 } else
@@ -2314,8 +2319,13 @@ again:
2314 BUG_ON(root->reloc_root != reloc_root); 2319 BUG_ON(root->reloc_root != reloc_root);
2315 2320
2316 ret = merge_reloc_root(rc, root); 2321 ret = merge_reloc_root(rc, root);
2317 if (ret) 2322 if (ret) {
2323 __update_reloc_root(reloc_root, 1);
2324 free_extent_buffer(reloc_root->node);
2325 free_extent_buffer(reloc_root->commit_root);
2326 kfree(reloc_root);
2318 goto out; 2327 goto out;
2328 }
2319 } else { 2329 } else {
2320 list_del_init(&reloc_root->root_list); 2330 list_del_init(&reloc_root->root_list);
2321 } 2331 }
@@ -2344,9 +2354,6 @@ again:
2344 if (IS_ERR(root)) 2354 if (IS_ERR(root))
2345 continue; 2355 continue;
2346 2356
2347 if (btrfs_root_refs(&root->root_item) == 0)
2348 continue;
2349
2350 trans = btrfs_join_transaction(root); 2357 trans = btrfs_join_transaction(root);
2351 BUG_ON(IS_ERR(trans)); 2358 BUG_ON(IS_ERR(trans));
2352 2359
@@ -3628,7 +3635,7 @@ int add_data_references(struct reloc_control *rc,
3628 unsigned long ptr; 3635 unsigned long ptr;
3629 unsigned long end; 3636 unsigned long end;
3630 u32 blocksize = btrfs_level_size(rc->extent_root, 0); 3637 u32 blocksize = btrfs_level_size(rc->extent_root, 0);
3631 int ret; 3638 int ret = 0;
3632 int err = 0; 3639 int err = 0;
3633 3640
3634 eb = path->nodes[0]; 3641 eb = path->nodes[0];
@@ -3655,6 +3662,10 @@ int add_data_references(struct reloc_control *rc,
3655 } else { 3662 } else {
3656 BUG(); 3663 BUG();
3657 } 3664 }
3665 if (ret) {
3666 err = ret;
3667 goto out;
3668 }
3658 ptr += btrfs_extent_inline_ref_size(key.type); 3669 ptr += btrfs_extent_inline_ref_size(key.type);
3659 } 3670 }
3660 WARN_ON(ptr > end); 3671 WARN_ON(ptr > end);
@@ -3700,6 +3711,7 @@ int add_data_references(struct reloc_control *rc,
3700 } 3711 }
3701 path->slots[0]++; 3712 path->slots[0]++;
3702 } 3713 }
3714out:
3703 btrfs_release_path(path); 3715 btrfs_release_path(path);
3704 if (err) 3716 if (err)
3705 free_block_list(blocks); 3717 free_block_list(blocks);
@@ -4219,8 +4231,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4219 } 4231 }
4220 4232
4221 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", 4233 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
4222 (unsigned long long)rc->block_group->key.objectid, 4234 rc->block_group->key.objectid, rc->block_group->flags);
4223 (unsigned long long)rc->block_group->flags);
4224 4235
4225 ret = btrfs_start_all_delalloc_inodes(fs_info, 0); 4236 ret = btrfs_start_all_delalloc_inodes(fs_info, 0);
4226 if (ret < 0) { 4237 if (ret < 0) {
@@ -4242,7 +4253,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4242 break; 4253 break;
4243 4254
4244 printk(KERN_INFO "btrfs: found %llu extents\n", 4255 printk(KERN_INFO "btrfs: found %llu extents\n",
4245 (unsigned long long)rc->extents_found); 4256 rc->extents_found);
4246 4257
4247 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4258 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4248 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); 4259 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ffb1036ef10d..0b1f4ef8db98 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -29,8 +29,8 @@
29 * generation numbers as then we know the root was once mounted with an older 29 * generation numbers as then we know the root was once mounted with an older
30 * kernel that was not aware of the root item structure change. 30 * kernel that was not aware of the root item structure change.
31 */ 31 */
32void btrfs_read_root_item(struct extent_buffer *eb, int slot, 32static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
33 struct btrfs_root_item *item) 33 struct btrfs_root_item *item)
34{ 34{
35 uuid_le uuid; 35 uuid_le uuid;
36 int len; 36 int len;
@@ -155,8 +155,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
155 if (ret != 0) { 155 if (ret != 0) {
156 btrfs_print_leaf(root, path->nodes[0]); 156 btrfs_print_leaf(root, path->nodes[0]);
157 printk(KERN_CRIT "unable to update root key %llu %u %llu\n", 157 printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
158 (unsigned long long)key->objectid, key->type, 158 key->objectid, key->type, key->offset);
159 (unsigned long long)key->offset);
160 BUG_ON(1); 159 BUG_ON(1);
161 } 160 }
162 161
@@ -490,13 +489,13 @@ again:
490 */ 489 */
491void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item) 490void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
492{ 491{
493 u64 inode_flags = le64_to_cpu(root_item->inode.flags); 492 u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode);
494 493
495 if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) { 494 if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
496 inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT; 495 inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
497 root_item->inode.flags = cpu_to_le64(inode_flags); 496 btrfs_set_stack_inode_flags(&root_item->inode, inode_flags);
498 root_item->flags = 0; 497 btrfs_set_root_flags(root_item, 0);
499 root_item->byte_limit = 0; 498 btrfs_set_root_limit(root_item, 0);
500 } 499 }
501} 500}
502 501
@@ -507,8 +506,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
507 struct timespec ct = CURRENT_TIME; 506 struct timespec ct = CURRENT_TIME;
508 507
509 spin_lock(&root->root_item_lock); 508 spin_lock(&root->root_item_lock);
510 item->ctransid = cpu_to_le64(trans->transid); 509 btrfs_set_root_ctransid(item, trans->transid);
511 item->ctime.sec = cpu_to_le64(ct.tv_sec); 510 btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
512 item->ctime.nsec = cpu_to_le32(ct.tv_nsec); 511 btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
513 spin_unlock(&root->root_item_lock); 512 spin_unlock(&root->root_item_lock);
514} 513}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 64a157becbe5..0afcd452fcb3 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -754,8 +754,7 @@ out:
754 num_uncorrectable_read_errors); 754 num_uncorrectable_read_errors);
755 printk_ratelimited_in_rcu(KERN_ERR 755 printk_ratelimited_in_rcu(KERN_ERR
756 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", 756 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
757 (unsigned long long)fixup->logical, 757 fixup->logical, rcu_str_deref(fixup->dev->name));
758 rcu_str_deref(fixup->dev->name));
759 } 758 }
760 759
761 btrfs_free_path(path); 760 btrfs_free_path(path);
@@ -1154,8 +1153,7 @@ corrected_error:
1154 spin_unlock(&sctx->stat_lock); 1153 spin_unlock(&sctx->stat_lock);
1155 printk_ratelimited_in_rcu(KERN_ERR 1154 printk_ratelimited_in_rcu(KERN_ERR
1156 "btrfs: fixed up error at logical %llu on dev %s\n", 1155 "btrfs: fixed up error at logical %llu on dev %s\n",
1157 (unsigned long long)logical, 1156 logical, rcu_str_deref(dev->name));
1158 rcu_str_deref(dev->name));
1159 } 1157 }
1160 } else { 1158 } else {
1161did_not_correct_error: 1159did_not_correct_error:
@@ -1164,8 +1162,7 @@ did_not_correct_error:
1164 spin_unlock(&sctx->stat_lock); 1162 spin_unlock(&sctx->stat_lock);
1165 printk_ratelimited_in_rcu(KERN_ERR 1163 printk_ratelimited_in_rcu(KERN_ERR
1166 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", 1164 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
1167 (unsigned long long)logical, 1165 logical, rcu_str_deref(dev->name));
1168 rcu_str_deref(dev->name));
1169 } 1166 }
1170 1167
1171out: 1168out:
@@ -1345,12 +1342,12 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1345 mapped_buffer = kmap_atomic(sblock->pagev[0]->page); 1342 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1346 h = (struct btrfs_header *)mapped_buffer; 1343 h = (struct btrfs_header *)mapped_buffer;
1347 1344
1348 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) || 1345 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
1349 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || 1346 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1350 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, 1347 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1351 BTRFS_UUID_SIZE)) { 1348 BTRFS_UUID_SIZE)) {
1352 sblock->header_error = 1; 1349 sblock->header_error = 1;
1353 } else if (generation != le64_to_cpu(h->generation)) { 1350 } else if (generation != btrfs_stack_header_generation(h)) {
1354 sblock->header_error = 1; 1351 sblock->header_error = 1;
1355 sblock->generation_error = 1; 1352 sblock->generation_error = 1;
1356 } 1353 }
@@ -1720,10 +1717,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1720 * b) the page is already kmapped 1717 * b) the page is already kmapped
1721 */ 1718 */
1722 1719
1723 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr)) 1720 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1724 ++fail; 1721 ++fail;
1725 1722
1726 if (sblock->pagev[0]->generation != le64_to_cpu(h->generation)) 1723 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h))
1727 ++fail; 1724 ++fail;
1728 1725
1729 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1726 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1786,10 +1783,10 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1786 s = (struct btrfs_super_block *)mapped_buffer; 1783 s = (struct btrfs_super_block *)mapped_buffer;
1787 memcpy(on_disk_csum, s->csum, sctx->csum_size); 1784 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1788 1785
1789 if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr)) 1786 if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
1790 ++fail_cor; 1787 ++fail_cor;
1791 1788
1792 if (sblock->pagev[0]->generation != le64_to_cpu(s->generation)) 1789 if (sblock->pagev[0]->generation != btrfs_super_generation(s))
1793 ++fail_gen; 1790 ++fail_gen;
1794 1791
1795 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) 1792 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -2455,8 +2452,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2455 printk(KERN_ERR 2452 printk(KERN_ERR
2456 "btrfs scrub: tree block %llu spanning " 2453 "btrfs scrub: tree block %llu spanning "
2457 "stripes, ignored. logical=%llu\n", 2454 "stripes, ignored. logical=%llu\n",
2458 (unsigned long long)key.objectid, 2455 key.objectid, logical);
2459 (unsigned long long)logical);
2460 goto next; 2456 goto next;
2461 } 2457 }
2462 2458
@@ -2863,9 +2859,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2863 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { 2859 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
2864 /* not supported for data w/o checksums */ 2860 /* not supported for data w/o checksums */
2865 printk(KERN_ERR 2861 printk(KERN_ERR
2866 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", 2862 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n",
2867 fs_info->chunk_root->sectorsize, 2863 fs_info->chunk_root->sectorsize, PAGE_SIZE);
2868 (unsigned long long)PAGE_SIZE);
2869 return -EINVAL; 2864 return -EINVAL;
2870 } 2865 }
2871 2866
@@ -3175,11 +3170,9 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3175 copy_nocow_pages_for_inode, 3170 copy_nocow_pages_for_inode,
3176 nocow_ctx); 3171 nocow_ctx);
3177 if (ret != 0 && ret != -ENOENT) { 3172 if (ret != 0 && ret != -ENOENT) {
3178 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", 3173 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
3179 (unsigned long long)logical, 3174 logical, physical_for_dev_replace, len, mirror_num,
3180 (unsigned long long)physical_for_dev_replace, 3175 ret);
3181 (unsigned long long)len,
3182 (unsigned long long)mirror_num, ret);
3183 not_written = 1; 3176 not_written = 1;
3184 goto out; 3177 goto out;
3185 } 3178 }
@@ -3224,11 +3217,6 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3224 return PTR_ERR(local_root); 3217 return PTR_ERR(local_root);
3225 } 3218 }
3226 3219
3227 if (btrfs_root_refs(&local_root->root_item) == 0) {
3228 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3229 return -ENOENT;
3230 }
3231
3232 key.type = BTRFS_INODE_ITEM_KEY; 3220 key.type = BTRFS_INODE_ITEM_KEY;
3233 key.objectid = inum; 3221 key.objectid = inum;
3234 key.offset = 0; 3222 key.offset = 0;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 2e14fd89a8b4..e46e0ed74925 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -26,6 +26,7 @@
26#include <linux/radix-tree.h> 26#include <linux/radix-tree.h>
27#include <linux/crc32c.h> 27#include <linux/crc32c.h>
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/string.h>
29 30
30#include "send.h" 31#include "send.h"
31#include "backref.h" 32#include "backref.h"
@@ -54,8 +55,8 @@ struct fs_path {
54 55
55 char *buf; 56 char *buf;
56 int buf_len; 57 int buf_len;
57 int reversed:1; 58 unsigned int reversed:1;
58 int virtual_mem:1; 59 unsigned int virtual_mem:1;
59 char inline_buf[]; 60 char inline_buf[];
60 }; 61 };
61 char pad[PAGE_SIZE]; 62 char pad[PAGE_SIZE];
@@ -1668,6 +1669,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1668 u64 *who_ino, u64 *who_gen) 1669 u64 *who_ino, u64 *who_gen)
1669{ 1670{
1670 int ret = 0; 1671 int ret = 0;
1672 u64 gen;
1671 u64 other_inode = 0; 1673 u64 other_inode = 0;
1672 u8 other_type = 0; 1674 u8 other_type = 0;
1673 1675
@@ -1678,6 +1680,24 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1678 if (ret <= 0) 1680 if (ret <= 0)
1679 goto out; 1681 goto out;
1680 1682
1683 /*
1684 * If we have a parent root we need to verify that the parent dir was
1685 * not deleted and then re-created, if it was then we have no overwrite
1686 * and we can just unlink this entry.
1687 */
1688 if (sctx->parent_root) {
1689 ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
1690 NULL, NULL, NULL);
1691 if (ret < 0 && ret != -ENOENT)
1692 goto out;
1693 if (ret) {
1694 ret = 0;
1695 goto out;
1696 }
1697 if (gen != dir_gen)
1698 goto out;
1699 }
1700
1681 ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, 1701 ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1682 &other_inode, &other_type); 1702 &other_inode, &other_type);
1683 if (ret < 0 && ret != -ENOENT) 1703 if (ret < 0 && ret != -ENOENT)
@@ -2519,7 +2539,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
2519 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2539 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2520 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2540 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2521 2541
2522 if (di_key.objectid < sctx->send_progress) { 2542 if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
2543 di_key.objectid < sctx->send_progress) {
2523 ret = 1; 2544 ret = 1;
2524 goto out; 2545 goto out;
2525 } 2546 }
@@ -2581,7 +2602,6 @@ static int record_ref(struct list_head *head, u64 dir,
2581 u64 dir_gen, struct fs_path *path) 2602 u64 dir_gen, struct fs_path *path)
2582{ 2603{
2583 struct recorded_ref *ref; 2604 struct recorded_ref *ref;
2584 char *tmp;
2585 2605
2586 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2606 ref = kmalloc(sizeof(*ref), GFP_NOFS);
2587 if (!ref) 2607 if (!ref)
@@ -2591,25 +2611,35 @@ static int record_ref(struct list_head *head, u64 dir,
2591 ref->dir_gen = dir_gen; 2611 ref->dir_gen = dir_gen;
2592 ref->full_path = path; 2612 ref->full_path = path;
2593 2613
2594 tmp = strrchr(ref->full_path->start, '/'); 2614 ref->name = (char *)kbasename(ref->full_path->start);
2595 if (!tmp) { 2615 ref->name_len = ref->full_path->end - ref->name;
2596 ref->name_len = ref->full_path->end - ref->full_path->start; 2616 ref->dir_path = ref->full_path->start;
2597 ref->name = ref->full_path->start; 2617 if (ref->name == ref->full_path->start)
2598 ref->dir_path_len = 0; 2618 ref->dir_path_len = 0;
2599 ref->dir_path = ref->full_path->start; 2619 else
2600 } else {
2601 tmp++;
2602 ref->name_len = ref->full_path->end - tmp;
2603 ref->name = tmp;
2604 ref->dir_path = ref->full_path->start;
2605 ref->dir_path_len = ref->full_path->end - 2620 ref->dir_path_len = ref->full_path->end -
2606 ref->full_path->start - 1 - ref->name_len; 2621 ref->full_path->start - 1 - ref->name_len;
2607 }
2608 2622
2609 list_add_tail(&ref->list, head); 2623 list_add_tail(&ref->list, head);
2610 return 0; 2624 return 0;
2611} 2625}
2612 2626
2627static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2628{
2629 struct recorded_ref *new;
2630
2631 new = kmalloc(sizeof(*ref), GFP_NOFS);
2632 if (!new)
2633 return -ENOMEM;
2634
2635 new->dir = ref->dir;
2636 new->dir_gen = ref->dir_gen;
2637 new->full_path = NULL;
2638 INIT_LIST_HEAD(&new->list);
2639 list_add_tail(&new->list, list);
2640 return 0;
2641}
2642
2613static void __free_recorded_refs(struct list_head *head) 2643static void __free_recorded_refs(struct list_head *head)
2614{ 2644{
2615 struct recorded_ref *cur; 2645 struct recorded_ref *cur;
@@ -2724,9 +2754,7 @@ static int process_recorded_refs(struct send_ctx *sctx)
2724 int ret = 0; 2754 int ret = 0;
2725 struct recorded_ref *cur; 2755 struct recorded_ref *cur;
2726 struct recorded_ref *cur2; 2756 struct recorded_ref *cur2;
2727 struct ulist *check_dirs = NULL; 2757 struct list_head check_dirs;
2728 struct ulist_iterator uit;
2729 struct ulist_node *un;
2730 struct fs_path *valid_path = NULL; 2758 struct fs_path *valid_path = NULL;
2731 u64 ow_inode = 0; 2759 u64 ow_inode = 0;
2732 u64 ow_gen; 2760 u64 ow_gen;
@@ -2740,6 +2768,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2740 * which is always '..' 2768 * which is always '..'
2741 */ 2769 */
2742 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); 2770 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
2771 INIT_LIST_HEAD(&check_dirs);
2743 2772
2744 valid_path = fs_path_alloc(); 2773 valid_path = fs_path_alloc();
2745 if (!valid_path) { 2774 if (!valid_path) {
@@ -2747,12 +2776,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2747 goto out; 2776 goto out;
2748 } 2777 }
2749 2778
2750 check_dirs = ulist_alloc(GFP_NOFS);
2751 if (!check_dirs) {
2752 ret = -ENOMEM;
2753 goto out;
2754 }
2755
2756 /* 2779 /*
2757 * First, check if the first ref of the current inode was overwritten 2780 * First, check if the first ref of the current inode was overwritten
2758 * before. If yes, we know that the current inode was already orphanized 2781 * before. If yes, we know that the current inode was already orphanized
@@ -2889,8 +2912,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2889 goto out; 2912 goto out;
2890 } 2913 }
2891 } 2914 }
2892 ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, 2915 ret = dup_ref(cur, &check_dirs);
2893 GFP_NOFS);
2894 if (ret < 0) 2916 if (ret < 0)
2895 goto out; 2917 goto out;
2896 } 2918 }
@@ -2918,8 +2940,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2918 } 2940 }
2919 2941
2920 list_for_each_entry(cur, &sctx->deleted_refs, list) { 2942 list_for_each_entry(cur, &sctx->deleted_refs, list) {
2921 ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, 2943 ret = dup_ref(cur, &check_dirs);
2922 GFP_NOFS);
2923 if (ret < 0) 2944 if (ret < 0)
2924 goto out; 2945 goto out;
2925 } 2946 }
@@ -2930,8 +2951,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2930 */ 2951 */
2931 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 2952 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
2932 list); 2953 list);
2933 ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, 2954 ret = dup_ref(cur, &check_dirs);
2934 GFP_NOFS);
2935 if (ret < 0) 2955 if (ret < 0)
2936 goto out; 2956 goto out;
2937 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 2957 } else if (!S_ISDIR(sctx->cur_inode_mode)) {
@@ -2951,12 +2971,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2951 if (ret < 0) 2971 if (ret < 0)
2952 goto out; 2972 goto out;
2953 } 2973 }
2954 ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, 2974 ret = dup_ref(cur, &check_dirs);
2955 GFP_NOFS);
2956 if (ret < 0) 2975 if (ret < 0)
2957 goto out; 2976 goto out;
2958 } 2977 }
2959
2960 /* 2978 /*
2961 * If the inode is still orphan, unlink the orphan. This may 2979 * If the inode is still orphan, unlink the orphan. This may
2962 * happen when a previous inode did overwrite the first ref 2980 * happen when a previous inode did overwrite the first ref
@@ -2978,33 +2996,32 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2978 * deletion and if it's finally possible to perform the rmdir now. 2996 * deletion and if it's finally possible to perform the rmdir now.
2979 * We also update the inode stats of the parent dirs here. 2997 * We also update the inode stats of the parent dirs here.
2980 */ 2998 */
2981 ULIST_ITER_INIT(&uit); 2999 list_for_each_entry(cur, &check_dirs, list) {
2982 while ((un = ulist_next(check_dirs, &uit))) {
2983 /* 3000 /*
2984 * In case we had refs into dirs that were not processed yet, 3001 * In case we had refs into dirs that were not processed yet,
2985 * we don't need to do the utime and rmdir logic for these dirs. 3002 * we don't need to do the utime and rmdir logic for these dirs.
2986 * The dir will be processed later. 3003 * The dir will be processed later.
2987 */ 3004 */
2988 if (un->val > sctx->cur_ino) 3005 if (cur->dir > sctx->cur_ino)
2989 continue; 3006 continue;
2990 3007
2991 ret = get_cur_inode_state(sctx, un->val, un->aux); 3008 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
2992 if (ret < 0) 3009 if (ret < 0)
2993 goto out; 3010 goto out;
2994 3011
2995 if (ret == inode_state_did_create || 3012 if (ret == inode_state_did_create ||
2996 ret == inode_state_no_change) { 3013 ret == inode_state_no_change) {
2997 /* TODO delayed utimes */ 3014 /* TODO delayed utimes */
2998 ret = send_utimes(sctx, un->val, un->aux); 3015 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
2999 if (ret < 0) 3016 if (ret < 0)
3000 goto out; 3017 goto out;
3001 } else if (ret == inode_state_did_delete) { 3018 } else if (ret == inode_state_did_delete) {
3002 ret = can_rmdir(sctx, un->val, sctx->cur_ino); 3019 ret = can_rmdir(sctx, cur->dir, sctx->cur_ino);
3003 if (ret < 0) 3020 if (ret < 0)
3004 goto out; 3021 goto out;
3005 if (ret) { 3022 if (ret) {
3006 ret = get_cur_path(sctx, un->val, un->aux, 3023 ret = get_cur_path(sctx, cur->dir,
3007 valid_path); 3024 cur->dir_gen, valid_path);
3008 if (ret < 0) 3025 if (ret < 0)
3009 goto out; 3026 goto out;
3010 ret = send_rmdir(sctx, valid_path); 3027 ret = send_rmdir(sctx, valid_path);
@@ -3017,8 +3034,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3017 ret = 0; 3034 ret = 0;
3018 3035
3019out: 3036out:
3037 __free_recorded_refs(&check_dirs);
3020 free_recorded_refs(sctx); 3038 free_recorded_refs(sctx);
3021 ulist_free(check_dirs);
3022 fs_path_free(valid_path); 3039 fs_path_free(valid_path);
3023 return ret; 3040 return ret;
3024} 3041}
@@ -3119,6 +3136,8 @@ out:
3119 3136
3120struct find_ref_ctx { 3137struct find_ref_ctx {
3121 u64 dir; 3138 u64 dir;
3139 u64 dir_gen;
3140 struct btrfs_root *root;
3122 struct fs_path *name; 3141 struct fs_path *name;
3123 int found_idx; 3142 int found_idx;
3124}; 3143};
@@ -3128,9 +3147,21 @@ static int __find_iref(int num, u64 dir, int index,
3128 void *ctx_) 3147 void *ctx_)
3129{ 3148{
3130 struct find_ref_ctx *ctx = ctx_; 3149 struct find_ref_ctx *ctx = ctx_;
3150 u64 dir_gen;
3151 int ret;
3131 3152
3132 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 3153 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3133 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 3154 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3155 /*
3156 * To avoid doing extra lookups we'll only do this if everything
3157 * else matches.
3158 */
3159 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
3160 NULL, NULL, NULL);
3161 if (ret)
3162 return ret;
3163 if (dir_gen != ctx->dir_gen)
3164 return 0;
3134 ctx->found_idx = num; 3165 ctx->found_idx = num;
3135 return 1; 3166 return 1;
3136 } 3167 }
@@ -3140,14 +3171,16 @@ static int __find_iref(int num, u64 dir, int index,
3140static int find_iref(struct btrfs_root *root, 3171static int find_iref(struct btrfs_root *root,
3141 struct btrfs_path *path, 3172 struct btrfs_path *path,
3142 struct btrfs_key *key, 3173 struct btrfs_key *key,
3143 u64 dir, struct fs_path *name) 3174 u64 dir, u64 dir_gen, struct fs_path *name)
3144{ 3175{
3145 int ret; 3176 int ret;
3146 struct find_ref_ctx ctx; 3177 struct find_ref_ctx ctx;
3147 3178
3148 ctx.dir = dir; 3179 ctx.dir = dir;
3149 ctx.name = name; 3180 ctx.name = name;
3181 ctx.dir_gen = dir_gen;
3150 ctx.found_idx = -1; 3182 ctx.found_idx = -1;
3183 ctx.root = root;
3151 3184
3152 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 3185 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
3153 if (ret < 0) 3186 if (ret < 0)
@@ -3163,11 +3196,17 @@ static int __record_changed_new_ref(int num, u64 dir, int index,
3163 struct fs_path *name, 3196 struct fs_path *name,
3164 void *ctx) 3197 void *ctx)
3165{ 3198{
3199 u64 dir_gen;
3166 int ret; 3200 int ret;
3167 struct send_ctx *sctx = ctx; 3201 struct send_ctx *sctx = ctx;
3168 3202
3203 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
3204 NULL, NULL, NULL);
3205 if (ret)
3206 return ret;
3207
3169 ret = find_iref(sctx->parent_root, sctx->right_path, 3208 ret = find_iref(sctx->parent_root, sctx->right_path,
3170 sctx->cmp_key, dir, name); 3209 sctx->cmp_key, dir, dir_gen, name);
3171 if (ret == -ENOENT) 3210 if (ret == -ENOENT)
3172 ret = __record_new_ref(num, dir, index, name, sctx); 3211 ret = __record_new_ref(num, dir, index, name, sctx);
3173 else if (ret > 0) 3212 else if (ret > 0)
@@ -3180,11 +3219,17 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index,
3180 struct fs_path *name, 3219 struct fs_path *name,
3181 void *ctx) 3220 void *ctx)
3182{ 3221{
3222 u64 dir_gen;
3183 int ret; 3223 int ret;
3184 struct send_ctx *sctx = ctx; 3224 struct send_ctx *sctx = ctx;
3185 3225
3226 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
3227 NULL, NULL, NULL);
3228 if (ret)
3229 return ret;
3230
3186 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 3231 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
3187 dir, name); 3232 dir, dir_gen, name);
3188 if (ret == -ENOENT) 3233 if (ret == -ENOENT)
3189 ret = __record_deleted_ref(num, dir, index, name, sctx); 3234 ret = __record_deleted_ref(num, dir, index, name, sctx);
3190 else if (ret > 0) 3235 else if (ret > 0)
@@ -3869,7 +3914,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
3869 btrfs_item_key_to_cpu(eb, &found_key, slot); 3914 btrfs_item_key_to_cpu(eb, &found_key, slot);
3870 if (found_key.objectid != key.objectid || 3915 if (found_key.objectid != key.objectid ||
3871 found_key.type != key.type) { 3916 found_key.type != key.type) {
3872 ret = 0; 3917 /* If we're a hole then just pretend nothing changed */
3918 ret = (left_disknr) ? 0 : 1;
3873 goto out; 3919 goto out;
3874 } 3920 }
3875 3921
@@ -3895,7 +3941,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
3895 * This may only happen on the first iteration. 3941 * This may only happen on the first iteration.
3896 */ 3942 */
3897 if (found_key.offset + right_len <= ekey->offset) { 3943 if (found_key.offset + right_len <= ekey->offset) {
3898 ret = 0; 3944 /* If we're a hole just pretend nothing changed */
3945 ret = (left_disknr) ? 0 : 1;
3899 goto out; 3946 goto out;
3900 } 3947 }
3901 3948
@@ -3960,8 +4007,8 @@ static int process_extent(struct send_ctx *sctx,
3960 struct btrfs_path *path, 4007 struct btrfs_path *path,
3961 struct btrfs_key *key) 4008 struct btrfs_key *key)
3962{ 4009{
3963 int ret = 0;
3964 struct clone_root *found_clone = NULL; 4010 struct clone_root *found_clone = NULL;
4011 int ret = 0;
3965 4012
3966 if (S_ISLNK(sctx->cur_inode_mode)) 4013 if (S_ISLNK(sctx->cur_inode_mode))
3967 return 0; 4014 return 0;
@@ -3974,6 +4021,32 @@ static int process_extent(struct send_ctx *sctx,
3974 ret = 0; 4021 ret = 0;
3975 goto out; 4022 goto out;
3976 } 4023 }
4024 } else {
4025 struct btrfs_file_extent_item *ei;
4026 u8 type;
4027
4028 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4029 struct btrfs_file_extent_item);
4030 type = btrfs_file_extent_type(path->nodes[0], ei);
4031 if (type == BTRFS_FILE_EXTENT_PREALLOC ||
4032 type == BTRFS_FILE_EXTENT_REG) {
4033 /*
4034 * The send spec does not have a prealloc command yet,
4035 * so just leave a hole for prealloc'ed extents until
4036 * we have enough commands queued up to justify rev'ing
4037 * the send spec.
4038 */
4039 if (type == BTRFS_FILE_EXTENT_PREALLOC) {
4040 ret = 0;
4041 goto out;
4042 }
4043
4044 /* Have a hole, just skip it. */
4045 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
4046 ret = 0;
4047 goto out;
4048 }
4049 }
3977 } 4050 }
3978 4051
3979 ret = find_extent_clone(sctx, path, key->objectid, key->offset, 4052 ret = find_extent_clone(sctx, path, key->objectid, key->offset,
@@ -4361,6 +4434,64 @@ static int changed_extent(struct send_ctx *sctx,
4361 return ret; 4434 return ret;
4362} 4435}
4363 4436
4437static int dir_changed(struct send_ctx *sctx, u64 dir)
4438{
4439 u64 orig_gen, new_gen;
4440 int ret;
4441
4442 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
4443 NULL, NULL);
4444 if (ret)
4445 return ret;
4446
4447 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
4448 NULL, NULL, NULL);
4449 if (ret)
4450 return ret;
4451
4452 return (orig_gen != new_gen) ? 1 : 0;
4453}
4454
4455static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
4456 struct btrfs_key *key)
4457{
4458 struct btrfs_inode_extref *extref;
4459 struct extent_buffer *leaf;
4460 u64 dirid = 0, last_dirid = 0;
4461 unsigned long ptr;
4462 u32 item_size;
4463 u32 cur_offset = 0;
4464 int ref_name_len;
4465 int ret = 0;
4466
4467 /* Easy case, just check this one dirid */
4468 if (key->type == BTRFS_INODE_REF_KEY) {
4469 dirid = key->offset;
4470
4471 ret = dir_changed(sctx, dirid);
4472 goto out;
4473 }
4474
4475 leaf = path->nodes[0];
4476 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4477 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4478 while (cur_offset < item_size) {
4479 extref = (struct btrfs_inode_extref *)(ptr +
4480 cur_offset);
4481 dirid = btrfs_inode_extref_parent(leaf, extref);
4482 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
4483 cur_offset += ref_name_len + sizeof(*extref);
4484 if (dirid == last_dirid)
4485 continue;
4486 ret = dir_changed(sctx, dirid);
4487 if (ret)
4488 break;
4489 last_dirid = dirid;
4490 }
4491out:
4492 return ret;
4493}
4494
4364/* 4495/*
4365 * Updates compare related fields in sctx and simply forwards to the actual 4496 * Updates compare related fields in sctx and simply forwards to the actual
4366 * changed_xxx functions. 4497 * changed_xxx functions.
@@ -4376,6 +4507,19 @@ static int changed_cb(struct btrfs_root *left_root,
4376 int ret = 0; 4507 int ret = 0;
4377 struct send_ctx *sctx = ctx; 4508 struct send_ctx *sctx = ctx;
4378 4509
4510 if (result == BTRFS_COMPARE_TREE_SAME) {
4511 if (key->type != BTRFS_INODE_REF_KEY &&
4512 key->type != BTRFS_INODE_EXTREF_KEY)
4513 return 0;
4514 ret = compare_refs(sctx, left_path, key);
4515 if (!ret)
4516 return 0;
4517 if (ret < 0)
4518 return ret;
4519 result = BTRFS_COMPARE_TREE_CHANGED;
4520 ret = 0;
4521 }
4522
4379 sctx->left_path = left_path; 4523 sctx->left_path = left_path;
4380 sctx->right_path = right_path; 4524 sctx->right_path = right_path;
4381 sctx->cmp_key = key; 4525 sctx->cmp_key = key;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8eb6191d86da..3aab10ce63e8 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -56,6 +56,8 @@
56#include "rcu-string.h" 56#include "rcu-string.h"
57#include "dev-replace.h" 57#include "dev-replace.h"
58#include "free-space-cache.h" 58#include "free-space-cache.h"
59#include "backref.h"
60#include "tests/btrfs-tests.h"
59 61
60#define CREATE_TRACE_POINTS 62#define CREATE_TRACE_POINTS
61#include <trace/events/btrfs.h> 63#include <trace/events/btrfs.h>
@@ -320,14 +322,15 @@ enum {
320 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, 322 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
321 Opt_no_space_cache, Opt_recovery, Opt_skip_balance, 323 Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
322 Opt_check_integrity, Opt_check_integrity_including_extent_data, 324 Opt_check_integrity, Opt_check_integrity_including_extent_data,
323 Opt_check_integrity_print_mask, Opt_fatal_errors, 325 Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
326 Opt_commit_interval,
324 Opt_err, 327 Opt_err,
325}; 328};
326 329
327static match_table_t tokens = { 330static match_table_t tokens = {
328 {Opt_degraded, "degraded"}, 331 {Opt_degraded, "degraded"},
329 {Opt_subvol, "subvol=%s"}, 332 {Opt_subvol, "subvol=%s"},
330 {Opt_subvolid, "subvolid=%d"}, 333 {Opt_subvolid, "subvolid=%s"},
331 {Opt_device, "device=%s"}, 334 {Opt_device, "device=%s"},
332 {Opt_nodatasum, "nodatasum"}, 335 {Opt_nodatasum, "nodatasum"},
333 {Opt_nodatacow, "nodatacow"}, 336 {Opt_nodatacow, "nodatacow"},
@@ -360,7 +363,9 @@ static match_table_t tokens = {
360 {Opt_check_integrity, "check_int"}, 363 {Opt_check_integrity, "check_int"},
361 {Opt_check_integrity_including_extent_data, "check_int_data"}, 364 {Opt_check_integrity_including_extent_data, "check_int_data"},
362 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"}, 365 {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
366 {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
363 {Opt_fatal_errors, "fatal_errors=%s"}, 367 {Opt_fatal_errors, "fatal_errors=%s"},
368 {Opt_commit_interval, "commit=%d"},
364 {Opt_err, NULL}, 369 {Opt_err, NULL},
365}; 370};
366 371
@@ -496,10 +501,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
496 btrfs_set_opt(info->mount_opt, NOBARRIER); 501 btrfs_set_opt(info->mount_opt, NOBARRIER);
497 break; 502 break;
498 case Opt_thread_pool: 503 case Opt_thread_pool:
499 intarg = 0; 504 ret = match_int(&args[0], &intarg);
500 match_int(&args[0], &intarg); 505 if (ret) {
501 if (intarg) 506 goto out;
507 } else if (intarg > 0) {
502 info->thread_pool_size = intarg; 508 info->thread_pool_size = intarg;
509 } else {
510 ret = -EINVAL;
511 goto out;
512 }
503 break; 513 break;
504 case Opt_max_inline: 514 case Opt_max_inline:
505 num = match_strdup(&args[0]); 515 num = match_strdup(&args[0]);
@@ -513,7 +523,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
513 root->sectorsize); 523 root->sectorsize);
514 } 524 }
515 printk(KERN_INFO "btrfs: max_inline at %llu\n", 525 printk(KERN_INFO "btrfs: max_inline at %llu\n",
516 (unsigned long long)info->max_inline); 526 info->max_inline);
527 } else {
528 ret = -ENOMEM;
529 goto out;
517 } 530 }
518 break; 531 break;
519 case Opt_alloc_start: 532 case Opt_alloc_start:
@@ -525,7 +538,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
525 kfree(num); 538 kfree(num);
526 printk(KERN_INFO 539 printk(KERN_INFO
527 "btrfs: allocations start at %llu\n", 540 "btrfs: allocations start at %llu\n",
528 (unsigned long long)info->alloc_start); 541 info->alloc_start);
542 } else {
543 ret = -ENOMEM;
544 goto out;
529 } 545 }
530 break; 546 break;
531 case Opt_noacl: 547 case Opt_noacl:
@@ -540,12 +556,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
540 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); 556 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
541 break; 557 break;
542 case Opt_ratio: 558 case Opt_ratio:
543 intarg = 0; 559 ret = match_int(&args[0], &intarg);
544 match_int(&args[0], &intarg); 560 if (ret) {
545 if (intarg) { 561 goto out;
562 } else if (intarg >= 0) {
546 info->metadata_ratio = intarg; 563 info->metadata_ratio = intarg;
547 printk(KERN_INFO "btrfs: metadata ratio %d\n", 564 printk(KERN_INFO "btrfs: metadata ratio %d\n",
548 info->metadata_ratio); 565 info->metadata_ratio);
566 } else {
567 ret = -EINVAL;
568 goto out;
549 } 569 }
550 break; 570 break;
551 case Opt_discard: 571 case Opt_discard:
@@ -554,6 +574,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
554 case Opt_space_cache: 574 case Opt_space_cache:
555 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 575 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
556 break; 576 break;
577 case Opt_rescan_uuid_tree:
578 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
579 break;
557 case Opt_no_space_cache: 580 case Opt_no_space_cache:
558 printk(KERN_INFO "btrfs: disabling disk space caching\n"); 581 printk(KERN_INFO "btrfs: disabling disk space caching\n");
559 btrfs_clear_opt(info->mount_opt, SPACE_CACHE); 582 btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
@@ -596,13 +619,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
596 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 619 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
597 break; 620 break;
598 case Opt_check_integrity_print_mask: 621 case Opt_check_integrity_print_mask:
599 intarg = 0; 622 ret = match_int(&args[0], &intarg);
600 match_int(&args[0], &intarg); 623 if (ret) {
601 if (intarg) { 624 goto out;
625 } else if (intarg >= 0) {
602 info->check_integrity_print_mask = intarg; 626 info->check_integrity_print_mask = intarg;
603 printk(KERN_INFO "btrfs:" 627 printk(KERN_INFO "btrfs:"
604 " check_integrity_print_mask 0x%x\n", 628 " check_integrity_print_mask 0x%x\n",
605 info->check_integrity_print_mask); 629 info->check_integrity_print_mask);
630 } else {
631 ret = -EINVAL;
632 goto out;
606 } 633 }
607 break; 634 break;
608#else 635#else
@@ -626,6 +653,29 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
626 goto out; 653 goto out;
627 } 654 }
628 break; 655 break;
656 case Opt_commit_interval:
657 intarg = 0;
658 ret = match_int(&args[0], &intarg);
659 if (ret < 0) {
660 printk(KERN_ERR
661 "btrfs: invalid commit interval\n");
662 ret = -EINVAL;
663 goto out;
664 }
665 if (intarg > 0) {
666 if (intarg > 300) {
667 printk(KERN_WARNING
668 "btrfs: excessive commit interval %d\n",
669 intarg);
670 }
671 info->commit_interval = intarg;
672 } else {
673 printk(KERN_INFO
674 "btrfs: using default commit interval %ds\n",
675 BTRFS_DEFAULT_COMMIT_INTERVAL);
676 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
677 }
678 break;
629 case Opt_err: 679 case Opt_err:
630 printk(KERN_INFO "btrfs: unrecognized mount option " 680 printk(KERN_INFO "btrfs: unrecognized mount option "
631 "'%s'\n", p); 681 "'%s'\n", p);
@@ -654,8 +704,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
654{ 704{
655 substring_t args[MAX_OPT_ARGS]; 705 substring_t args[MAX_OPT_ARGS];
656 char *device_name, *opts, *orig, *p; 706 char *device_name, *opts, *orig, *p;
707 char *num = NULL;
657 int error = 0; 708 int error = 0;
658 int intarg;
659 709
660 if (!options) 710 if (!options)
661 return 0; 711 return 0;
@@ -679,17 +729,23 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
679 case Opt_subvol: 729 case Opt_subvol:
680 kfree(*subvol_name); 730 kfree(*subvol_name);
681 *subvol_name = match_strdup(&args[0]); 731 *subvol_name = match_strdup(&args[0]);
732 if (!*subvol_name) {
733 error = -ENOMEM;
734 goto out;
735 }
682 break; 736 break;
683 case Opt_subvolid: 737 case Opt_subvolid:
684 intarg = 0; 738 num = match_strdup(&args[0]);
685 error = match_int(&args[0], &intarg); 739 if (num) {
686 if (!error) { 740 *subvol_objectid = memparse(num, NULL);
741 kfree(num);
687 /* we want the original fs_tree */ 742 /* we want the original fs_tree */
688 if (!intarg) 743 if (!*subvol_objectid)
689 *subvol_objectid = 744 *subvol_objectid =
690 BTRFS_FS_TREE_OBJECTID; 745 BTRFS_FS_TREE_OBJECTID;
691 else 746 } else {
692 *subvol_objectid = intarg; 747 error = -EINVAL;
748 goto out;
693 } 749 }
694 break; 750 break;
695 case Opt_subvolrootid: 751 case Opt_subvolrootid:
@@ -892,11 +948,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
892 if (btrfs_test_opt(root, NOBARRIER)) 948 if (btrfs_test_opt(root, NOBARRIER))
893 seq_puts(seq, ",nobarrier"); 949 seq_puts(seq, ",nobarrier");
894 if (info->max_inline != 8192 * 1024) 950 if (info->max_inline != 8192 * 1024)
895 seq_printf(seq, ",max_inline=%llu", 951 seq_printf(seq, ",max_inline=%llu", info->max_inline);
896 (unsigned long long)info->max_inline);
897 if (info->alloc_start != 0) 952 if (info->alloc_start != 0)
898 seq_printf(seq, ",alloc_start=%llu", 953 seq_printf(seq, ",alloc_start=%llu", info->alloc_start);
899 (unsigned long long)info->alloc_start);
900 if (info->thread_pool_size != min_t(unsigned long, 954 if (info->thread_pool_size != min_t(unsigned long,
901 num_online_cpus() + 2, 8)) 955 num_online_cpus() + 2, 8))
902 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 956 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -928,6 +982,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
928 seq_puts(seq, ",space_cache"); 982 seq_puts(seq, ",space_cache");
929 else 983 else
930 seq_puts(seq, ",nospace_cache"); 984 seq_puts(seq, ",nospace_cache");
985 if (btrfs_test_opt(root, RESCAN_UUID_TREE))
986 seq_puts(seq, ",rescan_uuid_tree");
931 if (btrfs_test_opt(root, CLEAR_CACHE)) 987 if (btrfs_test_opt(root, CLEAR_CACHE))
932 seq_puts(seq, ",clear_cache"); 988 seq_puts(seq, ",clear_cache");
933 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) 989 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
@@ -940,8 +996,24 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
940 seq_puts(seq, ",inode_cache"); 996 seq_puts(seq, ",inode_cache");
941 if (btrfs_test_opt(root, SKIP_BALANCE)) 997 if (btrfs_test_opt(root, SKIP_BALANCE))
942 seq_puts(seq, ",skip_balance"); 998 seq_puts(seq, ",skip_balance");
999 if (btrfs_test_opt(root, RECOVERY))
1000 seq_puts(seq, ",recovery");
1001#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1002 if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
1003 seq_puts(seq, ",check_int_data");
1004 else if (btrfs_test_opt(root, CHECK_INTEGRITY))
1005 seq_puts(seq, ",check_int");
1006 if (info->check_integrity_print_mask)
1007 seq_printf(seq, ",check_int_print_mask=%d",
1008 info->check_integrity_print_mask);
1009#endif
1010 if (info->metadata_ratio)
1011 seq_printf(seq, ",metadata_ratio=%d",
1012 info->metadata_ratio);
943 if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR)) 1013 if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
944 seq_puts(seq, ",fatal_errors=panic"); 1014 seq_puts(seq, ",fatal_errors=panic");
1015 if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1016 seq_printf(seq, ",commit=%d", info->commit_interval);
945 return 0; 1017 return 0;
946} 1018}
947 1019
@@ -1696,6 +1768,11 @@ static void btrfs_print_info(void)
1696 "\n"); 1768 "\n");
1697} 1769}
1698 1770
1771static int btrfs_run_sanity_tests(void)
1772{
1773 return btrfs_test_free_space_cache();
1774}
1775
1699static int __init init_btrfs_fs(void) 1776static int __init init_btrfs_fs(void)
1700{ 1777{
1701 int err; 1778 int err;
@@ -1734,23 +1811,32 @@ static int __init init_btrfs_fs(void)
1734 if (err) 1811 if (err)
1735 goto free_auto_defrag; 1812 goto free_auto_defrag;
1736 1813
1737 err = btrfs_interface_init(); 1814 err = btrfs_prelim_ref_init();
1738 if (err) 1815 if (err)
1739 goto free_delayed_ref; 1816 goto free_prelim_ref;
1740 1817
1741 err = register_filesystem(&btrfs_fs_type); 1818 err = btrfs_interface_init();
1742 if (err) 1819 if (err)
1743 goto unregister_ioctl; 1820 goto free_delayed_ref;
1744 1821
1745 btrfs_init_lockdep(); 1822 btrfs_init_lockdep();
1746 1823
1747 btrfs_print_info(); 1824 btrfs_print_info();
1748 btrfs_test_free_space_cache(); 1825
1826 err = btrfs_run_sanity_tests();
1827 if (err)
1828 goto unregister_ioctl;
1829
1830 err = register_filesystem(&btrfs_fs_type);
1831 if (err)
1832 goto unregister_ioctl;
1749 1833
1750 return 0; 1834 return 0;
1751 1835
1752unregister_ioctl: 1836unregister_ioctl:
1753 btrfs_interface_exit(); 1837 btrfs_interface_exit();
1838free_prelim_ref:
1839 btrfs_prelim_ref_exit();
1754free_delayed_ref: 1840free_delayed_ref:
1755 btrfs_delayed_ref_exit(); 1841 btrfs_delayed_ref_exit();
1756free_auto_defrag: 1842free_auto_defrag:
@@ -1777,6 +1863,7 @@ static void __exit exit_btrfs_fs(void)
1777 btrfs_delayed_ref_exit(); 1863 btrfs_delayed_ref_exit();
1778 btrfs_auto_defrag_exit(); 1864 btrfs_auto_defrag_exit();
1779 btrfs_delayed_inode_exit(); 1865 btrfs_delayed_inode_exit();
1866 btrfs_prelim_ref_exit();
1780 ordered_data_exit(); 1867 ordered_data_exit();
1781 extent_map_exit(); 1868 extent_map_exit();
1782 extent_io_exit(); 1869 extent_io_exit();
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
new file mode 100644
index 000000000000..580877625776
--- /dev/null
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -0,0 +1,34 @@
1/*
2 * Copyright (C) 2013 Fusion IO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_TESTS
20#define __BTRFS_TESTS
21
22#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
23
24#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__)
25
26int btrfs_test_free_space_cache(void);
27#else
28static inline int btrfs_test_free_space_cache(void)
29{
30 return 0;
31}
32#endif
33
34#endif
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
new file mode 100644
index 000000000000..6fc82010dc15
--- /dev/null
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -0,0 +1,395 @@
1/*
2 * Copyright (C) 2013 Fusion IO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/slab.h>
20#include "btrfs-tests.h"
21#include "../ctree.h"
22#include "../free-space-cache.h"
23
24#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
25static struct btrfs_block_group_cache *init_test_block_group(void)
26{
27 struct btrfs_block_group_cache *cache;
28
29 cache = kzalloc(sizeof(*cache), GFP_NOFS);
30 if (!cache)
31 return NULL;
32 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
33 GFP_NOFS);
34 if (!cache->free_space_ctl) {
35 kfree(cache);
36 return NULL;
37 }
38
39 cache->key.objectid = 0;
40 cache->key.offset = 1024 * 1024 * 1024;
41 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
42 cache->sectorsize = 4096;
43
44 spin_lock_init(&cache->lock);
45 INIT_LIST_HEAD(&cache->list);
46 INIT_LIST_HEAD(&cache->cluster_list);
47 INIT_LIST_HEAD(&cache->new_bg_list);
48
49 btrfs_init_free_space_ctl(cache);
50
51 return cache;
52}
53
54/*
55 * This test just does basic sanity checking, making sure we can add an extent
56 * entry and remove space from either end and the middle, and make sure we can
57 * remove space that covers adjacent extent entries.
58 */
59static int test_extents(struct btrfs_block_group_cache *cache)
60{
61 int ret = 0;
62
63 test_msg("Running extent only tests\n");
64
65 /* First just make sure we can remove an entire entry */
66 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
67 if (ret) {
68 test_msg("Error adding initial extents %d\n", ret);
69 return ret;
70 }
71
72 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
73 if (ret) {
74 test_msg("Error removing extent %d\n", ret);
75 return ret;
76 }
77
78 if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
79 test_msg("Full remove left some lingering space\n");
80 return -1;
81 }
82
83 /* Ok edge and middle cases now */
84 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
85 if (ret) {
86 test_msg("Error adding half extent %d\n", ret);
87 return ret;
88 }
89
90 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
91 if (ret) {
92 test_msg("Error removing tail end %d\n", ret);
93 return ret;
94 }
95
96 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
97 if (ret) {
98 test_msg("Error removing front end %d\n", ret);
99 return ret;
100 }
101
102 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
103 if (ret) {
104 test_msg("Error removing middle peice %d\n", ret);
105 return ret;
106 }
107
108 if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
109 test_msg("Still have space at the front\n");
110 return -1;
111 }
112
113 if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) {
114 test_msg("Still have space in the middle\n");
115 return -1;
116 }
117
118 if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
119 test_msg("Still have space at the end\n");
120 return -1;
121 }
122
123 /* Cleanup */
124 __btrfs_remove_free_space_cache(cache->free_space_ctl);
125
126 return 0;
127}
128
129static int test_bitmaps(struct btrfs_block_group_cache *cache)
130{
131 u64 next_bitmap_offset;
132 int ret;
133
134 test_msg("Running bitmap only tests\n");
135
136 ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
137 if (ret) {
138 test_msg("Couldn't create a bitmap entry %d\n", ret);
139 return ret;
140 }
141
142 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
143 if (ret) {
144 test_msg("Error removing bitmap full range %d\n", ret);
145 return ret;
146 }
147
148 if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
149 test_msg("Left some space in bitmap\n");
150 return -1;
151 }
152
153 ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
154 if (ret) {
155 test_msg("Couldn't add to our bitmap entry %d\n", ret);
156 return ret;
157 }
158
159 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
160 if (ret) {
161 test_msg("Couldn't remove middle chunk %d\n", ret);
162 return ret;
163 }
164
165 /*
166 * The first bitmap we have starts at offset 0 so the next one is just
167 * at the end of the first bitmap.
168 */
169 next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
170
171 /* Test a bit straddling two bitmaps */
172 ret = test_add_free_space_entry(cache, next_bitmap_offset -
173 (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
174 if (ret) {
175 test_msg("Couldn't add space that straddles two bitmaps %d\n",
176 ret);
177 return ret;
178 }
179
180 ret = btrfs_remove_free_space(cache, next_bitmap_offset -
181 (1 * 1024 * 1024), 2 * 1024 * 1024);
182 if (ret) {
183 test_msg("Couldn't remove overlapping space %d\n", ret);
184 return ret;
185 }
186
187 if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
188 2 * 1024 * 1024)) {
189 test_msg("Left some space when removing overlapping\n");
190 return -1;
191 }
192
193 __btrfs_remove_free_space_cache(cache->free_space_ctl);
194
195 return 0;
196}
197
198/* This is the high grade jackassery */
199static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
200{
201 u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
202 int ret;
203
204 test_msg("Running bitmap and extent tests\n");
205
206 /*
207 * First let's do something simple, an extent at the same offset as the
208 * bitmap, but the free space completely in the extent and then
209 * completely in the bitmap.
210 */
211 ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
212 if (ret) {
213 test_msg("Couldn't create bitmap entry %d\n", ret);
214 return ret;
215 }
216
217 ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
218 if (ret) {
219 test_msg("Couldn't add extent entry %d\n", ret);
220 return ret;
221 }
222
223 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
224 if (ret) {
225 test_msg("Couldn't remove extent entry %d\n", ret);
226 return ret;
227 }
228
229 if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
230 test_msg("Left remnants after our remove\n");
231 return -1;
232 }
233
234 /* Now to add back the extent entry and remove from the bitmap */
235 ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
236 if (ret) {
237 test_msg("Couldn't re-add extent entry %d\n", ret);
238 return ret;
239 }
240
241 ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
242 if (ret) {
243 test_msg("Couldn't remove from bitmap %d\n", ret);
244 return ret;
245 }
246
247 if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
248 test_msg("Left remnants in the bitmap\n");
249 return -1;
250 }
251
252 /*
253 * Ok so a little more evil, extent entry and bitmap at the same offset,
254 * removing an overlapping chunk.
255 */
256 ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
257 if (ret) {
258 test_msg("Couldn't add to a bitmap %d\n", ret);
259 return ret;
260 }
261
262 ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
263 if (ret) {
264 test_msg("Couldn't remove overlapping space %d\n", ret);
265 return ret;
266 }
267
268 if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
269 test_msg("Left over peices after removing overlapping\n");
270 return -1;
271 }
272
273 __btrfs_remove_free_space_cache(cache->free_space_ctl);
274
275 /* Now with the extent entry offset into the bitmap */
276 ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
277 if (ret) {
278 test_msg("Couldn't add space to the bitmap %d\n", ret);
279 return ret;
280 }
281
282 ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
283 if (ret) {
284 test_msg("Couldn't add extent to the cache %d\n", ret);
285 return ret;
286 }
287
288 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
289 if (ret) {
290 test_msg("Problem removing overlapping space %d\n", ret);
291 return ret;
292 }
293
294 if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
295 test_msg("Left something behind when removing space");
296 return -1;
297 }
298
299 /*
300 * This has blown up in the past, the extent entry starts before the
301 * bitmap entry, but we're trying to remove an offset that falls
302 * completely within the bitmap range and is in both the extent entry
303 * and the bitmap entry, looks like this
304 *
305 * [ extent ]
306 * [ bitmap ]
307 * [ del ]
308 */
309 __btrfs_remove_free_space_cache(cache->free_space_ctl);
310 ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
311 4 * 1024 * 1024, 1);
312 if (ret) {
313 test_msg("Couldn't add bitmap %d\n", ret);
314 return ret;
315 }
316
317 ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
318 5 * 1024 * 1024, 0);
319 if (ret) {
320 test_msg("Couldn't add extent entry %d\n", ret);
321 return ret;
322 }
323
324 ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
325 5 * 1024 * 1024);
326 if (ret) {
327 test_msg("Failed to free our space %d\n", ret);
328 return ret;
329 }
330
331 if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
332 5 * 1024 * 1024)) {
333 test_msg("Left stuff over\n");
334 return -1;
335 }
336
337 __btrfs_remove_free_space_cache(cache->free_space_ctl);
338
339 /*
340 * This blew up before, we have part of the free space in a bitmap and
341 * then the entirety of the rest of the space in an extent. This used
342 * to return -EAGAIN back from btrfs_remove_free_space, make sure this
343 * doesn't happen.
344 */
345 ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
346 if (ret) {
347 test_msg("Couldn't add bitmap entry %d\n", ret);
348 return ret;
349 }
350
351 ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
352 if (ret) {
353 test_msg("Couldn't add extent entry %d\n", ret);
354 return ret;
355 }
356
357 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
358 if (ret) {
359 test_msg("Error removing bitmap and extent overlapping %d\n", ret);
360 return ret;
361 }
362
363 __btrfs_remove_free_space_cache(cache->free_space_ctl);
364 return 0;
365}
366
367int btrfs_test_free_space_cache(void)
368{
369 struct btrfs_block_group_cache *cache;
370 int ret;
371
372 test_msg("Running btrfs free space cache tests\n");
373
374 cache = init_test_block_group();
375 if (!cache) {
376 test_msg("Couldn't run the tests\n");
377 return 0;
378 }
379
380 ret = test_extents(cache);
381 if (ret)
382 goto out;
383 ret = test_bitmaps(cache);
384 if (ret)
385 goto out;
386 ret = test_bitmaps_and_extents(cache);
387 if (ret)
388 goto out;
389out:
390 __btrfs_remove_free_space_cache(cache->free_space_ctl);
391 kfree(cache->free_space_ctl);
392 kfree(cache);
393 test_msg("Free space cache tests finished\n");
394 return ret;
395}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index af1931a5960d..cac4a3f76323 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -837,7 +837,7 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
837 * them in one of two extent_io trees. This is used to make sure all of 837 * them in one of two extent_io trees. This is used to make sure all of
838 * those extents are on disk for transaction or log commit 838 * those extents are on disk for transaction or log commit
839 */ 839 */
840int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 840static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
841 struct extent_io_tree *dirty_pages, int mark) 841 struct extent_io_tree *dirty_pages, int mark)
842{ 842{
843 int ret; 843 int ret;
@@ -1225,8 +1225,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1225 btrfs_set_root_stransid(new_root_item, 0); 1225 btrfs_set_root_stransid(new_root_item, 0);
1226 btrfs_set_root_rtransid(new_root_item, 0); 1226 btrfs_set_root_rtransid(new_root_item, 0);
1227 } 1227 }
1228 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); 1228 btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
1229 new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); 1229 btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
1230 btrfs_set_root_otransid(new_root_item, trans->transid); 1230 btrfs_set_root_otransid(new_root_item, trans->transid);
1231 1231
1232 old = btrfs_lock_root_node(root); 1232 old = btrfs_lock_root_node(root);
@@ -1311,8 +1311,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1311 dentry->d_name.len * 2); 1311 dentry->d_name.len * 2);
1312 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; 1312 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1313 ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); 1313 ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
1314 if (ret) 1314 if (ret) {
1315 btrfs_abort_transaction(trans, root, ret);
1316 goto fail;
1317 }
1318 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
1319 BTRFS_UUID_KEY_SUBVOL, objectid);
1320 if (ret) {
1315 btrfs_abort_transaction(trans, root, ret); 1321 btrfs_abort_transaction(trans, root, ret);
1322 goto fail;
1323 }
1324 if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
1325 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
1326 new_root_item->received_uuid,
1327 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
1328 objectid);
1329 if (ret && ret != -EEXIST) {
1330 btrfs_abort_transaction(trans, root, ret);
1331 goto fail;
1332 }
1333 }
1316fail: 1334fail:
1317 pending->error = ret; 1335 pending->error = ret;
1318dir_item_existed: 1336dir_item_existed:
@@ -1362,6 +1380,8 @@ static void update_super_roots(struct btrfs_root *root)
1362 super->root_level = root_item->level; 1380 super->root_level = root_item->level;
1363 if (btrfs_test_opt(root, SPACE_CACHE)) 1381 if (btrfs_test_opt(root, SPACE_CACHE))
1364 super->cache_generation = root_item->generation; 1382 super->cache_generation = root_item->generation;
1383 if (root->fs_info->update_uuid_tree_gen)
1384 super->uuid_tree_generation = root_item->generation;
1365} 1385}
1366 1386
1367int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1387int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -1928,8 +1948,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1928 list_del_init(&root->root_list); 1948 list_del_init(&root->root_list);
1929 spin_unlock(&fs_info->trans_lock); 1949 spin_unlock(&fs_info->trans_lock);
1930 1950
1931 pr_debug("btrfs: cleaner removing %llu\n", 1951 pr_debug("btrfs: cleaner removing %llu\n", root->objectid);
1932 (unsigned long long)root->objectid);
1933 1952
1934 btrfs_kill_all_delayed_nodes(root); 1953 btrfs_kill_all_delayed_nodes(root);
1935 1954
@@ -1942,6 +1961,5 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1942 * If we encounter a transaction abort during snapshot cleaning, we 1961 * If we encounter a transaction abort during snapshot cleaning, we
1943 * don't want to crash here 1962 * don't want to crash here
1944 */ 1963 */
1945 BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS); 1964 return (ret < 0) ? 0 : 1;
1946 return 1;
1947} 1965}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index defbc4269897..5c2af8491621 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -160,8 +160,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
160void btrfs_throttle(struct btrfs_root *root); 160void btrfs_throttle(struct btrfs_root *root);
161int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 161int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
162 struct btrfs_root *root); 162 struct btrfs_root *root);
163int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
164 struct extent_io_tree *dirty_pages, int mark);
165int btrfs_write_marked_extents(struct btrfs_root *root, 163int btrfs_write_marked_extents(struct btrfs_root *root,
166 struct extent_io_tree *dirty_pages, int mark); 164 struct extent_io_tree *dirty_pages, int mark);
167int btrfs_wait_marked_extents(struct btrfs_root *root, 165int btrfs_wait_marked_extents(struct btrfs_root *root,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ff60d8978ae2..0d9613c3f5e5 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -747,7 +747,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
747 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 747 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
748 if (ret) 748 if (ret)
749 goto out; 749 goto out;
750 btrfs_run_delayed_items(trans, root); 750 else
751 ret = btrfs_run_delayed_items(trans, root);
751out: 752out:
752 kfree(name); 753 kfree(name);
753 iput(inode); 754 iput(inode);
@@ -923,7 +924,9 @@ again:
923 kfree(victim_name); 924 kfree(victim_name);
924 if (ret) 925 if (ret)
925 return ret; 926 return ret;
926 btrfs_run_delayed_items(trans, root); 927 ret = btrfs_run_delayed_items(trans, root);
928 if (ret)
929 return ret;
927 *search_done = 1; 930 *search_done = 1;
928 goto again; 931 goto again;
929 } 932 }
@@ -990,7 +993,9 @@ again:
990 inode, 993 inode,
991 victim_name, 994 victim_name,
992 victim_name_len); 995 victim_name_len);
993 btrfs_run_delayed_items(trans, root); 996 if (!ret)
997 ret = btrfs_run_delayed_items(
998 trans, root);
994 } 999 }
995 iput(victim_parent); 1000 iput(victim_parent);
996 kfree(victim_name); 1001 kfree(victim_name);
@@ -1536,8 +1541,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1536 1541
1537 name_len = btrfs_dir_name_len(eb, di); 1542 name_len = btrfs_dir_name_len(eb, di);
1538 name = kmalloc(name_len, GFP_NOFS); 1543 name = kmalloc(name_len, GFP_NOFS);
1539 if (!name) 1544 if (!name) {
1540 return -ENOMEM; 1545 ret = -ENOMEM;
1546 goto out;
1547 }
1541 1548
1542 log_type = btrfs_dir_type(eb, di); 1549 log_type = btrfs_dir_type(eb, di);
1543 read_extent_buffer(eb, name, (unsigned long)(di + 1), 1550 read_extent_buffer(eb, name, (unsigned long)(di + 1),
@@ -1810,7 +1817,7 @@ again:
1810 ret = btrfs_unlink_inode(trans, root, dir, inode, 1817 ret = btrfs_unlink_inode(trans, root, dir, inode,
1811 name, name_len); 1818 name, name_len);
1812 if (!ret) 1819 if (!ret)
1813 btrfs_run_delayed_items(trans, root); 1820 ret = btrfs_run_delayed_items(trans, root);
1814 kfree(name); 1821 kfree(name);
1815 iput(inode); 1822 iput(inode);
1816 if (ret) 1823 if (ret)
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
new file mode 100644
index 000000000000..dd0dea3766f7
--- /dev/null
+++ b/fs/btrfs/uuid-tree.c
@@ -0,0 +1,358 @@
1/*
2 * Copyright (C) STRATO AG 2013. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18#include <linux/uuid.h>
19#include <asm/unaligned.h>
20#include "ctree.h"
21#include "transaction.h"
22#include "disk-io.h"
23#include "print-tree.h"
24
25
26static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
27{
28 key->type = type;
29 key->objectid = get_unaligned_le64(uuid);
30 key->offset = get_unaligned_le64(uuid + sizeof(u64));
31}
32
33/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */
34static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
35 u8 type, u64 subid)
36{
37 int ret;
38 struct btrfs_path *path = NULL;
39 struct extent_buffer *eb;
40 int slot;
41 u32 item_size;
42 unsigned long offset;
43 struct btrfs_key key;
44
45 if (WARN_ON_ONCE(!uuid_root)) {
46 ret = -ENOENT;
47 goto out;
48 }
49
50 path = btrfs_alloc_path();
51 if (!path) {
52 ret = -ENOMEM;
53 goto out;
54 }
55
56 btrfs_uuid_to_key(uuid, type, &key);
57 ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0);
58 if (ret < 0) {
59 goto out;
60 } else if (ret > 0) {
61 ret = -ENOENT;
62 goto out;
63 }
64
65 eb = path->nodes[0];
66 slot = path->slots[0];
67 item_size = btrfs_item_size_nr(eb, slot);
68 offset = btrfs_item_ptr_offset(eb, slot);
69 ret = -ENOENT;
70
71 if (!IS_ALIGNED(item_size, sizeof(u64))) {
72 pr_warn("btrfs: uuid item with illegal size %lu!\n",
73 (unsigned long)item_size);
74 goto out;
75 }
76 while (item_size) {
77 __le64 data;
78
79 read_extent_buffer(eb, &data, offset, sizeof(data));
80 if (le64_to_cpu(data) == subid) {
81 ret = 0;
82 break;
83 }
84 offset += sizeof(data);
85 item_size -= sizeof(data);
86 }
87
88out:
89 btrfs_free_path(path);
90 return ret;
91}
92
93int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
94 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
95 u64 subid_cpu)
96{
97 int ret;
98 struct btrfs_path *path = NULL;
99 struct btrfs_key key;
100 struct extent_buffer *eb;
101 int slot;
102 unsigned long offset;
103 __le64 subid_le;
104
105 ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu);
106 if (ret != -ENOENT)
107 return ret;
108
109 if (WARN_ON_ONCE(!uuid_root)) {
110 ret = -EINVAL;
111 goto out;
112 }
113
114 btrfs_uuid_to_key(uuid, type, &key);
115
116 path = btrfs_alloc_path();
117 if (!path) {
118 ret = -ENOMEM;
119 goto out;
120 }
121
122 ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
123 sizeof(subid_le));
124 if (ret >= 0) {
125 /* Add an item for the type for the first time */
126 eb = path->nodes[0];
127 slot = path->slots[0];
128 offset = btrfs_item_ptr_offset(eb, slot);
129 } else if (ret == -EEXIST) {
130 /*
131 * An item with that type already exists.
132 * Extend the item and store the new subid at the end.
133 */
134 btrfs_extend_item(uuid_root, path, sizeof(subid_le));
135 eb = path->nodes[0];
136 slot = path->slots[0];
137 offset = btrfs_item_ptr_offset(eb, slot);
138 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
139 } else if (ret < 0) {
140 pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n",
141 ret, (unsigned long long)key.objectid,
142 (unsigned long long)key.offset, type);
143 goto out;
144 }
145
146 ret = 0;
147 subid_le = cpu_to_le64(subid_cpu);
148 write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
149 btrfs_mark_buffer_dirty(eb);
150
151out:
152 btrfs_free_path(path);
153 return ret;
154}
155
156int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
157 struct btrfs_root *uuid_root, u8 *uuid, u8 type,
158 u64 subid)
159{
160 int ret;
161 struct btrfs_path *path = NULL;
162 struct btrfs_key key;
163 struct extent_buffer *eb;
164 int slot;
165 unsigned long offset;
166 u32 item_size;
167 unsigned long move_dst;
168 unsigned long move_src;
169 unsigned long move_len;
170
171 if (WARN_ON_ONCE(!uuid_root)) {
172 ret = -EINVAL;
173 goto out;
174 }
175
176 btrfs_uuid_to_key(uuid, type, &key);
177
178 path = btrfs_alloc_path();
179 if (!path) {
180 ret = -ENOMEM;
181 goto out;
182 }
183
184 ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
185 if (ret < 0) {
186 pr_warn("btrfs: error %d while searching for uuid item!\n",
187 ret);
188 goto out;
189 }
190 if (ret > 0) {
191 ret = -ENOENT;
192 goto out;
193 }
194
195 eb = path->nodes[0];
196 slot = path->slots[0];
197 offset = btrfs_item_ptr_offset(eb, slot);
198 item_size = btrfs_item_size_nr(eb, slot);
199 if (!IS_ALIGNED(item_size, sizeof(u64))) {
200 pr_warn("btrfs: uuid item with illegal size %lu!\n",
201 (unsigned long)item_size);
202 ret = -ENOENT;
203 goto out;
204 }
205 while (item_size) {
206 __le64 read_subid;
207
208 read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid));
209 if (le64_to_cpu(read_subid) == subid)
210 break;
211 offset += sizeof(read_subid);
212 item_size -= sizeof(read_subid);
213 }
214
215 if (!item_size) {
216 ret = -ENOENT;
217 goto out;
218 }
219
220 item_size = btrfs_item_size_nr(eb, slot);
221 if (item_size == sizeof(subid)) {
222 ret = btrfs_del_item(trans, uuid_root, path);
223 goto out;
224 }
225
226 move_dst = offset;
227 move_src = offset + sizeof(subid);
228 move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
229 memmove_extent_buffer(eb, move_dst, move_src, move_len);
230 btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1);
231
232out:
233 btrfs_free_path(path);
234 return ret;
235}
236
237static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
238 u64 subid)
239{
240 struct btrfs_trans_handle *trans;
241 int ret;
242
243 /* 1 - for the uuid item */
244 trans = btrfs_start_transaction(uuid_root, 1);
245 if (IS_ERR(trans)) {
246 ret = PTR_ERR(trans);
247 goto out;
248 }
249
250 ret = btrfs_uuid_tree_rem(trans, uuid_root, uuid, type, subid);
251 btrfs_end_transaction(trans, uuid_root);
252
253out:
254 return ret;
255}
256
257int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
258 int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
259 u64))
260{
261 struct btrfs_root *root = fs_info->uuid_root;
262 struct btrfs_key key;
263 struct btrfs_key max_key;
264 struct btrfs_path *path;
265 int ret = 0;
266 struct extent_buffer *leaf;
267 int slot;
268 u32 item_size;
269 unsigned long offset;
270
271 path = btrfs_alloc_path();
272 if (!path) {
273 ret = -ENOMEM;
274 goto out;
275 }
276
277 key.objectid = 0;
278 key.type = 0;
279 key.offset = 0;
280 max_key.objectid = (u64)-1;
281 max_key.type = (u8)-1;
282 max_key.offset = (u64)-1;
283
284again_search_slot:
285 path->keep_locks = 1;
286 ret = btrfs_search_forward(root, &key, &max_key, path, 0);
287 if (ret) {
288 if (ret > 0)
289 ret = 0;
290 goto out;
291 }
292
293 while (1) {
294 cond_resched();
295 leaf = path->nodes[0];
296 slot = path->slots[0];
297 btrfs_item_key_to_cpu(leaf, &key, slot);
298
299 if (key.type != BTRFS_UUID_KEY_SUBVOL &&
300 key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
301 goto skip;
302
303 offset = btrfs_item_ptr_offset(leaf, slot);
304 item_size = btrfs_item_size_nr(leaf, slot);
305 if (!IS_ALIGNED(item_size, sizeof(u64))) {
306 pr_warn("btrfs: uuid item with illegal size %lu!\n",
307 (unsigned long)item_size);
308 goto skip;
309 }
310 while (item_size) {
311 u8 uuid[BTRFS_UUID_SIZE];
312 __le64 subid_le;
313 u64 subid_cpu;
314
315 put_unaligned_le64(key.objectid, uuid);
316 put_unaligned_le64(key.offset, uuid + sizeof(u64));
317 read_extent_buffer(leaf, &subid_le, offset,
318 sizeof(subid_le));
319 subid_cpu = le64_to_cpu(subid_le);
320 ret = check_func(fs_info, uuid, key.type, subid_cpu);
321 if (ret < 0)
322 goto out;
323 if (ret > 0) {
324 btrfs_release_path(path);
325 ret = btrfs_uuid_iter_rem(root, uuid, key.type,
326 subid_cpu);
327 if (ret == 0) {
328 /*
329 * this might look inefficient, but the
330 * justification is that it is an
331 * exception that check_func returns 1,
332 * and that in the regular case only one
333 * entry per UUID exists.
334 */
335 goto again_search_slot;
336 }
337 if (ret < 0 && ret != -ENOENT)
338 goto out;
339 }
340 item_size -= sizeof(subid_le);
341 offset += sizeof(subid_le);
342 }
343
344skip:
345 ret = btrfs_next_item(root, path);
346 if (ret == 0)
347 continue;
348 else if (ret > 0)
349 ret = 0;
350 break;
351 }
352
353out:
354 btrfs_free_path(path);
355 if (ret)
356 pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret);
357 return 0;
358}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 67a085381845..0052ca8264d9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -26,6 +26,7 @@
26#include <linux/ratelimit.h> 26#include <linux/ratelimit.h>
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/raid/pq.h> 28#include <linux/raid/pq.h>
29#include <linux/semaphore.h>
29#include <asm/div64.h> 30#include <asm/div64.h>
30#include "compat.h" 31#include "compat.h"
31#include "ctree.h" 32#include "ctree.h"
@@ -62,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root)
62 mutex_unlock(&root->fs_info->chunk_mutex); 63 mutex_unlock(&root->fs_info->chunk_mutex);
63} 64}
64 65
66static struct btrfs_fs_devices *__alloc_fs_devices(void)
67{
68 struct btrfs_fs_devices *fs_devs;
69
70 fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
71 if (!fs_devs)
72 return ERR_PTR(-ENOMEM);
73
74 mutex_init(&fs_devs->device_list_mutex);
75
76 INIT_LIST_HEAD(&fs_devs->devices);
77 INIT_LIST_HEAD(&fs_devs->alloc_list);
78 INIT_LIST_HEAD(&fs_devs->list);
79
80 return fs_devs;
81}
82
83/**
84 * alloc_fs_devices - allocate struct btrfs_fs_devices
85 * @fsid: a pointer to UUID for this FS. If NULL a new UUID is
86 * generated.
87 *
88 * Return: a pointer to a new &struct btrfs_fs_devices on success;
89 * ERR_PTR() on error. Returned struct is not linked onto any lists and
90 * can be destroyed with kfree() right away.
91 */
92static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
93{
94 struct btrfs_fs_devices *fs_devs;
95
96 fs_devs = __alloc_fs_devices();
97 if (IS_ERR(fs_devs))
98 return fs_devs;
99
100 if (fsid)
101 memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
102 else
103 generate_random_uuid(fs_devs->fsid);
104
105 return fs_devs;
106}
107
65static void free_fs_devices(struct btrfs_fs_devices *fs_devices) 108static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
66{ 109{
67 struct btrfs_device *device; 110 struct btrfs_device *device;
@@ -101,6 +144,27 @@ void btrfs_cleanup_fs_uuids(void)
101 } 144 }
102} 145}
103 146
147static struct btrfs_device *__alloc_device(void)
148{
149 struct btrfs_device *dev;
150
151 dev = kzalloc(sizeof(*dev), GFP_NOFS);
152 if (!dev)
153 return ERR_PTR(-ENOMEM);
154
155 INIT_LIST_HEAD(&dev->dev_list);
156 INIT_LIST_HEAD(&dev->dev_alloc_list);
157
158 spin_lock_init(&dev->io_lock);
159
160 spin_lock_init(&dev->reada_lock);
161 atomic_set(&dev->reada_in_flight, 0);
162 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
163 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
164
165 return dev;
166}
167
104static noinline struct btrfs_device *__find_device(struct list_head *head, 168static noinline struct btrfs_device *__find_device(struct list_head *head,
105 u64 devid, u8 *uuid) 169 u64 devid, u8 *uuid)
106{ 170{
@@ -395,16 +459,14 @@ static noinline int device_list_add(const char *path,
395 459
396 fs_devices = find_fsid(disk_super->fsid); 460 fs_devices = find_fsid(disk_super->fsid);
397 if (!fs_devices) { 461 if (!fs_devices) {
398 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); 462 fs_devices = alloc_fs_devices(disk_super->fsid);
399 if (!fs_devices) 463 if (IS_ERR(fs_devices))
400 return -ENOMEM; 464 return PTR_ERR(fs_devices);
401 INIT_LIST_HEAD(&fs_devices->devices); 465
402 INIT_LIST_HEAD(&fs_devices->alloc_list);
403 list_add(&fs_devices->list, &fs_uuids); 466 list_add(&fs_devices->list, &fs_uuids);
404 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
405 fs_devices->latest_devid = devid; 467 fs_devices->latest_devid = devid;
406 fs_devices->latest_trans = found_transid; 468 fs_devices->latest_trans = found_transid;
407 mutex_init(&fs_devices->device_list_mutex); 469
408 device = NULL; 470 device = NULL;
409 } else { 471 } else {
410 device = __find_device(&fs_devices->devices, devid, 472 device = __find_device(&fs_devices->devices, devid,
@@ -414,17 +476,12 @@ static noinline int device_list_add(const char *path,
414 if (fs_devices->opened) 476 if (fs_devices->opened)
415 return -EBUSY; 477 return -EBUSY;
416 478
417 device = kzalloc(sizeof(*device), GFP_NOFS); 479 device = btrfs_alloc_device(NULL, &devid,
418 if (!device) { 480 disk_super->dev_item.uuid);
481 if (IS_ERR(device)) {
419 /* we can safely leave the fs_devices entry around */ 482 /* we can safely leave the fs_devices entry around */
420 return -ENOMEM; 483 return PTR_ERR(device);
421 } 484 }
422 device->devid = devid;
423 device->dev_stats_valid = 0;
424 device->work.func = pending_bios_fn;
425 memcpy(device->uuid, disk_super->dev_item.uuid,
426 BTRFS_UUID_SIZE);
427 spin_lock_init(&device->io_lock);
428 485
429 name = rcu_string_strdup(path, GFP_NOFS); 486 name = rcu_string_strdup(path, GFP_NOFS);
430 if (!name) { 487 if (!name) {
@@ -432,22 +489,13 @@ static noinline int device_list_add(const char *path,
432 return -ENOMEM; 489 return -ENOMEM;
433 } 490 }
434 rcu_assign_pointer(device->name, name); 491 rcu_assign_pointer(device->name, name);
435 INIT_LIST_HEAD(&device->dev_alloc_list);
436
437 /* init readahead state */
438 spin_lock_init(&device->reada_lock);
439 device->reada_curr_zone = NULL;
440 atomic_set(&device->reada_in_flight, 0);
441 device->reada_next = 0;
442 INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
443 INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
444 492
445 mutex_lock(&fs_devices->device_list_mutex); 493 mutex_lock(&fs_devices->device_list_mutex);
446 list_add_rcu(&device->dev_list, &fs_devices->devices); 494 list_add_rcu(&device->dev_list, &fs_devices->devices);
495 fs_devices->num_devices++;
447 mutex_unlock(&fs_devices->device_list_mutex); 496 mutex_unlock(&fs_devices->device_list_mutex);
448 497
449 device->fs_devices = fs_devices; 498 device->fs_devices = fs_devices;
450 fs_devices->num_devices++;
451 } else if (!device->name || strcmp(device->name->str, path)) { 499 } else if (!device->name || strcmp(device->name->str, path)) {
452 name = rcu_string_strdup(path, GFP_NOFS); 500 name = rcu_string_strdup(path, GFP_NOFS);
453 if (!name) 501 if (!name)
@@ -474,25 +522,21 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
474 struct btrfs_device *device; 522 struct btrfs_device *device;
475 struct btrfs_device *orig_dev; 523 struct btrfs_device *orig_dev;
476 524
477 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); 525 fs_devices = alloc_fs_devices(orig->fsid);
478 if (!fs_devices) 526 if (IS_ERR(fs_devices))
479 return ERR_PTR(-ENOMEM); 527 return fs_devices;
480 528
481 INIT_LIST_HEAD(&fs_devices->devices);
482 INIT_LIST_HEAD(&fs_devices->alloc_list);
483 INIT_LIST_HEAD(&fs_devices->list);
484 mutex_init(&fs_devices->device_list_mutex);
485 fs_devices->latest_devid = orig->latest_devid; 529 fs_devices->latest_devid = orig->latest_devid;
486 fs_devices->latest_trans = orig->latest_trans; 530 fs_devices->latest_trans = orig->latest_trans;
487 fs_devices->total_devices = orig->total_devices; 531 fs_devices->total_devices = orig->total_devices;
488 memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
489 532
490 /* We have held the volume lock, it is safe to get the devices. */ 533 /* We have held the volume lock, it is safe to get the devices. */
491 list_for_each_entry(orig_dev, &orig->devices, dev_list) { 534 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
492 struct rcu_string *name; 535 struct rcu_string *name;
493 536
494 device = kzalloc(sizeof(*device), GFP_NOFS); 537 device = btrfs_alloc_device(NULL, &orig_dev->devid,
495 if (!device) 538 orig_dev->uuid);
539 if (IS_ERR(device))
496 goto error; 540 goto error;
497 541
498 /* 542 /*
@@ -506,13 +550,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
506 } 550 }
507 rcu_assign_pointer(device->name, name); 551 rcu_assign_pointer(device->name, name);
508 552
509 device->devid = orig_dev->devid;
510 device->work.func = pending_bios_fn;
511 memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
512 spin_lock_init(&device->io_lock);
513 INIT_LIST_HEAD(&device->dev_list);
514 INIT_LIST_HEAD(&device->dev_alloc_list);
515
516 list_add(&device->dev_list, &fs_devices->devices); 553 list_add(&device->dev_list, &fs_devices->devices);
517 device->fs_devices = fs_devices; 554 device->fs_devices = fs_devices;
518 fs_devices->num_devices++; 555 fs_devices->num_devices++;
@@ -636,23 +673,22 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
636 673
637 if (device->can_discard) 674 if (device->can_discard)
638 fs_devices->num_can_discard--; 675 fs_devices->num_can_discard--;
676 if (device->missing)
677 fs_devices->missing_devices--;
639 678
640 new_device = kmalloc(sizeof(*new_device), GFP_NOFS); 679 new_device = btrfs_alloc_device(NULL, &device->devid,
641 BUG_ON(!new_device); /* -ENOMEM */ 680 device->uuid);
642 memcpy(new_device, device, sizeof(*new_device)); 681 BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
643 682
644 /* Safe because we are under uuid_mutex */ 683 /* Safe because we are under uuid_mutex */
645 if (device->name) { 684 if (device->name) {
646 name = rcu_string_strdup(device->name->str, GFP_NOFS); 685 name = rcu_string_strdup(device->name->str, GFP_NOFS);
647 BUG_ON(device->name && !name); /* -ENOMEM */ 686 BUG_ON(!name); /* -ENOMEM */
648 rcu_assign_pointer(new_device->name, name); 687 rcu_assign_pointer(new_device->name, name);
649 } 688 }
650 new_device->bdev = NULL; 689
651 new_device->writeable = 0;
652 new_device->in_fs_metadata = 0;
653 new_device->can_discard = 0;
654 spin_lock_init(&new_device->io_lock);
655 list_replace_rcu(&device->dev_list, &new_device->dev_list); 690 list_replace_rcu(&device->dev_list, &new_device->dev_list);
691 new_device->fs_devices = device->fs_devices;
656 692
657 call_rcu(&device->rcu, free_device); 693 call_rcu(&device->rcu, free_device);
658 } 694 }
@@ -865,7 +901,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
865 disk_super = p + (bytenr & ~PAGE_CACHE_MASK); 901 disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
866 902
867 if (btrfs_super_bytenr(disk_super) != bytenr || 903 if (btrfs_super_bytenr(disk_super) != bytenr ||
868 disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) 904 btrfs_super_magic(disk_super) != BTRFS_MAGIC)
869 goto error_unmap; 905 goto error_unmap;
870 906
871 devid = btrfs_stack_device_id(&disk_super->dev_item); 907 devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -880,8 +916,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
880 printk(KERN_INFO "device fsid %pU ", disk_super->fsid); 916 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
881 } 917 }
882 918
883 printk(KERN_CONT "devid %llu transid %llu %s\n", 919 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
884 (unsigned long long)devid, (unsigned long long)transid, path);
885 920
886 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 921 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
887 if (!ret && fs_devices_ret) 922 if (!ret && fs_devices_ret)
@@ -1278,8 +1313,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1278 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); 1313 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1279 1314
1280 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 1315 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
1281 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), 1316 btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE);
1282 BTRFS_UUID_SIZE);
1283 1317
1284 btrfs_set_dev_extent_length(leaf, extent, num_bytes); 1318 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1285 btrfs_mark_buffer_dirty(leaf); 1319 btrfs_mark_buffer_dirty(leaf);
@@ -1307,15 +1341,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
1307 return ret; 1341 return ret;
1308} 1342}
1309 1343
1310static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) 1344static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
1345 u64 *devid_ret)
1311{ 1346{
1312 int ret; 1347 int ret;
1313 struct btrfs_key key; 1348 struct btrfs_key key;
1314 struct btrfs_key found_key; 1349 struct btrfs_key found_key;
1315 struct btrfs_path *path; 1350 struct btrfs_path *path;
1316 1351
1317 root = root->fs_info->chunk_root;
1318
1319 path = btrfs_alloc_path(); 1352 path = btrfs_alloc_path();
1320 if (!path) 1353 if (!path)
1321 return -ENOMEM; 1354 return -ENOMEM;
@@ -1324,20 +1357,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
1324 key.type = BTRFS_DEV_ITEM_KEY; 1357 key.type = BTRFS_DEV_ITEM_KEY;
1325 key.offset = (u64)-1; 1358 key.offset = (u64)-1;
1326 1359
1327 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1360 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
1328 if (ret < 0) 1361 if (ret < 0)
1329 goto error; 1362 goto error;
1330 1363
1331 BUG_ON(ret == 0); /* Corruption */ 1364 BUG_ON(ret == 0); /* Corruption */
1332 1365
1333 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, 1366 ret = btrfs_previous_item(fs_info->chunk_root, path,
1367 BTRFS_DEV_ITEMS_OBJECTID,
1334 BTRFS_DEV_ITEM_KEY); 1368 BTRFS_DEV_ITEM_KEY);
1335 if (ret) { 1369 if (ret) {
1336 *objectid = 1; 1370 *devid_ret = 1;
1337 } else { 1371 } else {
1338 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1372 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1339 path->slots[0]); 1373 path->slots[0]);
1340 *objectid = found_key.offset + 1; 1374 *devid_ret = found_key.offset + 1;
1341 } 1375 }
1342 ret = 0; 1376 ret = 0;
1343error: 1377error:
@@ -1391,9 +1425,9 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
1391 btrfs_set_device_bandwidth(leaf, dev_item, 0); 1425 btrfs_set_device_bandwidth(leaf, dev_item, 0);
1392 btrfs_set_device_start_offset(leaf, dev_item, 0); 1426 btrfs_set_device_start_offset(leaf, dev_item, 0);
1393 1427
1394 ptr = (unsigned long)btrfs_device_uuid(dev_item); 1428 ptr = btrfs_device_uuid(dev_item);
1395 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 1429 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1396 ptr = (unsigned long)btrfs_device_fsid(dev_item); 1430 ptr = btrfs_device_fsid(dev_item);
1397 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); 1431 write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
1398 btrfs_mark_buffer_dirty(leaf); 1432 btrfs_mark_buffer_dirty(leaf);
1399 1433
@@ -1562,7 +1596,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1562 clear_super = true; 1596 clear_super = true;
1563 } 1597 }
1564 1598
1599 mutex_unlock(&uuid_mutex);
1565 ret = btrfs_shrink_device(device, 0); 1600 ret = btrfs_shrink_device(device, 0);
1601 mutex_lock(&uuid_mutex);
1566 if (ret) 1602 if (ret)
1567 goto error_undo; 1603 goto error_undo;
1568 1604
@@ -1586,7 +1622,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1586 /* 1622 /*
1587 * the device list mutex makes sure that we don't change 1623 * the device list mutex makes sure that we don't change
1588 * the device list while someone else is writing out all 1624 * the device list while someone else is writing out all
1589 * the device supers. 1625 * the device supers. Whoever is writing all supers, should
1626 * lock the device list mutex before getting the number of
1627 * devices in the super block (super_copy). Conversely,
1628 * whoever updates the number of devices in the super block
1629 * (super_copy) should hold the device list mutex.
1590 */ 1630 */
1591 1631
1592 cur_devices = device->fs_devices; 1632 cur_devices = device->fs_devices;
@@ -1610,10 +1650,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1610 device->fs_devices->open_devices--; 1650 device->fs_devices->open_devices--;
1611 1651
1612 call_rcu(&device->rcu, free_device); 1652 call_rcu(&device->rcu, free_device);
1613 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1614 1653
1615 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; 1654 num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
1616 btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); 1655 btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
1656 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1617 1657
1618 if (cur_devices->open_devices == 0) { 1658 if (cur_devices->open_devices == 0) {
1619 struct btrfs_fs_devices *fs_devices; 1659 struct btrfs_fs_devices *fs_devices;
@@ -1793,9 +1833,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1793 if (!fs_devices->seeding) 1833 if (!fs_devices->seeding)
1794 return -EINVAL; 1834 return -EINVAL;
1795 1835
1796 seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); 1836 seed_devices = __alloc_fs_devices();
1797 if (!seed_devices) 1837 if (IS_ERR(seed_devices))
1798 return -ENOMEM; 1838 return PTR_ERR(seed_devices);
1799 1839
1800 old_devices = clone_fs_devices(fs_devices); 1840 old_devices = clone_fs_devices(fs_devices);
1801 if (IS_ERR(old_devices)) { 1841 if (IS_ERR(old_devices)) {
@@ -1814,7 +1854,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1814 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 1854 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1815 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, 1855 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
1816 synchronize_rcu); 1856 synchronize_rcu);
1817 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1818 1857
1819 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); 1858 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
1820 list_for_each_entry(device, &seed_devices->devices, dev_list) { 1859 list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1830,6 +1869,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
1830 generate_random_uuid(fs_devices->fsid); 1869 generate_random_uuid(fs_devices->fsid);
1831 memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 1870 memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1832 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); 1871 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
1872 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1873
1833 super_flags = btrfs_super_flags(disk_super) & 1874 super_flags = btrfs_super_flags(disk_super) &
1834 ~BTRFS_SUPER_FLAG_SEEDING; 1875 ~BTRFS_SUPER_FLAG_SEEDING;
1835 btrfs_set_super_flags(disk_super, super_flags); 1876 btrfs_set_super_flags(disk_super, super_flags);
@@ -1889,11 +1930,9 @@ next_slot:
1889 dev_item = btrfs_item_ptr(leaf, path->slots[0], 1930 dev_item = btrfs_item_ptr(leaf, path->slots[0],
1890 struct btrfs_dev_item); 1931 struct btrfs_dev_item);
1891 devid = btrfs_device_id(leaf, dev_item); 1932 devid = btrfs_device_id(leaf, dev_item);
1892 read_extent_buffer(leaf, dev_uuid, 1933 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
1893 (unsigned long)btrfs_device_uuid(dev_item),
1894 BTRFS_UUID_SIZE); 1934 BTRFS_UUID_SIZE);
1895 read_extent_buffer(leaf, fs_uuid, 1935 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
1896 (unsigned long)btrfs_device_fsid(dev_item),
1897 BTRFS_UUID_SIZE); 1936 BTRFS_UUID_SIZE);
1898 device = btrfs_find_device(root->fs_info, devid, dev_uuid, 1937 device = btrfs_find_device(root->fs_info, devid, dev_uuid,
1899 fs_uuid); 1938 fs_uuid);
@@ -1956,10 +1995,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1956 } 1995 }
1957 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 1996 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1958 1997
1959 device = kzalloc(sizeof(*device), GFP_NOFS); 1998 device = btrfs_alloc_device(root->fs_info, NULL, NULL);
1960 if (!device) { 1999 if (IS_ERR(device)) {
1961 /* we can safely leave the fs_devices entry around */ 2000 /* we can safely leave the fs_devices entry around */
1962 ret = -ENOMEM; 2001 ret = PTR_ERR(device);
1963 goto error; 2002 goto error;
1964 } 2003 }
1965 2004
@@ -1971,13 +2010,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1971 } 2010 }
1972 rcu_assign_pointer(device->name, name); 2011 rcu_assign_pointer(device->name, name);
1973 2012
1974 ret = find_next_devid(root, &device->devid);
1975 if (ret) {
1976 rcu_string_free(device->name);
1977 kfree(device);
1978 goto error;
1979 }
1980
1981 trans = btrfs_start_transaction(root, 0); 2013 trans = btrfs_start_transaction(root, 0);
1982 if (IS_ERR(trans)) { 2014 if (IS_ERR(trans)) {
1983 rcu_string_free(device->name); 2015 rcu_string_free(device->name);
@@ -1992,9 +2024,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1992 if (blk_queue_discard(q)) 2024 if (blk_queue_discard(q))
1993 device->can_discard = 1; 2025 device->can_discard = 1;
1994 device->writeable = 1; 2026 device->writeable = 1;
1995 device->work.func = pending_bios_fn;
1996 generate_random_uuid(device->uuid);
1997 spin_lock_init(&device->io_lock);
1998 device->generation = trans->transid; 2027 device->generation = trans->transid;
1999 device->io_width = root->sectorsize; 2028 device->io_width = root->sectorsize;
2000 device->io_align = root->sectorsize; 2029 device->io_align = root->sectorsize;
@@ -2121,6 +2150,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
2121 struct btrfs_fs_info *fs_info = root->fs_info; 2150 struct btrfs_fs_info *fs_info = root->fs_info;
2122 struct list_head *devices; 2151 struct list_head *devices;
2123 struct rcu_string *name; 2152 struct rcu_string *name;
2153 u64 devid = BTRFS_DEV_REPLACE_DEVID;
2124 int ret = 0; 2154 int ret = 0;
2125 2155
2126 *device_out = NULL; 2156 *device_out = NULL;
@@ -2142,9 +2172,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
2142 } 2172 }
2143 } 2173 }
2144 2174
2145 device = kzalloc(sizeof(*device), GFP_NOFS); 2175 device = btrfs_alloc_device(NULL, &devid, NULL);
2146 if (!device) { 2176 if (IS_ERR(device)) {
2147 ret = -ENOMEM; 2177 ret = PTR_ERR(device);
2148 goto error; 2178 goto error;
2149 } 2179 }
2150 2180
@@ -2161,10 +2191,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
2161 device->can_discard = 1; 2191 device->can_discard = 1;
2162 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2192 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2163 device->writeable = 1; 2193 device->writeable = 1;
2164 device->work.func = pending_bios_fn;
2165 generate_random_uuid(device->uuid);
2166 device->devid = BTRFS_DEV_REPLACE_DEVID;
2167 spin_lock_init(&device->io_lock);
2168 device->generation = 0; 2194 device->generation = 0;
2169 device->io_width = root->sectorsize; 2195 device->io_width = root->sectorsize;
2170 device->io_align = root->sectorsize; 2196 device->io_align = root->sectorsize;
@@ -2971,10 +2997,6 @@ again:
2971 if (found_key.objectid != key.objectid) 2997 if (found_key.objectid != key.objectid)
2972 break; 2998 break;
2973 2999
2974 /* chunk zero is special */
2975 if (found_key.offset == 0)
2976 break;
2977
2978 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 3000 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2979 3001
2980 if (!counting) { 3002 if (!counting) {
@@ -3010,6 +3032,8 @@ again:
3010 spin_unlock(&fs_info->balance_lock); 3032 spin_unlock(&fs_info->balance_lock);
3011 } 3033 }
3012loop: 3034loop:
3035 if (found_key.offset == 0)
3036 break;
3013 key.offset = found_key.offset - 1; 3037 key.offset = found_key.offset - 1;
3014 } 3038 }
3015 3039
@@ -3074,9 +3098,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
3074 atomic_set(&fs_info->mutually_exclusive_operation_running, 0); 3098 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3075} 3099}
3076 3100
3077void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
3078 struct btrfs_ioctl_balance_args *bargs);
3079
3080/* 3101/*
3081 * Should be called with both balance and volume mutexes held 3102 * Should be called with both balance and volume mutexes held
3082 */ 3103 */
@@ -3139,7 +3160,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3139 (bctl->data.target & ~allowed))) { 3160 (bctl->data.target & ~allowed))) {
3140 printk(KERN_ERR "btrfs: unable to start balance with target " 3161 printk(KERN_ERR "btrfs: unable to start balance with target "
3141 "data profile %llu\n", 3162 "data profile %llu\n",
3142 (unsigned long long)bctl->data.target); 3163 bctl->data.target);
3143 ret = -EINVAL; 3164 ret = -EINVAL;
3144 goto out; 3165 goto out;
3145 } 3166 }
@@ -3148,7 +3169,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3148 (bctl->meta.target & ~allowed))) { 3169 (bctl->meta.target & ~allowed))) {
3149 printk(KERN_ERR "btrfs: unable to start balance with target " 3170 printk(KERN_ERR "btrfs: unable to start balance with target "
3150 "metadata profile %llu\n", 3171 "metadata profile %llu\n",
3151 (unsigned long long)bctl->meta.target); 3172 bctl->meta.target);
3152 ret = -EINVAL; 3173 ret = -EINVAL;
3153 goto out; 3174 goto out;
3154 } 3175 }
@@ -3157,7 +3178,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3157 (bctl->sys.target & ~allowed))) { 3178 (bctl->sys.target & ~allowed))) {
3158 printk(KERN_ERR "btrfs: unable to start balance with target " 3179 printk(KERN_ERR "btrfs: unable to start balance with target "
3159 "system profile %llu\n", 3180 "system profile %llu\n",
3160 (unsigned long long)bctl->sys.target); 3181 bctl->sys.target);
3161 ret = -EINVAL; 3182 ret = -EINVAL;
3162 goto out; 3183 goto out;
3163 } 3184 }
@@ -3430,6 +3451,264 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
3430 return 0; 3451 return 0;
3431} 3452}
3432 3453
3454static int btrfs_uuid_scan_kthread(void *data)
3455{
3456 struct btrfs_fs_info *fs_info = data;
3457 struct btrfs_root *root = fs_info->tree_root;
3458 struct btrfs_key key;
3459 struct btrfs_key max_key;
3460 struct btrfs_path *path = NULL;
3461 int ret = 0;
3462 struct extent_buffer *eb;
3463 int slot;
3464 struct btrfs_root_item root_item;
3465 u32 item_size;
3466 struct btrfs_trans_handle *trans = NULL;
3467
3468 path = btrfs_alloc_path();
3469 if (!path) {
3470 ret = -ENOMEM;
3471 goto out;
3472 }
3473
3474 key.objectid = 0;
3475 key.type = BTRFS_ROOT_ITEM_KEY;
3476 key.offset = 0;
3477
3478 max_key.objectid = (u64)-1;
3479 max_key.type = BTRFS_ROOT_ITEM_KEY;
3480 max_key.offset = (u64)-1;
3481
3482 path->keep_locks = 1;
3483
3484 while (1) {
3485 ret = btrfs_search_forward(root, &key, &max_key, path, 0);
3486 if (ret) {
3487 if (ret > 0)
3488 ret = 0;
3489 break;
3490 }
3491
3492 if (key.type != BTRFS_ROOT_ITEM_KEY ||
3493 (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
3494 key.objectid != BTRFS_FS_TREE_OBJECTID) ||
3495 key.objectid > BTRFS_LAST_FREE_OBJECTID)
3496 goto skip;
3497
3498 eb = path->nodes[0];
3499 slot = path->slots[0];
3500 item_size = btrfs_item_size_nr(eb, slot);
3501 if (item_size < sizeof(root_item))
3502 goto skip;
3503
3504 read_extent_buffer(eb, &root_item,
3505 btrfs_item_ptr_offset(eb, slot),
3506 (int)sizeof(root_item));
3507 if (btrfs_root_refs(&root_item) == 0)
3508 goto skip;
3509
3510 if (!btrfs_is_empty_uuid(root_item.uuid) ||
3511 !btrfs_is_empty_uuid(root_item.received_uuid)) {
3512 if (trans)
3513 goto update_tree;
3514
3515 btrfs_release_path(path);
3516 /*
3517 * 1 - subvol uuid item
3518 * 1 - received_subvol uuid item
3519 */
3520 trans = btrfs_start_transaction(fs_info->uuid_root, 2);
3521 if (IS_ERR(trans)) {
3522 ret = PTR_ERR(trans);
3523 break;
3524 }
3525 continue;
3526 } else {
3527 goto skip;
3528 }
3529update_tree:
3530 if (!btrfs_is_empty_uuid(root_item.uuid)) {
3531 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
3532 root_item.uuid,
3533 BTRFS_UUID_KEY_SUBVOL,
3534 key.objectid);
3535 if (ret < 0) {
3536 pr_warn("btrfs: uuid_tree_add failed %d\n",
3537 ret);
3538 break;
3539 }
3540 }
3541
3542 if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
3543 ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
3544 root_item.received_uuid,
3545 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
3546 key.objectid);
3547 if (ret < 0) {
3548 pr_warn("btrfs: uuid_tree_add failed %d\n",
3549 ret);
3550 break;
3551 }
3552 }
3553
3554skip:
3555 if (trans) {
3556 ret = btrfs_end_transaction(trans, fs_info->uuid_root);
3557 trans = NULL;
3558 if (ret)
3559 break;
3560 }
3561
3562 btrfs_release_path(path);
3563 if (key.offset < (u64)-1) {
3564 key.offset++;
3565 } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
3566 key.offset = 0;
3567 key.type = BTRFS_ROOT_ITEM_KEY;
3568 } else if (key.objectid < (u64)-1) {
3569 key.offset = 0;
3570 key.type = BTRFS_ROOT_ITEM_KEY;
3571 key.objectid++;
3572 } else {
3573 break;
3574 }
3575 cond_resched();
3576 }
3577
3578out:
3579 btrfs_free_path(path);
3580 if (trans && !IS_ERR(trans))
3581 btrfs_end_transaction(trans, fs_info->uuid_root);
3582 if (ret)
3583 pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
3584 else
3585 fs_info->update_uuid_tree_gen = 1;
3586 up(&fs_info->uuid_tree_rescan_sem);
3587 return 0;
3588}
3589
3590/*
3591 * Callback for btrfs_uuid_tree_iterate().
3592 * returns:
3593 * 0 check succeeded, the entry is not outdated.
3594 * < 0 if an error occurred.
3595 * > 0 if the check failed, which means the caller shall remove the entry.
3596 */
3597static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
3598 u8 *uuid, u8 type, u64 subid)
3599{
3600 struct btrfs_key key;
3601 int ret = 0;
3602 struct btrfs_root *subvol_root;
3603
3604 if (type != BTRFS_UUID_KEY_SUBVOL &&
3605 type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
3606 goto out;
3607
3608 key.objectid = subid;
3609 key.type = BTRFS_ROOT_ITEM_KEY;
3610 key.offset = (u64)-1;
3611 subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
3612 if (IS_ERR(subvol_root)) {
3613 ret = PTR_ERR(subvol_root);
3614 if (ret == -ENOENT)
3615 ret = 1;
3616 goto out;
3617 }
3618
3619 switch (type) {
3620 case BTRFS_UUID_KEY_SUBVOL:
3621 if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
3622 ret = 1;
3623 break;
3624 case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
3625 if (memcmp(uuid, subvol_root->root_item.received_uuid,
3626 BTRFS_UUID_SIZE))
3627 ret = 1;
3628 break;
3629 }
3630
3631out:
3632 return ret;
3633}
3634
3635static int btrfs_uuid_rescan_kthread(void *data)
3636{
3637 struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
3638 int ret;
3639
3640 /*
3641 * 1st step is to iterate through the existing UUID tree and
3642 * to delete all entries that contain outdated data.
3643 * 2nd step is to add all missing entries to the UUID tree.
3644 */
3645 ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
3646 if (ret < 0) {
3647 pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
3648 up(&fs_info->uuid_tree_rescan_sem);
3649 return ret;
3650 }
3651 return btrfs_uuid_scan_kthread(data);
3652}
3653
3654int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
3655{
3656 struct btrfs_trans_handle *trans;
3657 struct btrfs_root *tree_root = fs_info->tree_root;
3658 struct btrfs_root *uuid_root;
3659 struct task_struct *task;
3660 int ret;
3661
3662 /*
3663 * 1 - root node
3664 * 1 - root item
3665 */
3666 trans = btrfs_start_transaction(tree_root, 2);
3667 if (IS_ERR(trans))
3668 return PTR_ERR(trans);
3669
3670 uuid_root = btrfs_create_tree(trans, fs_info,
3671 BTRFS_UUID_TREE_OBJECTID);
3672 if (IS_ERR(uuid_root)) {
3673 btrfs_abort_transaction(trans, tree_root,
3674 PTR_ERR(uuid_root));
3675 return PTR_ERR(uuid_root);
3676 }
3677
3678 fs_info->uuid_root = uuid_root;
3679
3680 ret = btrfs_commit_transaction(trans, tree_root);
3681 if (ret)
3682 return ret;
3683
3684 down(&fs_info->uuid_tree_rescan_sem);
3685 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
3686 if (IS_ERR(task)) {
3687 /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
3688 pr_warn("btrfs: failed to start uuid_scan task\n");
3689 up(&fs_info->uuid_tree_rescan_sem);
3690 return PTR_ERR(task);
3691 }
3692
3693 return 0;
3694}
3695
3696int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
3697{
3698 struct task_struct *task;
3699
3700 down(&fs_info->uuid_tree_rescan_sem);
3701 task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
3702 if (IS_ERR(task)) {
3703 /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
3704 pr_warn("btrfs: failed to start uuid_rescan task\n");
3705 up(&fs_info->uuid_tree_rescan_sem);
3706 return PTR_ERR(task);
3707 }
3708
3709 return 0;
3710}
3711
3433/* 3712/*
3434 * shrinking a device means finding all of the device extents past 3713 * shrinking a device means finding all of the device extents past
3435 * the new size, and then following the back refs to the chunks. 3714 * the new size, and then following the back refs to the chunks.
@@ -4194,13 +4473,13 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
4194 * and exit, so return 1 so the callers don't try to use other copies. 4473 * and exit, so return 1 so the callers don't try to use other copies.
4195 */ 4474 */
4196 if (!em) { 4475 if (!em) {
4197 btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical, 4476 btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical,
4198 logical+len); 4477 logical+len);
4199 return 1; 4478 return 1;
4200 } 4479 }
4201 4480
4202 if (em->start > logical || em->start + em->len < logical) { 4481 if (em->start > logical || em->start + em->len < logical) {
4203 btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " 4482 btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
4204 "%Lu-%Lu\n", logical, logical+len, em->start, 4483 "%Lu-%Lu\n", logical, logical+len, em->start,
4205 em->start + em->len); 4484 em->start + em->len);
4206 return 1; 4485 return 1;
@@ -4375,8 +4654,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4375 4654
4376 if (!em) { 4655 if (!em) {
4377 btrfs_crit(fs_info, "unable to find logical %llu len %llu", 4656 btrfs_crit(fs_info, "unable to find logical %llu len %llu",
4378 (unsigned long long)logical, 4657 logical, *length);
4379 (unsigned long long)*length);
4380 return -EINVAL; 4658 return -EINVAL;
4381 } 4659 }
4382 4660
@@ -4671,6 +4949,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4671 } 4949 }
4672 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); 4950 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
4673 if (!bbio) { 4951 if (!bbio) {
4952 kfree(raid_map);
4674 ret = -ENOMEM; 4953 ret = -ENOMEM;
4675 goto out; 4954 goto out;
4676 } 4955 }
@@ -5246,9 +5525,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5246 5525
5247 if (map_length < length) { 5526 if (map_length < length) {
5248 btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu", 5527 btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
5249 (unsigned long long)logical, 5528 logical, length, map_length);
5250 (unsigned long long)length,
5251 (unsigned long long)map_length);
5252 BUG(); 5529 BUG();
5253 } 5530 }
5254 5531
@@ -5314,23 +5591,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
5314 struct btrfs_device *device; 5591 struct btrfs_device *device;
5315 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 5592 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
5316 5593
5317 device = kzalloc(sizeof(*device), GFP_NOFS); 5594 device = btrfs_alloc_device(NULL, &devid, dev_uuid);
5318 if (!device) 5595 if (IS_ERR(device))
5319 return NULL; 5596 return NULL;
5320 list_add(&device->dev_list, 5597
5321 &fs_devices->devices); 5598 list_add(&device->dev_list, &fs_devices->devices);
5322 device->devid = devid;
5323 device->work.func = pending_bios_fn;
5324 device->fs_devices = fs_devices; 5599 device->fs_devices = fs_devices;
5325 device->missing = 1;
5326 fs_devices->num_devices++; 5600 fs_devices->num_devices++;
5601
5602 device->missing = 1;
5327 fs_devices->missing_devices++; 5603 fs_devices->missing_devices++;
5328 spin_lock_init(&device->io_lock); 5604
5329 INIT_LIST_HEAD(&device->dev_alloc_list);
5330 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
5331 return device; 5605 return device;
5332} 5606}
5333 5607
5608/**
5609 * btrfs_alloc_device - allocate struct btrfs_device
5610 * @fs_info: used only for generating a new devid, can be NULL if
5611 * devid is provided (i.e. @devid != NULL).
5612 * @devid: a pointer to devid for this device. If NULL a new devid
5613 * is generated.
5614 * @uuid: a pointer to UUID for this device. If NULL a new UUID
5615 * is generated.
5616 *
5617 * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
5618 * on error. Returned struct is not linked onto any lists and can be
5619 * destroyed with kfree() right away.
5620 */
5621struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
5622 const u64 *devid,
5623 const u8 *uuid)
5624{
5625 struct btrfs_device *dev;
5626 u64 tmp;
5627
5628 if (!devid && !fs_info) {
5629 WARN_ON(1);
5630 return ERR_PTR(-EINVAL);
5631 }
5632
5633 dev = __alloc_device();
5634 if (IS_ERR(dev))
5635 return dev;
5636
5637 if (devid)
5638 tmp = *devid;
5639 else {
5640 int ret;
5641
5642 ret = find_next_devid(fs_info, &tmp);
5643 if (ret) {
5644 kfree(dev);
5645 return ERR_PTR(ret);
5646 }
5647 }
5648 dev->devid = tmp;
5649
5650 if (uuid)
5651 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
5652 else
5653 generate_random_uuid(dev->uuid);
5654
5655 dev->work.func = pending_bios_fn;
5656
5657 return dev;
5658}
5659
5334static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, 5660static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
5335 struct extent_buffer *leaf, 5661 struct extent_buffer *leaf,
5336 struct btrfs_chunk *chunk) 5662 struct btrfs_chunk *chunk)
@@ -5437,7 +5763,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
5437 WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); 5763 WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
5438 device->is_tgtdev_for_dev_replace = 0; 5764 device->is_tgtdev_for_dev_replace = 0;
5439 5765
5440 ptr = (unsigned long)btrfs_device_uuid(dev_item); 5766 ptr = btrfs_device_uuid(dev_item);
5441 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 5767 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
5442} 5768}
5443 5769
@@ -5500,11 +5826,9 @@ static int read_one_dev(struct btrfs_root *root,
5500 u8 dev_uuid[BTRFS_UUID_SIZE]; 5826 u8 dev_uuid[BTRFS_UUID_SIZE];
5501 5827
5502 devid = btrfs_device_id(leaf, dev_item); 5828 devid = btrfs_device_id(leaf, dev_item);
5503 read_extent_buffer(leaf, dev_uuid, 5829 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
5504 (unsigned long)btrfs_device_uuid(dev_item),
5505 BTRFS_UUID_SIZE); 5830 BTRFS_UUID_SIZE);
5506 read_extent_buffer(leaf, fs_uuid, 5831 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
5507 (unsigned long)btrfs_device_fsid(dev_item),
5508 BTRFS_UUID_SIZE); 5832 BTRFS_UUID_SIZE);
5509 5833
5510 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { 5834 if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
@@ -5519,8 +5843,7 @@ static int read_one_dev(struct btrfs_root *root,
5519 return -EIO; 5843 return -EIO;
5520 5844
5521 if (!device) { 5845 if (!device) {
5522 btrfs_warn(root->fs_info, "devid %llu missing", 5846 btrfs_warn(root->fs_info, "devid %llu missing", devid);
5523 (unsigned long long)devid);
5524 device = add_missing_dev(root, devid, dev_uuid); 5847 device = add_missing_dev(root, devid, dev_uuid);
5525 if (!device) 5848 if (!device)
5526 return -ENOMEM; 5849 return -ENOMEM;
@@ -5644,14 +5967,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
5644 mutex_lock(&uuid_mutex); 5967 mutex_lock(&uuid_mutex);
5645 lock_chunks(root); 5968 lock_chunks(root);
5646 5969
5647 /* first we search for all of the device items, and then we 5970 /*
5648 * read in all of the chunk items. This way we can create chunk 5971 * Read all device items, and then all the chunk items. All
5649 * mappings that reference all of the devices that are afound 5972 * device items are found before any chunk item (their object id
5973 * is smaller than the lowest possible object id for a chunk
5974 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
5650 */ 5975 */
5651 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 5976 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
5652 key.offset = 0; 5977 key.offset = 0;
5653 key.type = 0; 5978 key.type = 0;
5654again:
5655 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5979 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5656 if (ret < 0) 5980 if (ret < 0)
5657 goto error; 5981 goto error;
@@ -5667,17 +5991,13 @@ again:
5667 break; 5991 break;
5668 } 5992 }
5669 btrfs_item_key_to_cpu(leaf, &found_key, slot); 5993 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5670 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { 5994 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
5671 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) 5995 struct btrfs_dev_item *dev_item;
5672 break; 5996 dev_item = btrfs_item_ptr(leaf, slot,
5673 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
5674 struct btrfs_dev_item *dev_item;
5675 dev_item = btrfs_item_ptr(leaf, slot,
5676 struct btrfs_dev_item); 5997 struct btrfs_dev_item);
5677 ret = read_one_dev(root, leaf, dev_item); 5998 ret = read_one_dev(root, leaf, dev_item);
5678 if (ret) 5999 if (ret)
5679 goto error; 6000 goto error;
5680 }
5681 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { 6001 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
5682 struct btrfs_chunk *chunk; 6002 struct btrfs_chunk *chunk;
5683 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 6003 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -5687,11 +6007,6 @@ again:
5687 } 6007 }
5688 path->slots[0]++; 6008 path->slots[0]++;
5689 } 6009 }
5690 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
5691 key.objectid = 0;
5692 btrfs_release_path(path);
5693 goto again;
5694 }
5695 ret = 0; 6010 ret = 0;
5696error: 6011error:
5697 unlock_chunks(root); 6012 unlock_chunks(root);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 86705583480d..b72f540c8b29 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,8 @@ struct btrfs_fs_devices {
152 int rotating; 152 int rotating;
153}; 153};
154 154
155#define BTRFS_BIO_INLINE_CSUM_SIZE 64
156
155/* 157/*
156 * we need the mirror number and stripe index to be passed around 158 * we need the mirror number and stripe index to be passed around
157 * the call chain while we are processing end_io (especially errors). 159 * the call chain while we are processing end_io (especially errors).
@@ -161,9 +163,14 @@ struct btrfs_fs_devices {
161 * we allocate are actually btrfs_io_bios. We'll cram as much of 163 * we allocate are actually btrfs_io_bios. We'll cram as much of
162 * struct btrfs_bio as we can into this over time. 164 * struct btrfs_bio as we can into this over time.
163 */ 165 */
166typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err);
164struct btrfs_io_bio { 167struct btrfs_io_bio {
165 unsigned long mirror_num; 168 unsigned long mirror_num;
166 unsigned long stripe_index; 169 unsigned long stripe_index;
170 u8 *csum;
171 u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
172 u8 *csum_allocated;
173 btrfs_io_bio_end_io_t *end_io;
167 struct bio bio; 174 struct bio bio;
168}; 175};
169 176
@@ -298,6 +305,9 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
298int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, 305int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
299 char *device_path, 306 char *device_path,
300 struct btrfs_device **device); 307 struct btrfs_device **device);
308struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
309 const u64 *devid,
310 const u8 *uuid);
301int btrfs_rm_device(struct btrfs_root *root, char *device_path); 311int btrfs_rm_device(struct btrfs_root *root, char *device_path);
302void btrfs_cleanup_fs_uuids(void); 312void btrfs_cleanup_fs_uuids(void);
303int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); 313int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
@@ -315,6 +325,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
315int btrfs_recover_balance(struct btrfs_fs_info *fs_info); 325int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
316int btrfs_pause_balance(struct btrfs_fs_info *fs_info); 326int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
317int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); 327int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
328int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
329int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
318int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 330int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
319int find_free_dev_extent(struct btrfs_trans_handle *trans, 331int find_free_dev_extent(struct btrfs_trans_handle *trans,
320 struct btrfs_device *device, u64 num_bytes, 332 struct btrfs_device *device, u64 num_bytes,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 2902657ba766..45702c3c3837 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -439,7 +439,7 @@ TRACE_EVENT(btrfs_sync_fs,
439 { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" }) 439 { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" })
440 440
441 441
442TRACE_EVENT(btrfs_delayed_tree_ref, 442DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref,
443 443
444 TP_PROTO(struct btrfs_delayed_ref_node *ref, 444 TP_PROTO(struct btrfs_delayed_ref_node *ref,
445 struct btrfs_delayed_tree_ref *full_ref, 445 struct btrfs_delayed_tree_ref *full_ref,
@@ -481,7 +481,25 @@ TRACE_EVENT(btrfs_delayed_tree_ref,
481 (unsigned long long)__entry->seq) 481 (unsigned long long)__entry->seq)
482); 482);
483 483
484TRACE_EVENT(btrfs_delayed_data_ref, 484DEFINE_EVENT(btrfs_delayed_tree_ref, add_delayed_tree_ref,
485
486 TP_PROTO(struct btrfs_delayed_ref_node *ref,
487 struct btrfs_delayed_tree_ref *full_ref,
488 int action),
489
490 TP_ARGS(ref, full_ref, action)
491);
492
493DEFINE_EVENT(btrfs_delayed_tree_ref, run_delayed_tree_ref,
494
495 TP_PROTO(struct btrfs_delayed_ref_node *ref,
496 struct btrfs_delayed_tree_ref *full_ref,
497 int action),
498
499 TP_ARGS(ref, full_ref, action)
500);
501
502DECLARE_EVENT_CLASS(btrfs_delayed_data_ref,
485 503
486 TP_PROTO(struct btrfs_delayed_ref_node *ref, 504 TP_PROTO(struct btrfs_delayed_ref_node *ref,
487 struct btrfs_delayed_data_ref *full_ref, 505 struct btrfs_delayed_data_ref *full_ref,
@@ -527,7 +545,25 @@ TRACE_EVENT(btrfs_delayed_data_ref,
527 (unsigned long long)__entry->seq) 545 (unsigned long long)__entry->seq)
528); 546);
529 547
530TRACE_EVENT(btrfs_delayed_ref_head, 548DEFINE_EVENT(btrfs_delayed_data_ref, add_delayed_data_ref,
549
550 TP_PROTO(struct btrfs_delayed_ref_node *ref,
551 struct btrfs_delayed_data_ref *full_ref,
552 int action),
553
554 TP_ARGS(ref, full_ref, action)
555);
556
557DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref,
558
559 TP_PROTO(struct btrfs_delayed_ref_node *ref,
560 struct btrfs_delayed_data_ref *full_ref,
561 int action),
562
563 TP_ARGS(ref, full_ref, action)
564);
565
566DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
531 567
532 TP_PROTO(struct btrfs_delayed_ref_node *ref, 568 TP_PROTO(struct btrfs_delayed_ref_node *ref,
533 struct btrfs_delayed_ref_head *head_ref, 569 struct btrfs_delayed_ref_head *head_ref,
@@ -556,6 +592,24 @@ TRACE_EVENT(btrfs_delayed_ref_head,
556 __entry->is_data) 592 __entry->is_data)
557); 593);
558 594
595DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head,
596
597 TP_PROTO(struct btrfs_delayed_ref_node *ref,
598 struct btrfs_delayed_ref_head *head_ref,
599 int action),
600
601 TP_ARGS(ref, head_ref, action)
602);
603
604DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head,
605
606 TP_PROTO(struct btrfs_delayed_ref_node *ref,
607 struct btrfs_delayed_ref_head *head_ref,
608 int action),
609
610 TP_ARGS(ref, head_ref, action)
611);
612
559#define show_chunk_type(type) \ 613#define show_chunk_type(type) \
560 __print_flags(type, "|", \ 614 __print_flags(type, "|", \
561 { BTRFS_BLOCK_GROUP_DATA, "DATA" }, \ 615 { BTRFS_BLOCK_GROUP_DATA, "DATA" }, \
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 05aed70627e2..45e618921c61 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -305,6 +305,31 @@ struct btrfs_ioctl_clone_range_args {
305#define BTRFS_DEFRAG_RANGE_COMPRESS 1 305#define BTRFS_DEFRAG_RANGE_COMPRESS 1
306#define BTRFS_DEFRAG_RANGE_START_IO 2 306#define BTRFS_DEFRAG_RANGE_START_IO 2
307 307
308#define BTRFS_SAME_DATA_DIFFERS 1
309/* For extent-same ioctl */
310struct btrfs_ioctl_same_extent_info {
311 __s64 fd; /* in - destination file */
312 __u64 logical_offset; /* in - start of extent in destination */
313 __u64 bytes_deduped; /* out - total # of bytes we were able
314 * to dedupe from this file */
315 /* status of this dedupe operation:
316 * 0 if dedup succeeds
317 * < 0 for error
318 * == BTRFS_SAME_DATA_DIFFERS if data differs
319 */
320 __s32 status; /* out - see above description */
321 __u32 reserved;
322};
323
324struct btrfs_ioctl_same_args {
325 __u64 logical_offset; /* in - start of extent in source */
326 __u64 length; /* in - length of extent */
327 __u16 dest_count; /* in - total elements in info array */
328 __u16 reserved1;
329 __u32 reserved2;
330 struct btrfs_ioctl_same_extent_info info[0];
331};
332
308struct btrfs_ioctl_space_info { 333struct btrfs_ioctl_space_info {
309 __u64 flags; 334 __u64 flags;
310 __u64 total_bytes; 335 __u64 total_bytes;
@@ -524,7 +549,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
524 struct btrfs_ioctl_search_args) 549 struct btrfs_ioctl_search_args)
525#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ 550#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
526 struct btrfs_ioctl_ino_lookup_args) 551 struct btrfs_ioctl_ino_lookup_args)
527#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) 552#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
528#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ 553#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
529 struct btrfs_ioctl_space_args) 554 struct btrfs_ioctl_space_args)
530#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) 555#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
@@ -579,4 +604,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
579 struct btrfs_ioctl_get_dev_stats) 604 struct btrfs_ioctl_get_dev_stats)
580#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ 605#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
581 struct btrfs_ioctl_dev_replace_args) 606 struct btrfs_ioctl_dev_replace_args)
607#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
608 struct btrfs_ioctl_same_args)
609
582#endif /* _UAPI_LINUX_BTRFS_H */ 610#endif /* _UAPI_LINUX_BTRFS_H */