aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/Kconfig22
-rw-r--r--fs/btrfs/backref.c87
-rw-r--r--fs/btrfs/backref.h3
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c382
-rw-r--r--fs/btrfs/ctree.h145
-rw-r--r--fs/btrfs/delayed-inode.c66
-rw-r--r--fs/btrfs/delayed-ref.c30
-rw-r--r--fs/btrfs/dir-item.c11
-rw-r--r--fs/btrfs/disk-io.c409
-rw-r--r--fs/btrfs/disk-io.h5
-rw-r--r--fs/btrfs/extent-tree.c549
-rw-r--r--fs/btrfs/extent_io.c310
-rw-r--r--fs/btrfs/extent_io.h44
-rw-r--r--fs/btrfs/extent_map.c23
-rw-r--r--fs/btrfs/extent_map.h3
-rw-r--r--fs/btrfs/file-item.c102
-rw-r--r--fs/btrfs/file.c37
-rw-r--r--fs/btrfs/free-space-cache.c596
-rw-r--r--fs/btrfs/free-space-cache.h5
-rw-r--r--fs/btrfs/inode-item.c17
-rw-r--r--fs/btrfs/inode.c183
-rw-r--r--fs/btrfs/ioctl.c108
-rw-r--r--fs/btrfs/locking.c4
-rw-r--r--fs/btrfs/ordered-data.c28
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/print-tree.c9
-rw-r--r--fs/btrfs/print-tree.h2
-rw-r--r--fs/btrfs/qgroup.c840
-rw-r--r--fs/btrfs/raid56.c14
-rw-r--r--fs/btrfs/reada.c5
-rw-r--r--fs/btrfs/relocation.c111
-rw-r--r--fs/btrfs/root-tree.c7
-rw-r--r--fs/btrfs/scrub.c130
-rw-r--r--fs/btrfs/send.c32
-rw-r--r--fs/btrfs/send.h1
-rw-r--r--fs/btrfs/super.c107
-rw-r--r--fs/btrfs/transaction.c95
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-log.c390
-rw-r--r--fs/btrfs/tree-log.h3
-rw-r--r--fs/btrfs/ulist.c58
-rw-r--r--fs/btrfs/ulist.h6
-rw-r--r--fs/btrfs/volumes.c97
-rw-r--r--fs/btrfs/volumes.h13
-rw-r--r--fs/btrfs/xattr.c4
48 files changed, 3215 insertions, 1902 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 9a8622a5b867..2b3b83296977 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,5 +1,5 @@
1config BTRFS_FS 1config BTRFS_FS
2 tristate "Btrfs filesystem Unstable disk format" 2 tristate "Btrfs filesystem support"
3 select LIBCRC32C 3 select LIBCRC32C
4 select ZLIB_INFLATE 4 select ZLIB_INFLATE
5 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
@@ -52,3 +52,23 @@ config BTRFS_FS_CHECK_INTEGRITY
52 In most cases, unless you are a btrfs developer who needs 52 In most cases, unless you are a btrfs developer who needs
53 to verify the integrity of (super)-block write requests 53 to verify the integrity of (super)-block write requests
54 during the run of a regression test, say N 54 during the run of a regression test, say N
55
56config BTRFS_FS_RUN_SANITY_TESTS
57 bool "Btrfs will run sanity tests upon loading"
58 depends on BTRFS_FS
59 help
60 This will run some basic sanity tests on the free space cache
61 code to make sure it is acting as it should. These are mostly
62 regression tests and are only really interesting to btrfs developers.
63
64 If unsure, say N.
65
66config BTRFS_DEBUG
67 bool "Btrfs debugging support"
68 depends on BTRFS_FS
69 help
70 Enable run-time debugging support for the btrfs filesystem. This may
71 enable additional and expensive checks with negative impact on
72 performance, or export extra information via sysfs.
73
74 If unsure, say N.
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index bd605c87adfd..b4fb41558111 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -352,6 +352,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
352 err = __resolve_indirect_ref(fs_info, search_commit_root, 352 err = __resolve_indirect_ref(fs_info, search_commit_root,
353 time_seq, ref, parents, 353 time_seq, ref, parents,
354 extent_item_pos); 354 extent_item_pos);
355 if (err == -ENOMEM)
356 goto out;
355 if (err) 357 if (err)
356 continue; 358 continue;
357 359
@@ -367,7 +369,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
367 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); 369 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
368 if (!new_ref) { 370 if (!new_ref) {
369 ret = -ENOMEM; 371 ret = -ENOMEM;
370 break; 372 goto out;
371 } 373 }
372 memcpy(new_ref, ref, sizeof(*ref)); 374 memcpy(new_ref, ref, sizeof(*ref));
373 new_ref->parent = node->val; 375 new_ref->parent = node->val;
@@ -377,7 +379,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
377 } 379 }
378 ulist_reinit(parents); 380 ulist_reinit(parents);
379 } 381 }
380 382out:
381 ulist_free(parents); 383 ulist_free(parents);
382 return ret; 384 return ret;
383} 385}
@@ -421,7 +423,10 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
421 BUG_ON(!ref->wanted_disk_byte); 423 BUG_ON(!ref->wanted_disk_byte);
422 eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, 424 eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
423 fs_info->tree_root->leafsize, 0); 425 fs_info->tree_root->leafsize, 0);
424 BUG_ON(!eb); 426 if (!eb || !extent_buffer_uptodate(eb)) {
427 free_extent_buffer(eb);
428 return -EIO;
429 }
425 btrfs_tree_read_lock(eb); 430 btrfs_tree_read_lock(eb);
426 if (btrfs_header_level(eb) == 0) 431 if (btrfs_header_level(eb) == 0)
427 btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0); 432 btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
@@ -443,7 +448,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
443 * having a parent). 448 * having a parent).
444 * mode = 2: merge identical parents 449 * mode = 2: merge identical parents
445 */ 450 */
446static int __merge_refs(struct list_head *head, int mode) 451static void __merge_refs(struct list_head *head, int mode)
447{ 452{
448 struct list_head *pos1; 453 struct list_head *pos1;
449 454
@@ -489,7 +494,6 @@ static int __merge_refs(struct list_head *head, int mode)
489 } 494 }
490 495
491 } 496 }
492 return 0;
493} 497}
494 498
495/* 499/*
@@ -582,7 +586,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
582 default: 586 default:
583 WARN_ON(1); 587 WARN_ON(1);
584 } 588 }
585 BUG_ON(ret); 589 if (ret)
590 return ret;
586 } 591 }
587 592
588 return 0; 593 return 0;
@@ -680,7 +685,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
680 default: 685 default:
681 WARN_ON(1); 686 WARN_ON(1);
682 } 687 }
683 BUG_ON(ret); 688 if (ret)
689 return ret;
684 ptr += btrfs_extent_inline_ref_size(type); 690 ptr += btrfs_extent_inline_ref_size(type);
685 } 691 }
686 692
@@ -762,7 +768,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
762 default: 768 default:
763 WARN_ON(1); 769 WARN_ON(1);
764 } 770 }
765 BUG_ON(ret); 771 if (ret)
772 return ret;
773
766 } 774 }
767 775
768 return ret; 776 return ret;
@@ -880,18 +888,14 @@ again:
880 if (ret) 888 if (ret)
881 goto out; 889 goto out;
882 890
883 ret = __merge_refs(&prefs, 1); 891 __merge_refs(&prefs, 1);
884 if (ret)
885 goto out;
886 892
887 ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, 893 ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq,
888 &prefs, extent_item_pos); 894 &prefs, extent_item_pos);
889 if (ret) 895 if (ret)
890 goto out; 896 goto out;
891 897
892 ret = __merge_refs(&prefs, 2); 898 __merge_refs(&prefs, 2);
893 if (ret)
894 goto out;
895 899
896 while (!list_empty(&prefs)) { 900 while (!list_empty(&prefs)) {
897 ref = list_first_entry(&prefs, struct __prelim_ref, list); 901 ref = list_first_entry(&prefs, struct __prelim_ref, list);
@@ -900,7 +904,8 @@ again:
900 if (ref->count && ref->root_id && ref->parent == 0) { 904 if (ref->count && ref->root_id && ref->parent == 0) {
901 /* no parent == root of tree */ 905 /* no parent == root of tree */
902 ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); 906 ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
903 BUG_ON(ret < 0); 907 if (ret < 0)
908 goto out;
904 } 909 }
905 if (ref->count && ref->parent) { 910 if (ref->count && ref->parent) {
906 struct extent_inode_elem *eie = NULL; 911 struct extent_inode_elem *eie = NULL;
@@ -911,7 +916,10 @@ again:
911 info_level); 916 info_level);
912 eb = read_tree_block(fs_info->extent_root, 917 eb = read_tree_block(fs_info->extent_root,
913 ref->parent, bsz, 0); 918 ref->parent, bsz, 0);
914 BUG_ON(!eb); 919 if (!eb || !extent_buffer_uptodate(eb)) {
920 free_extent_buffer(eb);
921 return -EIO;
922 }
915 ret = find_extent_in_eb(eb, bytenr, 923 ret = find_extent_in_eb(eb, bytenr,
916 *extent_item_pos, &eie); 924 *extent_item_pos, &eie);
917 ref->inode_list = eie; 925 ref->inode_list = eie;
@@ -920,6 +928,8 @@ again:
920 ret = ulist_add_merge(refs, ref->parent, 928 ret = ulist_add_merge(refs, ref->parent,
921 (uintptr_t)ref->inode_list, 929 (uintptr_t)ref->inode_list,
922 (u64 *)&eie, GFP_NOFS); 930 (u64 *)&eie, GFP_NOFS);
931 if (ret < 0)
932 goto out;
923 if (!ret && extent_item_pos) { 933 if (!ret && extent_item_pos) {
924 /* 934 /*
925 * we've recorded that parent, so we must extend 935 * we've recorded that parent, so we must extend
@@ -930,7 +940,6 @@ again:
930 eie = eie->next; 940 eie = eie->next;
931 eie->next = ref->inode_list; 941 eie->next = ref->inode_list;
932 } 942 }
933 BUG_ON(ret < 0);
934 } 943 }
935 kfree(ref); 944 kfree(ref);
936 } 945 }
@@ -1180,6 +1189,20 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
1180 return ret; 1189 return ret;
1181} 1190}
1182 1191
1192/*
1193 * this iterates to turn a name (from iref/extref) into a full filesystem path.
1194 * Elements of the path are separated by '/' and the path is guaranteed to be
1195 * 0-terminated. the path is only given within the current file system.
1196 * Therefore, it never starts with a '/'. the caller is responsible to provide
1197 * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
1198 * the start point of the resulting string is returned. this pointer is within
1199 * dest, normally.
1200 * in case the path buffer would overflow, the pointer is decremented further
1201 * as if output was written to the buffer, though no more output is actually
1202 * generated. that way, the caller can determine how much space would be
1203 * required for the path to fit into the buffer. in that case, the returned
1204 * value will be smaller than dest. callers must check this!
1205 */
1183char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 1206char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1184 u32 name_len, unsigned long name_off, 1207 u32 name_len, unsigned long name_off,
1185 struct extent_buffer *eb_in, u64 parent, 1208 struct extent_buffer *eb_in, u64 parent,
@@ -1249,32 +1272,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1249} 1272}
1250 1273
1251/* 1274/*
1252 * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements
1253 * of the path are separated by '/' and the path is guaranteed to be
1254 * 0-terminated. the path is only given within the current file system.
1255 * Therefore, it never starts with a '/'. the caller is responsible to provide
1256 * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
1257 * the start point of the resulting string is returned. this pointer is within
1258 * dest, normally.
1259 * in case the path buffer would overflow, the pointer is decremented further
1260 * as if output was written to the buffer, though no more output is actually
1261 * generated. that way, the caller can determine how much space would be
1262 * required for the path to fit into the buffer. in that case, the returned
1263 * value will be smaller than dest. callers must check this!
1264 */
1265char *btrfs_iref_to_path(struct btrfs_root *fs_root,
1266 struct btrfs_path *path,
1267 struct btrfs_inode_ref *iref,
1268 struct extent_buffer *eb_in, u64 parent,
1269 char *dest, u32 size)
1270{
1271 return btrfs_ref_to_path(fs_root, path,
1272 btrfs_inode_ref_name_len(eb_in, iref),
1273 (unsigned long)(iref + 1),
1274 eb_in, parent, dest, size);
1275}
1276
1277/*
1278 * this makes the path point to (logical EXTENT_ITEM *) 1275 * this makes the path point to (logical EXTENT_ITEM *)
1279 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for 1276 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
1280 * tree blocks and <0 on error. 1277 * tree blocks and <0 on error.
@@ -1461,8 +1458,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1461 iterate_extent_inodes_t *iterate, void *ctx) 1458 iterate_extent_inodes_t *iterate, void *ctx)
1462{ 1459{
1463 int ret; 1460 int ret;
1464 struct list_head data_refs = LIST_HEAD_INIT(data_refs);
1465 struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
1466 struct btrfs_trans_handle *trans; 1461 struct btrfs_trans_handle *trans;
1467 struct ulist *refs = NULL; 1462 struct ulist *refs = NULL;
1468 struct ulist *roots = NULL; 1463 struct ulist *roots = NULL;
@@ -1508,11 +1503,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1508 iterate, ctx); 1503 iterate, ctx);
1509 } 1504 }
1510 ulist_free(roots); 1505 ulist_free(roots);
1511 roots = NULL;
1512 } 1506 }
1513 1507
1514 free_leaf_list(refs); 1508 free_leaf_list(refs);
1515 ulist_free(roots);
1516out: 1509out:
1517 if (!search_commit_root) { 1510 if (!search_commit_root) {
1518 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 1511 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 310a7f6d09b1..0f446d7ca2c0 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -59,9 +59,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
59int btrfs_find_all_roots(struct btrfs_trans_handle *trans, 59int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
60 struct btrfs_fs_info *fs_info, u64 bytenr, 60 struct btrfs_fs_info *fs_info, u64 bytenr,
61 u64 time_seq, struct ulist **roots); 61 u64 time_seq, struct ulist **roots);
62char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
63 struct btrfs_inode_ref *iref, struct extent_buffer *eb,
64 u64 parent, char *dest, u32 size);
65char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 62char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
66 u32 name_len, unsigned long name_off, 63 u32 name_len, unsigned long name_off,
67 struct extent_buffer *eb_in, u64 parent, 64 struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d9b97d4960e6..08b286b2a2c5 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -93,7 +93,7 @@ struct btrfs_inode {
93 93
94 unsigned long runtime_flags; 94 unsigned long runtime_flags;
95 95
96 /* Keep track of who's O_SYNC/fsycing currently */ 96 /* Keep track of who's O_SYNC/fsyncing currently */
97 atomic_t sync_writers; 97 atomic_t sync_writers;
98 98
99 /* full 64 bit generation number, struct vfs_inode doesn't have a big 99 /* full 64 bit generation number, struct vfs_inode doesn't have a big
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 15b94089abc4..b189bd1e7a3e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -82,6 +82,10 @@ struct compressed_bio {
82 u32 sums; 82 u32 sums;
83}; 83};
84 84
85static int btrfs_decompress_biovec(int type, struct page **pages_in,
86 u64 disk_start, struct bio_vec *bvec,
87 int vcnt, size_t srclen);
88
85static inline int compressed_bio_size(struct btrfs_root *root, 89static inline int compressed_bio_size(struct btrfs_root *root,
86 unsigned long disk_size) 90 unsigned long disk_size)
87{ 91{
@@ -106,7 +110,6 @@ static int check_compressed_csum(struct inode *inode,
106 u64 disk_start) 110 u64 disk_start)
107{ 111{
108 int ret; 112 int ret;
109 struct btrfs_root *root = BTRFS_I(inode)->root;
110 struct page *page; 113 struct page *page;
111 unsigned long i; 114 unsigned long i;
112 char *kaddr; 115 char *kaddr;
@@ -121,7 +124,7 @@ static int check_compressed_csum(struct inode *inode,
121 csum = ~(u32)0; 124 csum = ~(u32)0;
122 125
123 kaddr = kmap_atomic(page); 126 kaddr = kmap_atomic(page);
124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE); 127 csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE);
125 btrfs_csum_final(csum, (char *)&csum); 128 btrfs_csum_final(csum, (char *)&csum);
126 kunmap_atomic(kaddr); 129 kunmap_atomic(kaddr);
127 130
@@ -739,7 +742,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
739static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; 742static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
740static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; 743static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
741 744
742struct btrfs_compress_op *btrfs_compress_op[] = { 745static struct btrfs_compress_op *btrfs_compress_op[] = {
743 &btrfs_zlib_compress, 746 &btrfs_zlib_compress,
744 &btrfs_lzo_compress, 747 &btrfs_lzo_compress,
745}; 748};
@@ -910,8 +913,9 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
910 * be contiguous. They all correspond to the range of bytes covered by 913 * be contiguous. They all correspond to the range of bytes covered by
911 * the compressed extent. 914 * the compressed extent.
912 */ 915 */
913int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, 916static int btrfs_decompress_biovec(int type, struct page **pages_in,
914 struct bio_vec *bvec, int vcnt, size_t srclen) 917 u64 disk_start, struct bio_vec *bvec,
918 int vcnt, size_t srclen)
915{ 919{
916 struct list_head *workspace; 920 struct list_head *workspace;
917 int ret; 921 int ret;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 9afb0a62ae82..0c803b4fbf93 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -30,8 +30,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
30 unsigned long *total_in, 30 unsigned long *total_in,
31 unsigned long *total_out, 31 unsigned long *total_out,
32 unsigned long max_out); 32 unsigned long max_out);
33int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
34 struct bio_vec *bvec, int vcnt, size_t srclen);
35int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, 33int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
36 unsigned long start_byte, size_t srclen, size_t destlen); 34 unsigned long start_byte, size_t srclen, size_t destlen);
37int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, 35int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ca9d8f1a3bb6..de6de8e60b46 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,16 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root, 37 struct btrfs_root *root,
38 struct extent_buffer *dst_buf, 38 struct extent_buffer *dst_buf,
39 struct extent_buffer *src_buf); 39 struct extent_buffer *src_buf);
40static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 40static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
41 struct btrfs_path *path, int level, int slot); 41 int level, int slot);
42static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, 42static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
43 struct extent_buffer *eb); 43 struct extent_buffer *eb);
44struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, 44static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
45 u32 blocksize, u64 parent_transid,
46 u64 time_seq);
47struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root,
48 u64 bytenr, u32 blocksize,
49 u64 time_seq);
50 45
51struct btrfs_path *btrfs_alloc_path(void) 46struct btrfs_path *btrfs_alloc_path(void)
52{ 47{
@@ -208,7 +203,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
208 * tree until you end up with a lock on the root. A locked buffer 203 * tree until you end up with a lock on the root. A locked buffer
209 * is returned, with a reference held. 204 * is returned, with a reference held.
210 */ 205 */
211struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) 206static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
212{ 207{
213 struct extent_buffer *eb; 208 struct extent_buffer *eb;
214 209
@@ -361,6 +356,44 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
361} 356}
362 357
363/* 358/*
359 * Increment the upper half of tree_mod_seq, set lower half zero.
360 *
361 * Must be called with fs_info->tree_mod_seq_lock held.
362 */
363static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
364{
365 u64 seq = atomic64_read(&fs_info->tree_mod_seq);
366 seq &= 0xffffffff00000000ull;
367 seq += 1ull << 32;
368 atomic64_set(&fs_info->tree_mod_seq, seq);
369 return seq;
370}
371
372/*
373 * Increment the lower half of tree_mod_seq.
374 *
375 * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
376 * are generated should not technically require a spin lock here. (Rationale:
377 * incrementing the minor while incrementing the major seq number is between its
378 * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
379 * just returns a unique sequence number as usual.) We have decided to leave
380 * that requirement in here and rethink it once we notice it really imposes a
381 * problem on some workload.
382 */
383static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
384{
385 return atomic64_inc_return(&fs_info->tree_mod_seq);
386}
387
388/*
389 * return the last minor in the previous major tree_mod_seq number
390 */
391u64 btrfs_tree_mod_seq_prev(u64 seq)
392{
393 return (seq & 0xffffffff00000000ull) - 1ull;
394}
395
396/*
364 * This adds a new blocker to the tree mod log's blocker list if the @elem 397 * This adds a new blocker to the tree mod log's blocker list if the @elem
365 * passed does not already have a sequence number set. So when a caller expects 398 * passed does not already have a sequence number set. So when a caller expects
366 * to record tree modifications, it should ensure to set elem->seq to zero 399 * to record tree modifications, it should ensure to set elem->seq to zero
@@ -376,10 +409,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
376 tree_mod_log_write_lock(fs_info); 409 tree_mod_log_write_lock(fs_info);
377 spin_lock(&fs_info->tree_mod_seq_lock); 410 spin_lock(&fs_info->tree_mod_seq_lock);
378 if (!elem->seq) { 411 if (!elem->seq) {
379 elem->seq = btrfs_inc_tree_mod_seq(fs_info); 412 elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
380 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); 413 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
381 } 414 }
382 seq = btrfs_inc_tree_mod_seq(fs_info); 415 seq = btrfs_inc_tree_mod_seq_minor(fs_info);
383 spin_unlock(&fs_info->tree_mod_seq_lock); 416 spin_unlock(&fs_info->tree_mod_seq_lock);
384 tree_mod_log_write_unlock(fs_info); 417 tree_mod_log_write_unlock(fs_info);
385 418
@@ -524,7 +557,10 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
524 if (!tm) 557 if (!tm)
525 return -ENOMEM; 558 return -ENOMEM;
526 559
527 tm->seq = btrfs_inc_tree_mod_seq(fs_info); 560 spin_lock(&fs_info->tree_mod_seq_lock);
561 tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
562 spin_unlock(&fs_info->tree_mod_seq_lock);
563
528 return tm->seq; 564 return tm->seq;
529} 565}
530 566
@@ -643,7 +679,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
643static noinline int 679static noinline int
644tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, 680tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
645 struct extent_buffer *old_root, 681 struct extent_buffer *old_root,
646 struct extent_buffer *new_root, gfp_t flags) 682 struct extent_buffer *new_root, gfp_t flags,
683 int log_removal)
647{ 684{
648 struct tree_mod_elem *tm; 685 struct tree_mod_elem *tm;
649 int ret; 686 int ret;
@@ -651,7 +688,8 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
651 if (tree_mod_dont_log(fs_info, NULL)) 688 if (tree_mod_dont_log(fs_info, NULL))
652 return 0; 689 return 0;
653 690
654 __tree_mod_log_free_eb(fs_info, old_root); 691 if (log_removal)
692 __tree_mod_log_free_eb(fs_info, old_root);
655 693
656 ret = tree_mod_alloc(fs_info, flags, &tm); 694 ret = tree_mod_alloc(fs_info, flags, &tm);
657 if (ret < 0) 695 if (ret < 0)
@@ -738,7 +776,7 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
738static noinline void 776static noinline void
739tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, 777tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
740 struct extent_buffer *src, unsigned long dst_offset, 778 struct extent_buffer *src, unsigned long dst_offset,
741 unsigned long src_offset, int nr_items, int log_removal) 779 unsigned long src_offset, int nr_items)
742{ 780{
743 int ret; 781 int ret;
744 int i; 782 int i;
@@ -752,12 +790,10 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
752 } 790 }
753 791
754 for (i = 0; i < nr_items; i++) { 792 for (i = 0; i < nr_items; i++) {
755 if (log_removal) { 793 ret = tree_mod_log_insert_key_locked(fs_info, src,
756 ret = tree_mod_log_insert_key_locked(fs_info, src, 794 i + src_offset,
757 i + src_offset, 795 MOD_LOG_KEY_REMOVE);
758 MOD_LOG_KEY_REMOVE); 796 BUG_ON(ret < 0);
759 BUG_ON(ret < 0);
760 }
761 ret = tree_mod_log_insert_key_locked(fs_info, dst, 797 ret = tree_mod_log_insert_key_locked(fs_info, dst,
762 i + dst_offset, 798 i + dst_offset,
763 MOD_LOG_KEY_ADD); 799 MOD_LOG_KEY_ADD);
@@ -802,11 +838,12 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
802 838
803static noinline void 839static noinline void
804tree_mod_log_set_root_pointer(struct btrfs_root *root, 840tree_mod_log_set_root_pointer(struct btrfs_root *root,
805 struct extent_buffer *new_root_node) 841 struct extent_buffer *new_root_node,
842 int log_removal)
806{ 843{
807 int ret; 844 int ret;
808 ret = tree_mod_log_insert_root(root->fs_info, root->node, 845 ret = tree_mod_log_insert_root(root->fs_info, root->node,
809 new_root_node, GFP_NOFS); 846 new_root_node, GFP_NOFS, log_removal);
810 BUG_ON(ret < 0); 847 BUG_ON(ret < 0);
811} 848}
812 849
@@ -867,7 +904,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
867 904
868 if (btrfs_block_can_be_shared(root, buf)) { 905 if (btrfs_block_can_be_shared(root, buf)) {
869 ret = btrfs_lookup_extent_info(trans, root, buf->start, 906 ret = btrfs_lookup_extent_info(trans, root, buf->start,
870 buf->len, &refs, &flags); 907 btrfs_header_level(buf), 1,
908 &refs, &flags);
871 if (ret) 909 if (ret)
872 return ret; 910 return ret;
873 if (refs == 0) { 911 if (refs == 0) {
@@ -1028,7 +1066,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1028 parent_start = 0; 1066 parent_start = 0;
1029 1067
1030 extent_buffer_get(cow); 1068 extent_buffer_get(cow);
1031 tree_mod_log_set_root_pointer(root, cow); 1069 tree_mod_log_set_root_pointer(root, cow, 1);
1032 rcu_assign_pointer(root->node, cow); 1070 rcu_assign_pointer(root->node, cow);
1033 1071
1034 btrfs_free_tree_block(trans, root, buf, parent_start, 1072 btrfs_free_tree_block(trans, root, buf, parent_start,
@@ -1067,11 +1105,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1067 */ 1105 */
1068static struct tree_mod_elem * 1106static struct tree_mod_elem *
1069__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, 1107__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1070 struct btrfs_root *root, u64 time_seq) 1108 struct extent_buffer *eb_root, u64 time_seq)
1071{ 1109{
1072 struct tree_mod_elem *tm; 1110 struct tree_mod_elem *tm;
1073 struct tree_mod_elem *found = NULL; 1111 struct tree_mod_elem *found = NULL;
1074 u64 root_logical = root->node->start; 1112 u64 root_logical = eb_root->start;
1075 int looped = 0; 1113 int looped = 0;
1076 1114
1077 if (!time_seq) 1115 if (!time_seq)
@@ -1105,7 +1143,6 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1105 1143
1106 found = tm; 1144 found = tm;
1107 root_logical = tm->old_root.logical; 1145 root_logical = tm->old_root.logical;
1108 BUG_ON(root_logical == root->node->start);
1109 looped = 1; 1146 looped = 1;
1110 } 1147 }
1111 1148
@@ -1190,6 +1227,13 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
1190 btrfs_set_header_nritems(eb, n); 1227 btrfs_set_header_nritems(eb, n);
1191} 1228}
1192 1229
1230/*
1231 * Called with eb read locked. If the buffer cannot be rewound, the same buffer
1232 * is returned. If rewind operations happen, a fresh buffer is returned. The
1233 * returned buffer is always read-locked. If the returned buffer is not the
1234 * input buffer, the lock on the input buffer is released and the input buffer
1235 * is freed (its refcount is decremented).
1236 */
1193static struct extent_buffer * 1237static struct extent_buffer *
1194tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, 1238tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1195 u64 time_seq) 1239 u64 time_seq)
@@ -1223,8 +1267,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1223 } 1267 }
1224 1268
1225 extent_buffer_get(eb_rewin); 1269 extent_buffer_get(eb_rewin);
1270 btrfs_tree_read_unlock(eb);
1226 free_extent_buffer(eb); 1271 free_extent_buffer(eb);
1227 1272
1273 extent_buffer_get(eb_rewin);
1274 btrfs_tree_read_lock(eb_rewin);
1228 __tree_mod_log_rewind(eb_rewin, time_seq, tm); 1275 __tree_mod_log_rewind(eb_rewin, time_seq, tm);
1229 WARN_ON(btrfs_header_nritems(eb_rewin) > 1276 WARN_ON(btrfs_header_nritems(eb_rewin) >
1230 BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root)); 1277 BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
@@ -1243,33 +1290,35 @@ static inline struct extent_buffer *
1243get_old_root(struct btrfs_root *root, u64 time_seq) 1290get_old_root(struct btrfs_root *root, u64 time_seq)
1244{ 1291{
1245 struct tree_mod_elem *tm; 1292 struct tree_mod_elem *tm;
1246 struct extent_buffer *eb; 1293 struct extent_buffer *eb = NULL;
1294 struct extent_buffer *eb_root;
1247 struct extent_buffer *old; 1295 struct extent_buffer *old;
1248 struct tree_mod_root *old_root = NULL; 1296 struct tree_mod_root *old_root = NULL;
1249 u64 old_generation = 0; 1297 u64 old_generation = 0;
1250 u64 logical; 1298 u64 logical;
1251 u32 blocksize; 1299 u32 blocksize;
1252 1300
1253 eb = btrfs_read_lock_root_node(root); 1301 eb_root = btrfs_read_lock_root_node(root);
1254 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); 1302 tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
1255 if (!tm) 1303 if (!tm)
1256 return root->node; 1304 return eb_root;
1257 1305
1258 if (tm->op == MOD_LOG_ROOT_REPLACE) { 1306 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1259 old_root = &tm->old_root; 1307 old_root = &tm->old_root;
1260 old_generation = tm->generation; 1308 old_generation = tm->generation;
1261 logical = old_root->logical; 1309 logical = old_root->logical;
1262 } else { 1310 } else {
1263 logical = root->node->start; 1311 logical = eb_root->start;
1264 } 1312 }
1265 1313
1266 tm = tree_mod_log_search(root->fs_info, logical, time_seq); 1314 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1267 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 1315 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1268 btrfs_tree_read_unlock(root->node); 1316 btrfs_tree_read_unlock(eb_root);
1269 free_extent_buffer(root->node); 1317 free_extent_buffer(eb_root);
1270 blocksize = btrfs_level_size(root, old_root->level); 1318 blocksize = btrfs_level_size(root, old_root->level);
1271 old = read_tree_block(root, logical, blocksize, 0); 1319 old = read_tree_block(root, logical, blocksize, 0);
1272 if (!old) { 1320 if (!old || !extent_buffer_uptodate(old)) {
1321 free_extent_buffer(old);
1273 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", 1322 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
1274 logical); 1323 logical);
1275 WARN_ON(1); 1324 WARN_ON(1);
@@ -1278,13 +1327,13 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1278 free_extent_buffer(old); 1327 free_extent_buffer(old);
1279 } 1328 }
1280 } else if (old_root) { 1329 } else if (old_root) {
1281 btrfs_tree_read_unlock(root->node); 1330 btrfs_tree_read_unlock(eb_root);
1282 free_extent_buffer(root->node); 1331 free_extent_buffer(eb_root);
1283 eb = alloc_dummy_extent_buffer(logical, root->nodesize); 1332 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1284 } else { 1333 } else {
1285 eb = btrfs_clone_extent_buffer(root->node); 1334 eb = btrfs_clone_extent_buffer(eb_root);
1286 btrfs_tree_read_unlock(root->node); 1335 btrfs_tree_read_unlock(eb_root);
1287 free_extent_buffer(root->node); 1336 free_extent_buffer(eb_root);
1288 } 1337 }
1289 1338
1290 if (!eb) 1339 if (!eb)
@@ -1294,7 +1343,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1294 if (old_root) { 1343 if (old_root) {
1295 btrfs_set_header_bytenr(eb, eb->start); 1344 btrfs_set_header_bytenr(eb, eb->start);
1296 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); 1345 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1297 btrfs_set_header_owner(eb, root->root_key.objectid); 1346 btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
1298 btrfs_set_header_level(eb, old_root->level); 1347 btrfs_set_header_level(eb, old_root->level);
1299 btrfs_set_header_generation(eb, old_generation); 1348 btrfs_set_header_generation(eb, old_generation);
1300 } 1349 }
@@ -1311,15 +1360,15 @@ int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1311{ 1360{
1312 struct tree_mod_elem *tm; 1361 struct tree_mod_elem *tm;
1313 int level; 1362 int level;
1363 struct extent_buffer *eb_root = btrfs_root_node(root);
1314 1364
1315 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); 1365 tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
1316 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) { 1366 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1317 level = tm->old_root.level; 1367 level = tm->old_root.level;
1318 } else { 1368 } else {
1319 rcu_read_lock(); 1369 level = btrfs_header_level(eb_root);
1320 level = btrfs_header_level(root->node);
1321 rcu_read_unlock();
1322 } 1370 }
1371 free_extent_buffer(eb_root);
1323 1372
1324 return level; 1373 return level;
1325} 1374}
@@ -1514,8 +1563,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1514 if (!cur) { 1563 if (!cur) {
1515 cur = read_tree_block(root, blocknr, 1564 cur = read_tree_block(root, blocknr,
1516 blocksize, gen); 1565 blocksize, gen);
1517 if (!cur) 1566 if (!cur || !extent_buffer_uptodate(cur)) {
1567 free_extent_buffer(cur);
1518 return -EIO; 1568 return -EIO;
1569 }
1519 } else if (!uptodate) { 1570 } else if (!uptodate) {
1520 err = btrfs_read_buffer(cur, gen); 1571 err = btrfs_read_buffer(cur, gen);
1521 if (err) { 1572 if (err) {
@@ -1680,6 +1731,8 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
1680 struct extent_buffer *parent, int slot) 1731 struct extent_buffer *parent, int slot)
1681{ 1732{
1682 int level = btrfs_header_level(parent); 1733 int level = btrfs_header_level(parent);
1734 struct extent_buffer *eb;
1735
1683 if (slot < 0) 1736 if (slot < 0)
1684 return NULL; 1737 return NULL;
1685 if (slot >= btrfs_header_nritems(parent)) 1738 if (slot >= btrfs_header_nritems(parent))
@@ -1687,9 +1740,15 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
1687 1740
1688 BUG_ON(level == 0); 1741 BUG_ON(level == 0);
1689 1742
1690 return read_tree_block(root, btrfs_node_blockptr(parent, slot), 1743 eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
1691 btrfs_level_size(root, level - 1), 1744 btrfs_level_size(root, level - 1),
1692 btrfs_node_ptr_generation(parent, slot)); 1745 btrfs_node_ptr_generation(parent, slot));
1746 if (eb && !extent_buffer_uptodate(eb)) {
1747 free_extent_buffer(eb);
1748 eb = NULL;
1749 }
1750
1751 return eb;
1693} 1752}
1694 1753
1695/* 1754/*
@@ -1754,7 +1813,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1754 goto enospc; 1813 goto enospc;
1755 } 1814 }
1756 1815
1757 tree_mod_log_set_root_pointer(root, child); 1816 tree_mod_log_set_root_pointer(root, child, 1);
1758 rcu_assign_pointer(root->node, child); 1817 rcu_assign_pointer(root->node, child);
1759 1818
1760 add_root_to_dirty_list(root); 1819 add_root_to_dirty_list(root);
@@ -1818,7 +1877,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1818 if (btrfs_header_nritems(right) == 0) { 1877 if (btrfs_header_nritems(right) == 0) {
1819 clean_tree_block(trans, root, right); 1878 clean_tree_block(trans, root, right);
1820 btrfs_tree_unlock(right); 1879 btrfs_tree_unlock(right);
1821 del_ptr(trans, root, path, level + 1, pslot + 1); 1880 del_ptr(root, path, level + 1, pslot + 1);
1822 root_sub_used(root, right->len); 1881 root_sub_used(root, right->len);
1823 btrfs_free_tree_block(trans, root, right, 0, 1); 1882 btrfs_free_tree_block(trans, root, right, 0, 1);
1824 free_extent_buffer_stale(right); 1883 free_extent_buffer_stale(right);
@@ -1862,7 +1921,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1862 if (btrfs_header_nritems(mid) == 0) { 1921 if (btrfs_header_nritems(mid) == 0) {
1863 clean_tree_block(trans, root, mid); 1922 clean_tree_block(trans, root, mid);
1864 btrfs_tree_unlock(mid); 1923 btrfs_tree_unlock(mid);
1865 del_ptr(trans, root, path, level + 1, pslot); 1924 del_ptr(root, path, level + 1, pslot);
1866 root_sub_used(root, mid->len); 1925 root_sub_used(root, mid->len);
1867 btrfs_free_tree_block(trans, root, mid, 0, 1); 1926 btrfs_free_tree_block(trans, root, mid, 0, 1);
1868 free_extent_buffer_stale(mid); 1927 free_extent_buffer_stale(mid);
@@ -2210,9 +2269,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
2210 int no_skips = 0; 2269 int no_skips = 0;
2211 struct extent_buffer *t; 2270 struct extent_buffer *t;
2212 2271
2213 if (path->really_keep_locks)
2214 return;
2215
2216 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 2272 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2217 if (!path->nodes[i]) 2273 if (!path->nodes[i])
2218 break; 2274 break;
@@ -2260,7 +2316,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
2260{ 2316{
2261 int i; 2317 int i;
2262 2318
2263 if (path->keep_locks || path->really_keep_locks) 2319 if (path->keep_locks)
2264 return; 2320 return;
2265 2321
2266 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 2322 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -2493,7 +2549,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2493 if (!cow) 2549 if (!cow)
2494 write_lock_level = -1; 2550 write_lock_level = -1;
2495 2551
2496 if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level)) 2552 if (cow && (p->keep_locks || p->lowest_level))
2497 write_lock_level = BTRFS_MAX_LEVEL; 2553 write_lock_level = BTRFS_MAX_LEVEL;
2498 2554
2499 min_write_lock_level = write_lock_level; 2555 min_write_lock_level = write_lock_level;
@@ -2795,15 +2851,9 @@ again:
2795 btrfs_clear_path_blocking(p, b, 2851 btrfs_clear_path_blocking(p, b,
2796 BTRFS_READ_LOCK); 2852 BTRFS_READ_LOCK);
2797 } 2853 }
2854 b = tree_mod_log_rewind(root->fs_info, b, time_seq);
2798 p->locks[level] = BTRFS_READ_LOCK; 2855 p->locks[level] = BTRFS_READ_LOCK;
2799 p->nodes[level] = b; 2856 p->nodes[level] = b;
2800 b = tree_mod_log_rewind(root->fs_info, b, time_seq);
2801 if (b != p->nodes[level]) {
2802 btrfs_tree_unlock_rw(p->nodes[level],
2803 p->locks[level]);
2804 p->locks[level] = 0;
2805 p->nodes[level] = b;
2806 }
2807 } else { 2857 } else {
2808 p->slots[level] = slot; 2858 p->slots[level] = slot;
2809 unlock_up(p, level, lowest_unlock, 0, NULL); 2859 unlock_up(p, level, lowest_unlock, 0, NULL);
@@ -2902,8 +2952,7 @@ again:
2902 * higher levels 2952 * higher levels
2903 * 2953 *
2904 */ 2954 */
2905static void fixup_low_keys(struct btrfs_trans_handle *trans, 2955static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
2906 struct btrfs_root *root, struct btrfs_path *path,
2907 struct btrfs_disk_key *key, int level) 2956 struct btrfs_disk_key *key, int level)
2908{ 2957{
2909 int i; 2958 int i;
@@ -2928,8 +2977,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
2928 * This function isn't completely safe. It's the caller's responsibility 2977 * This function isn't completely safe. It's the caller's responsibility
2929 * that the new key won't break the order 2978 * that the new key won't break the order
2930 */ 2979 */
2931void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 2980void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
2932 struct btrfs_root *root, struct btrfs_path *path,
2933 struct btrfs_key *new_key) 2981 struct btrfs_key *new_key)
2934{ 2982{
2935 struct btrfs_disk_key disk_key; 2983 struct btrfs_disk_key disk_key;
@@ -2951,7 +2999,7 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
2951 btrfs_set_item_key(eb, &disk_key, slot); 2999 btrfs_set_item_key(eb, &disk_key, slot);
2952 btrfs_mark_buffer_dirty(eb); 3000 btrfs_mark_buffer_dirty(eb);
2953 if (slot == 0) 3001 if (slot == 0)
2954 fixup_low_keys(trans, root, path, &disk_key, 1); 3002 fixup_low_keys(root, path, &disk_key, 1);
2955} 3003}
2956 3004
2957/* 3005/*
@@ -2998,7 +3046,7 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2998 push_items = min(src_nritems - 8, push_items); 3046 push_items = min(src_nritems - 8, push_items);
2999 3047
3000 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, 3048 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
3001 push_items, 1); 3049 push_items);
3002 copy_extent_buffer(dst, src, 3050 copy_extent_buffer(dst, src,
3003 btrfs_node_key_ptr_offset(dst_nritems), 3051 btrfs_node_key_ptr_offset(dst_nritems),
3004 btrfs_node_key_ptr_offset(0), 3052 btrfs_node_key_ptr_offset(0),
@@ -3069,7 +3117,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
3069 sizeof(struct btrfs_key_ptr)); 3117 sizeof(struct btrfs_key_ptr));
3070 3118
3071 tree_mod_log_eb_copy(root->fs_info, dst, src, 0, 3119 tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
3072 src_nritems - push_items, push_items, 1); 3120 src_nritems - push_items, push_items);
3073 copy_extent_buffer(dst, src, 3121 copy_extent_buffer(dst, src,
3074 btrfs_node_key_ptr_offset(0), 3122 btrfs_node_key_ptr_offset(0),
3075 btrfs_node_key_ptr_offset(src_nritems - push_items), 3123 btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3093,7 +3141,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
3093 */ 3141 */
3094static noinline int insert_new_root(struct btrfs_trans_handle *trans, 3142static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3095 struct btrfs_root *root, 3143 struct btrfs_root *root,
3096 struct btrfs_path *path, int level) 3144 struct btrfs_path *path, int level, int log_removal)
3097{ 3145{
3098 u64 lower_gen; 3146 u64 lower_gen;
3099 struct extent_buffer *lower; 3147 struct extent_buffer *lower;
@@ -3144,7 +3192,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3144 btrfs_mark_buffer_dirty(c); 3192 btrfs_mark_buffer_dirty(c);
3145 3193
3146 old = root->node; 3194 old = root->node;
3147 tree_mod_log_set_root_pointer(root, c); 3195 tree_mod_log_set_root_pointer(root, c, log_removal);
3148 rcu_assign_pointer(root->node, c); 3196 rcu_assign_pointer(root->node, c);
3149 3197
3150 /* the super has an extra ref to root->node */ 3198 /* the super has an extra ref to root->node */
@@ -3221,18 +3269,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3221 int mid; 3269 int mid;
3222 int ret; 3270 int ret;
3223 u32 c_nritems; 3271 u32 c_nritems;
3224 int tree_mod_log_removal = 1;
3225 3272
3226 c = path->nodes[level]; 3273 c = path->nodes[level];
3227 WARN_ON(btrfs_header_generation(c) != trans->transid); 3274 WARN_ON(btrfs_header_generation(c) != trans->transid);
3228 if (c == root->node) { 3275 if (c == root->node) {
3229 /* trying to split the root, lets make a new one */
3230 ret = insert_new_root(trans, root, path, level + 1);
3231 /* 3276 /*
3232 * removal of root nodes has been logged by 3277 * trying to split the root, lets make a new one
3233 * tree_mod_log_set_root_pointer due to locking 3278 *
3279 * tree mod log: We pass 0 as log_removal parameter to
3280 * insert_new_root, because that root buffer will be kept as a
3281 * normal node. We are going to log removal of half of the
3282 * elements below with tree_mod_log_eb_copy. We're holding a
3283 * tree lock on the buffer, which is why we cannot race with
3284 * other tree_mod_log users.
3234 */ 3285 */
3235 tree_mod_log_removal = 0; 3286 ret = insert_new_root(trans, root, path, level + 1, 0);
3236 if (ret) 3287 if (ret)
3237 return ret; 3288 return ret;
3238 } else { 3289 } else {
@@ -3270,8 +3321,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3270 (unsigned long)btrfs_header_chunk_tree_uuid(split), 3321 (unsigned long)btrfs_header_chunk_tree_uuid(split),
3271 BTRFS_UUID_SIZE); 3322 BTRFS_UUID_SIZE);
3272 3323
3273 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid, 3324 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
3274 tree_mod_log_removal);
3275 copy_extent_buffer(split, c, 3325 copy_extent_buffer(split, c,
3276 btrfs_node_key_ptr_offset(0), 3326 btrfs_node_key_ptr_offset(0),
3277 btrfs_node_key_ptr_offset(mid), 3327 btrfs_node_key_ptr_offset(mid),
@@ -3687,7 +3737,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3687 clean_tree_block(trans, root, right); 3737 clean_tree_block(trans, root, right);
3688 3738
3689 btrfs_item_key(right, &disk_key, 0); 3739 btrfs_item_key(right, &disk_key, 0);
3690 fixup_low_keys(trans, root, path, &disk_key, 1); 3740 fixup_low_keys(root, path, &disk_key, 1);
3691 3741
3692 /* then fixup the leaf pointer in the path */ 3742 /* then fixup the leaf pointer in the path */
3693 if (path->slots[0] < push_items) { 3743 if (path->slots[0] < push_items) {
@@ -3953,7 +4003,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
3953 } 4003 }
3954 4004
3955 if (!path->nodes[1]) { 4005 if (!path->nodes[1]) {
3956 ret = insert_new_root(trans, root, path, 1); 4006 ret = insert_new_root(trans, root, path, 1, 1);
3957 if (ret) 4007 if (ret)
3958 return ret; 4008 return ret;
3959 } 4009 }
@@ -4047,8 +4097,7 @@ again:
4047 path->nodes[0] = right; 4097 path->nodes[0] = right;
4048 path->slots[0] = 0; 4098 path->slots[0] = 0;
4049 if (path->slots[1] == 0) 4099 if (path->slots[1] == 0)
4050 fixup_low_keys(trans, root, path, 4100 fixup_low_keys(root, path, &disk_key, 1);
4051 &disk_key, 1);
4052 } 4101 }
4053 btrfs_mark_buffer_dirty(right); 4102 btrfs_mark_buffer_dirty(right);
4054 return ret; 4103 return ret;
@@ -4264,7 +4313,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4264 return ret; 4313 return ret;
4265 4314
4266 path->slots[0]++; 4315 path->slots[0]++;
4267 setup_items_for_insert(trans, root, path, new_key, &item_size, 4316 setup_items_for_insert(root, path, new_key, &item_size,
4268 item_size, item_size + 4317 item_size, item_size +
4269 sizeof(struct btrfs_item), 1); 4318 sizeof(struct btrfs_item), 1);
4270 leaf = path->nodes[0]; 4319 leaf = path->nodes[0];
@@ -4281,9 +4330,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4281 * off the end of the item or if we shift the item to chop bytes off 4330 * off the end of the item or if we shift the item to chop bytes off
4282 * the front. 4331 * the front.
4283 */ 4332 */
4284void btrfs_truncate_item(struct btrfs_trans_handle *trans, 4333void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
4285 struct btrfs_root *root,
4286 struct btrfs_path *path,
4287 u32 new_size, int from_end) 4334 u32 new_size, int from_end)
4288{ 4335{
4289 int slot; 4336 int slot;
@@ -4367,7 +4414,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
4367 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 4414 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4368 btrfs_set_item_key(leaf, &disk_key, slot); 4415 btrfs_set_item_key(leaf, &disk_key, slot);
4369 if (slot == 0) 4416 if (slot == 0)
4370 fixup_low_keys(trans, root, path, &disk_key, 1); 4417 fixup_low_keys(root, path, &disk_key, 1);
4371 } 4418 }
4372 4419
4373 item = btrfs_item_nr(leaf, slot); 4420 item = btrfs_item_nr(leaf, slot);
@@ -4383,8 +4430,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
4383/* 4430/*
4384 * make the item pointed to by the path bigger, data_size is the new size. 4431 * make the item pointed to by the path bigger, data_size is the new size.
4385 */ 4432 */
4386void btrfs_extend_item(struct btrfs_trans_handle *trans, 4433void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
4387 struct btrfs_root *root, struct btrfs_path *path,
4388 u32 data_size) 4434 u32 data_size)
4389{ 4435{
4390 int slot; 4436 int slot;
@@ -4454,8 +4500,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
4454 * to save stack depth by doing the bulk of the work in a function 4500 * to save stack depth by doing the bulk of the work in a function
4455 * that doesn't call btrfs_search_slot 4501 * that doesn't call btrfs_search_slot
4456 */ 4502 */
4457void setup_items_for_insert(struct btrfs_trans_handle *trans, 4503void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4458 struct btrfs_root *root, struct btrfs_path *path,
4459 struct btrfs_key *cpu_key, u32 *data_size, 4504 struct btrfs_key *cpu_key, u32 *data_size,
4460 u32 total_data, u32 total_size, int nr) 4505 u32 total_data, u32 total_size, int nr)
4461{ 4506{
@@ -4531,7 +4576,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
4531 4576
4532 if (slot == 0) { 4577 if (slot == 0) {
4533 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 4578 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
4534 fixup_low_keys(trans, root, path, &disk_key, 1); 4579 fixup_low_keys(root, path, &disk_key, 1);
4535 } 4580 }
4536 btrfs_unlock_up_safe(path, 1); 4581 btrfs_unlock_up_safe(path, 1);
4537 btrfs_mark_buffer_dirty(leaf); 4582 btrfs_mark_buffer_dirty(leaf);
@@ -4571,7 +4616,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4571 slot = path->slots[0]; 4616 slot = path->slots[0];
4572 BUG_ON(slot < 0); 4617 BUG_ON(slot < 0);
4573 4618
4574 setup_items_for_insert(trans, root, path, cpu_key, data_size, 4619 setup_items_for_insert(root, path, cpu_key, data_size,
4575 total_data, total_size, nr); 4620 total_data, total_size, nr);
4576 return 0; 4621 return 0;
4577} 4622}
@@ -4609,8 +4654,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
4609 * the tree should have been previously balanced so the deletion does not 4654 * the tree should have been previously balanced so the deletion does not
4610 * empty a node. 4655 * empty a node.
4611 */ 4656 */
4612static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 4657static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4613 struct btrfs_path *path, int level, int slot) 4658 int level, int slot)
4614{ 4659{
4615 struct extent_buffer *parent = path->nodes[level]; 4660 struct extent_buffer *parent = path->nodes[level];
4616 u32 nritems; 4661 u32 nritems;
@@ -4642,7 +4687,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4642 struct btrfs_disk_key disk_key; 4687 struct btrfs_disk_key disk_key;
4643 4688
4644 btrfs_node_key(parent, &disk_key, 0); 4689 btrfs_node_key(parent, &disk_key, 0);
4645 fixup_low_keys(trans, root, path, &disk_key, level + 1); 4690 fixup_low_keys(root, path, &disk_key, level + 1);
4646 } 4691 }
4647 btrfs_mark_buffer_dirty(parent); 4692 btrfs_mark_buffer_dirty(parent);
4648} 4693}
@@ -4663,7 +4708,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4663 struct extent_buffer *leaf) 4708 struct extent_buffer *leaf)
4664{ 4709{
4665 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 4710 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
4666 del_ptr(trans, root, path, 1, path->slots[1]); 4711 del_ptr(root, path, 1, path->slots[1]);
4667 4712
4668 /* 4713 /*
4669 * btrfs_free_extent is expensive, we want to make sure we 4714 * btrfs_free_extent is expensive, we want to make sure we
@@ -4744,7 +4789,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4744 struct btrfs_disk_key disk_key; 4789 struct btrfs_disk_key disk_key;
4745 4790
4746 btrfs_item_key(leaf, &disk_key, 0); 4791 btrfs_item_key(leaf, &disk_key, 0);
4747 fixup_low_keys(trans, root, path, &disk_key, 1); 4792 fixup_low_keys(root, path, &disk_key, 1);
4748 } 4793 }
4749 4794
4750 /* delete the leaf if it is mostly empty */ 4795 /* delete the leaf if it is mostly empty */
@@ -5464,139 +5509,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
5464 return btrfs_next_old_leaf(root, path, 0); 5509 return btrfs_next_old_leaf(root, path, 0);
5465} 5510}
5466 5511
5467/* Release the path up to but not including the given level */
5468static void btrfs_release_level(struct btrfs_path *path, int level)
5469{
5470 int i;
5471
5472 for (i = 0; i < level; i++) {
5473 path->slots[i] = 0;
5474 if (!path->nodes[i])
5475 continue;
5476 if (path->locks[i]) {
5477 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
5478 path->locks[i] = 0;
5479 }
5480 free_extent_buffer(path->nodes[i]);
5481 path->nodes[i] = NULL;
5482 }
5483}
5484
5485/*
5486 * This function assumes 2 things
5487 *
5488 * 1) You are using path->keep_locks
5489 * 2) You are not inserting items.
5490 *
5491 * If either of these are not true do not use this function. If you need a next
5492 * leaf with either of these not being true then this function can be easily
5493 * adapted to do that, but at the moment these are the limitations.
5494 */
5495int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
5496 struct btrfs_root *root, struct btrfs_path *path,
5497 int del)
5498{
5499 struct extent_buffer *b;
5500 struct btrfs_key key;
5501 u32 nritems;
5502 int level = 1;
5503 int slot;
5504 int ret = 1;
5505 int write_lock_level = BTRFS_MAX_LEVEL;
5506 int ins_len = del ? -1 : 0;
5507
5508 WARN_ON(!(path->keep_locks || path->really_keep_locks));
5509
5510 nritems = btrfs_header_nritems(path->nodes[0]);
5511 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
5512
5513 while (path->nodes[level]) {
5514 nritems = btrfs_header_nritems(path->nodes[level]);
5515 if (!(path->locks[level] & BTRFS_WRITE_LOCK)) {
5516search:
5517 btrfs_release_path(path);
5518 ret = btrfs_search_slot(trans, root, &key, path,
5519 ins_len, 1);
5520 if (ret < 0)
5521 goto out;
5522 level = 1;
5523 continue;
5524 }
5525
5526 if (path->slots[level] >= nritems - 1) {
5527 level++;
5528 continue;
5529 }
5530
5531 btrfs_release_level(path, level);
5532 break;
5533 }
5534
5535 if (!path->nodes[level]) {
5536 ret = 1;
5537 goto out;
5538 }
5539
5540 path->slots[level]++;
5541 b = path->nodes[level];
5542
5543 while (b) {
5544 level = btrfs_header_level(b);
5545
5546 if (!should_cow_block(trans, root, b))
5547 goto cow_done;
5548
5549 btrfs_set_path_blocking(path);
5550 ret = btrfs_cow_block(trans, root, b,
5551 path->nodes[level + 1],
5552 path->slots[level + 1], &b);
5553 if (ret)
5554 goto out;
5555cow_done:
5556 path->nodes[level] = b;
5557 btrfs_clear_path_blocking(path, NULL, 0);
5558 if (level != 0) {
5559 ret = setup_nodes_for_search(trans, root, path, b,
5560 level, ins_len,
5561 &write_lock_level);
5562 if (ret == -EAGAIN)
5563 goto search;
5564 if (ret)
5565 goto out;
5566
5567 b = path->nodes[level];
5568 slot = path->slots[level];
5569
5570 ret = read_block_for_search(trans, root, path,
5571 &b, level, slot, &key, 0);
5572 if (ret == -EAGAIN)
5573 goto search;
5574 if (ret)
5575 goto out;
5576 level = btrfs_header_level(b);
5577 if (!btrfs_try_tree_write_lock(b)) {
5578 btrfs_set_path_blocking(path);
5579 btrfs_tree_lock(b);
5580 btrfs_clear_path_blocking(path, b,
5581 BTRFS_WRITE_LOCK);
5582 }
5583 path->locks[level] = BTRFS_WRITE_LOCK;
5584 path->nodes[level] = b;
5585 path->slots[level] = 0;
5586 } else {
5587 path->slots[level] = 0;
5588 ret = 0;
5589 break;
5590 }
5591 }
5592
5593out:
5594 if (ret)
5595 btrfs_release_path(path);
5596
5597 return ret;
5598}
5599
5600int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 5512int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5601 u64 time_seq) 5513 u64 time_seq)
5602{ 5514{
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d82922179db..63c328a9ce95 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -340,6 +340,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
340 */ 340 */
341#define BTRFS_FS_STATE_ERROR 0 341#define BTRFS_FS_STATE_ERROR 0
342#define BTRFS_FS_STATE_REMOUNTING 1 342#define BTRFS_FS_STATE_REMOUNTING 1
343#define BTRFS_FS_STATE_TRANS_ABORTED 2
343 344
344/* Super block flags */ 345/* Super block flags */
345/* Errors detected */ 346/* Errors detected */
@@ -508,6 +509,7 @@ struct btrfs_super_block {
508 509
509#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) 510#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
510#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) 511#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
512#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
511 513
512#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 514#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
513#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 515#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
@@ -518,7 +520,8 @@ struct btrfs_super_block {
518 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ 520 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
519 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ 521 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
520 BTRFS_FEATURE_INCOMPAT_RAID56 | \ 522 BTRFS_FEATURE_INCOMPAT_RAID56 | \
521 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) 523 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
524 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
522 525
523/* 526/*
524 * A leaf is full of items. offset and size tell us where to find 527 * A leaf is full of items. offset and size tell us where to find
@@ -583,7 +586,6 @@ struct btrfs_path {
583 unsigned int skip_locking:1; 586 unsigned int skip_locking:1;
584 unsigned int leave_spinning:1; 587 unsigned int leave_spinning:1;
585 unsigned int search_commit_root:1; 588 unsigned int search_commit_root:1;
586 unsigned int really_keep_locks:1;
587}; 589};
588 590
589/* 591/*
@@ -1019,9 +1021,9 @@ struct btrfs_block_group_item {
1019 */ 1021 */
1020#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) 1022#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
1021/* 1023/*
1022 * SCANNING is set during the initialization phase 1024 * RESCAN is set during the initialization phase
1023 */ 1025 */
1024#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1) 1026#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1)
1025/* 1027/*
1026 * Some qgroup entries are known to be out of date, 1028 * Some qgroup entries are known to be out of date,
1027 * either because the configuration has changed in a way that 1029 * either because the configuration has changed in a way that
@@ -1050,7 +1052,7 @@ struct btrfs_qgroup_status_item {
1050 * only used during scanning to record the progress 1052 * only used during scanning to record the progress
1051 * of the scan. It contains a logical address 1053 * of the scan. It contains a logical address
1052 */ 1054 */
1053 __le64 scan; 1055 __le64 rescan;
1054} __attribute__ ((__packed__)); 1056} __attribute__ ((__packed__));
1055 1057
1056struct btrfs_qgroup_info_item { 1058struct btrfs_qgroup_info_item {
@@ -1360,6 +1362,17 @@ struct btrfs_fs_info {
1360 wait_queue_head_t transaction_blocked_wait; 1362 wait_queue_head_t transaction_blocked_wait;
1361 wait_queue_head_t async_submit_wait; 1363 wait_queue_head_t async_submit_wait;
1362 1364
1365 /*
1366 * Used to protect the incompat_flags, compat_flags, compat_ro_flags
1367 * when they are updated.
1368 *
1369 * Because we do not clear the flags for ever, so we needn't use
1370 * the lock on the read side.
1371 *
1372 * We also needn't use the lock when we mount the fs, because
1373 * there is no other task which will update the flag.
1374 */
1375 spinlock_t super_lock;
1363 struct btrfs_super_block *super_copy; 1376 struct btrfs_super_block *super_copy;
1364 struct btrfs_super_block *super_for_commit; 1377 struct btrfs_super_block *super_for_commit;
1365 struct block_device *__bdev; 1378 struct block_device *__bdev;
@@ -1409,7 +1422,7 @@ struct btrfs_fs_info {
1409 1422
1410 /* this protects tree_mod_seq_list */ 1423 /* this protects tree_mod_seq_list */
1411 spinlock_t tree_mod_seq_lock; 1424 spinlock_t tree_mod_seq_lock;
1412 atomic_t tree_mod_seq; 1425 atomic64_t tree_mod_seq;
1413 struct list_head tree_mod_seq_list; 1426 struct list_head tree_mod_seq_list;
1414 struct seq_list tree_mod_seq_elem; 1427 struct seq_list tree_mod_seq_elem;
1415 1428
@@ -1581,12 +1594,20 @@ struct btrfs_fs_info {
1581 struct rb_root qgroup_tree; 1594 struct rb_root qgroup_tree;
1582 spinlock_t qgroup_lock; 1595 spinlock_t qgroup_lock;
1583 1596
1597 /* protect user change for quota operations */
1598 struct mutex qgroup_ioctl_lock;
1599
1584 /* list of dirty qgroups to be written at next commit */ 1600 /* list of dirty qgroups to be written at next commit */
1585 struct list_head dirty_qgroups; 1601 struct list_head dirty_qgroups;
1586 1602
1587 /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ 1603 /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
1588 u64 qgroup_seq; 1604 u64 qgroup_seq;
1589 1605
1606 /* qgroup rescan items */
1607 struct mutex qgroup_rescan_lock; /* protects the progress item */
1608 struct btrfs_key qgroup_rescan_progress;
1609 struct btrfs_workers qgroup_rescan_workers;
1610
1590 /* filesystem state */ 1611 /* filesystem state */
1591 unsigned long fs_state; 1612 unsigned long fs_state;
1592 1613
@@ -1808,6 +1829,12 @@ struct btrfs_ioctl_defrag_range_args {
1808 */ 1829 */
1809#define BTRFS_EXTENT_ITEM_KEY 168 1830#define BTRFS_EXTENT_ITEM_KEY 168
1810 1831
1832/*
1833 * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
1834 * the length, so we save the level in key->offset instead of the length.
1835 */
1836#define BTRFS_METADATA_ITEM_KEY 169
1837
1811#define BTRFS_TREE_BLOCK_REF_KEY 176 1838#define BTRFS_TREE_BLOCK_REF_KEY 176
1812 1839
1813#define BTRFS_EXTENT_DATA_REF_KEY 178 1840#define BTRFS_EXTENT_DATA_REF_KEY 178
@@ -2766,8 +2793,10 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
2766 2793
2767static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 2794static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
2768{ 2795{
2769 int t = btrfs_super_csum_type(s); 2796 u16 t = btrfs_super_csum_type(s);
2770 BUG_ON(t >= ARRAY_SIZE(btrfs_csum_sizes)); 2797 /*
2798 * csum type is validated at mount time
2799 */
2771 return btrfs_csum_sizes[t]; 2800 return btrfs_csum_sizes[t];
2772} 2801}
2773 2802
@@ -2864,8 +2893,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
2864 version, 64); 2893 version, 64);
2865BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, 2894BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
2866 flags, 64); 2895 flags, 64);
2867BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, 2896BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
2868 scan, 64); 2897 rescan, 64);
2869 2898
2870/* btrfs_qgroup_info_item */ 2899/* btrfs_qgroup_info_item */
2871BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, 2900BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
@@ -3005,7 +3034,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
3005int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 3034int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
3006int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 3035int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
3007 struct btrfs_root *root, u64 bytenr, 3036 struct btrfs_root *root, u64 bytenr,
3008 u64 num_bytes, u64 *refs, u64 *flags); 3037 u64 offset, int metadata, u64 *refs, u64 *flags);
3009int btrfs_pin_extent(struct btrfs_root *root, 3038int btrfs_pin_extent(struct btrfs_root *root,
3010 u64 bytenr, u64 num, int reserved); 3039 u64 bytenr, u64 num, int reserved);
3011int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, 3040int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
@@ -3017,8 +3046,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
3017 struct btrfs_fs_info *info, 3046 struct btrfs_fs_info *info,
3018 u64 bytenr); 3047 u64 bytenr);
3019void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 3048void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
3020u64 btrfs_find_block_group(struct btrfs_root *root,
3021 u64 search_start, u64 search_hint, int owner);
3022struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 3049struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3023 struct btrfs_root *root, u32 blocksize, 3050 struct btrfs_root *root, u32 blocksize,
3024 u64 parent, u64 root_objectid, 3051 u64 parent, u64 root_objectid,
@@ -3028,10 +3055,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3028 struct btrfs_root *root, 3055 struct btrfs_root *root,
3029 struct extent_buffer *buf, 3056 struct extent_buffer *buf,
3030 u64 parent, int last_ref); 3057 u64 parent, int last_ref);
3031struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3032 struct btrfs_root *root,
3033 u64 bytenr, u32 blocksize,
3034 int level);
3035int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 3058int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
3036 struct btrfs_root *root, 3059 struct btrfs_root *root,
3037 u64 root_objectid, u64 owner, 3060 u64 root_objectid, u64 owner,
@@ -3044,7 +3067,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3044 struct btrfs_root *root, 3067 struct btrfs_root *root,
3045 u64 num_bytes, u64 min_alloc_size, 3068 u64 num_bytes, u64 min_alloc_size,
3046 u64 empty_size, u64 hint_byte, 3069 u64 empty_size, u64 hint_byte,
3047 struct btrfs_key *ins, u64 data); 3070 struct btrfs_key *ins, int is_data);
3048int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3071int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3049 struct extent_buffer *buf, int full_backref, int for_cow); 3072 struct extent_buffer *buf, int full_backref, int for_cow);
3050int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3073int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3084,7 +3107,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
3084 struct btrfs_root *root, u64 group_start); 3107 struct btrfs_root *root, u64 group_start);
3085void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, 3108void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
3086 struct btrfs_root *root); 3109 struct btrfs_root *root);
3087u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
3088u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); 3110u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
3089void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 3111void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
3090 3112
@@ -3161,8 +3183,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
3161int btrfs_previous_item(struct btrfs_root *root, 3183int btrfs_previous_item(struct btrfs_root *root,
3162 struct btrfs_path *path, u64 min_objectid, 3184 struct btrfs_path *path, u64 min_objectid,
3163 int type); 3185 int type);
3164void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 3186void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
3165 struct btrfs_root *root, struct btrfs_path *path,
3166 struct btrfs_key *new_key); 3187 struct btrfs_key *new_key);
3167struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 3188struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
3168struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); 3189struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -3198,12 +3219,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
3198 struct extent_buffer **cow_ret, u64 new_root_objectid); 3219 struct extent_buffer **cow_ret, u64 new_root_objectid);
3199int btrfs_block_can_be_shared(struct btrfs_root *root, 3220int btrfs_block_can_be_shared(struct btrfs_root *root,
3200 struct extent_buffer *buf); 3221 struct extent_buffer *buf);
3201void btrfs_extend_item(struct btrfs_trans_handle *trans, 3222void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
3202 struct btrfs_root *root, struct btrfs_path *path,
3203 u32 data_size); 3223 u32 data_size);
3204void btrfs_truncate_item(struct btrfs_trans_handle *trans, 3224void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
3205 struct btrfs_root *root,
3206 struct btrfs_path *path,
3207 u32 new_size, int from_end); 3225 u32 new_size, int from_end);
3208int btrfs_split_item(struct btrfs_trans_handle *trans, 3226int btrfs_split_item(struct btrfs_trans_handle *trans,
3209 struct btrfs_root *root, 3227 struct btrfs_root *root,
@@ -3243,8 +3261,7 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
3243 return btrfs_del_items(trans, root, path, path->slots[0], 1); 3261 return btrfs_del_items(trans, root, path, path->slots[0], 1);
3244} 3262}
3245 3263
3246void setup_items_for_insert(struct btrfs_trans_handle *trans, 3264void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
3247 struct btrfs_root *root, struct btrfs_path *path,
3248 struct btrfs_key *cpu_key, u32 *data_size, 3265 struct btrfs_key *cpu_key, u32 *data_size,
3249 u32 total_data, u32 total_size, int nr); 3266 u32 total_data, u32 total_size, int nr);
3250int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 3267int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
@@ -3264,9 +3281,6 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
3264} 3281}
3265 3282
3266int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 3283int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
3267int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
3268 struct btrfs_root *root, struct btrfs_path *path,
3269 int del);
3270int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 3284int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
3271 u64 time_seq); 3285 u64 time_seq);
3272static inline int btrfs_next_old_item(struct btrfs_root *root, 3286static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3281,7 +3295,6 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
3281{ 3295{
3282 return btrfs_next_old_item(root, p, 0); 3296 return btrfs_next_old_item(root, p, 0);
3283} 3297}
3284int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
3285int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 3298int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
3286int __must_check btrfs_drop_snapshot(struct btrfs_root *root, 3299int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
3287 struct btrfs_block_rsv *block_rsv, 3300 struct btrfs_block_rsv *block_rsv,
@@ -3318,10 +3331,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
3318 struct seq_list *elem); 3331 struct seq_list *elem);
3319void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 3332void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
3320 struct seq_list *elem); 3333 struct seq_list *elem);
3321static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) 3334u64 btrfs_tree_mod_seq_prev(u64 seq);
3322{
3323 return atomic_inc_return(&fs_info->tree_mod_seq);
3324}
3325int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); 3335int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
3326 3336
3327/* root-item.c */ 3337/* root-item.c */
@@ -3345,9 +3355,8 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
3345 struct btrfs_root *root, 3355 struct btrfs_root *root,
3346 struct btrfs_key *key, 3356 struct btrfs_key *key,
3347 struct btrfs_root_item *item); 3357 struct btrfs_root_item *item);
3348void btrfs_read_root_item(struct btrfs_root *root, 3358void btrfs_read_root_item(struct extent_buffer *eb, int slot,
3349 struct extent_buffer *eb, int slot, 3359 struct btrfs_root_item *item);
3350 struct btrfs_root_item *item);
3351int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct 3360int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
3352 btrfs_root_item *item, struct btrfs_key *key); 3361 btrfs_root_item *item, struct btrfs_key *key);
3353int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); 3362int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
@@ -3380,9 +3389,6 @@ struct btrfs_dir_item *
3380btrfs_search_dir_index_item(struct btrfs_root *root, 3389btrfs_search_dir_index_item(struct btrfs_root *root,
3381 struct btrfs_path *path, u64 dirid, 3390 struct btrfs_path *path, u64 dirid,
3382 const char *name, int name_len); 3391 const char *name, int name_len);
3383struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
3384 struct btrfs_path *path,
3385 const char *name, int name_len);
3386int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, 3392int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
3387 struct btrfs_root *root, 3393 struct btrfs_root *root,
3388 struct btrfs_path *path, 3394 struct btrfs_path *path,
@@ -3460,16 +3466,11 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
3460 struct btrfs_root *root, 3466 struct btrfs_root *root,
3461 struct btrfs_path *path, u64 objectid, 3467 struct btrfs_path *path, u64 objectid,
3462 u64 bytenr, int mod); 3468 u64 bytenr, int mod);
3463u64 btrfs_file_extent_length(struct btrfs_path *path);
3464int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 3469int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
3465 struct btrfs_root *root, 3470 struct btrfs_root *root,
3466 struct btrfs_ordered_sum *sums); 3471 struct btrfs_ordered_sum *sums);
3467int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, 3472int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
3468 struct bio *bio, u64 file_start, int contig); 3473 struct bio *bio, u64 file_start, int contig);
3469struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
3470 struct btrfs_root *root,
3471 struct btrfs_path *path,
3472 u64 bytenr, int cow);
3473int btrfs_csum_truncate(struct btrfs_trans_handle *trans, 3474int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
3474 struct btrfs_root *root, struct btrfs_path *path, 3475 struct btrfs_root *root, struct btrfs_path *path,
3475 u64 isize); 3476 u64 isize);
@@ -3531,8 +3532,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3531int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 3532int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
3532int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 3533int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
3533 struct extent_state **cached_state); 3534 struct extent_state **cached_state);
3534int btrfs_writepages(struct address_space *mapping,
3535 struct writeback_control *wbc);
3536int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 3535int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
3537 struct btrfs_root *new_root, u64 new_dirid); 3536 struct btrfs_root *new_root, u64 new_dirid);
3538int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, 3537int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
@@ -3542,7 +3541,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
3542int btrfs_readpage(struct file *file, struct page *page); 3541int btrfs_readpage(struct file *file, struct page *page);
3543void btrfs_evict_inode(struct inode *inode); 3542void btrfs_evict_inode(struct inode *inode);
3544int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 3543int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
3545int btrfs_dirty_inode(struct inode *inode);
3546struct inode *btrfs_alloc_inode(struct super_block *sb); 3544struct inode *btrfs_alloc_inode(struct super_block *sb);
3547void btrfs_destroy_inode(struct inode *inode); 3545void btrfs_destroy_inode(struct inode *inode);
3548int btrfs_drop_inode(struct inode *inode); 3546int btrfs_drop_inode(struct inode *inode);
@@ -3560,7 +3558,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
3560int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, 3558int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3561 struct btrfs_root *root, struct inode *inode); 3559 struct btrfs_root *root, struct inode *inode);
3562int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); 3560int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
3563int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
3564int btrfs_orphan_cleanup(struct btrfs_root *root); 3561int btrfs_orphan_cleanup(struct btrfs_root *root);
3565void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, 3562void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
3566 struct btrfs_root *root); 3563 struct btrfs_root *root);
@@ -3611,7 +3608,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
3611int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 3608int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
3612 struct inode *inode, u64 start, u64 end); 3609 struct inode *inode, u64 start, u64 end);
3613int btrfs_release_file(struct inode *inode, struct file *file); 3610int btrfs_release_file(struct inode *inode, struct file *file);
3614void btrfs_drop_pages(struct page **pages, size_t num_pages);
3615int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, 3611int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
3616 struct page **pages, size_t num_pages, 3612 struct page **pages, size_t num_pages,
3617 loff_t pos, size_t write_bytes, 3613 loff_t pos, size_t write_bytes,
@@ -3634,14 +3630,31 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
3634 3630
3635#ifdef CONFIG_PRINTK 3631#ifdef CONFIG_PRINTK
3636__printf(2, 3) 3632__printf(2, 3)
3637void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...); 3633void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
3638#else 3634#else
3639static inline __printf(2, 3) 3635static inline __printf(2, 3)
3640void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) 3636void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
3641{ 3637{
3642} 3638}
3643#endif 3639#endif
3644 3640
3641#define btrfs_emerg(fs_info, fmt, args...) \
3642 btrfs_printk(fs_info, KERN_EMERG fmt, ##args)
3643#define btrfs_alert(fs_info, fmt, args...) \
3644 btrfs_printk(fs_info, KERN_ALERT fmt, ##args)
3645#define btrfs_crit(fs_info, fmt, args...) \
3646 btrfs_printk(fs_info, KERN_CRIT fmt, ##args)
3647#define btrfs_err(fs_info, fmt, args...) \
3648 btrfs_printk(fs_info, KERN_ERR fmt, ##args)
3649#define btrfs_warn(fs_info, fmt, args...) \
3650 btrfs_printk(fs_info, KERN_WARNING fmt, ##args)
3651#define btrfs_notice(fs_info, fmt, args...) \
3652 btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
3653#define btrfs_info(fs_info, fmt, args...) \
3654 btrfs_printk(fs_info, KERN_INFO fmt, ##args)
3655#define btrfs_debug(fs_info, fmt, args...) \
3656 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
3657
3645__printf(5, 6) 3658__printf(5, 6)
3646void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 3659void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
3647 unsigned int line, int errno, const char *fmt, ...); 3660 unsigned int line, int errno, const char *fmt, ...);
@@ -3663,11 +3676,28 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
3663 disk_super = fs_info->super_copy; 3676 disk_super = fs_info->super_copy;
3664 features = btrfs_super_incompat_flags(disk_super); 3677 features = btrfs_super_incompat_flags(disk_super);
3665 if (!(features & flag)) { 3678 if (!(features & flag)) {
3666 features |= flag; 3679 spin_lock(&fs_info->super_lock);
3667 btrfs_set_super_incompat_flags(disk_super, features); 3680 features = btrfs_super_incompat_flags(disk_super);
3681 if (!(features & flag)) {
3682 features |= flag;
3683 btrfs_set_super_incompat_flags(disk_super, features);
3684 printk(KERN_INFO "btrfs: setting %llu feature flag\n",
3685 flag);
3686 }
3687 spin_unlock(&fs_info->super_lock);
3668 } 3688 }
3669} 3689}
3670 3690
3691#define btrfs_fs_incompat(fs_info, opt) \
3692 __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
3693
3694static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
3695{
3696 struct btrfs_super_block *disk_super;
3697 disk_super = fs_info->super_copy;
3698 return !!(btrfs_super_incompat_flags(disk_super) & flag);
3699}
3700
3671/* 3701/*
3672 * Call btrfs_abort_transaction as early as possible when an error condition is 3702 * Call btrfs_abort_transaction as early as possible when an error condition is
3673 * detected, that way the exact line number is reported. 3703 * detected, that way the exact line number is reported.
@@ -3753,7 +3783,6 @@ void btrfs_scrub_continue_super(struct btrfs_root *root);
3753int btrfs_scrub_cancel(struct btrfs_fs_info *info); 3783int btrfs_scrub_cancel(struct btrfs_fs_info *info);
3754int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, 3784int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
3755 struct btrfs_device *dev); 3785 struct btrfs_device *dev);
3756int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
3757int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, 3786int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3758 struct btrfs_scrub_progress *progress); 3787 struct btrfs_scrub_progress *progress);
3759 3788
@@ -3784,7 +3813,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
3784 struct btrfs_fs_info *fs_info); 3813 struct btrfs_fs_info *fs_info);
3785int btrfs_quota_disable(struct btrfs_trans_handle *trans, 3814int btrfs_quota_disable(struct btrfs_trans_handle *trans,
3786 struct btrfs_fs_info *fs_info); 3815 struct btrfs_fs_info *fs_info);
3787int btrfs_quota_rescan(struct btrfs_fs_info *fs_info); 3816int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
3788int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 3817int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
3789 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 3818 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
3790int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 3819int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 14fce27b4780..f26f38ccd194 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -202,7 +202,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
202 spin_unlock(&root->lock); 202 spin_unlock(&root->lock);
203} 203}
204 204
205struct btrfs_delayed_node *btrfs_first_delayed_node( 205static struct btrfs_delayed_node *btrfs_first_delayed_node(
206 struct btrfs_delayed_root *delayed_root) 206 struct btrfs_delayed_root *delayed_root)
207{ 207{
208 struct list_head *p; 208 struct list_head *p;
@@ -221,7 +221,7 @@ out:
221 return node; 221 return node;
222} 222}
223 223
224struct btrfs_delayed_node *btrfs_next_delayed_node( 224static struct btrfs_delayed_node *btrfs_next_delayed_node(
225 struct btrfs_delayed_node *node) 225 struct btrfs_delayed_node *node)
226{ 226{
227 struct btrfs_delayed_root *delayed_root; 227 struct btrfs_delayed_root *delayed_root;
@@ -282,7 +282,7 @@ static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
282 __btrfs_release_delayed_node(node, 0); 282 __btrfs_release_delayed_node(node, 0);
283} 283}
284 284
285struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( 285static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
286 struct btrfs_delayed_root *delayed_root) 286 struct btrfs_delayed_root *delayed_root)
287{ 287{
288 struct list_head *p; 288 struct list_head *p;
@@ -308,7 +308,7 @@ static inline void btrfs_release_prepared_delayed_node(
308 __btrfs_release_delayed_node(node, 1); 308 __btrfs_release_delayed_node(node, 1);
309} 309}
310 310
311struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) 311static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
312{ 312{
313 struct btrfs_delayed_item *item; 313 struct btrfs_delayed_item *item;
314 item = kmalloc(sizeof(*item) + data_len, GFP_NOFS); 314 item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
@@ -383,7 +383,7 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
383 return NULL; 383 return NULL;
384} 384}
385 385
386struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( 386static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
387 struct btrfs_delayed_node *delayed_node, 387 struct btrfs_delayed_node *delayed_node,
388 struct btrfs_key *key) 388 struct btrfs_key *key)
389{ 389{
@@ -394,45 +394,6 @@ struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
394 return item; 394 return item;
395} 395}
396 396
397struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
398 struct btrfs_delayed_node *delayed_node,
399 struct btrfs_key *key)
400{
401 struct btrfs_delayed_item *item;
402
403 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
404 NULL, NULL);
405 return item;
406}
407
408struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
409 struct btrfs_delayed_node *delayed_node,
410 struct btrfs_key *key)
411{
412 struct btrfs_delayed_item *item, *next;
413
414 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
415 NULL, &next);
416 if (!item)
417 item = next;
418
419 return item;
420}
421
422struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
423 struct btrfs_delayed_node *delayed_node,
424 struct btrfs_key *key)
425{
426 struct btrfs_delayed_item *item, *next;
427
428 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
429 NULL, &next);
430 if (!item)
431 item = next;
432
433 return item;
434}
435
436static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node, 397static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
437 struct btrfs_delayed_item *ins, 398 struct btrfs_delayed_item *ins,
438 int action) 399 int action)
@@ -535,7 +496,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
535 } 496 }
536} 497}
537 498
538struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item( 499static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
539 struct btrfs_delayed_node *delayed_node) 500 struct btrfs_delayed_node *delayed_node)
540{ 501{
541 struct rb_node *p; 502 struct rb_node *p;
@@ -548,7 +509,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
548 return item; 509 return item;
549} 510}
550 511
551struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item( 512static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
552 struct btrfs_delayed_node *delayed_node) 513 struct btrfs_delayed_node *delayed_node)
553{ 514{
554 struct rb_node *p; 515 struct rb_node *p;
@@ -561,7 +522,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
561 return item; 522 return item;
562} 523}
563 524
564struct btrfs_delayed_item *__btrfs_next_delayed_item( 525static struct btrfs_delayed_item *__btrfs_next_delayed_item(
565 struct btrfs_delayed_item *item) 526 struct btrfs_delayed_item *item)
566{ 527{
567 struct rb_node *p; 528 struct rb_node *p;
@@ -766,10 +727,9 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
766 * This helper will insert some continuous items into the same leaf according 727 * This helper will insert some continuous items into the same leaf according
767 * to the free space of the leaf. 728 * to the free space of the leaf.
768 */ 729 */
769static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, 730static int btrfs_batch_insert_items(struct btrfs_root *root,
770 struct btrfs_root *root, 731 struct btrfs_path *path,
771 struct btrfs_path *path, 732 struct btrfs_delayed_item *item)
772 struct btrfs_delayed_item *item)
773{ 733{
774 struct btrfs_delayed_item *curr, *next; 734 struct btrfs_delayed_item *curr, *next;
775 int free_space; 735 int free_space;
@@ -848,7 +808,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
848 btrfs_clear_path_blocking(path, NULL, 0); 808 btrfs_clear_path_blocking(path, NULL, 0);
849 809
850 /* insert the keys of the items */ 810 /* insert the keys of the items */
851 setup_items_for_insert(trans, root, path, keys, data_size, 811 setup_items_for_insert(root, path, keys, data_size,
852 total_data_size, total_size, nitems); 812 total_data_size, total_size, nitems);
853 813
854 /* insert the dir index items */ 814 /* insert the dir index items */
@@ -932,7 +892,7 @@ do_again:
932 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) { 892 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
933 /* insert the continuous items into the same leaf */ 893 /* insert the continuous items into the same leaf */
934 path->slots[0]++; 894 path->slots[0]++;
935 btrfs_batch_insert_items(trans, root, path, curr); 895 btrfs_batch_insert_items(root, path, curr);
936 } 896 }
937 btrfs_release_delayed_item(prev); 897 btrfs_release_delayed_item(prev);
938 btrfs_mark_buffer_dirty(path->nodes[0]); 898 btrfs_mark_buffer_dirty(path->nodes[0]);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index b7a0641ead77..c219463fb1fd 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -40,16 +40,19 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
40 * compare two delayed tree backrefs with same bytenr and type 40 * compare two delayed tree backrefs with same bytenr and type
41 */ 41 */
42static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, 42static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
43 struct btrfs_delayed_tree_ref *ref1) 43 struct btrfs_delayed_tree_ref *ref1, int type)
44{ 44{
45 if (ref1->root < ref2->root) 45 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
46 return -1; 46 if (ref1->root < ref2->root)
47 if (ref1->root > ref2->root) 47 return -1;
48 return 1; 48 if (ref1->root > ref2->root)
49 if (ref1->parent < ref2->parent) 49 return 1;
50 return -1; 50 } else {
51 if (ref1->parent > ref2->parent) 51 if (ref1->parent < ref2->parent)
52 return 1; 52 return -1;
53 if (ref1->parent > ref2->parent)
54 return 1;
55 }
53 return 0; 56 return 0;
54} 57}
55 58
@@ -113,7 +116,8 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
113 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || 116 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
114 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { 117 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
115 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), 118 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
116 btrfs_delayed_node_to_tree_ref(ref1)); 119 btrfs_delayed_node_to_tree_ref(ref1),
120 ref1->type);
117 } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY || 121 } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
118 ref1->type == BTRFS_SHARED_DATA_REF_KEY) { 122 ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
119 return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2), 123 return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
@@ -357,8 +361,10 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
357 elem = list_first_entry(&fs_info->tree_mod_seq_list, 361 elem = list_first_entry(&fs_info->tree_mod_seq_list,
358 struct seq_list, list); 362 struct seq_list, list);
359 if (seq >= elem->seq) { 363 if (seq >= elem->seq) {
360 pr_debug("holding back delayed_ref %llu, lowest is " 364 pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
361 "%llu (%p)\n", seq, elem->seq, delayed_refs); 365 (u32)(seq >> 32), (u32)seq,
366 (u32)(elem->seq >> 32), (u32)elem->seq,
367 delayed_refs);
362 ret = 1; 368 ret = 1;
363 } 369 }
364 } 370 }
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 502c2158167c..79e594e341c7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,6 +21,10 @@
21#include "hash.h" 21#include "hash.h"
22#include "transaction.h" 22#include "transaction.h"
23 23
24static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
25 struct btrfs_path *path,
26 const char *name, int name_len);
27
24/* 28/*
25 * insert a name into a directory, doing overflow properly if there is a hash 29 * insert a name into a directory, doing overflow properly if there is a hash
26 * collision. data_size indicates how big the item inserted should be. On 30 * collision. data_size indicates how big the item inserted should be. On
@@ -49,7 +53,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
49 di = btrfs_match_dir_item_name(root, path, name, name_len); 53 di = btrfs_match_dir_item_name(root, path, name, name_len);
50 if (di) 54 if (di)
51 return ERR_PTR(-EEXIST); 55 return ERR_PTR(-EEXIST);
52 btrfs_extend_item(trans, root, path, data_size); 56 btrfs_extend_item(root, path, data_size);
53 } else if (ret < 0) 57 } else if (ret < 0)
54 return ERR_PTR(ret); 58 return ERR_PTR(ret);
55 WARN_ON(ret > 0); 59 WARN_ON(ret > 0);
@@ -379,7 +383,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
379 * this walks through all the entries in a dir item and finds one 383 * this walks through all the entries in a dir item and finds one
380 * for a specific name. 384 * for a specific name.
381 */ 385 */
382struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, 386static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
383 struct btrfs_path *path, 387 struct btrfs_path *path,
384 const char *name, int name_len) 388 const char *name, int name_len)
385{ 389{
@@ -442,8 +446,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
442 start = btrfs_item_ptr_offset(leaf, path->slots[0]); 446 start = btrfs_item_ptr_offset(leaf, path->slots[0]);
443 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 447 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
444 item_len - (ptr + sub_item_len - start)); 448 item_len - (ptr + sub_item_len - start));
445 btrfs_truncate_item(trans, root, path, 449 btrfs_truncate_item(root, path, item_len - sub_item_len, 1);
446 item_len - sub_item_len, 1);
447 } 450 }
448 return ret; 451 return ret;
449} 452}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6d19a0a554aa..4e9ebe1f1827 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,6 +30,7 @@
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <linux/ratelimit.h> 32#include <linux/ratelimit.h>
33#include <linux/uuid.h>
33#include <asm/unaligned.h> 34#include <asm/unaligned.h>
34#include "compat.h" 35#include "compat.h"
35#include "ctree.h" 36#include "ctree.h"
@@ -69,6 +70,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
69 int mark); 70 int mark);
70static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 71static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
71 struct extent_io_tree *pinned_extents); 72 struct extent_io_tree *pinned_extents);
73static int btrfs_cleanup_transaction(struct btrfs_root *root);
74static void btrfs_error_commit_super(struct btrfs_root *root);
72 75
73/* 76/*
74 * end_io_wq structs are used to do processing in task context when an IO is 77 * end_io_wq structs are used to do processing in task context when an IO is
@@ -222,7 +225,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
222 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 225 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
223 226
224 write_lock(&em_tree->lock); 227 write_lock(&em_tree->lock);
225 ret = add_extent_mapping(em_tree, em); 228 ret = add_extent_mapping(em_tree, em, 0);
226 if (ret == -EEXIST) { 229 if (ret == -EEXIST) {
227 free_extent_map(em); 230 free_extent_map(em);
228 em = lookup_extent_mapping(em_tree, start, len); 231 em = lookup_extent_mapping(em_tree, start, len);
@@ -238,7 +241,7 @@ out:
238 return em; 241 return em;
239} 242}
240 243
241u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) 244u32 btrfs_csum_data(char *data, u32 seed, size_t len)
242{ 245{
243 return crc32c(seed, data, len); 246 return crc32c(seed, data, len);
244} 247}
@@ -274,7 +277,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
274 if (err) 277 if (err)
275 return 1; 278 return 1;
276 cur_len = min(len, map_len - (offset - map_start)); 279 cur_len = min(len, map_len - (offset - map_start));
277 crc = btrfs_csum_data(root, kaddr + offset - map_start, 280 crc = btrfs_csum_data(kaddr + offset - map_start,
278 crc, cur_len); 281 crc, cur_len);
279 len -= cur_len; 282 len -= cur_len;
280 offset += cur_len; 283 offset += cur_len;
@@ -354,6 +357,49 @@ out:
354} 357}
355 358
356/* 359/*
360 * Return 0 if the superblock checksum type matches the checksum value of that
361 * algorithm. Pass the raw disk superblock data.
362 */
363static int btrfs_check_super_csum(char *raw_disk_sb)
364{
365 struct btrfs_super_block *disk_sb =
366 (struct btrfs_super_block *)raw_disk_sb;
367 u16 csum_type = btrfs_super_csum_type(disk_sb);
368 int ret = 0;
369
370 if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
371 u32 crc = ~(u32)0;
372 const int csum_size = sizeof(crc);
373 char result[csum_size];
374
375 /*
376 * The super_block structure does not span the whole
377 * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
378 * is filled with zeros and is included in the checkum.
379 */
380 crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
381 crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
382 btrfs_csum_final(crc, result);
383
384 if (memcmp(raw_disk_sb, result, csum_size))
385 ret = 1;
386
387 if (ret && btrfs_super_generation(disk_sb) < 10) {
388 printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
389 ret = 0;
390 }
391 }
392
393 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
394 printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
395 csum_type);
396 ret = 1;
397 }
398
399 return ret;
400}
401
402/*
357 * helper to read a given tree block, doing retries as required when 403 * helper to read a given tree block, doing retries as required when
358 * the checksums don't match and we have alternate mirrors to try. 404 * the checksums don't match and we have alternate mirrors to try.
359 */ 405 */
@@ -530,41 +576,6 @@ static noinline int check_leaf(struct btrfs_root *root,
530 return 0; 576 return 0;
531} 577}
532 578
533struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
534 struct page *page, int max_walk)
535{
536 struct extent_buffer *eb;
537 u64 start = page_offset(page);
538 u64 target = start;
539 u64 min_start;
540
541 if (start < max_walk)
542 min_start = 0;
543 else
544 min_start = start - max_walk;
545
546 while (start >= min_start) {
547 eb = find_extent_buffer(tree, start, 0);
548 if (eb) {
549 /*
550 * we found an extent buffer and it contains our page
551 * hooray!
552 */
553 if (eb->start <= target &&
554 eb->start + eb->len > target)
555 return eb;
556
557 /* we found an extent buffer that wasn't for us */
558 free_extent_buffer(eb);
559 return NULL;
560 }
561 if (start == 0)
562 break;
563 start -= PAGE_CACHE_SIZE;
564 }
565 return NULL;
566}
567
568static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 579static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
569 struct extent_state *state, int mirror) 580 struct extent_state *state, int mirror)
570{ 581{
@@ -613,6 +624,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
613 goto err; 624 goto err;
614 } 625 }
615 found_level = btrfs_header_level(eb); 626 found_level = btrfs_header_level(eb);
627 if (found_level >= BTRFS_MAX_LEVEL) {
628 btrfs_info(root->fs_info, "bad tree block level %d\n",
629 (int)btrfs_header_level(eb));
630 ret = -EIO;
631 goto err;
632 }
616 633
617 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), 634 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
618 eb, found_level); 635 eb, found_level);
@@ -636,10 +653,9 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
636 if (!ret) 653 if (!ret)
637 set_extent_buffer_uptodate(eb); 654 set_extent_buffer_uptodate(eb);
638err: 655err:
639 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { 656 if (reads_done &&
640 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); 657 test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
641 btree_readahead_hook(root, eb, eb->start, ret); 658 btree_readahead_hook(root, eb, eb->start, ret);
642 }
643 659
644 if (ret) { 660 if (ret) {
645 /* 661 /*
@@ -993,14 +1009,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
993{ 1009{
994 if (PageWriteback(page) || PageDirty(page)) 1010 if (PageWriteback(page) || PageDirty(page))
995 return 0; 1011 return 0;
996 /*
997 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
998 * slab allocation from alloc_extent_state down the callchain where
999 * it'd hit a BUG_ON as those flags are not allowed.
1000 */
1001 gfp_flags &= ~GFP_SLAB_BUG_MASK;
1002 1012
1003 return try_release_extent_buffer(page, gfp_flags); 1013 return try_release_extent_buffer(page);
1004} 1014}
1005 1015
1006static void btree_invalidatepage(struct page *page, unsigned long offset) 1016static void btree_invalidatepage(struct page *page, unsigned long offset)
@@ -1275,6 +1285,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1275 struct btrfs_key key; 1285 struct btrfs_key key;
1276 int ret = 0; 1286 int ret = 0;
1277 u64 bytenr; 1287 u64 bytenr;
1288 uuid_le uuid;
1278 1289
1279 root = btrfs_alloc_root(fs_info); 1290 root = btrfs_alloc_root(fs_info);
1280 if (!root) 1291 if (!root)
@@ -1324,6 +1335,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1324 btrfs_set_root_used(&root->root_item, leaf->len); 1335 btrfs_set_root_used(&root->root_item, leaf->len);
1325 btrfs_set_root_last_snapshot(&root->root_item, 0); 1336 btrfs_set_root_last_snapshot(&root->root_item, 0);
1326 btrfs_set_root_dirid(&root->root_item, 0); 1337 btrfs_set_root_dirid(&root->root_item, 0);
1338 uuid_le_gen(&uuid);
1339 memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
1327 root->root_item.drop_level = 0; 1340 root->root_item.drop_level = 0;
1328 1341
1329 key.objectid = objectid; 1342 key.objectid = objectid;
@@ -1476,7 +1489,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1476 if (ret == 0) { 1489 if (ret == 0) {
1477 l = path->nodes[0]; 1490 l = path->nodes[0];
1478 slot = path->slots[0]; 1491 slot = path->slots[0];
1479 btrfs_read_root_item(tree_root, l, slot, &root->root_item); 1492 btrfs_read_root_item(l, slot, &root->root_item);
1480 memcpy(&root->root_key, location, sizeof(*location)); 1493 memcpy(&root->root_key, location, sizeof(*location));
1481 } 1494 }
1482 btrfs_free_path(path); 1495 btrfs_free_path(path);
@@ -1491,6 +1504,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1491 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1504 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1492 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1505 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1493 blocksize, generation); 1506 blocksize, generation);
1507 if (!root->node || !extent_buffer_uptodate(root->node)) {
1508 ret = (!root->node) ? -ENOMEM : -EIO;
1509
1510 free_extent_buffer(root->node);
1511 kfree(root);
1512 return ERR_PTR(ret);
1513 }
1514
1494 root->commit_root = btrfs_root_node(root); 1515 root->commit_root = btrfs_root_node(root);
1495 BUG_ON(!root->node); /* -ENOMEM */ 1516 BUG_ON(!root->node); /* -ENOMEM */
1496out: 1517out:
@@ -1658,15 +1679,20 @@ static int cleaner_kthread(void *arg)
1658 struct btrfs_root *root = arg; 1679 struct btrfs_root *root = arg;
1659 1680
1660 do { 1681 do {
1682 int again = 0;
1683
1661 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1684 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1662 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1685 down_read_trylock(&root->fs_info->sb->s_umount)) {
1663 btrfs_run_delayed_iputs(root); 1686 if (mutex_trylock(&root->fs_info->cleaner_mutex)) {
1664 btrfs_clean_old_snapshots(root); 1687 btrfs_run_delayed_iputs(root);
1665 mutex_unlock(&root->fs_info->cleaner_mutex); 1688 again = btrfs_clean_one_deleted_snapshot(root);
1689 mutex_unlock(&root->fs_info->cleaner_mutex);
1690 }
1666 btrfs_run_defrag_inodes(root->fs_info); 1691 btrfs_run_defrag_inodes(root->fs_info);
1692 up_read(&root->fs_info->sb->s_umount);
1667 } 1693 }
1668 1694
1669 if (!try_to_freeze()) { 1695 if (!try_to_freeze() && !again) {
1670 set_current_state(TASK_INTERRUPTIBLE); 1696 set_current_state(TASK_INTERRUPTIBLE);
1671 if (!kthread_should_stop()) 1697 if (!kthread_should_stop())
1672 schedule(); 1698 schedule();
@@ -1935,6 +1961,28 @@ static noinline int next_root_backup(struct btrfs_fs_info *info,
1935 return 0; 1961 return 0;
1936} 1962}
1937 1963
1964/* helper to cleanup workers */
1965static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
1966{
1967 btrfs_stop_workers(&fs_info->generic_worker);
1968 btrfs_stop_workers(&fs_info->fixup_workers);
1969 btrfs_stop_workers(&fs_info->delalloc_workers);
1970 btrfs_stop_workers(&fs_info->workers);
1971 btrfs_stop_workers(&fs_info->endio_workers);
1972 btrfs_stop_workers(&fs_info->endio_meta_workers);
1973 btrfs_stop_workers(&fs_info->endio_raid56_workers);
1974 btrfs_stop_workers(&fs_info->rmw_workers);
1975 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
1976 btrfs_stop_workers(&fs_info->endio_write_workers);
1977 btrfs_stop_workers(&fs_info->endio_freespace_worker);
1978 btrfs_stop_workers(&fs_info->submit_workers);
1979 btrfs_stop_workers(&fs_info->delayed_workers);
1980 btrfs_stop_workers(&fs_info->caching_workers);
1981 btrfs_stop_workers(&fs_info->readahead_workers);
1982 btrfs_stop_workers(&fs_info->flush_workers);
1983 btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
1984}
1985
1938/* helper to cleanup tree roots */ 1986/* helper to cleanup tree roots */
1939static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) 1987static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1940{ 1988{
@@ -1972,6 +2020,36 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1972 } 2020 }
1973} 2021}
1974 2022
2023static void del_fs_roots(struct btrfs_fs_info *fs_info)
2024{
2025 int ret;
2026 struct btrfs_root *gang[8];
2027 int i;
2028
2029 while (!list_empty(&fs_info->dead_roots)) {
2030 gang[0] = list_entry(fs_info->dead_roots.next,
2031 struct btrfs_root, root_list);
2032 list_del(&gang[0]->root_list);
2033
2034 if (gang[0]->in_radix) {
2035 btrfs_free_fs_root(fs_info, gang[0]);
2036 } else {
2037 free_extent_buffer(gang[0]->node);
2038 free_extent_buffer(gang[0]->commit_root);
2039 kfree(gang[0]);
2040 }
2041 }
2042
2043 while (1) {
2044 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
2045 (void **)gang, 0,
2046 ARRAY_SIZE(gang));
2047 if (!ret)
2048 break;
2049 for (i = 0; i < ret; i++)
2050 btrfs_free_fs_root(fs_info, gang[i]);
2051 }
2052}
1975 2053
1976int open_ctree(struct super_block *sb, 2054int open_ctree(struct super_block *sb,
1977 struct btrfs_fs_devices *fs_devices, 2055 struct btrfs_fs_devices *fs_devices,
@@ -2060,6 +2138,7 @@ int open_ctree(struct super_block *sb,
2060 spin_lock_init(&fs_info->defrag_inodes_lock); 2138 spin_lock_init(&fs_info->defrag_inodes_lock);
2061 spin_lock_init(&fs_info->free_chunk_lock); 2139 spin_lock_init(&fs_info->free_chunk_lock);
2062 spin_lock_init(&fs_info->tree_mod_seq_lock); 2140 spin_lock_init(&fs_info->tree_mod_seq_lock);
2141 spin_lock_init(&fs_info->super_lock);
2063 rwlock_init(&fs_info->tree_mod_log_lock); 2142 rwlock_init(&fs_info->tree_mod_log_lock);
2064 mutex_init(&fs_info->reloc_mutex); 2143 mutex_init(&fs_info->reloc_mutex);
2065 seqlock_init(&fs_info->profiles_lock); 2144 seqlock_init(&fs_info->profiles_lock);
@@ -2083,7 +2162,7 @@ int open_ctree(struct super_block *sb,
2083 atomic_set(&fs_info->async_submit_draining, 0); 2162 atomic_set(&fs_info->async_submit_draining, 0);
2084 atomic_set(&fs_info->nr_async_bios, 0); 2163 atomic_set(&fs_info->nr_async_bios, 0);
2085 atomic_set(&fs_info->defrag_running, 0); 2164 atomic_set(&fs_info->defrag_running, 0);
2086 atomic_set(&fs_info->tree_mod_seq, 0); 2165 atomic64_set(&fs_info->tree_mod_seq, 0);
2087 fs_info->sb = sb; 2166 fs_info->sb = sb;
2088 fs_info->max_inline = 8192 * 1024; 2167 fs_info->max_inline = 8192 * 1024;
2089 fs_info->metadata_ratio = 0; 2168 fs_info->metadata_ratio = 0;
@@ -2187,11 +2266,13 @@ int open_ctree(struct super_block *sb,
2187 mutex_init(&fs_info->dev_replace.lock); 2266 mutex_init(&fs_info->dev_replace.lock);
2188 2267
2189 spin_lock_init(&fs_info->qgroup_lock); 2268 spin_lock_init(&fs_info->qgroup_lock);
2269 mutex_init(&fs_info->qgroup_ioctl_lock);
2190 fs_info->qgroup_tree = RB_ROOT; 2270 fs_info->qgroup_tree = RB_ROOT;
2191 INIT_LIST_HEAD(&fs_info->dirty_qgroups); 2271 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2192 fs_info->qgroup_seq = 1; 2272 fs_info->qgroup_seq = 1;
2193 fs_info->quota_enabled = 0; 2273 fs_info->quota_enabled = 0;
2194 fs_info->pending_quota_state = 0; 2274 fs_info->pending_quota_state = 0;
2275 mutex_init(&fs_info->qgroup_rescan_lock);
2195 2276
2196 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 2277 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
2197 btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 2278 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2211,12 +2292,31 @@ int open_ctree(struct super_block *sb,
2211 fs_info, BTRFS_ROOT_TREE_OBJECTID); 2292 fs_info, BTRFS_ROOT_TREE_OBJECTID);
2212 2293
2213 invalidate_bdev(fs_devices->latest_bdev); 2294 invalidate_bdev(fs_devices->latest_bdev);
2295
2296 /*
2297 * Read super block and check the signature bytes only
2298 */
2214 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 2299 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
2215 if (!bh) { 2300 if (!bh) {
2216 err = -EINVAL; 2301 err = -EINVAL;
2217 goto fail_alloc; 2302 goto fail_alloc;
2218 } 2303 }
2219 2304
2305 /*
2306 * We want to check superblock checksum, the type is stored inside.
2307 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
2308 */
2309 if (btrfs_check_super_csum(bh->b_data)) {
2310 printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
2311 err = -EINVAL;
2312 goto fail_alloc;
2313 }
2314
2315 /*
2316 * super_copy is zeroed at allocation time and we never touch the
2317 * following bytes up to INFO_SIZE, the checksum is calculated from
2318 * the whole block of INFO_SIZE
2319 */
2220 memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy)); 2320 memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
2221 memcpy(fs_info->super_for_commit, fs_info->super_copy, 2321 memcpy(fs_info->super_for_commit, fs_info->super_copy,
2222 sizeof(*fs_info->super_for_commit)); 2322 sizeof(*fs_info->super_for_commit));
@@ -2224,6 +2324,13 @@ int open_ctree(struct super_block *sb,
2224 2324
2225 memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE); 2325 memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
2226 2326
2327 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2328 if (ret) {
2329 printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
2330 err = -EINVAL;
2331 goto fail_alloc;
2332 }
2333
2227 disk_super = fs_info->super_copy; 2334 disk_super = fs_info->super_copy;
2228 if (!btrfs_super_root(disk_super)) 2335 if (!btrfs_super_root(disk_super))
2229 goto fail_alloc; 2336 goto fail_alloc;
@@ -2232,13 +2339,6 @@ int open_ctree(struct super_block *sb,
2232 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) 2339 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
2233 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); 2340 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
2234 2341
2235 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2236 if (ret) {
2237 printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
2238 err = ret;
2239 goto fail_alloc;
2240 }
2241
2242 /* 2342 /*
2243 * run through our array of backup supers and setup 2343 * run through our array of backup supers and setup
2244 * our ring pointer to the oldest one 2344 * our ring pointer to the oldest one
@@ -2290,6 +2390,9 @@ int open_ctree(struct super_block *sb,
2290 if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) 2390 if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
2291 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2391 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2292 2392
2393 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
2394 printk(KERN_ERR "btrfs: has skinny extents\n");
2395
2293 /* 2396 /*
2294 * flag our filesystem as having big metadata blocks if 2397 * flag our filesystem as having big metadata blocks if
2295 * they are bigger than the page size 2398 * they are bigger than the page size
@@ -2319,6 +2422,10 @@ int open_ctree(struct super_block *sb,
2319 goto fail_alloc; 2422 goto fail_alloc;
2320 } 2423 }
2321 2424
2425 /*
2426 * Needn't use the lock because there is no other task which will
2427 * update the flag.
2428 */
2322 btrfs_set_super_incompat_flags(disk_super, features); 2429 btrfs_set_super_incompat_flags(disk_super, features);
2323 2430
2324 features = btrfs_super_compat_ro_flags(disk_super) & 2431 features = btrfs_super_compat_ro_flags(disk_super) &
@@ -2394,6 +2501,8 @@ int open_ctree(struct super_block *sb,
2394 btrfs_init_workers(&fs_info->readahead_workers, "readahead", 2501 btrfs_init_workers(&fs_info->readahead_workers, "readahead",
2395 fs_info->thread_pool_size, 2502 fs_info->thread_pool_size,
2396 &fs_info->generic_worker); 2503 &fs_info->generic_worker);
2504 btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
2505 &fs_info->generic_worker);
2397 2506
2398 /* 2507 /*
2399 * endios are largely parallel and should have a very 2508 * endios are largely parallel and should have a very
@@ -2428,6 +2537,7 @@ int open_ctree(struct super_block *sb,
2428 ret |= btrfs_start_workers(&fs_info->caching_workers); 2537 ret |= btrfs_start_workers(&fs_info->caching_workers);
2429 ret |= btrfs_start_workers(&fs_info->readahead_workers); 2538 ret |= btrfs_start_workers(&fs_info->readahead_workers);
2430 ret |= btrfs_start_workers(&fs_info->flush_workers); 2539 ret |= btrfs_start_workers(&fs_info->flush_workers);
2540 ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
2431 if (ret) { 2541 if (ret) {
2432 err = -ENOMEM; 2542 err = -ENOMEM;
2433 goto fail_sb_buffer; 2543 goto fail_sb_buffer;
@@ -2475,8 +2585,8 @@ int open_ctree(struct super_block *sb,
2475 chunk_root->node = read_tree_block(chunk_root, 2585 chunk_root->node = read_tree_block(chunk_root,
2476 btrfs_super_chunk_root(disk_super), 2586 btrfs_super_chunk_root(disk_super),
2477 blocksize, generation); 2587 blocksize, generation);
2478 BUG_ON(!chunk_root->node); /* -ENOMEM */ 2588 if (!chunk_root->node ||
2479 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2589 !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
2480 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 2590 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
2481 sb->s_id); 2591 sb->s_id);
2482 goto fail_tree_roots; 2592 goto fail_tree_roots;
@@ -2661,6 +2771,13 @@ retry_root_backup:
2661 log_tree_root->node = read_tree_block(tree_root, bytenr, 2771 log_tree_root->node = read_tree_block(tree_root, bytenr,
2662 blocksize, 2772 blocksize,
2663 generation + 1); 2773 generation + 1);
2774 if (!log_tree_root->node ||
2775 !extent_buffer_uptodate(log_tree_root->node)) {
2776 printk(KERN_ERR "btrfs: failed to read log tree\n");
2777 free_extent_buffer(log_tree_root->node);
2778 kfree(log_tree_root);
2779 goto fail_trans_kthread;
2780 }
2664 /* returns with log_tree_root freed on success */ 2781 /* returns with log_tree_root freed on success */
2665 ret = btrfs_recover_log_trees(log_tree_root); 2782 ret = btrfs_recover_log_trees(log_tree_root);
2666 if (ret) { 2783 if (ret) {
@@ -2740,6 +2857,8 @@ fail_qgroup:
2740 btrfs_free_qgroup_config(fs_info); 2857 btrfs_free_qgroup_config(fs_info);
2741fail_trans_kthread: 2858fail_trans_kthread:
2742 kthread_stop(fs_info->transaction_kthread); 2859 kthread_stop(fs_info->transaction_kthread);
2860 del_fs_roots(fs_info);
2861 btrfs_cleanup_transaction(fs_info->tree_root);
2743fail_cleaner: 2862fail_cleaner:
2744 kthread_stop(fs_info->cleaner_kthread); 2863 kthread_stop(fs_info->cleaner_kthread);
2745 2864
@@ -2750,6 +2869,7 @@ fail_cleaner:
2750 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 2869 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
2751 2870
2752fail_block_groups: 2871fail_block_groups:
2872 btrfs_put_block_group_cache(fs_info);
2753 btrfs_free_block_groups(fs_info); 2873 btrfs_free_block_groups(fs_info);
2754 2874
2755fail_tree_roots: 2875fail_tree_roots:
@@ -2757,22 +2877,7 @@ fail_tree_roots:
2757 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2877 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2758 2878
2759fail_sb_buffer: 2879fail_sb_buffer:
2760 btrfs_stop_workers(&fs_info->generic_worker); 2880 btrfs_stop_all_workers(fs_info);
2761 btrfs_stop_workers(&fs_info->readahead_workers);
2762 btrfs_stop_workers(&fs_info->fixup_workers);
2763 btrfs_stop_workers(&fs_info->delalloc_workers);
2764 btrfs_stop_workers(&fs_info->workers);
2765 btrfs_stop_workers(&fs_info->endio_workers);
2766 btrfs_stop_workers(&fs_info->endio_meta_workers);
2767 btrfs_stop_workers(&fs_info->endio_raid56_workers);
2768 btrfs_stop_workers(&fs_info->rmw_workers);
2769 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2770 btrfs_stop_workers(&fs_info->endio_write_workers);
2771 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2772 btrfs_stop_workers(&fs_info->submit_workers);
2773 btrfs_stop_workers(&fs_info->delayed_workers);
2774 btrfs_stop_workers(&fs_info->caching_workers);
2775 btrfs_stop_workers(&fs_info->flush_workers);
2776fail_alloc: 2881fail_alloc:
2777fail_iput: 2882fail_iput:
2778 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2883 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2904,7 +3009,10 @@ static int write_dev_supers(struct btrfs_device *device,
2904 if (wait) { 3009 if (wait) {
2905 bh = __find_get_block(device->bdev, bytenr / 4096, 3010 bh = __find_get_block(device->bdev, bytenr / 4096,
2906 BTRFS_SUPER_INFO_SIZE); 3011 BTRFS_SUPER_INFO_SIZE);
2907 BUG_ON(!bh); 3012 if (!bh) {
3013 errors++;
3014 continue;
3015 }
2908 wait_on_buffer(bh); 3016 wait_on_buffer(bh);
2909 if (!buffer_uptodate(bh)) 3017 if (!buffer_uptodate(bh))
2910 errors++; 3018 errors++;
@@ -2919,7 +3027,7 @@ static int write_dev_supers(struct btrfs_device *device,
2919 btrfs_set_super_bytenr(sb, bytenr); 3027 btrfs_set_super_bytenr(sb, bytenr);
2920 3028
2921 crc = ~(u32)0; 3029 crc = ~(u32)0;
2922 crc = btrfs_csum_data(NULL, (char *)sb + 3030 crc = btrfs_csum_data((char *)sb +
2923 BTRFS_CSUM_SIZE, crc, 3031 BTRFS_CSUM_SIZE, crc,
2924 BTRFS_SUPER_INFO_SIZE - 3032 BTRFS_SUPER_INFO_SIZE -
2925 BTRFS_CSUM_SIZE); 3033 BTRFS_CSUM_SIZE);
@@ -2931,6 +3039,13 @@ static int write_dev_supers(struct btrfs_device *device,
2931 */ 3039 */
2932 bh = __getblk(device->bdev, bytenr / 4096, 3040 bh = __getblk(device->bdev, bytenr / 4096,
2933 BTRFS_SUPER_INFO_SIZE); 3041 BTRFS_SUPER_INFO_SIZE);
3042 if (!bh) {
3043 printk(KERN_ERR "btrfs: couldn't get super "
3044 "buffer head for bytenr %Lu\n", bytenr);
3045 errors++;
3046 continue;
3047 }
3048
2934 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); 3049 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
2935 3050
2936 /* one reference for submit_bh */ 3051 /* one reference for submit_bh */
@@ -3153,7 +3268,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
3153 return num_tolerated_disk_barrier_failures; 3268 return num_tolerated_disk_barrier_failures;
3154} 3269}
3155 3270
3156int write_all_supers(struct btrfs_root *root, int max_mirrors) 3271static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3157{ 3272{
3158 struct list_head *head; 3273 struct list_head *head;
3159 struct btrfs_device *dev; 3274 struct btrfs_device *dev;
@@ -3283,37 +3398,6 @@ static void free_fs_root(struct btrfs_root *root)
3283 kfree(root); 3398 kfree(root);
3284} 3399}
3285 3400
3286static void del_fs_roots(struct btrfs_fs_info *fs_info)
3287{
3288 int ret;
3289 struct btrfs_root *gang[8];
3290 int i;
3291
3292 while (!list_empty(&fs_info->dead_roots)) {
3293 gang[0] = list_entry(fs_info->dead_roots.next,
3294 struct btrfs_root, root_list);
3295 list_del(&gang[0]->root_list);
3296
3297 if (gang[0]->in_radix) {
3298 btrfs_free_fs_root(fs_info, gang[0]);
3299 } else {
3300 free_extent_buffer(gang[0]->node);
3301 free_extent_buffer(gang[0]->commit_root);
3302 kfree(gang[0]);
3303 }
3304 }
3305
3306 while (1) {
3307 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
3308 (void **)gang, 0,
3309 ARRAY_SIZE(gang));
3310 if (!ret)
3311 break;
3312 for (i = 0; i < ret; i++)
3313 btrfs_free_fs_root(fs_info, gang[i]);
3314 }
3315}
3316
3317int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) 3401int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
3318{ 3402{
3319 u64 root_objectid = 0; 3403 u64 root_objectid = 0;
@@ -3349,8 +3433,8 @@ int btrfs_commit_super(struct btrfs_root *root)
3349 3433
3350 mutex_lock(&root->fs_info->cleaner_mutex); 3434 mutex_lock(&root->fs_info->cleaner_mutex);
3351 btrfs_run_delayed_iputs(root); 3435 btrfs_run_delayed_iputs(root);
3352 btrfs_clean_old_snapshots(root);
3353 mutex_unlock(&root->fs_info->cleaner_mutex); 3436 mutex_unlock(&root->fs_info->cleaner_mutex);
3437 wake_up_process(root->fs_info->cleaner_kthread);
3354 3438
3355 /* wait until ongoing cleanup work done */ 3439 /* wait until ongoing cleanup work done */
3356 down_write(&root->fs_info->cleanup_work_sem); 3440 down_write(&root->fs_info->cleanup_work_sem);
@@ -3426,20 +3510,7 @@ int close_ctree(struct btrfs_root *root)
3426 percpu_counter_sum(&fs_info->delalloc_bytes)); 3510 percpu_counter_sum(&fs_info->delalloc_bytes));
3427 } 3511 }
3428 3512
3429 free_extent_buffer(fs_info->extent_root->node); 3513 free_root_pointers(fs_info, 1);
3430 free_extent_buffer(fs_info->extent_root->commit_root);
3431 free_extent_buffer(fs_info->tree_root->node);
3432 free_extent_buffer(fs_info->tree_root->commit_root);
3433 free_extent_buffer(fs_info->chunk_root->node);
3434 free_extent_buffer(fs_info->chunk_root->commit_root);
3435 free_extent_buffer(fs_info->dev_root->node);
3436 free_extent_buffer(fs_info->dev_root->commit_root);
3437 free_extent_buffer(fs_info->csum_root->node);
3438 free_extent_buffer(fs_info->csum_root->commit_root);
3439 if (fs_info->quota_root) {
3440 free_extent_buffer(fs_info->quota_root->node);
3441 free_extent_buffer(fs_info->quota_root->commit_root);
3442 }
3443 3514
3444 btrfs_free_block_groups(fs_info); 3515 btrfs_free_block_groups(fs_info);
3445 3516
@@ -3447,22 +3518,7 @@ int close_ctree(struct btrfs_root *root)
3447 3518
3448 iput(fs_info->btree_inode); 3519 iput(fs_info->btree_inode);
3449 3520
3450 btrfs_stop_workers(&fs_info->generic_worker); 3521 btrfs_stop_all_workers(fs_info);
3451 btrfs_stop_workers(&fs_info->fixup_workers);
3452 btrfs_stop_workers(&fs_info->delalloc_workers);
3453 btrfs_stop_workers(&fs_info->workers);
3454 btrfs_stop_workers(&fs_info->endio_workers);
3455 btrfs_stop_workers(&fs_info->endio_meta_workers);
3456 btrfs_stop_workers(&fs_info->endio_raid56_workers);
3457 btrfs_stop_workers(&fs_info->rmw_workers);
3458 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
3459 btrfs_stop_workers(&fs_info->endio_write_workers);
3460 btrfs_stop_workers(&fs_info->endio_freespace_worker);
3461 btrfs_stop_workers(&fs_info->submit_workers);
3462 btrfs_stop_workers(&fs_info->delayed_workers);
3463 btrfs_stop_workers(&fs_info->caching_workers);
3464 btrfs_stop_workers(&fs_info->readahead_workers);
3465 btrfs_stop_workers(&fs_info->flush_workers);
3466 3522
3467#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 3523#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
3468 if (btrfs_test_opt(root, CHECK_INTEGRITY)) 3524 if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3567,18 +3623,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3567static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 3623static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3568 int read_only) 3624 int read_only)
3569{ 3625{
3570 if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) { 3626 /*
3571 printk(KERN_ERR "btrfs: unsupported checksum algorithm\n"); 3627 * Placeholder for checks
3572 return -EINVAL; 3628 */
3573 }
3574
3575 if (read_only)
3576 return 0;
3577
3578 return 0; 3629 return 0;
3579} 3630}
3580 3631
3581void btrfs_error_commit_super(struct btrfs_root *root) 3632static void btrfs_error_commit_super(struct btrfs_root *root)
3582{ 3633{
3583 mutex_lock(&root->fs_info->cleaner_mutex); 3634 mutex_lock(&root->fs_info->cleaner_mutex);
3584 btrfs_run_delayed_iputs(root); 3635 btrfs_run_delayed_iputs(root);
@@ -3669,6 +3720,9 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3669 continue; 3720 continue;
3670 } 3721 }
3671 3722
3723 if (head->must_insert_reserved)
3724 btrfs_pin_extent(root, ref->bytenr,
3725 ref->num_bytes, 1);
3672 btrfs_free_delayed_extent_op(head->extent_op); 3726 btrfs_free_delayed_extent_op(head->extent_op);
3673 delayed_refs->num_heads--; 3727 delayed_refs->num_heads--;
3674 if (list_empty(&head->cluster)) 3728 if (list_empty(&head->cluster))
@@ -3740,13 +3794,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3740 int mark) 3794 int mark)
3741{ 3795{
3742 int ret; 3796 int ret;
3743 struct page *page;
3744 struct inode *btree_inode = root->fs_info->btree_inode;
3745 struct extent_buffer *eb; 3797 struct extent_buffer *eb;
3746 u64 start = 0; 3798 u64 start = 0;
3747 u64 end; 3799 u64 end;
3748 u64 offset;
3749 unsigned long index;
3750 3800
3751 while (1) { 3801 while (1) {
3752 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 3802 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
@@ -3756,36 +3806,17 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3756 3806
3757 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); 3807 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
3758 while (start <= end) { 3808 while (start <= end) {
3759 index = start >> PAGE_CACHE_SHIFT; 3809 eb = btrfs_find_tree_block(root, start,
3760 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 3810 root->leafsize);
3761 page = find_get_page(btree_inode->i_mapping, index); 3811 start += eb->len;
3762 if (!page) 3812 if (!eb)
3763 continue; 3813 continue;
3764 offset = page_offset(page); 3814 wait_on_extent_buffer_writeback(eb);
3765
3766 spin_lock(&dirty_pages->buffer_lock);
3767 eb = radix_tree_lookup(
3768 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
3769 offset >> PAGE_CACHE_SHIFT);
3770 spin_unlock(&dirty_pages->buffer_lock);
3771 if (eb)
3772 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
3773 &eb->bflags);
3774 if (PageWriteback(page))
3775 end_page_writeback(page);
3776
3777 lock_page(page);
3778 if (PageDirty(page)) {
3779 clear_page_dirty_for_io(page);
3780 spin_lock_irq(&page->mapping->tree_lock);
3781 radix_tree_tag_clear(&page->mapping->page_tree,
3782 page_index(page),
3783 PAGECACHE_TAG_DIRTY);
3784 spin_unlock_irq(&page->mapping->tree_lock);
3785 }
3786 3815
3787 unlock_page(page); 3816 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY,
3788 page_cache_release(page); 3817 &eb->bflags))
3818 clear_extent_buffer_dirty(eb);
3819 free_extent_buffer_stale(eb);
3789 } 3820 }
3790 } 3821 }
3791 3822
@@ -3866,7 +3897,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3866 */ 3897 */
3867} 3898}
3868 3899
3869int btrfs_cleanup_transaction(struct btrfs_root *root) 3900static int btrfs_cleanup_transaction(struct btrfs_root *root)
3870{ 3901{
3871 struct btrfs_transaction *t; 3902 struct btrfs_transaction *t;
3872 LIST_HEAD(list); 3903 LIST_HEAD(list);
@@ -3887,10 +3918,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3887 3918
3888 btrfs_destroy_delayed_refs(t, root); 3919 btrfs_destroy_delayed_refs(t, root);
3889 3920
3890 btrfs_block_rsv_release(root,
3891 &root->fs_info->trans_block_rsv,
3892 t->dirty_pages.dirty_bytes);
3893
3894 /* FIXME: cleanup wait for commit */ 3921 /* FIXME: cleanup wait for commit */
3895 t->in_commit = 1; 3922 t->in_commit = 1;
3896 t->blocked = 1; 3923 t->blocked = 1;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 034d7dc552b2..be69ce1b07a2 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, int max_mirrors); 61 struct btrfs_root *root, int max_mirrors);
62struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); 62struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
63int btrfs_commit_super(struct btrfs_root *root); 63int btrfs_commit_super(struct btrfs_root *root);
64void btrfs_error_commit_super(struct btrfs_root *root);
65struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 64struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
66 u64 bytenr, u32 blocksize); 65 u64 bytenr, u32 blocksize);
67struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 66struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
@@ -77,7 +76,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
77 int atomic); 76 int atomic);
78int btrfs_set_buffer_uptodate(struct extent_buffer *buf); 77int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
79int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 78int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
80u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); 79u32 btrfs_csum_data(char *data, u32 seed, size_t len);
81void btrfs_csum_final(u32 crc, char *result); 80void btrfs_csum_final(u32 crc, char *result);
82int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 81int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
83 int metadata); 82 int metadata);
@@ -93,10 +92,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
93 struct btrfs_fs_info *fs_info); 92 struct btrfs_fs_info *fs_info);
94int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 93int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
95 struct btrfs_root *root); 94 struct btrfs_root *root);
96int btrfs_cleanup_transaction(struct btrfs_root *root);
97void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, 95void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
98 struct btrfs_root *root); 96 struct btrfs_root *root);
99void btrfs_abort_devices(struct btrfs_root *root);
100struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, 97struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
101 struct btrfs_fs_info *fs_info, 98 struct btrfs_fs_info *fs_info,
102 u64 objectid); 99 u64 objectid);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d551231caba..2305b5c5cf00 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -105,6 +105,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
105 u64 num_bytes, int reserve); 105 u64 num_bytes, int reserve);
106static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 106static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
107 u64 num_bytes); 107 u64 num_bytes);
108int btrfs_pin_extent(struct btrfs_root *root,
109 u64 bytenr, u64 num_bytes, int reserved);
108 110
109static noinline int 111static noinline int
110block_group_cache_done(struct btrfs_block_group_cache *cache) 112block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -270,9 +272,27 @@ static int exclude_super_stripes(struct btrfs_root *root,
270 return ret; 272 return ret;
271 273
272 while (nr--) { 274 while (nr--) {
273 cache->bytes_super += stripe_len; 275 u64 start, len;
274 ret = add_excluded_extent(root, logical[nr], 276
275 stripe_len); 277 if (logical[nr] > cache->key.objectid +
278 cache->key.offset)
279 continue;
280
281 if (logical[nr] + stripe_len <= cache->key.objectid)
282 continue;
283
284 start = logical[nr];
285 if (start < cache->key.objectid) {
286 start = cache->key.objectid;
287 len = (logical[nr] + stripe_len) - start;
288 } else {
289 len = min_t(u64, stripe_len,
290 cache->key.objectid +
291 cache->key.offset - start);
292 }
293
294 cache->bytes_super += len;
295 ret = add_excluded_extent(root, start, len);
276 if (ret) { 296 if (ret) {
277 kfree(logical); 297 kfree(logical);
278 return ret; 298 return ret;
@@ -419,8 +439,7 @@ again:
419 if (ret) 439 if (ret)
420 break; 440 break;
421 441
422 if (need_resched() || 442 if (need_resched()) {
423 btrfs_next_leaf(extent_root, path)) {
424 caching_ctl->progress = last; 443 caching_ctl->progress = last;
425 btrfs_release_path(path); 444 btrfs_release_path(path);
426 up_read(&fs_info->extent_commit_sem); 445 up_read(&fs_info->extent_commit_sem);
@@ -428,6 +447,12 @@ again:
428 cond_resched(); 447 cond_resched();
429 goto again; 448 goto again;
430 } 449 }
450
451 ret = btrfs_next_leaf(extent_root, path);
452 if (ret < 0)
453 goto err;
454 if (ret)
455 break;
431 leaf = path->nodes[0]; 456 leaf = path->nodes[0];
432 nritems = btrfs_header_nritems(leaf); 457 nritems = btrfs_header_nritems(leaf);
433 continue; 458 continue;
@@ -442,11 +467,16 @@ again:
442 block_group->key.offset) 467 block_group->key.offset)
443 break; 468 break;
444 469
445 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 470 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
471 key.type == BTRFS_METADATA_ITEM_KEY) {
446 total_found += add_new_free_space(block_group, 472 total_found += add_new_free_space(block_group,
447 fs_info, last, 473 fs_info, last,
448 key.objectid); 474 key.objectid);
449 last = key.objectid + key.offset; 475 if (key.type == BTRFS_METADATA_ITEM_KEY)
476 last = key.objectid +
477 fs_info->tree_root->leafsize;
478 else
479 last = key.objectid + key.offset;
450 480
451 if (total_found > (1024 * 1024 * 2)) { 481 if (total_found > (1024 * 1024 * 2)) {
452 total_found = 0; 482 total_found = 0;
@@ -656,55 +686,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
656 rcu_read_unlock(); 686 rcu_read_unlock();
657} 687}
658 688
659u64 btrfs_find_block_group(struct btrfs_root *root,
660 u64 search_start, u64 search_hint, int owner)
661{
662 struct btrfs_block_group_cache *cache;
663 u64 used;
664 u64 last = max(search_hint, search_start);
665 u64 group_start = 0;
666 int full_search = 0;
667 int factor = 9;
668 int wrapped = 0;
669again:
670 while (1) {
671 cache = btrfs_lookup_first_block_group(root->fs_info, last);
672 if (!cache)
673 break;
674
675 spin_lock(&cache->lock);
676 last = cache->key.objectid + cache->key.offset;
677 used = btrfs_block_group_used(&cache->item);
678
679 if ((full_search || !cache->ro) &&
680 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
681 if (used + cache->pinned + cache->reserved <
682 div_factor(cache->key.offset, factor)) {
683 group_start = cache->key.objectid;
684 spin_unlock(&cache->lock);
685 btrfs_put_block_group(cache);
686 goto found;
687 }
688 }
689 spin_unlock(&cache->lock);
690 btrfs_put_block_group(cache);
691 cond_resched();
692 }
693 if (!wrapped) {
694 last = search_start;
695 wrapped = 1;
696 goto again;
697 }
698 if (!full_search && factor < 10) {
699 last = search_start;
700 full_search = 1;
701 factor = 10;
702 goto again;
703 }
704found:
705 return group_start;
706}
707
708/* simple helper to search for an existing extent at a given offset */ 689/* simple helper to search for an existing extent at a given offset */
709int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 690int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
710{ 691{
@@ -718,15 +699,21 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
718 699
719 key.objectid = start; 700 key.objectid = start;
720 key.offset = len; 701 key.offset = len;
721 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 702 key.type = BTRFS_EXTENT_ITEM_KEY;
722 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 703 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
723 0, 0); 704 0, 0);
705 if (ret > 0) {
706 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
707 if (key.objectid == start &&
708 key.type == BTRFS_METADATA_ITEM_KEY)
709 ret = 0;
710 }
724 btrfs_free_path(path); 711 btrfs_free_path(path);
725 return ret; 712 return ret;
726} 713}
727 714
728/* 715/*
729 * helper function to lookup reference count and flags of extent. 716 * helper function to lookup reference count and flags of a tree block.
730 * 717 *
731 * the head node for delayed ref is used to store the sum of all the 718 * the head node for delayed ref is used to store the sum of all the
732 * reference count modifications queued up in the rbtree. the head 719 * reference count modifications queued up in the rbtree. the head
@@ -736,7 +723,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
736 */ 723 */
737int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 724int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
738 struct btrfs_root *root, u64 bytenr, 725 struct btrfs_root *root, u64 bytenr,
739 u64 num_bytes, u64 *refs, u64 *flags) 726 u64 offset, int metadata, u64 *refs, u64 *flags)
740{ 727{
741 struct btrfs_delayed_ref_head *head; 728 struct btrfs_delayed_ref_head *head;
742 struct btrfs_delayed_ref_root *delayed_refs; 729 struct btrfs_delayed_ref_root *delayed_refs;
@@ -749,13 +736,29 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
749 u64 extent_flags; 736 u64 extent_flags;
750 int ret; 737 int ret;
751 738
739 /*
740 * If we don't have skinny metadata, don't bother doing anything
741 * different
742 */
743 if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
744 offset = root->leafsize;
745 metadata = 0;
746 }
747
752 path = btrfs_alloc_path(); 748 path = btrfs_alloc_path();
753 if (!path) 749 if (!path)
754 return -ENOMEM; 750 return -ENOMEM;
755 751
756 key.objectid = bytenr; 752 if (metadata) {
757 key.type = BTRFS_EXTENT_ITEM_KEY; 753 key.objectid = bytenr;
758 key.offset = num_bytes; 754 key.type = BTRFS_METADATA_ITEM_KEY;
755 key.offset = offset;
756 } else {
757 key.objectid = bytenr;
758 key.type = BTRFS_EXTENT_ITEM_KEY;
759 key.offset = offset;
760 }
761
759 if (!trans) { 762 if (!trans) {
760 path->skip_locking = 1; 763 path->skip_locking = 1;
761 path->search_commit_root = 1; 764 path->search_commit_root = 1;
@@ -766,6 +769,13 @@ again:
766 if (ret < 0) 769 if (ret < 0)
767 goto out_free; 770 goto out_free;
768 771
772 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
773 key.type = BTRFS_EXTENT_ITEM_KEY;
774 key.offset = root->leafsize;
775 btrfs_release_path(path);
776 goto again;
777 }
778
769 if (ret == 0) { 779 if (ret == 0) {
770 leaf = path->nodes[0]; 780 leaf = path->nodes[0];
771 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 781 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -1001,7 +1011,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
1001 return ret; 1011 return ret;
1002 BUG_ON(ret); /* Corruption */ 1012 BUG_ON(ret); /* Corruption */
1003 1013
1004 btrfs_extend_item(trans, root, path, new_size); 1014 btrfs_extend_item(root, path, new_size);
1005 1015
1006 leaf = path->nodes[0]; 1016 leaf = path->nodes[0];
1007 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1017 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1453,6 +1463,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1453 int want; 1463 int want;
1454 int ret; 1464 int ret;
1455 int err = 0; 1465 int err = 0;
1466 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
1467 SKINNY_METADATA);
1456 1468
1457 key.objectid = bytenr; 1469 key.objectid = bytenr;
1458 key.type = BTRFS_EXTENT_ITEM_KEY; 1470 key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -1464,11 +1476,46 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1464 path->keep_locks = 1; 1476 path->keep_locks = 1;
1465 } else 1477 } else
1466 extra_size = -1; 1478 extra_size = -1;
1479
1480 /*
1481 * Owner is our parent level, so we can just add one to get the level
1482 * for the block we are interested in.
1483 */
1484 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
1485 key.type = BTRFS_METADATA_ITEM_KEY;
1486 key.offset = owner;
1487 }
1488
1489again:
1467 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1490 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1468 if (ret < 0) { 1491 if (ret < 0) {
1469 err = ret; 1492 err = ret;
1470 goto out; 1493 goto out;
1471 } 1494 }
1495
1496 /*
1497 * We may be a newly converted file system which still has the old fat
1498 * extent entries for metadata, so try and see if we have one of those.
1499 */
1500 if (ret > 0 && skinny_metadata) {
1501 skinny_metadata = false;
1502 if (path->slots[0]) {
1503 path->slots[0]--;
1504 btrfs_item_key_to_cpu(path->nodes[0], &key,
1505 path->slots[0]);
1506 if (key.objectid == bytenr &&
1507 key.type == BTRFS_EXTENT_ITEM_KEY &&
1508 key.offset == num_bytes)
1509 ret = 0;
1510 }
1511 if (ret) {
1512 key.type = BTRFS_EXTENT_ITEM_KEY;
1513 key.offset = num_bytes;
1514 btrfs_release_path(path);
1515 goto again;
1516 }
1517 }
1518
1472 if (ret && !insert) { 1519 if (ret && !insert) {
1473 err = -ENOENT; 1520 err = -ENOENT;
1474 goto out; 1521 goto out;
@@ -1504,11 +1551,9 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1504 ptr = (unsigned long)(ei + 1); 1551 ptr = (unsigned long)(ei + 1);
1505 end = (unsigned long)ei + item_size; 1552 end = (unsigned long)ei + item_size;
1506 1553
1507 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1554 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1508 ptr += sizeof(struct btrfs_tree_block_info); 1555 ptr += sizeof(struct btrfs_tree_block_info);
1509 BUG_ON(ptr > end); 1556 BUG_ON(ptr > end);
1510 } else {
1511 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
1512 } 1557 }
1513 1558
1514 err = -ENOENT; 1559 err = -ENOENT;
@@ -1590,8 +1635,7 @@ out:
1590 * helper to add new inline back ref 1635 * helper to add new inline back ref
1591 */ 1636 */
1592static noinline_for_stack 1637static noinline_for_stack
1593void setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1638void setup_inline_extent_backref(struct btrfs_root *root,
1594 struct btrfs_root *root,
1595 struct btrfs_path *path, 1639 struct btrfs_path *path,
1596 struct btrfs_extent_inline_ref *iref, 1640 struct btrfs_extent_inline_ref *iref,
1597 u64 parent, u64 root_objectid, 1641 u64 parent, u64 root_objectid,
@@ -1614,7 +1658,7 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1614 type = extent_ref_type(parent, owner); 1658 type = extent_ref_type(parent, owner);
1615 size = btrfs_extent_inline_ref_size(type); 1659 size = btrfs_extent_inline_ref_size(type);
1616 1660
1617 btrfs_extend_item(trans, root, path, size); 1661 btrfs_extend_item(root, path, size);
1618 1662
1619 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1663 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1620 refs = btrfs_extent_refs(leaf, ei); 1664 refs = btrfs_extent_refs(leaf, ei);
@@ -1683,8 +1727,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1683 * helper to update/remove inline back ref 1727 * helper to update/remove inline back ref
1684 */ 1728 */
1685static noinline_for_stack 1729static noinline_for_stack
1686void update_inline_extent_backref(struct btrfs_trans_handle *trans, 1730void update_inline_extent_backref(struct btrfs_root *root,
1687 struct btrfs_root *root,
1688 struct btrfs_path *path, 1731 struct btrfs_path *path,
1689 struct btrfs_extent_inline_ref *iref, 1732 struct btrfs_extent_inline_ref *iref,
1690 int refs_to_mod, 1733 int refs_to_mod,
@@ -1740,7 +1783,7 @@ void update_inline_extent_backref(struct btrfs_trans_handle *trans,
1740 memmove_extent_buffer(leaf, ptr, ptr + size, 1783 memmove_extent_buffer(leaf, ptr, ptr + size,
1741 end - ptr - size); 1784 end - ptr - size);
1742 item_size -= size; 1785 item_size -= size;
1743 btrfs_truncate_item(trans, root, path, item_size, 1); 1786 btrfs_truncate_item(root, path, item_size, 1);
1744 } 1787 }
1745 btrfs_mark_buffer_dirty(leaf); 1788 btrfs_mark_buffer_dirty(leaf);
1746} 1789}
@@ -1762,10 +1805,10 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1762 root_objectid, owner, offset, 1); 1805 root_objectid, owner, offset, 1);
1763 if (ret == 0) { 1806 if (ret == 0) {
1764 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1807 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1765 update_inline_extent_backref(trans, root, path, iref, 1808 update_inline_extent_backref(root, path, iref,
1766 refs_to_add, extent_op); 1809 refs_to_add, extent_op);
1767 } else if (ret == -ENOENT) { 1810 } else if (ret == -ENOENT) {
1768 setup_inline_extent_backref(trans, root, path, iref, parent, 1811 setup_inline_extent_backref(root, path, iref, parent,
1769 root_objectid, owner, offset, 1812 root_objectid, owner, offset,
1770 refs_to_add, extent_op); 1813 refs_to_add, extent_op);
1771 ret = 0; 1814 ret = 0;
@@ -1802,7 +1845,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1802 1845
1803 BUG_ON(!is_data && refs_to_drop != 1); 1846 BUG_ON(!is_data && refs_to_drop != 1);
1804 if (iref) { 1847 if (iref) {
1805 update_inline_extent_backref(trans, root, path, iref, 1848 update_inline_extent_backref(root, path, iref,
1806 -refs_to_drop, NULL); 1849 -refs_to_drop, NULL);
1807 } else if (is_data) { 1850 } else if (is_data) {
1808 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1851 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
@@ -1973,10 +2016,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1973 ref_root = ref->root; 2016 ref_root = ref->root;
1974 2017
1975 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2018 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1976 if (extent_op) { 2019 if (extent_op)
1977 BUG_ON(extent_op->update_key);
1978 flags |= extent_op->flags_to_set; 2020 flags |= extent_op->flags_to_set;
1979 }
1980 ret = alloc_reserved_file_extent(trans, root, 2021 ret = alloc_reserved_file_extent(trans, root,
1981 parent, ref_root, flags, 2022 parent, ref_root, flags,
1982 ref->objectid, ref->offset, 2023 ref->objectid, ref->offset,
@@ -2029,18 +2070,33 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2029 u32 item_size; 2070 u32 item_size;
2030 int ret; 2071 int ret;
2031 int err = 0; 2072 int err = 0;
2073 int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2074 node->type == BTRFS_SHARED_BLOCK_REF_KEY);
2032 2075
2033 if (trans->aborted) 2076 if (trans->aborted)
2034 return 0; 2077 return 0;
2035 2078
2079 if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2080 metadata = 0;
2081
2036 path = btrfs_alloc_path(); 2082 path = btrfs_alloc_path();
2037 if (!path) 2083 if (!path)
2038 return -ENOMEM; 2084 return -ENOMEM;
2039 2085
2040 key.objectid = node->bytenr; 2086 key.objectid = node->bytenr;
2041 key.type = BTRFS_EXTENT_ITEM_KEY;
2042 key.offset = node->num_bytes;
2043 2087
2088 if (metadata) {
2089 struct btrfs_delayed_tree_ref *tree_ref;
2090
2091 tree_ref = btrfs_delayed_node_to_tree_ref(node);
2092 key.type = BTRFS_METADATA_ITEM_KEY;
2093 key.offset = tree_ref->level;
2094 } else {
2095 key.type = BTRFS_EXTENT_ITEM_KEY;
2096 key.offset = node->num_bytes;
2097 }
2098
2099again:
2044 path->reada = 1; 2100 path->reada = 1;
2045 path->leave_spinning = 1; 2101 path->leave_spinning = 1;
2046 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 2102 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
@@ -2050,6 +2106,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2050 goto out; 2106 goto out;
2051 } 2107 }
2052 if (ret > 0) { 2108 if (ret > 0) {
2109 if (metadata) {
2110 btrfs_release_path(path);
2111 metadata = 0;
2112
2113 key.offset = node->num_bytes;
2114 key.type = BTRFS_EXTENT_ITEM_KEY;
2115 goto again;
2116 }
2053 err = -EIO; 2117 err = -EIO;
2054 goto out; 2118 goto out;
2055 } 2119 }
@@ -2089,10 +2153,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2089 struct btrfs_key ins; 2153 struct btrfs_key ins;
2090 u64 parent = 0; 2154 u64 parent = 0;
2091 u64 ref_root = 0; 2155 u64 ref_root = 0;
2092 2156 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
2093 ins.objectid = node->bytenr; 2157 SKINNY_METADATA);
2094 ins.offset = node->num_bytes;
2095 ins.type = BTRFS_EXTENT_ITEM_KEY;
2096 2158
2097 ref = btrfs_delayed_node_to_tree_ref(node); 2159 ref = btrfs_delayed_node_to_tree_ref(node);
2098 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2160 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
@@ -2100,10 +2162,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2100 else 2162 else
2101 ref_root = ref->root; 2163 ref_root = ref->root;
2102 2164
2165 ins.objectid = node->bytenr;
2166 if (skinny_metadata) {
2167 ins.offset = ref->level;
2168 ins.type = BTRFS_METADATA_ITEM_KEY;
2169 } else {
2170 ins.offset = node->num_bytes;
2171 ins.type = BTRFS_EXTENT_ITEM_KEY;
2172 }
2173
2103 BUG_ON(node->ref_mod != 1); 2174 BUG_ON(node->ref_mod != 1);
2104 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2175 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2105 BUG_ON(!extent_op || !extent_op->update_flags || 2176 BUG_ON(!extent_op || !extent_op->update_flags);
2106 !extent_op->update_key);
2107 ret = alloc_reserved_tree_block(trans, root, 2177 ret = alloc_reserved_tree_block(trans, root,
2108 parent, ref_root, 2178 parent, ref_root,
2109 extent_op->flags_to_set, 2179 extent_op->flags_to_set,
@@ -2307,9 +2377,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2307 btrfs_free_delayed_extent_op(extent_op); 2377 btrfs_free_delayed_extent_op(extent_op);
2308 2378
2309 if (ret) { 2379 if (ret) {
2310 printk(KERN_DEBUG 2380 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2311 "btrfs: run_delayed_extent_op "
2312 "returned %d\n", ret);
2313 spin_lock(&delayed_refs->lock); 2381 spin_lock(&delayed_refs->lock);
2314 btrfs_delayed_ref_unlock(locked_ref); 2382 btrfs_delayed_ref_unlock(locked_ref);
2315 return ret; 2383 return ret;
@@ -2348,8 +2416,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2348 if (ret) { 2416 if (ret) {
2349 btrfs_delayed_ref_unlock(locked_ref); 2417 btrfs_delayed_ref_unlock(locked_ref);
2350 btrfs_put_delayed_ref(ref); 2418 btrfs_put_delayed_ref(ref);
2351 printk(KERN_DEBUG 2419 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2352 "btrfs: run_one_delayed_ref returned %d\n", ret);
2353 spin_lock(&delayed_refs->lock); 2420 spin_lock(&delayed_refs->lock);
2354 return ret; 2421 return ret;
2355 } 2422 }
@@ -2426,9 +2493,11 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2426 if (list_empty(&trans->qgroup_ref_list) != 2493 if (list_empty(&trans->qgroup_ref_list) !=
2427 !trans->delayed_ref_elem.seq) { 2494 !trans->delayed_ref_elem.seq) {
2428 /* list without seq or seq without list */ 2495 /* list without seq or seq without list */
2429 printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n", 2496 btrfs_err(fs_info,
2497 "qgroup accounting update error, list is%s empty, seq is %#x.%x",
2430 list_empty(&trans->qgroup_ref_list) ? "" : " not", 2498 list_empty(&trans->qgroup_ref_list) ? "" : " not",
2431 trans->delayed_ref_elem.seq); 2499 (u32)(trans->delayed_ref_elem.seq >> 32),
2500 (u32)trans->delayed_ref_elem.seq);
2432 BUG(); 2501 BUG();
2433 } 2502 }
2434 2503
@@ -3337,7 +3406,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3337 * progress (either running or paused) picks the target profile (if it's 3406 * progress (either running or paused) picks the target profile (if it's
3338 * already available), otherwise falls back to plain reducing. 3407 * already available), otherwise falls back to plain reducing.
3339 */ 3408 */
3340u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3409static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3341{ 3410{
3342 /* 3411 /*
3343 * we add in the count of missing devices because we want 3412 * we add in the count of missing devices because we want
@@ -3557,6 +3626,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
3557 rcu_read_unlock(); 3626 rcu_read_unlock();
3558} 3627}
3559 3628
3629static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
3630{
3631 return (global->size << 1);
3632}
3633
3560static int should_alloc_chunk(struct btrfs_root *root, 3634static int should_alloc_chunk(struct btrfs_root *root,
3561 struct btrfs_space_info *sinfo, int force) 3635 struct btrfs_space_info *sinfo, int force)
3562{ 3636{
@@ -3574,7 +3648,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
3574 * global_rsv, it doesn't change except when the transaction commits. 3648 * global_rsv, it doesn't change except when the transaction commits.
3575 */ 3649 */
3576 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) 3650 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
3577 num_allocated += global_rsv->size; 3651 num_allocated += calc_global_rsv_need_space(global_rsv);
3578 3652
3579 /* 3653 /*
3580 * in limited mode, we want to have some free space up to 3654 * in limited mode, we want to have some free space up to
@@ -3627,8 +3701,8 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
3627 3701
3628 thresh = get_system_chunk_thresh(root, type); 3702 thresh = get_system_chunk_thresh(root, type);
3629 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) { 3703 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
3630 printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n", 3704 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
3631 left, thresh, type); 3705 left, thresh, type);
3632 dump_space_info(info, 0, 0); 3706 dump_space_info(info, 0, 0);
3633 } 3707 }
3634 3708
@@ -3746,7 +3820,7 @@ static int can_overcommit(struct btrfs_root *root,
3746{ 3820{
3747 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 3821 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3748 u64 profile = btrfs_get_alloc_profile(root, 0); 3822 u64 profile = btrfs_get_alloc_profile(root, 0);
3749 u64 rsv_size = 0; 3823 u64 space_size;
3750 u64 avail; 3824 u64 avail;
3751 u64 used; 3825 u64 used;
3752 u64 to_add; 3826 u64 to_add;
@@ -3754,18 +3828,16 @@ static int can_overcommit(struct btrfs_root *root,
3754 used = space_info->bytes_used + space_info->bytes_reserved + 3828 used = space_info->bytes_used + space_info->bytes_reserved +
3755 space_info->bytes_pinned + space_info->bytes_readonly; 3829 space_info->bytes_pinned + space_info->bytes_readonly;
3756 3830
3757 spin_lock(&global_rsv->lock);
3758 rsv_size = global_rsv->size;
3759 spin_unlock(&global_rsv->lock);
3760
3761 /* 3831 /*
3762 * We only want to allow over committing if we have lots of actual space 3832 * We only want to allow over committing if we have lots of actual space
3763 * free, but if we don't have enough space to handle the global reserve 3833 * free, but if we don't have enough space to handle the global reserve
3764 * space then we could end up having a real enospc problem when trying 3834 * space then we could end up having a real enospc problem when trying
3765 * to allocate a chunk or some other such important allocation. 3835 * to allocate a chunk or some other such important allocation.
3766 */ 3836 */
3767 rsv_size <<= 1; 3837 spin_lock(&global_rsv->lock);
3768 if (used + rsv_size >= space_info->total_bytes) 3838 space_size = calc_global_rsv_need_space(global_rsv);
3839 spin_unlock(&global_rsv->lock);
3840 if (used + space_size >= space_info->total_bytes)
3769 return 0; 3841 return 0;
3770 3842
3771 used += space_info->bytes_may_use; 3843 used += space_info->bytes_may_use;
@@ -3808,8 +3880,8 @@ static int can_overcommit(struct btrfs_root *root,
3808 return 0; 3880 return 0;
3809} 3881}
3810 3882
3811void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, 3883static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
3812 unsigned long nr_pages) 3884 unsigned long nr_pages)
3813{ 3885{
3814 struct super_block *sb = root->fs_info->sb; 3886 struct super_block *sb = root->fs_info->sb;
3815 int started; 3887 int started;
@@ -3826,7 +3898,8 @@ void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
3826 * the disk). 3898 * the disk).
3827 */ 3899 */
3828 btrfs_start_delalloc_inodes(root, 0); 3900 btrfs_start_delalloc_inodes(root, 0);
3829 btrfs_wait_ordered_extents(root, 0); 3901 if (!current->journal_info)
3902 btrfs_wait_ordered_extents(root, 0);
3830 } 3903 }
3831} 3904}
3832 3905
@@ -5090,9 +5163,11 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
5090 u64 bytenr, u64 num_bytes) 5163 u64 bytenr, u64 num_bytes)
5091{ 5164{
5092 struct btrfs_block_group_cache *cache; 5165 struct btrfs_block_group_cache *cache;
5166 int ret;
5093 5167
5094 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 5168 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
5095 BUG_ON(!cache); /* Logic error */ 5169 if (!cache)
5170 return -EINVAL;
5096 5171
5097 /* 5172 /*
5098 * pull in the free space cache (if any) so that our pin 5173 * pull in the free space cache (if any) so that our pin
@@ -5105,9 +5180,9 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
5105 pin_down_extent(root, cache, bytenr, num_bytes, 0); 5180 pin_down_extent(root, cache, bytenr, num_bytes, 0);
5106 5181
5107 /* remove us from the free space cache (if we're there at all) */ 5182 /* remove us from the free space cache (if we're there at all) */
5108 btrfs_remove_free_space(cache, bytenr, num_bytes); 5183 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
5109 btrfs_put_block_group(cache); 5184 btrfs_put_block_group(cache);
5110 return 0; 5185 return ret;
5111} 5186}
5112 5187
5113/** 5188/**
@@ -5312,6 +5387,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5312 int num_to_del = 1; 5387 int num_to_del = 1;
5313 u32 item_size; 5388 u32 item_size;
5314 u64 refs; 5389 u64 refs;
5390 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
5391 SKINNY_METADATA);
5315 5392
5316 path = btrfs_alloc_path(); 5393 path = btrfs_alloc_path();
5317 if (!path) 5394 if (!path)
@@ -5323,6 +5400,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5323 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 5400 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
5324 BUG_ON(!is_data && refs_to_drop != 1); 5401 BUG_ON(!is_data && refs_to_drop != 1);
5325 5402
5403 if (is_data)
5404 skinny_metadata = 0;
5405
5326 ret = lookup_extent_backref(trans, extent_root, path, &iref, 5406 ret = lookup_extent_backref(trans, extent_root, path, &iref,
5327 bytenr, num_bytes, parent, 5407 bytenr, num_bytes, parent,
5328 root_objectid, owner_objectid, 5408 root_objectid, owner_objectid,
@@ -5339,6 +5419,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5339 found_extent = 1; 5419 found_extent = 1;
5340 break; 5420 break;
5341 } 5421 }
5422 if (key.type == BTRFS_METADATA_ITEM_KEY &&
5423 key.offset == owner_objectid) {
5424 found_extent = 1;
5425 break;
5426 }
5342 if (path->slots[0] - extent_slot > 5) 5427 if (path->slots[0] - extent_slot > 5)
5343 break; 5428 break;
5344 extent_slot--; 5429 extent_slot--;
@@ -5364,12 +5449,39 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5364 key.type = BTRFS_EXTENT_ITEM_KEY; 5449 key.type = BTRFS_EXTENT_ITEM_KEY;
5365 key.offset = num_bytes; 5450 key.offset = num_bytes;
5366 5451
5452 if (!is_data && skinny_metadata) {
5453 key.type = BTRFS_METADATA_ITEM_KEY;
5454 key.offset = owner_objectid;
5455 }
5456
5367 ret = btrfs_search_slot(trans, extent_root, 5457 ret = btrfs_search_slot(trans, extent_root,
5368 &key, path, -1, 1); 5458 &key, path, -1, 1);
5459 if (ret > 0 && skinny_metadata && path->slots[0]) {
5460 /*
5461 * Couldn't find our skinny metadata item,
5462 * see if we have ye olde extent item.
5463 */
5464 path->slots[0]--;
5465 btrfs_item_key_to_cpu(path->nodes[0], &key,
5466 path->slots[0]);
5467 if (key.objectid == bytenr &&
5468 key.type == BTRFS_EXTENT_ITEM_KEY &&
5469 key.offset == num_bytes)
5470 ret = 0;
5471 }
5472
5473 if (ret > 0 && skinny_metadata) {
5474 skinny_metadata = false;
5475 key.type = BTRFS_EXTENT_ITEM_KEY;
5476 key.offset = num_bytes;
5477 btrfs_release_path(path);
5478 ret = btrfs_search_slot(trans, extent_root,
5479 &key, path, -1, 1);
5480 }
5481
5369 if (ret) { 5482 if (ret) {
5370 printk(KERN_ERR "umm, got %d back from search" 5483 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5371 ", was looking for %llu\n", ret, 5484 ret, (unsigned long long)bytenr);
5372 (unsigned long long)bytenr);
5373 if (ret > 0) 5485 if (ret > 0)
5374 btrfs_print_leaf(extent_root, 5486 btrfs_print_leaf(extent_root,
5375 path->nodes[0]); 5487 path->nodes[0]);
@@ -5383,13 +5495,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5383 } else if (ret == -ENOENT) { 5495 } else if (ret == -ENOENT) {
5384 btrfs_print_leaf(extent_root, path->nodes[0]); 5496 btrfs_print_leaf(extent_root, path->nodes[0]);
5385 WARN_ON(1); 5497 WARN_ON(1);
5386 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 5498 btrfs_err(info,
5387 "parent %llu root %llu owner %llu offset %llu\n", 5499 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
5388 (unsigned long long)bytenr, 5500 (unsigned long long)bytenr,
5389 (unsigned long long)parent, 5501 (unsigned long long)parent,
5390 (unsigned long long)root_objectid, 5502 (unsigned long long)root_objectid,
5391 (unsigned long long)owner_objectid, 5503 (unsigned long long)owner_objectid,
5392 (unsigned long long)owner_offset); 5504 (unsigned long long)owner_offset);
5393 } else { 5505 } else {
5394 btrfs_abort_transaction(trans, extent_root, ret); 5506 btrfs_abort_transaction(trans, extent_root, ret);
5395 goto out; 5507 goto out;
@@ -5417,9 +5529,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5417 ret = btrfs_search_slot(trans, extent_root, &key, path, 5529 ret = btrfs_search_slot(trans, extent_root, &key, path,
5418 -1, 1); 5530 -1, 1);
5419 if (ret) { 5531 if (ret) {
5420 printk(KERN_ERR "umm, got %d back from search" 5532 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5421 ", was looking for %llu\n", ret, 5533 ret, (unsigned long long)bytenr);
5422 (unsigned long long)bytenr);
5423 btrfs_print_leaf(extent_root, path->nodes[0]); 5534 btrfs_print_leaf(extent_root, path->nodes[0]);
5424 } 5535 }
5425 if (ret < 0) { 5536 if (ret < 0) {
@@ -5435,7 +5546,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5435 BUG_ON(item_size < sizeof(*ei)); 5546 BUG_ON(item_size < sizeof(*ei));
5436 ei = btrfs_item_ptr(leaf, extent_slot, 5547 ei = btrfs_item_ptr(leaf, extent_slot,
5437 struct btrfs_extent_item); 5548 struct btrfs_extent_item);
5438 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 5549 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
5550 key.type == BTRFS_EXTENT_ITEM_KEY) {
5439 struct btrfs_tree_block_info *bi; 5551 struct btrfs_tree_block_info *bi;
5440 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 5552 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
5441 bi = (struct btrfs_tree_block_info *)(ei + 1); 5553 bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -5443,7 +5555,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5443 } 5555 }
5444 5556
5445 refs = btrfs_extent_refs(leaf, ei); 5557 refs = btrfs_extent_refs(leaf, ei);
5446 BUG_ON(refs < refs_to_drop); 5558 if (refs < refs_to_drop) {
5559 btrfs_err(info, "trying to drop %d refs but we only have %Lu "
5560 "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
5561 ret = -EINVAL;
5562 btrfs_abort_transaction(trans, extent_root, ret);
5563 goto out;
5564 }
5447 refs -= refs_to_drop; 5565 refs -= refs_to_drop;
5448 5566
5449 if (refs > 0) { 5567 if (refs > 0) {
@@ -5758,7 +5876,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5758 struct btrfs_root *orig_root, 5876 struct btrfs_root *orig_root,
5759 u64 num_bytes, u64 empty_size, 5877 u64 num_bytes, u64 empty_size,
5760 u64 hint_byte, struct btrfs_key *ins, 5878 u64 hint_byte, struct btrfs_key *ins,
5761 u64 data) 5879 u64 flags)
5762{ 5880{
5763 int ret = 0; 5881 int ret = 0;
5764 struct btrfs_root *root = orig_root->fs_info->extent_root; 5882 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -5769,8 +5887,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5769 int empty_cluster = 2 * 1024 * 1024; 5887 int empty_cluster = 2 * 1024 * 1024;
5770 struct btrfs_space_info *space_info; 5888 struct btrfs_space_info *space_info;
5771 int loop = 0; 5889 int loop = 0;
5772 int index = __get_raid_index(data); 5890 int index = __get_raid_index(flags);
5773 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5891 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
5774 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 5892 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
5775 bool found_uncached_bg = false; 5893 bool found_uncached_bg = false;
5776 bool failed_cluster_refill = false; 5894 bool failed_cluster_refill = false;
@@ -5783,11 +5901,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5783 ins->objectid = 0; 5901 ins->objectid = 0;
5784 ins->offset = 0; 5902 ins->offset = 0;
5785 5903
5786 trace_find_free_extent(orig_root, num_bytes, empty_size, data); 5904 trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
5787 5905
5788 space_info = __find_space_info(root->fs_info, data); 5906 space_info = __find_space_info(root->fs_info, flags);
5789 if (!space_info) { 5907 if (!space_info) {
5790 printk(KERN_ERR "No space info for %llu\n", data); 5908 btrfs_err(root->fs_info, "No space info for %llu", flags);
5791 return -ENOSPC; 5909 return -ENOSPC;
5792 } 5910 }
5793 5911
@@ -5798,13 +5916,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5798 if (btrfs_mixed_space_info(space_info)) 5916 if (btrfs_mixed_space_info(space_info))
5799 use_cluster = false; 5917 use_cluster = false;
5800 5918
5801 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { 5919 if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
5802 last_ptr = &root->fs_info->meta_alloc_cluster; 5920 last_ptr = &root->fs_info->meta_alloc_cluster;
5803 if (!btrfs_test_opt(root, SSD)) 5921 if (!btrfs_test_opt(root, SSD))
5804 empty_cluster = 64 * 1024; 5922 empty_cluster = 64 * 1024;
5805 } 5923 }
5806 5924
5807 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && 5925 if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
5808 btrfs_test_opt(root, SSD)) { 5926 btrfs_test_opt(root, SSD)) {
5809 last_ptr = &root->fs_info->data_alloc_cluster; 5927 last_ptr = &root->fs_info->data_alloc_cluster;
5810 } 5928 }
@@ -5833,7 +5951,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5833 * However if we are re-searching with an ideal block group 5951 * However if we are re-searching with an ideal block group
5834 * picked out then we don't care that the block group is cached. 5952 * picked out then we don't care that the block group is cached.
5835 */ 5953 */
5836 if (block_group && block_group_bits(block_group, data) && 5954 if (block_group && block_group_bits(block_group, flags) &&
5837 block_group->cached != BTRFS_CACHE_NO) { 5955 block_group->cached != BTRFS_CACHE_NO) {
5838 down_read(&space_info->groups_sem); 5956 down_read(&space_info->groups_sem);
5839 if (list_empty(&block_group->list) || 5957 if (list_empty(&block_group->list) ||
@@ -5871,7 +5989,7 @@ search:
5871 * raid types, but we want to make sure we only allocate 5989 * raid types, but we want to make sure we only allocate
5872 * for the proper type. 5990 * for the proper type.
5873 */ 5991 */
5874 if (!block_group_bits(block_group, data)) { 5992 if (!block_group_bits(block_group, flags)) {
5875 u64 extra = BTRFS_BLOCK_GROUP_DUP | 5993 u64 extra = BTRFS_BLOCK_GROUP_DUP |
5876 BTRFS_BLOCK_GROUP_RAID1 | 5994 BTRFS_BLOCK_GROUP_RAID1 |
5877 BTRFS_BLOCK_GROUP_RAID5 | 5995 BTRFS_BLOCK_GROUP_RAID5 |
@@ -5883,7 +6001,7 @@ search:
5883 * doesn't provide them, bail. This does allow us to 6001 * doesn't provide them, bail. This does allow us to
5884 * fill raid0 from raid1. 6002 * fill raid0 from raid1.
5885 */ 6003 */
5886 if ((data & extra) && !(block_group->flags & extra)) 6004 if ((flags & extra) && !(block_group->flags & extra))
5887 goto loop; 6005 goto loop;
5888 } 6006 }
5889 6007
@@ -5914,7 +6032,7 @@ have_block_group:
5914 if (used_block_group != block_group && 6032 if (used_block_group != block_group &&
5915 (!used_block_group || 6033 (!used_block_group ||
5916 used_block_group->ro || 6034 used_block_group->ro ||
5917 !block_group_bits(used_block_group, data))) { 6035 !block_group_bits(used_block_group, flags))) {
5918 used_block_group = block_group; 6036 used_block_group = block_group;
5919 goto refill_cluster; 6037 goto refill_cluster;
5920 } 6038 }
@@ -6110,7 +6228,7 @@ loop:
6110 index = 0; 6228 index = 0;
6111 loop++; 6229 loop++;
6112 if (loop == LOOP_ALLOC_CHUNK) { 6230 if (loop == LOOP_ALLOC_CHUNK) {
6113 ret = do_chunk_alloc(trans, root, data, 6231 ret = do_chunk_alloc(trans, root, flags,
6114 CHUNK_ALLOC_FORCE); 6232 CHUNK_ALLOC_FORCE);
6115 /* 6233 /*
6116 * Do not bail out on ENOSPC since we 6234 * Do not bail out on ENOSPC since we
@@ -6188,16 +6306,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
6188 struct btrfs_root *root, 6306 struct btrfs_root *root,
6189 u64 num_bytes, u64 min_alloc_size, 6307 u64 num_bytes, u64 min_alloc_size,
6190 u64 empty_size, u64 hint_byte, 6308 u64 empty_size, u64 hint_byte,
6191 struct btrfs_key *ins, u64 data) 6309 struct btrfs_key *ins, int is_data)
6192{ 6310{
6193 bool final_tried = false; 6311 bool final_tried = false;
6312 u64 flags;
6194 int ret; 6313 int ret;
6195 6314
6196 data = btrfs_get_alloc_profile(root, data); 6315 flags = btrfs_get_alloc_profile(root, is_data);
6197again: 6316again:
6198 WARN_ON(num_bytes < root->sectorsize); 6317 WARN_ON(num_bytes < root->sectorsize);
6199 ret = find_free_extent(trans, root, num_bytes, empty_size, 6318 ret = find_free_extent(trans, root, num_bytes, empty_size,
6200 hint_byte, ins, data); 6319 hint_byte, ins, flags);
6201 6320
6202 if (ret == -ENOSPC) { 6321 if (ret == -ENOSPC) {
6203 if (!final_tried) { 6322 if (!final_tried) {
@@ -6210,10 +6329,10 @@ again:
6210 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) { 6329 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
6211 struct btrfs_space_info *sinfo; 6330 struct btrfs_space_info *sinfo;
6212 6331
6213 sinfo = __find_space_info(root->fs_info, data); 6332 sinfo = __find_space_info(root->fs_info, flags);
6214 printk(KERN_ERR "btrfs allocation failed flags %llu, " 6333 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
6215 "wanted %llu\n", (unsigned long long)data, 6334 (unsigned long long)flags,
6216 (unsigned long long)num_bytes); 6335 (unsigned long long)num_bytes);
6217 if (sinfo) 6336 if (sinfo)
6218 dump_space_info(sinfo, num_bytes, 1); 6337 dump_space_info(sinfo, num_bytes, 1);
6219 } 6338 }
@@ -6232,8 +6351,8 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6232 6351
6233 cache = btrfs_lookup_block_group(root->fs_info, start); 6352 cache = btrfs_lookup_block_group(root->fs_info, start);
6234 if (!cache) { 6353 if (!cache) {
6235 printk(KERN_ERR "Unable to find block group for %llu\n", 6354 btrfs_err(root->fs_info, "Unable to find block group for %llu",
6236 (unsigned long long)start); 6355 (unsigned long long)start);
6237 return -ENOSPC; 6356 return -ENOSPC;
6238 } 6357 }
6239 6358
@@ -6328,9 +6447,9 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6328 6447
6329 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6448 ret = update_block_group(root, ins->objectid, ins->offset, 1);
6330 if (ret) { /* -ENOENT, logic error */ 6449 if (ret) { /* -ENOENT, logic error */
6331 printk(KERN_ERR "btrfs update block group failed for %llu " 6450 btrfs_err(fs_info, "update block group failed for %llu %llu",
6332 "%llu\n", (unsigned long long)ins->objectid, 6451 (unsigned long long)ins->objectid,
6333 (unsigned long long)ins->offset); 6452 (unsigned long long)ins->offset);
6334 BUG(); 6453 BUG();
6335 } 6454 }
6336 return ret; 6455 return ret;
@@ -6349,7 +6468,12 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6349 struct btrfs_extent_inline_ref *iref; 6468 struct btrfs_extent_inline_ref *iref;
6350 struct btrfs_path *path; 6469 struct btrfs_path *path;
6351 struct extent_buffer *leaf; 6470 struct extent_buffer *leaf;
6352 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 6471 u32 size = sizeof(*extent_item) + sizeof(*iref);
6472 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6473 SKINNY_METADATA);
6474
6475 if (!skinny_metadata)
6476 size += sizeof(*block_info);
6353 6477
6354 path = btrfs_alloc_path(); 6478 path = btrfs_alloc_path();
6355 if (!path) 6479 if (!path)
@@ -6370,12 +6494,16 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6370 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 6494 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
6371 btrfs_set_extent_flags(leaf, extent_item, 6495 btrfs_set_extent_flags(leaf, extent_item,
6372 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 6496 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6373 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
6374 6497
6375 btrfs_set_tree_block_key(leaf, block_info, key); 6498 if (skinny_metadata) {
6376 btrfs_set_tree_block_level(leaf, block_info, level); 6499 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
6500 } else {
6501 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
6502 btrfs_set_tree_block_key(leaf, block_info, key);
6503 btrfs_set_tree_block_level(leaf, block_info, level);
6504 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
6505 }
6377 6506
6378 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
6379 if (parent > 0) { 6507 if (parent > 0) {
6380 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 6508 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
6381 btrfs_set_extent_inline_ref_type(leaf, iref, 6509 btrfs_set_extent_inline_ref_type(leaf, iref,
@@ -6390,11 +6518,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6390 btrfs_mark_buffer_dirty(leaf); 6518 btrfs_mark_buffer_dirty(leaf);
6391 btrfs_free_path(path); 6519 btrfs_free_path(path);
6392 6520
6393 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6521 ret = update_block_group(root, ins->objectid, root->leafsize, 1);
6394 if (ret) { /* -ENOENT, logic error */ 6522 if (ret) { /* -ENOENT, logic error */
6395 printk(KERN_ERR "btrfs update block group failed for %llu " 6523 btrfs_err(fs_info, "update block group failed for %llu %llu",
6396 "%llu\n", (unsigned long long)ins->objectid, 6524 (unsigned long long)ins->objectid,
6397 (unsigned long long)ins->offset); 6525 (unsigned long long)ins->offset);
6398 BUG(); 6526 BUG();
6399 } 6527 }
6400 return ret; 6528 return ret;
@@ -6439,47 +6567,48 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
6439 if (!caching_ctl) { 6567 if (!caching_ctl) {
6440 BUG_ON(!block_group_cache_done(block_group)); 6568 BUG_ON(!block_group_cache_done(block_group));
6441 ret = btrfs_remove_free_space(block_group, start, num_bytes); 6569 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6442 BUG_ON(ret); /* -ENOMEM */ 6570 if (ret)
6571 goto out;
6443 } else { 6572 } else {
6444 mutex_lock(&caching_ctl->mutex); 6573 mutex_lock(&caching_ctl->mutex);
6445 6574
6446 if (start >= caching_ctl->progress) { 6575 if (start >= caching_ctl->progress) {
6447 ret = add_excluded_extent(root, start, num_bytes); 6576 ret = add_excluded_extent(root, start, num_bytes);
6448 BUG_ON(ret); /* -ENOMEM */
6449 } else if (start + num_bytes <= caching_ctl->progress) { 6577 } else if (start + num_bytes <= caching_ctl->progress) {
6450 ret = btrfs_remove_free_space(block_group, 6578 ret = btrfs_remove_free_space(block_group,
6451 start, num_bytes); 6579 start, num_bytes);
6452 BUG_ON(ret); /* -ENOMEM */
6453 } else { 6580 } else {
6454 num_bytes = caching_ctl->progress - start; 6581 num_bytes = caching_ctl->progress - start;
6455 ret = btrfs_remove_free_space(block_group, 6582 ret = btrfs_remove_free_space(block_group,
6456 start, num_bytes); 6583 start, num_bytes);
6457 BUG_ON(ret); /* -ENOMEM */ 6584 if (ret)
6585 goto out_lock;
6458 6586
6459 start = caching_ctl->progress; 6587 start = caching_ctl->progress;
6460 num_bytes = ins->objectid + ins->offset - 6588 num_bytes = ins->objectid + ins->offset -
6461 caching_ctl->progress; 6589 caching_ctl->progress;
6462 ret = add_excluded_extent(root, start, num_bytes); 6590 ret = add_excluded_extent(root, start, num_bytes);
6463 BUG_ON(ret); /* -ENOMEM */
6464 } 6591 }
6465 6592out_lock:
6466 mutex_unlock(&caching_ctl->mutex); 6593 mutex_unlock(&caching_ctl->mutex);
6467 put_caching_control(caching_ctl); 6594 put_caching_control(caching_ctl);
6595 if (ret)
6596 goto out;
6468 } 6597 }
6469 6598
6470 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 6599 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
6471 RESERVE_ALLOC_NO_ACCOUNT); 6600 RESERVE_ALLOC_NO_ACCOUNT);
6472 BUG_ON(ret); /* logic error */ 6601 BUG_ON(ret); /* logic error */
6473 btrfs_put_block_group(block_group);
6474 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 6602 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
6475 0, owner, offset, ins, 1); 6603 0, owner, offset, ins, 1);
6604out:
6605 btrfs_put_block_group(block_group);
6476 return ret; 6606 return ret;
6477} 6607}
6478 6608
6479struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 6609static struct extent_buffer *
6480 struct btrfs_root *root, 6610btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6481 u64 bytenr, u32 blocksize, 6611 u64 bytenr, u32 blocksize, int level)
6482 int level)
6483{ 6612{
6484 struct extent_buffer *buf; 6613 struct extent_buffer *buf;
6485 6614
@@ -6594,7 +6723,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6594 struct extent_buffer *buf; 6723 struct extent_buffer *buf;
6595 u64 flags = 0; 6724 u64 flags = 0;
6596 int ret; 6725 int ret;
6597 6726 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6727 SKINNY_METADATA);
6598 6728
6599 block_rsv = use_block_rsv(trans, root, blocksize); 6729 block_rsv = use_block_rsv(trans, root, blocksize);
6600 if (IS_ERR(block_rsv)) 6730 if (IS_ERR(block_rsv))
@@ -6627,7 +6757,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6627 else 6757 else
6628 memset(&extent_op->key, 0, sizeof(extent_op->key)); 6758 memset(&extent_op->key, 0, sizeof(extent_op->key));
6629 extent_op->flags_to_set = flags; 6759 extent_op->flags_to_set = flags;
6630 extent_op->update_key = 1; 6760 if (skinny_metadata)
6761 extent_op->update_key = 0;
6762 else
6763 extent_op->update_key = 1;
6631 extent_op->update_flags = 1; 6764 extent_op->update_flags = 1;
6632 extent_op->is_data = 0; 6765 extent_op->is_data = 0;
6633 6766
@@ -6704,8 +6837,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
6704 continue; 6837 continue;
6705 6838
6706 /* We don't lock the tree block, it's OK to be racy here */ 6839 /* We don't lock the tree block, it's OK to be racy here */
6707 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 6840 ret = btrfs_lookup_extent_info(trans, root, bytenr,
6708 &refs, &flags); 6841 wc->level - 1, 1, &refs,
6842 &flags);
6709 /* We don't care about errors in readahead. */ 6843 /* We don't care about errors in readahead. */
6710 if (ret < 0) 6844 if (ret < 0)
6711 continue; 6845 continue;
@@ -6772,7 +6906,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6772 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { 6906 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
6773 BUG_ON(!path->locks[level]); 6907 BUG_ON(!path->locks[level]);
6774 ret = btrfs_lookup_extent_info(trans, root, 6908 ret = btrfs_lookup_extent_info(trans, root,
6775 eb->start, eb->len, 6909 eb->start, level, 1,
6776 &wc->refs[level], 6910 &wc->refs[level],
6777 &wc->flags[level]); 6911 &wc->flags[level]);
6778 BUG_ON(ret == -ENOMEM); 6912 BUG_ON(ret == -ENOMEM);
@@ -6870,7 +7004,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6870 btrfs_tree_lock(next); 7004 btrfs_tree_lock(next);
6871 btrfs_set_lock_blocking(next); 7005 btrfs_set_lock_blocking(next);
6872 7006
6873 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 7007 ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
6874 &wc->refs[level - 1], 7008 &wc->refs[level - 1],
6875 &wc->flags[level - 1]); 7009 &wc->flags[level - 1]);
6876 if (ret < 0) { 7010 if (ret < 0) {
@@ -6878,7 +7012,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6878 return ret; 7012 return ret;
6879 } 7013 }
6880 7014
6881 BUG_ON(wc->refs[level - 1] == 0); 7015 if (unlikely(wc->refs[level - 1] == 0)) {
7016 btrfs_err(root->fs_info, "Missing references.");
7017 BUG();
7018 }
6882 *lookup_info = 0; 7019 *lookup_info = 0;
6883 7020
6884 if (wc->stage == DROP_REFERENCE) { 7021 if (wc->stage == DROP_REFERENCE) {
@@ -6917,8 +7054,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6917 if (reada && level == 1) 7054 if (reada && level == 1)
6918 reada_walk_down(trans, root, wc, path); 7055 reada_walk_down(trans, root, wc, path);
6919 next = read_tree_block(root, bytenr, blocksize, generation); 7056 next = read_tree_block(root, bytenr, blocksize, generation);
6920 if (!next) 7057 if (!next || !extent_buffer_uptodate(next)) {
7058 free_extent_buffer(next);
6921 return -EIO; 7059 return -EIO;
7060 }
6922 btrfs_tree_lock(next); 7061 btrfs_tree_lock(next);
6923 btrfs_set_lock_blocking(next); 7062 btrfs_set_lock_blocking(next);
6924 } 7063 }
@@ -7001,7 +7140,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
7001 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 7140 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
7002 7141
7003 ret = btrfs_lookup_extent_info(trans, root, 7142 ret = btrfs_lookup_extent_info(trans, root,
7004 eb->start, eb->len, 7143 eb->start, level, 1,
7005 &wc->refs[level], 7144 &wc->refs[level],
7006 &wc->flags[level]); 7145 &wc->flags[level]);
7007 if (ret < 0) { 7146 if (ret < 0) {
@@ -7137,6 +7276,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
7137 * reference count by one. if update_ref is true, this function 7276 * reference count by one. if update_ref is true, this function
7138 * also make sure backrefs for the shared block and all lower level 7277 * also make sure backrefs for the shared block and all lower level
7139 * blocks are properly updated. 7278 * blocks are properly updated.
7279 *
7280 * If called with for_reloc == 0, may exit early with -EAGAIN
7140 */ 7281 */
7141int btrfs_drop_snapshot(struct btrfs_root *root, 7282int btrfs_drop_snapshot(struct btrfs_root *root,
7142 struct btrfs_block_rsv *block_rsv, int update_ref, 7283 struct btrfs_block_rsv *block_rsv, int update_ref,
@@ -7211,8 +7352,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7211 7352
7212 ret = btrfs_lookup_extent_info(trans, root, 7353 ret = btrfs_lookup_extent_info(trans, root,
7213 path->nodes[level]->start, 7354 path->nodes[level]->start,
7214 path->nodes[level]->len, 7355 level, 1, &wc->refs[level],
7215 &wc->refs[level],
7216 &wc->flags[level]); 7356 &wc->flags[level]);
7217 if (ret < 0) { 7357 if (ret < 0) {
7218 err = ret; 7358 err = ret;
@@ -7238,6 +7378,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7238 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 7378 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
7239 7379
7240 while (1) { 7380 while (1) {
7381 if (!for_reloc && btrfs_fs_closing(root->fs_info)) {
7382 pr_debug("btrfs: drop snapshot early exit\n");
7383 err = -EAGAIN;
7384 goto out_end_trans;
7385 }
7386
7241 ret = walk_down_tree(trans, root, path, wc); 7387 ret = walk_down_tree(trans, root, path, wc);
7242 if (ret < 0) { 7388 if (ret < 0) {
7243 err = ret; 7389 err = ret;
@@ -8020,10 +8166,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8020 free_excluded_extents(root, cache); 8166 free_excluded_extents(root, cache);
8021 } 8167 }
8022 8168
8169 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8170 if (ret) {
8171 btrfs_remove_free_space_cache(cache);
8172 btrfs_put_block_group(cache);
8173 goto error;
8174 }
8175
8023 ret = update_space_info(info, cache->flags, found_key.offset, 8176 ret = update_space_info(info, cache->flags, found_key.offset,
8024 btrfs_block_group_used(&cache->item), 8177 btrfs_block_group_used(&cache->item),
8025 &space_info); 8178 &space_info);
8026 BUG_ON(ret); /* -ENOMEM */ 8179 if (ret) {
8180 btrfs_remove_free_space_cache(cache);
8181 spin_lock(&info->block_group_cache_lock);
8182 rb_erase(&cache->cache_node,
8183 &info->block_group_cache_tree);
8184 spin_unlock(&info->block_group_cache_lock);
8185 btrfs_put_block_group(cache);
8186 goto error;
8187 }
8188
8027 cache->space_info = space_info; 8189 cache->space_info = space_info;
8028 spin_lock(&cache->space_info->lock); 8190 spin_lock(&cache->space_info->lock);
8029 cache->space_info->bytes_readonly += cache->bytes_super; 8191 cache->space_info->bytes_readonly += cache->bytes_super;
@@ -8031,9 +8193,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8031 8193
8032 __link_block_group(space_info, cache); 8194 __link_block_group(space_info, cache);
8033 8195
8034 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8035 BUG_ON(ret); /* Logic error */
8036
8037 set_avail_alloc_bits(root->fs_info, cache->flags); 8196 set_avail_alloc_bits(root->fs_info, cache->flags);
8038 if (btrfs_chunk_readonly(root, cache->key.objectid)) 8197 if (btrfs_chunk_readonly(root, cache->key.objectid))
8039 set_block_group_ro(cache, 1); 8198 set_block_group_ro(cache, 1);
@@ -8156,9 +8315,24 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8156 8315
8157 free_excluded_extents(root, cache); 8316 free_excluded_extents(root, cache);
8158 8317
8318 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8319 if (ret) {
8320 btrfs_remove_free_space_cache(cache);
8321 btrfs_put_block_group(cache);
8322 return ret;
8323 }
8324
8159 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 8325 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
8160 &cache->space_info); 8326 &cache->space_info);
8161 BUG_ON(ret); /* -ENOMEM */ 8327 if (ret) {
8328 btrfs_remove_free_space_cache(cache);
8329 spin_lock(&root->fs_info->block_group_cache_lock);
8330 rb_erase(&cache->cache_node,
8331 &root->fs_info->block_group_cache_tree);
8332 spin_unlock(&root->fs_info->block_group_cache_lock);
8333 btrfs_put_block_group(cache);
8334 return ret;
8335 }
8162 update_global_block_rsv(root->fs_info); 8336 update_global_block_rsv(root->fs_info);
8163 8337
8164 spin_lock(&cache->space_info->lock); 8338 spin_lock(&cache->space_info->lock);
@@ -8167,9 +8341,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8167 8341
8168 __link_block_group(cache->space_info, cache); 8342 __link_block_group(cache->space_info, cache);
8169 8343
8170 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8171 BUG_ON(ret); /* Logic error */
8172
8173 list_add_tail(&cache->new_bg_list, &trans->new_bgs); 8344 list_add_tail(&cache->new_bg_list, &trans->new_bgs);
8174 8345
8175 set_avail_alloc_bits(extent_root->fs_info, type); 8346 set_avail_alloc_bits(extent_root->fs_info, type);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 73f2bfe3ac93..32d67a822e93 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -24,12 +24,62 @@
24static struct kmem_cache *extent_state_cache; 24static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache; 25static struct kmem_cache *extent_buffer_cache;
26 26
27#ifdef CONFIG_BTRFS_DEBUG
27static LIST_HEAD(buffers); 28static LIST_HEAD(buffers);
28static LIST_HEAD(states); 29static LIST_HEAD(states);
29 30
30#define LEAK_DEBUG 0
31#if LEAK_DEBUG
32static DEFINE_SPINLOCK(leak_lock); 31static DEFINE_SPINLOCK(leak_lock);
32
33static inline
34void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
35{
36 unsigned long flags;
37
38 spin_lock_irqsave(&leak_lock, flags);
39 list_add(new, head);
40 spin_unlock_irqrestore(&leak_lock, flags);
41}
42
43static inline
44void btrfs_leak_debug_del(struct list_head *entry)
45{
46 unsigned long flags;
47
48 spin_lock_irqsave(&leak_lock, flags);
49 list_del(entry);
50 spin_unlock_irqrestore(&leak_lock, flags);
51}
52
53static inline
54void btrfs_leak_debug_check(void)
55{
56 struct extent_state *state;
57 struct extent_buffer *eb;
58
59 while (!list_empty(&states)) {
60 state = list_entry(states.next, struct extent_state, leak_list);
61 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
62 "state %lu in tree %p refs %d\n",
63 (unsigned long long)state->start,
64 (unsigned long long)state->end,
65 state->state, state->tree, atomic_read(&state->refs));
66 list_del(&state->leak_list);
67 kmem_cache_free(extent_state_cache, state);
68 }
69
70 while (!list_empty(&buffers)) {
71 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
72 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
73 "refs %d\n", (unsigned long long)eb->start,
74 eb->len, atomic_read(&eb->refs));
75 list_del(&eb->leak_list);
76 kmem_cache_free(extent_buffer_cache, eb);
77 }
78}
79#else
80#define btrfs_leak_debug_add(new, head) do {} while (0)
81#define btrfs_leak_debug_del(entry) do {} while (0)
82#define btrfs_leak_debug_check() do {} while (0)
33#endif 83#endif
34 84
35#define BUFFER_LRU_MAX 64 85#define BUFFER_LRU_MAX 64
@@ -84,29 +134,7 @@ free_state_cache:
84 134
85void extent_io_exit(void) 135void extent_io_exit(void)
86{ 136{
87 struct extent_state *state; 137 btrfs_leak_debug_check();
88 struct extent_buffer *eb;
89
90 while (!list_empty(&states)) {
91 state = list_entry(states.next, struct extent_state, leak_list);
92 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
93 "state %lu in tree %p refs %d\n",
94 (unsigned long long)state->start,
95 (unsigned long long)state->end,
96 state->state, state->tree, atomic_read(&state->refs));
97 list_del(&state->leak_list);
98 kmem_cache_free(extent_state_cache, state);
99
100 }
101
102 while (!list_empty(&buffers)) {
103 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
104 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
105 "refs %d\n", (unsigned long long)eb->start,
106 eb->len, atomic_read(&eb->refs));
107 list_del(&eb->leak_list);
108 kmem_cache_free(extent_buffer_cache, eb);
109 }
110 138
111 /* 139 /*
112 * Make sure all delayed rcu free are flushed before we 140 * Make sure all delayed rcu free are flushed before we
@@ -134,9 +162,6 @@ void extent_io_tree_init(struct extent_io_tree *tree,
134static struct extent_state *alloc_extent_state(gfp_t mask) 162static struct extent_state *alloc_extent_state(gfp_t mask)
135{ 163{
136 struct extent_state *state; 164 struct extent_state *state;
137#if LEAK_DEBUG
138 unsigned long flags;
139#endif
140 165
141 state = kmem_cache_alloc(extent_state_cache, mask); 166 state = kmem_cache_alloc(extent_state_cache, mask);
142 if (!state) 167 if (!state)
@@ -144,11 +169,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
144 state->state = 0; 169 state->state = 0;
145 state->private = 0; 170 state->private = 0;
146 state->tree = NULL; 171 state->tree = NULL;
147#if LEAK_DEBUG 172 btrfs_leak_debug_add(&state->leak_list, &states);
148 spin_lock_irqsave(&leak_lock, flags);
149 list_add(&state->leak_list, &states);
150 spin_unlock_irqrestore(&leak_lock, flags);
151#endif
152 atomic_set(&state->refs, 1); 173 atomic_set(&state->refs, 1);
153 init_waitqueue_head(&state->wq); 174 init_waitqueue_head(&state->wq);
154 trace_alloc_extent_state(state, mask, _RET_IP_); 175 trace_alloc_extent_state(state, mask, _RET_IP_);
@@ -160,15 +181,8 @@ void free_extent_state(struct extent_state *state)
160 if (!state) 181 if (!state)
161 return; 182 return;
162 if (atomic_dec_and_test(&state->refs)) { 183 if (atomic_dec_and_test(&state->refs)) {
163#if LEAK_DEBUG
164 unsigned long flags;
165#endif
166 WARN_ON(state->tree); 184 WARN_ON(state->tree);
167#if LEAK_DEBUG 185 btrfs_leak_debug_del(&state->leak_list);
168 spin_lock_irqsave(&leak_lock, flags);
169 list_del(&state->leak_list);
170 spin_unlock_irqrestore(&leak_lock, flags);
171#endif
172 trace_free_extent_state(state, _RET_IP_); 186 trace_free_extent_state(state, _RET_IP_);
173 kmem_cache_free(extent_state_cache, state); 187 kmem_cache_free(extent_state_cache, state);
174 } 188 }
@@ -308,21 +322,21 @@ static void merge_state(struct extent_io_tree *tree,
308} 322}
309 323
310static void set_state_cb(struct extent_io_tree *tree, 324static void set_state_cb(struct extent_io_tree *tree,
311 struct extent_state *state, int *bits) 325 struct extent_state *state, unsigned long *bits)
312{ 326{
313 if (tree->ops && tree->ops->set_bit_hook) 327 if (tree->ops && tree->ops->set_bit_hook)
314 tree->ops->set_bit_hook(tree->mapping->host, state, bits); 328 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
315} 329}
316 330
317static void clear_state_cb(struct extent_io_tree *tree, 331static void clear_state_cb(struct extent_io_tree *tree,
318 struct extent_state *state, int *bits) 332 struct extent_state *state, unsigned long *bits)
319{ 333{
320 if (tree->ops && tree->ops->clear_bit_hook) 334 if (tree->ops && tree->ops->clear_bit_hook)
321 tree->ops->clear_bit_hook(tree->mapping->host, state, bits); 335 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
322} 336}
323 337
324static void set_state_bits(struct extent_io_tree *tree, 338static void set_state_bits(struct extent_io_tree *tree,
325 struct extent_state *state, int *bits); 339 struct extent_state *state, unsigned long *bits);
326 340
327/* 341/*
328 * insert an extent_state struct into the tree. 'bits' are set on the 342 * insert an extent_state struct into the tree. 'bits' are set on the
@@ -336,7 +350,7 @@ static void set_state_bits(struct extent_io_tree *tree,
336 */ 350 */
337static int insert_state(struct extent_io_tree *tree, 351static int insert_state(struct extent_io_tree *tree,
338 struct extent_state *state, u64 start, u64 end, 352 struct extent_state *state, u64 start, u64 end,
339 int *bits) 353 unsigned long *bits)
340{ 354{
341 struct rb_node *node; 355 struct rb_node *node;
342 356
@@ -424,10 +438,10 @@ static struct extent_state *next_state(struct extent_state *state)
424 */ 438 */
425static struct extent_state *clear_state_bit(struct extent_io_tree *tree, 439static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
426 struct extent_state *state, 440 struct extent_state *state,
427 int *bits, int wake) 441 unsigned long *bits, int wake)
428{ 442{
429 struct extent_state *next; 443 struct extent_state *next;
430 int bits_to_clear = *bits & ~EXTENT_CTLBITS; 444 unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
431 445
432 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { 446 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
433 u64 range = state->end - state->start + 1; 447 u64 range = state->end - state->start + 1;
@@ -463,7 +477,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
463 return prealloc; 477 return prealloc;
464} 478}
465 479
466void extent_io_tree_panic(struct extent_io_tree *tree, int err) 480static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
467{ 481{
468 btrfs_panic(tree_fs_info(tree), err, "Locking error: " 482 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
469 "Extent tree was modified by another " 483 "Extent tree was modified by another "
@@ -483,7 +497,7 @@ void extent_io_tree_panic(struct extent_io_tree *tree, int err)
483 * This takes the tree lock, and returns 0 on success and < 0 on error. 497 * This takes the tree lock, and returns 0 on success and < 0 on error.
484 */ 498 */
485int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 499int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
486 int bits, int wake, int delete, 500 unsigned long bits, int wake, int delete,
487 struct extent_state **cached_state, 501 struct extent_state **cached_state,
488 gfp_t mask) 502 gfp_t mask)
489{ 503{
@@ -644,7 +658,8 @@ static void wait_on_state(struct extent_io_tree *tree,
644 * The range [start, end] is inclusive. 658 * The range [start, end] is inclusive.
645 * The tree lock is taken by this function 659 * The tree lock is taken by this function
646 */ 660 */
647void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) 661static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
662 unsigned long bits)
648{ 663{
649 struct extent_state *state; 664 struct extent_state *state;
650 struct rb_node *node; 665 struct rb_node *node;
@@ -685,9 +700,9 @@ out:
685 700
686static void set_state_bits(struct extent_io_tree *tree, 701static void set_state_bits(struct extent_io_tree *tree,
687 struct extent_state *state, 702 struct extent_state *state,
688 int *bits) 703 unsigned long *bits)
689{ 704{
690 int bits_to_set = *bits & ~EXTENT_CTLBITS; 705 unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
691 706
692 set_state_cb(tree, state, bits); 707 set_state_cb(tree, state, bits);
693 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 708 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@ -730,8 +745,9 @@ static void uncache_state(struct extent_state **cached_ptr)
730 745
731static int __must_check 746static int __must_check
732__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 747__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
733 int bits, int exclusive_bits, u64 *failed_start, 748 unsigned long bits, unsigned long exclusive_bits,
734 struct extent_state **cached_state, gfp_t mask) 749 u64 *failed_start, struct extent_state **cached_state,
750 gfp_t mask)
735{ 751{
736 struct extent_state *state; 752 struct extent_state *state;
737 struct extent_state *prealloc = NULL; 753 struct extent_state *prealloc = NULL;
@@ -923,9 +939,9 @@ search_again:
923 goto again; 939 goto again;
924} 940}
925 941
926int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, 942int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
927 u64 *failed_start, struct extent_state **cached_state, 943 unsigned long bits, u64 * failed_start,
928 gfp_t mask) 944 struct extent_state **cached_state, gfp_t mask)
929{ 945{
930 return __set_extent_bit(tree, start, end, bits, 0, failed_start, 946 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
931 cached_state, mask); 947 cached_state, mask);
@@ -950,7 +966,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
950 * boundary bits like LOCK. 966 * boundary bits like LOCK.
951 */ 967 */
952int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 968int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
953 int bits, int clear_bits, 969 unsigned long bits, unsigned long clear_bits,
954 struct extent_state **cached_state, gfp_t mask) 970 struct extent_state **cached_state, gfp_t mask)
955{ 971{
956 struct extent_state *state; 972 struct extent_state *state;
@@ -1143,14 +1159,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1143} 1159}
1144 1160
1145int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1161int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1146 int bits, gfp_t mask) 1162 unsigned long bits, gfp_t mask)
1147{ 1163{
1148 return set_extent_bit(tree, start, end, bits, NULL, 1164 return set_extent_bit(tree, start, end, bits, NULL,
1149 NULL, mask); 1165 NULL, mask);
1150} 1166}
1151 1167
1152int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1168int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1153 int bits, gfp_t mask) 1169 unsigned long bits, gfp_t mask)
1154{ 1170{
1155 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); 1171 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
1156} 1172}
@@ -1189,7 +1205,7 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
1189int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 1205int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1190 struct extent_state **cached_state, gfp_t mask) 1206 struct extent_state **cached_state, gfp_t mask)
1191{ 1207{
1192 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 1208 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
1193 cached_state, mask); 1209 cached_state, mask);
1194} 1210}
1195 1211
@@ -1205,7 +1221,7 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1205 * us if waiting is desired. 1221 * us if waiting is desired.
1206 */ 1222 */
1207int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1223int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1208 int bits, struct extent_state **cached_state) 1224 unsigned long bits, struct extent_state **cached_state)
1209{ 1225{
1210 int err; 1226 int err;
1211 u64 failed_start; 1227 u64 failed_start;
@@ -1313,8 +1329,9 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1313 * return it. tree->lock must be held. NULL will returned if 1329 * return it. tree->lock must be held. NULL will returned if
1314 * nothing was found after 'start' 1330 * nothing was found after 'start'
1315 */ 1331 */
1316struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 1332static struct extent_state *
1317 u64 start, int bits) 1333find_first_extent_bit_state(struct extent_io_tree *tree,
1334 u64 start, unsigned long bits)
1318{ 1335{
1319 struct rb_node *node; 1336 struct rb_node *node;
1320 struct extent_state *state; 1337 struct extent_state *state;
@@ -1348,7 +1365,7 @@ out:
1348 * If nothing was found, 1 is returned. If found something, return 0. 1365 * If nothing was found, 1 is returned. If found something, return 0.
1349 */ 1366 */
1350int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 1367int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1351 u64 *start_ret, u64 *end_ret, int bits, 1368 u64 *start_ret, u64 *end_ret, unsigned long bits,
1352 struct extent_state **cached_state) 1369 struct extent_state **cached_state)
1353{ 1370{
1354 struct extent_state *state; 1371 struct extent_state *state;
@@ -1638,7 +1655,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1638 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1655 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1639 unsigned long nr_pages = end_index - index + 1; 1656 unsigned long nr_pages = end_index - index + 1;
1640 int i; 1657 int i;
1641 int clear_bits = 0; 1658 unsigned long clear_bits = 0;
1642 1659
1643 if (op & EXTENT_CLEAR_UNLOCK) 1660 if (op & EXTENT_CLEAR_UNLOCK)
1644 clear_bits |= EXTENT_LOCKED; 1661 clear_bits |= EXTENT_LOCKED;
@@ -1777,6 +1794,64 @@ out:
1777 return ret; 1794 return ret;
1778} 1795}
1779 1796
1797void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
1798 int count)
1799{
1800 struct rb_node *node;
1801 struct extent_state *state;
1802
1803 spin_lock(&tree->lock);
1804 /*
1805 * this search will find all the extents that end after
1806 * our range starts.
1807 */
1808 node = tree_search(tree, start);
1809 BUG_ON(!node);
1810
1811 state = rb_entry(node, struct extent_state, rb_node);
1812 BUG_ON(state->start != start);
1813
1814 while (count) {
1815 state->private = *csums++;
1816 count--;
1817 state = next_state(state);
1818 }
1819 spin_unlock(&tree->lock);
1820}
1821
1822static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
1823{
1824 struct bio_vec *bvec = bio->bi_io_vec + bio_index;
1825
1826 return page_offset(bvec->bv_page) + bvec->bv_offset;
1827}
1828
1829void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
1830 u32 csums[], int count)
1831{
1832 struct rb_node *node;
1833 struct extent_state *state = NULL;
1834 u64 start;
1835
1836 spin_lock(&tree->lock);
1837 do {
1838 start = __btrfs_get_bio_offset(bio, bio_index);
1839 if (state == NULL || state->start != start) {
1840 node = tree_search(tree, start);
1841 BUG_ON(!node);
1842
1843 state = rb_entry(node, struct extent_state, rb_node);
1844 BUG_ON(state->start != start);
1845 }
1846 state->private = *csums++;
1847 count--;
1848 bio_index++;
1849
1850 state = next_state(state);
1851 } while (count);
1852 spin_unlock(&tree->lock);
1853}
1854
1780int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1855int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1781{ 1856{
1782 struct rb_node *node; 1857 struct rb_node *node;
@@ -1811,7 +1886,7 @@ out:
1811 * range is found set. 1886 * range is found set.
1812 */ 1887 */
1813int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 1888int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1814 int bits, int filled, struct extent_state *cached) 1889 unsigned long bits, int filled, struct extent_state *cached)
1815{ 1890{
1816 struct extent_state *state = NULL; 1891 struct extent_state *state = NULL;
1817 struct rb_node *node; 1892 struct rb_node *node;
@@ -2595,7 +2670,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2595 return ret; 2670 return ret;
2596} 2671}
2597 2672
2598void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page) 2673static void attach_extent_buffer_page(struct extent_buffer *eb,
2674 struct page *page)
2599{ 2675{
2600 if (!PagePrivate(page)) { 2676 if (!PagePrivate(page)) {
2601 SetPagePrivate(page); 2677 SetPagePrivate(page);
@@ -2625,7 +2701,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2625 struct page *page, 2701 struct page *page,
2626 get_extent_t *get_extent, 2702 get_extent_t *get_extent,
2627 struct bio **bio, int mirror_num, 2703 struct bio **bio, int mirror_num,
2628 unsigned long *bio_flags) 2704 unsigned long *bio_flags, int rw)
2629{ 2705{
2630 struct inode *inode = page->mapping->host; 2706 struct inode *inode = page->mapping->host;
2631 u64 start = page_offset(page); 2707 u64 start = page_offset(page);
@@ -2771,7 +2847,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2771 } 2847 }
2772 2848
2773 pnr -= page->index; 2849 pnr -= page->index;
2774 ret = submit_extent_page(READ, tree, page, 2850 ret = submit_extent_page(rw, tree, page,
2775 sector, disk_io_size, pg_offset, 2851 sector, disk_io_size, pg_offset,
2776 bdev, bio, pnr, 2852 bdev, bio, pnr,
2777 end_bio_extent_readpage, mirror_num, 2853 end_bio_extent_readpage, mirror_num,
@@ -2804,7 +2880,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2804 int ret; 2880 int ret;
2805 2881
2806 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, 2882 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
2807 &bio_flags); 2883 &bio_flags, READ);
2808 if (bio) 2884 if (bio)
2809 ret = submit_one_bio(READ, bio, mirror_num, bio_flags); 2885 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
2810 return ret; 2886 return ret;
@@ -3103,7 +3179,7 @@ static int eb_wait(void *word)
3103 return 0; 3179 return 0;
3104} 3180}
3105 3181
3106static void wait_on_extent_buffer_writeback(struct extent_buffer *eb) 3182void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3107{ 3183{
3108 wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait, 3184 wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
3109 TASK_UNINTERRUPTIBLE); 3185 TASK_UNINTERRUPTIBLE);
@@ -3228,7 +3304,7 @@ static int write_one_eb(struct extent_buffer *eb,
3228 u64 offset = eb->start; 3304 u64 offset = eb->start;
3229 unsigned long i, num_pages; 3305 unsigned long i, num_pages;
3230 unsigned long bio_flags = 0; 3306 unsigned long bio_flags = 0;
3231 int rw = (epd->sync_io ? WRITE_SYNC : WRITE); 3307 int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
3232 int ret = 0; 3308 int ret = 0;
3233 3309
3234 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); 3310 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
@@ -3665,14 +3741,14 @@ int extent_readpages(struct extent_io_tree *tree,
3665 continue; 3741 continue;
3666 for (i = 0; i < nr; i++) { 3742 for (i = 0; i < nr; i++) {
3667 __extent_read_full_page(tree, pagepool[i], get_extent, 3743 __extent_read_full_page(tree, pagepool[i], get_extent,
3668 &bio, 0, &bio_flags); 3744 &bio, 0, &bio_flags, READ);
3669 page_cache_release(pagepool[i]); 3745 page_cache_release(pagepool[i]);
3670 } 3746 }
3671 nr = 0; 3747 nr = 0;
3672 } 3748 }
3673 for (i = 0; i < nr; i++) { 3749 for (i = 0; i < nr; i++) {
3674 __extent_read_full_page(tree, pagepool[i], get_extent, 3750 __extent_read_full_page(tree, pagepool[i], get_extent,
3675 &bio, 0, &bio_flags); 3751 &bio, 0, &bio_flags, READ);
3676 page_cache_release(pagepool[i]); 3752 page_cache_release(pagepool[i]);
3677 } 3753 }
3678 3754
@@ -3713,9 +3789,9 @@ int extent_invalidatepage(struct extent_io_tree *tree,
3713 * are locked or under IO and drops the related state bits if it is safe 3789 * are locked or under IO and drops the related state bits if it is safe
3714 * to drop the page. 3790 * to drop the page.
3715 */ 3791 */
3716int try_release_extent_state(struct extent_map_tree *map, 3792static int try_release_extent_state(struct extent_map_tree *map,
3717 struct extent_io_tree *tree, struct page *page, 3793 struct extent_io_tree *tree,
3718 gfp_t mask) 3794 struct page *page, gfp_t mask)
3719{ 3795{
3720 u64 start = page_offset(page); 3796 u64 start = page_offset(page);
3721 u64 end = start + PAGE_CACHE_SIZE - 1; 3797 u64 end = start + PAGE_CACHE_SIZE - 1;
@@ -4006,12 +4082,7 @@ out:
4006 4082
4007static void __free_extent_buffer(struct extent_buffer *eb) 4083static void __free_extent_buffer(struct extent_buffer *eb)
4008{ 4084{
4009#if LEAK_DEBUG 4085 btrfs_leak_debug_del(&eb->leak_list);
4010 unsigned long flags;
4011 spin_lock_irqsave(&leak_lock, flags);
4012 list_del(&eb->leak_list);
4013 spin_unlock_irqrestore(&leak_lock, flags);
4014#endif
4015 kmem_cache_free(extent_buffer_cache, eb); 4086 kmem_cache_free(extent_buffer_cache, eb);
4016} 4087}
4017 4088
@@ -4021,9 +4092,6 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
4021 gfp_t mask) 4092 gfp_t mask)
4022{ 4093{
4023 struct extent_buffer *eb = NULL; 4094 struct extent_buffer *eb = NULL;
4024#if LEAK_DEBUG
4025 unsigned long flags;
4026#endif
4027 4095
4028 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 4096 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
4029 if (eb == NULL) 4097 if (eb == NULL)
@@ -4043,11 +4111,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
4043 init_waitqueue_head(&eb->write_lock_wq); 4111 init_waitqueue_head(&eb->write_lock_wq);
4044 init_waitqueue_head(&eb->read_lock_wq); 4112 init_waitqueue_head(&eb->read_lock_wq);
4045 4113
4046#if LEAK_DEBUG 4114 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4047 spin_lock_irqsave(&leak_lock, flags); 4115
4048 list_add(&eb->leak_list, &buffers);
4049 spin_unlock_irqrestore(&leak_lock, flags);
4050#endif
4051 spin_lock_init(&eb->refs_lock); 4116 spin_lock_init(&eb->refs_lock);
4052 atomic_set(&eb->refs, 1); 4117 atomic_set(&eb->refs, 1);
4053 atomic_set(&eb->io_pages, 0); 4118 atomic_set(&eb->io_pages, 0);
@@ -4385,7 +4450,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4385} 4450}
4386 4451
4387/* Expects to have eb->eb_lock already held */ 4452/* Expects to have eb->eb_lock already held */
4388static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) 4453static int release_extent_buffer(struct extent_buffer *eb)
4389{ 4454{
4390 WARN_ON(atomic_read(&eb->refs) == 0); 4455 WARN_ON(atomic_read(&eb->refs) == 0);
4391 if (atomic_dec_and_test(&eb->refs)) { 4456 if (atomic_dec_and_test(&eb->refs)) {
@@ -4443,7 +4508,7 @@ void free_extent_buffer(struct extent_buffer *eb)
4443 * I know this is terrible, but it's temporary until we stop tracking 4508 * I know this is terrible, but it's temporary until we stop tracking
4444 * the uptodate bits and such for the extent buffers. 4509 * the uptodate bits and such for the extent buffers.
4445 */ 4510 */
4446 release_extent_buffer(eb, GFP_ATOMIC); 4511 release_extent_buffer(eb);
4447} 4512}
4448 4513
4449void free_extent_buffer_stale(struct extent_buffer *eb) 4514void free_extent_buffer_stale(struct extent_buffer *eb)
@@ -4457,7 +4522,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
4457 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) && 4522 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
4458 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) 4523 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4459 atomic_dec(&eb->refs); 4524 atomic_dec(&eb->refs);
4460 release_extent_buffer(eb, GFP_NOFS); 4525 release_extent_buffer(eb);
4461} 4526}
4462 4527
4463void clear_extent_buffer_dirty(struct extent_buffer *eb) 4528void clear_extent_buffer_dirty(struct extent_buffer *eb)
@@ -4509,17 +4574,6 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
4509 return was_dirty; 4574 return was_dirty;
4510} 4575}
4511 4576
4512static int range_straddles_pages(u64 start, u64 len)
4513{
4514 if (len < PAGE_CACHE_SIZE)
4515 return 1;
4516 if (start & (PAGE_CACHE_SIZE - 1))
4517 return 1;
4518 if ((start + len) & (PAGE_CACHE_SIZE - 1))
4519 return 1;
4520 return 0;
4521}
4522
4523int clear_extent_buffer_uptodate(struct extent_buffer *eb) 4577int clear_extent_buffer_uptodate(struct extent_buffer *eb)
4524{ 4578{
4525 unsigned long i; 4579 unsigned long i;
@@ -4551,37 +4605,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb)
4551 return 0; 4605 return 0;
4552} 4606}
4553 4607
4554int extent_range_uptodate(struct extent_io_tree *tree,
4555 u64 start, u64 end)
4556{
4557 struct page *page;
4558 int ret;
4559 int pg_uptodate = 1;
4560 int uptodate;
4561 unsigned long index;
4562
4563 if (range_straddles_pages(start, end - start + 1)) {
4564 ret = test_range_bit(tree, start, end,
4565 EXTENT_UPTODATE, 1, NULL);
4566 if (ret)
4567 return 1;
4568 }
4569 while (start <= end) {
4570 index = start >> PAGE_CACHE_SHIFT;
4571 page = find_get_page(tree->mapping, index);
4572 if (!page)
4573 return 1;
4574 uptodate = PageUptodate(page);
4575 page_cache_release(page);
4576 if (!uptodate) {
4577 pg_uptodate = 0;
4578 break;
4579 }
4580 start += PAGE_CACHE_SIZE;
4581 }
4582 return pg_uptodate;
4583}
4584
4585int extent_buffer_uptodate(struct extent_buffer *eb) 4608int extent_buffer_uptodate(struct extent_buffer *eb)
4586{ 4609{
4587 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 4610 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -4644,7 +4667,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4644 ClearPageError(page); 4667 ClearPageError(page);
4645 err = __extent_read_full_page(tree, page, 4668 err = __extent_read_full_page(tree, page,
4646 get_extent, &bio, 4669 get_extent, &bio,
4647 mirror_num, &bio_flags); 4670 mirror_num, &bio_flags,
4671 READ | REQ_META);
4648 if (err) 4672 if (err)
4649 ret = err; 4673 ret = err;
4650 } else { 4674 } else {
@@ -4653,7 +4677,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4653 } 4677 }
4654 4678
4655 if (bio) { 4679 if (bio) {
4656 err = submit_one_bio(READ, bio, mirror_num, bio_flags); 4680 err = submit_one_bio(READ | REQ_META, bio, mirror_num,
4681 bio_flags);
4657 if (err) 4682 if (err)
4658 return err; 4683 return err;
4659 } 4684 }
@@ -5017,7 +5042,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5017 } 5042 }
5018} 5043}
5019 5044
5020int try_release_extent_buffer(struct page *page, gfp_t mask) 5045int try_release_extent_buffer(struct page *page)
5021{ 5046{
5022 struct extent_buffer *eb; 5047 struct extent_buffer *eb;
5023 5048
@@ -5047,9 +5072,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
5047 } 5072 }
5048 spin_unlock(&page->mapping->private_lock); 5073 spin_unlock(&page->mapping->private_lock);
5049 5074
5050 if ((mask & GFP_NOFS) == GFP_NOFS)
5051 mask = GFP_NOFS;
5052
5053 /* 5075 /*
5054 * If tree ref isn't set then we know the ref on this eb is a real ref, 5076 * If tree ref isn't set then we know the ref on this eb is a real ref,
5055 * so just return, this page will likely be freed soon anyway. 5077 * so just return, this page will likely be freed soon anyway.
@@ -5059,5 +5081,5 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
5059 return 0; 5081 return 0;
5060 } 5082 }
5061 5083
5062 return release_extent_buffer(eb, mask); 5084 return release_extent_buffer(eb);
5063} 5085}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 258c92156857..a2c03a175009 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -81,9 +81,9 @@ struct extent_io_ops {
81 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 81 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
82 struct extent_state *state, int uptodate); 82 struct extent_state *state, int uptodate);
83 void (*set_bit_hook)(struct inode *inode, struct extent_state *state, 83 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
84 int *bits); 84 unsigned long *bits);
85 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, 85 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
86 int *bits); 86 unsigned long *bits);
87 void (*merge_extent_hook)(struct inode *inode, 87 void (*merge_extent_hook)(struct inode *inode,
88 struct extent_state *new, 88 struct extent_state *new,
89 struct extent_state *other); 89 struct extent_state *other);
@@ -116,7 +116,9 @@ struct extent_state {
116 /* for use by the FS */ 116 /* for use by the FS */
117 u64 private; 117 u64 private;
118 118
119#ifdef CONFIG_BTRFS_DEBUG
119 struct list_head leak_list; 120 struct list_head leak_list;
121#endif
120}; 122};
121 123
122#define INLINE_EXTENT_BUFFER_PAGES 16 124#define INLINE_EXTENT_BUFFER_PAGES 16
@@ -132,7 +134,6 @@ struct extent_buffer {
132 atomic_t refs; 134 atomic_t refs;
133 atomic_t io_pages; 135 atomic_t io_pages;
134 int read_mirror; 136 int read_mirror;
135 struct list_head leak_list;
136 struct rcu_head rcu_head; 137 struct rcu_head rcu_head;
137 pid_t lock_owner; 138 pid_t lock_owner;
138 139
@@ -159,6 +160,9 @@ struct extent_buffer {
159 wait_queue_head_t read_lock_wq; 160 wait_queue_head_t read_lock_wq;
160 wait_queue_head_t lock_wq; 161 wait_queue_head_t lock_wq;
161 struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; 162 struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
163#ifdef CONFIG_BTRFS_DEBUG
164 struct list_head leak_list;
165#endif
162}; 166};
163 167
164static inline void extent_set_compress_type(unsigned long *bio_flags, 168static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -185,13 +189,10 @@ void extent_io_tree_init(struct extent_io_tree *tree,
185int try_release_extent_mapping(struct extent_map_tree *map, 189int try_release_extent_mapping(struct extent_map_tree *map,
186 struct extent_io_tree *tree, struct page *page, 190 struct extent_io_tree *tree, struct page *page,
187 gfp_t mask); 191 gfp_t mask);
188int try_release_extent_buffer(struct page *page, gfp_t mask); 192int try_release_extent_buffer(struct page *page);
189int try_release_extent_state(struct extent_map_tree *map,
190 struct extent_io_tree *tree, struct page *page,
191 gfp_t mask);
192int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); 193int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
193int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 194int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
194 int bits, struct extent_state **cached); 195 unsigned long bits, struct extent_state **cached);
195int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); 196int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
196int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, 197int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
197 struct extent_state **cached, gfp_t mask); 198 struct extent_state **cached, gfp_t mask);
@@ -207,16 +208,17 @@ u64 count_range_bits(struct extent_io_tree *tree,
207 208
208void free_extent_state(struct extent_state *state); 209void free_extent_state(struct extent_state *state);
209int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 210int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
210 int bits, int filled, struct extent_state *cached_state); 211 unsigned long bits, int filled,
212 struct extent_state *cached_state);
211int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 213int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
212 int bits, gfp_t mask); 214 unsigned long bits, gfp_t mask);
213int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 215int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
214 int bits, int wake, int delete, struct extent_state **cached, 216 unsigned long bits, int wake, int delete,
215 gfp_t mask); 217 struct extent_state **cached, gfp_t mask);
216int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 218int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
217 int bits, gfp_t mask); 219 unsigned long bits, gfp_t mask);
218int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 220int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
219 int bits, u64 *failed_start, 221 unsigned long bits, u64 *failed_start,
220 struct extent_state **cached_state, gfp_t mask); 222 struct extent_state **cached_state, gfp_t mask);
221int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 223int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
222 struct extent_state **cached_state, gfp_t mask); 224 struct extent_state **cached_state, gfp_t mask);
@@ -229,17 +231,15 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
229int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 231int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
230 gfp_t mask); 232 gfp_t mask);
231int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 233int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
232 int bits, int clear_bits, 234 unsigned long bits, unsigned long clear_bits,
233 struct extent_state **cached_state, gfp_t mask); 235 struct extent_state **cached_state, gfp_t mask);
234int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 236int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
235 struct extent_state **cached_state, gfp_t mask); 237 struct extent_state **cached_state, gfp_t mask);
236int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, 238int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
237 struct extent_state **cached_state, gfp_t mask); 239 struct extent_state **cached_state, gfp_t mask);
238int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 240int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
239 u64 *start_ret, u64 *end_ret, int bits, 241 u64 *start_ret, u64 *end_ret, unsigned long bits,
240 struct extent_state **cached_state); 242 struct extent_state **cached_state);
241struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
242 u64 start, int bits);
243int extent_invalidatepage(struct extent_io_tree *tree, 243int extent_invalidatepage(struct extent_io_tree *tree,
244 struct page *page, unsigned long offset); 244 struct page *page, unsigned long offset);
245int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 245int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
@@ -261,6 +261,10 @@ int extent_readpages(struct extent_io_tree *tree,
261int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 261int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
262 __u64 start, __u64 len, get_extent_t *get_extent); 262 __u64 start, __u64 len, get_extent_t *get_extent);
263int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); 263int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
264void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
265 int count);
266void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
267 int bvec_index, u32 csums[], int count);
264int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 268int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
265void set_page_extent_mapped(struct page *page); 269void set_page_extent_mapped(struct page *page);
266 270
@@ -278,6 +282,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
278int read_extent_buffer_pages(struct extent_io_tree *tree, 282int read_extent_buffer_pages(struct extent_io_tree *tree,
279 struct extent_buffer *eb, u64 start, int wait, 283 struct extent_buffer *eb, u64 start, int wait,
280 get_extent_t *get_extent, int mirror_num); 284 get_extent_t *get_extent, int mirror_num);
285void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
281 286
282static inline unsigned long num_extent_pages(u64 start, u64 len) 287static inline unsigned long num_extent_pages(u64 start, u64 len)
283{ 288{
@@ -313,7 +318,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
313 unsigned long src_offset, unsigned long len); 318 unsigned long src_offset, unsigned long len);
314void memset_extent_buffer(struct extent_buffer *eb, char c, 319void memset_extent_buffer(struct extent_buffer *eb, char c,
315 unsigned long start, unsigned long len); 320 unsigned long start, unsigned long len);
316void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
317void clear_extent_buffer_dirty(struct extent_buffer *eb); 321void clear_extent_buffer_dirty(struct extent_buffer *eb);
318int set_extent_buffer_dirty(struct extent_buffer *eb); 322int set_extent_buffer_dirty(struct extent_buffer *eb);
319int set_extent_buffer_uptodate(struct extent_buffer *eb); 323int set_extent_buffer_uptodate(struct extent_buffer *eb);
@@ -323,8 +327,6 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
323 unsigned long min_len, char **map, 327 unsigned long min_len, char **map,
324 unsigned long *map_start, 328 unsigned long *map_start,
325 unsigned long *map_len); 329 unsigned long *map_len);
326int extent_range_uptodate(struct extent_io_tree *tree,
327 u64 start, u64 end);
328int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); 330int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
329int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); 331int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
330int extent_clear_unlock_delalloc(struct inode *inode, 332int extent_clear_unlock_delalloc(struct inode *inode,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2834ca5768ea..a4a7a1a8da95 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -174,6 +174,14 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
174 test_bit(EXTENT_FLAG_LOGGING, &next->flags)) 174 test_bit(EXTENT_FLAG_LOGGING, &next->flags))
175 return 0; 175 return 0;
176 176
177 /*
178 * We don't want to merge stuff that hasn't been written to the log yet
179 * since it may not reflect exactly what is on disk, and that would be
180 * bad.
181 */
182 if (!list_empty(&prev->list) || !list_empty(&next->list))
183 return 0;
184
177 if (extent_map_end(prev) == next->start && 185 if (extent_map_end(prev) == next->start &&
178 prev->flags == next->flags && 186 prev->flags == next->flags &&
179 prev->bdev == next->bdev && 187 prev->bdev == next->bdev &&
@@ -209,9 +217,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
209 em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; 217 em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
210 em->mod_start = merge->mod_start; 218 em->mod_start = merge->mod_start;
211 em->generation = max(em->generation, merge->generation); 219 em->generation = max(em->generation, merge->generation);
212 list_move(&em->list, &tree->modified_extents);
213 220
214 list_del_init(&merge->list);
215 rb_erase(&merge->rb_node, &tree->map); 221 rb_erase(&merge->rb_node, &tree->map);
216 free_extent_map(merge); 222 free_extent_map(merge);
217 } 223 }
@@ -227,7 +233,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
227 merge->in_tree = 0; 233 merge->in_tree = 0;
228 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; 234 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
229 em->generation = max(em->generation, merge->generation); 235 em->generation = max(em->generation, merge->generation);
230 list_del_init(&merge->list);
231 free_extent_map(merge); 236 free_extent_map(merge);
232 } 237 }
233} 238}
@@ -302,7 +307,7 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
302 * reference dropped if the merge attempt was successful. 307 * reference dropped if the merge attempt was successful.
303 */ 308 */
304int add_extent_mapping(struct extent_map_tree *tree, 309int add_extent_mapping(struct extent_map_tree *tree,
305 struct extent_map *em) 310 struct extent_map *em, int modified)
306{ 311{
307 int ret = 0; 312 int ret = 0;
308 struct rb_node *rb; 313 struct rb_node *rb;
@@ -324,7 +329,10 @@ int add_extent_mapping(struct extent_map_tree *tree,
324 em->mod_start = em->start; 329 em->mod_start = em->start;
325 em->mod_len = em->len; 330 em->mod_len = em->len;
326 331
327 try_merge_map(tree, em); 332 if (modified)
333 list_move(&em->list, &tree->modified_extents);
334 else
335 try_merge_map(tree, em);
328out: 336out:
329 return ret; 337 return ret;
330} 338}
@@ -337,8 +345,9 @@ static u64 range_end(u64 start, u64 len)
337 return start + len; 345 return start + len;
338} 346}
339 347
340struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, 348static struct extent_map *
341 u64 start, u64 len, int strict) 349__lookup_extent_mapping(struct extent_map_tree *tree,
350 u64 start, u64 len, int strict)
342{ 351{
343 struct extent_map *em; 352 struct extent_map *em;
344 struct rb_node *rb_node; 353 struct rb_node *rb_node;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index c6598c89cff8..61adc44b7805 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,6 +26,7 @@ struct extent_map {
26 u64 mod_len; 26 u64 mod_len;
27 u64 orig_start; 27 u64 orig_start;
28 u64 orig_block_len; 28 u64 orig_block_len;
29 u64 ram_bytes;
29 u64 block_start; 30 u64 block_start;
30 u64 block_len; 31 u64 block_len;
31 u64 generation; 32 u64 generation;
@@ -61,7 +62,7 @@ void extent_map_tree_init(struct extent_map_tree *tree);
61struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, 62struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
62 u64 start, u64 len); 63 u64 start, u64 len);
63int add_extent_mapping(struct extent_map_tree *tree, 64int add_extent_mapping(struct extent_map_tree *tree,
64 struct extent_map *em); 65 struct extent_map *em, int modified);
65int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); 66int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
66 67
67struct extent_map *alloc_extent_map(void); 68struct extent_map *alloc_extent_map(void);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index c4628a201cb3..b193bf324a41 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -83,10 +83,11 @@ out:
83 return ret; 83 return ret;
84} 84}
85 85
86struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, 86static struct btrfs_csum_item *
87 struct btrfs_root *root, 87btrfs_lookup_csum(struct btrfs_trans_handle *trans,
88 struct btrfs_path *path, 88 struct btrfs_root *root,
89 u64 bytenr, int cow) 89 struct btrfs_path *path,
90 u64 bytenr, int cow)
90{ 91{
91 int ret; 92 int ret;
92 struct btrfs_key file_key; 93 struct btrfs_key file_key;
@@ -152,32 +153,12 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
152 return ret; 153 return ret;
153} 154}
154 155
155u64 btrfs_file_extent_length(struct btrfs_path *path)
156{
157 int extent_type;
158 struct btrfs_file_extent_item *fi;
159 u64 len;
160
161 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
162 struct btrfs_file_extent_item);
163 extent_type = btrfs_file_extent_type(path->nodes[0], fi);
164
165 if (extent_type == BTRFS_FILE_EXTENT_REG ||
166 extent_type == BTRFS_FILE_EXTENT_PREALLOC)
167 len = btrfs_file_extent_num_bytes(path->nodes[0], fi);
168 else if (extent_type == BTRFS_FILE_EXTENT_INLINE)
169 len = btrfs_file_extent_inline_len(path->nodes[0], fi);
170 else
171 BUG();
172
173 return len;
174}
175
176static int __btrfs_lookup_bio_sums(struct btrfs_root *root, 156static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
177 struct inode *inode, struct bio *bio, 157 struct inode *inode, struct bio *bio,
178 u64 logical_offset, u32 *dst, int dio) 158 u64 logical_offset, u32 *dst, int dio)
179{ 159{
180 u32 sum; 160 u32 sum[16];
161 int len;
181 struct bio_vec *bvec = bio->bi_io_vec; 162 struct bio_vec *bvec = bio->bi_io_vec;
182 int bio_index = 0; 163 int bio_index = 0;
183 u64 offset = 0; 164 u64 offset = 0;
@@ -186,7 +167,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
186 u64 disk_bytenr; 167 u64 disk_bytenr;
187 u32 diff; 168 u32 diff;
188 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 169 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
189 int ret; 170 int count;
190 struct btrfs_path *path; 171 struct btrfs_path *path;
191 struct btrfs_csum_item *item = NULL; 172 struct btrfs_csum_item *item = NULL;
192 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 173 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -214,10 +195,12 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
214 if (dio) 195 if (dio)
215 offset = logical_offset; 196 offset = logical_offset;
216 while (bio_index < bio->bi_vcnt) { 197 while (bio_index < bio->bi_vcnt) {
198 len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
217 if (!dio) 199 if (!dio)
218 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 200 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
219 ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); 201 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum,
220 if (ret == 0) 202 len);
203 if (count)
221 goto found; 204 goto found;
222 205
223 if (!item || disk_bytenr < item_start_offset || 206 if (!item || disk_bytenr < item_start_offset ||
@@ -230,10 +213,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
230 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root, 213 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
231 path, disk_bytenr, 0); 214 path, disk_bytenr, 0);
232 if (IS_ERR(item)) { 215 if (IS_ERR(item)) {
233 ret = PTR_ERR(item); 216 count = 1;
234 if (ret == -ENOENT || ret == -EFBIG) 217 sum[0] = 0;
235 ret = 0;
236 sum = 0;
237 if (BTRFS_I(inode)->root->root_key.objectid == 218 if (BTRFS_I(inode)->root->root_key.objectid ==
238 BTRFS_DATA_RELOC_TREE_OBJECTID) { 219 BTRFS_DATA_RELOC_TREE_OBJECTID) {
239 set_extent_bits(io_tree, offset, 220 set_extent_bits(io_tree, offset,
@@ -269,19 +250,29 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
269 diff = disk_bytenr - item_start_offset; 250 diff = disk_bytenr - item_start_offset;
270 diff = diff / root->sectorsize; 251 diff = diff / root->sectorsize;
271 diff = diff * csum_size; 252 diff = diff * csum_size;
272 253 count = min_t(int, len, (item_last_offset - disk_bytenr) >>
273 read_extent_buffer(path->nodes[0], &sum, 254 inode->i_sb->s_blocksize_bits);
255 read_extent_buffer(path->nodes[0], sum,
274 ((unsigned long)item) + diff, 256 ((unsigned long)item) + diff,
275 csum_size); 257 csum_size * count);
276found: 258found:
277 if (dst) 259 if (dst) {
278 *dst++ = sum; 260 memcpy(dst, sum, count * csum_size);
279 else 261 dst += count;
280 set_state_private(io_tree, offset, sum); 262 } else {
281 disk_bytenr += bvec->bv_len; 263 if (dio)
282 offset += bvec->bv_len; 264 extent_cache_csums_dio(io_tree, offset, sum,
283 bio_index++; 265 count);
284 bvec++; 266 else
267 extent_cache_csums(io_tree, bio, bio_index, sum,
268 count);
269 }
270 while (count--) {
271 disk_bytenr += bvec->bv_len;
272 offset += bvec->bv_len;
273 bio_index++;
274 bvec++;
275 }
285 } 276 }
286 btrfs_free_path(path); 277 btrfs_free_path(path);
287 return 0; 278 return 0;
@@ -358,11 +349,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
358 349
359 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 350 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
360 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 351 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
361 key.type != BTRFS_EXTENT_CSUM_KEY) 352 key.type != BTRFS_EXTENT_CSUM_KEY ||
362 break; 353 key.offset > end)
363
364 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
365 if (key.offset > end)
366 break; 354 break;
367 355
368 if (key.offset > start) 356 if (key.offset > start)
@@ -484,8 +472,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
484 472
485 data = kmap_atomic(bvec->bv_page); 473 data = kmap_atomic(bvec->bv_page);
486 sector_sum->sum = ~(u32)0; 474 sector_sum->sum = ~(u32)0;
487 sector_sum->sum = btrfs_csum_data(root, 475 sector_sum->sum = btrfs_csum_data(data + bvec->bv_offset,
488 data + bvec->bv_offset,
489 sector_sum->sum, 476 sector_sum->sum,
490 bvec->bv_len); 477 bvec->bv_len);
491 kunmap_atomic(data); 478 kunmap_atomic(data);
@@ -518,8 +505,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
518 * This calls btrfs_truncate_item with the correct args based on the 505 * This calls btrfs_truncate_item with the correct args based on the
519 * overlap, and fixes up the key as required. 506 * overlap, and fixes up the key as required.
520 */ 507 */
521static noinline void truncate_one_csum(struct btrfs_trans_handle *trans, 508static noinline void truncate_one_csum(struct btrfs_root *root,
522 struct btrfs_root *root,
523 struct btrfs_path *path, 509 struct btrfs_path *path,
524 struct btrfs_key *key, 510 struct btrfs_key *key,
525 u64 bytenr, u64 len) 511 u64 bytenr, u64 len)
@@ -544,7 +530,7 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
544 */ 530 */
545 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 531 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
546 new_size *= csum_size; 532 new_size *= csum_size;
547 btrfs_truncate_item(trans, root, path, new_size, 1); 533 btrfs_truncate_item(root, path, new_size, 1);
548 } else if (key->offset >= bytenr && csum_end > end_byte && 534 } else if (key->offset >= bytenr && csum_end > end_byte &&
549 end_byte > key->offset) { 535 end_byte > key->offset) {
550 /* 536 /*
@@ -556,10 +542,10 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
556 u32 new_size = (csum_end - end_byte) >> blocksize_bits; 542 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
557 new_size *= csum_size; 543 new_size *= csum_size;
558 544
559 btrfs_truncate_item(trans, root, path, new_size, 0); 545 btrfs_truncate_item(root, path, new_size, 0);
560 546
561 key->offset = end_byte; 547 key->offset = end_byte;
562 btrfs_set_item_key_safe(trans, root, path, key); 548 btrfs_set_item_key_safe(root, path, key);
563 } else { 549 } else {
564 BUG(); 550 BUG();
565 } 551 }
@@ -674,7 +660,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
674 660
675 key.offset = end_byte - 1; 661 key.offset = end_byte - 1;
676 } else { 662 } else {
677 truncate_one_csum(trans, root, path, &key, bytenr, len); 663 truncate_one_csum(root, path, &key, bytenr, len);
678 if (key.offset < bytenr) 664 if (key.offset < bytenr)
679 break; 665 break;
680 } 666 }
@@ -835,7 +821,7 @@ again:
835 diff /= csum_size; 821 diff /= csum_size;
836 diff *= csum_size; 822 diff *= csum_size;
837 823
838 btrfs_extend_item(trans, root, path, diff); 824 btrfs_extend_item(root, path, diff);
839 goto csum; 825 goto csum;
840 } 826 }
841 827
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bc4d54c465a0..4205ba752d40 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -193,8 +193,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
193 * the same inode in the tree, we will merge them together (by 193 * the same inode in the tree, we will merge them together (by
194 * __btrfs_add_inode_defrag()) and free the one that we want to requeue. 194 * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
195 */ 195 */
196void btrfs_requeue_inode_defrag(struct inode *inode, 196static void btrfs_requeue_inode_defrag(struct inode *inode,
197 struct inode_defrag *defrag) 197 struct inode_defrag *defrag)
198{ 198{
199 struct btrfs_root *root = BTRFS_I(inode)->root; 199 struct btrfs_root *root = BTRFS_I(inode)->root;
200 int ret; 200 int ret;
@@ -474,7 +474,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
474/* 474/*
475 * unlocks pages after btrfs_file_write is done with them 475 * unlocks pages after btrfs_file_write is done with them
476 */ 476 */
477void btrfs_drop_pages(struct page **pages, size_t num_pages) 477static void btrfs_drop_pages(struct page **pages, size_t num_pages)
478{ 478{
479 size_t i; 479 size_t i;
480 for (i = 0; i < num_pages; i++) { 480 for (i = 0; i < num_pages; i++) {
@@ -498,9 +498,9 @@ void btrfs_drop_pages(struct page **pages, size_t num_pages)
498 * doing real data extents, marking pages dirty and delalloc as required. 498 * doing real data extents, marking pages dirty and delalloc as required.
499 */ 499 */
500int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, 500int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
501 struct page **pages, size_t num_pages, 501 struct page **pages, size_t num_pages,
502 loff_t pos, size_t write_bytes, 502 loff_t pos, size_t write_bytes,
503 struct extent_state **cached) 503 struct extent_state **cached)
504{ 504{
505 int err = 0; 505 int err = 0;
506 int i; 506 int i;
@@ -553,6 +553,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
553 int testend = 1; 553 int testend = 1;
554 unsigned long flags; 554 unsigned long flags;
555 int compressed = 0; 555 int compressed = 0;
556 bool modified;
556 557
557 WARN_ON(end < start); 558 WARN_ON(end < start);
558 if (end == (u64)-1) { 559 if (end == (u64)-1) {
@@ -562,6 +563,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
562 while (1) { 563 while (1) {
563 int no_splits = 0; 564 int no_splits = 0;
564 565
566 modified = false;
565 if (!split) 567 if (!split)
566 split = alloc_extent_map(); 568 split = alloc_extent_map();
567 if (!split2) 569 if (!split2)
@@ -593,6 +595,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
593 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 595 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
594 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 596 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
595 clear_bit(EXTENT_FLAG_LOGGING, &flags); 597 clear_bit(EXTENT_FLAG_LOGGING, &flags);
598 modified = !list_empty(&em->list);
596 remove_extent_mapping(em_tree, em); 599 remove_extent_mapping(em_tree, em);
597 if (no_splits) 600 if (no_splits)
598 goto next; 601 goto next;
@@ -608,15 +611,15 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
608 split->block_len = em->block_len; 611 split->block_len = em->block_len;
609 else 612 else
610 split->block_len = split->len; 613 split->block_len = split->len;
614 split->ram_bytes = em->ram_bytes;
611 split->orig_block_len = max(split->block_len, 615 split->orig_block_len = max(split->block_len,
612 em->orig_block_len); 616 em->orig_block_len);
613 split->generation = gen; 617 split->generation = gen;
614 split->bdev = em->bdev; 618 split->bdev = em->bdev;
615 split->flags = flags; 619 split->flags = flags;
616 split->compress_type = em->compress_type; 620 split->compress_type = em->compress_type;
617 ret = add_extent_mapping(em_tree, split); 621 ret = add_extent_mapping(em_tree, split, modified);
618 BUG_ON(ret); /* Logic error */ 622 BUG_ON(ret); /* Logic error */
619 list_move(&split->list, &em_tree->modified_extents);
620 free_extent_map(split); 623 free_extent_map(split);
621 split = split2; 624 split = split2;
622 split2 = NULL; 625 split2 = NULL;
@@ -633,6 +636,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
633 split->generation = gen; 636 split->generation = gen;
634 split->orig_block_len = max(em->block_len, 637 split->orig_block_len = max(em->block_len,
635 em->orig_block_len); 638 em->orig_block_len);
639 split->ram_bytes = em->ram_bytes;
636 640
637 if (compressed) { 641 if (compressed) {
638 split->block_len = em->block_len; 642 split->block_len = em->block_len;
@@ -644,9 +648,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
644 split->orig_start = em->orig_start; 648 split->orig_start = em->orig_start;
645 } 649 }
646 650
647 ret = add_extent_mapping(em_tree, split); 651 ret = add_extent_mapping(em_tree, split, modified);
648 BUG_ON(ret); /* Logic error */ 652 BUG_ON(ret); /* Logic error */
649 list_move(&split->list, &em_tree->modified_extents);
650 free_extent_map(split); 653 free_extent_map(split);
651 split = NULL; 654 split = NULL;
652 } 655 }
@@ -822,7 +825,7 @@ next_slot:
822 825
823 memcpy(&new_key, &key, sizeof(new_key)); 826 memcpy(&new_key, &key, sizeof(new_key));
824 new_key.offset = end; 827 new_key.offset = end;
825 btrfs_set_item_key_safe(trans, root, path, &new_key); 828 btrfs_set_item_key_safe(root, path, &new_key);
826 829
827 extent_offset += end - key.offset; 830 extent_offset += end - key.offset;
828 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 831 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1038,7 +1041,7 @@ again:
1038 ino, bytenr, orig_offset, 1041 ino, bytenr, orig_offset,
1039 &other_start, &other_end)) { 1042 &other_start, &other_end)) {
1040 new_key.offset = end; 1043 new_key.offset = end;
1041 btrfs_set_item_key_safe(trans, root, path, &new_key); 1044 btrfs_set_item_key_safe(root, path, &new_key);
1042 fi = btrfs_item_ptr(leaf, path->slots[0], 1045 fi = btrfs_item_ptr(leaf, path->slots[0],
1043 struct btrfs_file_extent_item); 1046 struct btrfs_file_extent_item);
1044 btrfs_set_file_extent_generation(leaf, fi, 1047 btrfs_set_file_extent_generation(leaf, fi,
@@ -1072,7 +1075,7 @@ again:
1072 trans->transid); 1075 trans->transid);
1073 path->slots[0]++; 1076 path->slots[0]++;
1074 new_key.offset = start; 1077 new_key.offset = start;
1075 btrfs_set_item_key_safe(trans, root, path, &new_key); 1078 btrfs_set_item_key_safe(root, path, &new_key);
1076 1079
1077 fi = btrfs_item_ptr(leaf, path->slots[0], 1080 fi = btrfs_item_ptr(leaf, path->slots[0],
1078 struct btrfs_file_extent_item); 1081 struct btrfs_file_extent_item);
@@ -1883,7 +1886,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
1883 1886
1884 path->slots[0]++; 1887 path->slots[0]++;
1885 key.offset = offset; 1888 key.offset = offset;
1886 btrfs_set_item_key_safe(trans, root, path, &key); 1889 btrfs_set_item_key_safe(root, path, &key);
1887 fi = btrfs_item_ptr(leaf, path->slots[0], 1890 fi = btrfs_item_ptr(leaf, path->slots[0],
1888 struct btrfs_file_extent_item); 1891 struct btrfs_file_extent_item);
1889 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - 1892 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -1913,6 +1916,7 @@ out:
1913 } else { 1916 } else {
1914 hole_em->start = offset; 1917 hole_em->start = offset;
1915 hole_em->len = end - offset; 1918 hole_em->len = end - offset;
1919 hole_em->ram_bytes = hole_em->len;
1916 hole_em->orig_start = offset; 1920 hole_em->orig_start = offset;
1917 1921
1918 hole_em->block_start = EXTENT_MAP_HOLE; 1922 hole_em->block_start = EXTENT_MAP_HOLE;
@@ -1925,10 +1929,7 @@ out:
1925 do { 1929 do {
1926 btrfs_drop_extent_cache(inode, offset, end - 1, 0); 1930 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
1927 write_lock(&em_tree->lock); 1931 write_lock(&em_tree->lock);
1928 ret = add_extent_mapping(em_tree, hole_em); 1932 ret = add_extent_mapping(em_tree, hole_em, 1);
1929 if (!ret)
1930 list_move(&hole_em->list,
1931 &em_tree->modified_extents);
1932 write_unlock(&em_tree->lock); 1933 write_unlock(&em_tree->lock);
1933 } while (ret == -EEXIST); 1934 } while (ret == -EEXIST);
1934 free_extent_map(hole_em); 1935 free_extent_map(hole_em);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 1f84fc09c1a8..ecca6c7375a6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -104,7 +104,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
104 104
105 spin_lock(&block_group->lock); 105 spin_lock(&block_group->lock);
106 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 106 if (!((BTRFS_I(inode)->flags & flags) == flags)) {
107 printk(KERN_INFO "Old style space inode found, converting.\n"); 107 btrfs_info(root->fs_info,
108 "Old style space inode found, converting.");
108 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 109 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
109 BTRFS_INODE_NODATACOW; 110 BTRFS_INODE_NODATACOW;
110 block_group->disk_cache_state = BTRFS_DC_CLEAR; 111 block_group->disk_cache_state = BTRFS_DC_CLEAR;
@@ -119,9 +120,10 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
119 return inode; 120 return inode;
120} 121}
121 122
122int __create_free_space_inode(struct btrfs_root *root, 123static int __create_free_space_inode(struct btrfs_root *root,
123 struct btrfs_trans_handle *trans, 124 struct btrfs_trans_handle *trans,
124 struct btrfs_path *path, u64 ino, u64 offset) 125 struct btrfs_path *path,
126 u64 ino, u64 offset)
125{ 127{
126 struct btrfs_key key; 128 struct btrfs_key key;
127 struct btrfs_disk_key disk_key; 129 struct btrfs_disk_key disk_key;
@@ -431,7 +433,7 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
431 if (index == 0) 433 if (index == 0)
432 offset = sizeof(u32) * io_ctl->num_pages; 434 offset = sizeof(u32) * io_ctl->num_pages;
433 435
434 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 436 crc = btrfs_csum_data(io_ctl->orig + offset, crc,
435 PAGE_CACHE_SIZE - offset); 437 PAGE_CACHE_SIZE - offset);
436 btrfs_csum_final(crc, (char *)&crc); 438 btrfs_csum_final(crc, (char *)&crc);
437 io_ctl_unmap_page(io_ctl); 439 io_ctl_unmap_page(io_ctl);
@@ -461,7 +463,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
461 kunmap(io_ctl->pages[0]); 463 kunmap(io_ctl->pages[0]);
462 464
463 io_ctl_map_page(io_ctl, 0); 465 io_ctl_map_page(io_ctl, 0);
464 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 466 crc = btrfs_csum_data(io_ctl->orig + offset, crc,
465 PAGE_CACHE_SIZE - offset); 467 PAGE_CACHE_SIZE - offset);
466 btrfs_csum_final(crc, (char *)&crc); 468 btrfs_csum_final(crc, (char *)&crc);
467 if (val != crc) { 469 if (val != crc) {
@@ -624,9 +626,9 @@ next:
624 spin_unlock(&ctl->tree_lock); 626 spin_unlock(&ctl->tree_lock);
625} 627}
626 628
627int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 629static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
628 struct btrfs_free_space_ctl *ctl, 630 struct btrfs_free_space_ctl *ctl,
629 struct btrfs_path *path, u64 offset) 631 struct btrfs_path *path, u64 offset)
630{ 632{
631 struct btrfs_free_space_header *header; 633 struct btrfs_free_space_header *header;
632 struct extent_buffer *leaf; 634 struct extent_buffer *leaf;
@@ -669,10 +671,11 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
669 btrfs_release_path(path); 671 btrfs_release_path(path);
670 672
671 if (BTRFS_I(inode)->generation != generation) { 673 if (BTRFS_I(inode)->generation != generation) {
672 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 674 btrfs_err(root->fs_info,
673 " not match free space cache generation (%llu)\n", 675 "free space inode generation (%llu) "
674 (unsigned long long)BTRFS_I(inode)->generation, 676 "did not match free space cache generation (%llu)",
675 (unsigned long long)generation); 677 (unsigned long long)BTRFS_I(inode)->generation,
678 (unsigned long long)generation);
676 return 0; 679 return 0;
677 } 680 }
678 681
@@ -721,8 +724,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
721 ret = link_free_space(ctl, e); 724 ret = link_free_space(ctl, e);
722 spin_unlock(&ctl->tree_lock); 725 spin_unlock(&ctl->tree_lock);
723 if (ret) { 726 if (ret) {
724 printk(KERN_ERR "Duplicate entries in " 727 btrfs_err(root->fs_info,
725 "free space cache, dumping\n"); 728 "Duplicate entries in free space cache, dumping");
726 kmem_cache_free(btrfs_free_space_cachep, e); 729 kmem_cache_free(btrfs_free_space_cachep, e);
727 goto free_cache; 730 goto free_cache;
728 } 731 }
@@ -741,8 +744,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
741 ctl->op->recalc_thresholds(ctl); 744 ctl->op->recalc_thresholds(ctl);
742 spin_unlock(&ctl->tree_lock); 745 spin_unlock(&ctl->tree_lock);
743 if (ret) { 746 if (ret) {
744 printk(KERN_ERR "Duplicate entries in " 747 btrfs_err(root->fs_info,
745 "free space cache, dumping\n"); 748 "Duplicate entries in free space cache, dumping");
746 kmem_cache_free(btrfs_free_space_cachep, e); 749 kmem_cache_free(btrfs_free_space_cachep, e);
747 goto free_cache; 750 goto free_cache;
748 } 751 }
@@ -833,8 +836,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
833 836
834 if (!matched) { 837 if (!matched) {
835 __btrfs_remove_free_space_cache(ctl); 838 __btrfs_remove_free_space_cache(ctl);
836 printk(KERN_ERR "block group %llu has an wrong amount of free " 839 btrfs_err(fs_info, "block group %llu has wrong amount of free space",
837 "space\n", block_group->key.objectid); 840 block_group->key.objectid);
838 ret = -1; 841 ret = -1;
839 } 842 }
840out: 843out:
@@ -845,8 +848,8 @@ out:
845 spin_unlock(&block_group->lock); 848 spin_unlock(&block_group->lock);
846 ret = 0; 849 ret = 0;
847 850
848 printk(KERN_ERR "btrfs: failed to load free space cache " 851 btrfs_err(fs_info, "failed to load free space cache for block group %llu",
849 "for block group %llu\n", block_group->key.objectid); 852 block_group->key.objectid);
850 } 853 }
851 854
852 iput(inode); 855 iput(inode);
@@ -866,11 +869,11 @@ out:
866 * on mount. This will return 0 if it was successfull in writing the cache out, 869 * on mount. This will return 0 if it was successfull in writing the cache out,
867 * and -1 if it was not. 870 * and -1 if it was not.
868 */ 871 */
869int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 872static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
870 struct btrfs_free_space_ctl *ctl, 873 struct btrfs_free_space_ctl *ctl,
871 struct btrfs_block_group_cache *block_group, 874 struct btrfs_block_group_cache *block_group,
872 struct btrfs_trans_handle *trans, 875 struct btrfs_trans_handle *trans,
873 struct btrfs_path *path, u64 offset) 876 struct btrfs_path *path, u64 offset)
874{ 877{
875 struct btrfs_free_space_header *header; 878 struct btrfs_free_space_header *header;
876 struct extent_buffer *leaf; 879 struct extent_buffer *leaf;
@@ -1104,8 +1107,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1104 spin_unlock(&block_group->lock); 1107 spin_unlock(&block_group->lock);
1105 ret = 0; 1108 ret = 0;
1106#ifdef DEBUG 1109#ifdef DEBUG
1107 printk(KERN_ERR "btrfs: failed to write free space cache " 1110 btrfs_err(root->fs_info,
1108 "for block group %llu\n", block_group->key.objectid); 1111 "failed to write free space cache for block group %llu",
1112 block_group->key.objectid);
1109#endif 1113#endif
1110 } 1114 }
1111 1115
@@ -1564,7 +1568,8 @@ again:
1564 search_bytes = ctl->unit; 1568 search_bytes = ctl->unit;
1565 search_bytes = min(search_bytes, end - search_start + 1); 1569 search_bytes = min(search_bytes, end - search_start + 1);
1566 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); 1570 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1567 BUG_ON(ret < 0 || search_start != *offset); 1571 if (ret < 0 || search_start != *offset)
1572 return -EINVAL;
1568 1573
1569 /* We may have found more bits than what we need */ 1574 /* We may have found more bits than what we need */
1570 search_bytes = min(search_bytes, *bytes); 1575 search_bytes = min(search_bytes, *bytes);
@@ -1970,7 +1975,6 @@ again:
1970 re_search = true; 1975 re_search = true;
1971 goto again; 1976 goto again;
1972 } 1977 }
1973 BUG_ON(ret); /* logic error */
1974out_lock: 1978out_lock:
1975 spin_unlock(&ctl->tree_lock); 1979 spin_unlock(&ctl->tree_lock);
1976out: 1980out:
@@ -2064,7 +2068,8 @@ out:
2064 return 0; 2068 return 0;
2065} 2069}
2066 2070
2067void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) 2071static void __btrfs_remove_free_space_cache_locked(
2072 struct btrfs_free_space_ctl *ctl)
2068{ 2073{
2069 struct btrfs_free_space *info; 2074 struct btrfs_free_space *info;
2070 struct rb_node *node; 2075 struct rb_node *node;
@@ -2931,8 +2936,9 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2931 ret = __load_free_space_cache(root, inode, ctl, path, 0); 2936 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2932 2937
2933 if (ret < 0) 2938 if (ret < 0)
2934 printk(KERN_ERR "btrfs: failed to load free ino cache for " 2939 btrfs_err(fs_info,
2935 "root %llu\n", root->root_key.objectid); 2940 "failed to load free ino cache for root %llu",
2941 root->root_key.objectid);
2936out_put: 2942out_put:
2937 iput(inode); 2943 iput(inode);
2938out: 2944out:
@@ -2959,11 +2965,531 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2959 if (ret) { 2965 if (ret) {
2960 btrfs_delalloc_release_metadata(inode, inode->i_size); 2966 btrfs_delalloc_release_metadata(inode, inode->i_size);
2961#ifdef DEBUG 2967#ifdef DEBUG
2962 printk(KERN_ERR "btrfs: failed to write free ino cache " 2968 btrfs_err(root->fs_info,
2963 "for root %llu\n", root->root_key.objectid); 2969 "failed to write free ino cache for root %llu",
2970 root->root_key.objectid);
2964#endif 2971#endif
2965 } 2972 }
2966 2973
2967 iput(inode); 2974 iput(inode);
2968 return ret; 2975 return ret;
2969} 2976}
2977
2978#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
2979static struct btrfs_block_group_cache *init_test_block_group(void)
2980{
2981 struct btrfs_block_group_cache *cache;
2982
2983 cache = kzalloc(sizeof(*cache), GFP_NOFS);
2984 if (!cache)
2985 return NULL;
2986 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
2987 GFP_NOFS);
2988 if (!cache->free_space_ctl) {
2989 kfree(cache);
2990 return NULL;
2991 }
2992
2993 cache->key.objectid = 0;
2994 cache->key.offset = 1024 * 1024 * 1024;
2995 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
2996 cache->sectorsize = 4096;
2997
2998 spin_lock_init(&cache->lock);
2999 INIT_LIST_HEAD(&cache->list);
3000 INIT_LIST_HEAD(&cache->cluster_list);
3001 INIT_LIST_HEAD(&cache->new_bg_list);
3002
3003 btrfs_init_free_space_ctl(cache);
3004
3005 return cache;
3006}
3007
3008/*
3009 * Checks to see if the given range is in the free space cache. This is really
3010 * just used to check the absence of space, so if there is free space in the
3011 * range at all we will return 1.
3012 */
3013static int check_exists(struct btrfs_block_group_cache *cache, u64 offset,
3014 u64 bytes)
3015{
3016 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3017 struct btrfs_free_space *info;
3018 int ret = 0;
3019
3020 spin_lock(&ctl->tree_lock);
3021 info = tree_search_offset(ctl, offset, 0, 0);
3022 if (!info) {
3023 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3024 1, 0);
3025 if (!info)
3026 goto out;
3027 }
3028
3029have_info:
3030 if (info->bitmap) {
3031 u64 bit_off, bit_bytes;
3032 struct rb_node *n;
3033 struct btrfs_free_space *tmp;
3034
3035 bit_off = offset;
3036 bit_bytes = ctl->unit;
3037 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
3038 if (!ret) {
3039 if (bit_off == offset) {
3040 ret = 1;
3041 goto out;
3042 } else if (bit_off > offset &&
3043 offset + bytes > bit_off) {
3044 ret = 1;
3045 goto out;
3046 }
3047 }
3048
3049 n = rb_prev(&info->offset_index);
3050 while (n) {
3051 tmp = rb_entry(n, struct btrfs_free_space,
3052 offset_index);
3053 if (tmp->offset + tmp->bytes < offset)
3054 break;
3055 if (offset + bytes < tmp->offset) {
3056 n = rb_prev(&info->offset_index);
3057 continue;
3058 }
3059 info = tmp;
3060 goto have_info;
3061 }
3062
3063 n = rb_next(&info->offset_index);
3064 while (n) {
3065 tmp = rb_entry(n, struct btrfs_free_space,
3066 offset_index);
3067 if (offset + bytes < tmp->offset)
3068 break;
3069 if (tmp->offset + tmp->bytes < offset) {
3070 n = rb_next(&info->offset_index);
3071 continue;
3072 }
3073 info = tmp;
3074 goto have_info;
3075 }
3076
3077 goto out;
3078 }
3079
3080 if (info->offset == offset) {
3081 ret = 1;
3082 goto out;
3083 }
3084
3085 if (offset > info->offset && offset < info->offset + info->bytes)
3086 ret = 1;
3087out:
3088 spin_unlock(&ctl->tree_lock);
3089 return ret;
3090}
3091
3092/*
3093 * Use this if you need to make a bitmap or extent entry specifically, it
3094 * doesn't do any of the merging that add_free_space does, this acts a lot like
3095 * how the free space cache loading stuff works, so you can get really weird
3096 * configurations.
3097 */
3098static int add_free_space_entry(struct btrfs_block_group_cache *cache,
3099 u64 offset, u64 bytes, bool bitmap)
3100{
3101 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3102 struct btrfs_free_space *info = NULL, *bitmap_info;
3103 void *map = NULL;
3104 u64 bytes_added;
3105 int ret;
3106
3107again:
3108 if (!info) {
3109 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
3110 if (!info)
3111 return -ENOMEM;
3112 }
3113
3114 if (!bitmap) {
3115 spin_lock(&ctl->tree_lock);
3116 info->offset = offset;
3117 info->bytes = bytes;
3118 ret = link_free_space(ctl, info);
3119 spin_unlock(&ctl->tree_lock);
3120 if (ret)
3121 kmem_cache_free(btrfs_free_space_cachep, info);
3122 return ret;
3123 }
3124
3125 if (!map) {
3126 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
3127 if (!map) {
3128 kmem_cache_free(btrfs_free_space_cachep, info);
3129 return -ENOMEM;
3130 }
3131 }
3132
3133 spin_lock(&ctl->tree_lock);
3134 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3135 1, 0);
3136 if (!bitmap_info) {
3137 info->bitmap = map;
3138 map = NULL;
3139 add_new_bitmap(ctl, info, offset);
3140 bitmap_info = info;
3141 }
3142
3143 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
3144 bytes -= bytes_added;
3145 offset += bytes_added;
3146 spin_unlock(&ctl->tree_lock);
3147
3148 if (bytes)
3149 goto again;
3150
3151 if (map)
3152 kfree(map);
3153 return 0;
3154}
3155
3156/*
3157 * This test just does basic sanity checking, making sure we can add an exten
3158 * entry and remove space from either end and the middle, and make sure we can
3159 * remove space that covers adjacent extent entries.
3160 */
3161static int test_extents(struct btrfs_block_group_cache *cache)
3162{
3163 int ret = 0;
3164
3165 printk(KERN_ERR "Running extent only tests\n");
3166
3167 /* First just make sure we can remove an entire entry */
3168 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
3169 if (ret) {
3170 printk(KERN_ERR "Error adding initial extents %d\n", ret);
3171 return ret;
3172 }
3173
3174 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
3175 if (ret) {
3176 printk(KERN_ERR "Error removing extent %d\n", ret);
3177 return ret;
3178 }
3179
3180 if (check_exists(cache, 0, 4 * 1024 * 1024)) {
3181 printk(KERN_ERR "Full remove left some lingering space\n");
3182 return -1;
3183 }
3184
3185 /* Ok edge and middle cases now */
3186 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
3187 if (ret) {
3188 printk(KERN_ERR "Error adding half extent %d\n", ret);
3189 return ret;
3190 }
3191
3192 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
3193 if (ret) {
3194 printk(KERN_ERR "Error removing tail end %d\n", ret);
3195 return ret;
3196 }
3197
3198 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
3199 if (ret) {
3200 printk(KERN_ERR "Error removing front end %d\n", ret);
3201 return ret;
3202 }
3203
3204 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
3205 if (ret) {
3206 printk(KERN_ERR "Error removing middle peice %d\n", ret);
3207 return ret;
3208 }
3209
3210 if (check_exists(cache, 0, 1 * 1024 * 1024)) {
3211 printk(KERN_ERR "Still have space at the front\n");
3212 return -1;
3213 }
3214
3215 if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
3216 printk(KERN_ERR "Still have space in the middle\n");
3217 return -1;
3218 }
3219
3220 if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
3221 printk(KERN_ERR "Still have space at the end\n");
3222 return -1;
3223 }
3224
3225 /* Cleanup */
3226 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3227
3228 return 0;
3229}
3230
3231static int test_bitmaps(struct btrfs_block_group_cache *cache)
3232{
3233 u64 next_bitmap_offset;
3234 int ret;
3235
3236 printk(KERN_ERR "Running bitmap only tests\n");
3237
3238 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
3239 if (ret) {
3240 printk(KERN_ERR "Couldn't create a bitmap entry %d\n", ret);
3241 return ret;
3242 }
3243
3244 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
3245 if (ret) {
3246 printk(KERN_ERR "Error removing bitmap full range %d\n", ret);
3247 return ret;
3248 }
3249
3250 if (check_exists(cache, 0, 4 * 1024 * 1024)) {
3251 printk(KERN_ERR "Left some space in bitmap\n");
3252 return -1;
3253 }
3254
3255 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
3256 if (ret) {
3257 printk(KERN_ERR "Couldn't add to our bitmap entry %d\n", ret);
3258 return ret;
3259 }
3260
3261 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
3262 if (ret) {
3263 printk(KERN_ERR "Couldn't remove middle chunk %d\n", ret);
3264 return ret;
3265 }
3266
3267 /*
3268 * The first bitmap we have starts at offset 0 so the next one is just
3269 * at the end of the first bitmap.
3270 */
3271 next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
3272
3273 /* Test a bit straddling two bitmaps */
3274 ret = add_free_space_entry(cache, next_bitmap_offset -
3275 (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
3276 if (ret) {
3277 printk(KERN_ERR "Couldn't add space that straddles two bitmaps"
3278 " %d\n", ret);
3279 return ret;
3280 }
3281
3282 ret = btrfs_remove_free_space(cache, next_bitmap_offset -
3283 (1 * 1024 * 1024), 2 * 1024 * 1024);
3284 if (ret) {
3285 printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
3286 return ret;
3287 }
3288
3289 if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
3290 2 * 1024 * 1024)) {
3291 printk(KERN_ERR "Left some space when removing overlapping\n");
3292 return -1;
3293 }
3294
3295 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3296
3297 return 0;
3298}
3299
3300/* This is the high grade jackassery */
3301static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
3302{
3303 u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
3304 int ret;
3305
3306 printk(KERN_ERR "Running bitmap and extent tests\n");
3307
3308 /*
3309 * First let's do something simple, an extent at the same offset as the
3310 * bitmap, but the free space completely in the extent and then
3311 * completely in the bitmap.
3312 */
3313 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
3314 if (ret) {
3315 printk(KERN_ERR "Couldn't create bitmap entry %d\n", ret);
3316 return ret;
3317 }
3318
3319 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
3320 if (ret) {
3321 printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
3322 return ret;
3323 }
3324
3325 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
3326 if (ret) {
3327 printk(KERN_ERR "Couldn't remove extent entry %d\n", ret);
3328 return ret;
3329 }
3330
3331 if (check_exists(cache, 0, 1 * 1024 * 1024)) {
3332 printk(KERN_ERR "Left remnants after our remove\n");
3333 return -1;
3334 }
3335
3336 /* Now to add back the extent entry and remove from the bitmap */
3337 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
3338 if (ret) {
3339 printk(KERN_ERR "Couldn't re-add extent entry %d\n", ret);
3340 return ret;
3341 }
3342
3343 ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
3344 if (ret) {
3345 printk(KERN_ERR "Couldn't remove from bitmap %d\n", ret);
3346 return ret;
3347 }
3348
3349 if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
3350 printk(KERN_ERR "Left remnants in the bitmap\n");
3351 return -1;
3352 }
3353
3354 /*
3355 * Ok so a little more evil, extent entry and bitmap at the same offset,
3356 * removing an overlapping chunk.
3357 */
3358 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
3359 if (ret) {
3360 printk(KERN_ERR "Couldn't add to a bitmap %d\n", ret);
3361 return ret;
3362 }
3363
3364 ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
3365 if (ret) {
3366 printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
3367 return ret;
3368 }
3369
3370 if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
3371 printk(KERN_ERR "Left over peices after removing "
3372 "overlapping\n");
3373 return -1;
3374 }
3375
3376 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3377
3378 /* Now with the extent entry offset into the bitmap */
3379 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
3380 if (ret) {
3381 printk(KERN_ERR "Couldn't add space to the bitmap %d\n", ret);
3382 return ret;
3383 }
3384
3385 ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
3386 if (ret) {
3387 printk(KERN_ERR "Couldn't add extent to the cache %d\n", ret);
3388 return ret;
3389 }
3390
3391 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
3392 if (ret) {
3393 printk(KERN_ERR "Problem removing overlapping space %d\n", ret);
3394 return ret;
3395 }
3396
3397 if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
3398 printk(KERN_ERR "Left something behind when removing space");
3399 return -1;
3400 }
3401
3402 /*
3403 * This has blown up in the past, the extent entry starts before the
3404 * bitmap entry, but we're trying to remove an offset that falls
3405 * completely within the bitmap range and is in both the extent entry
3406 * and the bitmap entry, looks like this
3407 *
3408 * [ extent ]
3409 * [ bitmap ]
3410 * [ del ]
3411 */
3412 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3413 ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
3414 4 * 1024 * 1024, 1);
3415 if (ret) {
3416 printk(KERN_ERR "Couldn't add bitmap %d\n", ret);
3417 return ret;
3418 }
3419
3420 ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
3421 5 * 1024 * 1024, 0);
3422 if (ret) {
3423 printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
3424 return ret;
3425 }
3426
3427 ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
3428 5 * 1024 * 1024);
3429 if (ret) {
3430 printk(KERN_ERR "Failed to free our space %d\n", ret);
3431 return ret;
3432 }
3433
3434 if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
3435 5 * 1024 * 1024)) {
3436 printk(KERN_ERR "Left stuff over\n");
3437 return -1;
3438 }
3439
3440 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3441
3442 /*
3443 * This blew up before, we have part of the free space in a bitmap and
3444 * then the entirety of the rest of the space in an extent. This used
3445 * to return -EAGAIN back from btrfs_remove_extent, make sure this
3446 * doesn't happen.
3447 */
3448 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
3449 if (ret) {
3450 printk(KERN_ERR "Couldn't add bitmap entry %d\n", ret);
3451 return ret;
3452 }
3453
3454 ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
3455 if (ret) {
3456 printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
3457 return ret;
3458 }
3459
3460 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
3461 if (ret) {
3462 printk(KERN_ERR "Error removing bitmap and extent "
3463 "overlapping %d\n", ret);
3464 return ret;
3465 }
3466
3467 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3468 return 0;
3469}
3470
3471void btrfs_test_free_space_cache(void)
3472{
3473 struct btrfs_block_group_cache *cache;
3474
3475 printk(KERN_ERR "Running btrfs free space cache tests\n");
3476
3477 cache = init_test_block_group();
3478 if (!cache) {
3479 printk(KERN_ERR "Couldn't run the tests\n");
3480 return;
3481 }
3482
3483 if (test_extents(cache))
3484 goto out;
3485 if (test_bitmaps(cache))
3486 goto out;
3487 if (test_bitmaps_and_extents(cache))
3488 goto out;
3489out:
3490 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3491 kfree(cache->free_space_ctl);
3492 kfree(cache);
3493 printk(KERN_ERR "Free space cache tests finished\n");
3494}
3495#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 8f2613f779ed..4dc17d8809c7 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -110,4 +110,9 @@ int btrfs_return_cluster_to_free_space(
110 struct btrfs_free_cluster *cluster); 110 struct btrfs_free_cluster *cluster);
111int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 111int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
112 u64 *trimmed, u64 start, u64 end, u64 minlen); 112 u64 *trimmed, u64 start, u64 end, u64 minlen);
113
114#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
115void btrfs_test_free_space_cache(void);
116#endif
117
113#endif 118#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 48b8fda93132..e0b7034d6343 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -183,10 +183,11 @@ int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
183 return -ENOENT; 183 return -ENOENT;
184} 184}
185 185
186int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, 186static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
187 struct btrfs_root *root, 187 struct btrfs_root *root,
188 const char *name, int name_len, 188 const char *name, int name_len,
189 u64 inode_objectid, u64 ref_objectid, u64 *index) 189 u64 inode_objectid, u64 ref_objectid,
190 u64 *index)
190{ 191{
191 struct btrfs_path *path; 192 struct btrfs_path *path;
192 struct btrfs_key key; 193 struct btrfs_key key;
@@ -246,7 +247,7 @@ int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
246 memmove_extent_buffer(leaf, ptr, ptr + del_len, 247 memmove_extent_buffer(leaf, ptr, ptr + del_len,
247 item_size - (ptr + del_len - item_start)); 248 item_size - (ptr + del_len - item_start));
248 249
249 btrfs_truncate_item(trans, root, path, item_size - del_len, 1); 250 btrfs_truncate_item(root, path, item_size - del_len, 1);
250 251
251out: 252out:
252 btrfs_free_path(path); 253 btrfs_free_path(path);
@@ -309,7 +310,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
309 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); 310 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
310 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 311 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
311 item_size - (ptr + sub_item_len - item_start)); 312 item_size - (ptr + sub_item_len - item_start));
312 btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1); 313 btrfs_truncate_item(root, path, item_size - sub_item_len, 1);
313out: 314out:
314 btrfs_free_path(path); 315 btrfs_free_path(path);
315 316
@@ -361,7 +362,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
361 name, name_len, NULL)) 362 name, name_len, NULL))
362 goto out; 363 goto out;
363 364
364 btrfs_extend_item(trans, root, path, ins_len); 365 btrfs_extend_item(root, path, ins_len);
365 ret = 0; 366 ret = 0;
366 } 367 }
367 if (ret < 0) 368 if (ret < 0)
@@ -417,7 +418,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
417 goto out; 418 goto out;
418 419
419 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 420 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
420 btrfs_extend_item(trans, root, path, ins_len); 421 btrfs_extend_item(root, path, ins_len);
421 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 422 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
422 struct btrfs_inode_ref); 423 struct btrfs_inode_ref);
423 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); 424 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 898da0a01e04..9b31b3b091fc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -101,7 +101,10 @@ static noinline int cow_file_range(struct inode *inode,
101static struct extent_map *create_pinned_em(struct inode *inode, u64 start, 101static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
102 u64 len, u64 orig_start, 102 u64 len, u64 orig_start,
103 u64 block_start, u64 block_len, 103 u64 block_start, u64 block_len,
104 u64 orig_block_len, int type); 104 u64 orig_block_len, u64 ram_bytes,
105 int type);
106
107static int btrfs_dirty_inode(struct inode *inode);
105 108
106static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 109static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
107 struct inode *inode, struct inode *dir, 110 struct inode *inode, struct inode *dir,
@@ -723,6 +726,7 @@ retry:
723 em->block_start = ins.objectid; 726 em->block_start = ins.objectid;
724 em->block_len = ins.offset; 727 em->block_len = ins.offset;
725 em->orig_block_len = ins.offset; 728 em->orig_block_len = ins.offset;
729 em->ram_bytes = async_extent->ram_size;
726 em->bdev = root->fs_info->fs_devices->latest_bdev; 730 em->bdev = root->fs_info->fs_devices->latest_bdev;
727 em->compress_type = async_extent->compress_type; 731 em->compress_type = async_extent->compress_type;
728 set_bit(EXTENT_FLAG_PINNED, &em->flags); 732 set_bit(EXTENT_FLAG_PINNED, &em->flags);
@@ -731,10 +735,7 @@ retry:
731 735
732 while (1) { 736 while (1) {
733 write_lock(&em_tree->lock); 737 write_lock(&em_tree->lock);
734 ret = add_extent_mapping(em_tree, em); 738 ret = add_extent_mapping(em_tree, em, 1);
735 if (!ret)
736 list_move(&em->list,
737 &em_tree->modified_extents);
738 write_unlock(&em_tree->lock); 739 write_unlock(&em_tree->lock);
739 if (ret != -EEXIST) { 740 if (ret != -EEXIST) {
740 free_extent_map(em); 741 free_extent_map(em);
@@ -922,7 +923,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
922 } 923 }
923 924
924 em = alloc_extent_map(); 925 em = alloc_extent_map();
925 BUG_ON(!em); /* -ENOMEM */ 926 if (!em)
927 goto out_reserve;
926 em->start = start; 928 em->start = start;
927 em->orig_start = em->start; 929 em->orig_start = em->start;
928 ram_size = ins.offset; 930 ram_size = ins.offset;
@@ -933,16 +935,14 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
933 em->block_start = ins.objectid; 935 em->block_start = ins.objectid;
934 em->block_len = ins.offset; 936 em->block_len = ins.offset;
935 em->orig_block_len = ins.offset; 937 em->orig_block_len = ins.offset;
938 em->ram_bytes = ram_size;
936 em->bdev = root->fs_info->fs_devices->latest_bdev; 939 em->bdev = root->fs_info->fs_devices->latest_bdev;
937 set_bit(EXTENT_FLAG_PINNED, &em->flags); 940 set_bit(EXTENT_FLAG_PINNED, &em->flags);
938 em->generation = -1; 941 em->generation = -1;
939 942
940 while (1) { 943 while (1) {
941 write_lock(&em_tree->lock); 944 write_lock(&em_tree->lock);
942 ret = add_extent_mapping(em_tree, em); 945 ret = add_extent_mapping(em_tree, em, 1);
943 if (!ret)
944 list_move(&em->list,
945 &em_tree->modified_extents);
946 write_unlock(&em_tree->lock); 946 write_unlock(&em_tree->lock);
947 if (ret != -EEXIST) { 947 if (ret != -EEXIST) {
948 free_extent_map(em); 948 free_extent_map(em);
@@ -951,11 +951,14 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
951 btrfs_drop_extent_cache(inode, start, 951 btrfs_drop_extent_cache(inode, start,
952 start + ram_size - 1, 0); 952 start + ram_size - 1, 0);
953 } 953 }
954 if (ret)
955 goto out_reserve;
954 956
955 cur_alloc_size = ins.offset; 957 cur_alloc_size = ins.offset;
956 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 958 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
957 ram_size, cur_alloc_size, 0); 959 ram_size, cur_alloc_size, 0);
958 BUG_ON(ret); /* -ENOMEM */ 960 if (ret)
961 goto out_reserve;
959 962
960 if (root->root_key.objectid == 963 if (root->root_key.objectid ==
961 BTRFS_DATA_RELOC_TREE_OBJECTID) { 964 BTRFS_DATA_RELOC_TREE_OBJECTID) {
@@ -963,7 +966,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
963 cur_alloc_size); 966 cur_alloc_size);
964 if (ret) { 967 if (ret) {
965 btrfs_abort_transaction(trans, root, ret); 968 btrfs_abort_transaction(trans, root, ret);
966 goto out_unlock; 969 goto out_reserve;
967 } 970 }
968 } 971 }
969 972
@@ -992,6 +995,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
992out: 995out:
993 return ret; 996 return ret;
994 997
998out_reserve:
999 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
995out_unlock: 1000out_unlock:
996 extent_clear_unlock_delalloc(inode, 1001 extent_clear_unlock_delalloc(inode,
997 &BTRFS_I(inode)->io_tree, 1002 &BTRFS_I(inode)->io_tree,
@@ -1195,6 +1200,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1195 u64 disk_bytenr; 1200 u64 disk_bytenr;
1196 u64 num_bytes; 1201 u64 num_bytes;
1197 u64 disk_num_bytes; 1202 u64 disk_num_bytes;
1203 u64 ram_bytes;
1198 int extent_type; 1204 int extent_type;
1199 int ret, err; 1205 int ret, err;
1200 int type; 1206 int type;
@@ -1291,6 +1297,7 @@ next_slot:
1291 struct btrfs_file_extent_item); 1297 struct btrfs_file_extent_item);
1292 extent_type = btrfs_file_extent_type(leaf, fi); 1298 extent_type = btrfs_file_extent_type(leaf, fi);
1293 1299
1300 ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
1294 if (extent_type == BTRFS_FILE_EXTENT_REG || 1301 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1295 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 1302 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1296 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1303 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -1374,6 +1381,7 @@ out_check:
1374 em->block_len = num_bytes; 1381 em->block_len = num_bytes;
1375 em->block_start = disk_bytenr; 1382 em->block_start = disk_bytenr;
1376 em->orig_block_len = disk_num_bytes; 1383 em->orig_block_len = disk_num_bytes;
1384 em->ram_bytes = ram_bytes;
1377 em->bdev = root->fs_info->fs_devices->latest_bdev; 1385 em->bdev = root->fs_info->fs_devices->latest_bdev;
1378 em->mod_start = em->start; 1386 em->mod_start = em->start;
1379 em->mod_len = em->len; 1387 em->mod_len = em->len;
@@ -1382,10 +1390,7 @@ out_check:
1382 em->generation = -1; 1390 em->generation = -1;
1383 while (1) { 1391 while (1) {
1384 write_lock(&em_tree->lock); 1392 write_lock(&em_tree->lock);
1385 ret = add_extent_mapping(em_tree, em); 1393 ret = add_extent_mapping(em_tree, em, 1);
1386 if (!ret)
1387 list_move(&em->list,
1388 &em_tree->modified_extents);
1389 write_unlock(&em_tree->lock); 1394 write_unlock(&em_tree->lock);
1390 if (ret != -EEXIST) { 1395 if (ret != -EEXIST) {
1391 free_extent_map(em); 1396 free_extent_map(em);
@@ -1526,7 +1531,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1526 * have pending delalloc work to be done. 1531 * have pending delalloc work to be done.
1527 */ 1532 */
1528static void btrfs_set_bit_hook(struct inode *inode, 1533static void btrfs_set_bit_hook(struct inode *inode,
1529 struct extent_state *state, int *bits) 1534 struct extent_state *state, unsigned long *bits)
1530{ 1535{
1531 1536
1532 /* 1537 /*
@@ -1570,7 +1575,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
1570 * extent_io.c clear_bit_hook, see set_bit_hook for why 1575 * extent_io.c clear_bit_hook, see set_bit_hook for why
1571 */ 1576 */
1572static void btrfs_clear_bit_hook(struct inode *inode, 1577static void btrfs_clear_bit_hook(struct inode *inode,
1573 struct extent_state *state, int *bits) 1578 struct extent_state *state,
1579 unsigned long *bits)
1574{ 1580{
1575 /* 1581 /*
1576 * set_bit and clear bit hooks normally require _irqsave/restore 1582 * set_bit and clear bit hooks normally require _irqsave/restore
@@ -2794,6 +2800,8 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2794 int ret; 2800 int ret;
2795 struct btrfs_root *root = BTRFS_I(inode)->root; 2801 struct btrfs_root *root = BTRFS_I(inode)->root;
2796 u32 csum = ~(u32)0; 2802 u32 csum = ~(u32)0;
2803 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
2804 DEFAULT_RATELIMIT_BURST);
2797 2805
2798 if (PageChecked(page)) { 2806 if (PageChecked(page)) {
2799 ClearPageChecked(page); 2807 ClearPageChecked(page);
@@ -2820,7 +2828,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2820 if (ret) 2828 if (ret)
2821 goto zeroit; 2829 goto zeroit;
2822 2830
2823 csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1); 2831 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
2824 btrfs_csum_final(csum, (char *)&csum); 2832 btrfs_csum_final(csum, (char *)&csum);
2825 if (csum != private) 2833 if (csum != private)
2826 goto zeroit; 2834 goto zeroit;
@@ -2830,11 +2838,11 @@ good:
2830 return 0; 2838 return 0;
2831 2839
2832zeroit: 2840zeroit:
2833 printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u " 2841 if (__ratelimit(&_rs))
2834 "private %llu\n", 2842 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
2835 (unsigned long long)btrfs_ino(page->mapping->host), 2843 (unsigned long long)btrfs_ino(page->mapping->host),
2836 (unsigned long long)start, csum, 2844 (unsigned long long)start, csum,
2837 (unsigned long long)private); 2845 (unsigned long long)private);
2838 memset(kaddr + offset, 1, end - start + 1); 2846 memset(kaddr + offset, 1, end - start + 1);
2839 flush_dcache_page(page); 2847 flush_dcache_page(page);
2840 kunmap_atomic(kaddr); 2848 kunmap_atomic(kaddr);
@@ -3020,7 +3028,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
3020 * We have done the truncate/delete so we can go ahead and remove the orphan 3028 * We have done the truncate/delete so we can go ahead and remove the orphan
3021 * item for this particular inode. 3029 * item for this particular inode.
3022 */ 3030 */
3023int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) 3031static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3032 struct inode *inode)
3024{ 3033{
3025 struct btrfs_root *root = BTRFS_I(inode)->root; 3034 struct btrfs_root *root = BTRFS_I(inode)->root;
3026 int delete_item = 0; 3035 int delete_item = 0;
@@ -3115,8 +3124,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3115 */ 3124 */
3116 3125
3117 if (found_key.offset == last_objectid) { 3126 if (found_key.offset == last_objectid) {
3118 printk(KERN_ERR "btrfs: Error removing orphan entry, " 3127 btrfs_err(root->fs_info,
3119 "stopping orphan cleanup\n"); 3128 "Error removing orphan entry, stopping orphan cleanup");
3120 ret = -EINVAL; 3129 ret = -EINVAL;
3121 goto out; 3130 goto out;
3122 } 3131 }
@@ -3173,8 +3182,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3173 ret = PTR_ERR(trans); 3182 ret = PTR_ERR(trans);
3174 goto out; 3183 goto out;
3175 } 3184 }
3176 printk(KERN_ERR "auto deleting %Lu\n", 3185 btrfs_debug(root->fs_info, "auto deleting %Lu",
3177 found_key.objectid); 3186 found_key.objectid);
3178 ret = btrfs_del_orphan_item(trans, root, 3187 ret = btrfs_del_orphan_item(trans, root,
3179 found_key.objectid); 3188 found_key.objectid);
3180 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ 3189 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@@ -3238,13 +3247,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3238 } 3247 }
3239 3248
3240 if (nr_unlink) 3249 if (nr_unlink)
3241 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); 3250 btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
3242 if (nr_truncate) 3251 if (nr_truncate)
3243 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); 3252 btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);
3244 3253
3245out: 3254out:
3246 if (ret) 3255 if (ret)
3247 printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); 3256 btrfs_crit(root->fs_info,
3257 "could not do orphan cleanup %d", ret);
3248 btrfs_free_path(path); 3258 btrfs_free_path(path);
3249 return ret; 3259 return ret;
3250} 3260}
@@ -3592,9 +3602,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3592 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, 3602 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
3593 dir_ino, &index); 3603 dir_ino, &index);
3594 if (ret) { 3604 if (ret) {
3595 printk(KERN_INFO "btrfs failed to delete reference to %.*s, " 3605 btrfs_info(root->fs_info,
3596 "inode %llu parent %llu\n", name_len, name, 3606 "failed to delete reference to %.*s, inode %llu parent %llu",
3597 (unsigned long long)ino, (unsigned long long)dir_ino); 3607 name_len, name,
3608 (unsigned long long)ino, (unsigned long long)dir_ino);
3598 btrfs_abort_transaction(trans, root, ret); 3609 btrfs_abort_transaction(trans, root, ret);
3599 goto err; 3610 goto err;
3600 } 3611 }
@@ -3616,6 +3627,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3616 dir, index); 3627 dir, index);
3617 if (ret == -ENOENT) 3628 if (ret == -ENOENT)
3618 ret = 0; 3629 ret = 0;
3630 else if (ret)
3631 btrfs_abort_transaction(trans, root, ret);
3619err: 3632err:
3620 btrfs_free_path(path); 3633 btrfs_free_path(path);
3621 if (ret) 3634 if (ret)
@@ -3661,7 +3674,7 @@ static int check_path_shared(struct btrfs_root *root,
3661 eb = path->nodes[level]; 3674 eb = path->nodes[level];
3662 if (!btrfs_block_can_be_shared(root, eb)) 3675 if (!btrfs_block_can_be_shared(root, eb))
3663 continue; 3676 continue;
3664 ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, 3677 ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
3665 &refs, NULL); 3678 &refs, NULL);
3666 if (refs > 1) 3679 if (refs > 1)
3667 return 1; 3680 return 1;
@@ -4176,8 +4189,7 @@ search_again:
4176 } 4189 }
4177 size = 4190 size =
4178 btrfs_file_extent_calc_inline_size(size); 4191 btrfs_file_extent_calc_inline_size(size);
4179 btrfs_truncate_item(trans, root, path, 4192 btrfs_truncate_item(root, path, size, 1);
4180 size, 1);
4181 } else if (root->ref_cows) { 4193 } else if (root->ref_cows) {
4182 inode_sub_bytes(inode, item_end + 1 - 4194 inode_sub_bytes(inode, item_end + 1 -
4183 found_key.offset); 4195 found_key.offset);
@@ -4451,16 +4463,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4451 hole_em->block_start = EXTENT_MAP_HOLE; 4463 hole_em->block_start = EXTENT_MAP_HOLE;
4452 hole_em->block_len = 0; 4464 hole_em->block_len = 0;
4453 hole_em->orig_block_len = 0; 4465 hole_em->orig_block_len = 0;
4466 hole_em->ram_bytes = hole_size;
4454 hole_em->bdev = root->fs_info->fs_devices->latest_bdev; 4467 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
4455 hole_em->compress_type = BTRFS_COMPRESS_NONE; 4468 hole_em->compress_type = BTRFS_COMPRESS_NONE;
4456 hole_em->generation = trans->transid; 4469 hole_em->generation = trans->transid;
4457 4470
4458 while (1) { 4471 while (1) {
4459 write_lock(&em_tree->lock); 4472 write_lock(&em_tree->lock);
4460 err = add_extent_mapping(em_tree, hole_em); 4473 err = add_extent_mapping(em_tree, hole_em, 1);
4461 if (!err)
4462 list_move(&hole_em->list,
4463 &em_tree->modified_extents);
4464 write_unlock(&em_tree->lock); 4474 write_unlock(&em_tree->lock);
4465 if (err != -EEXIST) 4475 if (err != -EEXIST)
4466 break; 4476 break;
@@ -4671,8 +4681,9 @@ void btrfs_evict_inode(struct inode *inode)
4671 ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); 4681 ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
4672 4682
4673 if (ret) { 4683 if (ret) {
4674 printk(KERN_WARNING "Could not get space for a " 4684 btrfs_warn(root->fs_info,
4675 "delete, will truncate on mount %d\n", ret); 4685 "Could not get space for a delete, will truncate on mount %d",
4686 ret);
4676 btrfs_orphan_del(NULL, inode); 4687 btrfs_orphan_del(NULL, inode);
4677 btrfs_free_block_rsv(root, rsv); 4688 btrfs_free_block_rsv(root, rsv);
4678 goto no_delete; 4689 goto no_delete;
@@ -5336,7 +5347,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
5336 * FIXME, needs more benchmarking...there are no reasons other than performance 5347 * FIXME, needs more benchmarking...there are no reasons other than performance
5337 * to keep or drop this code. 5348 * to keep or drop this code.
5338 */ 5349 */
5339int btrfs_dirty_inode(struct inode *inode) 5350static int btrfs_dirty_inode(struct inode *inode)
5340{ 5351{
5341 struct btrfs_root *root = BTRFS_I(inode)->root; 5352 struct btrfs_root *root = BTRFS_I(inode)->root;
5342 struct btrfs_trans_handle *trans; 5353 struct btrfs_trans_handle *trans;
@@ -5978,7 +5989,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree,
5978 em->block_start += start_diff; 5989 em->block_start += start_diff;
5979 em->block_len -= start_diff; 5990 em->block_len -= start_diff;
5980 } 5991 }
5981 return add_extent_mapping(em_tree, em); 5992 return add_extent_mapping(em_tree, em, 0);
5982} 5993}
5983 5994
5984static noinline int uncompress_inline(struct btrfs_path *path, 5995static noinline int uncompress_inline(struct btrfs_path *path,
@@ -6152,6 +6163,7 @@ again:
6152 goto not_found_em; 6163 goto not_found_em;
6153 } 6164 }
6154 6165
6166 em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
6155 if (found_type == BTRFS_FILE_EXTENT_REG || 6167 if (found_type == BTRFS_FILE_EXTENT_REG ||
6156 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 6168 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
6157 em->start = extent_start; 6169 em->start = extent_start;
@@ -6260,18 +6272,18 @@ not_found_em:
6260insert: 6272insert:
6261 btrfs_release_path(path); 6273 btrfs_release_path(path);
6262 if (em->start > start || extent_map_end(em) <= start) { 6274 if (em->start > start || extent_map_end(em) <= start) {
6263 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " 6275 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
6264 "[%llu %llu]\n", (unsigned long long)em->start, 6276 (unsigned long long)em->start,
6265 (unsigned long long)em->len, 6277 (unsigned long long)em->len,
6266 (unsigned long long)start, 6278 (unsigned long long)start,
6267 (unsigned long long)len); 6279 (unsigned long long)len);
6268 err = -EIO; 6280 err = -EIO;
6269 goto out; 6281 goto out;
6270 } 6282 }
6271 6283
6272 err = 0; 6284 err = 0;
6273 write_lock(&em_tree->lock); 6285 write_lock(&em_tree->lock);
6274 ret = add_extent_mapping(em_tree, em); 6286 ret = add_extent_mapping(em_tree, em, 0);
6275 /* it is possible that someone inserted the extent into the tree 6287 /* it is possible that someone inserted the extent into the tree
6276 * while we had the lock dropped. It is also possible that 6288 * while we had the lock dropped. It is also possible that
6277 * an overlapping map exists in the tree 6289 * an overlapping map exists in the tree
@@ -6483,7 +6495,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6483 } 6495 }
6484 6496
6485 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 6497 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6486 ins.offset, ins.offset, 0); 6498 ins.offset, ins.offset, ins.offset, 0);
6487 if (IS_ERR(em)) 6499 if (IS_ERR(em))
6488 goto out; 6500 goto out;
6489 6501
@@ -6503,7 +6515,9 @@ out:
6503 * block must be cow'd 6515 * block must be cow'd
6504 */ 6516 */
6505static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, 6517static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6506 struct inode *inode, u64 offset, u64 len) 6518 struct inode *inode, u64 offset, u64 *len,
6519 u64 *orig_start, u64 *orig_block_len,
6520 u64 *ram_bytes)
6507{ 6521{
6508 struct btrfs_path *path; 6522 struct btrfs_path *path;
6509 int ret; 6523 int ret;
@@ -6560,8 +6574,12 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6560 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6574 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6561 backref_offset = btrfs_file_extent_offset(leaf, fi); 6575 backref_offset = btrfs_file_extent_offset(leaf, fi);
6562 6576
6577 *orig_start = key.offset - backref_offset;
6578 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
6579 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6580
6563 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 6581 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6564 if (extent_end < offset + len) { 6582 if (extent_end < offset + *len) {
6565 /* extent doesn't include our full range, must cow */ 6583 /* extent doesn't include our full range, must cow */
6566 goto out; 6584 goto out;
6567 } 6585 }
@@ -6585,13 +6603,14 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6585 */ 6603 */
6586 disk_bytenr += backref_offset; 6604 disk_bytenr += backref_offset;
6587 disk_bytenr += offset - key.offset; 6605 disk_bytenr += offset - key.offset;
6588 num_bytes = min(offset + len, extent_end) - offset; 6606 num_bytes = min(offset + *len, extent_end) - offset;
6589 if (csum_exist_in_range(root, disk_bytenr, num_bytes)) 6607 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
6590 goto out; 6608 goto out;
6591 /* 6609 /*
6592 * all of the above have passed, it is safe to overwrite this extent 6610 * all of the above have passed, it is safe to overwrite this extent
6593 * without cow 6611 * without cow
6594 */ 6612 */
6613 *len = num_bytes;
6595 ret = 1; 6614 ret = 1;
6596out: 6615out:
6597 btrfs_free_path(path); 6616 btrfs_free_path(path);
@@ -6662,7 +6681,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
6662static struct extent_map *create_pinned_em(struct inode *inode, u64 start, 6681static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
6663 u64 len, u64 orig_start, 6682 u64 len, u64 orig_start,
6664 u64 block_start, u64 block_len, 6683 u64 block_start, u64 block_len,
6665 u64 orig_block_len, int type) 6684 u64 orig_block_len, u64 ram_bytes,
6685 int type)
6666{ 6686{
6667 struct extent_map_tree *em_tree; 6687 struct extent_map_tree *em_tree;
6668 struct extent_map *em; 6688 struct extent_map *em;
@@ -6683,6 +6703,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
6683 em->block_start = block_start; 6703 em->block_start = block_start;
6684 em->bdev = root->fs_info->fs_devices->latest_bdev; 6704 em->bdev = root->fs_info->fs_devices->latest_bdev;
6685 em->orig_block_len = orig_block_len; 6705 em->orig_block_len = orig_block_len;
6706 em->ram_bytes = ram_bytes;
6686 em->generation = -1; 6707 em->generation = -1;
6687 set_bit(EXTENT_FLAG_PINNED, &em->flags); 6708 set_bit(EXTENT_FLAG_PINNED, &em->flags);
6688 if (type == BTRFS_ORDERED_PREALLOC) 6709 if (type == BTRFS_ORDERED_PREALLOC)
@@ -6692,10 +6713,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
6692 btrfs_drop_extent_cache(inode, em->start, 6713 btrfs_drop_extent_cache(inode, em->start,
6693 em->start + em->len - 1, 0); 6714 em->start + em->len - 1, 0);
6694 write_lock(&em_tree->lock); 6715 write_lock(&em_tree->lock);
6695 ret = add_extent_mapping(em_tree, em); 6716 ret = add_extent_mapping(em_tree, em, 1);
6696 if (!ret)
6697 list_move(&em->list,
6698 &em_tree->modified_extents);
6699 write_unlock(&em_tree->lock); 6717 write_unlock(&em_tree->lock);
6700 } while (ret == -EEXIST); 6718 } while (ret == -EEXIST);
6701 6719
@@ -6790,7 +6808,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6790 em->block_start != EXTENT_MAP_HOLE)) { 6808 em->block_start != EXTENT_MAP_HOLE)) {
6791 int type; 6809 int type;
6792 int ret; 6810 int ret;
6793 u64 block_start; 6811 u64 block_start, orig_start, orig_block_len, ram_bytes;
6794 6812
6795 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 6813 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
6796 type = BTRFS_ORDERED_PREALLOC; 6814 type = BTRFS_ORDERED_PREALLOC;
@@ -6808,16 +6826,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6808 if (IS_ERR(trans)) 6826 if (IS_ERR(trans))
6809 goto must_cow; 6827 goto must_cow;
6810 6828
6811 if (can_nocow_odirect(trans, inode, start, len) == 1) { 6829 if (can_nocow_odirect(trans, inode, start, &len, &orig_start,
6812 u64 orig_start = em->orig_start; 6830 &orig_block_len, &ram_bytes) == 1) {
6813 u64 orig_block_len = em->orig_block_len;
6814
6815 if (type == BTRFS_ORDERED_PREALLOC) { 6831 if (type == BTRFS_ORDERED_PREALLOC) {
6816 free_extent_map(em); 6832 free_extent_map(em);
6817 em = create_pinned_em(inode, start, len, 6833 em = create_pinned_em(inode, start, len,
6818 orig_start, 6834 orig_start,
6819 block_start, len, 6835 block_start, len,
6820 orig_block_len, type); 6836 orig_block_len,
6837 ram_bytes, type);
6821 if (IS_ERR(em)) { 6838 if (IS_ERR(em)) {
6822 btrfs_end_transaction(trans, root); 6839 btrfs_end_transaction(trans, root);
6823 goto unlock_err; 6840 goto unlock_err;
@@ -6937,7 +6954,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6937 goto failed; 6954 goto failed;
6938 local_irq_save(flags); 6955 local_irq_save(flags);
6939 kaddr = kmap_atomic(page); 6956 kaddr = kmap_atomic(page);
6940 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, 6957 csum = btrfs_csum_data(kaddr + bvec->bv_offset,
6941 csum, bvec->bv_len); 6958 csum, bvec->bv_len);
6942 btrfs_csum_final(csum, (char *)&csum); 6959 btrfs_csum_final(csum, (char *)&csum);
6943 kunmap_atomic(kaddr); 6960 kunmap_atomic(kaddr);
@@ -6946,11 +6963,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6946 flush_dcache_page(bvec->bv_page); 6963 flush_dcache_page(bvec->bv_page);
6947 if (csum != private) { 6964 if (csum != private) {
6948failed: 6965failed:
6949 printk(KERN_ERR "btrfs csum failed ino %llu off" 6966 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
6950 " %llu csum %u private %u\n", 6967 (unsigned long long)btrfs_ino(inode),
6951 (unsigned long long)btrfs_ino(inode), 6968 (unsigned long long)start,
6952 (unsigned long long)start, 6969 csum, (unsigned)private);
6953 csum, (unsigned)private);
6954 err = -EIO; 6970 err = -EIO;
6955 } 6971 }
6956 } 6972 }
@@ -7426,8 +7442,8 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
7426 return extent_write_full_page(tree, page, btrfs_get_extent, wbc); 7442 return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
7427} 7443}
7428 7444
7429int btrfs_writepages(struct address_space *mapping, 7445static int btrfs_writepages(struct address_space *mapping,
7430 struct writeback_control *wbc) 7446 struct writeback_control *wbc)
7431{ 7447{
7432 struct extent_io_tree *tree; 7448 struct extent_io_tree *tree;
7433 7449
@@ -7942,8 +7958,8 @@ void btrfs_destroy_inode(struct inode *inode)
7942 7958
7943 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 7959 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
7944 &BTRFS_I(inode)->runtime_flags)) { 7960 &BTRFS_I(inode)->runtime_flags)) {
7945 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 7961 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
7946 (unsigned long long)btrfs_ino(inode)); 7962 (unsigned long long)btrfs_ino(inode));
7947 atomic_dec(&root->orphan_inodes); 7963 atomic_dec(&root->orphan_inodes);
7948 } 7964 }
7949 7965
@@ -7952,10 +7968,9 @@ void btrfs_destroy_inode(struct inode *inode)
7952 if (!ordered) 7968 if (!ordered)
7953 break; 7969 break;
7954 else { 7970 else {
7955 printk(KERN_ERR "btrfs found ordered " 7971 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
7956 "extent %llu %llu on inode cleanup\n", 7972 (unsigned long long)ordered->file_offset,
7957 (unsigned long long)ordered->file_offset, 7973 (unsigned long long)ordered->len);
7958 (unsigned long long)ordered->len);
7959 btrfs_remove_ordered_extent(inode, ordered); 7974 btrfs_remove_ordered_extent(inode, ordered);
7960 btrfs_put_ordered_extent(ordered); 7975 btrfs_put_ordered_extent(ordered);
7961 btrfs_put_ordered_extent(ordered); 7976 btrfs_put_ordered_extent(ordered);
@@ -8572,16 +8587,14 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8572 em->block_start = ins.objectid; 8587 em->block_start = ins.objectid;
8573 em->block_len = ins.offset; 8588 em->block_len = ins.offset;
8574 em->orig_block_len = ins.offset; 8589 em->orig_block_len = ins.offset;
8590 em->ram_bytes = ins.offset;
8575 em->bdev = root->fs_info->fs_devices->latest_bdev; 8591 em->bdev = root->fs_info->fs_devices->latest_bdev;
8576 set_bit(EXTENT_FLAG_PREALLOC, &em->flags); 8592 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
8577 em->generation = trans->transid; 8593 em->generation = trans->transid;
8578 8594
8579 while (1) { 8595 while (1) {
8580 write_lock(&em_tree->lock); 8596 write_lock(&em_tree->lock);
8581 ret = add_extent_mapping(em_tree, em); 8597 ret = add_extent_mapping(em_tree, em, 1);
8582 if (!ret)
8583 list_move(&em->list,
8584 &em_tree->modified_extents);
8585 write_unlock(&em_tree->lock); 8598 write_unlock(&em_tree->lock);
8586 if (ret != -EEXIST) 8599 if (ret != -EEXIST)
8587 break; 8600 break;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2c02310ff2d9..0de4a2fcfb24 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -723,7 +723,9 @@ static noinline int btrfs_mksubvol(struct path *parent,
723 struct dentry *dentry; 723 struct dentry *dentry;
724 int error; 724 int error;
725 725
726 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 726 error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
727 if (error == -EINTR)
728 return error;
727 729
728 dentry = lookup_one_len(name, parent->dentry, namelen); 730 dentry = lookup_one_len(name, parent->dentry, namelen);
729 error = PTR_ERR(dentry); 731 error = PTR_ERR(dentry);
@@ -1152,8 +1154,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1152 u64 new_align = ~((u64)128 * 1024 - 1); 1154 u64 new_align = ~((u64)128 * 1024 - 1);
1153 struct page **pages = NULL; 1155 struct page **pages = NULL;
1154 1156
1155 if (extent_thresh == 0) 1157 if (isize == 0)
1156 extent_thresh = 256 * 1024; 1158 return 0;
1159
1160 if (range->start >= isize)
1161 return -EINVAL;
1157 1162
1158 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1163 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
1159 if (range->compress_type > BTRFS_COMPRESS_TYPES) 1164 if (range->compress_type > BTRFS_COMPRESS_TYPES)
@@ -1162,8 +1167,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1162 compress_type = range->compress_type; 1167 compress_type = range->compress_type;
1163 } 1168 }
1164 1169
1165 if (isize == 0) 1170 if (extent_thresh == 0)
1166 return 0; 1171 extent_thresh = 256 * 1024;
1167 1172
1168 /* 1173 /*
1169 * if we were not given a file, allocate a readahead 1174 * if we were not given a file, allocate a readahead
@@ -2086,7 +2091,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2086 if (err) 2091 if (err)
2087 goto out; 2092 goto out;
2088 2093
2089 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 2094 err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
2095 if (err == -EINTR)
2096 goto out;
2090 dentry = lookup_one_len(vol_args->name, parent, namelen); 2097 dentry = lookup_one_len(vol_args->name, parent, namelen);
2091 if (IS_ERR(dentry)) { 2098 if (IS_ERR(dentry)) {
2092 err = PTR_ERR(dentry); 2099 err = PTR_ERR(dentry);
@@ -2425,7 +2432,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2425 2432
2426 mutex_lock(&fs_devices->device_list_mutex); 2433 mutex_lock(&fs_devices->device_list_mutex);
2427 dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL); 2434 dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL);
2428 mutex_unlock(&fs_devices->device_list_mutex);
2429 2435
2430 if (!dev) { 2436 if (!dev) {
2431 ret = -ENODEV; 2437 ret = -ENODEV;
@@ -2449,6 +2455,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2449 } 2455 }
2450 2456
2451out: 2457out:
2458 mutex_unlock(&fs_devices->device_list_mutex);
2452 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2459 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
2453 ret = -EFAULT; 2460 ret = -EFAULT;
2454 2461
@@ -3003,7 +3010,7 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
3003 } 3010 }
3004} 3011}
3005 3012
3006long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 3013static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
3007{ 3014{
3008 struct btrfs_ioctl_space_args space_args; 3015 struct btrfs_ioctl_space_args space_args;
3009 struct btrfs_ioctl_space_info space; 3016 struct btrfs_ioctl_space_info space;
@@ -3693,12 +3700,11 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
3693 goto drop_write; 3700 goto drop_write;
3694 } 3701 }
3695 3702
3696 if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { 3703 down_write(&root->fs_info->subvol_sem);
3697 trans = btrfs_start_transaction(root, 2); 3704 trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
3698 if (IS_ERR(trans)) { 3705 if (IS_ERR(trans)) {
3699 ret = PTR_ERR(trans); 3706 ret = PTR_ERR(trans);
3700 goto out; 3707 goto out;
3701 }
3702 } 3708 }
3703 3709
3704 switch (sa->cmd) { 3710 switch (sa->cmd) {
@@ -3708,9 +3714,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
3708 case BTRFS_QUOTA_CTL_DISABLE: 3714 case BTRFS_QUOTA_CTL_DISABLE:
3709 ret = btrfs_quota_disable(trans, root->fs_info); 3715 ret = btrfs_quota_disable(trans, root->fs_info);
3710 break; 3716 break;
3711 case BTRFS_QUOTA_CTL_RESCAN:
3712 ret = btrfs_quota_rescan(root->fs_info);
3713 break;
3714 default: 3717 default:
3715 ret = -EINVAL; 3718 ret = -EINVAL;
3716 break; 3719 break;
@@ -3719,13 +3722,12 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
3719 if (copy_to_user(arg, sa, sizeof(*sa))) 3722 if (copy_to_user(arg, sa, sizeof(*sa)))
3720 ret = -EFAULT; 3723 ret = -EFAULT;
3721 3724
3722 if (trans) { 3725 err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
3723 err = btrfs_commit_transaction(trans, root); 3726 if (err && !ret)
3724 if (err && !ret) 3727 ret = err;
3725 ret = err;
3726 }
3727out: 3728out:
3728 kfree(sa); 3729 kfree(sa);
3730 up_write(&root->fs_info->subvol_sem);
3729drop_write: 3731drop_write:
3730 mnt_drop_write_file(file); 3732 mnt_drop_write_file(file);
3731 return ret; 3733 return ret;
@@ -3877,6 +3879,64 @@ drop_write:
3877 return ret; 3879 return ret;
3878} 3880}
3879 3881
3882static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
3883{
3884 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3885 struct btrfs_ioctl_quota_rescan_args *qsa;
3886 int ret;
3887
3888 if (!capable(CAP_SYS_ADMIN))
3889 return -EPERM;
3890
3891 ret = mnt_want_write_file(file);
3892 if (ret)
3893 return ret;
3894
3895 qsa = memdup_user(arg, sizeof(*qsa));
3896 if (IS_ERR(qsa)) {
3897 ret = PTR_ERR(qsa);
3898 goto drop_write;
3899 }
3900
3901 if (qsa->flags) {
3902 ret = -EINVAL;
3903 goto out;
3904 }
3905
3906 ret = btrfs_qgroup_rescan(root->fs_info);
3907
3908out:
3909 kfree(qsa);
3910drop_write:
3911 mnt_drop_write_file(file);
3912 return ret;
3913}
3914
3915static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
3916{
3917 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3918 struct btrfs_ioctl_quota_rescan_args *qsa;
3919 int ret = 0;
3920
3921 if (!capable(CAP_SYS_ADMIN))
3922 return -EPERM;
3923
3924 qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
3925 if (!qsa)
3926 return -ENOMEM;
3927
3928 if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3929 qsa->flags = 1;
3930 qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
3931 }
3932
3933 if (copy_to_user(arg, qsa, sizeof(*qsa)))
3934 ret = -EFAULT;
3935
3936 kfree(qsa);
3937 return ret;
3938}
3939
3880static long btrfs_ioctl_set_received_subvol(struct file *file, 3940static long btrfs_ioctl_set_received_subvol(struct file *file,
3881 void __user *arg) 3941 void __user *arg)
3882{ 3942{
@@ -4115,6 +4175,10 @@ long btrfs_ioctl(struct file *file, unsigned int
4115 return btrfs_ioctl_qgroup_create(file, argp); 4175 return btrfs_ioctl_qgroup_create(file, argp);
4116 case BTRFS_IOC_QGROUP_LIMIT: 4176 case BTRFS_IOC_QGROUP_LIMIT:
4117 return btrfs_ioctl_qgroup_limit(file, argp); 4177 return btrfs_ioctl_qgroup_limit(file, argp);
4178 case BTRFS_IOC_QUOTA_RESCAN:
4179 return btrfs_ioctl_quota_rescan(file, argp);
4180 case BTRFS_IOC_QUOTA_RESCAN_STATUS:
4181 return btrfs_ioctl_quota_rescan_status(file, argp);
4118 case BTRFS_IOC_DEV_REPLACE: 4182 case BTRFS_IOC_DEV_REPLACE:
4119 return btrfs_ioctl_dev_replace(root, argp); 4183 return btrfs_ioctl_dev_replace(root, argp);
4120 case BTRFS_IOC_GET_FSLABEL: 4184 case BTRFS_IOC_GET_FSLABEL:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index e95df435d897..01277b8f2373 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,7 +24,7 @@
24#include "extent_io.h" 24#include "extent_io.h"
25#include "locking.h" 25#include "locking.h"
26 26
27void btrfs_assert_tree_read_locked(struct extent_buffer *eb); 27static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
28 28
29/* 29/*
30 * if we currently have a spinning reader or writer lock 30 * if we currently have a spinning reader or writer lock
@@ -264,7 +264,7 @@ void btrfs_assert_tree_locked(struct extent_buffer *eb)
264 BUG_ON(!atomic_read(&eb->write_locks)); 264 BUG_ON(!atomic_read(&eb->write_locks));
265} 265}
266 266
267void btrfs_assert_tree_read_locked(struct extent_buffer *eb) 267static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
268{ 268{
269 BUG_ON(!atomic_read(&eb->read_locks)); 269 BUG_ON(!atomic_read(&eb->read_locks));
270} 270}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 005c45db699e..1ddd728541ee 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -986,7 +986,7 @@ out:
986 * be reclaimed before their checksum is actually put into the btree 986 * be reclaimed before their checksum is actually put into the btree
987 */ 987 */
988int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 988int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
989 u32 *sum) 989 u32 *sum, int len)
990{ 990{
991 struct btrfs_ordered_sum *ordered_sum; 991 struct btrfs_ordered_sum *ordered_sum;
992 struct btrfs_sector_sum *sector_sums; 992 struct btrfs_sector_sum *sector_sums;
@@ -995,22 +995,28 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
995 unsigned long num_sectors; 995 unsigned long num_sectors;
996 unsigned long i; 996 unsigned long i;
997 u32 sectorsize = BTRFS_I(inode)->root->sectorsize; 997 u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
998 int ret = 1; 998 int index = 0;
999 999
1000 ordered = btrfs_lookup_ordered_extent(inode, offset); 1000 ordered = btrfs_lookup_ordered_extent(inode, offset);
1001 if (!ordered) 1001 if (!ordered)
1002 return 1; 1002 return 0;
1003 1003
1004 spin_lock_irq(&tree->lock); 1004 spin_lock_irq(&tree->lock);
1005 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 1005 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
1006 if (disk_bytenr >= ordered_sum->bytenr) { 1006 if (disk_bytenr >= ordered_sum->bytenr &&
1007 num_sectors = ordered_sum->len / sectorsize; 1007 disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
1008 sector_sums = ordered_sum->sums; 1008 i = (disk_bytenr - ordered_sum->bytenr) >>
1009 for (i = 0; i < num_sectors; i++) { 1009 inode->i_sb->s_blocksize_bits;
1010 sector_sums = ordered_sum->sums + i;
1011 num_sectors = ordered_sum->len >>
1012 inode->i_sb->s_blocksize_bits;
1013 for (; i < num_sectors; i++) {
1010 if (sector_sums[i].bytenr == disk_bytenr) { 1014 if (sector_sums[i].bytenr == disk_bytenr) {
1011 *sum = sector_sums[i].sum; 1015 sum[index] = sector_sums[i].sum;
1012 ret = 0; 1016 index++;
1013 goto out; 1017 if (index == len)
1018 goto out;
1019 disk_bytenr += sectorsize;
1014 } 1020 }
1015 } 1021 }
1016 } 1022 }
@@ -1018,7 +1024,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
1018out: 1024out:
1019 spin_unlock_irq(&tree->lock); 1025 spin_unlock_irq(&tree->lock);
1020 btrfs_put_ordered_extent(ordered); 1026 btrfs_put_ordered_extent(ordered);
1021 return ret; 1027 return index;
1022} 1028}
1023 1029
1024 1030
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8eadfe406cdd..58b0e3b0ebad 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -196,7 +196,8 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
196 u64 len); 196 u64 len);
197int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 197int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
198 struct btrfs_ordered_extent *ordered); 198 struct btrfs_ordered_extent *ordered);
199int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 199int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
200 u32 *sum, int len);
200int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, 201int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
201 struct btrfs_root *root, int wait); 202 struct btrfs_root *root, int wait);
202void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 203void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 920957ecb27e..dc0024f17c1f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -176,7 +176,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
176 176
177 nr = btrfs_header_nritems(l); 177 nr = btrfs_header_nritems(l);
178 178
179 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", 179 btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
180 (unsigned long long)btrfs_header_bytenr(l), nr, 180 (unsigned long long)btrfs_header_bytenr(l), nr,
181 btrfs_leaf_free_space(root, l)); 181 btrfs_leaf_free_space(root, l));
182 for (i = 0 ; i < nr ; i++) { 182 for (i = 0 ; i < nr ; i++) {
@@ -319,10 +319,9 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
319 btrfs_print_leaf(root, c); 319 btrfs_print_leaf(root, c);
320 return; 320 return;
321 } 321 }
322 printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n", 322 btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
323 (unsigned long long)btrfs_header_bytenr(c), 323 (unsigned long long)btrfs_header_bytenr(c),
324 level, nr, 324 level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
325 (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
326 for (i = 0; i < nr; i++) { 325 for (i = 0; i < nr; i++) {
327 btrfs_node_key_to_cpu(c, &key, i); 326 btrfs_node_key_to_cpu(c, &key, i);
328 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", 327 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index da75efe534d5..7faddfacc5bd 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -19,5 +19,5 @@
19#ifndef __PRINT_TREE_ 19#ifndef __PRINT_TREE_
20#define __PRINT_TREE_ 20#define __PRINT_TREE_
21void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); 21void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
22void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); 22void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c);
23#endif 23#endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b44124dd2370..9d49c586995a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -31,13 +31,13 @@
31#include "locking.h" 31#include "locking.h"
32#include "ulist.h" 32#include "ulist.h"
33#include "backref.h" 33#include "backref.h"
34#include "extent_io.h"
34 35
35/* TODO XXX FIXME 36/* TODO XXX FIXME
36 * - subvol delete -> delete when ref goes to 0? delete limits also? 37 * - subvol delete -> delete when ref goes to 0? delete limits also?
37 * - reorganize keys 38 * - reorganize keys
38 * - compressed 39 * - compressed
39 * - sync 40 * - sync
40 * - rescan
41 * - copy also limits on subvol creation 41 * - copy also limits on subvol creation
42 * - limit 42 * - limit
43 * - caches fuer ulists 43 * - caches fuer ulists
@@ -98,7 +98,15 @@ struct btrfs_qgroup_list {
98 struct btrfs_qgroup *member; 98 struct btrfs_qgroup *member;
99}; 99};
100 100
101/* must be called with qgroup_lock held */ 101struct qgroup_rescan {
102 struct btrfs_work work;
103 struct btrfs_fs_info *fs_info;
104};
105
106static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
107 struct qgroup_rescan *qscan);
108
109/* must be called with qgroup_ioctl_lock held */
102static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 110static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
103 u64 qgroupid) 111 u64 qgroupid)
104{ 112{
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
298 } 306 }
299 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 307 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
300 ptr); 308 ptr);
301 /* FIXME read scan element */ 309 fs_info->qgroup_rescan_progress.objectid =
310 btrfs_qgroup_status_rescan(l, ptr);
311 if (fs_info->qgroup_flags &
312 BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
313 struct qgroup_rescan *qscan =
314 kmalloc(sizeof(*qscan), GFP_NOFS);
315 if (!qscan) {
316 ret = -ENOMEM;
317 goto out;
318 }
319 fs_info->qgroup_rescan_progress.type = 0;
320 fs_info->qgroup_rescan_progress.offset = 0;
321 qgroup_rescan_start(fs_info, qscan);
322 }
302 goto next1; 323 goto next1;
303 } 324 }
304 325
@@ -420,8 +441,6 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
420 qgroup = rb_entry(n, struct btrfs_qgroup, node); 441 qgroup = rb_entry(n, struct btrfs_qgroup, node);
421 rb_erase(n, &fs_info->qgroup_tree); 442 rb_erase(n, &fs_info->qgroup_tree);
422 443
423 WARN_ON(!list_empty(&qgroup->dirty));
424
425 while (!list_empty(&qgroup->groups)) { 444 while (!list_empty(&qgroup->groups)) {
426 list = list_first_entry(&qgroup->groups, 445 list = list_first_entry(&qgroup->groups,
427 struct btrfs_qgroup_list, 446 struct btrfs_qgroup_list,
@@ -721,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
721 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 740 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
722 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 741 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
723 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 742 btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
724 /* XXX scan */ 743 btrfs_set_qgroup_status_rescan(l, ptr,
744 fs_info->qgroup_rescan_progress.objectid);
725 745
726 btrfs_mark_buffer_dirty(l); 746 btrfs_mark_buffer_dirty(l);
727 747
@@ -783,19 +803,21 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
783 struct btrfs_fs_info *fs_info) 803 struct btrfs_fs_info *fs_info)
784{ 804{
785 struct btrfs_root *quota_root; 805 struct btrfs_root *quota_root;
806 struct btrfs_root *tree_root = fs_info->tree_root;
786 struct btrfs_path *path = NULL; 807 struct btrfs_path *path = NULL;
787 struct btrfs_qgroup_status_item *ptr; 808 struct btrfs_qgroup_status_item *ptr;
788 struct extent_buffer *leaf; 809 struct extent_buffer *leaf;
789 struct btrfs_key key; 810 struct btrfs_key key;
811 struct btrfs_key found_key;
812 struct btrfs_qgroup *qgroup = NULL;
790 int ret = 0; 813 int ret = 0;
814 int slot;
791 815
792 spin_lock(&fs_info->qgroup_lock); 816 mutex_lock(&fs_info->qgroup_ioctl_lock);
793 if (fs_info->quota_root) { 817 if (fs_info->quota_root) {
794 fs_info->pending_quota_state = 1; 818 fs_info->pending_quota_state = 1;
795 spin_unlock(&fs_info->qgroup_lock);
796 goto out; 819 goto out;
797 } 820 }
798 spin_unlock(&fs_info->qgroup_lock);
799 821
800 /* 822 /*
801 * initially create the quota tree 823 * initially create the quota tree
@@ -830,10 +852,57 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
830 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 852 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
831 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 853 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
832 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 854 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
833 btrfs_set_qgroup_status_scan(leaf, ptr, 0); 855 btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
834 856
835 btrfs_mark_buffer_dirty(leaf); 857 btrfs_mark_buffer_dirty(leaf);
836 858
859 key.objectid = 0;
860 key.type = BTRFS_ROOT_REF_KEY;
861 key.offset = 0;
862
863 btrfs_release_path(path);
864 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
865 if (ret > 0)
866 goto out_add_root;
867 if (ret < 0)
868 goto out_free_path;
869
870
871 while (1) {
872 slot = path->slots[0];
873 leaf = path->nodes[0];
874 btrfs_item_key_to_cpu(leaf, &found_key, slot);
875
876 if (found_key.type == BTRFS_ROOT_REF_KEY) {
877 ret = add_qgroup_item(trans, quota_root,
878 found_key.offset);
879 if (ret)
880 goto out_free_path;
881
882 qgroup = add_qgroup_rb(fs_info, found_key.offset);
883 if (IS_ERR(qgroup)) {
884 ret = PTR_ERR(qgroup);
885 goto out_free_path;
886 }
887 }
888 ret = btrfs_next_item(tree_root, path);
889 if (ret < 0)
890 goto out_free_path;
891 if (ret)
892 break;
893 }
894
895out_add_root:
896 btrfs_release_path(path);
897 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
898 if (ret)
899 goto out_free_path;
900
901 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
902 if (IS_ERR(qgroup)) {
903 ret = PTR_ERR(qgroup);
904 goto out_free_path;
905 }
837 spin_lock(&fs_info->qgroup_lock); 906 spin_lock(&fs_info->qgroup_lock);
838 fs_info->quota_root = quota_root; 907 fs_info->quota_root = quota_root;
839 fs_info->pending_quota_state = 1; 908 fs_info->pending_quota_state = 1;
@@ -847,6 +916,7 @@ out_free_root:
847 kfree(quota_root); 916 kfree(quota_root);
848 } 917 }
849out: 918out:
919 mutex_unlock(&fs_info->qgroup_ioctl_lock);
850 return ret; 920 return ret;
851} 921}
852 922
@@ -857,11 +927,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
857 struct btrfs_root *quota_root; 927 struct btrfs_root *quota_root;
858 int ret = 0; 928 int ret = 0;
859 929
930 mutex_lock(&fs_info->qgroup_ioctl_lock);
931 if (!fs_info->quota_root)
932 goto out;
860 spin_lock(&fs_info->qgroup_lock); 933 spin_lock(&fs_info->qgroup_lock);
861 if (!fs_info->quota_root) {
862 spin_unlock(&fs_info->qgroup_lock);
863 return 0;
864 }
865 fs_info->quota_enabled = 0; 934 fs_info->quota_enabled = 0;
866 fs_info->pending_quota_state = 0; 935 fs_info->pending_quota_state = 0;
867 quota_root = fs_info->quota_root; 936 quota_root = fs_info->quota_root;
@@ -869,8 +938,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
869 btrfs_free_qgroup_config(fs_info); 938 btrfs_free_qgroup_config(fs_info);
870 spin_unlock(&fs_info->qgroup_lock); 939 spin_unlock(&fs_info->qgroup_lock);
871 940
872 if (!quota_root) 941 if (!quota_root) {
873 return -EINVAL; 942 ret = -EINVAL;
943 goto out;
944 }
874 945
875 ret = btrfs_clean_quota_tree(trans, quota_root); 946 ret = btrfs_clean_quota_tree(trans, quota_root);
876 if (ret) 947 if (ret)
@@ -891,39 +962,62 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
891 free_extent_buffer(quota_root->commit_root); 962 free_extent_buffer(quota_root->commit_root);
892 kfree(quota_root); 963 kfree(quota_root);
893out: 964out:
965 mutex_unlock(&fs_info->qgroup_ioctl_lock);
894 return ret; 966 return ret;
895} 967}
896 968
897int btrfs_quota_rescan(struct btrfs_fs_info *fs_info) 969static void qgroup_dirty(struct btrfs_fs_info *fs_info,
970 struct btrfs_qgroup *qgroup)
898{ 971{
899 /* FIXME */ 972 if (list_empty(&qgroup->dirty))
900 return 0; 973 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
901} 974}
902 975
903int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 976int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
904 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 977 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
905{ 978{
906 struct btrfs_root *quota_root; 979 struct btrfs_root *quota_root;
980 struct btrfs_qgroup *parent;
981 struct btrfs_qgroup *member;
982 struct btrfs_qgroup_list *list;
907 int ret = 0; 983 int ret = 0;
908 984
985 mutex_lock(&fs_info->qgroup_ioctl_lock);
909 quota_root = fs_info->quota_root; 986 quota_root = fs_info->quota_root;
910 if (!quota_root) 987 if (!quota_root) {
911 return -EINVAL; 988 ret = -EINVAL;
989 goto out;
990 }
991 member = find_qgroup_rb(fs_info, src);
992 parent = find_qgroup_rb(fs_info, dst);
993 if (!member || !parent) {
994 ret = -EINVAL;
995 goto out;
996 }
997
998 /* check if such qgroup relation exist firstly */
999 list_for_each_entry(list, &member->groups, next_group) {
1000 if (list->group == parent) {
1001 ret = -EEXIST;
1002 goto out;
1003 }
1004 }
912 1005
913 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1006 ret = add_qgroup_relation_item(trans, quota_root, src, dst);
914 if (ret) 1007 if (ret)
915 return ret; 1008 goto out;
916 1009
917 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1010 ret = add_qgroup_relation_item(trans, quota_root, dst, src);
918 if (ret) { 1011 if (ret) {
919 del_qgroup_relation_item(trans, quota_root, src, dst); 1012 del_qgroup_relation_item(trans, quota_root, src, dst);
920 return ret; 1013 goto out;
921 } 1014 }
922 1015
923 spin_lock(&fs_info->qgroup_lock); 1016 spin_lock(&fs_info->qgroup_lock);
924 ret = add_relation_rb(quota_root->fs_info, src, dst); 1017 ret = add_relation_rb(quota_root->fs_info, src, dst);
925 spin_unlock(&fs_info->qgroup_lock); 1018 spin_unlock(&fs_info->qgroup_lock);
926 1019out:
1020 mutex_unlock(&fs_info->qgroup_ioctl_lock);
927 return ret; 1021 return ret;
928} 1022}
929 1023
@@ -931,13 +1025,34 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
931 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1025 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
932{ 1026{
933 struct btrfs_root *quota_root; 1027 struct btrfs_root *quota_root;
1028 struct btrfs_qgroup *parent;
1029 struct btrfs_qgroup *member;
1030 struct btrfs_qgroup_list *list;
934 int ret = 0; 1031 int ret = 0;
935 int err; 1032 int err;
936 1033
1034 mutex_lock(&fs_info->qgroup_ioctl_lock);
937 quota_root = fs_info->quota_root; 1035 quota_root = fs_info->quota_root;
938 if (!quota_root) 1036 if (!quota_root) {
939 return -EINVAL; 1037 ret = -EINVAL;
1038 goto out;
1039 }
940 1040
1041 member = find_qgroup_rb(fs_info, src);
1042 parent = find_qgroup_rb(fs_info, dst);
1043 if (!member || !parent) {
1044 ret = -EINVAL;
1045 goto out;
1046 }
1047
1048 /* check if such qgroup relation exist firstly */
1049 list_for_each_entry(list, &member->groups, next_group) {
1050 if (list->group == parent)
1051 goto exist;
1052 }
1053 ret = -ENOENT;
1054 goto out;
1055exist:
941 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1056 ret = del_qgroup_relation_item(trans, quota_root, src, dst);
942 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1057 err = del_qgroup_relation_item(trans, quota_root, dst, src);
943 if (err && !ret) 1058 if (err && !ret)
@@ -945,9 +1060,9 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
945 1060
946 spin_lock(&fs_info->qgroup_lock); 1061 spin_lock(&fs_info->qgroup_lock);
947 del_relation_rb(fs_info, src, dst); 1062 del_relation_rb(fs_info, src, dst);
948
949 spin_unlock(&fs_info->qgroup_lock); 1063 spin_unlock(&fs_info->qgroup_lock);
950 1064out:
1065 mutex_unlock(&fs_info->qgroup_ioctl_lock);
951 return ret; 1066 return ret;
952} 1067}
953 1068
@@ -958,11 +1073,21 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
958 struct btrfs_qgroup *qgroup; 1073 struct btrfs_qgroup *qgroup;
959 int ret = 0; 1074 int ret = 0;
960 1075
1076 mutex_lock(&fs_info->qgroup_ioctl_lock);
961 quota_root = fs_info->quota_root; 1077 quota_root = fs_info->quota_root;
962 if (!quota_root) 1078 if (!quota_root) {
963 return -EINVAL; 1079 ret = -EINVAL;
1080 goto out;
1081 }
1082 qgroup = find_qgroup_rb(fs_info, qgroupid);
1083 if (qgroup) {
1084 ret = -EEXIST;
1085 goto out;
1086 }
964 1087
965 ret = add_qgroup_item(trans, quota_root, qgroupid); 1088 ret = add_qgroup_item(trans, quota_root, qgroupid);
1089 if (ret)
1090 goto out;
966 1091
967 spin_lock(&fs_info->qgroup_lock); 1092 spin_lock(&fs_info->qgroup_lock);
968 qgroup = add_qgroup_rb(fs_info, qgroupid); 1093 qgroup = add_qgroup_rb(fs_info, qgroupid);
@@ -970,7 +1095,8 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
970 1095
971 if (IS_ERR(qgroup)) 1096 if (IS_ERR(qgroup))
972 ret = PTR_ERR(qgroup); 1097 ret = PTR_ERR(qgroup);
973 1098out:
1099 mutex_unlock(&fs_info->qgroup_ioctl_lock);
974 return ret; 1100 return ret;
975} 1101}
976 1102
@@ -981,27 +1107,32 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
981 struct btrfs_qgroup *qgroup; 1107 struct btrfs_qgroup *qgroup;
982 int ret = 0; 1108 int ret = 0;
983 1109
1110 mutex_lock(&fs_info->qgroup_ioctl_lock);
984 quota_root = fs_info->quota_root; 1111 quota_root = fs_info->quota_root;
985 if (!quota_root) 1112 if (!quota_root) {
986 return -EINVAL; 1113 ret = -EINVAL;
1114 goto out;
1115 }
987 1116
988 /* check if there are no relations to this qgroup */
989 spin_lock(&fs_info->qgroup_lock);
990 qgroup = find_qgroup_rb(fs_info, qgroupid); 1117 qgroup = find_qgroup_rb(fs_info, qgroupid);
991 if (qgroup) { 1118 if (!qgroup) {
992 if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) { 1119 ret = -ENOENT;
993 spin_unlock(&fs_info->qgroup_lock); 1120 goto out;
994 return -EBUSY; 1121 } else {
1122 /* check if there are no relations to this qgroup */
1123 if (!list_empty(&qgroup->groups) ||
1124 !list_empty(&qgroup->members)) {
1125 ret = -EBUSY;
1126 goto out;
995 } 1127 }
996 } 1128 }
997 spin_unlock(&fs_info->qgroup_lock);
998
999 ret = del_qgroup_item(trans, quota_root, qgroupid); 1129 ret = del_qgroup_item(trans, quota_root, qgroupid);
1000 1130
1001 spin_lock(&fs_info->qgroup_lock); 1131 spin_lock(&fs_info->qgroup_lock);
1002 del_qgroup_rb(quota_root->fs_info, qgroupid); 1132 del_qgroup_rb(quota_root->fs_info, qgroupid);
1003 spin_unlock(&fs_info->qgroup_lock); 1133 spin_unlock(&fs_info->qgroup_lock);
1004 1134out:
1135 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1005 return ret; 1136 return ret;
1006} 1137}
1007 1138
@@ -1009,13 +1140,22 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1009 struct btrfs_fs_info *fs_info, u64 qgroupid, 1140 struct btrfs_fs_info *fs_info, u64 qgroupid,
1010 struct btrfs_qgroup_limit *limit) 1141 struct btrfs_qgroup_limit *limit)
1011{ 1142{
1012 struct btrfs_root *quota_root = fs_info->quota_root; 1143 struct btrfs_root *quota_root;
1013 struct btrfs_qgroup *qgroup; 1144 struct btrfs_qgroup *qgroup;
1014 int ret = 0; 1145 int ret = 0;
1015 1146
1016 if (!quota_root) 1147 mutex_lock(&fs_info->qgroup_ioctl_lock);
1017 return -EINVAL; 1148 quota_root = fs_info->quota_root;
1149 if (!quota_root) {
1150 ret = -EINVAL;
1151 goto out;
1152 }
1018 1153
1154 qgroup = find_qgroup_rb(fs_info, qgroupid);
1155 if (!qgroup) {
1156 ret = -ENOENT;
1157 goto out;
1158 }
1019 ret = update_qgroup_limit_item(trans, quota_root, qgroupid, 1159 ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1020 limit->flags, limit->max_rfer, 1160 limit->flags, limit->max_rfer,
1021 limit->max_excl, limit->rsv_rfer, 1161 limit->max_excl, limit->rsv_rfer,
@@ -1027,31 +1167,17 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1027 } 1167 }
1028 1168
1029 spin_lock(&fs_info->qgroup_lock); 1169 spin_lock(&fs_info->qgroup_lock);
1030
1031 qgroup = find_qgroup_rb(fs_info, qgroupid);
1032 if (!qgroup) {
1033 ret = -ENOENT;
1034 goto unlock;
1035 }
1036 qgroup->lim_flags = limit->flags; 1170 qgroup->lim_flags = limit->flags;
1037 qgroup->max_rfer = limit->max_rfer; 1171 qgroup->max_rfer = limit->max_rfer;
1038 qgroup->max_excl = limit->max_excl; 1172 qgroup->max_excl = limit->max_excl;
1039 qgroup->rsv_rfer = limit->rsv_rfer; 1173 qgroup->rsv_rfer = limit->rsv_rfer;
1040 qgroup->rsv_excl = limit->rsv_excl; 1174 qgroup->rsv_excl = limit->rsv_excl;
1041
1042unlock:
1043 spin_unlock(&fs_info->qgroup_lock); 1175 spin_unlock(&fs_info->qgroup_lock);
1044 1176out:
1177 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1045 return ret; 1178 return ret;
1046} 1179}
1047 1180
1048static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1049 struct btrfs_qgroup *qgroup)
1050{
1051 if (list_empty(&qgroup->dirty))
1052 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1053}
1054
1055/* 1181/*
1056 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts 1182 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1057 * the modification into a list that's later used by btrfs_end_transaction to 1183 * the modification into a list that's later used by btrfs_end_transaction to
@@ -1075,6 +1201,144 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1075 return 0; 1201 return 0;
1076} 1202}
1077 1203
1204static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1205 struct ulist *roots, struct ulist *tmp,
1206 u64 seq)
1207{
1208 struct ulist_node *unode;
1209 struct ulist_iterator uiter;
1210 struct ulist_node *tmp_unode;
1211 struct ulist_iterator tmp_uiter;
1212 struct btrfs_qgroup *qg;
1213 int ret;
1214
1215 ULIST_ITER_INIT(&uiter);
1216 while ((unode = ulist_next(roots, &uiter))) {
1217 qg = find_qgroup_rb(fs_info, unode->val);
1218 if (!qg)
1219 continue;
1220
1221 ulist_reinit(tmp);
1222 /* XXX id not needed */
1223 ret = ulist_add(tmp, qg->qgroupid,
1224 (u64)(uintptr_t)qg, GFP_ATOMIC);
1225 if (ret < 0)
1226 return ret;
1227 ULIST_ITER_INIT(&tmp_uiter);
1228 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1229 struct btrfs_qgroup_list *glist;
1230
1231 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1232 if (qg->refcnt < seq)
1233 qg->refcnt = seq + 1;
1234 else
1235 ++qg->refcnt;
1236
1237 list_for_each_entry(glist, &qg->groups, next_group) {
1238 ret = ulist_add(tmp, glist->group->qgroupid,
1239 (u64)(uintptr_t)glist->group,
1240 GFP_ATOMIC);
1241 if (ret < 0)
1242 return ret;
1243 }
1244 }
1245 }
1246
1247 return 0;
1248}
1249
1250static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
1251 struct ulist *roots, struct ulist *tmp,
1252 u64 seq, int sgn, u64 num_bytes,
1253 struct btrfs_qgroup *qgroup)
1254{
1255 struct ulist_node *unode;
1256 struct ulist_iterator uiter;
1257 struct btrfs_qgroup *qg;
1258 struct btrfs_qgroup_list *glist;
1259 int ret;
1260
1261 ulist_reinit(tmp);
1262 ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1263 if (ret < 0)
1264 return ret;
1265
1266 ULIST_ITER_INIT(&uiter);
1267 while ((unode = ulist_next(tmp, &uiter))) {
1268 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1269 if (qg->refcnt < seq) {
1270 /* not visited by step 1 */
1271 qg->rfer += sgn * num_bytes;
1272 qg->rfer_cmpr += sgn * num_bytes;
1273 if (roots->nnodes == 0) {
1274 qg->excl += sgn * num_bytes;
1275 qg->excl_cmpr += sgn * num_bytes;
1276 }
1277 qgroup_dirty(fs_info, qg);
1278 }
1279 WARN_ON(qg->tag >= seq);
1280 qg->tag = seq;
1281
1282 list_for_each_entry(glist, &qg->groups, next_group) {
1283 ret = ulist_add(tmp, glist->group->qgroupid,
1284 (uintptr_t)glist->group, GFP_ATOMIC);
1285 if (ret < 0)
1286 return ret;
1287 }
1288 }
1289
1290 return 0;
1291}
1292
1293static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
1294 struct ulist *roots, struct ulist *tmp,
1295 u64 seq, int sgn, u64 num_bytes)
1296{
1297 struct ulist_node *unode;
1298 struct ulist_iterator uiter;
1299 struct btrfs_qgroup *qg;
1300 struct ulist_node *tmp_unode;
1301 struct ulist_iterator tmp_uiter;
1302 int ret;
1303
1304 ULIST_ITER_INIT(&uiter);
1305 while ((unode = ulist_next(roots, &uiter))) {
1306 qg = find_qgroup_rb(fs_info, unode->val);
1307 if (!qg)
1308 continue;
1309
1310 ulist_reinit(tmp);
1311 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1312 if (ret < 0)
1313 return ret;
1314
1315 ULIST_ITER_INIT(&tmp_uiter);
1316 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1317 struct btrfs_qgroup_list *glist;
1318
1319 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1320 if (qg->tag == seq)
1321 continue;
1322
1323 if (qg->refcnt - seq == roots->nnodes) {
1324 qg->excl -= sgn * num_bytes;
1325 qg->excl_cmpr -= sgn * num_bytes;
1326 qgroup_dirty(fs_info, qg);
1327 }
1328
1329 list_for_each_entry(glist, &qg->groups, next_group) {
1330 ret = ulist_add(tmp, glist->group->qgroupid,
1331 (uintptr_t)glist->group,
1332 GFP_ATOMIC);
1333 if (ret < 0)
1334 return ret;
1335 }
1336 }
1337 }
1338
1339 return 0;
1340}
1341
1078/* 1342/*
1079 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 1343 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1080 * from the fs. First, all roots referencing the extent are searched, and 1344 * from the fs. First, all roots referencing the extent are searched, and
@@ -1090,10 +1354,8 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1090 struct btrfs_root *quota_root; 1354 struct btrfs_root *quota_root;
1091 u64 ref_root; 1355 u64 ref_root;
1092 struct btrfs_qgroup *qgroup; 1356 struct btrfs_qgroup *qgroup;
1093 struct ulist_node *unode;
1094 struct ulist *roots = NULL; 1357 struct ulist *roots = NULL;
1095 struct ulist *tmp = NULL; 1358 struct ulist *tmp = NULL;
1096 struct ulist_iterator uiter;
1097 u64 seq; 1359 u64 seq;
1098 int ret = 0; 1360 int ret = 0;
1099 int sgn; 1361 int sgn;
@@ -1132,9 +1394,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1132 case BTRFS_ADD_DELAYED_REF: 1394 case BTRFS_ADD_DELAYED_REF:
1133 case BTRFS_ADD_DELAYED_EXTENT: 1395 case BTRFS_ADD_DELAYED_EXTENT:
1134 sgn = 1; 1396 sgn = 1;
1397 seq = btrfs_tree_mod_seq_prev(node->seq);
1135 break; 1398 break;
1136 case BTRFS_DROP_DELAYED_REF: 1399 case BTRFS_DROP_DELAYED_REF:
1137 sgn = -1; 1400 sgn = -1;
1401 seq = node->seq;
1138 break; 1402 break;
1139 case BTRFS_UPDATE_DELAYED_HEAD: 1403 case BTRFS_UPDATE_DELAYED_HEAD:
1140 return 0; 1404 return 0;
@@ -1142,20 +1406,37 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1142 BUG(); 1406 BUG();
1143 } 1407 }
1144 1408
1409 mutex_lock(&fs_info->qgroup_rescan_lock);
1410 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1411 if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1412 mutex_unlock(&fs_info->qgroup_rescan_lock);
1413 return 0;
1414 }
1415 }
1416 mutex_unlock(&fs_info->qgroup_rescan_lock);
1417
1145 /* 1418 /*
1146 * the delayed ref sequence number we pass depends on the direction of 1419 * the delayed ref sequence number we pass depends on the direction of
1147 * the operation. for add operations, we pass (node->seq - 1) to skip 1420 * the operation. for add operations, we pass
1421 * tree_mod_log_prev_seq(node->seq) to skip
1148 * the delayed ref's current sequence number, because we need the state 1422 * the delayed ref's current sequence number, because we need the state
1149 * of the tree before the add operation. for delete operations, we pass 1423 * of the tree before the add operation. for delete operations, we pass
1150 * (node->seq) to include the delayed ref's current sequence number, 1424 * (node->seq) to include the delayed ref's current sequence number,
1151 * because we need the state of the tree after the delete operation. 1425 * because we need the state of the tree after the delete operation.
1152 */ 1426 */
1153 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, 1427 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
1154 sgn > 0 ? node->seq - 1 : node->seq, &roots);
1155 if (ret < 0) 1428 if (ret < 0)
1156 return ret; 1429 return ret;
1157 1430
1431 mutex_lock(&fs_info->qgroup_rescan_lock);
1158 spin_lock(&fs_info->qgroup_lock); 1432 spin_lock(&fs_info->qgroup_lock);
1433 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1434 if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1435 ret = 0;
1436 goto unlock;
1437 }
1438 }
1439
1159 quota_root = fs_info->quota_root; 1440 quota_root = fs_info->quota_root;
1160 if (!quota_root) 1441 if (!quota_root)
1161 goto unlock; 1442 goto unlock;
@@ -1175,106 +1456,29 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1175 seq = fs_info->qgroup_seq; 1456 seq = fs_info->qgroup_seq;
1176 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 1457 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1177 1458
1178 ULIST_ITER_INIT(&uiter); 1459 ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1179 while ((unode = ulist_next(roots, &uiter))) { 1460 if (ret)
1180 struct ulist_node *tmp_unode; 1461 goto unlock;
1181 struct ulist_iterator tmp_uiter;
1182 struct btrfs_qgroup *qg;
1183
1184 qg = find_qgroup_rb(fs_info, unode->val);
1185 if (!qg)
1186 continue;
1187
1188 ulist_reinit(tmp);
1189 /* XXX id not needed */
1190 ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
1191 ULIST_ITER_INIT(&tmp_uiter);
1192 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1193 struct btrfs_qgroup_list *glist;
1194
1195 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1196 if (qg->refcnt < seq)
1197 qg->refcnt = seq + 1;
1198 else
1199 ++qg->refcnt;
1200
1201 list_for_each_entry(glist, &qg->groups, next_group) {
1202 ulist_add(tmp, glist->group->qgroupid,
1203 (u64)(uintptr_t)glist->group,
1204 GFP_ATOMIC);
1205 }
1206 }
1207 }
1208 1462
1209 /* 1463 /*
1210 * step 2: walk from the new root 1464 * step 2: walk from the new root
1211 */ 1465 */
1212 ulist_reinit(tmp); 1466 ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
1213 ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1467 node->num_bytes, qgroup);
1214 ULIST_ITER_INIT(&uiter); 1468 if (ret)
1215 while ((unode = ulist_next(tmp, &uiter))) { 1469 goto unlock;
1216 struct btrfs_qgroup *qg;
1217 struct btrfs_qgroup_list *glist;
1218
1219 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1220 if (qg->refcnt < seq) {
1221 /* not visited by step 1 */
1222 qg->rfer += sgn * node->num_bytes;
1223 qg->rfer_cmpr += sgn * node->num_bytes;
1224 if (roots->nnodes == 0) {
1225 qg->excl += sgn * node->num_bytes;
1226 qg->excl_cmpr += sgn * node->num_bytes;
1227 }
1228 qgroup_dirty(fs_info, qg);
1229 }
1230 WARN_ON(qg->tag >= seq);
1231 qg->tag = seq;
1232
1233 list_for_each_entry(glist, &qg->groups, next_group) {
1234 ulist_add(tmp, glist->group->qgroupid,
1235 (uintptr_t)glist->group, GFP_ATOMIC);
1236 }
1237 }
1238 1470
1239 /* 1471 /*
1240 * step 3: walk again from old refs 1472 * step 3: walk again from old refs
1241 */ 1473 */
1242 ULIST_ITER_INIT(&uiter); 1474 ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
1243 while ((unode = ulist_next(roots, &uiter))) { 1475 node->num_bytes);
1244 struct btrfs_qgroup *qg; 1476 if (ret)
1245 struct ulist_node *tmp_unode; 1477 goto unlock;
1246 struct ulist_iterator tmp_uiter;
1247
1248 qg = find_qgroup_rb(fs_info, unode->val);
1249 if (!qg)
1250 continue;
1251
1252 ulist_reinit(tmp);
1253 ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1254 ULIST_ITER_INIT(&tmp_uiter);
1255 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1256 struct btrfs_qgroup_list *glist;
1257
1258 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1259 if (qg->tag == seq)
1260 continue;
1261
1262 if (qg->refcnt - seq == roots->nnodes) {
1263 qg->excl -= sgn * node->num_bytes;
1264 qg->excl_cmpr -= sgn * node->num_bytes;
1265 qgroup_dirty(fs_info, qg);
1266 }
1267 1478
1268 list_for_each_entry(glist, &qg->groups, next_group) {
1269 ulist_add(tmp, glist->group->qgroupid,
1270 (uintptr_t)glist->group,
1271 GFP_ATOMIC);
1272 }
1273 }
1274 }
1275 ret = 0;
1276unlock: 1479unlock:
1277 spin_unlock(&fs_info->qgroup_lock); 1480 spin_unlock(&fs_info->qgroup_lock);
1481 mutex_unlock(&fs_info->qgroup_rescan_lock);
1278 ulist_free(roots); 1482 ulist_free(roots);
1279 ulist_free(tmp); 1483 ulist_free(tmp);
1280 1484
@@ -1289,10 +1493,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1289{ 1493{
1290 struct btrfs_root *quota_root = fs_info->quota_root; 1494 struct btrfs_root *quota_root = fs_info->quota_root;
1291 int ret = 0; 1495 int ret = 0;
1496 int start_rescan_worker = 0;
1292 1497
1293 if (!quota_root) 1498 if (!quota_root)
1294 goto out; 1499 goto out;
1295 1500
1501 if (!fs_info->quota_enabled && fs_info->pending_quota_state)
1502 start_rescan_worker = 1;
1503
1296 fs_info->quota_enabled = fs_info->pending_quota_state; 1504 fs_info->quota_enabled = fs_info->pending_quota_state;
1297 1505
1298 spin_lock(&fs_info->qgroup_lock); 1506 spin_lock(&fs_info->qgroup_lock);
@@ -1318,6 +1526,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1318 if (ret) 1526 if (ret)
1319 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1527 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1320 1528
1529 if (!ret && start_rescan_worker) {
1530 ret = btrfs_qgroup_rescan(fs_info);
1531 if (ret)
1532 pr_err("btrfs: start rescan quota failed: %d\n", ret);
1533 ret = 0;
1534 }
1535
1321out: 1536out:
1322 1537
1323 return ret; 1538 return ret;
@@ -1338,12 +1553,30 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1338 struct btrfs_qgroup *srcgroup; 1553 struct btrfs_qgroup *srcgroup;
1339 struct btrfs_qgroup *dstgroup; 1554 struct btrfs_qgroup *dstgroup;
1340 u32 level_size = 0; 1555 u32 level_size = 0;
1556 u64 nums;
1341 1557
1558 mutex_lock(&fs_info->qgroup_ioctl_lock);
1342 if (!fs_info->quota_enabled) 1559 if (!fs_info->quota_enabled)
1343 return 0; 1560 goto out;
1344 1561
1345 if (!quota_root) 1562 if (!quota_root) {
1346 return -EINVAL; 1563 ret = -EINVAL;
1564 goto out;
1565 }
1566
1567 if (inherit) {
1568 i_qgroups = (u64 *)(inherit + 1);
1569 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
1570 2 * inherit->num_excl_copies;
1571 for (i = 0; i < nums; ++i) {
1572 srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
1573 if (!srcgroup) {
1574 ret = -EINVAL;
1575 goto out;
1576 }
1577 ++i_qgroups;
1578 }
1579 }
1347 1580
1348 /* 1581 /*
1349 * create a tracking group for the subvol itself 1582 * create a tracking group for the subvol itself
@@ -1470,6 +1703,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1470unlock: 1703unlock:
1471 spin_unlock(&fs_info->qgroup_lock); 1704 spin_unlock(&fs_info->qgroup_lock);
1472out: 1705out:
1706 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1473 return ret; 1707 return ret;
1474} 1708}
1475 1709
@@ -1514,7 +1748,10 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1514 ret = -ENOMEM; 1748 ret = -ENOMEM;
1515 goto out; 1749 goto out;
1516 } 1750 }
1517 ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1751 ret = ulist_add(ulist, qgroup->qgroupid,
1752 (uintptr_t)qgroup, GFP_ATOMIC);
1753 if (ret < 0)
1754 goto out;
1518 ULIST_ITER_INIT(&uiter); 1755 ULIST_ITER_INIT(&uiter);
1519 while ((unode = ulist_next(ulist, &uiter))) { 1756 while ((unode = ulist_next(ulist, &uiter))) {
1520 struct btrfs_qgroup *qg; 1757 struct btrfs_qgroup *qg;
@@ -1523,25 +1760,27 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1523 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 1760 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1524 1761
1525 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 1762 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1526 qg->reserved + qg->rfer + num_bytes > 1763 qg->reserved + (s64)qg->rfer + num_bytes >
1527 qg->max_rfer) { 1764 qg->max_rfer) {
1528 ret = -EDQUOT; 1765 ret = -EDQUOT;
1529 goto out; 1766 goto out;
1530 } 1767 }
1531 1768
1532 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 1769 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1533 qg->reserved + qg->excl + num_bytes > 1770 qg->reserved + (s64)qg->excl + num_bytes >
1534 qg->max_excl) { 1771 qg->max_excl) {
1535 ret = -EDQUOT; 1772 ret = -EDQUOT;
1536 goto out; 1773 goto out;
1537 } 1774 }
1538 1775
1539 list_for_each_entry(glist, &qg->groups, next_group) { 1776 list_for_each_entry(glist, &qg->groups, next_group) {
1540 ulist_add(ulist, glist->group->qgroupid, 1777 ret = ulist_add(ulist, glist->group->qgroupid,
1541 (uintptr_t)glist->group, GFP_ATOMIC); 1778 (uintptr_t)glist->group, GFP_ATOMIC);
1779 if (ret < 0)
1780 goto out;
1542 } 1781 }
1543 } 1782 }
1544 1783 ret = 0;
1545 /* 1784 /*
1546 * no limits exceeded, now record the reservation into all qgroups 1785 * no limits exceeded, now record the reservation into all qgroups
1547 */ 1786 */
@@ -1570,6 +1809,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1570 struct ulist_node *unode; 1809 struct ulist_node *unode;
1571 struct ulist_iterator uiter; 1810 struct ulist_iterator uiter;
1572 u64 ref_root = root->root_key.objectid; 1811 u64 ref_root = root->root_key.objectid;
1812 int ret = 0;
1573 1813
1574 if (!is_fstree(ref_root)) 1814 if (!is_fstree(ref_root))
1575 return; 1815 return;
@@ -1592,7 +1832,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1592 btrfs_std_error(fs_info, -ENOMEM); 1832 btrfs_std_error(fs_info, -ENOMEM);
1593 goto out; 1833 goto out;
1594 } 1834 }
1595 ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1835 ret = ulist_add(ulist, qgroup->qgroupid,
1836 (uintptr_t)qgroup, GFP_ATOMIC);
1837 if (ret < 0)
1838 goto out;
1596 ULIST_ITER_INIT(&uiter); 1839 ULIST_ITER_INIT(&uiter);
1597 while ((unode = ulist_next(ulist, &uiter))) { 1840 while ((unode = ulist_next(ulist, &uiter))) {
1598 struct btrfs_qgroup *qg; 1841 struct btrfs_qgroup *qg;
@@ -1603,8 +1846,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1603 qg->reserved -= num_bytes; 1846 qg->reserved -= num_bytes;
1604 1847
1605 list_for_each_entry(glist, &qg->groups, next_group) { 1848 list_for_each_entry(glist, &qg->groups, next_group) {
1606 ulist_add(ulist, glist->group->qgroupid, 1849 ret = ulist_add(ulist, glist->group->qgroupid,
1607 (uintptr_t)glist->group, GFP_ATOMIC); 1850 (uintptr_t)glist->group, GFP_ATOMIC);
1851 if (ret < 0)
1852 goto out;
1608 } 1853 }
1609 } 1854 }
1610 1855
@@ -1617,8 +1862,265 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1617{ 1862{
1618 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 1863 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1619 return; 1864 return;
1620 printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n", 1865 pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
1621 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 1866 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1622 trans->delayed_ref_elem.seq); 1867 (u32)(trans->delayed_ref_elem.seq >> 32),
1868 (u32)trans->delayed_ref_elem.seq);
1623 BUG(); 1869 BUG();
1624} 1870}
1871
1872/*
1873 * returns < 0 on error, 0 when more leafs are to be scanned.
1874 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1875 */
1876static int
1877qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
1878 struct btrfs_trans_handle *trans, struct ulist *tmp,
1879 struct extent_buffer *scratch_leaf)
1880{
1881 struct btrfs_key found;
1882 struct btrfs_fs_info *fs_info = qscan->fs_info;
1883 struct ulist *roots = NULL;
1884 struct ulist_node *unode;
1885 struct ulist_iterator uiter;
1886 struct seq_list tree_mod_seq_elem = {};
1887 u64 seq;
1888 int slot;
1889 int ret;
1890
1891 path->leave_spinning = 1;
1892 mutex_lock(&fs_info->qgroup_rescan_lock);
1893 ret = btrfs_search_slot_for_read(fs_info->extent_root,
1894 &fs_info->qgroup_rescan_progress,
1895 path, 1, 0);
1896
1897 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1898 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
1899 fs_info->qgroup_rescan_progress.type,
1900 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
1901 ret);
1902
1903 if (ret) {
1904 /*
1905 * The rescan is about to end, we will not be scanning any
1906 * further blocks. We cannot unset the RESCAN flag here, because
1907 * we want to commit the transaction if everything went well.
1908 * To make the live accounting work in this phase, we set our
1909 * scan progress pointer such that every real extent objectid
1910 * will be smaller.
1911 */
1912 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
1913 btrfs_release_path(path);
1914 mutex_unlock(&fs_info->qgroup_rescan_lock);
1915 return ret;
1916 }
1917
1918 btrfs_item_key_to_cpu(path->nodes[0], &found,
1919 btrfs_header_nritems(path->nodes[0]) - 1);
1920 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
1921
1922 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1923 memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
1924 slot = path->slots[0];
1925 btrfs_release_path(path);
1926 mutex_unlock(&fs_info->qgroup_rescan_lock);
1927
1928 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1929 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1930 if (found.type != BTRFS_EXTENT_ITEM_KEY)
1931 continue;
1932 ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1933 tree_mod_seq_elem.seq, &roots);
1934 if (ret < 0)
1935 goto out;
1936 spin_lock(&fs_info->qgroup_lock);
1937 seq = fs_info->qgroup_seq;
1938 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1939
1940 ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1941 if (ret) {
1942 spin_unlock(&fs_info->qgroup_lock);
1943 ulist_free(roots);
1944 goto out;
1945 }
1946
1947 /*
1948 * step2 of btrfs_qgroup_account_ref works from a single root,
1949 * we're doing all at once here.
1950 */
1951 ulist_reinit(tmp);
1952 ULIST_ITER_INIT(&uiter);
1953 while ((unode = ulist_next(roots, &uiter))) {
1954 struct btrfs_qgroup *qg;
1955
1956 qg = find_qgroup_rb(fs_info, unode->val);
1957 if (!qg)
1958 continue;
1959
1960 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1961 GFP_ATOMIC);
1962 if (ret < 0) {
1963 spin_unlock(&fs_info->qgroup_lock);
1964 ulist_free(roots);
1965 goto out;
1966 }
1967 }
1968
1969 /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1970 ULIST_ITER_INIT(&uiter);
1971 while ((unode = ulist_next(tmp, &uiter))) {
1972 struct btrfs_qgroup *qg;
1973 struct btrfs_qgroup_list *glist;
1974
1975 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1976 qg->rfer += found.offset;
1977 qg->rfer_cmpr += found.offset;
1978 WARN_ON(qg->tag >= seq);
1979 if (qg->refcnt - seq == roots->nnodes) {
1980 qg->excl += found.offset;
1981 qg->excl_cmpr += found.offset;
1982 }
1983 qgroup_dirty(fs_info, qg);
1984
1985 list_for_each_entry(glist, &qg->groups, next_group) {
1986 ret = ulist_add(tmp, glist->group->qgroupid,
1987 (uintptr_t)glist->group,
1988 GFP_ATOMIC);
1989 if (ret < 0) {
1990 spin_unlock(&fs_info->qgroup_lock);
1991 ulist_free(roots);
1992 goto out;
1993 }
1994 }
1995 }
1996
1997 spin_unlock(&fs_info->qgroup_lock);
1998 ulist_free(roots);
1999 ret = 0;
2000 }
2001
2002out:
2003 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2004
2005 return ret;
2006}
2007
2008static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2009{
2010 struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
2011 work);
2012 struct btrfs_path *path;
2013 struct btrfs_trans_handle *trans = NULL;
2014 struct btrfs_fs_info *fs_info = qscan->fs_info;
2015 struct ulist *tmp = NULL;
2016 struct extent_buffer *scratch_leaf = NULL;
2017 int err = -ENOMEM;
2018
2019 path = btrfs_alloc_path();
2020 if (!path)
2021 goto out;
2022 tmp = ulist_alloc(GFP_NOFS);
2023 if (!tmp)
2024 goto out;
2025 scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2026 if (!scratch_leaf)
2027 goto out;
2028
2029 err = 0;
2030 while (!err) {
2031 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2032 if (IS_ERR(trans)) {
2033 err = PTR_ERR(trans);
2034 break;
2035 }
2036 if (!fs_info->quota_enabled) {
2037 err = -EINTR;
2038 } else {
2039 err = qgroup_rescan_leaf(qscan, path, trans,
2040 tmp, scratch_leaf);
2041 }
2042 if (err > 0)
2043 btrfs_commit_transaction(trans, fs_info->fs_root);
2044 else
2045 btrfs_end_transaction(trans, fs_info->fs_root);
2046 }
2047
2048out:
2049 kfree(scratch_leaf);
2050 ulist_free(tmp);
2051 btrfs_free_path(path);
2052 kfree(qscan);
2053
2054 mutex_lock(&fs_info->qgroup_rescan_lock);
2055 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2056
2057 if (err == 2 &&
2058 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2059 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2060 } else if (err < 0) {
2061 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2062 }
2063 mutex_unlock(&fs_info->qgroup_rescan_lock);
2064
2065 if (err >= 0) {
2066 pr_info("btrfs: qgroup scan completed%s\n",
2067 err == 2 ? " (inconsistency flag cleared)" : "");
2068 } else {
2069 pr_err("btrfs: qgroup scan failed with %d\n", err);
2070 }
2071}
2072
2073static void
2074qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
2075{
2076 memset(&qscan->work, 0, sizeof(qscan->work));
2077 qscan->work.func = btrfs_qgroup_rescan_worker;
2078 qscan->fs_info = fs_info;
2079
2080 pr_info("btrfs: qgroup scan started\n");
2081 btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
2082}
2083
2084int
2085btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2086{
2087 int ret = 0;
2088 struct rb_node *n;
2089 struct btrfs_qgroup *qgroup;
2090 struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
2091
2092 if (!qscan)
2093 return -ENOMEM;
2094
2095 mutex_lock(&fs_info->qgroup_rescan_lock);
2096 spin_lock(&fs_info->qgroup_lock);
2097 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2098 ret = -EINPROGRESS;
2099 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2100 ret = -EINVAL;
2101 if (ret) {
2102 spin_unlock(&fs_info->qgroup_lock);
2103 mutex_unlock(&fs_info->qgroup_rescan_lock);
2104 kfree(qscan);
2105 return ret;
2106 }
2107
2108 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2109 memset(&fs_info->qgroup_rescan_progress, 0,
2110 sizeof(fs_info->qgroup_rescan_progress));
2111
2112 /* clear all current qgroup tracking information */
2113 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2114 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2115 qgroup->rfer = 0;
2116 qgroup->rfer_cmpr = 0;
2117 qgroup->excl = 0;
2118 qgroup->excl_cmpr = 0;
2119 }
2120 spin_unlock(&fs_info->qgroup_lock);
2121 mutex_unlock(&fs_info->qgroup_rescan_lock);
2122
2123 qgroup_rescan_start(fs_info, qscan);
2124
2125 return 0;
2126}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 9a79fb790adb..0740621daf6c 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -410,7 +410,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
410/* 410/*
411 * remove everything in the cache 411 * remove everything in the cache
412 */ 412 */
413void btrfs_clear_rbio_cache(struct btrfs_fs_info *info) 413static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
414{ 414{
415 struct btrfs_stripe_hash_table *table; 415 struct btrfs_stripe_hash_table *table;
416 unsigned long flags; 416 unsigned long flags;
@@ -1010,12 +1010,12 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
1010 * this will try to merge into existing bios if possible, and returns 1010 * this will try to merge into existing bios if possible, and returns
1011 * zero if all went well. 1011 * zero if all went well.
1012 */ 1012 */
1013int rbio_add_io_page(struct btrfs_raid_bio *rbio, 1013static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1014 struct bio_list *bio_list, 1014 struct bio_list *bio_list,
1015 struct page *page, 1015 struct page *page,
1016 int stripe_nr, 1016 int stripe_nr,
1017 unsigned long page_index, 1017 unsigned long page_index,
1018 unsigned long bio_max_len) 1018 unsigned long bio_max_len)
1019{ 1019{
1020 struct bio *last = bio_list->tail; 1020 struct bio *last = bio_list->tail;
1021 u64 last_end = 0; 1021 u64 last_end = 0;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 96b93daa0bbb..1031b69252c5 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -955,10 +955,11 @@ int btrfs_reada_wait(void *handle)
955 while (atomic_read(&rc->elems)) { 955 while (atomic_read(&rc->elems)) {
956 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 956 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
957 5 * HZ); 957 5 * HZ);
958 dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0); 958 dump_devs(rc->root->fs_info,
959 atomic_read(&rc->elems) < 10 ? 1 : 0);
959 } 960 }
960 961
961 dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0); 962 dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
962 963
963 kref_put(&rc->refcnt, reada_control_release); 964 kref_put(&rc->refcnt, reada_control_release);
964 965
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b67171e6d688..704a1b8d2a2b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -326,8 +326,7 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
326 return NULL; 326 return NULL;
327} 327}
328 328
329void backref_tree_panic(struct rb_node *rb_node, int errno, 329static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
330 u64 bytenr)
331{ 330{
332 331
333 struct btrfs_fs_info *fs_info = NULL; 332 struct btrfs_fs_info *fs_info = NULL;
@@ -619,10 +618,13 @@ static noinline_for_stack
619int find_inline_backref(struct extent_buffer *leaf, int slot, 618int find_inline_backref(struct extent_buffer *leaf, int slot,
620 unsigned long *ptr, unsigned long *end) 619 unsigned long *ptr, unsigned long *end)
621{ 620{
621 struct btrfs_key key;
622 struct btrfs_extent_item *ei; 622 struct btrfs_extent_item *ei;
623 struct btrfs_tree_block_info *bi; 623 struct btrfs_tree_block_info *bi;
624 u32 item_size; 624 u32 item_size;
625 625
626 btrfs_item_key_to_cpu(leaf, &key, slot);
627
626 item_size = btrfs_item_size_nr(leaf, slot); 628 item_size = btrfs_item_size_nr(leaf, slot);
627#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 629#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
628 if (item_size < sizeof(*ei)) { 630 if (item_size < sizeof(*ei)) {
@@ -634,13 +636,18 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
634 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 636 WARN_ON(!(btrfs_extent_flags(leaf, ei) &
635 BTRFS_EXTENT_FLAG_TREE_BLOCK)); 637 BTRFS_EXTENT_FLAG_TREE_BLOCK));
636 638
637 if (item_size <= sizeof(*ei) + sizeof(*bi)) { 639 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
640 item_size <= sizeof(*ei) + sizeof(*bi)) {
638 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 641 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
639 return 1; 642 return 1;
640 } 643 }
641 644
642 bi = (struct btrfs_tree_block_info *)(ei + 1); 645 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
643 *ptr = (unsigned long)(bi + 1); 646 bi = (struct btrfs_tree_block_info *)(ei + 1);
647 *ptr = (unsigned long)(bi + 1);
648 } else {
649 *ptr = (unsigned long)(ei + 1);
650 }
644 *end = (unsigned long)ei + item_size; 651 *end = (unsigned long)ei + item_size;
645 return 0; 652 return 0;
646} 653}
@@ -708,7 +715,7 @@ again:
708 end = 0; 715 end = 0;
709 ptr = 0; 716 ptr = 0;
710 key.objectid = cur->bytenr; 717 key.objectid = cur->bytenr;
711 key.type = BTRFS_EXTENT_ITEM_KEY; 718 key.type = BTRFS_METADATA_ITEM_KEY;
712 key.offset = (u64)-1; 719 key.offset = (u64)-1;
713 720
714 path1->search_commit_root = 1; 721 path1->search_commit_root = 1;
@@ -766,7 +773,8 @@ again:
766 break; 773 break;
767 } 774 }
768 775
769 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 776 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
777 key.type == BTRFS_METADATA_ITEM_KEY) {
770 ret = find_inline_backref(eb, path1->slots[0], 778 ret = find_inline_backref(eb, path1->slots[0],
771 &ptr, &end); 779 &ptr, &end);
772 if (ret) 780 if (ret)
@@ -1762,7 +1770,11 @@ again:
1762 1770
1763 eb = read_tree_block(dest, old_bytenr, blocksize, 1771 eb = read_tree_block(dest, old_bytenr, blocksize,
1764 old_ptr_gen); 1772 old_ptr_gen);
1765 BUG_ON(!eb); 1773 if (!eb || !extent_buffer_uptodate(eb)) {
1774 ret = (!eb) ? -ENOMEM : -EIO;
1775 free_extent_buffer(eb);
1776 return ret;
1777 }
1766 btrfs_tree_lock(eb); 1778 btrfs_tree_lock(eb);
1767 if (cow) { 1779 if (cow) {
1768 ret = btrfs_cow_block(trans, dest, eb, parent, 1780 ret = btrfs_cow_block(trans, dest, eb, parent,
@@ -1915,6 +1927,10 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1915 bytenr = btrfs_node_blockptr(eb, path->slots[i]); 1927 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1916 blocksize = btrfs_level_size(root, i - 1); 1928 blocksize = btrfs_level_size(root, i - 1);
1917 eb = read_tree_block(root, bytenr, blocksize, ptr_gen); 1929 eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1930 if (!eb || !extent_buffer_uptodate(eb)) {
1931 free_extent_buffer(eb);
1932 return -EIO;
1933 }
1918 BUG_ON(btrfs_header_level(eb) != i - 1); 1934 BUG_ON(btrfs_header_level(eb) != i - 1);
1919 path->nodes[i - 1] = eb; 1935 path->nodes[i - 1] = eb;
1920 path->slots[i - 1] = 0; 1936 path->slots[i - 1] = 0;
@@ -2592,7 +2608,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2592 blocksize = btrfs_level_size(root, node->level); 2608 blocksize = btrfs_level_size(root, node->level);
2593 generation = btrfs_node_ptr_generation(upper->eb, slot); 2609 generation = btrfs_node_ptr_generation(upper->eb, slot);
2594 eb = read_tree_block(root, bytenr, blocksize, generation); 2610 eb = read_tree_block(root, bytenr, blocksize, generation);
2595 if (!eb) { 2611 if (!eb || !extent_buffer_uptodate(eb)) {
2612 free_extent_buffer(eb);
2596 err = -EIO; 2613 err = -EIO;
2597 goto next; 2614 goto next;
2598 } 2615 }
@@ -2753,7 +2770,10 @@ static int get_tree_block_key(struct reloc_control *rc,
2753 BUG_ON(block->key_ready); 2770 BUG_ON(block->key_ready);
2754 eb = read_tree_block(rc->extent_root, block->bytenr, 2771 eb = read_tree_block(rc->extent_root, block->bytenr,
2755 block->key.objectid, block->key.offset); 2772 block->key.objectid, block->key.offset);
2756 BUG_ON(!eb); 2773 if (!eb || !extent_buffer_uptodate(eb)) {
2774 free_extent_buffer(eb);
2775 return -EIO;
2776 }
2757 WARN_ON(btrfs_header_level(eb) != block->level); 2777 WARN_ON(btrfs_header_level(eb) != block->level);
2758 if (block->level == 0) 2778 if (block->level == 0)
2759 btrfs_item_key_to_cpu(eb, &block->key, 0); 2779 btrfs_item_key_to_cpu(eb, &block->key, 0);
@@ -2768,8 +2788,13 @@ static int reada_tree_block(struct reloc_control *rc,
2768 struct tree_block *block) 2788 struct tree_block *block)
2769{ 2789{
2770 BUG_ON(block->key_ready); 2790 BUG_ON(block->key_ready);
2771 readahead_tree_block(rc->extent_root, block->bytenr, 2791 if (block->key.type == BTRFS_METADATA_ITEM_KEY)
2772 block->key.objectid, block->key.offset); 2792 readahead_tree_block(rc->extent_root, block->bytenr,
2793 block->key.objectid,
2794 rc->extent_root->leafsize);
2795 else
2796 readahead_tree_block(rc->extent_root, block->bytenr,
2797 block->key.objectid, block->key.offset);
2773 return 0; 2798 return 0;
2774} 2799}
2775 2800
@@ -2850,7 +2875,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2850 path = btrfs_alloc_path(); 2875 path = btrfs_alloc_path();
2851 if (!path) { 2876 if (!path) {
2852 err = -ENOMEM; 2877 err = -ENOMEM;
2853 goto out_path; 2878 goto out_free_blocks;
2854 } 2879 }
2855 2880
2856 rb_node = rb_first(blocks); 2881 rb_node = rb_first(blocks);
@@ -2864,8 +2889,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2864 rb_node = rb_first(blocks); 2889 rb_node = rb_first(blocks);
2865 while (rb_node) { 2890 while (rb_node) {
2866 block = rb_entry(rb_node, struct tree_block, rb_node); 2891 block = rb_entry(rb_node, struct tree_block, rb_node);
2867 if (!block->key_ready) 2892 if (!block->key_ready) {
2868 get_tree_block_key(rc, block); 2893 err = get_tree_block_key(rc, block);
2894 if (err)
2895 goto out_free_path;
2896 }
2869 rb_node = rb_next(rb_node); 2897 rb_node = rb_next(rb_node);
2870 } 2898 }
2871 2899
@@ -2892,8 +2920,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2892out: 2920out:
2893 err = finish_pending_nodes(trans, rc, path, err); 2921 err = finish_pending_nodes(trans, rc, path, err);
2894 2922
2923out_free_path:
2895 btrfs_free_path(path); 2924 btrfs_free_path(path);
2896out_path: 2925out_free_blocks:
2897 free_block_list(blocks); 2926 free_block_list(blocks);
2898 return err; 2927 return err;
2899} 2928}
@@ -2965,7 +2994,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2965 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 2994 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2966 while (1) { 2995 while (1) {
2967 write_lock(&em_tree->lock); 2996 write_lock(&em_tree->lock);
2968 ret = add_extent_mapping(em_tree, em); 2997 ret = add_extent_mapping(em_tree, em, 0);
2969 write_unlock(&em_tree->lock); 2998 write_unlock(&em_tree->lock);
2970 if (ret != -EEXIST) { 2999 if (ret != -EEXIST) {
2971 free_extent_map(em); 3000 free_extent_map(em);
@@ -3176,12 +3205,17 @@ static int add_tree_block(struct reloc_control *rc,
3176 eb = path->nodes[0]; 3205 eb = path->nodes[0];
3177 item_size = btrfs_item_size_nr(eb, path->slots[0]); 3206 item_size = btrfs_item_size_nr(eb, path->slots[0]);
3178 3207
3179 if (item_size >= sizeof(*ei) + sizeof(*bi)) { 3208 if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
3209 item_size >= sizeof(*ei) + sizeof(*bi)) {
3180 ei = btrfs_item_ptr(eb, path->slots[0], 3210 ei = btrfs_item_ptr(eb, path->slots[0],
3181 struct btrfs_extent_item); 3211 struct btrfs_extent_item);
3182 bi = (struct btrfs_tree_block_info *)(ei + 1); 3212 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
3213 bi = (struct btrfs_tree_block_info *)(ei + 1);
3214 level = btrfs_tree_block_level(eb, bi);
3215 } else {
3216 level = (int)extent_key->offset;
3217 }
3183 generation = btrfs_extent_generation(eb, ei); 3218 generation = btrfs_extent_generation(eb, ei);
3184 level = btrfs_tree_block_level(eb, bi);
3185 } else { 3219 } else {
3186#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3220#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3187 u64 ref_owner; 3221 u64 ref_owner;
@@ -3210,7 +3244,7 @@ static int add_tree_block(struct reloc_control *rc,
3210 return -ENOMEM; 3244 return -ENOMEM;
3211 3245
3212 block->bytenr = extent_key->objectid; 3246 block->bytenr = extent_key->objectid;
3213 block->key.objectid = extent_key->offset; 3247 block->key.objectid = rc->extent_root->leafsize;
3214 block->key.offset = generation; 3248 block->key.offset = generation;
3215 block->level = level; 3249 block->level = level;
3216 block->key_ready = 0; 3250 block->key_ready = 0;
@@ -3252,9 +3286,15 @@ static int __add_tree_block(struct reloc_control *rc,
3252 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 3286 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3253 if (ret < 0) 3287 if (ret < 0)
3254 goto out; 3288 goto out;
3255 BUG_ON(ret);
3256 3289
3257 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 3290 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3291 if (ret > 0) {
3292 if (key.objectid == bytenr &&
3293 key.type == BTRFS_METADATA_ITEM_KEY)
3294 ret = 0;
3295 }
3296 BUG_ON(ret);
3297
3258 ret = add_tree_block(rc, &key, path, blocks); 3298 ret = add_tree_block(rc, &key, path, blocks);
3259out: 3299out:
3260 btrfs_free_path(path); 3300 btrfs_free_path(path);
@@ -3275,7 +3315,8 @@ static int block_use_full_backref(struct reloc_control *rc,
3275 return 1; 3315 return 1;
3276 3316
3277 ret = btrfs_lookup_extent_info(NULL, rc->extent_root, 3317 ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
3278 eb->start, eb->len, NULL, &flags); 3318 eb->start, btrfs_header_level(eb), 1,
3319 NULL, &flags);
3279 BUG_ON(ret); 3320 BUG_ON(ret);
3280 3321
3281 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) 3322 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
@@ -3644,12 +3685,25 @@ next:
3644 break; 3685 break;
3645 } 3686 }
3646 3687
3647 if (key.type != BTRFS_EXTENT_ITEM_KEY || 3688 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3689 key.type != BTRFS_METADATA_ITEM_KEY) {
3690 path->slots[0]++;
3691 goto next;
3692 }
3693
3694 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3648 key.objectid + key.offset <= rc->search_start) { 3695 key.objectid + key.offset <= rc->search_start) {
3649 path->slots[0]++; 3696 path->slots[0]++;
3650 goto next; 3697 goto next;
3651 } 3698 }
3652 3699
3700 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3701 key.objectid + rc->extent_root->leafsize <=
3702 rc->search_start) {
3703 path->slots[0]++;
3704 goto next;
3705 }
3706
3653 ret = find_first_extent_bit(&rc->processed_blocks, 3707 ret = find_first_extent_bit(&rc->processed_blocks,
3654 key.objectid, &start, &end, 3708 key.objectid, &start, &end,
3655 EXTENT_DIRTY, NULL); 3709 EXTENT_DIRTY, NULL);
@@ -3658,7 +3712,11 @@ next:
3658 btrfs_release_path(path); 3712 btrfs_release_path(path);
3659 rc->search_start = end + 1; 3713 rc->search_start = end + 1;
3660 } else { 3714 } else {
3661 rc->search_start = key.objectid + key.offset; 3715 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3716 rc->search_start = key.objectid + key.offset;
3717 else
3718 rc->search_start = key.objectid +
3719 rc->extent_root->leafsize;
3662 memcpy(extent_key, &key, sizeof(key)); 3720 memcpy(extent_key, &key, sizeof(key));
3663 return 0; 3721 return 0;
3664 } 3722 }
@@ -4105,10 +4163,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4105 4163
4106 while (1) { 4164 while (1) {
4107 mutex_lock(&fs_info->cleaner_mutex); 4165 mutex_lock(&fs_info->cleaner_mutex);
4108
4109 btrfs_clean_old_snapshots(fs_info->tree_root);
4110 ret = relocate_block_group(rc); 4166 ret = relocate_block_group(rc);
4111
4112 mutex_unlock(&fs_info->cleaner_mutex); 4167 mutex_unlock(&fs_info->cleaner_mutex);
4113 if (ret < 0) { 4168 if (ret < 0) {
4114 err = ret; 4169 err = ret;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 668af537a3ea..5bf1ed57f178 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -29,9 +29,8 @@
29 * generation numbers as then we know the root was once mounted with an older 29 * generation numbers as then we know the root was once mounted with an older
30 * kernel that was not aware of the root item structure change. 30 * kernel that was not aware of the root item structure change.
31 */ 31 */
32void btrfs_read_root_item(struct btrfs_root *root, 32void btrfs_read_root_item(struct extent_buffer *eb, int slot,
33 struct extent_buffer *eb, int slot, 33 struct btrfs_root_item *item)
34 struct btrfs_root_item *item)
35{ 34{
36 uuid_le uuid; 35 uuid_le uuid;
37 int len; 36 int len;
@@ -104,7 +103,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
104 goto out; 103 goto out;
105 } 104 }
106 if (item) 105 if (item)
107 btrfs_read_root_item(root, l, slot, item); 106 btrfs_read_root_item(l, slot, item);
108 if (key) 107 if (key)
109 memcpy(key, &found_key, sizeof(found_key)); 108 memcpy(key, &found_key, sizeof(found_key));
110 109
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 85e072b956d5..f489e24659a4 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1336,7 +1336,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1336 int page_num; 1336 int page_num;
1337 u8 calculated_csum[BTRFS_CSUM_SIZE]; 1337 u8 calculated_csum[BTRFS_CSUM_SIZE];
1338 u32 crc = ~(u32)0; 1338 u32 crc = ~(u32)0;
1339 struct btrfs_root *root = fs_info->extent_root;
1340 void *mapped_buffer; 1339 void *mapped_buffer;
1341 1340
1342 WARN_ON(!sblock->pagev[0]->page); 1341 WARN_ON(!sblock->pagev[0]->page);
@@ -1365,12 +1364,11 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1365 1364
1366 for (page_num = 0;;) { 1365 for (page_num = 0;;) {
1367 if (page_num == 0 && is_metadata) 1366 if (page_num == 0 && is_metadata)
1368 crc = btrfs_csum_data(root, 1367 crc = btrfs_csum_data(
1369 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE, 1368 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1370 crc, PAGE_SIZE - BTRFS_CSUM_SIZE); 1369 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1371 else 1370 else
1372 crc = btrfs_csum_data(root, mapped_buffer, crc, 1371 crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
1373 PAGE_SIZE);
1374 1372
1375 kunmap_atomic(mapped_buffer); 1373 kunmap_atomic(mapped_buffer);
1376 page_num++; 1374 page_num++;
@@ -1657,7 +1655,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1657 void *buffer; 1655 void *buffer;
1658 u32 crc = ~(u32)0; 1656 u32 crc = ~(u32)0;
1659 int fail = 0; 1657 int fail = 0;
1660 struct btrfs_root *root = sctx->dev_root;
1661 u64 len; 1658 u64 len;
1662 int index; 1659 int index;
1663 1660
@@ -1674,7 +1671,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1674 for (;;) { 1671 for (;;) {
1675 u64 l = min_t(u64, len, PAGE_SIZE); 1672 u64 l = min_t(u64, len, PAGE_SIZE);
1676 1673
1677 crc = btrfs_csum_data(root, buffer, crc, l); 1674 crc = btrfs_csum_data(buffer, crc, l);
1678 kunmap_atomic(buffer); 1675 kunmap_atomic(buffer);
1679 len -= l; 1676 len -= l;
1680 if (len == 0) 1677 if (len == 0)
@@ -1744,7 +1741,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1744 for (;;) { 1741 for (;;) {
1745 u64 l = min_t(u64, len, mapped_size); 1742 u64 l = min_t(u64, len, mapped_size);
1746 1743
1747 crc = btrfs_csum_data(root, p, crc, l); 1744 crc = btrfs_csum_data(p, crc, l);
1748 kunmap_atomic(mapped_buffer); 1745 kunmap_atomic(mapped_buffer);
1749 len -= l; 1746 len -= l;
1750 if (len == 0) 1747 if (len == 0)
@@ -1805,7 +1802,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1805 for (;;) { 1802 for (;;) {
1806 u64 l = min_t(u64, len, mapped_size); 1803 u64 l = min_t(u64, len, mapped_size);
1807 1804
1808 crc = btrfs_csum_data(root, p, crc, l); 1805 crc = btrfs_csum_data(p, crc, l);
1809 kunmap_atomic(mapped_buffer); 1806 kunmap_atomic(mapped_buffer);
1810 len -= l; 1807 len -= l;
1811 if (len == 0) 1808 if (len == 0)
@@ -2236,12 +2233,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2236 u64 flags; 2233 u64 flags;
2237 int ret; 2234 int ret;
2238 int slot; 2235 int slot;
2239 int i;
2240 u64 nstripes; 2236 u64 nstripes;
2241 struct extent_buffer *l; 2237 struct extent_buffer *l;
2242 struct btrfs_key key; 2238 struct btrfs_key key;
2243 u64 physical; 2239 u64 physical;
2244 u64 logical; 2240 u64 logical;
2241 u64 logic_end;
2245 u64 generation; 2242 u64 generation;
2246 int mirror_num; 2243 int mirror_num;
2247 struct reada_control *reada1; 2244 struct reada_control *reada1;
@@ -2255,6 +2252,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2255 u64 extent_len; 2252 u64 extent_len;
2256 struct btrfs_device *extent_dev; 2253 struct btrfs_device *extent_dev;
2257 int extent_mirror_num; 2254 int extent_mirror_num;
2255 int stop_loop;
2258 2256
2259 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 2257 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2260 BTRFS_BLOCK_GROUP_RAID6)) { 2258 BTRFS_BLOCK_GROUP_RAID6)) {
@@ -2315,8 +2313,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2315 key_start.type = BTRFS_EXTENT_ITEM_KEY; 2313 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2316 key_start.offset = (u64)0; 2314 key_start.offset = (u64)0;
2317 key_end.objectid = base + offset + nstripes * increment; 2315 key_end.objectid = base + offset + nstripes * increment;
2318 key_end.type = BTRFS_EXTENT_ITEM_KEY; 2316 key_end.type = BTRFS_METADATA_ITEM_KEY;
2319 key_end.offset = (u64)0; 2317 key_end.offset = (u64)-1;
2320 reada1 = btrfs_reada_add(root, &key_start, &key_end); 2318 reada1 = btrfs_reada_add(root, &key_start, &key_end);
2321 2319
2322 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 2320 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -2354,8 +2352,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2354 */ 2352 */
2355 logical = base + offset; 2353 logical = base + offset;
2356 physical = map->stripes[num].physical; 2354 physical = map->stripes[num].physical;
2355 logic_end = logical + increment * nstripes;
2357 ret = 0; 2356 ret = 0;
2358 for (i = 0; i < nstripes; ++i) { 2357 while (logical < logic_end) {
2359 /* 2358 /*
2360 * canceled? 2359 * canceled?
2361 */ 2360 */
@@ -2391,19 +2390,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2391 wake_up(&fs_info->scrub_pause_wait); 2390 wake_up(&fs_info->scrub_pause_wait);
2392 } 2391 }
2393 2392
2394 ret = btrfs_lookup_csums_range(csum_root, logical,
2395 logical + map->stripe_len - 1,
2396 &sctx->csum_list, 1);
2397 if (ret)
2398 goto out;
2399
2400 key.objectid = logical; 2393 key.objectid = logical;
2401 key.type = BTRFS_EXTENT_ITEM_KEY; 2394 key.type = BTRFS_EXTENT_ITEM_KEY;
2402 key.offset = (u64)0; 2395 key.offset = (u64)-1;
2403 2396
2404 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2397 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2405 if (ret < 0) 2398 if (ret < 0)
2406 goto out; 2399 goto out;
2400
2407 if (ret > 0) { 2401 if (ret > 0) {
2408 ret = btrfs_previous_item(root, path, 0, 2402 ret = btrfs_previous_item(root, path, 0,
2409 BTRFS_EXTENT_ITEM_KEY); 2403 BTRFS_EXTENT_ITEM_KEY);
@@ -2420,7 +2414,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2420 } 2414 }
2421 } 2415 }
2422 2416
2417 stop_loop = 0;
2423 while (1) { 2418 while (1) {
2419 u64 bytes;
2420
2424 l = path->nodes[0]; 2421 l = path->nodes[0];
2425 slot = path->slots[0]; 2422 slot = path->slots[0];
2426 if (slot >= btrfs_header_nritems(l)) { 2423 if (slot >= btrfs_header_nritems(l)) {
@@ -2430,19 +2427,30 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2430 if (ret < 0) 2427 if (ret < 0)
2431 goto out; 2428 goto out;
2432 2429
2430 stop_loop = 1;
2433 break; 2431 break;
2434 } 2432 }
2435 btrfs_item_key_to_cpu(l, &key, slot); 2433 btrfs_item_key_to_cpu(l, &key, slot);
2436 2434
2437 if (key.objectid + key.offset <= logical) 2435 if (key.type == BTRFS_METADATA_ITEM_KEY)
2438 goto next; 2436 bytes = root->leafsize;
2437 else
2438 bytes = key.offset;
2439 2439
2440 if (key.objectid >= logical + map->stripe_len) 2440 if (key.objectid + bytes <= logical)
2441 break; 2441 goto next;
2442 2442
2443 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) 2443 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2444 key.type != BTRFS_METADATA_ITEM_KEY)
2444 goto next; 2445 goto next;
2445 2446
2447 if (key.objectid >= logical + map->stripe_len) {
2448 /* out of this device extent */
2449 if (key.objectid >= logic_end)
2450 stop_loop = 1;
2451 break;
2452 }
2453
2446 extent = btrfs_item_ptr(l, slot, 2454 extent = btrfs_item_ptr(l, slot,
2447 struct btrfs_extent_item); 2455 struct btrfs_extent_item);
2448 flags = btrfs_extent_flags(l, extent); 2456 flags = btrfs_extent_flags(l, extent);
@@ -2458,22 +2466,24 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2458 goto next; 2466 goto next;
2459 } 2467 }
2460 2468
2469again:
2470 extent_logical = key.objectid;
2471 extent_len = bytes;
2472
2461 /* 2473 /*
2462 * trim extent to this stripe 2474 * trim extent to this stripe
2463 */ 2475 */
2464 if (key.objectid < logical) { 2476 if (extent_logical < logical) {
2465 key.offset -= logical - key.objectid; 2477 extent_len -= logical - extent_logical;
2466 key.objectid = logical; 2478 extent_logical = logical;
2467 } 2479 }
2468 if (key.objectid + key.offset > 2480 if (extent_logical + extent_len >
2469 logical + map->stripe_len) { 2481 logical + map->stripe_len) {
2470 key.offset = logical + map->stripe_len - 2482 extent_len = logical + map->stripe_len -
2471 key.objectid; 2483 extent_logical;
2472 } 2484 }
2473 2485
2474 extent_logical = key.objectid; 2486 extent_physical = extent_logical - logical + physical;
2475 extent_physical = key.objectid - logical + physical;
2476 extent_len = key.offset;
2477 extent_dev = scrub_dev; 2487 extent_dev = scrub_dev;
2478 extent_mirror_num = mirror_num; 2488 extent_mirror_num = mirror_num;
2479 if (is_dev_replace) 2489 if (is_dev_replace)
@@ -2481,13 +2491,35 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2481 extent_len, &extent_physical, 2491 extent_len, &extent_physical,
2482 &extent_dev, 2492 &extent_dev,
2483 &extent_mirror_num); 2493 &extent_mirror_num);
2494
2495 ret = btrfs_lookup_csums_range(csum_root, logical,
2496 logical + map->stripe_len - 1,
2497 &sctx->csum_list, 1);
2498 if (ret)
2499 goto out;
2500
2484 ret = scrub_extent(sctx, extent_logical, extent_len, 2501 ret = scrub_extent(sctx, extent_logical, extent_len,
2485 extent_physical, extent_dev, flags, 2502 extent_physical, extent_dev, flags,
2486 generation, extent_mirror_num, 2503 generation, extent_mirror_num,
2487 key.objectid - logical + physical); 2504 extent_physical);
2488 if (ret) 2505 if (ret)
2489 goto out; 2506 goto out;
2490 2507
2508 if (extent_logical + extent_len <
2509 key.objectid + bytes) {
2510 logical += increment;
2511 physical += map->stripe_len;
2512
2513 if (logical < key.objectid + bytes) {
2514 cond_resched();
2515 goto again;
2516 }
2517
2518 if (logical >= logic_end) {
2519 stop_loop = 1;
2520 break;
2521 }
2522 }
2491next: 2523next:
2492 path->slots[0]++; 2524 path->slots[0]++;
2493 } 2525 }
@@ -2495,8 +2527,14 @@ next:
2495 logical += increment; 2527 logical += increment;
2496 physical += map->stripe_len; 2528 physical += map->stripe_len;
2497 spin_lock(&sctx->stat_lock); 2529 spin_lock(&sctx->stat_lock);
2498 sctx->stat.last_physical = physical; 2530 if (stop_loop)
2531 sctx->stat.last_physical = map->stripes[num].physical +
2532 length;
2533 else
2534 sctx->stat.last_physical = physical;
2499 spin_unlock(&sctx->stat_lock); 2535 spin_unlock(&sctx->stat_lock);
2536 if (stop_loop)
2537 break;
2500 } 2538 }
2501out: 2539out:
2502 /* push queued extents */ 2540 /* push queued extents */
@@ -3005,28 +3043,6 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
3005 return 0; 3043 return 0;
3006} 3044}
3007 3045
3008int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
3009{
3010 struct btrfs_fs_info *fs_info = root->fs_info;
3011 struct btrfs_device *dev;
3012 int ret;
3013
3014 /*
3015 * we have to hold the device_list_mutex here so the device
3016 * does not go away in cancel_dev. FIXME: find a better solution
3017 */
3018 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3019 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
3020 if (!dev) {
3021 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3022 return -ENODEV;
3023 }
3024 ret = btrfs_scrub_cancel_dev(fs_info, dev);
3025 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3026
3027 return ret;
3028}
3029
3030int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, 3046int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3031 struct btrfs_scrub_progress *progress) 3047 struct btrfs_scrub_progress *progress)
3032{ 3048{
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c85e7c6b4598..ff40f1c00ce3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -387,7 +387,7 @@ static struct btrfs_path *alloc_path_for_send(void)
387 return path; 387 return path;
388} 388}
389 389
390int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) 390static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
391{ 391{
392 int ret; 392 int ret;
393 mm_segment_t old_fs; 393 mm_segment_t old_fs;
@@ -3479,7 +3479,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3479 struct send_ctx *sctx = ctx; 3479 struct send_ctx *sctx = ctx;
3480 char *found_data = NULL; 3480 char *found_data = NULL;
3481 int found_data_len = 0; 3481 int found_data_len = 0;
3482 struct fs_path *p = NULL;
3483 3482
3484 ret = find_xattr(sctx, sctx->parent_root, sctx->right_path, 3483 ret = find_xattr(sctx, sctx->parent_root, sctx->right_path,
3485 sctx->cmp_key, name, name_len, &found_data, 3484 sctx->cmp_key, name, name_len, &found_data,
@@ -3498,7 +3497,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3498 } 3497 }
3499 3498
3500 kfree(found_data); 3499 kfree(found_data);
3501 fs_path_free(sctx, p);
3502 return ret; 3500 return ret;
3503} 3501}
3504 3502
@@ -4529,9 +4527,11 @@ static int send_subvol(struct send_ctx *sctx)
4529{ 4527{
4530 int ret; 4528 int ret;
4531 4529
4532 ret = send_header(sctx); 4530 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
4533 if (ret < 0) 4531 ret = send_header(sctx);
4534 goto out; 4532 if (ret < 0)
4533 goto out;
4534 }
4535 4535
4536 ret = send_subvol_begin(sctx); 4536 ret = send_subvol_begin(sctx);
4537 if (ret < 0) 4537 if (ret < 0)
@@ -4593,7 +4593,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4593 goto out; 4593 goto out;
4594 } 4594 }
4595 4595
4596 if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) { 4596 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
4597 ret = -EINVAL; 4597 ret = -EINVAL;
4598 goto out; 4598 goto out;
4599 } 4599 }
@@ -4612,8 +4612,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4612 sctx->flags = arg->flags; 4612 sctx->flags = arg->flags;
4613 4613
4614 sctx->send_filp = fget(arg->send_fd); 4614 sctx->send_filp = fget(arg->send_fd);
4615 if (IS_ERR(sctx->send_filp)) { 4615 if (!sctx->send_filp) {
4616 ret = PTR_ERR(sctx->send_filp); 4616 ret = -EBADF;
4617 goto out; 4617 goto out;
4618 } 4618 }
4619 4619
@@ -4704,12 +4704,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4704 if (ret < 0) 4704 if (ret < 0)
4705 goto out; 4705 goto out;
4706 4706
4707 ret = begin_cmd(sctx, BTRFS_SEND_C_END); 4707 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
4708 if (ret < 0) 4708 ret = begin_cmd(sctx, BTRFS_SEND_C_END);
4709 goto out; 4709 if (ret < 0)
4710 ret = send_cmd(sctx); 4710 goto out;
4711 if (ret < 0) 4711 ret = send_cmd(sctx);
4712 goto out; 4712 if (ret < 0)
4713 goto out;
4714 }
4713 4715
4714out: 4716out:
4715 kfree(arg); 4717 kfree(arg);
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 8bb18f7ccaa6..48d425aef05b 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -131,5 +131,4 @@ enum {
131 131
132#ifdef __KERNEL__ 132#ifdef __KERNEL__
133long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); 133long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
134int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off);
135#endif 134#endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f6b88595f858..a4807ced23cc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -56,6 +56,7 @@
56#include "compression.h" 56#include "compression.h"
57#include "rcu-string.h" 57#include "rcu-string.h"
58#include "dev-replace.h" 58#include "dev-replace.h"
59#include "free-space-cache.h"
59 60
60#define CREATE_TRACE_POINTS 61#define CREATE_TRACE_POINTS
61#include <trace/events/btrfs.h> 62#include <trace/events/btrfs.h>
@@ -63,9 +64,9 @@
63static const struct super_operations btrfs_super_ops; 64static const struct super_operations btrfs_super_ops;
64static struct file_system_type btrfs_fs_type; 65static struct file_system_type btrfs_fs_type;
65 66
66static const char *btrfs_decode_error(int errno, char nbuf[16]) 67static const char *btrfs_decode_error(int errno)
67{ 68{
68 char *errstr = NULL; 69 char *errstr = "unknown";
69 70
70 switch (errno) { 71 switch (errno) {
71 case -EIO: 72 case -EIO:
@@ -80,18 +81,18 @@ static const char *btrfs_decode_error(int errno, char nbuf[16])
80 case -EEXIST: 81 case -EEXIST:
81 errstr = "Object already exists"; 82 errstr = "Object already exists";
82 break; 83 break;
83 default: 84 case -ENOSPC:
84 if (nbuf) { 85 errstr = "No space left";
85 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 86 break;
86 errstr = nbuf; 87 case -ENOENT:
87 } 88 errstr = "No such entry";
88 break; 89 break;
89 } 90 }
90 91
91 return errstr; 92 return errstr;
92} 93}
93 94
94static void __save_error_info(struct btrfs_fs_info *fs_info) 95static void save_error_info(struct btrfs_fs_info *fs_info)
95{ 96{
96 /* 97 /*
97 * today we only save the error info into ram. Long term we'll 98 * today we only save the error info into ram. Long term we'll
@@ -100,11 +101,6 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
100 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); 101 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
101} 102}
102 103
103static void save_error_info(struct btrfs_fs_info *fs_info)
104{
105 __save_error_info(fs_info);
106}
107
108/* btrfs handle error by forcing the filesystem readonly */ 104/* btrfs handle error by forcing the filesystem readonly */
109static void btrfs_handle_error(struct btrfs_fs_info *fs_info) 105static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
110{ 106{
@@ -115,7 +111,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
115 111
116 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 112 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
117 sb->s_flags |= MS_RDONLY; 113 sb->s_flags |= MS_RDONLY;
118 printk(KERN_INFO "btrfs is forced readonly\n"); 114 btrfs_info(fs_info, "forced readonly");
119 /* 115 /*
120 * Note that a running device replace operation is not 116 * Note that a running device replace operation is not
121 * canceled here although there is no way to update 117 * canceled here although there is no way to update
@@ -126,7 +122,6 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
126 * mounted writeable again, the device replace 122 * mounted writeable again, the device replace
127 * operation continues. 123 * operation continues.
128 */ 124 */
129// WARN_ON(1);
130 } 125 }
131} 126}
132 127
@@ -139,7 +134,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
139 unsigned int line, int errno, const char *fmt, ...) 134 unsigned int line, int errno, const char *fmt, ...)
140{ 135{
141 struct super_block *sb = fs_info->sb; 136 struct super_block *sb = fs_info->sb;
142 char nbuf[16];
143 const char *errstr; 137 const char *errstr;
144 138
145 /* 139 /*
@@ -149,7 +143,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
149 if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) 143 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
150 return; 144 return;
151 145
152 errstr = btrfs_decode_error(errno, nbuf); 146 errstr = btrfs_decode_error(errno);
153 if (fmt) { 147 if (fmt) {
154 struct va_format vaf; 148 struct va_format vaf;
155 va_list args; 149 va_list args;
@@ -158,19 +152,18 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
158 vaf.fmt = fmt; 152 vaf.fmt = fmt;
159 vaf.va = &args; 153 vaf.va = &args;
160 154
161 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n", 155 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s (%pV)\n",
162 sb->s_id, function, line, errstr, &vaf); 156 sb->s_id, function, line, errno, errstr, &vaf);
163 va_end(args); 157 va_end(args);
164 } else { 158 } else {
165 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", 159 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s\n",
166 sb->s_id, function, line, errstr); 160 sb->s_id, function, line, errno, errstr);
167 } 161 }
168 162
169 /* Don't go through full error handling during mount */ 163 /* Don't go through full error handling during mount */
170 if (sb->s_flags & MS_BORN) { 164 save_error_info(fs_info);
171 save_error_info(fs_info); 165 if (sb->s_flags & MS_BORN)
172 btrfs_handle_error(fs_info); 166 btrfs_handle_error(fs_info);
173 }
174} 167}
175 168
176static const char * const logtypes[] = { 169static const char * const logtypes[] = {
@@ -184,7 +177,7 @@ static const char * const logtypes[] = {
184 "debug", 177 "debug",
185}; 178};
186 179
187void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) 180void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
188{ 181{
189 struct super_block *sb = fs_info->sb; 182 struct super_block *sb = fs_info->sb;
190 char lvl[4]; 183 char lvl[4];
@@ -208,7 +201,7 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
208 vaf.fmt = fmt; 201 vaf.fmt = fmt;
209 vaf.va = &args; 202 vaf.va = &args;
210 203
211 printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf); 204 printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
212 205
213 va_end(args); 206 va_end(args);
214} 207}
@@ -252,18 +245,24 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
252 struct btrfs_root *root, const char *function, 245 struct btrfs_root *root, const char *function,
253 unsigned int line, int errno) 246 unsigned int line, int errno)
254{ 247{
255 WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted\n"); 248 /*
249 * Report first abort since mount
250 */
251 if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
252 &root->fs_info->fs_state)) {
253 WARN(1, KERN_DEBUG "btrfs: Transaction aborted (error %d)\n",
254 errno);
255 }
256 trans->aborted = errno; 256 trans->aborted = errno;
257 /* Nothing used. The other threads that have joined this 257 /* Nothing used. The other threads that have joined this
258 * transaction may be able to continue. */ 258 * transaction may be able to continue. */
259 if (!trans->blocks_used) { 259 if (!trans->blocks_used) {
260 char nbuf[16];
261 const char *errstr; 260 const char *errstr;
262 261
263 errstr = btrfs_decode_error(errno, nbuf); 262 errstr = btrfs_decode_error(errno);
264 btrfs_printk(root->fs_info, 263 btrfs_warn(root->fs_info,
265 "%s:%d: Aborting unused transaction(%s).\n", 264 "%s:%d: Aborting unused transaction(%s).",
266 function, line, errstr); 265 function, line, errstr);
267 return; 266 return;
268 } 267 }
269 ACCESS_ONCE(trans->transaction->aborted) = errno; 268 ACCESS_ONCE(trans->transaction->aborted) = errno;
@@ -276,7 +275,6 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
276void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, 275void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
277 unsigned int line, int errno, const char *fmt, ...) 276 unsigned int line, int errno, const char *fmt, ...)
278{ 277{
279 char nbuf[16];
280 char *s_id = "<unknown>"; 278 char *s_id = "<unknown>";
281 const char *errstr; 279 const char *errstr;
282 struct va_format vaf = { .fmt = fmt }; 280 struct va_format vaf = { .fmt = fmt };
@@ -288,13 +286,13 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
288 va_start(args, fmt); 286 va_start(args, fmt);
289 vaf.va = &args; 287 vaf.va = &args;
290 288
291 errstr = btrfs_decode_error(errno, nbuf); 289 errstr = btrfs_decode_error(errno);
292 if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)) 290 if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
293 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", 291 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
294 s_id, function, line, &vaf, errstr); 292 s_id, function, line, &vaf, errno, errstr);
295 293
296 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", 294 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
297 s_id, function, line, &vaf, errstr); 295 s_id, function, line, &vaf, errno, errstr);
298 va_end(args); 296 va_end(args);
299 /* Caller calls BUG() */ 297 /* Caller calls BUG() */
300} 298}
@@ -650,7 +648,7 @@ out:
650 */ 648 */
651static int btrfs_parse_early_options(const char *options, fmode_t flags, 649static int btrfs_parse_early_options(const char *options, fmode_t flags,
652 void *holder, char **subvol_name, u64 *subvol_objectid, 650 void *holder, char **subvol_name, u64 *subvol_objectid,
653 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) 651 struct btrfs_fs_devices **fs_devices)
654{ 652{
655 substring_t args[MAX_OPT_ARGS]; 653 substring_t args[MAX_OPT_ARGS];
656 char *device_name, *opts, *orig, *p; 654 char *device_name, *opts, *orig, *p;
@@ -693,16 +691,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
693 } 691 }
694 break; 692 break;
695 case Opt_subvolrootid: 693 case Opt_subvolrootid:
696 intarg = 0; 694 printk(KERN_WARNING
697 error = match_int(&args[0], &intarg); 695 "btrfs: 'subvolrootid' mount option is deprecated and has no effect\n");
698 if (!error) {
699 /* we want the original fs_tree */
700 if (!intarg)
701 *subvol_rootid =
702 BTRFS_FS_TREE_OBJECTID;
703 else
704 *subvol_rootid = intarg;
705 }
706 break; 696 break;
707 case Opt_device: 697 case Opt_device:
708 device_name = match_strdup(&args[0]); 698 device_name = match_strdup(&args[0]);
@@ -876,7 +866,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
876 return 0; 866 return 0;
877 } 867 }
878 868
879 btrfs_wait_ordered_extents(root, 0); 869 btrfs_wait_ordered_extents(root, 1);
880 870
881 trans = btrfs_attach_transaction_barrier(root); 871 trans = btrfs_attach_transaction_barrier(root);
882 if (IS_ERR(trans)) { 872 if (IS_ERR(trans)) {
@@ -1080,7 +1070,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1080 fmode_t mode = FMODE_READ; 1070 fmode_t mode = FMODE_READ;
1081 char *subvol_name = NULL; 1071 char *subvol_name = NULL;
1082 u64 subvol_objectid = 0; 1072 u64 subvol_objectid = 0;
1083 u64 subvol_rootid = 0;
1084 int error = 0; 1073 int error = 0;
1085 1074
1086 if (!(flags & MS_RDONLY)) 1075 if (!(flags & MS_RDONLY))
@@ -1088,7 +1077,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1088 1077
1089 error = btrfs_parse_early_options(data, mode, fs_type, 1078 error = btrfs_parse_early_options(data, mode, fs_type,
1090 &subvol_name, &subvol_objectid, 1079 &subvol_name, &subvol_objectid,
1091 &subvol_rootid, &fs_devices); 1080 &fs_devices);
1092 if (error) { 1081 if (error) {
1093 kfree(subvol_name); 1082 kfree(subvol_name);
1094 return ERR_PTR(error); 1083 return ERR_PTR(error);
@@ -1202,11 +1191,14 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1202 new_pool_size); 1191 new_pool_size);
1203} 1192}
1204 1193
1205static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info, 1194static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
1206 unsigned long old_opts, int flags)
1207{ 1195{
1208 set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 1196 set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1197}
1209 1198
1199static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
1200 unsigned long old_opts, int flags)
1201{
1210 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && 1202 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1211 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || 1203 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1212 (flags & MS_RDONLY))) { 1204 (flags & MS_RDONLY))) {
@@ -1247,7 +1239,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1247 unsigned int old_metadata_ratio = fs_info->metadata_ratio; 1239 unsigned int old_metadata_ratio = fs_info->metadata_ratio;
1248 int ret; 1240 int ret;
1249 1241
1250 btrfs_remount_prepare(fs_info, old_opts, *flags); 1242 btrfs_remount_prepare(fs_info);
1251 1243
1252 ret = btrfs_parse_options(root, data); 1244 ret = btrfs_parse_options(root, data);
1253 if (ret) { 1245 if (ret) {
@@ -1255,6 +1247,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1255 goto restore; 1247 goto restore;
1256 } 1248 }
1257 1249
1250 btrfs_remount_begin(fs_info, old_opts, *flags);
1258 btrfs_resize_thread_pool(fs_info, 1251 btrfs_resize_thread_pool(fs_info,
1259 fs_info->thread_pool_size, old_thread_pool_size); 1252 fs_info->thread_pool_size, old_thread_pool_size);
1260 1253
@@ -1739,6 +1732,10 @@ static int __init init_btrfs_fs(void)
1739 1732
1740 btrfs_init_lockdep(); 1733 btrfs_init_lockdep();
1741 1734
1735#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
1736 btrfs_test_free_space_cache();
1737#endif
1738
1742 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); 1739 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
1743 return 0; 1740 return 0;
1744 1741
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 50767bbaad6c..0544587d74f4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -34,7 +34,7 @@
34 34
35#define BTRFS_ROOT_TRANS_TAG 0 35#define BTRFS_ROOT_TRANS_TAG 0
36 36
37void put_transaction(struct btrfs_transaction *transaction) 37static void put_transaction(struct btrfs_transaction *transaction)
38{ 38{
39 WARN_ON(atomic_read(&transaction->use_count) == 0); 39 WARN_ON(atomic_read(&transaction->use_count) == 0);
40 if (atomic_dec_and_test(&transaction->use_count)) { 40 if (atomic_dec_and_test(&transaction->use_count)) {
@@ -162,7 +162,7 @@ loop:
162 if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) 162 if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
163 WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " 163 WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
164 "creating a fresh transaction\n"); 164 "creating a fresh transaction\n");
165 atomic_set(&fs_info->tree_mod_seq, 0); 165 atomic64_set(&fs_info->tree_mod_seq, 0);
166 166
167 spin_lock_init(&cur_trans->commit_lock); 167 spin_lock_init(&cur_trans->commit_lock);
168 spin_lock_init(&cur_trans->delayed_refs.lock); 168 spin_lock_init(&cur_trans->delayed_refs.lock);
@@ -707,23 +707,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
707int btrfs_end_transaction(struct btrfs_trans_handle *trans, 707int btrfs_end_transaction(struct btrfs_trans_handle *trans,
708 struct btrfs_root *root) 708 struct btrfs_root *root)
709{ 709{
710 int ret; 710 return __btrfs_end_transaction(trans, root, 0);
711
712 ret = __btrfs_end_transaction(trans, root, 0);
713 if (ret)
714 return ret;
715 return 0;
716} 711}
717 712
718int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 713int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
719 struct btrfs_root *root) 714 struct btrfs_root *root)
720{ 715{
721 int ret; 716 return __btrfs_end_transaction(trans, root, 1);
722
723 ret = __btrfs_end_transaction(trans, root, 1);
724 if (ret)
725 return ret;
726 return 0;
727} 717}
728 718
729int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, 719int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
@@ -948,7 +938,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
948int btrfs_add_dead_root(struct btrfs_root *root) 938int btrfs_add_dead_root(struct btrfs_root *root)
949{ 939{
950 spin_lock(&root->fs_info->trans_lock); 940 spin_lock(&root->fs_info->trans_lock);
951 list_add(&root->root_list, &root->fs_info->dead_roots); 941 list_add_tail(&root->root_list, &root->fs_info->dead_roots);
952 spin_unlock(&root->fs_info->trans_lock); 942 spin_unlock(&root->fs_info->trans_lock);
953 return 0; 943 return 0;
954} 944}
@@ -1179,13 +1169,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1179 memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); 1169 memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
1180 memcpy(new_root_item->parent_uuid, root->root_item.uuid, 1170 memcpy(new_root_item->parent_uuid, root->root_item.uuid,
1181 BTRFS_UUID_SIZE); 1171 BTRFS_UUID_SIZE);
1172 if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
1173 memset(new_root_item->received_uuid, 0,
1174 sizeof(new_root_item->received_uuid));
1175 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
1176 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
1177 btrfs_set_root_stransid(new_root_item, 0);
1178 btrfs_set_root_rtransid(new_root_item, 0);
1179 }
1182 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); 1180 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
1183 new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); 1181 new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
1184 btrfs_set_root_otransid(new_root_item, trans->transid); 1182 btrfs_set_root_otransid(new_root_item, trans->transid);
1185 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
1186 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
1187 btrfs_set_root_stransid(new_root_item, 0);
1188 btrfs_set_root_rtransid(new_root_item, 0);
1189 1183
1190 old = btrfs_lock_root_node(root); 1184 old = btrfs_lock_root_node(root);
1191 ret = btrfs_cow_block(trans, root, old, NULL, 0, &old); 1185 ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
@@ -1487,6 +1481,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
1487 current->journal_info = NULL; 1481 current->journal_info = NULL;
1488 1482
1489 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1483 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1484
1485 spin_lock(&root->fs_info->trans_lock);
1486 root->fs_info->trans_no_join = 0;
1487 spin_unlock(&root->fs_info->trans_lock);
1490} 1488}
1491 1489
1492static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, 1490static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
@@ -1808,7 +1806,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1808 ret = btrfs_write_and_wait_transaction(trans, root); 1806 ret = btrfs_write_and_wait_transaction(trans, root);
1809 if (ret) { 1807 if (ret) {
1810 btrfs_error(root->fs_info, ret, 1808 btrfs_error(root->fs_info, ret,
1811 "Error while writing out transaction."); 1809 "Error while writing out transaction");
1812 mutex_unlock(&root->fs_info->tree_log_mutex); 1810 mutex_unlock(&root->fs_info->tree_log_mutex);
1813 goto cleanup_transaction; 1811 goto cleanup_transaction;
1814 } 1812 }
@@ -1864,8 +1862,7 @@ cleanup_transaction:
1864 btrfs_qgroup_free(root, trans->qgroup_reserved); 1862 btrfs_qgroup_free(root, trans->qgroup_reserved);
1865 trans->qgroup_reserved = 0; 1863 trans->qgroup_reserved = 0;
1866 } 1864 }
1867 btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); 1865 btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
1868// WARN_ON(1);
1869 if (current->journal_info == trans) 1866 if (current->journal_info == trans)
1870 current->journal_info = NULL; 1867 current->journal_info = NULL;
1871 cleanup_transaction(trans, root, ret); 1868 cleanup_transaction(trans, root, ret);
@@ -1874,31 +1871,49 @@ cleanup_transaction:
1874} 1871}
1875 1872
1876/* 1873/*
1877 * interface function to delete all the snapshots we have scheduled for deletion 1874 * return < 0 if error
1875 * 0 if there are no more dead_roots at the time of call
1876 * 1 there are more to be processed, call me again
1877 *
1878 * The return value indicates there are certainly more snapshots to delete, but
1879 * if there comes a new one during processing, it may return 0. We don't mind,
1880 * because btrfs_commit_super will poke cleaner thread and it will process it a
1881 * few seconds later.
1878 */ 1882 */
1879int btrfs_clean_old_snapshots(struct btrfs_root *root) 1883int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1880{ 1884{
1881 LIST_HEAD(list); 1885 int ret;
1882 struct btrfs_fs_info *fs_info = root->fs_info; 1886 struct btrfs_fs_info *fs_info = root->fs_info;
1883 1887
1888 if (fs_info->sb->s_flags & MS_RDONLY) {
1889 pr_debug("btrfs: cleaner called for RO fs!\n");
1890 return 0;
1891 }
1892
1884 spin_lock(&fs_info->trans_lock); 1893 spin_lock(&fs_info->trans_lock);
1885 list_splice_init(&fs_info->dead_roots, &list); 1894 if (list_empty(&fs_info->dead_roots)) {
1895 spin_unlock(&fs_info->trans_lock);
1896 return 0;
1897 }
1898 root = list_first_entry(&fs_info->dead_roots,
1899 struct btrfs_root, root_list);
1900 list_del(&root->root_list);
1886 spin_unlock(&fs_info->trans_lock); 1901 spin_unlock(&fs_info->trans_lock);
1887 1902
1888 while (!list_empty(&list)) { 1903 pr_debug("btrfs: cleaner removing %llu\n",
1889 int ret; 1904 (unsigned long long)root->objectid);
1890
1891 root = list_entry(list.next, struct btrfs_root, root_list);
1892 list_del(&root->root_list);
1893 1905
1894 btrfs_kill_all_delayed_nodes(root); 1906 btrfs_kill_all_delayed_nodes(root);
1895 1907
1896 if (btrfs_header_backref_rev(root->node) < 1908 if (btrfs_header_backref_rev(root->node) <
1897 BTRFS_MIXED_BACKREF_REV) 1909 BTRFS_MIXED_BACKREF_REV)
1898 ret = btrfs_drop_snapshot(root, NULL, 0, 0); 1910 ret = btrfs_drop_snapshot(root, NULL, 0, 0);
1899 else 1911 else
1900 ret =btrfs_drop_snapshot(root, NULL, 1, 0); 1912 ret = btrfs_drop_snapshot(root, NULL, 1, 0);
1901 BUG_ON(ret < 0); 1913 /*
1902 } 1914 * If we encounter a transaction abort during snapshot cleaning, we
1903 return 0; 1915 * don't want to crash here
1916 */
1917 BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS);
1918 return 1;
1904} 1919}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 3c8e0d25c8e4..24c97335a59f 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -123,7 +123,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
123 123
124int btrfs_add_dead_root(struct btrfs_root *root); 124int btrfs_add_dead_root(struct btrfs_root *root);
125int btrfs_defrag_root(struct btrfs_root *root); 125int btrfs_defrag_root(struct btrfs_root *root);
126int btrfs_clean_old_snapshots(struct btrfs_root *root); 126int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
127int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 127int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
128 struct btrfs_root *root); 128 struct btrfs_root *root);
129int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, 129int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
@@ -146,5 +146,4 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
146 struct extent_io_tree *dirty_pages, int mark); 146 struct extent_io_tree *dirty_pages, int mark);
147int btrfs_transaction_blocked(struct btrfs_fs_info *info); 147int btrfs_transaction_blocked(struct btrfs_fs_info *info);
148int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 148int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
149void put_transaction(struct btrfs_transaction *transaction);
150#endif 149#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ef96381569a4..c276ac9a0ec3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -277,17 +277,19 @@ static int process_one_buffer(struct btrfs_root *log,
277 struct extent_buffer *eb, 277 struct extent_buffer *eb,
278 struct walk_control *wc, u64 gen) 278 struct walk_control *wc, u64 gen)
279{ 279{
280 int ret = 0;
281
280 if (wc->pin) 282 if (wc->pin)
281 btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, 283 ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
282 eb->start, eb->len); 284 eb->start, eb->len);
283 285
284 if (btrfs_buffer_uptodate(eb, gen, 0)) { 286 if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
285 if (wc->write) 287 if (wc->write)
286 btrfs_write_tree_block(eb); 288 btrfs_write_tree_block(eb);
287 if (wc->wait) 289 if (wc->wait)
288 btrfs_wait_tree_block_writeback(eb); 290 btrfs_wait_tree_block_writeback(eb);
289 } 291 }
290 return 0; 292 return ret;
291} 293}
292 294
293/* 295/*
@@ -408,9 +410,9 @@ insert:
408 found_size = btrfs_item_size_nr(path->nodes[0], 410 found_size = btrfs_item_size_nr(path->nodes[0],
409 path->slots[0]); 411 path->slots[0]);
410 if (found_size > item_size) 412 if (found_size > item_size)
411 btrfs_truncate_item(trans, root, path, item_size, 1); 413 btrfs_truncate_item(root, path, item_size, 1);
412 else if (found_size < item_size) 414 else if (found_size < item_size)
413 btrfs_extend_item(trans, root, path, 415 btrfs_extend_item(root, path,
414 item_size - found_size); 416 item_size - found_size);
415 } else if (ret) { 417 } else if (ret) {
416 return ret; 418 return ret;
@@ -587,7 +589,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
587 589
588 /* drop any overlapping extents */ 590 /* drop any overlapping extents */
589 ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); 591 ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
590 BUG_ON(ret); 592 if (ret)
593 goto out;
591 594
592 if (found_type == BTRFS_FILE_EXTENT_REG || 595 if (found_type == BTRFS_FILE_EXTENT_REG ||
593 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 596 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -597,7 +600,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
597 600
598 ret = btrfs_insert_empty_item(trans, root, path, key, 601 ret = btrfs_insert_empty_item(trans, root, path, key,
599 sizeof(*item)); 602 sizeof(*item));
600 BUG_ON(ret); 603 if (ret)
604 goto out;
601 dest_offset = btrfs_item_ptr_offset(path->nodes[0], 605 dest_offset = btrfs_item_ptr_offset(path->nodes[0],
602 path->slots[0]); 606 path->slots[0]);
603 copy_extent_buffer(path->nodes[0], eb, dest_offset, 607 copy_extent_buffer(path->nodes[0], eb, dest_offset,
@@ -623,7 +627,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
623 ins.objectid, ins.offset, 627 ins.objectid, ins.offset,
624 0, root->root_key.objectid, 628 0, root->root_key.objectid,
625 key->objectid, offset, 0); 629 key->objectid, offset, 0);
626 BUG_ON(ret); 630 if (ret)
631 goto out;
627 } else { 632 } else {
628 /* 633 /*
629 * insert the extent pointer in the extent 634 * insert the extent pointer in the extent
@@ -632,7 +637,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
632 ret = btrfs_alloc_logged_file_extent(trans, 637 ret = btrfs_alloc_logged_file_extent(trans,
633 root, root->root_key.objectid, 638 root, root->root_key.objectid,
634 key->objectid, offset, &ins); 639 key->objectid, offset, &ins);
635 BUG_ON(ret); 640 if (ret)
641 goto out;
636 } 642 }
637 btrfs_release_path(path); 643 btrfs_release_path(path);
638 644
@@ -649,26 +655,30 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
649 ret = btrfs_lookup_csums_range(root->log_root, 655 ret = btrfs_lookup_csums_range(root->log_root,
650 csum_start, csum_end - 1, 656 csum_start, csum_end - 1,
651 &ordered_sums, 0); 657 &ordered_sums, 0);
652 BUG_ON(ret); 658 if (ret)
659 goto out;
653 while (!list_empty(&ordered_sums)) { 660 while (!list_empty(&ordered_sums)) {
654 struct btrfs_ordered_sum *sums; 661 struct btrfs_ordered_sum *sums;
655 sums = list_entry(ordered_sums.next, 662 sums = list_entry(ordered_sums.next,
656 struct btrfs_ordered_sum, 663 struct btrfs_ordered_sum,
657 list); 664 list);
658 ret = btrfs_csum_file_blocks(trans, 665 if (!ret)
666 ret = btrfs_csum_file_blocks(trans,
659 root->fs_info->csum_root, 667 root->fs_info->csum_root,
660 sums); 668 sums);
661 BUG_ON(ret);
662 list_del(&sums->list); 669 list_del(&sums->list);
663 kfree(sums); 670 kfree(sums);
664 } 671 }
672 if (ret)
673 goto out;
665 } else { 674 } else {
666 btrfs_release_path(path); 675 btrfs_release_path(path);
667 } 676 }
668 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 677 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
669 /* inline extents are easy, we just overwrite them */ 678 /* inline extents are easy, we just overwrite them */
670 ret = overwrite_item(trans, root, path, eb, slot, key); 679 ret = overwrite_item(trans, root, path, eb, slot, key);
671 BUG_ON(ret); 680 if (ret)
681 goto out;
672 } 682 }
673 683
674 inode_add_bytes(inode, nbytes); 684 inode_add_bytes(inode, nbytes);
@@ -713,20 +723,21 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
713 723
714 inode = read_one_inode(root, location.objectid); 724 inode = read_one_inode(root, location.objectid);
715 if (!inode) { 725 if (!inode) {
716 kfree(name); 726 ret = -EIO;
717 return -EIO; 727 goto out;
718 } 728 }
719 729
720 ret = link_to_fixup_dir(trans, root, path, location.objectid); 730 ret = link_to_fixup_dir(trans, root, path, location.objectid);
721 BUG_ON(ret); 731 if (ret)
732 goto out;
722 733
723 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 734 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
724 BUG_ON(ret); 735 if (ret)
736 goto out;
737 btrfs_run_delayed_items(trans, root);
738out:
725 kfree(name); 739 kfree(name);
726
727 iput(inode); 740 iput(inode);
728
729 btrfs_run_delayed_items(trans, root);
730 return ret; 741 return ret;
731} 742}
732 743
@@ -879,7 +890,8 @@ again:
879 victim_name_len = btrfs_inode_ref_name_len(leaf, 890 victim_name_len = btrfs_inode_ref_name_len(leaf,
880 victim_ref); 891 victim_ref);
881 victim_name = kmalloc(victim_name_len, GFP_NOFS); 892 victim_name = kmalloc(victim_name_len, GFP_NOFS);
882 BUG_ON(!victim_name); 893 if (!victim_name)
894 return -ENOMEM;
883 895
884 read_extent_buffer(leaf, victim_name, 896 read_extent_buffer(leaf, victim_name,
885 (unsigned long)(victim_ref + 1), 897 (unsigned long)(victim_ref + 1),
@@ -895,9 +907,10 @@ again:
895 ret = btrfs_unlink_inode(trans, root, dir, 907 ret = btrfs_unlink_inode(trans, root, dir,
896 inode, victim_name, 908 inode, victim_name,
897 victim_name_len); 909 victim_name_len);
898 BUG_ON(ret);
899 btrfs_run_delayed_items(trans, root);
900 kfree(victim_name); 910 kfree(victim_name);
911 if (ret)
912 return ret;
913 btrfs_run_delayed_items(trans, root);
901 *search_done = 1; 914 *search_done = 1;
902 goto again; 915 goto again;
903 } 916 }
@@ -905,7 +918,6 @@ again:
905 918
906 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 919 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
907 } 920 }
908 BUG_ON(ret);
909 921
910 /* 922 /*
911 * NOTE: we have searched root tree and checked the 923 * NOTE: we have searched root tree and checked the
@@ -939,6 +951,8 @@ again:
939 goto next; 951 goto next;
940 952
941 victim_name = kmalloc(victim_name_len, GFP_NOFS); 953 victim_name = kmalloc(victim_name_len, GFP_NOFS);
954 if (!victim_name)
955 return -ENOMEM;
942 read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, 956 read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
943 victim_name_len); 957 victim_name_len);
944 958
@@ -965,14 +979,16 @@ again:
965 victim_name_len); 979 victim_name_len);
966 btrfs_run_delayed_items(trans, root); 980 btrfs_run_delayed_items(trans, root);
967 } 981 }
968 BUG_ON(ret);
969 iput(victim_parent); 982 iput(victim_parent);
970 kfree(victim_name); 983 kfree(victim_name);
984 if (ret)
985 return ret;
971 *search_done = 1; 986 *search_done = 1;
972 goto again; 987 goto again;
973 } 988 }
974 kfree(victim_name); 989 kfree(victim_name);
975 BUG_ON(ret); 990 if (ret)
991 return ret;
976next: 992next:
977 cur_offset += victim_name_len + sizeof(*extref); 993 cur_offset += victim_name_len + sizeof(*extref);
978 } 994 }
@@ -985,7 +1001,8 @@ next:
985 ref_index, name, namelen, 0); 1001 ref_index, name, namelen, 0);
986 if (di && !IS_ERR(di)) { 1002 if (di && !IS_ERR(di)) {
987 ret = drop_one_dir_item(trans, root, path, dir, di); 1003 ret = drop_one_dir_item(trans, root, path, dir, di);
988 BUG_ON(ret); 1004 if (ret)
1005 return ret;
989 } 1006 }
990 btrfs_release_path(path); 1007 btrfs_release_path(path);
991 1008
@@ -994,7 +1011,8 @@ next:
994 name, namelen, 0); 1011 name, namelen, 0);
995 if (di && !IS_ERR(di)) { 1012 if (di && !IS_ERR(di)) {
996 ret = drop_one_dir_item(trans, root, path, dir, di); 1013 ret = drop_one_dir_item(trans, root, path, dir, di);
997 BUG_ON(ret); 1014 if (ret)
1015 return ret;
998 } 1016 }
999 btrfs_release_path(path); 1017 btrfs_release_path(path);
1000 1018
@@ -1139,15 +1157,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1139 parent_objectid, 1157 parent_objectid,
1140 ref_index, name, namelen, 1158 ref_index, name, namelen,
1141 &search_done); 1159 &search_done);
1142 if (ret == 1) 1160 if (ret == 1) {
1161 ret = 0;
1162 goto out;
1163 }
1164 if (ret)
1143 goto out; 1165 goto out;
1144 BUG_ON(ret);
1145 } 1166 }
1146 1167
1147 /* insert our name */ 1168 /* insert our name */
1148 ret = btrfs_add_link(trans, dir, inode, name, namelen, 1169 ret = btrfs_add_link(trans, dir, inode, name, namelen,
1149 0, ref_index); 1170 0, ref_index);
1150 BUG_ON(ret); 1171 if (ret)
1172 goto out;
1151 1173
1152 btrfs_update_inode(trans, root, inode); 1174 btrfs_update_inode(trans, root, inode);
1153 } 1175 }
@@ -1162,13 +1184,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1162 1184
1163 /* finally write the back reference in the inode */ 1185 /* finally write the back reference in the inode */
1164 ret = overwrite_item(trans, root, path, eb, slot, key); 1186 ret = overwrite_item(trans, root, path, eb, slot, key);
1165 BUG_ON(ret);
1166
1167out: 1187out:
1168 btrfs_release_path(path); 1188 btrfs_release_path(path);
1169 iput(dir); 1189 iput(dir);
1170 iput(inode); 1190 iput(inode);
1171 return 0; 1191 return ret;
1172} 1192}
1173 1193
1174static int insert_orphan_item(struct btrfs_trans_handle *trans, 1194static int insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -1326,10 +1346,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1326 if (S_ISDIR(inode->i_mode)) { 1346 if (S_ISDIR(inode->i_mode)) {
1327 ret = replay_dir_deletes(trans, root, NULL, path, 1347 ret = replay_dir_deletes(trans, root, NULL, path,
1328 ino, 1); 1348 ino, 1);
1329 BUG_ON(ret); 1349 if (ret)
1350 goto out;
1330 } 1351 }
1331 ret = insert_orphan_item(trans, root, ino); 1352 ret = insert_orphan_item(trans, root, ino);
1332 BUG_ON(ret);
1333 } 1353 }
1334 1354
1335out: 1355out:
@@ -1374,9 +1394,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1374 return -EIO; 1394 return -EIO;
1375 1395
1376 ret = fixup_inode_link_count(trans, root, inode); 1396 ret = fixup_inode_link_count(trans, root, inode);
1377 BUG_ON(ret);
1378
1379 iput(inode); 1397 iput(inode);
1398 if (ret)
1399 goto out;
1380 1400
1381 /* 1401 /*
1382 * fixup on a directory may create new entries, 1402 * fixup on a directory may create new entries,
@@ -1426,7 +1446,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1426 } else if (ret == -EEXIST) { 1446 } else if (ret == -EEXIST) {
1427 ret = 0; 1447 ret = 0;
1428 } else { 1448 } else {
1429 BUG(); 1449 BUG(); /* Logic Error */
1430 } 1450 }
1431 iput(inode); 1451 iput(inode);
1432 1452
@@ -1495,7 +1515,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1495 struct inode *dir; 1515 struct inode *dir;
1496 u8 log_type; 1516 u8 log_type;
1497 int exists; 1517 int exists;
1498 int ret; 1518 int ret = 0;
1499 1519
1500 dir = read_one_inode(root, key->objectid); 1520 dir = read_one_inode(root, key->objectid);
1501 if (!dir) 1521 if (!dir)
@@ -1527,7 +1547,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1527 key->offset, name, 1547 key->offset, name,
1528 name_len, 1); 1548 name_len, 1);
1529 } else { 1549 } else {
1530 BUG(); 1550 /* Corruption */
1551 ret = -EINVAL;
1552 goto out;
1531 } 1553 }
1532 if (IS_ERR_OR_NULL(dst_di)) { 1554 if (IS_ERR_OR_NULL(dst_di)) {
1533 /* we need a sequence number to insert, so we only 1555 /* we need a sequence number to insert, so we only
@@ -1555,7 +1577,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1555 goto out; 1577 goto out;
1556 1578
1557 ret = drop_one_dir_item(trans, root, path, dir, dst_di); 1579 ret = drop_one_dir_item(trans, root, path, dir, dst_di);
1558 BUG_ON(ret); 1580 if (ret)
1581 goto out;
1559 1582
1560 if (key->type == BTRFS_DIR_INDEX_KEY) 1583 if (key->type == BTRFS_DIR_INDEX_KEY)
1561 goto insert; 1584 goto insert;
@@ -1563,14 +1586,15 @@ out:
1563 btrfs_release_path(path); 1586 btrfs_release_path(path);
1564 kfree(name); 1587 kfree(name);
1565 iput(dir); 1588 iput(dir);
1566 return 0; 1589 return ret;
1567 1590
1568insert: 1591insert:
1569 btrfs_release_path(path); 1592 btrfs_release_path(path);
1570 ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1593 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1571 name, name_len, log_type, &log_key); 1594 name, name_len, log_type, &log_key);
1572 1595 if (ret && ret != -ENOENT)
1573 BUG_ON(ret && ret != -ENOENT); 1596 goto out;
1597 ret = 0;
1574 goto out; 1598 goto out;
1575} 1599}
1576 1600
@@ -1601,7 +1625,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1601 return -EIO; 1625 return -EIO;
1602 name_len = btrfs_dir_name_len(eb, di); 1626 name_len = btrfs_dir_name_len(eb, di);
1603 ret = replay_one_name(trans, root, path, eb, di, key); 1627 ret = replay_one_name(trans, root, path, eb, di, key);
1604 BUG_ON(ret); 1628 if (ret)
1629 return ret;
1605 ptr = (unsigned long)(di + 1); 1630 ptr = (unsigned long)(di + 1);
1606 ptr += name_len; 1631 ptr += name_len;
1607 } 1632 }
@@ -1762,16 +1787,21 @@ again:
1762 1787
1763 ret = link_to_fixup_dir(trans, root, 1788 ret = link_to_fixup_dir(trans, root,
1764 path, location.objectid); 1789 path, location.objectid);
1765 BUG_ON(ret); 1790 if (ret) {
1791 kfree(name);
1792 iput(inode);
1793 goto out;
1794 }
1795
1766 btrfs_inc_nlink(inode); 1796 btrfs_inc_nlink(inode);
1767 ret = btrfs_unlink_inode(trans, root, dir, inode, 1797 ret = btrfs_unlink_inode(trans, root, dir, inode,
1768 name, name_len); 1798 name, name_len);
1769 BUG_ON(ret); 1799 if (!ret)
1770 1800 btrfs_run_delayed_items(trans, root);
1771 btrfs_run_delayed_items(trans, root);
1772
1773 kfree(name); 1801 kfree(name);
1774 iput(inode); 1802 iput(inode);
1803 if (ret)
1804 goto out;
1775 1805
1776 /* there might still be more names under this key 1806 /* there might still be more names under this key
1777 * check and repeat if required 1807 * check and repeat if required
@@ -1875,7 +1905,8 @@ again:
1875 ret = check_item_in_log(trans, root, log, path, 1905 ret = check_item_in_log(trans, root, log, path,
1876 log_path, dir, 1906 log_path, dir,
1877 &found_key); 1907 &found_key);
1878 BUG_ON(ret); 1908 if (ret)
1909 goto out;
1879 if (found_key.offset == (u64)-1) 1910 if (found_key.offset == (u64)-1)
1880 break; 1911 break;
1881 dir_key.offset = found_key.offset + 1; 1912 dir_key.offset = found_key.offset + 1;
@@ -1952,11 +1983,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1952 if (S_ISDIR(mode)) { 1983 if (S_ISDIR(mode)) {
1953 ret = replay_dir_deletes(wc->trans, 1984 ret = replay_dir_deletes(wc->trans,
1954 root, log, path, key.objectid, 0); 1985 root, log, path, key.objectid, 0);
1955 BUG_ON(ret); 1986 if (ret)
1987 break;
1956 } 1988 }
1957 ret = overwrite_item(wc->trans, root, path, 1989 ret = overwrite_item(wc->trans, root, path,
1958 eb, i, &key); 1990 eb, i, &key);
1959 BUG_ON(ret); 1991 if (ret)
1992 break;
1960 1993
1961 /* for regular files, make sure corresponding 1994 /* for regular files, make sure corresponding
1962 * orhpan item exist. extents past the new EOF 1995 * orhpan item exist. extents past the new EOF
@@ -1965,12 +1998,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1965 if (S_ISREG(mode)) { 1998 if (S_ISREG(mode)) {
1966 ret = insert_orphan_item(wc->trans, root, 1999 ret = insert_orphan_item(wc->trans, root,
1967 key.objectid); 2000 key.objectid);
1968 BUG_ON(ret); 2001 if (ret)
2002 break;
1969 } 2003 }
1970 2004
1971 ret = link_to_fixup_dir(wc->trans, root, 2005 ret = link_to_fixup_dir(wc->trans, root,
1972 path, key.objectid); 2006 path, key.objectid);
1973 BUG_ON(ret); 2007 if (ret)
2008 break;
1974 } 2009 }
1975 if (wc->stage < LOG_WALK_REPLAY_ALL) 2010 if (wc->stage < LOG_WALK_REPLAY_ALL)
1976 continue; 2011 continue;
@@ -1979,28 +2014,35 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1979 if (key.type == BTRFS_XATTR_ITEM_KEY) { 2014 if (key.type == BTRFS_XATTR_ITEM_KEY) {
1980 ret = overwrite_item(wc->trans, root, path, 2015 ret = overwrite_item(wc->trans, root, path,
1981 eb, i, &key); 2016 eb, i, &key);
1982 BUG_ON(ret); 2017 if (ret)
2018 break;
1983 } else if (key.type == BTRFS_INODE_REF_KEY) { 2019 } else if (key.type == BTRFS_INODE_REF_KEY) {
1984 ret = add_inode_ref(wc->trans, root, log, path, 2020 ret = add_inode_ref(wc->trans, root, log, path,
1985 eb, i, &key); 2021 eb, i, &key);
1986 BUG_ON(ret && ret != -ENOENT); 2022 if (ret && ret != -ENOENT)
2023 break;
2024 ret = 0;
1987 } else if (key.type == BTRFS_INODE_EXTREF_KEY) { 2025 } else if (key.type == BTRFS_INODE_EXTREF_KEY) {
1988 ret = add_inode_ref(wc->trans, root, log, path, 2026 ret = add_inode_ref(wc->trans, root, log, path,
1989 eb, i, &key); 2027 eb, i, &key);
1990 BUG_ON(ret && ret != -ENOENT); 2028 if (ret && ret != -ENOENT)
2029 break;
2030 ret = 0;
1991 } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 2031 } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
1992 ret = replay_one_extent(wc->trans, root, path, 2032 ret = replay_one_extent(wc->trans, root, path,
1993 eb, i, &key); 2033 eb, i, &key);
1994 BUG_ON(ret); 2034 if (ret)
2035 break;
1995 } else if (key.type == BTRFS_DIR_ITEM_KEY || 2036 } else if (key.type == BTRFS_DIR_ITEM_KEY ||
1996 key.type == BTRFS_DIR_INDEX_KEY) { 2037 key.type == BTRFS_DIR_INDEX_KEY) {
1997 ret = replay_one_dir_item(wc->trans, root, path, 2038 ret = replay_one_dir_item(wc->trans, root, path,
1998 eb, i, &key); 2039 eb, i, &key);
1999 BUG_ON(ret); 2040 if (ret)
2041 break;
2000 } 2042 }
2001 } 2043 }
2002 btrfs_free_path(path); 2044 btrfs_free_path(path);
2003 return 0; 2045 return ret;
2004} 2046}
2005 2047
2006static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, 2048static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
@@ -2045,8 +2087,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2045 2087
2046 if (*level == 1) { 2088 if (*level == 1) {
2047 ret = wc->process_func(root, next, wc, ptr_gen); 2089 ret = wc->process_func(root, next, wc, ptr_gen);
2048 if (ret) 2090 if (ret) {
2091 free_extent_buffer(next);
2049 return ret; 2092 return ret;
2093 }
2050 2094
2051 path->slots[*level]++; 2095 path->slots[*level]++;
2052 if (wc->free) { 2096 if (wc->free) {
@@ -2066,7 +2110,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2066 BTRFS_TREE_LOG_OBJECTID); 2110 BTRFS_TREE_LOG_OBJECTID);
2067 ret = btrfs_free_and_pin_reserved_extent(root, 2111 ret = btrfs_free_and_pin_reserved_extent(root,
2068 bytenr, blocksize); 2112 bytenr, blocksize);
2069 BUG_ON(ret); /* -ENOMEM or logic errors */ 2113 if (ret) {
2114 free_extent_buffer(next);
2115 return ret;
2116 }
2070 } 2117 }
2071 free_extent_buffer(next); 2118 free_extent_buffer(next);
2072 continue; 2119 continue;
@@ -2139,7 +2186,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2139 ret = btrfs_free_and_pin_reserved_extent(root, 2186 ret = btrfs_free_and_pin_reserved_extent(root,
2140 path->nodes[*level]->start, 2187 path->nodes[*level]->start,
2141 path->nodes[*level]->len); 2188 path->nodes[*level]->len);
2142 BUG_ON(ret); 2189 if (ret)
2190 return ret;
2143 } 2191 }
2144 free_extent_buffer(path->nodes[*level]); 2192 free_extent_buffer(path->nodes[*level]);
2145 path->nodes[*level] = NULL; 2193 path->nodes[*level] = NULL;
@@ -2161,7 +2209,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2161 int wret; 2209 int wret;
2162 int level; 2210 int level;
2163 struct btrfs_path *path; 2211 struct btrfs_path *path;
2164 int i;
2165 int orig_level; 2212 int orig_level;
2166 2213
2167 path = btrfs_alloc_path(); 2214 path = btrfs_alloc_path();
@@ -2213,17 +2260,12 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2213 BTRFS_TREE_LOG_OBJECTID); 2260 BTRFS_TREE_LOG_OBJECTID);
2214 ret = btrfs_free_and_pin_reserved_extent(log, next->start, 2261 ret = btrfs_free_and_pin_reserved_extent(log, next->start,
2215 next->len); 2262 next->len);
2216 BUG_ON(ret); /* -ENOMEM or logic errors */ 2263 if (ret)
2264 goto out;
2217 } 2265 }
2218 } 2266 }
2219 2267
2220out: 2268out:
2221 for (i = 0; i <= orig_level; i++) {
2222 if (path->nodes[i]) {
2223 free_extent_buffer(path->nodes[i]);
2224 path->nodes[i] = NULL;
2225 }
2226 }
2227 btrfs_free_path(path); 2269 btrfs_free_path(path);
2228 return ret; 2270 return ret;
2229} 2271}
@@ -2507,7 +2549,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2507 2549
2508 if (trans) { 2550 if (trans) {
2509 ret = walk_log_tree(trans, log, &wc); 2551 ret = walk_log_tree(trans, log, &wc);
2510 BUG_ON(ret); 2552
2553 /* I don't think this can happen but just in case */
2554 if (ret)
2555 btrfs_abort_transaction(trans, log, ret);
2511 } 2556 }
2512 2557
2513 while (1) { 2558 while (1) {
@@ -2615,7 +2660,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2615 if (di) { 2660 if (di) {
2616 ret = btrfs_delete_one_dir_name(trans, log, path, di); 2661 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2617 bytes_del += name_len; 2662 bytes_del += name_len;
2618 BUG_ON(ret); 2663 if (ret) {
2664 err = ret;
2665 goto fail;
2666 }
2619 } 2667 }
2620 btrfs_release_path(path); 2668 btrfs_release_path(path);
2621 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, 2669 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
@@ -2627,7 +2675,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2627 if (di) { 2675 if (di) {
2628 ret = btrfs_delete_one_dir_name(trans, log, path, di); 2676 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2629 bytes_del += name_len; 2677 bytes_del += name_len;
2630 BUG_ON(ret); 2678 if (ret) {
2679 err = ret;
2680 goto fail;
2681 }
2631 } 2682 }
2632 2683
2633 /* update the directory size in the log to reflect the names 2684 /* update the directory size in the log to reflect the names
@@ -2966,7 +3017,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2966 3017
2967 while (1) { 3018 while (1) {
2968 ret = btrfs_search_slot(trans, log, &key, path, -1, 1); 3019 ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
2969 BUG_ON(ret == 0); 3020 BUG_ON(ret == 0); /* Logic error */
2970 if (ret < 0) 3021 if (ret < 0)
2971 break; 3022 break;
2972 3023
@@ -3169,7 +3220,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3169 log->fs_info->csum_root, 3220 log->fs_info->csum_root,
3170 ds + cs, ds + cs + cl - 1, 3221 ds + cs, ds + cs + cl - 1,
3171 &ordered_sums, 0); 3222 &ordered_sums, 0);
3172 BUG_ON(ret); 3223 if (ret) {
3224 btrfs_release_path(dst_path);
3225 kfree(ins_data);
3226 return ret;
3227 }
3173 } 3228 }
3174 } 3229 }
3175 } 3230 }
@@ -3209,115 +3264,6 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
3209 return 0; 3264 return 0;
3210} 3265}
3211 3266
3212static int drop_adjacent_extents(struct btrfs_trans_handle *trans,
3213 struct btrfs_root *root, struct inode *inode,
3214 struct extent_map *em,
3215 struct btrfs_path *path)
3216{
3217 struct btrfs_file_extent_item *fi;
3218 struct extent_buffer *leaf;
3219 struct btrfs_key key, new_key;
3220 struct btrfs_map_token token;
3221 u64 extent_end;
3222 u64 extent_offset = 0;
3223 int extent_type;
3224 int del_slot = 0;
3225 int del_nr = 0;
3226 int ret = 0;
3227
3228 while (1) {
3229 btrfs_init_map_token(&token);
3230 leaf = path->nodes[0];
3231 path->slots[0]++;
3232 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3233 if (del_nr) {
3234 ret = btrfs_del_items(trans, root, path,
3235 del_slot, del_nr);
3236 if (ret)
3237 return ret;
3238 del_nr = 0;
3239 }
3240
3241 ret = btrfs_next_leaf_write(trans, root, path, 1);
3242 if (ret < 0)
3243 return ret;
3244 if (ret > 0)
3245 return 0;
3246 leaf = path->nodes[0];
3247 }
3248
3249 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3250 if (key.objectid != btrfs_ino(inode) ||
3251 key.type != BTRFS_EXTENT_DATA_KEY ||
3252 key.offset >= em->start + em->len)
3253 break;
3254
3255 fi = btrfs_item_ptr(leaf, path->slots[0],
3256 struct btrfs_file_extent_item);
3257 extent_type = btrfs_token_file_extent_type(leaf, fi, &token);
3258 if (extent_type == BTRFS_FILE_EXTENT_REG ||
3259 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
3260 extent_offset = btrfs_token_file_extent_offset(leaf,
3261 fi, &token);
3262 extent_end = key.offset +
3263 btrfs_token_file_extent_num_bytes(leaf, fi,
3264 &token);
3265 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
3266 extent_end = key.offset +
3267 btrfs_file_extent_inline_len(leaf, fi);
3268 } else {
3269 BUG();
3270 }
3271
3272 if (extent_end <= em->len + em->start) {
3273 if (!del_nr) {
3274 del_slot = path->slots[0];
3275 }
3276 del_nr++;
3277 continue;
3278 }
3279
3280 /*
3281 * Ok so we'll ignore previous items if we log a new extent,
3282 * which can lead to overlapping extents, so if we have an
3283 * existing extent we want to adjust we _have_ to check the next
3284 * guy to make sure we even need this extent anymore, this keeps
3285 * us from panicing in set_item_key_safe.
3286 */
3287 if (path->slots[0] < btrfs_header_nritems(leaf) - 1) {
3288 struct btrfs_key tmp_key;
3289
3290 btrfs_item_key_to_cpu(leaf, &tmp_key,
3291 path->slots[0] + 1);
3292 if (tmp_key.objectid == btrfs_ino(inode) &&
3293 tmp_key.type == BTRFS_EXTENT_DATA_KEY &&
3294 tmp_key.offset <= em->start + em->len) {
3295 if (!del_nr)
3296 del_slot = path->slots[0];
3297 del_nr++;
3298 continue;
3299 }
3300 }
3301
3302 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
3303 memcpy(&new_key, &key, sizeof(new_key));
3304 new_key.offset = em->start + em->len;
3305 btrfs_set_item_key_safe(trans, root, path, &new_key);
3306 extent_offset += em->start + em->len - key.offset;
3307 btrfs_set_token_file_extent_offset(leaf, fi, extent_offset,
3308 &token);
3309 btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end -
3310 (em->start + em->len),
3311 &token);
3312 btrfs_mark_buffer_dirty(leaf);
3313 }
3314
3315 if (del_nr)
3316 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
3317
3318 return ret;
3319}
3320
3321static int log_one_extent(struct btrfs_trans_handle *trans, 3267static int log_one_extent(struct btrfs_trans_handle *trans,
3322 struct inode *inode, struct btrfs_root *root, 3268 struct inode *inode, struct btrfs_root *root,
3323 struct extent_map *em, struct btrfs_path *path) 3269 struct extent_map *em, struct btrfs_path *path)
@@ -3339,39 +3285,24 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3339 int index = log->log_transid % 2; 3285 int index = log->log_transid % 2;
3340 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3286 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3341 3287
3342insert: 3288 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3289 em->start + em->len, NULL, 0);
3290 if (ret)
3291 return ret;
3292
3343 INIT_LIST_HEAD(&ordered_sums); 3293 INIT_LIST_HEAD(&ordered_sums);
3344 btrfs_init_map_token(&token); 3294 btrfs_init_map_token(&token);
3345 key.objectid = btrfs_ino(inode); 3295 key.objectid = btrfs_ino(inode);
3346 key.type = BTRFS_EXTENT_DATA_KEY; 3296 key.type = BTRFS_EXTENT_DATA_KEY;
3347 key.offset = em->start; 3297 key.offset = em->start;
3348 path->really_keep_locks = 1;
3349 3298
3350 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); 3299 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
3351 if (ret && ret != -EEXIST) { 3300 if (ret)
3352 path->really_keep_locks = 0;
3353 return ret; 3301 return ret;
3354 }
3355 leaf = path->nodes[0]; 3302 leaf = path->nodes[0];
3356 fi = btrfs_item_ptr(leaf, path->slots[0], 3303 fi = btrfs_item_ptr(leaf, path->slots[0],
3357 struct btrfs_file_extent_item); 3304 struct btrfs_file_extent_item);
3358 3305
3359 /*
3360 * If we are overwriting an inline extent with a real one then we need
3361 * to just delete the inline extent as it may not be large enough to
3362 * have the entire file_extent_item.
3363 */
3364 if (ret && btrfs_token_file_extent_type(leaf, fi, &token) ==
3365 BTRFS_FILE_EXTENT_INLINE) {
3366 ret = btrfs_del_item(trans, log, path);
3367 btrfs_release_path(path);
3368 if (ret) {
3369 path->really_keep_locks = 0;
3370 return ret;
3371 }
3372 goto insert;
3373 }
3374
3375 btrfs_set_token_file_extent_generation(leaf, fi, em->generation, 3306 btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
3376 &token); 3307 &token);
3377 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3308 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3410,22 +3341,14 @@ insert:
3410 em->start - em->orig_start, 3341 em->start - em->orig_start,
3411 &token); 3342 &token);
3412 btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); 3343 btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
3413 btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->len, &token); 3344 btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
3414 btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, 3345 btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
3415 &token); 3346 &token);
3416 btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); 3347 btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
3417 btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); 3348 btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
3418 btrfs_mark_buffer_dirty(leaf); 3349 btrfs_mark_buffer_dirty(leaf);
3419 3350
3420 /*
3421 * Have to check the extent to the right of us to make sure it doesn't
3422 * fall in our current range. We're ok if the previous extent is in our
3423 * range since the recovery stuff will run us in key order and thus just
3424 * drop the part we overwrote.
3425 */
3426 ret = drop_adjacent_extents(trans, log, inode, em, path);
3427 btrfs_release_path(path); 3351 btrfs_release_path(path);
3428 path->really_keep_locks = 0;
3429 if (ret) { 3352 if (ret) {
3430 return ret; 3353 return ret;
3431 } 3354 }
@@ -3650,8 +3573,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3650 bool fast_search = false; 3573 bool fast_search = false;
3651 u64 ino = btrfs_ino(inode); 3574 u64 ino = btrfs_ino(inode);
3652 3575
3653 log = root->log_root;
3654
3655 path = btrfs_alloc_path(); 3576 path = btrfs_alloc_path();
3656 if (!path) 3577 if (!path)
3657 return -ENOMEM; 3578 return -ENOMEM;
@@ -3918,9 +3839,9 @@ out:
3918 * only logging is done of any parent directories that are older than 3839 * only logging is done of any parent directories that are older than
3919 * the last committed transaction 3840 * the last committed transaction
3920 */ 3841 */
3921int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 3842static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3922 struct btrfs_root *root, struct inode *inode, 3843 struct btrfs_root *root, struct inode *inode,
3923 struct dentry *parent, int exists_only) 3844 struct dentry *parent, int exists_only)
3924{ 3845{
3925 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 3846 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
3926 struct super_block *sb; 3847 struct super_block *sb;
@@ -4111,6 +4032,9 @@ again:
4111 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 4032 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
4112 if (IS_ERR(wc.replay_dest)) { 4033 if (IS_ERR(wc.replay_dest)) {
4113 ret = PTR_ERR(wc.replay_dest); 4034 ret = PTR_ERR(wc.replay_dest);
4035 free_extent_buffer(log->node);
4036 free_extent_buffer(log->commit_root);
4037 kfree(log);
4114 btrfs_error(fs_info, ret, "Couldn't read target root " 4038 btrfs_error(fs_info, ret, "Couldn't read target root "
4115 "for tree log recovery."); 4039 "for tree log recovery.");
4116 goto error; 4040 goto error;
@@ -4119,12 +4043,10 @@ again:
4119 wc.replay_dest->log_root = log; 4043 wc.replay_dest->log_root = log;
4120 btrfs_record_root_in_trans(trans, wc.replay_dest); 4044 btrfs_record_root_in_trans(trans, wc.replay_dest);
4121 ret = walk_log_tree(trans, log, &wc); 4045 ret = walk_log_tree(trans, log, &wc);
4122 BUG_ON(ret);
4123 4046
4124 if (wc.stage == LOG_WALK_REPLAY_ALL) { 4047 if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
4125 ret = fixup_inode_link_counts(trans, wc.replay_dest, 4048 ret = fixup_inode_link_counts(trans, wc.replay_dest,
4126 path); 4049 path);
4127 BUG_ON(ret);
4128 } 4050 }
4129 4051
4130 key.offset = found_key.offset - 1; 4052 key.offset = found_key.offset - 1;
@@ -4133,6 +4055,9 @@ again:
4133 free_extent_buffer(log->commit_root); 4055 free_extent_buffer(log->commit_root);
4134 kfree(log); 4056 kfree(log);
4135 4057
4058 if (ret)
4059 goto error;
4060
4136 if (found_key.offset == 0) 4061 if (found_key.offset == 0)
4137 break; 4062 break;
4138 } 4063 }
@@ -4153,17 +4078,20 @@ again:
4153 4078
4154 btrfs_free_path(path); 4079 btrfs_free_path(path);
4155 4080
4081 /* step 4: commit the transaction, which also unpins the blocks */
4082 ret = btrfs_commit_transaction(trans, fs_info->tree_root);
4083 if (ret)
4084 return ret;
4085
4156 free_extent_buffer(log_root_tree->node); 4086 free_extent_buffer(log_root_tree->node);
4157 log_root_tree->log_root = NULL; 4087 log_root_tree->log_root = NULL;
4158 fs_info->log_root_recovering = 0; 4088 fs_info->log_root_recovering = 0;
4159
4160 /* step 4: commit the transaction, which also unpins the blocks */
4161 btrfs_commit_transaction(trans, fs_info->tree_root);
4162
4163 kfree(log_root_tree); 4089 kfree(log_root_tree);
4164 return 0;
4165 4090
4091 return 0;
4166error: 4092error:
4093 if (wc.trans)
4094 btrfs_end_transaction(wc.trans, fs_info->tree_root);
4167 btrfs_free_path(path); 4095 btrfs_free_path(path);
4168 return ret; 4096 return ret;
4169} 4097}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 862ac813f6b8..1d4ae0d15a70 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -40,9 +40,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
40 struct inode *inode, u64 dirid); 40 struct inode *inode, u64 dirid);
41void btrfs_end_log_trans(struct btrfs_root *root); 41void btrfs_end_log_trans(struct btrfs_root *root);
42int btrfs_pin_log_trans(struct btrfs_root *root); 42int btrfs_pin_log_trans(struct btrfs_root *root);
43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
44 struct btrfs_root *root, struct inode *inode,
45 struct dentry *parent, int exists_only);
46void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, 43void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
47 struct inode *dir, struct inode *inode, 44 struct inode *dir, struct inode *inode,
48 int for_rename); 45 int for_rename);
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index ddc61cad0080..7b417e20efe2 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -53,6 +53,7 @@ void ulist_init(struct ulist *ulist)
53 ulist->nnodes = 0; 53 ulist->nnodes = 0;
54 ulist->nodes = ulist->int_nodes; 54 ulist->nodes = ulist->int_nodes;
55 ulist->nodes_alloced = ULIST_SIZE; 55 ulist->nodes_alloced = ULIST_SIZE;
56 ulist->root = RB_ROOT;
56} 57}
57EXPORT_SYMBOL(ulist_init); 58EXPORT_SYMBOL(ulist_init);
58 59
@@ -72,6 +73,7 @@ void ulist_fini(struct ulist *ulist)
72 if (ulist->nodes_alloced > ULIST_SIZE) 73 if (ulist->nodes_alloced > ULIST_SIZE)
73 kfree(ulist->nodes); 74 kfree(ulist->nodes);
74 ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */ 75 ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
76 ulist->root = RB_ROOT;
75} 77}
76EXPORT_SYMBOL(ulist_fini); 78EXPORT_SYMBOL(ulist_fini);
77 79
@@ -123,6 +125,45 @@ void ulist_free(struct ulist *ulist)
123} 125}
124EXPORT_SYMBOL(ulist_free); 126EXPORT_SYMBOL(ulist_free);
125 127
128static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
129{
130 struct rb_node *n = ulist->root.rb_node;
131 struct ulist_node *u = NULL;
132
133 while (n) {
134 u = rb_entry(n, struct ulist_node, rb_node);
135 if (u->val < val)
136 n = n->rb_right;
137 else if (u->val > val)
138 n = n->rb_left;
139 else
140 return u;
141 }
142 return NULL;
143}
144
145static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
146{
147 struct rb_node **p = &ulist->root.rb_node;
148 struct rb_node *parent = NULL;
149 struct ulist_node *cur = NULL;
150
151 while (*p) {
152 parent = *p;
153 cur = rb_entry(parent, struct ulist_node, rb_node);
154
155 if (cur->val < ins->val)
156 p = &(*p)->rb_right;
157 else if (cur->val > ins->val)
158 p = &(*p)->rb_left;
159 else
160 return -EEXIST;
161 }
162 rb_link_node(&ins->rb_node, parent, p);
163 rb_insert_color(&ins->rb_node, &ulist->root);
164 return 0;
165}
166
126/** 167/**
127 * ulist_add - add an element to the ulist 168 * ulist_add - add an element to the ulist
128 * @ulist: ulist to add the element to 169 * @ulist: ulist to add the element to
@@ -151,14 +192,13 @@ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
151int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, 192int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
152 u64 *old_aux, gfp_t gfp_mask) 193 u64 *old_aux, gfp_t gfp_mask)
153{ 194{
154 int i; 195 int ret = 0;
155 196 struct ulist_node *node = NULL;
156 for (i = 0; i < ulist->nnodes; ++i) { 197 node = ulist_rbtree_search(ulist, val);
157 if (ulist->nodes[i].val == val) { 198 if (node) {
158 if (old_aux) 199 if (old_aux)
159 *old_aux = ulist->nodes[i].aux; 200 *old_aux = node->aux;
160 return 0; 201 return 0;
161 }
162 } 202 }
163 203
164 if (ulist->nnodes >= ulist->nodes_alloced) { 204 if (ulist->nnodes >= ulist->nodes_alloced) {
@@ -187,6 +227,8 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
187 } 227 }
188 ulist->nodes[ulist->nnodes].val = val; 228 ulist->nodes[ulist->nnodes].val = val;
189 ulist->nodes[ulist->nnodes].aux = aux; 229 ulist->nodes[ulist->nnodes].aux = aux;
230 ret = ulist_rbtree_insert(ulist, &ulist->nodes[ulist->nnodes]);
231 BUG_ON(ret);
190 ++ulist->nnodes; 232 ++ulist->nnodes;
191 233
192 return 1; 234 return 1;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 21a1963439c3..fb36731074b5 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -8,6 +8,9 @@
8#ifndef __ULIST__ 8#ifndef __ULIST__
9#define __ULIST__ 9#define __ULIST__
10 10
11#include <linux/list.h>
12#include <linux/rbtree.h>
13
11/* 14/*
12 * ulist is a generic data structure to hold a collection of unique u64 15 * ulist is a generic data structure to hold a collection of unique u64
13 * values. The only operations it supports is adding to the list and 16 * values. The only operations it supports is adding to the list and
@@ -34,6 +37,7 @@ struct ulist_iterator {
34struct ulist_node { 37struct ulist_node {
35 u64 val; /* value to store */ 38 u64 val; /* value to store */
36 u64 aux; /* auxiliary value saved along with the val */ 39 u64 aux; /* auxiliary value saved along with the val */
40 struct rb_node rb_node; /* used to speed up search */
37}; 41};
38 42
39struct ulist { 43struct ulist {
@@ -54,6 +58,8 @@ struct ulist {
54 */ 58 */
55 struct ulist_node *nodes; 59 struct ulist_node *nodes;
56 60
61 struct rb_root root;
62
57 /* 63 /*
58 * inline storage space for the first ULIST_SIZE entries 64 * inline storage space for the first ULIST_SIZE entries
59 */ 65 */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 678977226570..0e925ced971b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -46,6 +46,7 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
46 struct btrfs_device *device); 46 struct btrfs_device *device);
47static int btrfs_relocate_sys_chunks(struct btrfs_root *root); 47static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
48static void __btrfs_reset_dev_stats(struct btrfs_device *dev); 48static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
49static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
49static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); 50static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
50 51
51static DEFINE_MUTEX(uuid_mutex); 52static DEFINE_MUTEX(uuid_mutex);
@@ -717,9 +718,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
717 if (!device->name) 718 if (!device->name)
718 continue; 719 continue;
719 720
720 ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, 721 /* Just open everything we can; ignore failures here */
721 &bdev, &bh); 722 if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
722 if (ret) 723 &bdev, &bh))
723 continue; 724 continue;
724 725
725 disk_super = (struct btrfs_super_block *)bh->b_data; 726 disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -1199,10 +1200,10 @@ out:
1199 return ret; 1200 return ret;
1200} 1201}
1201 1202
1202int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, 1203static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1203 struct btrfs_device *device, 1204 struct btrfs_device *device,
1204 u64 chunk_tree, u64 chunk_objectid, 1205 u64 chunk_tree, u64 chunk_objectid,
1205 u64 chunk_offset, u64 start, u64 num_bytes) 1206 u64 chunk_offset, u64 start, u64 num_bytes)
1206{ 1207{
1207 int ret; 1208 int ret;
1208 struct btrfs_path *path; 1209 struct btrfs_path *path;
@@ -1329,9 +1330,9 @@ error:
1329 * the device information is stored in the chunk root 1330 * the device information is stored in the chunk root
1330 * the btrfs_device struct should be fully filled in 1331 * the btrfs_device struct should be fully filled in
1331 */ 1332 */
1332int btrfs_add_device(struct btrfs_trans_handle *trans, 1333static int btrfs_add_device(struct btrfs_trans_handle *trans,
1333 struct btrfs_root *root, 1334 struct btrfs_root *root,
1334 struct btrfs_device *device) 1335 struct btrfs_device *device)
1335{ 1336{
1336 int ret; 1337 int ret;
1337 struct btrfs_path *path; 1338 struct btrfs_path *path;
@@ -1710,8 +1711,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
1710 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 1711 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1711} 1712}
1712 1713
1713int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, 1714static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
1714 struct btrfs_device **device) 1715 struct btrfs_device **device)
1715{ 1716{
1716 int ret = 0; 1717 int ret = 0;
1717 struct btrfs_super_block *disk_super; 1718 struct btrfs_super_block *disk_super;
@@ -3607,7 +3608,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
3607 return 0; 3608 return 0;
3608} 3609}
3609 3610
3610struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { 3611static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3611 [BTRFS_RAID_RAID10] = { 3612 [BTRFS_RAID_RAID10] = {
3612 .sub_stripes = 2, 3613 .sub_stripes = 2,
3613 .dev_stripes = 1, 3614 .dev_stripes = 1,
@@ -3674,18 +3675,10 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
3674 3675
3675static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) 3676static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
3676{ 3677{
3677 u64 features;
3678
3679 if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))) 3678 if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
3680 return; 3679 return;
3681 3680
3682 features = btrfs_super_incompat_flags(info->super_copy); 3681 btrfs_set_fs_incompat(info, RAID56);
3683 if (features & BTRFS_FEATURE_INCOMPAT_RAID56)
3684 return;
3685
3686 features |= BTRFS_FEATURE_INCOMPAT_RAID56;
3687 btrfs_set_super_incompat_flags(info->super_copy, features);
3688 printk(KERN_INFO "btrfs: setting RAID5/6 feature flag\n");
3689} 3682}
3690 3683
3691static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3684static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -3932,7 +3925,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3932 3925
3933 em_tree = &extent_root->fs_info->mapping_tree.map_tree; 3926 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
3934 write_lock(&em_tree->lock); 3927 write_lock(&em_tree->lock);
3935 ret = add_extent_mapping(em_tree, em); 3928 ret = add_extent_mapping(em_tree, em, 0);
3936 write_unlock(&em_tree->lock); 3929 write_unlock(&em_tree->lock);
3937 if (ret) { 3930 if (ret) {
3938 free_extent_map(em); 3931 free_extent_map(em);
@@ -4240,9 +4233,25 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
4240 read_lock(&em_tree->lock); 4233 read_lock(&em_tree->lock);
4241 em = lookup_extent_mapping(em_tree, logical, len); 4234 em = lookup_extent_mapping(em_tree, logical, len);
4242 read_unlock(&em_tree->lock); 4235 read_unlock(&em_tree->lock);
4243 BUG_ON(!em);
4244 4236
4245 BUG_ON(em->start > logical || em->start + em->len < logical); 4237 /*
4238 * We could return errors for these cases, but that could get ugly and
4239 * we'd probably do the same thing which is just not do anything else
4240 * and exit, so return 1 so the callers don't try to use other copies.
4241 */
4242 if (!em) {
4243 btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
4244 logical+len);
4245 return 1;
4246 }
4247
4248 if (em->start > logical || em->start + em->len < logical) {
4249 btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
4250 "%Lu-%Lu\n", logical, logical+len, em->start,
4251 em->start + em->len);
4252 return 1;
4253 }
4254
4246 map = (struct map_lookup *)em->bdev; 4255 map = (struct map_lookup *)em->bdev;
4247 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) 4256 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
4248 ret = map->num_stripes; 4257 ret = map->num_stripes;
@@ -4411,13 +4420,19 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4411 read_unlock(&em_tree->lock); 4420 read_unlock(&em_tree->lock);
4412 4421
4413 if (!em) { 4422 if (!em) {
4414 printk(KERN_CRIT "btrfs: unable to find logical %llu len %llu\n", 4423 btrfs_crit(fs_info, "unable to find logical %llu len %llu",
4415 (unsigned long long)logical, 4424 (unsigned long long)logical,
4416 (unsigned long long)*length); 4425 (unsigned long long)*length);
4417 BUG(); 4426 return -EINVAL;
4427 }
4428
4429 if (em->start > logical || em->start + em->len < logical) {
4430 btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
4431 "found %Lu-%Lu\n", logical, em->start,
4432 em->start + em->len);
4433 return -EINVAL;
4418 } 4434 }
4419 4435
4420 BUG_ON(em->start > logical || em->start + em->len < logical);
4421 map = (struct map_lookup *)em->bdev; 4436 map = (struct map_lookup *)em->bdev;
4422 offset = logical - em->start; 4437 offset = logical - em->start;
4423 4438
@@ -5106,9 +5121,9 @@ struct async_sched {
5106 * This will add one bio to the pending list for a device and make sure 5121 * This will add one bio to the pending list for a device and make sure
5107 * the work struct is scheduled. 5122 * the work struct is scheduled.
5108 */ 5123 */
5109noinline void btrfs_schedule_bio(struct btrfs_root *root, 5124static noinline void btrfs_schedule_bio(struct btrfs_root *root,
5110 struct btrfs_device *device, 5125 struct btrfs_device *device,
5111 int rw, struct bio *bio) 5126 int rw, struct bio *bio)
5112{ 5127{
5113 int should_queue = 1; 5128 int should_queue = 1;
5114 struct btrfs_pending_bios *pending_bios; 5129 struct btrfs_pending_bios *pending_bios;
@@ -5308,10 +5323,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5308 } 5323 }
5309 5324
5310 if (map_length < length) { 5325 if (map_length < length) {
5311 printk(KERN_CRIT "btrfs: mapping failed logical %llu bio len %llu " 5326 btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
5312 "len %llu\n", (unsigned long long)logical, 5327 (unsigned long long)logical,
5313 (unsigned long long)length, 5328 (unsigned long long)length,
5314 (unsigned long long)map_length); 5329 (unsigned long long)map_length);
5315 BUG(); 5330 BUG();
5316 } 5331 }
5317 5332
@@ -5476,7 +5491,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
5476 } 5491 }
5477 5492
5478 write_lock(&map_tree->map_tree.lock); 5493 write_lock(&map_tree->map_tree.lock);
5479 ret = add_extent_mapping(&map_tree->map_tree, em); 5494 ret = add_extent_mapping(&map_tree->map_tree, em, 0);
5480 write_unlock(&map_tree->map_tree.lock); 5495 write_unlock(&map_tree->map_tree.lock);
5481 BUG_ON(ret); /* Tree corruption */ 5496 BUG_ON(ret); /* Tree corruption */
5482 free_extent_map(em); 5497 free_extent_map(em);
@@ -5583,8 +5598,8 @@ static int read_one_dev(struct btrfs_root *root,
5583 return -EIO; 5598 return -EIO;
5584 5599
5585 if (!device) { 5600 if (!device) {
5586 printk(KERN_WARNING "warning devid %llu missing\n", 5601 btrfs_warn(root->fs_info, "devid %llu missing",
5587 (unsigned long long)devid); 5602 (unsigned long long)devid);
5588 device = add_missing_dev(root, devid, dev_uuid); 5603 device = add_missing_dev(root, devid, dev_uuid);
5589 if (!device) 5604 if (!device)
5590 return -ENOMEM; 5605 return -ENOMEM;
@@ -5926,7 +5941,7 @@ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
5926 btrfs_dev_stat_print_on_error(dev); 5941 btrfs_dev_stat_print_on_error(dev);
5927} 5942}
5928 5943
5929void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) 5944static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
5930{ 5945{
5931 if (!dev->dev_stats_valid) 5946 if (!dev->dev_stats_valid)
5932 return; 5947 return;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 062d8604d35b..845ccbb0d2e3 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -254,10 +254,6 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
254#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ 254#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \
255 (sizeof(struct btrfs_bio_stripe) * (n))) 255 (sizeof(struct btrfs_bio_stripe) * (n)))
256 256
257int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
258 struct btrfs_device *device,
259 u64 chunk_tree, u64 chunk_objectid,
260 u64 chunk_offset, u64 start, u64 num_bytes);
261int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, 257int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
262 u64 logical, u64 *length, 258 u64 logical, u64 *length,
263 struct btrfs_bio **bbio_ret, int mirror_num); 259 struct btrfs_bio **bbio_ret, int mirror_num);
@@ -282,11 +278,6 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
282int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, 278int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
283 char *device_path, 279 char *device_path,
284 struct btrfs_device **device); 280 struct btrfs_device **device);
285int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
286 struct btrfs_device **device);
287int btrfs_add_device(struct btrfs_trans_handle *trans,
288 struct btrfs_root *root,
289 struct btrfs_device *device);
290int btrfs_rm_device(struct btrfs_root *root, char *device_path); 281int btrfs_rm_device(struct btrfs_root *root, char *device_path);
291void btrfs_cleanup_fs_uuids(void); 282void btrfs_cleanup_fs_uuids(void);
292int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); 283int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
@@ -307,7 +298,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
307int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 298int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
308int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 299int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
309 u64 *start, u64 *max_avail); 300 u64 *start, u64 *max_avail);
310void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
311void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); 301void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
312int btrfs_get_dev_stats(struct btrfs_root *root, 302int btrfs_get_dev_stats(struct btrfs_root *root,
313 struct btrfs_ioctl_get_dev_stats *stats); 303 struct btrfs_ioctl_get_dev_stats *stats);
@@ -321,9 +311,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
321void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, 311void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
322 struct btrfs_device *tgtdev); 312 struct btrfs_device *tgtdev);
323int btrfs_scratch_superblock(struct btrfs_device *device); 313int btrfs_scratch_superblock(struct btrfs_device *device);
324void btrfs_schedule_bio(struct btrfs_root *root,
325 struct btrfs_device *device,
326 int rw, struct bio *bio);
327int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, 314int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
328 u64 logical, u64 len, int mirror_num); 315 u64 logical, u64 len, int mirror_num);
329unsigned long btrfs_full_stripe_len(struct btrfs_root *root, 316unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 446a6848c554..05740b9789e4 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -406,8 +406,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
406 XATTR_REPLACE); 406 XATTR_REPLACE);
407} 407}
408 408
409int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, 409static int btrfs_initxattrs(struct inode *inode,
410 void *fs_info) 410 const struct xattr *xattr_array, void *fs_info)
411{ 411{
412 const struct xattr *xattr; 412 const struct xattr *xattr;
413 struct btrfs_trans_handle *trans = fs_info; 413 struct btrfs_trans_handle *trans = fs_info;