aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Kconfig22
-rw-r--r--fs/btrfs/backref.c87
-rw-r--r--fs/btrfs/backref.h3
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c368
-rw-r--r--fs/btrfs/ctree.h145
-rw-r--r--fs/btrfs/delayed-inode.c66
-rw-r--r--fs/btrfs/delayed-ref.c30
-rw-r--r--fs/btrfs/dir-item.c11
-rw-r--r--fs/btrfs/disk-io.c423
-rw-r--r--fs/btrfs/disk-io.h5
-rw-r--r--fs/btrfs/extent-tree.c633
-rw-r--r--fs/btrfs/extent_io.c346
-rw-r--r--fs/btrfs/extent_io.h46
-rw-r--r--fs/btrfs/extent_map.c23
-rw-r--r--fs/btrfs/extent_map.h3
-rw-r--r--fs/btrfs/file-item.c108
-rw-r--r--fs/btrfs/file.c50
-rw-r--r--fs/btrfs/free-space-cache.c596
-rw-r--r--fs/btrfs/free-space-cache.h5
-rw-r--r--fs/btrfs/inode-item.c17
-rw-r--r--fs/btrfs/inode.c206
-rw-r--r--fs/btrfs/ioctl.c108
-rw-r--r--fs/btrfs/locking.c4
-rw-r--r--fs/btrfs/ordered-data.c30
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/print-tree.c9
-rw-r--r--fs/btrfs/print-tree.h2
-rw-r--r--fs/btrfs/qgroup.c843
-rw-r--r--fs/btrfs/raid56.c14
-rw-r--r--fs/btrfs/reada.c5
-rw-r--r--fs/btrfs/relocation.c111
-rw-r--r--fs/btrfs/root-tree.c7
-rw-r--r--fs/btrfs/scrub.c133
-rw-r--r--fs/btrfs/send.c42
-rw-r--r--fs/btrfs/send.h1
-rw-r--r--fs/btrfs/super.c107
-rw-r--r--fs/btrfs/transaction.c95
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-log.c438
-rw-r--r--fs/btrfs/tree-log.h3
-rw-r--r--fs/btrfs/ulist.c58
-rw-r--r--fs/btrfs/ulist.h6
-rw-r--r--fs/btrfs/volumes.c112
-rw-r--r--fs/btrfs/volumes.h13
-rw-r--r--fs/btrfs/xattr.c4
48 files changed, 3427 insertions, 1935 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 9a8622a5b867..2b3b83296977 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,5 +1,5 @@
1config BTRFS_FS 1config BTRFS_FS
2 tristate "Btrfs filesystem Unstable disk format" 2 tristate "Btrfs filesystem support"
3 select LIBCRC32C 3 select LIBCRC32C
4 select ZLIB_INFLATE 4 select ZLIB_INFLATE
5 select ZLIB_DEFLATE 5 select ZLIB_DEFLATE
@@ -52,3 +52,23 @@ config BTRFS_FS_CHECK_INTEGRITY
52 In most cases, unless you are a btrfs developer who needs 52 In most cases, unless you are a btrfs developer who needs
53 to verify the integrity of (super)-block write requests 53 to verify the integrity of (super)-block write requests
54 during the run of a regression test, say N 54 during the run of a regression test, say N
55
56config BTRFS_FS_RUN_SANITY_TESTS
57 bool "Btrfs will run sanity tests upon loading"
58 depends on BTRFS_FS
59 help
60 This will run some basic sanity tests on the free space cache
61 code to make sure it is acting as it should. These are mostly
62 regression tests and are only really interesting to btrfs devlopers.
63
64 If unsure, say N.
65
66config BTRFS_DEBUG
67 bool "Btrfs debugging support"
68 depends on BTRFS_FS
69 help
70 Enable run-time debugging support for the btrfs filesystem. This may
71 enable additional and expensive checks with negative impact on
72 performance, or export extra information via sysfs.
73
74 If unsure, say N.
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index bd605c87adfd..b4fb41558111 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -352,6 +352,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
352 err = __resolve_indirect_ref(fs_info, search_commit_root, 352 err = __resolve_indirect_ref(fs_info, search_commit_root,
353 time_seq, ref, parents, 353 time_seq, ref, parents,
354 extent_item_pos); 354 extent_item_pos);
355 if (err == -ENOMEM)
356 goto out;
355 if (err) 357 if (err)
356 continue; 358 continue;
357 359
@@ -367,7 +369,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
367 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); 369 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
368 if (!new_ref) { 370 if (!new_ref) {
369 ret = -ENOMEM; 371 ret = -ENOMEM;
370 break; 372 goto out;
371 } 373 }
372 memcpy(new_ref, ref, sizeof(*ref)); 374 memcpy(new_ref, ref, sizeof(*ref));
373 new_ref->parent = node->val; 375 new_ref->parent = node->val;
@@ -377,7 +379,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
377 } 379 }
378 ulist_reinit(parents); 380 ulist_reinit(parents);
379 } 381 }
380 382out:
381 ulist_free(parents); 383 ulist_free(parents);
382 return ret; 384 return ret;
383} 385}
@@ -421,7 +423,10 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
421 BUG_ON(!ref->wanted_disk_byte); 423 BUG_ON(!ref->wanted_disk_byte);
422 eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte, 424 eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
423 fs_info->tree_root->leafsize, 0); 425 fs_info->tree_root->leafsize, 0);
424 BUG_ON(!eb); 426 if (!eb || !extent_buffer_uptodate(eb)) {
427 free_extent_buffer(eb);
428 return -EIO;
429 }
425 btrfs_tree_read_lock(eb); 430 btrfs_tree_read_lock(eb);
426 if (btrfs_header_level(eb) == 0) 431 if (btrfs_header_level(eb) == 0)
427 btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0); 432 btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
@@ -443,7 +448,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
443 * having a parent). 448 * having a parent).
444 * mode = 2: merge identical parents 449 * mode = 2: merge identical parents
445 */ 450 */
446static int __merge_refs(struct list_head *head, int mode) 451static void __merge_refs(struct list_head *head, int mode)
447{ 452{
448 struct list_head *pos1; 453 struct list_head *pos1;
449 454
@@ -489,7 +494,6 @@ static int __merge_refs(struct list_head *head, int mode)
489 } 494 }
490 495
491 } 496 }
492 return 0;
493} 497}
494 498
495/* 499/*
@@ -582,7 +586,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
582 default: 586 default:
583 WARN_ON(1); 587 WARN_ON(1);
584 } 588 }
585 BUG_ON(ret); 589 if (ret)
590 return ret;
586 } 591 }
587 592
588 return 0; 593 return 0;
@@ -680,7 +685,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
680 default: 685 default:
681 WARN_ON(1); 686 WARN_ON(1);
682 } 687 }
683 BUG_ON(ret); 688 if (ret)
689 return ret;
684 ptr += btrfs_extent_inline_ref_size(type); 690 ptr += btrfs_extent_inline_ref_size(type);
685 } 691 }
686 692
@@ -762,7 +768,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
762 default: 768 default:
763 WARN_ON(1); 769 WARN_ON(1);
764 } 770 }
765 BUG_ON(ret); 771 if (ret)
772 return ret;
773
766 } 774 }
767 775
768 return ret; 776 return ret;
@@ -880,18 +888,14 @@ again:
880 if (ret) 888 if (ret)
881 goto out; 889 goto out;
882 890
883 ret = __merge_refs(&prefs, 1); 891 __merge_refs(&prefs, 1);
884 if (ret)
885 goto out;
886 892
887 ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq, 893 ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq,
888 &prefs, extent_item_pos); 894 &prefs, extent_item_pos);
889 if (ret) 895 if (ret)
890 goto out; 896 goto out;
891 897
892 ret = __merge_refs(&prefs, 2); 898 __merge_refs(&prefs, 2);
893 if (ret)
894 goto out;
895 899
896 while (!list_empty(&prefs)) { 900 while (!list_empty(&prefs)) {
897 ref = list_first_entry(&prefs, struct __prelim_ref, list); 901 ref = list_first_entry(&prefs, struct __prelim_ref, list);
@@ -900,7 +904,8 @@ again:
900 if (ref->count && ref->root_id && ref->parent == 0) { 904 if (ref->count && ref->root_id && ref->parent == 0) {
901 /* no parent == root of tree */ 905 /* no parent == root of tree */
902 ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); 906 ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
903 BUG_ON(ret < 0); 907 if (ret < 0)
908 goto out;
904 } 909 }
905 if (ref->count && ref->parent) { 910 if (ref->count && ref->parent) {
906 struct extent_inode_elem *eie = NULL; 911 struct extent_inode_elem *eie = NULL;
@@ -911,7 +916,10 @@ again:
911 info_level); 916 info_level);
912 eb = read_tree_block(fs_info->extent_root, 917 eb = read_tree_block(fs_info->extent_root,
913 ref->parent, bsz, 0); 918 ref->parent, bsz, 0);
914 BUG_ON(!eb); 919 if (!eb || !extent_buffer_uptodate(eb)) {
920 free_extent_buffer(eb);
921 return -EIO;
922 }
915 ret = find_extent_in_eb(eb, bytenr, 923 ret = find_extent_in_eb(eb, bytenr,
916 *extent_item_pos, &eie); 924 *extent_item_pos, &eie);
917 ref->inode_list = eie; 925 ref->inode_list = eie;
@@ -920,6 +928,8 @@ again:
920 ret = ulist_add_merge(refs, ref->parent, 928 ret = ulist_add_merge(refs, ref->parent,
921 (uintptr_t)ref->inode_list, 929 (uintptr_t)ref->inode_list,
922 (u64 *)&eie, GFP_NOFS); 930 (u64 *)&eie, GFP_NOFS);
931 if (ret < 0)
932 goto out;
923 if (!ret && extent_item_pos) { 933 if (!ret && extent_item_pos) {
924 /* 934 /*
925 * we've recorded that parent, so we must extend 935 * we've recorded that parent, so we must extend
@@ -930,7 +940,6 @@ again:
930 eie = eie->next; 940 eie = eie->next;
931 eie->next = ref->inode_list; 941 eie->next = ref->inode_list;
932 } 942 }
933 BUG_ON(ret < 0);
934 } 943 }
935 kfree(ref); 944 kfree(ref);
936 } 945 }
@@ -1180,6 +1189,20 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
1180 return ret; 1189 return ret;
1181} 1190}
1182 1191
1192/*
1193 * this iterates to turn a name (from iref/extref) into a full filesystem path.
1194 * Elements of the path are separated by '/' and the path is guaranteed to be
1195 * 0-terminated. the path is only given within the current file system.
1196 * Therefore, it never starts with a '/'. the caller is responsible to provide
1197 * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
1198 * the start point of the resulting string is returned. this pointer is within
1199 * dest, normally.
1200 * in case the path buffer would overflow, the pointer is decremented further
1201 * as if output was written to the buffer, though no more output is actually
1202 * generated. that way, the caller can determine how much space would be
1203 * required for the path to fit into the buffer. in that case, the returned
1204 * value will be smaller than dest. callers must check this!
1205 */
1183char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 1206char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1184 u32 name_len, unsigned long name_off, 1207 u32 name_len, unsigned long name_off,
1185 struct extent_buffer *eb_in, u64 parent, 1208 struct extent_buffer *eb_in, u64 parent,
@@ -1249,32 +1272,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1249} 1272}
1250 1273
1251/* 1274/*
1252 * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements
1253 * of the path are separated by '/' and the path is guaranteed to be
1254 * 0-terminated. the path is only given within the current file system.
1255 * Therefore, it never starts with a '/'. the caller is responsible to provide
1256 * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
1257 * the start point of the resulting string is returned. this pointer is within
1258 * dest, normally.
1259 * in case the path buffer would overflow, the pointer is decremented further
1260 * as if output was written to the buffer, though no more output is actually
1261 * generated. that way, the caller can determine how much space would be
1262 * required for the path to fit into the buffer. in that case, the returned
1263 * value will be smaller than dest. callers must check this!
1264 */
1265char *btrfs_iref_to_path(struct btrfs_root *fs_root,
1266 struct btrfs_path *path,
1267 struct btrfs_inode_ref *iref,
1268 struct extent_buffer *eb_in, u64 parent,
1269 char *dest, u32 size)
1270{
1271 return btrfs_ref_to_path(fs_root, path,
1272 btrfs_inode_ref_name_len(eb_in, iref),
1273 (unsigned long)(iref + 1),
1274 eb_in, parent, dest, size);
1275}
1276
1277/*
1278 * this makes the path point to (logical EXTENT_ITEM *) 1275 * this makes the path point to (logical EXTENT_ITEM *)
1279 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for 1276 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
1280 * tree blocks and <0 on error. 1277 * tree blocks and <0 on error.
@@ -1461,8 +1458,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1461 iterate_extent_inodes_t *iterate, void *ctx) 1458 iterate_extent_inodes_t *iterate, void *ctx)
1462{ 1459{
1463 int ret; 1460 int ret;
1464 struct list_head data_refs = LIST_HEAD_INIT(data_refs);
1465 struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
1466 struct btrfs_trans_handle *trans; 1461 struct btrfs_trans_handle *trans;
1467 struct ulist *refs = NULL; 1462 struct ulist *refs = NULL;
1468 struct ulist *roots = NULL; 1463 struct ulist *roots = NULL;
@@ -1508,11 +1503,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
1508 iterate, ctx); 1503 iterate, ctx);
1509 } 1504 }
1510 ulist_free(roots); 1505 ulist_free(roots);
1511 roots = NULL;
1512 } 1506 }
1513 1507
1514 free_leaf_list(refs); 1508 free_leaf_list(refs);
1515 ulist_free(roots);
1516out: 1509out:
1517 if (!search_commit_root) { 1510 if (!search_commit_root) {
1518 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 1511 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 310a7f6d09b1..0f446d7ca2c0 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -59,9 +59,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
59int btrfs_find_all_roots(struct btrfs_trans_handle *trans, 59int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
60 struct btrfs_fs_info *fs_info, u64 bytenr, 60 struct btrfs_fs_info *fs_info, u64 bytenr,
61 u64 time_seq, struct ulist **roots); 61 u64 time_seq, struct ulist **roots);
62char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
63 struct btrfs_inode_ref *iref, struct extent_buffer *eb,
64 u64 parent, char *dest, u32 size);
65char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, 62char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
66 u32 name_len, unsigned long name_off, 63 u32 name_len, unsigned long name_off,
67 struct extent_buffer *eb_in, u64 parent, 64 struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d9b97d4960e6..08b286b2a2c5 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -93,7 +93,7 @@ struct btrfs_inode {
93 93
94 unsigned long runtime_flags; 94 unsigned long runtime_flags;
95 95
96 /* Keep track of who's O_SYNC/fsycing currently */ 96 /* Keep track of who's O_SYNC/fsyncing currently */
97 atomic_t sync_writers; 97 atomic_t sync_writers;
98 98
99 /* full 64 bit generation number, struct vfs_inode doesn't have a big 99 /* full 64 bit generation number, struct vfs_inode doesn't have a big
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 15b94089abc4..b189bd1e7a3e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -82,6 +82,10 @@ struct compressed_bio {
82 u32 sums; 82 u32 sums;
83}; 83};
84 84
85static int btrfs_decompress_biovec(int type, struct page **pages_in,
86 u64 disk_start, struct bio_vec *bvec,
87 int vcnt, size_t srclen);
88
85static inline int compressed_bio_size(struct btrfs_root *root, 89static inline int compressed_bio_size(struct btrfs_root *root,
86 unsigned long disk_size) 90 unsigned long disk_size)
87{ 91{
@@ -106,7 +110,6 @@ static int check_compressed_csum(struct inode *inode,
106 u64 disk_start) 110 u64 disk_start)
107{ 111{
108 int ret; 112 int ret;
109 struct btrfs_root *root = BTRFS_I(inode)->root;
110 struct page *page; 113 struct page *page;
111 unsigned long i; 114 unsigned long i;
112 char *kaddr; 115 char *kaddr;
@@ -121,7 +124,7 @@ static int check_compressed_csum(struct inode *inode,
121 csum = ~(u32)0; 124 csum = ~(u32)0;
122 125
123 kaddr = kmap_atomic(page); 126 kaddr = kmap_atomic(page);
124 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE); 127 csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE);
125 btrfs_csum_final(csum, (char *)&csum); 128 btrfs_csum_final(csum, (char *)&csum);
126 kunmap_atomic(kaddr); 129 kunmap_atomic(kaddr);
127 130
@@ -739,7 +742,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
739static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES]; 742static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
740static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES]; 743static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
741 744
742struct btrfs_compress_op *btrfs_compress_op[] = { 745static struct btrfs_compress_op *btrfs_compress_op[] = {
743 &btrfs_zlib_compress, 746 &btrfs_zlib_compress,
744 &btrfs_lzo_compress, 747 &btrfs_lzo_compress,
745}; 748};
@@ -910,8 +913,9 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
910 * be contiguous. They all correspond to the range of bytes covered by 913 * be contiguous. They all correspond to the range of bytes covered by
911 * the compressed extent. 914 * the compressed extent.
912 */ 915 */
913int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start, 916static int btrfs_decompress_biovec(int type, struct page **pages_in,
914 struct bio_vec *bvec, int vcnt, size_t srclen) 917 u64 disk_start, struct bio_vec *bvec,
918 int vcnt, size_t srclen)
915{ 919{
916 struct list_head *workspace; 920 struct list_head *workspace;
917 int ret; 921 int ret;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 9afb0a62ae82..0c803b4fbf93 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -30,8 +30,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
30 unsigned long *total_in, 30 unsigned long *total_in,
31 unsigned long *total_out, 31 unsigned long *total_out,
32 unsigned long max_out); 32 unsigned long max_out);
33int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
34 struct bio_vec *bvec, int vcnt, size_t srclen);
35int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page, 33int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
36 unsigned long start_byte, size_t srclen, size_t destlen); 34 unsigned long start_byte, size_t srclen, size_t destlen);
37int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, 35int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ecd25a1b4e51..de6de8e60b46 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,16 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct btrfs_root *root, 37 struct btrfs_root *root,
38 struct extent_buffer *dst_buf, 38 struct extent_buffer *dst_buf,
39 struct extent_buffer *src_buf); 39 struct extent_buffer *src_buf);
40static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 40static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
41 struct btrfs_path *path, int level, int slot); 41 int level, int slot);
42static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, 42static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
43 struct extent_buffer *eb); 43 struct extent_buffer *eb);
44struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, 44static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
45 u32 blocksize, u64 parent_transid,
46 u64 time_seq);
47struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root,
48 u64 bytenr, u32 blocksize,
49 u64 time_seq);
50 45
51struct btrfs_path *btrfs_alloc_path(void) 46struct btrfs_path *btrfs_alloc_path(void)
52{ 47{
@@ -208,7 +203,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
208 * tree until you end up with a lock on the root. A locked buffer 203 * tree until you end up with a lock on the root. A locked buffer
209 * is returned, with a reference held. 204 * is returned, with a reference held.
210 */ 205 */
211struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) 206static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
212{ 207{
213 struct extent_buffer *eb; 208 struct extent_buffer *eb;
214 209
@@ -361,6 +356,44 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
361} 356}
362 357
363/* 358/*
359 * Increment the upper half of tree_mod_seq, set lower half zero.
360 *
361 * Must be called with fs_info->tree_mod_seq_lock held.
362 */
363static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
364{
365 u64 seq = atomic64_read(&fs_info->tree_mod_seq);
366 seq &= 0xffffffff00000000ull;
367 seq += 1ull << 32;
368 atomic64_set(&fs_info->tree_mod_seq, seq);
369 return seq;
370}
371
372/*
373 * Increment the lower half of tree_mod_seq.
374 *
375 * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
376 * are generated should not technically require a spin lock here. (Rationale:
377 * incrementing the minor while incrementing the major seq number is between its
378 * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
379 * just returns a unique sequence number as usual.) We have decided to leave
380 * that requirement in here and rethink it once we notice it really imposes a
381 * problem on some workload.
382 */
383static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
384{
385 return atomic64_inc_return(&fs_info->tree_mod_seq);
386}
387
388/*
389 * return the last minor in the previous major tree_mod_seq number
390 */
391u64 btrfs_tree_mod_seq_prev(u64 seq)
392{
393 return (seq & 0xffffffff00000000ull) - 1ull;
394}
395
396/*
364 * This adds a new blocker to the tree mod log's blocker list if the @elem 397 * This adds a new blocker to the tree mod log's blocker list if the @elem
365 * passed does not already have a sequence number set. So when a caller expects 398 * passed does not already have a sequence number set. So when a caller expects
366 * to record tree modifications, it should ensure to set elem->seq to zero 399 * to record tree modifications, it should ensure to set elem->seq to zero
@@ -376,10 +409,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
376 tree_mod_log_write_lock(fs_info); 409 tree_mod_log_write_lock(fs_info);
377 spin_lock(&fs_info->tree_mod_seq_lock); 410 spin_lock(&fs_info->tree_mod_seq_lock);
378 if (!elem->seq) { 411 if (!elem->seq) {
379 elem->seq = btrfs_inc_tree_mod_seq(fs_info); 412 elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
380 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); 413 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
381 } 414 }
382 seq = btrfs_inc_tree_mod_seq(fs_info); 415 seq = btrfs_inc_tree_mod_seq_minor(fs_info);
383 spin_unlock(&fs_info->tree_mod_seq_lock); 416 spin_unlock(&fs_info->tree_mod_seq_lock);
384 tree_mod_log_write_unlock(fs_info); 417 tree_mod_log_write_unlock(fs_info);
385 418
@@ -524,7 +557,10 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
524 if (!tm) 557 if (!tm)
525 return -ENOMEM; 558 return -ENOMEM;
526 559
527 tm->seq = btrfs_inc_tree_mod_seq(fs_info); 560 spin_lock(&fs_info->tree_mod_seq_lock);
561 tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
562 spin_unlock(&fs_info->tree_mod_seq_lock);
563
528 return tm->seq; 564 return tm->seq;
529} 565}
530 566
@@ -643,7 +679,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
643static noinline int 679static noinline int
644tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, 680tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
645 struct extent_buffer *old_root, 681 struct extent_buffer *old_root,
646 struct extent_buffer *new_root, gfp_t flags) 682 struct extent_buffer *new_root, gfp_t flags,
683 int log_removal)
647{ 684{
648 struct tree_mod_elem *tm; 685 struct tree_mod_elem *tm;
649 int ret; 686 int ret;
@@ -651,6 +688,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
651 if (tree_mod_dont_log(fs_info, NULL)) 688 if (tree_mod_dont_log(fs_info, NULL))
652 return 0; 689 return 0;
653 690
691 if (log_removal)
692 __tree_mod_log_free_eb(fs_info, old_root);
693
654 ret = tree_mod_alloc(fs_info, flags, &tm); 694 ret = tree_mod_alloc(fs_info, flags, &tm);
655 if (ret < 0) 695 if (ret < 0)
656 goto out; 696 goto out;
@@ -751,8 +791,8 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
751 791
752 for (i = 0; i < nr_items; i++) { 792 for (i = 0; i < nr_items; i++) {
753 ret = tree_mod_log_insert_key_locked(fs_info, src, 793 ret = tree_mod_log_insert_key_locked(fs_info, src,
754 i + src_offset, 794 i + src_offset,
755 MOD_LOG_KEY_REMOVE); 795 MOD_LOG_KEY_REMOVE);
756 BUG_ON(ret < 0); 796 BUG_ON(ret < 0);
757 ret = tree_mod_log_insert_key_locked(fs_info, dst, 797 ret = tree_mod_log_insert_key_locked(fs_info, dst,
758 i + dst_offset, 798 i + dst_offset,
@@ -798,11 +838,12 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
798 838
799static noinline void 839static noinline void
800tree_mod_log_set_root_pointer(struct btrfs_root *root, 840tree_mod_log_set_root_pointer(struct btrfs_root *root,
801 struct extent_buffer *new_root_node) 841 struct extent_buffer *new_root_node,
842 int log_removal)
802{ 843{
803 int ret; 844 int ret;
804 ret = tree_mod_log_insert_root(root->fs_info, root->node, 845 ret = tree_mod_log_insert_root(root->fs_info, root->node,
805 new_root_node, GFP_NOFS); 846 new_root_node, GFP_NOFS, log_removal);
806 BUG_ON(ret < 0); 847 BUG_ON(ret < 0);
807} 848}
808 849
@@ -863,7 +904,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
863 904
864 if (btrfs_block_can_be_shared(root, buf)) { 905 if (btrfs_block_can_be_shared(root, buf)) {
865 ret = btrfs_lookup_extent_info(trans, root, buf->start, 906 ret = btrfs_lookup_extent_info(trans, root, buf->start,
866 buf->len, &refs, &flags); 907 btrfs_header_level(buf), 1,
908 &refs, &flags);
867 if (ret) 909 if (ret)
868 return ret; 910 return ret;
869 if (refs == 0) { 911 if (refs == 0) {
@@ -927,7 +969,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
927 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 969 ret = btrfs_dec_ref(trans, root, buf, 1, 1);
928 BUG_ON(ret); /* -ENOMEM */ 970 BUG_ON(ret); /* -ENOMEM */
929 } 971 }
930 tree_mod_log_free_eb(root->fs_info, buf);
931 clean_tree_block(trans, root, buf); 972 clean_tree_block(trans, root, buf);
932 *last_ref = 1; 973 *last_ref = 1;
933 } 974 }
@@ -1025,7 +1066,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1025 parent_start = 0; 1066 parent_start = 0;
1026 1067
1027 extent_buffer_get(cow); 1068 extent_buffer_get(cow);
1028 tree_mod_log_set_root_pointer(root, cow); 1069 tree_mod_log_set_root_pointer(root, cow, 1);
1029 rcu_assign_pointer(root->node, cow); 1070 rcu_assign_pointer(root->node, cow);
1030 1071
1031 btrfs_free_tree_block(trans, root, buf, parent_start, 1072 btrfs_free_tree_block(trans, root, buf, parent_start,
@@ -1046,6 +1087,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1046 btrfs_set_node_ptr_generation(parent, parent_slot, 1087 btrfs_set_node_ptr_generation(parent, parent_slot,
1047 trans->transid); 1088 trans->transid);
1048 btrfs_mark_buffer_dirty(parent); 1089 btrfs_mark_buffer_dirty(parent);
1090 tree_mod_log_free_eb(root->fs_info, buf);
1049 btrfs_free_tree_block(trans, root, buf, parent_start, 1091 btrfs_free_tree_block(trans, root, buf, parent_start,
1050 last_ref); 1092 last_ref);
1051 } 1093 }
@@ -1063,11 +1105,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1063 */ 1105 */
1064static struct tree_mod_elem * 1106static struct tree_mod_elem *
1065__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, 1107__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1066 struct btrfs_root *root, u64 time_seq) 1108 struct extent_buffer *eb_root, u64 time_seq)
1067{ 1109{
1068 struct tree_mod_elem *tm; 1110 struct tree_mod_elem *tm;
1069 struct tree_mod_elem *found = NULL; 1111 struct tree_mod_elem *found = NULL;
1070 u64 root_logical = root->node->start; 1112 u64 root_logical = eb_root->start;
1071 int looped = 0; 1113 int looped = 0;
1072 1114
1073 if (!time_seq) 1115 if (!time_seq)
@@ -1101,7 +1143,6 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
1101 1143
1102 found = tm; 1144 found = tm;
1103 root_logical = tm->old_root.logical; 1145 root_logical = tm->old_root.logical;
1104 BUG_ON(root_logical == root->node->start);
1105 looped = 1; 1146 looped = 1;
1106 } 1147 }
1107 1148
@@ -1186,6 +1227,13 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
1186 btrfs_set_header_nritems(eb, n); 1227 btrfs_set_header_nritems(eb, n);
1187} 1228}
1188 1229
1230/*
1231 * Called with eb read locked. If the buffer cannot be rewinded, the same buffer
1232 * is returned. If rewind operations happen, a fresh buffer is returned. The
1233 * returned buffer is always read-locked. If the returned buffer is not the
1234 * input buffer, the lock on the input buffer is released and the input buffer
1235 * is freed (its refcount is decremented).
1236 */
1189static struct extent_buffer * 1237static struct extent_buffer *
1190tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, 1238tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1191 u64 time_seq) 1239 u64 time_seq)
@@ -1219,8 +1267,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1219 } 1267 }
1220 1268
1221 extent_buffer_get(eb_rewin); 1269 extent_buffer_get(eb_rewin);
1270 btrfs_tree_read_unlock(eb);
1222 free_extent_buffer(eb); 1271 free_extent_buffer(eb);
1223 1272
1273 extent_buffer_get(eb_rewin);
1274 btrfs_tree_read_lock(eb_rewin);
1224 __tree_mod_log_rewind(eb_rewin, time_seq, tm); 1275 __tree_mod_log_rewind(eb_rewin, time_seq, tm);
1225 WARN_ON(btrfs_header_nritems(eb_rewin) > 1276 WARN_ON(btrfs_header_nritems(eb_rewin) >
1226 BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root)); 1277 BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
@@ -1239,33 +1290,35 @@ static inline struct extent_buffer *
1239get_old_root(struct btrfs_root *root, u64 time_seq) 1290get_old_root(struct btrfs_root *root, u64 time_seq)
1240{ 1291{
1241 struct tree_mod_elem *tm; 1292 struct tree_mod_elem *tm;
1242 struct extent_buffer *eb; 1293 struct extent_buffer *eb = NULL;
1294 struct extent_buffer *eb_root;
1243 struct extent_buffer *old; 1295 struct extent_buffer *old;
1244 struct tree_mod_root *old_root = NULL; 1296 struct tree_mod_root *old_root = NULL;
1245 u64 old_generation = 0; 1297 u64 old_generation = 0;
1246 u64 logical; 1298 u64 logical;
1247 u32 blocksize; 1299 u32 blocksize;
1248 1300
1249 eb = btrfs_read_lock_root_node(root); 1301 eb_root = btrfs_read_lock_root_node(root);
1250 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); 1302 tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
1251 if (!tm) 1303 if (!tm)
1252 return root->node; 1304 return eb_root;
1253 1305
1254 if (tm->op == MOD_LOG_ROOT_REPLACE) { 1306 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1255 old_root = &tm->old_root; 1307 old_root = &tm->old_root;
1256 old_generation = tm->generation; 1308 old_generation = tm->generation;
1257 logical = old_root->logical; 1309 logical = old_root->logical;
1258 } else { 1310 } else {
1259 logical = root->node->start; 1311 logical = eb_root->start;
1260 } 1312 }
1261 1313
1262 tm = tree_mod_log_search(root->fs_info, logical, time_seq); 1314 tm = tree_mod_log_search(root->fs_info, logical, time_seq);
1263 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 1315 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1264 btrfs_tree_read_unlock(root->node); 1316 btrfs_tree_read_unlock(eb_root);
1265 free_extent_buffer(root->node); 1317 free_extent_buffer(eb_root);
1266 blocksize = btrfs_level_size(root, old_root->level); 1318 blocksize = btrfs_level_size(root, old_root->level);
1267 old = read_tree_block(root, logical, blocksize, 0); 1319 old = read_tree_block(root, logical, blocksize, 0);
1268 if (!old) { 1320 if (!old || !extent_buffer_uptodate(old)) {
1321 free_extent_buffer(old);
1269 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", 1322 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
1270 logical); 1323 logical);
1271 WARN_ON(1); 1324 WARN_ON(1);
@@ -1274,13 +1327,13 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1274 free_extent_buffer(old); 1327 free_extent_buffer(old);
1275 } 1328 }
1276 } else if (old_root) { 1329 } else if (old_root) {
1277 btrfs_tree_read_unlock(root->node); 1330 btrfs_tree_read_unlock(eb_root);
1278 free_extent_buffer(root->node); 1331 free_extent_buffer(eb_root);
1279 eb = alloc_dummy_extent_buffer(logical, root->nodesize); 1332 eb = alloc_dummy_extent_buffer(logical, root->nodesize);
1280 } else { 1333 } else {
1281 eb = btrfs_clone_extent_buffer(root->node); 1334 eb = btrfs_clone_extent_buffer(eb_root);
1282 btrfs_tree_read_unlock(root->node); 1335 btrfs_tree_read_unlock(eb_root);
1283 free_extent_buffer(root->node); 1336 free_extent_buffer(eb_root);
1284 } 1337 }
1285 1338
1286 if (!eb) 1339 if (!eb)
@@ -1290,7 +1343,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1290 if (old_root) { 1343 if (old_root) {
1291 btrfs_set_header_bytenr(eb, eb->start); 1344 btrfs_set_header_bytenr(eb, eb->start);
1292 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); 1345 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1293 btrfs_set_header_owner(eb, root->root_key.objectid); 1346 btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
1294 btrfs_set_header_level(eb, old_root->level); 1347 btrfs_set_header_level(eb, old_root->level);
1295 btrfs_set_header_generation(eb, old_generation); 1348 btrfs_set_header_generation(eb, old_generation);
1296 } 1349 }
@@ -1307,15 +1360,15 @@ int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1307{ 1360{
1308 struct tree_mod_elem *tm; 1361 struct tree_mod_elem *tm;
1309 int level; 1362 int level;
1363 struct extent_buffer *eb_root = btrfs_root_node(root);
1310 1364
1311 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); 1365 tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
1312 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) { 1366 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1313 level = tm->old_root.level; 1367 level = tm->old_root.level;
1314 } else { 1368 } else {
1315 rcu_read_lock(); 1369 level = btrfs_header_level(eb_root);
1316 level = btrfs_header_level(root->node);
1317 rcu_read_unlock();
1318 } 1370 }
1371 free_extent_buffer(eb_root);
1319 1372
1320 return level; 1373 return level;
1321} 1374}
@@ -1510,8 +1563,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1510 if (!cur) { 1563 if (!cur) {
1511 cur = read_tree_block(root, blocknr, 1564 cur = read_tree_block(root, blocknr,
1512 blocksize, gen); 1565 blocksize, gen);
1513 if (!cur) 1566 if (!cur || !extent_buffer_uptodate(cur)) {
1567 free_extent_buffer(cur);
1514 return -EIO; 1568 return -EIO;
1569 }
1515 } else if (!uptodate) { 1570 } else if (!uptodate) {
1516 err = btrfs_read_buffer(cur, gen); 1571 err = btrfs_read_buffer(cur, gen);
1517 if (err) { 1572 if (err) {
@@ -1676,6 +1731,8 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
1676 struct extent_buffer *parent, int slot) 1731 struct extent_buffer *parent, int slot)
1677{ 1732{
1678 int level = btrfs_header_level(parent); 1733 int level = btrfs_header_level(parent);
1734 struct extent_buffer *eb;
1735
1679 if (slot < 0) 1736 if (slot < 0)
1680 return NULL; 1737 return NULL;
1681 if (slot >= btrfs_header_nritems(parent)) 1738 if (slot >= btrfs_header_nritems(parent))
@@ -1683,9 +1740,15 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
1683 1740
1684 BUG_ON(level == 0); 1741 BUG_ON(level == 0);
1685 1742
1686 return read_tree_block(root, btrfs_node_blockptr(parent, slot), 1743 eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
1687 btrfs_level_size(root, level - 1), 1744 btrfs_level_size(root, level - 1),
1688 btrfs_node_ptr_generation(parent, slot)); 1745 btrfs_node_ptr_generation(parent, slot));
1746 if (eb && !extent_buffer_uptodate(eb)) {
1747 free_extent_buffer(eb);
1748 eb = NULL;
1749 }
1750
1751 return eb;
1689} 1752}
1690 1753
1691/* 1754/*
@@ -1750,8 +1813,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1750 goto enospc; 1813 goto enospc;
1751 } 1814 }
1752 1815
1753 tree_mod_log_free_eb(root->fs_info, root->node); 1816 tree_mod_log_set_root_pointer(root, child, 1);
1754 tree_mod_log_set_root_pointer(root, child);
1755 rcu_assign_pointer(root->node, child); 1817 rcu_assign_pointer(root->node, child);
1756 1818
1757 add_root_to_dirty_list(root); 1819 add_root_to_dirty_list(root);
@@ -1815,7 +1877,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1815 if (btrfs_header_nritems(right) == 0) { 1877 if (btrfs_header_nritems(right) == 0) {
1816 clean_tree_block(trans, root, right); 1878 clean_tree_block(trans, root, right);
1817 btrfs_tree_unlock(right); 1879 btrfs_tree_unlock(right);
1818 del_ptr(trans, root, path, level + 1, pslot + 1); 1880 del_ptr(root, path, level + 1, pslot + 1);
1819 root_sub_used(root, right->len); 1881 root_sub_used(root, right->len);
1820 btrfs_free_tree_block(trans, root, right, 0, 1); 1882 btrfs_free_tree_block(trans, root, right, 0, 1);
1821 free_extent_buffer_stale(right); 1883 free_extent_buffer_stale(right);
@@ -1859,7 +1921,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1859 if (btrfs_header_nritems(mid) == 0) { 1921 if (btrfs_header_nritems(mid) == 0) {
1860 clean_tree_block(trans, root, mid); 1922 clean_tree_block(trans, root, mid);
1861 btrfs_tree_unlock(mid); 1923 btrfs_tree_unlock(mid);
1862 del_ptr(trans, root, path, level + 1, pslot); 1924 del_ptr(root, path, level + 1, pslot);
1863 root_sub_used(root, mid->len); 1925 root_sub_used(root, mid->len);
1864 btrfs_free_tree_block(trans, root, mid, 0, 1); 1926 btrfs_free_tree_block(trans, root, mid, 0, 1);
1865 free_extent_buffer_stale(mid); 1927 free_extent_buffer_stale(mid);
@@ -2207,9 +2269,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
2207 int no_skips = 0; 2269 int no_skips = 0;
2208 struct extent_buffer *t; 2270 struct extent_buffer *t;
2209 2271
2210 if (path->really_keep_locks)
2211 return;
2212
2213 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 2272 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2214 if (!path->nodes[i]) 2273 if (!path->nodes[i])
2215 break; 2274 break;
@@ -2257,7 +2316,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
2257{ 2316{
2258 int i; 2317 int i;
2259 2318
2260 if (path->keep_locks || path->really_keep_locks) 2319 if (path->keep_locks)
2261 return; 2320 return;
2262 2321
2263 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 2322 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -2490,7 +2549,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2490 if (!cow) 2549 if (!cow)
2491 write_lock_level = -1; 2550 write_lock_level = -1;
2492 2551
2493 if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level)) 2552 if (cow && (p->keep_locks || p->lowest_level))
2494 write_lock_level = BTRFS_MAX_LEVEL; 2553 write_lock_level = BTRFS_MAX_LEVEL;
2495 2554
2496 min_write_lock_level = write_lock_level; 2555 min_write_lock_level = write_lock_level;
@@ -2792,15 +2851,9 @@ again:
2792 btrfs_clear_path_blocking(p, b, 2851 btrfs_clear_path_blocking(p, b,
2793 BTRFS_READ_LOCK); 2852 BTRFS_READ_LOCK);
2794 } 2853 }
2854 b = tree_mod_log_rewind(root->fs_info, b, time_seq);
2795 p->locks[level] = BTRFS_READ_LOCK; 2855 p->locks[level] = BTRFS_READ_LOCK;
2796 p->nodes[level] = b; 2856 p->nodes[level] = b;
2797 b = tree_mod_log_rewind(root->fs_info, b, time_seq);
2798 if (b != p->nodes[level]) {
2799 btrfs_tree_unlock_rw(p->nodes[level],
2800 p->locks[level]);
2801 p->locks[level] = 0;
2802 p->nodes[level] = b;
2803 }
2804 } else { 2857 } else {
2805 p->slots[level] = slot; 2858 p->slots[level] = slot;
2806 unlock_up(p, level, lowest_unlock, 0, NULL); 2859 unlock_up(p, level, lowest_unlock, 0, NULL);
@@ -2899,8 +2952,7 @@ again:
2899 * higher levels 2952 * higher levels
2900 * 2953 *
2901 */ 2954 */
2902static void fixup_low_keys(struct btrfs_trans_handle *trans, 2955static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
2903 struct btrfs_root *root, struct btrfs_path *path,
2904 struct btrfs_disk_key *key, int level) 2956 struct btrfs_disk_key *key, int level)
2905{ 2957{
2906 int i; 2958 int i;
@@ -2925,8 +2977,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
2925 * This function isn't completely safe. It's the caller's responsibility 2977 * This function isn't completely safe. It's the caller's responsibility
2926 * that the new key won't break the order 2978 * that the new key won't break the order
2927 */ 2979 */
2928void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 2980void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
2929 struct btrfs_root *root, struct btrfs_path *path,
2930 struct btrfs_key *new_key) 2981 struct btrfs_key *new_key)
2931{ 2982{
2932 struct btrfs_disk_key disk_key; 2983 struct btrfs_disk_key disk_key;
@@ -2948,7 +2999,7 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
2948 btrfs_set_item_key(eb, &disk_key, slot); 2999 btrfs_set_item_key(eb, &disk_key, slot);
2949 btrfs_mark_buffer_dirty(eb); 3000 btrfs_mark_buffer_dirty(eb);
2950 if (slot == 0) 3001 if (slot == 0)
2951 fixup_low_keys(trans, root, path, &disk_key, 1); 3002 fixup_low_keys(root, path, &disk_key, 1);
2952} 3003}
2953 3004
2954/* 3005/*
@@ -3090,7 +3141,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
3090 */ 3141 */
3091static noinline int insert_new_root(struct btrfs_trans_handle *trans, 3142static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3092 struct btrfs_root *root, 3143 struct btrfs_root *root,
3093 struct btrfs_path *path, int level) 3144 struct btrfs_path *path, int level, int log_removal)
3094{ 3145{
3095 u64 lower_gen; 3146 u64 lower_gen;
3096 struct extent_buffer *lower; 3147 struct extent_buffer *lower;
@@ -3141,7 +3192,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
3141 btrfs_mark_buffer_dirty(c); 3192 btrfs_mark_buffer_dirty(c);
3142 3193
3143 old = root->node; 3194 old = root->node;
3144 tree_mod_log_set_root_pointer(root, c); 3195 tree_mod_log_set_root_pointer(root, c, log_removal);
3145 rcu_assign_pointer(root->node, c); 3196 rcu_assign_pointer(root->node, c);
3146 3197
3147 /* the super has an extra ref to root->node */ 3198 /* the super has an extra ref to root->node */
@@ -3222,8 +3273,17 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3222 c = path->nodes[level]; 3273 c = path->nodes[level];
3223 WARN_ON(btrfs_header_generation(c) != trans->transid); 3274 WARN_ON(btrfs_header_generation(c) != trans->transid);
3224 if (c == root->node) { 3275 if (c == root->node) {
3225 /* trying to split the root, lets make a new one */ 3276 /*
3226 ret = insert_new_root(trans, root, path, level + 1); 3277 * trying to split the root, lets make a new one
3278 *
3279 * tree mod log: We pass 0 as log_removal parameter to
3280 * insert_new_root, because that root buffer will be kept as a
3281 * normal node. We are going to log removal of half of the
3282 * elements below with tree_mod_log_eb_copy. We're holding a
3283 * tree lock on the buffer, which is why we cannot race with
3284 * other tree_mod_log users.
3285 */
3286 ret = insert_new_root(trans, root, path, level + 1, 0);
3227 if (ret) 3287 if (ret)
3228 return ret; 3288 return ret;
3229 } else { 3289 } else {
@@ -3677,7 +3737,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
3677 clean_tree_block(trans, root, right); 3737 clean_tree_block(trans, root, right);
3678 3738
3679 btrfs_item_key(right, &disk_key, 0); 3739 btrfs_item_key(right, &disk_key, 0);
3680 fixup_low_keys(trans, root, path, &disk_key, 1); 3740 fixup_low_keys(root, path, &disk_key, 1);
3681 3741
3682 /* then fixup the leaf pointer in the path */ 3742 /* then fixup the leaf pointer in the path */
3683 if (path->slots[0] < push_items) { 3743 if (path->slots[0] < push_items) {
@@ -3943,7 +4003,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
3943 } 4003 }
3944 4004
3945 if (!path->nodes[1]) { 4005 if (!path->nodes[1]) {
3946 ret = insert_new_root(trans, root, path, 1); 4006 ret = insert_new_root(trans, root, path, 1, 1);
3947 if (ret) 4007 if (ret)
3948 return ret; 4008 return ret;
3949 } 4009 }
@@ -4037,8 +4097,7 @@ again:
4037 path->nodes[0] = right; 4097 path->nodes[0] = right;
4038 path->slots[0] = 0; 4098 path->slots[0] = 0;
4039 if (path->slots[1] == 0) 4099 if (path->slots[1] == 0)
4040 fixup_low_keys(trans, root, path, 4100 fixup_low_keys(root, path, &disk_key, 1);
4041 &disk_key, 1);
4042 } 4101 }
4043 btrfs_mark_buffer_dirty(right); 4102 btrfs_mark_buffer_dirty(right);
4044 return ret; 4103 return ret;
@@ -4254,7 +4313,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4254 return ret; 4313 return ret;
4255 4314
4256 path->slots[0]++; 4315 path->slots[0]++;
4257 setup_items_for_insert(trans, root, path, new_key, &item_size, 4316 setup_items_for_insert(root, path, new_key, &item_size,
4258 item_size, item_size + 4317 item_size, item_size +
4259 sizeof(struct btrfs_item), 1); 4318 sizeof(struct btrfs_item), 1);
4260 leaf = path->nodes[0]; 4319 leaf = path->nodes[0];
@@ -4271,9 +4330,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4271 * off the end of the item or if we shift the item to chop bytes off 4330 * off the end of the item or if we shift the item to chop bytes off
4272 * the front. 4331 * the front.
4273 */ 4332 */
4274void btrfs_truncate_item(struct btrfs_trans_handle *trans, 4333void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
4275 struct btrfs_root *root,
4276 struct btrfs_path *path,
4277 u32 new_size, int from_end) 4334 u32 new_size, int from_end)
4278{ 4335{
4279 int slot; 4336 int slot;
@@ -4357,7 +4414,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
4357 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 4414 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4358 btrfs_set_item_key(leaf, &disk_key, slot); 4415 btrfs_set_item_key(leaf, &disk_key, slot);
4359 if (slot == 0) 4416 if (slot == 0)
4360 fixup_low_keys(trans, root, path, &disk_key, 1); 4417 fixup_low_keys(root, path, &disk_key, 1);
4361 } 4418 }
4362 4419
4363 item = btrfs_item_nr(leaf, slot); 4420 item = btrfs_item_nr(leaf, slot);
@@ -4373,8 +4430,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
4373/* 4430/*
4374 * make the item pointed to by the path bigger, data_size is the new size. 4431 * make the item pointed to by the path bigger, data_size is the new size.
4375 */ 4432 */
4376void btrfs_extend_item(struct btrfs_trans_handle *trans, 4433void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
4377 struct btrfs_root *root, struct btrfs_path *path,
4378 u32 data_size) 4434 u32 data_size)
4379{ 4435{
4380 int slot; 4436 int slot;
@@ -4444,8 +4500,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
4444 * to save stack depth by doing the bulk of the work in a function 4500 * to save stack depth by doing the bulk of the work in a function
4445 * that doesn't call btrfs_search_slot 4501 * that doesn't call btrfs_search_slot
4446 */ 4502 */
4447void setup_items_for_insert(struct btrfs_trans_handle *trans, 4503void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4448 struct btrfs_root *root, struct btrfs_path *path,
4449 struct btrfs_key *cpu_key, u32 *data_size, 4504 struct btrfs_key *cpu_key, u32 *data_size,
4450 u32 total_data, u32 total_size, int nr) 4505 u32 total_data, u32 total_size, int nr)
4451{ 4506{
@@ -4521,7 +4576,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
4521 4576
4522 if (slot == 0) { 4577 if (slot == 0) {
4523 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 4578 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
4524 fixup_low_keys(trans, root, path, &disk_key, 1); 4579 fixup_low_keys(root, path, &disk_key, 1);
4525 } 4580 }
4526 btrfs_unlock_up_safe(path, 1); 4581 btrfs_unlock_up_safe(path, 1);
4527 btrfs_mark_buffer_dirty(leaf); 4582 btrfs_mark_buffer_dirty(leaf);
@@ -4561,7 +4616,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4561 slot = path->slots[0]; 4616 slot = path->slots[0];
4562 BUG_ON(slot < 0); 4617 BUG_ON(slot < 0);
4563 4618
4564 setup_items_for_insert(trans, root, path, cpu_key, data_size, 4619 setup_items_for_insert(root, path, cpu_key, data_size,
4565 total_data, total_size, nr); 4620 total_data, total_size, nr);
4566 return 0; 4621 return 0;
4567} 4622}
@@ -4599,8 +4654,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
4599 * the tree should have been previously balanced so the deletion does not 4654 * the tree should have been previously balanced so the deletion does not
4600 * empty a node. 4655 * empty a node.
4601 */ 4656 */
4602static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 4657static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4603 struct btrfs_path *path, int level, int slot) 4658 int level, int slot)
4604{ 4659{
4605 struct extent_buffer *parent = path->nodes[level]; 4660 struct extent_buffer *parent = path->nodes[level];
4606 u32 nritems; 4661 u32 nritems;
@@ -4632,7 +4687,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4632 struct btrfs_disk_key disk_key; 4687 struct btrfs_disk_key disk_key;
4633 4688
4634 btrfs_node_key(parent, &disk_key, 0); 4689 btrfs_node_key(parent, &disk_key, 0);
4635 fixup_low_keys(trans, root, path, &disk_key, level + 1); 4690 fixup_low_keys(root, path, &disk_key, level + 1);
4636 } 4691 }
4637 btrfs_mark_buffer_dirty(parent); 4692 btrfs_mark_buffer_dirty(parent);
4638} 4693}
@@ -4653,7 +4708,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4653 struct extent_buffer *leaf) 4708 struct extent_buffer *leaf)
4654{ 4709{
4655 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 4710 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
4656 del_ptr(trans, root, path, 1, path->slots[1]); 4711 del_ptr(root, path, 1, path->slots[1]);
4657 4712
4658 /* 4713 /*
4659 * btrfs_free_extent is expensive, we want to make sure we 4714 * btrfs_free_extent is expensive, we want to make sure we
@@ -4734,7 +4789,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4734 struct btrfs_disk_key disk_key; 4789 struct btrfs_disk_key disk_key;
4735 4790
4736 btrfs_item_key(leaf, &disk_key, 0); 4791 btrfs_item_key(leaf, &disk_key, 0);
4737 fixup_low_keys(trans, root, path, &disk_key, 1); 4792 fixup_low_keys(root, path, &disk_key, 1);
4738 } 4793 }
4739 4794
4740 /* delete the leaf if it is mostly empty */ 4795 /* delete the leaf if it is mostly empty */
@@ -5454,139 +5509,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
5454 return btrfs_next_old_leaf(root, path, 0); 5509 return btrfs_next_old_leaf(root, path, 0);
5455} 5510}
5456 5511
5457/* Release the path up to but not including the given level */
5458static void btrfs_release_level(struct btrfs_path *path, int level)
5459{
5460 int i;
5461
5462 for (i = 0; i < level; i++) {
5463 path->slots[i] = 0;
5464 if (!path->nodes[i])
5465 continue;
5466 if (path->locks[i]) {
5467 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
5468 path->locks[i] = 0;
5469 }
5470 free_extent_buffer(path->nodes[i]);
5471 path->nodes[i] = NULL;
5472 }
5473}
5474
5475/*
5476 * This function assumes 2 things
5477 *
5478 * 1) You are using path->keep_locks
5479 * 2) You are not inserting items.
5480 *
5481 * If either of these are not true do not use this function. If you need a next
5482 * leaf with either of these not being true then this function can be easily
5483 * adapted to do that, but at the moment these are the limitations.
5484 */
5485int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
5486 struct btrfs_root *root, struct btrfs_path *path,
5487 int del)
5488{
5489 struct extent_buffer *b;
5490 struct btrfs_key key;
5491 u32 nritems;
5492 int level = 1;
5493 int slot;
5494 int ret = 1;
5495 int write_lock_level = BTRFS_MAX_LEVEL;
5496 int ins_len = del ? -1 : 0;
5497
5498 WARN_ON(!(path->keep_locks || path->really_keep_locks));
5499
5500 nritems = btrfs_header_nritems(path->nodes[0]);
5501 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
5502
5503 while (path->nodes[level]) {
5504 nritems = btrfs_header_nritems(path->nodes[level]);
5505 if (!(path->locks[level] & BTRFS_WRITE_LOCK)) {
5506search:
5507 btrfs_release_path(path);
5508 ret = btrfs_search_slot(trans, root, &key, path,
5509 ins_len, 1);
5510 if (ret < 0)
5511 goto out;
5512 level = 1;
5513 continue;
5514 }
5515
5516 if (path->slots[level] >= nritems - 1) {
5517 level++;
5518 continue;
5519 }
5520
5521 btrfs_release_level(path, level);
5522 break;
5523 }
5524
5525 if (!path->nodes[level]) {
5526 ret = 1;
5527 goto out;
5528 }
5529
5530 path->slots[level]++;
5531 b = path->nodes[level];
5532
5533 while (b) {
5534 level = btrfs_header_level(b);
5535
5536 if (!should_cow_block(trans, root, b))
5537 goto cow_done;
5538
5539 btrfs_set_path_blocking(path);
5540 ret = btrfs_cow_block(trans, root, b,
5541 path->nodes[level + 1],
5542 path->slots[level + 1], &b);
5543 if (ret)
5544 goto out;
5545cow_done:
5546 path->nodes[level] = b;
5547 btrfs_clear_path_blocking(path, NULL, 0);
5548 if (level != 0) {
5549 ret = setup_nodes_for_search(trans, root, path, b,
5550 level, ins_len,
5551 &write_lock_level);
5552 if (ret == -EAGAIN)
5553 goto search;
5554 if (ret)
5555 goto out;
5556
5557 b = path->nodes[level];
5558 slot = path->slots[level];
5559
5560 ret = read_block_for_search(trans, root, path,
5561 &b, level, slot, &key, 0);
5562 if (ret == -EAGAIN)
5563 goto search;
5564 if (ret)
5565 goto out;
5566 level = btrfs_header_level(b);
5567 if (!btrfs_try_tree_write_lock(b)) {
5568 btrfs_set_path_blocking(path);
5569 btrfs_tree_lock(b);
5570 btrfs_clear_path_blocking(path, b,
5571 BTRFS_WRITE_LOCK);
5572 }
5573 path->locks[level] = BTRFS_WRITE_LOCK;
5574 path->nodes[level] = b;
5575 path->slots[level] = 0;
5576 } else {
5577 path->slots[level] = 0;
5578 ret = 0;
5579 break;
5580 }
5581 }
5582
5583out:
5584 if (ret)
5585 btrfs_release_path(path);
5586
5587 return ret;
5588}
5589
5590int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 5512int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5591 u64 time_seq) 5513 u64 time_seq)
5592{ 5514{
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d82922179db..63c328a9ce95 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -340,6 +340,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
340 */ 340 */
341#define BTRFS_FS_STATE_ERROR 0 341#define BTRFS_FS_STATE_ERROR 0
342#define BTRFS_FS_STATE_REMOUNTING 1 342#define BTRFS_FS_STATE_REMOUNTING 1
343#define BTRFS_FS_STATE_TRANS_ABORTED 2
343 344
344/* Super block flags */ 345/* Super block flags */
345/* Errors detected */ 346/* Errors detected */
@@ -508,6 +509,7 @@ struct btrfs_super_block {
508 509
509#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) 510#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
510#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) 511#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
512#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
511 513
512#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 514#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
513#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 515#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
@@ -518,7 +520,8 @@ struct btrfs_super_block {
518 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ 520 BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
519 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ 521 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
520 BTRFS_FEATURE_INCOMPAT_RAID56 | \ 522 BTRFS_FEATURE_INCOMPAT_RAID56 | \
521 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF) 523 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
524 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
522 525
523/* 526/*
524 * A leaf is full of items. offset and size tell us where to find 527 * A leaf is full of items. offset and size tell us where to find
@@ -583,7 +586,6 @@ struct btrfs_path {
583 unsigned int skip_locking:1; 586 unsigned int skip_locking:1;
584 unsigned int leave_spinning:1; 587 unsigned int leave_spinning:1;
585 unsigned int search_commit_root:1; 588 unsigned int search_commit_root:1;
586 unsigned int really_keep_locks:1;
587}; 589};
588 590
589/* 591/*
@@ -1019,9 +1021,9 @@ struct btrfs_block_group_item {
1019 */ 1021 */
1020#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0) 1022#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
1021/* 1023/*
1022 * SCANNING is set during the initialization phase 1024 * RESCAN is set during the initialization phase
1023 */ 1025 */
1024#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1) 1026#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1)
1025/* 1027/*
1026 * Some qgroup entries are known to be out of date, 1028 * Some qgroup entries are known to be out of date,
1027 * either because the configuration has changed in a way that 1029 * either because the configuration has changed in a way that
@@ -1050,7 +1052,7 @@ struct btrfs_qgroup_status_item {
1050 * only used during scanning to record the progress 1052 * only used during scanning to record the progress
1051 * of the scan. It contains a logical address 1053 * of the scan. It contains a logical address
1052 */ 1054 */
1053 __le64 scan; 1055 __le64 rescan;
1054} __attribute__ ((__packed__)); 1056} __attribute__ ((__packed__));
1055 1057
1056struct btrfs_qgroup_info_item { 1058struct btrfs_qgroup_info_item {
@@ -1360,6 +1362,17 @@ struct btrfs_fs_info {
1360 wait_queue_head_t transaction_blocked_wait; 1362 wait_queue_head_t transaction_blocked_wait;
1361 wait_queue_head_t async_submit_wait; 1363 wait_queue_head_t async_submit_wait;
1362 1364
1365 /*
1366 * Used to protect the incompat_flags, compat_flags, compat_ro_flags
1367 * when they are updated.
1368 *
1369 * Because we do not clear the flags for ever, so we needn't use
1370 * the lock on the read side.
1371 *
1372 * We also needn't use the lock when we mount the fs, because
1373 * there is no other task which will update the flag.
1374 */
1375 spinlock_t super_lock;
1363 struct btrfs_super_block *super_copy; 1376 struct btrfs_super_block *super_copy;
1364 struct btrfs_super_block *super_for_commit; 1377 struct btrfs_super_block *super_for_commit;
1365 struct block_device *__bdev; 1378 struct block_device *__bdev;
@@ -1409,7 +1422,7 @@ struct btrfs_fs_info {
1409 1422
1410 /* this protects tree_mod_seq_list */ 1423 /* this protects tree_mod_seq_list */
1411 spinlock_t tree_mod_seq_lock; 1424 spinlock_t tree_mod_seq_lock;
1412 atomic_t tree_mod_seq; 1425 atomic64_t tree_mod_seq;
1413 struct list_head tree_mod_seq_list; 1426 struct list_head tree_mod_seq_list;
1414 struct seq_list tree_mod_seq_elem; 1427 struct seq_list tree_mod_seq_elem;
1415 1428
@@ -1581,12 +1594,20 @@ struct btrfs_fs_info {
1581 struct rb_root qgroup_tree; 1594 struct rb_root qgroup_tree;
1582 spinlock_t qgroup_lock; 1595 spinlock_t qgroup_lock;
1583 1596
1597 /* protect user change for quota operations */
1598 struct mutex qgroup_ioctl_lock;
1599
1584 /* list of dirty qgroups to be written at next commit */ 1600 /* list of dirty qgroups to be written at next commit */
1585 struct list_head dirty_qgroups; 1601 struct list_head dirty_qgroups;
1586 1602
1587 /* used by btrfs_qgroup_record_ref for an efficient tree traversal */ 1603 /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
1588 u64 qgroup_seq; 1604 u64 qgroup_seq;
1589 1605
1606 /* qgroup rescan items */
1607 struct mutex qgroup_rescan_lock; /* protects the progress item */
1608 struct btrfs_key qgroup_rescan_progress;
1609 struct btrfs_workers qgroup_rescan_workers;
1610
1590 /* filesystem state */ 1611 /* filesystem state */
1591 unsigned long fs_state; 1612 unsigned long fs_state;
1592 1613
@@ -1808,6 +1829,12 @@ struct btrfs_ioctl_defrag_range_args {
1808 */ 1829 */
1809#define BTRFS_EXTENT_ITEM_KEY 168 1830#define BTRFS_EXTENT_ITEM_KEY 168
1810 1831
1832/*
1833 * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
1834 * the length, so we save the level in key->offset instead of the length.
1835 */
1836#define BTRFS_METADATA_ITEM_KEY 169
1837
1811#define BTRFS_TREE_BLOCK_REF_KEY 176 1838#define BTRFS_TREE_BLOCK_REF_KEY 176
1812 1839
1813#define BTRFS_EXTENT_DATA_REF_KEY 178 1840#define BTRFS_EXTENT_DATA_REF_KEY 178
@@ -2766,8 +2793,10 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
2766 2793
2767static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 2794static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
2768{ 2795{
2769 int t = btrfs_super_csum_type(s); 2796 u16 t = btrfs_super_csum_type(s);
2770 BUG_ON(t >= ARRAY_SIZE(btrfs_csum_sizes)); 2797 /*
2798 * csum type is validated at mount time
2799 */
2771 return btrfs_csum_sizes[t]; 2800 return btrfs_csum_sizes[t];
2772} 2801}
2773 2802
@@ -2864,8 +2893,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
2864 version, 64); 2893 version, 64);
2865BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, 2894BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
2866 flags, 64); 2895 flags, 64);
2867BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item, 2896BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
2868 scan, 64); 2897 rescan, 64);
2869 2898
2870/* btrfs_qgroup_info_item */ 2899/* btrfs_qgroup_info_item */
2871BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, 2900BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
@@ -3005,7 +3034,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
3005int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 3034int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
3006int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 3035int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
3007 struct btrfs_root *root, u64 bytenr, 3036 struct btrfs_root *root, u64 bytenr,
3008 u64 num_bytes, u64 *refs, u64 *flags); 3037 u64 offset, int metadata, u64 *refs, u64 *flags);
3009int btrfs_pin_extent(struct btrfs_root *root, 3038int btrfs_pin_extent(struct btrfs_root *root,
3010 u64 bytenr, u64 num, int reserved); 3039 u64 bytenr, u64 num, int reserved);
3011int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, 3040int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
@@ -3017,8 +3046,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
3017 struct btrfs_fs_info *info, 3046 struct btrfs_fs_info *info,
3018 u64 bytenr); 3047 u64 bytenr);
3019void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 3048void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
3020u64 btrfs_find_block_group(struct btrfs_root *root,
3021 u64 search_start, u64 search_hint, int owner);
3022struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 3049struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3023 struct btrfs_root *root, u32 blocksize, 3050 struct btrfs_root *root, u32 blocksize,
3024 u64 parent, u64 root_objectid, 3051 u64 parent, u64 root_objectid,
@@ -3028,10 +3055,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
3028 struct btrfs_root *root, 3055 struct btrfs_root *root,
3029 struct extent_buffer *buf, 3056 struct extent_buffer *buf,
3030 u64 parent, int last_ref); 3057 u64 parent, int last_ref);
3031struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3032 struct btrfs_root *root,
3033 u64 bytenr, u32 blocksize,
3034 int level);
3035int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 3058int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
3036 struct btrfs_root *root, 3059 struct btrfs_root *root,
3037 u64 root_objectid, u64 owner, 3060 u64 root_objectid, u64 owner,
@@ -3044,7 +3067,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3044 struct btrfs_root *root, 3067 struct btrfs_root *root,
3045 u64 num_bytes, u64 min_alloc_size, 3068 u64 num_bytes, u64 min_alloc_size,
3046 u64 empty_size, u64 hint_byte, 3069 u64 empty_size, u64 hint_byte,
3047 struct btrfs_key *ins, u64 data); 3070 struct btrfs_key *ins, int is_data);
3048int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3071int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3049 struct extent_buffer *buf, int full_backref, int for_cow); 3072 struct extent_buffer *buf, int full_backref, int for_cow);
3050int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3073int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3084,7 +3107,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
3084 struct btrfs_root *root, u64 group_start); 3107 struct btrfs_root *root, u64 group_start);
3085void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, 3108void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
3086 struct btrfs_root *root); 3109 struct btrfs_root *root);
3087u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
3088u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); 3110u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
3089void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 3111void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
3090 3112
@@ -3161,8 +3183,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
3161int btrfs_previous_item(struct btrfs_root *root, 3183int btrfs_previous_item(struct btrfs_root *root,
3162 struct btrfs_path *path, u64 min_objectid, 3184 struct btrfs_path *path, u64 min_objectid,
3163 int type); 3185 int type);
3164void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 3186void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
3165 struct btrfs_root *root, struct btrfs_path *path,
3166 struct btrfs_key *new_key); 3187 struct btrfs_key *new_key);
3167struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 3188struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
3168struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); 3189struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -3198,12 +3219,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
3198 struct extent_buffer **cow_ret, u64 new_root_objectid); 3219 struct extent_buffer **cow_ret, u64 new_root_objectid);
3199int btrfs_block_can_be_shared(struct btrfs_root *root, 3220int btrfs_block_can_be_shared(struct btrfs_root *root,
3200 struct extent_buffer *buf); 3221 struct extent_buffer *buf);
3201void btrfs_extend_item(struct btrfs_trans_handle *trans, 3222void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
3202 struct btrfs_root *root, struct btrfs_path *path,
3203 u32 data_size); 3223 u32 data_size);
3204void btrfs_truncate_item(struct btrfs_trans_handle *trans, 3224void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
3205 struct btrfs_root *root,
3206 struct btrfs_path *path,
3207 u32 new_size, int from_end); 3225 u32 new_size, int from_end);
3208int btrfs_split_item(struct btrfs_trans_handle *trans, 3226int btrfs_split_item(struct btrfs_trans_handle *trans,
3209 struct btrfs_root *root, 3227 struct btrfs_root *root,
@@ -3243,8 +3261,7 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
3243 return btrfs_del_items(trans, root, path, path->slots[0], 1); 3261 return btrfs_del_items(trans, root, path, path->slots[0], 1);
3244} 3262}
3245 3263
3246void setup_items_for_insert(struct btrfs_trans_handle *trans, 3264void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
3247 struct btrfs_root *root, struct btrfs_path *path,
3248 struct btrfs_key *cpu_key, u32 *data_size, 3265 struct btrfs_key *cpu_key, u32 *data_size,
3249 u32 total_data, u32 total_size, int nr); 3266 u32 total_data, u32 total_size, int nr);
3250int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 3267int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
@@ -3264,9 +3281,6 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
3264} 3281}
3265 3282
3266int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 3283int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
3267int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
3268 struct btrfs_root *root, struct btrfs_path *path,
3269 int del);
3270int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 3284int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
3271 u64 time_seq); 3285 u64 time_seq);
3272static inline int btrfs_next_old_item(struct btrfs_root *root, 3286static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3281,7 +3295,6 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
3281{ 3295{
3282 return btrfs_next_old_item(root, p, 0); 3296 return btrfs_next_old_item(root, p, 0);
3283} 3297}
3284int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
3285int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 3298int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
3286int __must_check btrfs_drop_snapshot(struct btrfs_root *root, 3299int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
3287 struct btrfs_block_rsv *block_rsv, 3300 struct btrfs_block_rsv *block_rsv,
@@ -3318,10 +3331,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
3318 struct seq_list *elem); 3331 struct seq_list *elem);
3319void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 3332void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
3320 struct seq_list *elem); 3333 struct seq_list *elem);
3321static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) 3334u64 btrfs_tree_mod_seq_prev(u64 seq);
3322{
3323 return atomic_inc_return(&fs_info->tree_mod_seq);
3324}
3325int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq); 3335int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
3326 3336
3327/* root-item.c */ 3337/* root-item.c */
@@ -3345,9 +3355,8 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
3345 struct btrfs_root *root, 3355 struct btrfs_root *root,
3346 struct btrfs_key *key, 3356 struct btrfs_key *key,
3347 struct btrfs_root_item *item); 3357 struct btrfs_root_item *item);
3348void btrfs_read_root_item(struct btrfs_root *root, 3358void btrfs_read_root_item(struct extent_buffer *eb, int slot,
3349 struct extent_buffer *eb, int slot, 3359 struct btrfs_root_item *item);
3350 struct btrfs_root_item *item);
3351int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct 3360int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
3352 btrfs_root_item *item, struct btrfs_key *key); 3361 btrfs_root_item *item, struct btrfs_key *key);
3353int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); 3362int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
@@ -3380,9 +3389,6 @@ struct btrfs_dir_item *
3380btrfs_search_dir_index_item(struct btrfs_root *root, 3389btrfs_search_dir_index_item(struct btrfs_root *root,
3381 struct btrfs_path *path, u64 dirid, 3390 struct btrfs_path *path, u64 dirid,
3382 const char *name, int name_len); 3391 const char *name, int name_len);
3383struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
3384 struct btrfs_path *path,
3385 const char *name, int name_len);
3386int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, 3392int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
3387 struct btrfs_root *root, 3393 struct btrfs_root *root,
3388 struct btrfs_path *path, 3394 struct btrfs_path *path,
@@ -3460,16 +3466,11 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
3460 struct btrfs_root *root, 3466 struct btrfs_root *root,
3461 struct btrfs_path *path, u64 objectid, 3467 struct btrfs_path *path, u64 objectid,
3462 u64 bytenr, int mod); 3468 u64 bytenr, int mod);
3463u64 btrfs_file_extent_length(struct btrfs_path *path);
3464int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 3469int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
3465 struct btrfs_root *root, 3470 struct btrfs_root *root,
3466 struct btrfs_ordered_sum *sums); 3471 struct btrfs_ordered_sum *sums);
3467int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, 3472int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
3468 struct bio *bio, u64 file_start, int contig); 3473 struct bio *bio, u64 file_start, int contig);
3469struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
3470 struct btrfs_root *root,
3471 struct btrfs_path *path,
3472 u64 bytenr, int cow);
3473int btrfs_csum_truncate(struct btrfs_trans_handle *trans, 3474int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
3474 struct btrfs_root *root, struct btrfs_path *path, 3475 struct btrfs_root *root, struct btrfs_path *path,
3475 u64 isize); 3476 u64 isize);
@@ -3531,8 +3532,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3531int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 3532int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
3532int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 3533int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
3533 struct extent_state **cached_state); 3534 struct extent_state **cached_state);
3534int btrfs_writepages(struct address_space *mapping,
3535 struct writeback_control *wbc);
3536int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 3535int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
3537 struct btrfs_root *new_root, u64 new_dirid); 3536 struct btrfs_root *new_root, u64 new_dirid);
3538int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, 3537int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
@@ -3542,7 +3541,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
3542int btrfs_readpage(struct file *file, struct page *page); 3541int btrfs_readpage(struct file *file, struct page *page);
3543void btrfs_evict_inode(struct inode *inode); 3542void btrfs_evict_inode(struct inode *inode);
3544int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 3543int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
3545int btrfs_dirty_inode(struct inode *inode);
3546struct inode *btrfs_alloc_inode(struct super_block *sb); 3544struct inode *btrfs_alloc_inode(struct super_block *sb);
3547void btrfs_destroy_inode(struct inode *inode); 3545void btrfs_destroy_inode(struct inode *inode);
3548int btrfs_drop_inode(struct inode *inode); 3546int btrfs_drop_inode(struct inode *inode);
@@ -3560,7 +3558,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
3560int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, 3558int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3561 struct btrfs_root *root, struct inode *inode); 3559 struct btrfs_root *root, struct inode *inode);
3562int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); 3560int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
3563int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
3564int btrfs_orphan_cleanup(struct btrfs_root *root); 3561int btrfs_orphan_cleanup(struct btrfs_root *root);
3565void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, 3562void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
3566 struct btrfs_root *root); 3563 struct btrfs_root *root);
@@ -3611,7 +3608,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
3611int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 3608int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
3612 struct inode *inode, u64 start, u64 end); 3609 struct inode *inode, u64 start, u64 end);
3613int btrfs_release_file(struct inode *inode, struct file *file); 3610int btrfs_release_file(struct inode *inode, struct file *file);
3614void btrfs_drop_pages(struct page **pages, size_t num_pages);
3615int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, 3611int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
3616 struct page **pages, size_t num_pages, 3612 struct page **pages, size_t num_pages,
3617 loff_t pos, size_t write_bytes, 3613 loff_t pos, size_t write_bytes,
@@ -3634,14 +3630,31 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
3634 3630
3635#ifdef CONFIG_PRINTK 3631#ifdef CONFIG_PRINTK
3636__printf(2, 3) 3632__printf(2, 3)
3637void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...); 3633void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
3638#else 3634#else
3639static inline __printf(2, 3) 3635static inline __printf(2, 3)
3640void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) 3636void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
3641{ 3637{
3642} 3638}
3643#endif 3639#endif
3644 3640
3641#define btrfs_emerg(fs_info, fmt, args...) \
3642 btrfs_printk(fs_info, KERN_EMERG fmt, ##args)
3643#define btrfs_alert(fs_info, fmt, args...) \
3644 btrfs_printk(fs_info, KERN_ALERT fmt, ##args)
3645#define btrfs_crit(fs_info, fmt, args...) \
3646 btrfs_printk(fs_info, KERN_CRIT fmt, ##args)
3647#define btrfs_err(fs_info, fmt, args...) \
3648 btrfs_printk(fs_info, KERN_ERR fmt, ##args)
3649#define btrfs_warn(fs_info, fmt, args...) \
3650 btrfs_printk(fs_info, KERN_WARNING fmt, ##args)
3651#define btrfs_notice(fs_info, fmt, args...) \
3652 btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
3653#define btrfs_info(fs_info, fmt, args...) \
3654 btrfs_printk(fs_info, KERN_INFO fmt, ##args)
3655#define btrfs_debug(fs_info, fmt, args...) \
3656 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
3657
3645__printf(5, 6) 3658__printf(5, 6)
3646void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, 3659void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
3647 unsigned int line, int errno, const char *fmt, ...); 3660 unsigned int line, int errno, const char *fmt, ...);
@@ -3663,11 +3676,28 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
3663 disk_super = fs_info->super_copy; 3676 disk_super = fs_info->super_copy;
3664 features = btrfs_super_incompat_flags(disk_super); 3677 features = btrfs_super_incompat_flags(disk_super);
3665 if (!(features & flag)) { 3678 if (!(features & flag)) {
3666 features |= flag; 3679 spin_lock(&fs_info->super_lock);
3667 btrfs_set_super_incompat_flags(disk_super, features); 3680 features = btrfs_super_incompat_flags(disk_super);
3681 if (!(features & flag)) {
3682 features |= flag;
3683 btrfs_set_super_incompat_flags(disk_super, features);
3684 printk(KERN_INFO "btrfs: setting %llu feature flag\n",
3685 flag);
3686 }
3687 spin_unlock(&fs_info->super_lock);
3668 } 3688 }
3669} 3689}
3670 3690
3691#define btrfs_fs_incompat(fs_info, opt) \
3692 __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
3693
3694static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
3695{
3696 struct btrfs_super_block *disk_super;
3697 disk_super = fs_info->super_copy;
3698 return !!(btrfs_super_incompat_flags(disk_super) & flag);
3699}
3700
3671/* 3701/*
3672 * Call btrfs_abort_transaction as early as possible when an error condition is 3702 * Call btrfs_abort_transaction as early as possible when an error condition is
3673 * detected, that way the exact line number is reported. 3703 * detected, that way the exact line number is reported.
@@ -3753,7 +3783,6 @@ void btrfs_scrub_continue_super(struct btrfs_root *root);
3753int btrfs_scrub_cancel(struct btrfs_fs_info *info); 3783int btrfs_scrub_cancel(struct btrfs_fs_info *info);
3754int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, 3784int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
3755 struct btrfs_device *dev); 3785 struct btrfs_device *dev);
3756int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
3757int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, 3786int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3758 struct btrfs_scrub_progress *progress); 3787 struct btrfs_scrub_progress *progress);
3759 3788
@@ -3784,7 +3813,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
3784 struct btrfs_fs_info *fs_info); 3813 struct btrfs_fs_info *fs_info);
3785int btrfs_quota_disable(struct btrfs_trans_handle *trans, 3814int btrfs_quota_disable(struct btrfs_trans_handle *trans,
3786 struct btrfs_fs_info *fs_info); 3815 struct btrfs_fs_info *fs_info);
3787int btrfs_quota_rescan(struct btrfs_fs_info *fs_info); 3816int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
3788int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 3817int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
3789 struct btrfs_fs_info *fs_info, u64 src, u64 dst); 3818 struct btrfs_fs_info *fs_info, u64 src, u64 dst);
3790int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 3819int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 14fce27b4780..f26f38ccd194 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -202,7 +202,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
202 spin_unlock(&root->lock); 202 spin_unlock(&root->lock);
203} 203}
204 204
205struct btrfs_delayed_node *btrfs_first_delayed_node( 205static struct btrfs_delayed_node *btrfs_first_delayed_node(
206 struct btrfs_delayed_root *delayed_root) 206 struct btrfs_delayed_root *delayed_root)
207{ 207{
208 struct list_head *p; 208 struct list_head *p;
@@ -221,7 +221,7 @@ out:
221 return node; 221 return node;
222} 222}
223 223
224struct btrfs_delayed_node *btrfs_next_delayed_node( 224static struct btrfs_delayed_node *btrfs_next_delayed_node(
225 struct btrfs_delayed_node *node) 225 struct btrfs_delayed_node *node)
226{ 226{
227 struct btrfs_delayed_root *delayed_root; 227 struct btrfs_delayed_root *delayed_root;
@@ -282,7 +282,7 @@ static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
282 __btrfs_release_delayed_node(node, 0); 282 __btrfs_release_delayed_node(node, 0);
283} 283}
284 284
285struct btrfs_delayed_node *btrfs_first_prepared_delayed_node( 285static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
286 struct btrfs_delayed_root *delayed_root) 286 struct btrfs_delayed_root *delayed_root)
287{ 287{
288 struct list_head *p; 288 struct list_head *p;
@@ -308,7 +308,7 @@ static inline void btrfs_release_prepared_delayed_node(
308 __btrfs_release_delayed_node(node, 1); 308 __btrfs_release_delayed_node(node, 1);
309} 309}
310 310
311struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len) 311static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
312{ 312{
313 struct btrfs_delayed_item *item; 313 struct btrfs_delayed_item *item;
314 item = kmalloc(sizeof(*item) + data_len, GFP_NOFS); 314 item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
@@ -383,7 +383,7 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
383 return NULL; 383 return NULL;
384} 384}
385 385
386struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item( 386static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
387 struct btrfs_delayed_node *delayed_node, 387 struct btrfs_delayed_node *delayed_node,
388 struct btrfs_key *key) 388 struct btrfs_key *key)
389{ 389{
@@ -394,45 +394,6 @@ struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
394 return item; 394 return item;
395} 395}
396 396
397struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
398 struct btrfs_delayed_node *delayed_node,
399 struct btrfs_key *key)
400{
401 struct btrfs_delayed_item *item;
402
403 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
404 NULL, NULL);
405 return item;
406}
407
408struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
409 struct btrfs_delayed_node *delayed_node,
410 struct btrfs_key *key)
411{
412 struct btrfs_delayed_item *item, *next;
413
414 item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
415 NULL, &next);
416 if (!item)
417 item = next;
418
419 return item;
420}
421
422struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
423 struct btrfs_delayed_node *delayed_node,
424 struct btrfs_key *key)
425{
426 struct btrfs_delayed_item *item, *next;
427
428 item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
429 NULL, &next);
430 if (!item)
431 item = next;
432
433 return item;
434}
435
436static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node, 397static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
437 struct btrfs_delayed_item *ins, 398 struct btrfs_delayed_item *ins,
438 int action) 399 int action)
@@ -535,7 +496,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
535 } 496 }
536} 497}
537 498
538struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item( 499static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
539 struct btrfs_delayed_node *delayed_node) 500 struct btrfs_delayed_node *delayed_node)
540{ 501{
541 struct rb_node *p; 502 struct rb_node *p;
@@ -548,7 +509,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
548 return item; 509 return item;
549} 510}
550 511
551struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item( 512static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
552 struct btrfs_delayed_node *delayed_node) 513 struct btrfs_delayed_node *delayed_node)
553{ 514{
554 struct rb_node *p; 515 struct rb_node *p;
@@ -561,7 +522,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
561 return item; 522 return item;
562} 523}
563 524
564struct btrfs_delayed_item *__btrfs_next_delayed_item( 525static struct btrfs_delayed_item *__btrfs_next_delayed_item(
565 struct btrfs_delayed_item *item) 526 struct btrfs_delayed_item *item)
566{ 527{
567 struct rb_node *p; 528 struct rb_node *p;
@@ -766,10 +727,9 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
766 * This helper will insert some continuous items into the same leaf according 727 * This helper will insert some continuous items into the same leaf according
767 * to the free space of the leaf. 728 * to the free space of the leaf.
768 */ 729 */
769static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, 730static int btrfs_batch_insert_items(struct btrfs_root *root,
770 struct btrfs_root *root, 731 struct btrfs_path *path,
771 struct btrfs_path *path, 732 struct btrfs_delayed_item *item)
772 struct btrfs_delayed_item *item)
773{ 733{
774 struct btrfs_delayed_item *curr, *next; 734 struct btrfs_delayed_item *curr, *next;
775 int free_space; 735 int free_space;
@@ -848,7 +808,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
848 btrfs_clear_path_blocking(path, NULL, 0); 808 btrfs_clear_path_blocking(path, NULL, 0);
849 809
850 /* insert the keys of the items */ 810 /* insert the keys of the items */
851 setup_items_for_insert(trans, root, path, keys, data_size, 811 setup_items_for_insert(root, path, keys, data_size,
852 total_data_size, total_size, nitems); 812 total_data_size, total_size, nitems);
853 813
854 /* insert the dir index items */ 814 /* insert the dir index items */
@@ -932,7 +892,7 @@ do_again:
932 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) { 892 if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
933 /* insert the continuous items into the same leaf */ 893 /* insert the continuous items into the same leaf */
934 path->slots[0]++; 894 path->slots[0]++;
935 btrfs_batch_insert_items(trans, root, path, curr); 895 btrfs_batch_insert_items(root, path, curr);
936 } 896 }
937 btrfs_release_delayed_item(prev); 897 btrfs_release_delayed_item(prev);
938 btrfs_mark_buffer_dirty(path->nodes[0]); 898 btrfs_mark_buffer_dirty(path->nodes[0]);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index b7a0641ead77..c219463fb1fd 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -40,16 +40,19 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
40 * compare two delayed tree backrefs with same bytenr and type 40 * compare two delayed tree backrefs with same bytenr and type
41 */ 41 */
42static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, 42static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
43 struct btrfs_delayed_tree_ref *ref1) 43 struct btrfs_delayed_tree_ref *ref1, int type)
44{ 44{
45 if (ref1->root < ref2->root) 45 if (type == BTRFS_TREE_BLOCK_REF_KEY) {
46 return -1; 46 if (ref1->root < ref2->root)
47 if (ref1->root > ref2->root) 47 return -1;
48 return 1; 48 if (ref1->root > ref2->root)
49 if (ref1->parent < ref2->parent) 49 return 1;
50 return -1; 50 } else {
51 if (ref1->parent > ref2->parent) 51 if (ref1->parent < ref2->parent)
52 return 1; 52 return -1;
53 if (ref1->parent > ref2->parent)
54 return 1;
55 }
53 return 0; 56 return 0;
54} 57}
55 58
@@ -113,7 +116,8 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
113 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || 116 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
114 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) { 117 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
115 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2), 118 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
116 btrfs_delayed_node_to_tree_ref(ref1)); 119 btrfs_delayed_node_to_tree_ref(ref1),
120 ref1->type);
117 } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY || 121 } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
118 ref1->type == BTRFS_SHARED_DATA_REF_KEY) { 122 ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
119 return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2), 123 return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
@@ -357,8 +361,10 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
357 elem = list_first_entry(&fs_info->tree_mod_seq_list, 361 elem = list_first_entry(&fs_info->tree_mod_seq_list,
358 struct seq_list, list); 362 struct seq_list, list);
359 if (seq >= elem->seq) { 363 if (seq >= elem->seq) {
360 pr_debug("holding back delayed_ref %llu, lowest is " 364 pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
361 "%llu (%p)\n", seq, elem->seq, delayed_refs); 365 (u32)(seq >> 32), (u32)seq,
366 (u32)(elem->seq >> 32), (u32)elem->seq,
367 delayed_refs);
362 ret = 1; 368 ret = 1;
363 } 369 }
364 } 370 }
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 502c2158167c..79e594e341c7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,6 +21,10 @@
21#include "hash.h" 21#include "hash.h"
22#include "transaction.h" 22#include "transaction.h"
23 23
24static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
25 struct btrfs_path *path,
26 const char *name, int name_len);
27
24/* 28/*
25 * insert a name into a directory, doing overflow properly if there is a hash 29 * insert a name into a directory, doing overflow properly if there is a hash
26 * collision. data_size indicates how big the item inserted should be. On 30 * collision. data_size indicates how big the item inserted should be. On
@@ -49,7 +53,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
49 di = btrfs_match_dir_item_name(root, path, name, name_len); 53 di = btrfs_match_dir_item_name(root, path, name, name_len);
50 if (di) 54 if (di)
51 return ERR_PTR(-EEXIST); 55 return ERR_PTR(-EEXIST);
52 btrfs_extend_item(trans, root, path, data_size); 56 btrfs_extend_item(root, path, data_size);
53 } else if (ret < 0) 57 } else if (ret < 0)
54 return ERR_PTR(ret); 58 return ERR_PTR(ret);
55 WARN_ON(ret > 0); 59 WARN_ON(ret > 0);
@@ -379,7 +383,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
379 * this walks through all the entries in a dir item and finds one 383 * this walks through all the entries in a dir item and finds one
380 * for a specific name. 384 * for a specific name.
381 */ 385 */
382struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, 386static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
383 struct btrfs_path *path, 387 struct btrfs_path *path,
384 const char *name, int name_len) 388 const char *name, int name_len)
385{ 389{
@@ -442,8 +446,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
442 start = btrfs_item_ptr_offset(leaf, path->slots[0]); 446 start = btrfs_item_ptr_offset(leaf, path->slots[0]);
443 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 447 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
444 item_len - (ptr + sub_item_len - start)); 448 item_len - (ptr + sub_item_len - start));
445 btrfs_truncate_item(trans, root, path, 449 btrfs_truncate_item(root, path, item_len - sub_item_len, 1);
446 item_len - sub_item_len, 1);
447 } 450 }
448 return ret; 451 return ret;
449} 452}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7d84651e850b..4e9ebe1f1827 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,6 +30,7 @@
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h> 31#include <linux/migrate.h>
32#include <linux/ratelimit.h> 32#include <linux/ratelimit.h>
33#include <linux/uuid.h>
33#include <asm/unaligned.h> 34#include <asm/unaligned.h>
34#include "compat.h" 35#include "compat.h"
35#include "ctree.h" 36#include "ctree.h"
@@ -69,6 +70,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
69 int mark); 70 int mark);
70static int btrfs_destroy_pinned_extent(struct btrfs_root *root, 71static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
71 struct extent_io_tree *pinned_extents); 72 struct extent_io_tree *pinned_extents);
73static int btrfs_cleanup_transaction(struct btrfs_root *root);
74static void btrfs_error_commit_super(struct btrfs_root *root);
72 75
73/* 76/*
74 * end_io_wq structs are used to do processing in task context when an IO is 77 * end_io_wq structs are used to do processing in task context when an IO is
@@ -222,7 +225,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
222 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 225 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
223 226
224 write_lock(&em_tree->lock); 227 write_lock(&em_tree->lock);
225 ret = add_extent_mapping(em_tree, em); 228 ret = add_extent_mapping(em_tree, em, 0);
226 if (ret == -EEXIST) { 229 if (ret == -EEXIST) {
227 free_extent_map(em); 230 free_extent_map(em);
228 em = lookup_extent_mapping(em_tree, start, len); 231 em = lookup_extent_mapping(em_tree, start, len);
@@ -238,7 +241,7 @@ out:
238 return em; 241 return em;
239} 242}
240 243
241u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) 244u32 btrfs_csum_data(char *data, u32 seed, size_t len)
242{ 245{
243 return crc32c(seed, data, len); 246 return crc32c(seed, data, len);
244} 247}
@@ -274,7 +277,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
274 if (err) 277 if (err)
275 return 1; 278 return 1;
276 cur_len = min(len, map_len - (offset - map_start)); 279 cur_len = min(len, map_len - (offset - map_start));
277 crc = btrfs_csum_data(root, kaddr + offset - map_start, 280 crc = btrfs_csum_data(kaddr + offset - map_start,
278 crc, cur_len); 281 crc, cur_len);
279 len -= cur_len; 282 len -= cur_len;
280 offset += cur_len; 283 offset += cur_len;
@@ -354,6 +357,49 @@ out:
354} 357}
355 358
356/* 359/*
360 * Return 0 if the superblock checksum type matches the checksum value of that
361 * algorithm. Pass the raw disk superblock data.
362 */
363static int btrfs_check_super_csum(char *raw_disk_sb)
364{
365 struct btrfs_super_block *disk_sb =
366 (struct btrfs_super_block *)raw_disk_sb;
367 u16 csum_type = btrfs_super_csum_type(disk_sb);
368 int ret = 0;
369
370 if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
371 u32 crc = ~(u32)0;
372 const int csum_size = sizeof(crc);
373 char result[csum_size];
374
375 /*
376 * The super_block structure does not span the whole
377 * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
 378 * is filled with zeros and is included in the checksum.
379 */
380 crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
381 crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
382 btrfs_csum_final(crc, result);
383
384 if (memcmp(raw_disk_sb, result, csum_size))
385 ret = 1;
386
387 if (ret && btrfs_super_generation(disk_sb) < 10) {
388 printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
389 ret = 0;
390 }
391 }
392
393 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
394 printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
395 csum_type);
396 ret = 1;
397 }
398
399 return ret;
400}
401
402/*
357 * helper to read a given tree block, doing retries as required when 403 * helper to read a given tree block, doing retries as required when
358 * the checksums don't match and we have alternate mirrors to try. 404 * the checksums don't match and we have alternate mirrors to try.
359 */ 405 */
@@ -530,41 +576,6 @@ static noinline int check_leaf(struct btrfs_root *root,
530 return 0; 576 return 0;
531} 577}
532 578
533struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
534 struct page *page, int max_walk)
535{
536 struct extent_buffer *eb;
537 u64 start = page_offset(page);
538 u64 target = start;
539 u64 min_start;
540
541 if (start < max_walk)
542 min_start = 0;
543 else
544 min_start = start - max_walk;
545
546 while (start >= min_start) {
547 eb = find_extent_buffer(tree, start, 0);
548 if (eb) {
549 /*
550 * we found an extent buffer and it contains our page
 551 * hooray!
552 */
553 if (eb->start <= target &&
554 eb->start + eb->len > target)
555 return eb;
556
557 /* we found an extent buffer that wasn't for us */
558 free_extent_buffer(eb);
559 return NULL;
560 }
561 if (start == 0)
562 break;
563 start -= PAGE_CACHE_SIZE;
564 }
565 return NULL;
566}
567
568static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 579static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
569 struct extent_state *state, int mirror) 580 struct extent_state *state, int mirror)
570{ 581{
@@ -613,6 +624,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
613 goto err; 624 goto err;
614 } 625 }
615 found_level = btrfs_header_level(eb); 626 found_level = btrfs_header_level(eb);
627 if (found_level >= BTRFS_MAX_LEVEL) {
628 btrfs_info(root->fs_info, "bad tree block level %d\n",
629 (int)btrfs_header_level(eb));
630 ret = -EIO;
631 goto err;
632 }
616 633
617 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), 634 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
618 eb, found_level); 635 eb, found_level);
@@ -636,10 +653,9 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
636 if (!ret) 653 if (!ret)
637 set_extent_buffer_uptodate(eb); 654 set_extent_buffer_uptodate(eb);
638err: 655err:
639 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { 656 if (reads_done &&
640 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); 657 test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
641 btree_readahead_hook(root, eb, eb->start, ret); 658 btree_readahead_hook(root, eb, eb->start, ret);
642 }
643 659
644 if (ret) { 660 if (ret) {
645 /* 661 /*
@@ -993,14 +1009,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
993{ 1009{
994 if (PageWriteback(page) || PageDirty(page)) 1010 if (PageWriteback(page) || PageDirty(page))
995 return 0; 1011 return 0;
996 /*
997 * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
998 * slab allocation from alloc_extent_state down the callchain where
999 * it'd hit a BUG_ON as those flags are not allowed.
1000 */
1001 gfp_flags &= ~GFP_SLAB_BUG_MASK;
1002 1012
1003 return try_release_extent_buffer(page, gfp_flags); 1013 return try_release_extent_buffer(page);
1004} 1014}
1005 1015
1006static void btree_invalidatepage(struct page *page, unsigned long offset) 1016static void btree_invalidatepage(struct page *page, unsigned long offset)
@@ -1275,6 +1285,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1275 struct btrfs_key key; 1285 struct btrfs_key key;
1276 int ret = 0; 1286 int ret = 0;
1277 u64 bytenr; 1287 u64 bytenr;
1288 uuid_le uuid;
1278 1289
1279 root = btrfs_alloc_root(fs_info); 1290 root = btrfs_alloc_root(fs_info);
1280 if (!root) 1291 if (!root)
@@ -1291,6 +1302,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1291 0, objectid, NULL, 0, 0, 0); 1302 0, objectid, NULL, 0, 0, 0);
1292 if (IS_ERR(leaf)) { 1303 if (IS_ERR(leaf)) {
1293 ret = PTR_ERR(leaf); 1304 ret = PTR_ERR(leaf);
1305 leaf = NULL;
1294 goto fail; 1306 goto fail;
1295 } 1307 }
1296 1308
@@ -1323,6 +1335,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1323 btrfs_set_root_used(&root->root_item, leaf->len); 1335 btrfs_set_root_used(&root->root_item, leaf->len);
1324 btrfs_set_root_last_snapshot(&root->root_item, 0); 1336 btrfs_set_root_last_snapshot(&root->root_item, 0);
1325 btrfs_set_root_dirid(&root->root_item, 0); 1337 btrfs_set_root_dirid(&root->root_item, 0);
1338 uuid_le_gen(&uuid);
1339 memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
1326 root->root_item.drop_level = 0; 1340 root->root_item.drop_level = 0;
1327 1341
1328 key.objectid = objectid; 1342 key.objectid = objectid;
@@ -1334,11 +1348,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1334 1348
1335 btrfs_tree_unlock(leaf); 1349 btrfs_tree_unlock(leaf);
1336 1350
1351 return root;
1352
1337fail: 1353fail:
1338 if (ret) 1354 if (leaf) {
1339 return ERR_PTR(ret); 1355 btrfs_tree_unlock(leaf);
1356 free_extent_buffer(leaf);
1357 }
1358 kfree(root);
1340 1359
1341 return root; 1360 return ERR_PTR(ret);
1342} 1361}
1343 1362
1344static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, 1363static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
@@ -1470,7 +1489,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1470 if (ret == 0) { 1489 if (ret == 0) {
1471 l = path->nodes[0]; 1490 l = path->nodes[0];
1472 slot = path->slots[0]; 1491 slot = path->slots[0];
1473 btrfs_read_root_item(tree_root, l, slot, &root->root_item); 1492 btrfs_read_root_item(l, slot, &root->root_item);
1474 memcpy(&root->root_key, location, sizeof(*location)); 1493 memcpy(&root->root_key, location, sizeof(*location));
1475 } 1494 }
1476 btrfs_free_path(path); 1495 btrfs_free_path(path);
@@ -1485,6 +1504,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
1485 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1504 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1486 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1505 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1487 blocksize, generation); 1506 blocksize, generation);
1507 if (!root->node || !extent_buffer_uptodate(root->node)) {
1508 ret = (!root->node) ? -ENOMEM : -EIO;
1509
1510 free_extent_buffer(root->node);
1511 kfree(root);
1512 return ERR_PTR(ret);
1513 }
1514
1488 root->commit_root = btrfs_root_node(root); 1515 root->commit_root = btrfs_root_node(root);
1489 BUG_ON(!root->node); /* -ENOMEM */ 1516 BUG_ON(!root->node); /* -ENOMEM */
1490out: 1517out:
@@ -1652,15 +1679,20 @@ static int cleaner_kthread(void *arg)
1652 struct btrfs_root *root = arg; 1679 struct btrfs_root *root = arg;
1653 1680
1654 do { 1681 do {
1682 int again = 0;
1683
1655 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1684 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1656 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1685 down_read_trylock(&root->fs_info->sb->s_umount)) {
1657 btrfs_run_delayed_iputs(root); 1686 if (mutex_trylock(&root->fs_info->cleaner_mutex)) {
1658 btrfs_clean_old_snapshots(root); 1687 btrfs_run_delayed_iputs(root);
1659 mutex_unlock(&root->fs_info->cleaner_mutex); 1688 again = btrfs_clean_one_deleted_snapshot(root);
1689 mutex_unlock(&root->fs_info->cleaner_mutex);
1690 }
1660 btrfs_run_defrag_inodes(root->fs_info); 1691 btrfs_run_defrag_inodes(root->fs_info);
1692 up_read(&root->fs_info->sb->s_umount);
1661 } 1693 }
1662 1694
1663 if (!try_to_freeze()) { 1695 if (!try_to_freeze() && !again) {
1664 set_current_state(TASK_INTERRUPTIBLE); 1696 set_current_state(TASK_INTERRUPTIBLE);
1665 if (!kthread_should_stop()) 1697 if (!kthread_should_stop())
1666 schedule(); 1698 schedule();
@@ -1929,6 +1961,28 @@ static noinline int next_root_backup(struct btrfs_fs_info *info,
1929 return 0; 1961 return 0;
1930} 1962}
1931 1963
1964/* helper to cleanup workers */
1965static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
1966{
1967 btrfs_stop_workers(&fs_info->generic_worker);
1968 btrfs_stop_workers(&fs_info->fixup_workers);
1969 btrfs_stop_workers(&fs_info->delalloc_workers);
1970 btrfs_stop_workers(&fs_info->workers);
1971 btrfs_stop_workers(&fs_info->endio_workers);
1972 btrfs_stop_workers(&fs_info->endio_meta_workers);
1973 btrfs_stop_workers(&fs_info->endio_raid56_workers);
1974 btrfs_stop_workers(&fs_info->rmw_workers);
1975 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
1976 btrfs_stop_workers(&fs_info->endio_write_workers);
1977 btrfs_stop_workers(&fs_info->endio_freespace_worker);
1978 btrfs_stop_workers(&fs_info->submit_workers);
1979 btrfs_stop_workers(&fs_info->delayed_workers);
1980 btrfs_stop_workers(&fs_info->caching_workers);
1981 btrfs_stop_workers(&fs_info->readahead_workers);
1982 btrfs_stop_workers(&fs_info->flush_workers);
1983 btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
1984}
1985
1932/* helper to cleanup tree roots */ 1986/* helper to cleanup tree roots */
1933static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) 1987static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1934{ 1988{
@@ -1966,6 +2020,36 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1966 } 2020 }
1967} 2021}
1968 2022
2023static void del_fs_roots(struct btrfs_fs_info *fs_info)
2024{
2025 int ret;
2026 struct btrfs_root *gang[8];
2027 int i;
2028
2029 while (!list_empty(&fs_info->dead_roots)) {
2030 gang[0] = list_entry(fs_info->dead_roots.next,
2031 struct btrfs_root, root_list);
2032 list_del(&gang[0]->root_list);
2033
2034 if (gang[0]->in_radix) {
2035 btrfs_free_fs_root(fs_info, gang[0]);
2036 } else {
2037 free_extent_buffer(gang[0]->node);
2038 free_extent_buffer(gang[0]->commit_root);
2039 kfree(gang[0]);
2040 }
2041 }
2042
2043 while (1) {
2044 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
2045 (void **)gang, 0,
2046 ARRAY_SIZE(gang));
2047 if (!ret)
2048 break;
2049 for (i = 0; i < ret; i++)
2050 btrfs_free_fs_root(fs_info, gang[i]);
2051 }
2052}
1969 2053
1970int open_ctree(struct super_block *sb, 2054int open_ctree(struct super_block *sb,
1971 struct btrfs_fs_devices *fs_devices, 2055 struct btrfs_fs_devices *fs_devices,
@@ -2054,6 +2138,7 @@ int open_ctree(struct super_block *sb,
2054 spin_lock_init(&fs_info->defrag_inodes_lock); 2138 spin_lock_init(&fs_info->defrag_inodes_lock);
2055 spin_lock_init(&fs_info->free_chunk_lock); 2139 spin_lock_init(&fs_info->free_chunk_lock);
2056 spin_lock_init(&fs_info->tree_mod_seq_lock); 2140 spin_lock_init(&fs_info->tree_mod_seq_lock);
2141 spin_lock_init(&fs_info->super_lock);
2057 rwlock_init(&fs_info->tree_mod_log_lock); 2142 rwlock_init(&fs_info->tree_mod_log_lock);
2058 mutex_init(&fs_info->reloc_mutex); 2143 mutex_init(&fs_info->reloc_mutex);
2059 seqlock_init(&fs_info->profiles_lock); 2144 seqlock_init(&fs_info->profiles_lock);
@@ -2077,7 +2162,7 @@ int open_ctree(struct super_block *sb,
2077 atomic_set(&fs_info->async_submit_draining, 0); 2162 atomic_set(&fs_info->async_submit_draining, 0);
2078 atomic_set(&fs_info->nr_async_bios, 0); 2163 atomic_set(&fs_info->nr_async_bios, 0);
2079 atomic_set(&fs_info->defrag_running, 0); 2164 atomic_set(&fs_info->defrag_running, 0);
2080 atomic_set(&fs_info->tree_mod_seq, 0); 2165 atomic64_set(&fs_info->tree_mod_seq, 0);
2081 fs_info->sb = sb; 2166 fs_info->sb = sb;
2082 fs_info->max_inline = 8192 * 1024; 2167 fs_info->max_inline = 8192 * 1024;
2083 fs_info->metadata_ratio = 0; 2168 fs_info->metadata_ratio = 0;
@@ -2181,11 +2266,13 @@ int open_ctree(struct super_block *sb,
2181 mutex_init(&fs_info->dev_replace.lock); 2266 mutex_init(&fs_info->dev_replace.lock);
2182 2267
2183 spin_lock_init(&fs_info->qgroup_lock); 2268 spin_lock_init(&fs_info->qgroup_lock);
2269 mutex_init(&fs_info->qgroup_ioctl_lock);
2184 fs_info->qgroup_tree = RB_ROOT; 2270 fs_info->qgroup_tree = RB_ROOT;
2185 INIT_LIST_HEAD(&fs_info->dirty_qgroups); 2271 INIT_LIST_HEAD(&fs_info->dirty_qgroups);
2186 fs_info->qgroup_seq = 1; 2272 fs_info->qgroup_seq = 1;
2187 fs_info->quota_enabled = 0; 2273 fs_info->quota_enabled = 0;
2188 fs_info->pending_quota_state = 0; 2274 fs_info->pending_quota_state = 0;
2275 mutex_init(&fs_info->qgroup_rescan_lock);
2189 2276
2190 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 2277 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
2191 btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 2278 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2205,12 +2292,31 @@ int open_ctree(struct super_block *sb,
2205 fs_info, BTRFS_ROOT_TREE_OBJECTID); 2292 fs_info, BTRFS_ROOT_TREE_OBJECTID);
2206 2293
2207 invalidate_bdev(fs_devices->latest_bdev); 2294 invalidate_bdev(fs_devices->latest_bdev);
2295
2296 /*
2297 * Read super block and check the signature bytes only
2298 */
2208 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 2299 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
2209 if (!bh) { 2300 if (!bh) {
2210 err = -EINVAL; 2301 err = -EINVAL;
2211 goto fail_alloc; 2302 goto fail_alloc;
2212 } 2303 }
2213 2304
2305 /*
2306 * We want to check superblock checksum, the type is stored inside.
2307 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
2308 */
2309 if (btrfs_check_super_csum(bh->b_data)) {
2310 printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
2311 err = -EINVAL;
2312 goto fail_alloc;
2313 }
2314
2315 /*
2316 * super_copy is zeroed at allocation time and we never touch the
2317 * following bytes up to INFO_SIZE, the checksum is calculated from
2318 * the whole block of INFO_SIZE
2319 */
2214 memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy)); 2320 memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
2215 memcpy(fs_info->super_for_commit, fs_info->super_copy, 2321 memcpy(fs_info->super_for_commit, fs_info->super_copy,
2216 sizeof(*fs_info->super_for_commit)); 2322 sizeof(*fs_info->super_for_commit));
@@ -2218,6 +2324,13 @@ int open_ctree(struct super_block *sb,
2218 2324
2219 memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE); 2325 memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
2220 2326
2327 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2328 if (ret) {
2329 printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
2330 err = -EINVAL;
2331 goto fail_alloc;
2332 }
2333
2221 disk_super = fs_info->super_copy; 2334 disk_super = fs_info->super_copy;
2222 if (!btrfs_super_root(disk_super)) 2335 if (!btrfs_super_root(disk_super))
2223 goto fail_alloc; 2336 goto fail_alloc;
@@ -2226,13 +2339,6 @@ int open_ctree(struct super_block *sb,
2226 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) 2339 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
2227 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); 2340 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
2228 2341
2229 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2230 if (ret) {
2231 printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
2232 err = ret;
2233 goto fail_alloc;
2234 }
2235
2236 /* 2342 /*
2237 * run through our array of backup supers and setup 2343 * run through our array of backup supers and setup
2238 * our ring pointer to the oldest one 2344 * our ring pointer to the oldest one
@@ -2284,6 +2390,9 @@ int open_ctree(struct super_block *sb,
2284 if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO) 2390 if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
2285 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2391 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2286 2392
2393 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
2394 printk(KERN_ERR "btrfs: has skinny extents\n");
2395
2287 /* 2396 /*
2288 * flag our filesystem as having big metadata blocks if 2397 * flag our filesystem as having big metadata blocks if
2289 * they are bigger than the page size 2398 * they are bigger than the page size
@@ -2313,6 +2422,10 @@ int open_ctree(struct super_block *sb,
2313 goto fail_alloc; 2422 goto fail_alloc;
2314 } 2423 }
2315 2424
2425 /*
2426 * Needn't use the lock because there is no other task which will
2427 * update the flag.
2428 */
2316 btrfs_set_super_incompat_flags(disk_super, features); 2429 btrfs_set_super_incompat_flags(disk_super, features);
2317 2430
2318 features = btrfs_super_compat_ro_flags(disk_super) & 2431 features = btrfs_super_compat_ro_flags(disk_super) &
@@ -2388,6 +2501,8 @@ int open_ctree(struct super_block *sb,
2388 btrfs_init_workers(&fs_info->readahead_workers, "readahead", 2501 btrfs_init_workers(&fs_info->readahead_workers, "readahead",
2389 fs_info->thread_pool_size, 2502 fs_info->thread_pool_size,
2390 &fs_info->generic_worker); 2503 &fs_info->generic_worker);
2504 btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
2505 &fs_info->generic_worker);
2391 2506
2392 /* 2507 /*
2393 * endios are largely parallel and should have a very 2508 * endios are largely parallel and should have a very
@@ -2422,6 +2537,7 @@ int open_ctree(struct super_block *sb,
2422 ret |= btrfs_start_workers(&fs_info->caching_workers); 2537 ret |= btrfs_start_workers(&fs_info->caching_workers);
2423 ret |= btrfs_start_workers(&fs_info->readahead_workers); 2538 ret |= btrfs_start_workers(&fs_info->readahead_workers);
2424 ret |= btrfs_start_workers(&fs_info->flush_workers); 2539 ret |= btrfs_start_workers(&fs_info->flush_workers);
2540 ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
2425 if (ret) { 2541 if (ret) {
2426 err = -ENOMEM; 2542 err = -ENOMEM;
2427 goto fail_sb_buffer; 2543 goto fail_sb_buffer;
@@ -2469,8 +2585,8 @@ int open_ctree(struct super_block *sb,
2469 chunk_root->node = read_tree_block(chunk_root, 2585 chunk_root->node = read_tree_block(chunk_root,
2470 btrfs_super_chunk_root(disk_super), 2586 btrfs_super_chunk_root(disk_super),
2471 blocksize, generation); 2587 blocksize, generation);
2472 BUG_ON(!chunk_root->node); /* -ENOMEM */ 2588 if (!chunk_root->node ||
2473 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2589 !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
2474 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 2590 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
2475 sb->s_id); 2591 sb->s_id);
2476 goto fail_tree_roots; 2592 goto fail_tree_roots;
@@ -2655,6 +2771,13 @@ retry_root_backup:
2655 log_tree_root->node = read_tree_block(tree_root, bytenr, 2771 log_tree_root->node = read_tree_block(tree_root, bytenr,
2656 blocksize, 2772 blocksize,
2657 generation + 1); 2773 generation + 1);
2774 if (!log_tree_root->node ||
2775 !extent_buffer_uptodate(log_tree_root->node)) {
2776 printk(KERN_ERR "btrfs: failed to read log tree\n");
2777 free_extent_buffer(log_tree_root->node);
2778 kfree(log_tree_root);
2779 goto fail_trans_kthread;
2780 }
2658 /* returns with log_tree_root freed on success */ 2781 /* returns with log_tree_root freed on success */
2659 ret = btrfs_recover_log_trees(log_tree_root); 2782 ret = btrfs_recover_log_trees(log_tree_root);
2660 if (ret) { 2783 if (ret) {
@@ -2734,6 +2857,8 @@ fail_qgroup:
2734 btrfs_free_qgroup_config(fs_info); 2857 btrfs_free_qgroup_config(fs_info);
2735fail_trans_kthread: 2858fail_trans_kthread:
2736 kthread_stop(fs_info->transaction_kthread); 2859 kthread_stop(fs_info->transaction_kthread);
2860 del_fs_roots(fs_info);
2861 btrfs_cleanup_transaction(fs_info->tree_root);
2737fail_cleaner: 2862fail_cleaner:
2738 kthread_stop(fs_info->cleaner_kthread); 2863 kthread_stop(fs_info->cleaner_kthread);
2739 2864
@@ -2744,6 +2869,7 @@ fail_cleaner:
2744 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 2869 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
2745 2870
2746fail_block_groups: 2871fail_block_groups:
2872 btrfs_put_block_group_cache(fs_info);
2747 btrfs_free_block_groups(fs_info); 2873 btrfs_free_block_groups(fs_info);
2748 2874
2749fail_tree_roots: 2875fail_tree_roots:
@@ -2751,22 +2877,7 @@ fail_tree_roots:
2751 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2877 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2752 2878
2753fail_sb_buffer: 2879fail_sb_buffer:
2754 btrfs_stop_workers(&fs_info->generic_worker); 2880 btrfs_stop_all_workers(fs_info);
2755 btrfs_stop_workers(&fs_info->readahead_workers);
2756 btrfs_stop_workers(&fs_info->fixup_workers);
2757 btrfs_stop_workers(&fs_info->delalloc_workers);
2758 btrfs_stop_workers(&fs_info->workers);
2759 btrfs_stop_workers(&fs_info->endio_workers);
2760 btrfs_stop_workers(&fs_info->endio_meta_workers);
2761 btrfs_stop_workers(&fs_info->endio_raid56_workers);
2762 btrfs_stop_workers(&fs_info->rmw_workers);
2763 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2764 btrfs_stop_workers(&fs_info->endio_write_workers);
2765 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2766 btrfs_stop_workers(&fs_info->submit_workers);
2767 btrfs_stop_workers(&fs_info->delayed_workers);
2768 btrfs_stop_workers(&fs_info->caching_workers);
2769 btrfs_stop_workers(&fs_info->flush_workers);
2770fail_alloc: 2881fail_alloc:
2771fail_iput: 2882fail_iput:
2772 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2883 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2898,7 +3009,10 @@ static int write_dev_supers(struct btrfs_device *device,
2898 if (wait) { 3009 if (wait) {
2899 bh = __find_get_block(device->bdev, bytenr / 4096, 3010 bh = __find_get_block(device->bdev, bytenr / 4096,
2900 BTRFS_SUPER_INFO_SIZE); 3011 BTRFS_SUPER_INFO_SIZE);
2901 BUG_ON(!bh); 3012 if (!bh) {
3013 errors++;
3014 continue;
3015 }
2902 wait_on_buffer(bh); 3016 wait_on_buffer(bh);
2903 if (!buffer_uptodate(bh)) 3017 if (!buffer_uptodate(bh))
2904 errors++; 3018 errors++;
@@ -2913,7 +3027,7 @@ static int write_dev_supers(struct btrfs_device *device,
2913 btrfs_set_super_bytenr(sb, bytenr); 3027 btrfs_set_super_bytenr(sb, bytenr);
2914 3028
2915 crc = ~(u32)0; 3029 crc = ~(u32)0;
2916 crc = btrfs_csum_data(NULL, (char *)sb + 3030 crc = btrfs_csum_data((char *)sb +
2917 BTRFS_CSUM_SIZE, crc, 3031 BTRFS_CSUM_SIZE, crc,
2918 BTRFS_SUPER_INFO_SIZE - 3032 BTRFS_SUPER_INFO_SIZE -
2919 BTRFS_CSUM_SIZE); 3033 BTRFS_CSUM_SIZE);
@@ -2925,6 +3039,13 @@ static int write_dev_supers(struct btrfs_device *device,
2925 */ 3039 */
2926 bh = __getblk(device->bdev, bytenr / 4096, 3040 bh = __getblk(device->bdev, bytenr / 4096,
2927 BTRFS_SUPER_INFO_SIZE); 3041 BTRFS_SUPER_INFO_SIZE);
3042 if (!bh) {
3043 printk(KERN_ERR "btrfs: couldn't get super "
3044 "buffer head for bytenr %Lu\n", bytenr);
3045 errors++;
3046 continue;
3047 }
3048
2928 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); 3049 memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
2929 3050
2930 /* one reference for submit_bh */ 3051 /* one reference for submit_bh */
@@ -3147,7 +3268,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
3147 return num_tolerated_disk_barrier_failures; 3268 return num_tolerated_disk_barrier_failures;
3148} 3269}
3149 3270
3150int write_all_supers(struct btrfs_root *root, int max_mirrors) 3271static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3151{ 3272{
3152 struct list_head *head; 3273 struct list_head *head;
3153 struct btrfs_device *dev; 3274 struct btrfs_device *dev;
@@ -3253,7 +3374,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
3253 if (btrfs_root_refs(&root->root_item) == 0) 3374 if (btrfs_root_refs(&root->root_item) == 0)
3254 synchronize_srcu(&fs_info->subvol_srcu); 3375 synchronize_srcu(&fs_info->subvol_srcu);
3255 3376
3256 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 3377 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
3257 btrfs_free_log(NULL, root); 3378 btrfs_free_log(NULL, root);
3258 btrfs_free_log_root_tree(NULL, fs_info); 3379 btrfs_free_log_root_tree(NULL, fs_info);
3259 } 3380 }
@@ -3277,37 +3398,6 @@ static void free_fs_root(struct btrfs_root *root)
3277 kfree(root); 3398 kfree(root);
3278} 3399}
3279 3400
3280static void del_fs_roots(struct btrfs_fs_info *fs_info)
3281{
3282 int ret;
3283 struct btrfs_root *gang[8];
3284 int i;
3285
3286 while (!list_empty(&fs_info->dead_roots)) {
3287 gang[0] = list_entry(fs_info->dead_roots.next,
3288 struct btrfs_root, root_list);
3289 list_del(&gang[0]->root_list);
3290
3291 if (gang[0]->in_radix) {
3292 btrfs_free_fs_root(fs_info, gang[0]);
3293 } else {
3294 free_extent_buffer(gang[0]->node);
3295 free_extent_buffer(gang[0]->commit_root);
3296 kfree(gang[0]);
3297 }
3298 }
3299
3300 while (1) {
3301 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
3302 (void **)gang, 0,
3303 ARRAY_SIZE(gang));
3304 if (!ret)
3305 break;
3306 for (i = 0; i < ret; i++)
3307 btrfs_free_fs_root(fs_info, gang[i]);
3308 }
3309}
3310
3311int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) 3401int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
3312{ 3402{
3313 u64 root_objectid = 0; 3403 u64 root_objectid = 0;
@@ -3343,8 +3433,8 @@ int btrfs_commit_super(struct btrfs_root *root)
3343 3433
3344 mutex_lock(&root->fs_info->cleaner_mutex); 3434 mutex_lock(&root->fs_info->cleaner_mutex);
3345 btrfs_run_delayed_iputs(root); 3435 btrfs_run_delayed_iputs(root);
3346 btrfs_clean_old_snapshots(root);
3347 mutex_unlock(&root->fs_info->cleaner_mutex); 3436 mutex_unlock(&root->fs_info->cleaner_mutex);
3437 wake_up_process(root->fs_info->cleaner_kthread);
3348 3438
3349 /* wait until ongoing cleanup work done */ 3439 /* wait until ongoing cleanup work done */
3350 down_write(&root->fs_info->cleanup_work_sem); 3440 down_write(&root->fs_info->cleanup_work_sem);
@@ -3420,20 +3510,7 @@ int close_ctree(struct btrfs_root *root)
3420 percpu_counter_sum(&fs_info->delalloc_bytes)); 3510 percpu_counter_sum(&fs_info->delalloc_bytes));
3421 } 3511 }
3422 3512
3423 free_extent_buffer(fs_info->extent_root->node); 3513 free_root_pointers(fs_info, 1);
3424 free_extent_buffer(fs_info->extent_root->commit_root);
3425 free_extent_buffer(fs_info->tree_root->node);
3426 free_extent_buffer(fs_info->tree_root->commit_root);
3427 free_extent_buffer(fs_info->chunk_root->node);
3428 free_extent_buffer(fs_info->chunk_root->commit_root);
3429 free_extent_buffer(fs_info->dev_root->node);
3430 free_extent_buffer(fs_info->dev_root->commit_root);
3431 free_extent_buffer(fs_info->csum_root->node);
3432 free_extent_buffer(fs_info->csum_root->commit_root);
3433 if (fs_info->quota_root) {
3434 free_extent_buffer(fs_info->quota_root->node);
3435 free_extent_buffer(fs_info->quota_root->commit_root);
3436 }
3437 3514
3438 btrfs_free_block_groups(fs_info); 3515 btrfs_free_block_groups(fs_info);
3439 3516
@@ -3441,22 +3518,7 @@ int close_ctree(struct btrfs_root *root)
3441 3518
3442 iput(fs_info->btree_inode); 3519 iput(fs_info->btree_inode);
3443 3520
3444 btrfs_stop_workers(&fs_info->generic_worker); 3521 btrfs_stop_all_workers(fs_info);
3445 btrfs_stop_workers(&fs_info->fixup_workers);
3446 btrfs_stop_workers(&fs_info->delalloc_workers);
3447 btrfs_stop_workers(&fs_info->workers);
3448 btrfs_stop_workers(&fs_info->endio_workers);
3449 btrfs_stop_workers(&fs_info->endio_meta_workers);
3450 btrfs_stop_workers(&fs_info->endio_raid56_workers);
3451 btrfs_stop_workers(&fs_info->rmw_workers);
3452 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
3453 btrfs_stop_workers(&fs_info->endio_write_workers);
3454 btrfs_stop_workers(&fs_info->endio_freespace_worker);
3455 btrfs_stop_workers(&fs_info->submit_workers);
3456 btrfs_stop_workers(&fs_info->delayed_workers);
3457 btrfs_stop_workers(&fs_info->caching_workers);
3458 btrfs_stop_workers(&fs_info->readahead_workers);
3459 btrfs_stop_workers(&fs_info->flush_workers);
3460 3522
3461#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 3523#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
3462 if (btrfs_test_opt(root, CHECK_INTEGRITY)) 3524 if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3561,18 +3623,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
3561static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 3623static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3562 int read_only) 3624 int read_only)
3563{ 3625{
3564 if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) { 3626 /*
3565 printk(KERN_ERR "btrfs: unsupported checksum algorithm\n"); 3627 * Placeholder for checks
3566 return -EINVAL; 3628 */
3567 }
3568
3569 if (read_only)
3570 return 0;
3571
3572 return 0; 3629 return 0;
3573} 3630}
3574 3631
3575void btrfs_error_commit_super(struct btrfs_root *root) 3632static void btrfs_error_commit_super(struct btrfs_root *root)
3576{ 3633{
3577 mutex_lock(&root->fs_info->cleaner_mutex); 3634 mutex_lock(&root->fs_info->cleaner_mutex);
3578 btrfs_run_delayed_iputs(root); 3635 btrfs_run_delayed_iputs(root);
@@ -3663,6 +3720,9 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3663 continue; 3720 continue;
3664 } 3721 }
3665 3722
3723 if (head->must_insert_reserved)
3724 btrfs_pin_extent(root, ref->bytenr,
3725 ref->num_bytes, 1);
3666 btrfs_free_delayed_extent_op(head->extent_op); 3726 btrfs_free_delayed_extent_op(head->extent_op);
3667 delayed_refs->num_heads--; 3727 delayed_refs->num_heads--;
3668 if (list_empty(&head->cluster)) 3728 if (list_empty(&head->cluster))
@@ -3734,13 +3794,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3734 int mark) 3794 int mark)
3735{ 3795{
3736 int ret; 3796 int ret;
3737 struct page *page;
3738 struct inode *btree_inode = root->fs_info->btree_inode;
3739 struct extent_buffer *eb; 3797 struct extent_buffer *eb;
3740 u64 start = 0; 3798 u64 start = 0;
3741 u64 end; 3799 u64 end;
3742 u64 offset;
3743 unsigned long index;
3744 3800
3745 while (1) { 3801 while (1) {
3746 ret = find_first_extent_bit(dirty_pages, start, &start, &end, 3802 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
@@ -3750,36 +3806,17 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
3750 3806
3751 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); 3807 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
3752 while (start <= end) { 3808 while (start <= end) {
3753 index = start >> PAGE_CACHE_SHIFT; 3809 eb = btrfs_find_tree_block(root, start,
3754 start = (u64)(index + 1) << PAGE_CACHE_SHIFT; 3810 root->leafsize);
3755 page = find_get_page(btree_inode->i_mapping, index); 3811 start += eb->len;
3756 if (!page) 3812 if (!eb)
3757 continue; 3813 continue;
3758 offset = page_offset(page); 3814 wait_on_extent_buffer_writeback(eb);
3759
3760 spin_lock(&dirty_pages->buffer_lock);
3761 eb = radix_tree_lookup(
3762 &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
3763 offset >> PAGE_CACHE_SHIFT);
3764 spin_unlock(&dirty_pages->buffer_lock);
3765 if (eb)
3766 ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
3767 &eb->bflags);
3768 if (PageWriteback(page))
3769 end_page_writeback(page);
3770
3771 lock_page(page);
3772 if (PageDirty(page)) {
3773 clear_page_dirty_for_io(page);
3774 spin_lock_irq(&page->mapping->tree_lock);
3775 radix_tree_tag_clear(&page->mapping->page_tree,
3776 page_index(page),
3777 PAGECACHE_TAG_DIRTY);
3778 spin_unlock_irq(&page->mapping->tree_lock);
3779 }
3780 3815
3781 unlock_page(page); 3816 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY,
3782 page_cache_release(page); 3817 &eb->bflags))
3818 clear_extent_buffer_dirty(eb);
3819 free_extent_buffer_stale(eb);
3783 } 3820 }
3784 } 3821 }
3785 3822
@@ -3860,7 +3897,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
3860 */ 3897 */
3861} 3898}
3862 3899
3863int btrfs_cleanup_transaction(struct btrfs_root *root) 3900static int btrfs_cleanup_transaction(struct btrfs_root *root)
3864{ 3901{
3865 struct btrfs_transaction *t; 3902 struct btrfs_transaction *t;
3866 LIST_HEAD(list); 3903 LIST_HEAD(list);
@@ -3881,10 +3918,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3881 3918
3882 btrfs_destroy_delayed_refs(t, root); 3919 btrfs_destroy_delayed_refs(t, root);
3883 3920
3884 btrfs_block_rsv_release(root,
3885 &root->fs_info->trans_block_rsv,
3886 t->dirty_pages.dirty_bytes);
3887
3888 /* FIXME: cleanup wait for commit */ 3921 /* FIXME: cleanup wait for commit */
3889 t->in_commit = 1; 3922 t->in_commit = 1;
3890 t->blocked = 1; 3923 t->blocked = 1;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 034d7dc552b2..be69ce1b07a2 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, int max_mirrors); 61 struct btrfs_root *root, int max_mirrors);
62struct buffer_head *btrfs_read_dev_super(struct block_device *bdev); 62struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
63int btrfs_commit_super(struct btrfs_root *root); 63int btrfs_commit_super(struct btrfs_root *root);
64void btrfs_error_commit_super(struct btrfs_root *root);
65struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 64struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
66 u64 bytenr, u32 blocksize); 65 u64 bytenr, u32 blocksize);
67struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, 66struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
@@ -77,7 +76,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
77 int atomic); 76 int atomic);
78int btrfs_set_buffer_uptodate(struct extent_buffer *buf); 77int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
79int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); 78int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
80u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); 79u32 btrfs_csum_data(char *data, u32 seed, size_t len);
81void btrfs_csum_final(u32 crc, char *result); 80void btrfs_csum_final(u32 crc, char *result);
82int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 81int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
83 int metadata); 82 int metadata);
@@ -93,10 +92,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
93 struct btrfs_fs_info *fs_info); 92 struct btrfs_fs_info *fs_info);
94int btrfs_add_log_tree(struct btrfs_trans_handle *trans, 93int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
95 struct btrfs_root *root); 94 struct btrfs_root *root);
96int btrfs_cleanup_transaction(struct btrfs_root *root);
97void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans, 95void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
98 struct btrfs_root *root); 96 struct btrfs_root *root);
99void btrfs_abort_devices(struct btrfs_root *root);
100struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, 97struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
101 struct btrfs_fs_info *fs_info, 98 struct btrfs_fs_info *fs_info,
102 u64 objectid); 99 u64 objectid);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9ac2eca681eb..2305b5c5cf00 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -105,6 +105,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
105 u64 num_bytes, int reserve); 105 u64 num_bytes, int reserve);
106static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 106static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
107 u64 num_bytes); 107 u64 num_bytes);
108int btrfs_pin_extent(struct btrfs_root *root,
109 u64 bytenr, u64 num_bytes, int reserved);
108 110
109static noinline int 111static noinline int
110block_group_cache_done(struct btrfs_block_group_cache *cache) 112block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -257,7 +259,8 @@ static int exclude_super_stripes(struct btrfs_root *root,
257 cache->bytes_super += stripe_len; 259 cache->bytes_super += stripe_len;
258 ret = add_excluded_extent(root, cache->key.objectid, 260 ret = add_excluded_extent(root, cache->key.objectid,
259 stripe_len); 261 stripe_len);
260 BUG_ON(ret); /* -ENOMEM */ 262 if (ret)
263 return ret;
261 } 264 }
262 265
263 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 266 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -265,13 +268,35 @@ static int exclude_super_stripes(struct btrfs_root *root,
265 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 268 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
266 cache->key.objectid, bytenr, 269 cache->key.objectid, bytenr,
267 0, &logical, &nr, &stripe_len); 270 0, &logical, &nr, &stripe_len);
268 BUG_ON(ret); /* -ENOMEM */ 271 if (ret)
272 return ret;
269 273
270 while (nr--) { 274 while (nr--) {
271 cache->bytes_super += stripe_len; 275 u64 start, len;
272 ret = add_excluded_extent(root, logical[nr], 276
273 stripe_len); 277 if (logical[nr] > cache->key.objectid +
274 BUG_ON(ret); /* -ENOMEM */ 278 cache->key.offset)
279 continue;
280
281 if (logical[nr] + stripe_len <= cache->key.objectid)
282 continue;
283
284 start = logical[nr];
285 if (start < cache->key.objectid) {
286 start = cache->key.objectid;
287 len = (logical[nr] + stripe_len) - start;
288 } else {
289 len = min_t(u64, stripe_len,
290 cache->key.objectid +
291 cache->key.offset - start);
292 }
293
294 cache->bytes_super += len;
295 ret = add_excluded_extent(root, start, len);
296 if (ret) {
297 kfree(logical);
298 return ret;
299 }
275 } 300 }
276 301
277 kfree(logical); 302 kfree(logical);
@@ -414,8 +439,7 @@ again:
414 if (ret) 439 if (ret)
415 break; 440 break;
416 441
417 if (need_resched() || 442 if (need_resched()) {
418 btrfs_next_leaf(extent_root, path)) {
419 caching_ctl->progress = last; 443 caching_ctl->progress = last;
420 btrfs_release_path(path); 444 btrfs_release_path(path);
421 up_read(&fs_info->extent_commit_sem); 445 up_read(&fs_info->extent_commit_sem);
@@ -423,6 +447,12 @@ again:
423 cond_resched(); 447 cond_resched();
424 goto again; 448 goto again;
425 } 449 }
450
451 ret = btrfs_next_leaf(extent_root, path);
452 if (ret < 0)
453 goto err;
454 if (ret)
455 break;
426 leaf = path->nodes[0]; 456 leaf = path->nodes[0];
427 nritems = btrfs_header_nritems(leaf); 457 nritems = btrfs_header_nritems(leaf);
428 continue; 458 continue;
@@ -437,11 +467,16 @@ again:
437 block_group->key.offset) 467 block_group->key.offset)
438 break; 468 break;
439 469
440 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 470 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
471 key.type == BTRFS_METADATA_ITEM_KEY) {
441 total_found += add_new_free_space(block_group, 472 total_found += add_new_free_space(block_group,
442 fs_info, last, 473 fs_info, last,
443 key.objectid); 474 key.objectid);
444 last = key.objectid + key.offset; 475 if (key.type == BTRFS_METADATA_ITEM_KEY)
476 last = key.objectid +
477 fs_info->tree_root->leafsize;
478 else
479 last = key.objectid + key.offset;
445 480
446 if (total_found > (1024 * 1024 * 2)) { 481 if (total_found > (1024 * 1024 * 2)) {
447 total_found = 0; 482 total_found = 0;
@@ -651,55 +686,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
651 rcu_read_unlock(); 686 rcu_read_unlock();
652} 687}
653 688
654u64 btrfs_find_block_group(struct btrfs_root *root,
655 u64 search_start, u64 search_hint, int owner)
656{
657 struct btrfs_block_group_cache *cache;
658 u64 used;
659 u64 last = max(search_hint, search_start);
660 u64 group_start = 0;
661 int full_search = 0;
662 int factor = 9;
663 int wrapped = 0;
664again:
665 while (1) {
666 cache = btrfs_lookup_first_block_group(root->fs_info, last);
667 if (!cache)
668 break;
669
670 spin_lock(&cache->lock);
671 last = cache->key.objectid + cache->key.offset;
672 used = btrfs_block_group_used(&cache->item);
673
674 if ((full_search || !cache->ro) &&
675 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
676 if (used + cache->pinned + cache->reserved <
677 div_factor(cache->key.offset, factor)) {
678 group_start = cache->key.objectid;
679 spin_unlock(&cache->lock);
680 btrfs_put_block_group(cache);
681 goto found;
682 }
683 }
684 spin_unlock(&cache->lock);
685 btrfs_put_block_group(cache);
686 cond_resched();
687 }
688 if (!wrapped) {
689 last = search_start;
690 wrapped = 1;
691 goto again;
692 }
693 if (!full_search && factor < 10) {
694 last = search_start;
695 full_search = 1;
696 factor = 10;
697 goto again;
698 }
699found:
700 return group_start;
701}
702
703/* simple helper to search for an existing extent at a given offset */ 689/* simple helper to search for an existing extent at a given offset */
704int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 690int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
705{ 691{
@@ -713,15 +699,21 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
713 699
714 key.objectid = start; 700 key.objectid = start;
715 key.offset = len; 701 key.offset = len;
716 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 702 key.type = BTRFS_EXTENT_ITEM_KEY;
717 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 703 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
718 0, 0); 704 0, 0);
705 if (ret > 0) {
706 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
707 if (key.objectid == start &&
708 key.type == BTRFS_METADATA_ITEM_KEY)
709 ret = 0;
710 }
719 btrfs_free_path(path); 711 btrfs_free_path(path);
720 return ret; 712 return ret;
721} 713}
722 714
723/* 715/*
724 * helper function to lookup reference count and flags of extent. 716 * helper function to lookup reference count and flags of a tree block.
725 * 717 *
726 * the head node for delayed ref is used to store the sum of all the 718 * the head node for delayed ref is used to store the sum of all the
727 * reference count modifications queued up in the rbtree. the head 719 * reference count modifications queued up in the rbtree. the head
@@ -731,7 +723,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
731 */ 723 */
732int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 724int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
733 struct btrfs_root *root, u64 bytenr, 725 struct btrfs_root *root, u64 bytenr,
734 u64 num_bytes, u64 *refs, u64 *flags) 726 u64 offset, int metadata, u64 *refs, u64 *flags)
735{ 727{
736 struct btrfs_delayed_ref_head *head; 728 struct btrfs_delayed_ref_head *head;
737 struct btrfs_delayed_ref_root *delayed_refs; 729 struct btrfs_delayed_ref_root *delayed_refs;
@@ -744,13 +736,29 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
744 u64 extent_flags; 736 u64 extent_flags;
745 int ret; 737 int ret;
746 738
739 /*
740 * If we don't have skinny metadata, don't bother doing anything
741 * different
742 */
743 if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
744 offset = root->leafsize;
745 metadata = 0;
746 }
747
747 path = btrfs_alloc_path(); 748 path = btrfs_alloc_path();
748 if (!path) 749 if (!path)
749 return -ENOMEM; 750 return -ENOMEM;
750 751
751 key.objectid = bytenr; 752 if (metadata) {
752 key.type = BTRFS_EXTENT_ITEM_KEY; 753 key.objectid = bytenr;
753 key.offset = num_bytes; 754 key.type = BTRFS_METADATA_ITEM_KEY;
755 key.offset = offset;
756 } else {
757 key.objectid = bytenr;
758 key.type = BTRFS_EXTENT_ITEM_KEY;
759 key.offset = offset;
760 }
761
754 if (!trans) { 762 if (!trans) {
755 path->skip_locking = 1; 763 path->skip_locking = 1;
756 path->search_commit_root = 1; 764 path->search_commit_root = 1;
@@ -761,6 +769,13 @@ again:
761 if (ret < 0) 769 if (ret < 0)
762 goto out_free; 770 goto out_free;
763 771
772 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
773 key.type = BTRFS_EXTENT_ITEM_KEY;
774 key.offset = root->leafsize;
775 btrfs_release_path(path);
776 goto again;
777 }
778
764 if (ret == 0) { 779 if (ret == 0) {
765 leaf = path->nodes[0]; 780 leaf = path->nodes[0];
766 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 781 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -996,7 +1011,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
996 return ret; 1011 return ret;
997 BUG_ON(ret); /* Corruption */ 1012 BUG_ON(ret); /* Corruption */
998 1013
999 btrfs_extend_item(trans, root, path, new_size); 1014 btrfs_extend_item(root, path, new_size);
1000 1015
1001 leaf = path->nodes[0]; 1016 leaf = path->nodes[0];
1002 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1017 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1448,6 +1463,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1448 int want; 1463 int want;
1449 int ret; 1464 int ret;
1450 int err = 0; 1465 int err = 0;
1466 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
1467 SKINNY_METADATA);
1451 1468
1452 key.objectid = bytenr; 1469 key.objectid = bytenr;
1453 key.type = BTRFS_EXTENT_ITEM_KEY; 1470 key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -1459,11 +1476,46 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1459 path->keep_locks = 1; 1476 path->keep_locks = 1;
1460 } else 1477 } else
1461 extra_size = -1; 1478 extra_size = -1;
1479
1480 /*
1481 * Owner is our parent level, so we can just add one to get the level
1482 * for the block we are interested in.
1483 */
1484 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
1485 key.type = BTRFS_METADATA_ITEM_KEY;
1486 key.offset = owner;
1487 }
1488
1489again:
1462 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1490 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1463 if (ret < 0) { 1491 if (ret < 0) {
1464 err = ret; 1492 err = ret;
1465 goto out; 1493 goto out;
1466 } 1494 }
1495
1496 /*
1497 * We may be a newly converted file system which still has the old fat
1498 * extent entries for metadata, so try and see if we have one of those.
1499 */
1500 if (ret > 0 && skinny_metadata) {
1501 skinny_metadata = false;
1502 if (path->slots[0]) {
1503 path->slots[0]--;
1504 btrfs_item_key_to_cpu(path->nodes[0], &key,
1505 path->slots[0]);
1506 if (key.objectid == bytenr &&
1507 key.type == BTRFS_EXTENT_ITEM_KEY &&
1508 key.offset == num_bytes)
1509 ret = 0;
1510 }
1511 if (ret) {
1512 key.type = BTRFS_EXTENT_ITEM_KEY;
1513 key.offset = num_bytes;
1514 btrfs_release_path(path);
1515 goto again;
1516 }
1517 }
1518
1467 if (ret && !insert) { 1519 if (ret && !insert) {
1468 err = -ENOENT; 1520 err = -ENOENT;
1469 goto out; 1521 goto out;
@@ -1499,11 +1551,9 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1499 ptr = (unsigned long)(ei + 1); 1551 ptr = (unsigned long)(ei + 1);
1500 end = (unsigned long)ei + item_size; 1552 end = (unsigned long)ei + item_size;
1501 1553
1502 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1554 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
1503 ptr += sizeof(struct btrfs_tree_block_info); 1555 ptr += sizeof(struct btrfs_tree_block_info);
1504 BUG_ON(ptr > end); 1556 BUG_ON(ptr > end);
1505 } else {
1506 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
1507 } 1557 }
1508 1558
1509 err = -ENOENT; 1559 err = -ENOENT;
@@ -1585,8 +1635,7 @@ out:
1585 * helper to add new inline back ref 1635 * helper to add new inline back ref
1586 */ 1636 */
1587static noinline_for_stack 1637static noinline_for_stack
1588void setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1638void setup_inline_extent_backref(struct btrfs_root *root,
1589 struct btrfs_root *root,
1590 struct btrfs_path *path, 1639 struct btrfs_path *path,
1591 struct btrfs_extent_inline_ref *iref, 1640 struct btrfs_extent_inline_ref *iref,
1592 u64 parent, u64 root_objectid, 1641 u64 parent, u64 root_objectid,
@@ -1609,7 +1658,7 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1609 type = extent_ref_type(parent, owner); 1658 type = extent_ref_type(parent, owner);
1610 size = btrfs_extent_inline_ref_size(type); 1659 size = btrfs_extent_inline_ref_size(type);
1611 1660
1612 btrfs_extend_item(trans, root, path, size); 1661 btrfs_extend_item(root, path, size);
1613 1662
1614 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1663 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1615 refs = btrfs_extent_refs(leaf, ei); 1664 refs = btrfs_extent_refs(leaf, ei);
@@ -1678,8 +1727,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1678 * helper to update/remove inline back ref 1727 * helper to update/remove inline back ref
1679 */ 1728 */
1680static noinline_for_stack 1729static noinline_for_stack
1681void update_inline_extent_backref(struct btrfs_trans_handle *trans, 1730void update_inline_extent_backref(struct btrfs_root *root,
1682 struct btrfs_root *root,
1683 struct btrfs_path *path, 1731 struct btrfs_path *path,
1684 struct btrfs_extent_inline_ref *iref, 1732 struct btrfs_extent_inline_ref *iref,
1685 int refs_to_mod, 1733 int refs_to_mod,
@@ -1735,7 +1783,7 @@ void update_inline_extent_backref(struct btrfs_trans_handle *trans,
1735 memmove_extent_buffer(leaf, ptr, ptr + size, 1783 memmove_extent_buffer(leaf, ptr, ptr + size,
1736 end - ptr - size); 1784 end - ptr - size);
1737 item_size -= size; 1785 item_size -= size;
1738 btrfs_truncate_item(trans, root, path, item_size, 1); 1786 btrfs_truncate_item(root, path, item_size, 1);
1739 } 1787 }
1740 btrfs_mark_buffer_dirty(leaf); 1788 btrfs_mark_buffer_dirty(leaf);
1741} 1789}
@@ -1757,10 +1805,10 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1757 root_objectid, owner, offset, 1); 1805 root_objectid, owner, offset, 1);
1758 if (ret == 0) { 1806 if (ret == 0) {
1759 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1807 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1760 update_inline_extent_backref(trans, root, path, iref, 1808 update_inline_extent_backref(root, path, iref,
1761 refs_to_add, extent_op); 1809 refs_to_add, extent_op);
1762 } else if (ret == -ENOENT) { 1810 } else if (ret == -ENOENT) {
1763 setup_inline_extent_backref(trans, root, path, iref, parent, 1811 setup_inline_extent_backref(root, path, iref, parent,
1764 root_objectid, owner, offset, 1812 root_objectid, owner, offset,
1765 refs_to_add, extent_op); 1813 refs_to_add, extent_op);
1766 ret = 0; 1814 ret = 0;
@@ -1797,7 +1845,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1797 1845
1798 BUG_ON(!is_data && refs_to_drop != 1); 1846 BUG_ON(!is_data && refs_to_drop != 1);
1799 if (iref) { 1847 if (iref) {
1800 update_inline_extent_backref(trans, root, path, iref, 1848 update_inline_extent_backref(root, path, iref,
1801 -refs_to_drop, NULL); 1849 -refs_to_drop, NULL);
1802 } else if (is_data) { 1850 } else if (is_data) {
1803 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1851 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
@@ -1968,10 +2016,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1968 ref_root = ref->root; 2016 ref_root = ref->root;
1969 2017
1970 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2018 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1971 if (extent_op) { 2019 if (extent_op)
1972 BUG_ON(extent_op->update_key);
1973 flags |= extent_op->flags_to_set; 2020 flags |= extent_op->flags_to_set;
1974 }
1975 ret = alloc_reserved_file_extent(trans, root, 2021 ret = alloc_reserved_file_extent(trans, root,
1976 parent, ref_root, flags, 2022 parent, ref_root, flags,
1977 ref->objectid, ref->offset, 2023 ref->objectid, ref->offset,
@@ -2024,18 +2070,33 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2024 u32 item_size; 2070 u32 item_size;
2025 int ret; 2071 int ret;
2026 int err = 0; 2072 int err = 0;
2073 int metadata = (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2074 node->type == BTRFS_SHARED_BLOCK_REF_KEY);
2027 2075
2028 if (trans->aborted) 2076 if (trans->aborted)
2029 return 0; 2077 return 0;
2030 2078
2079 if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
2080 metadata = 0;
2081
2031 path = btrfs_alloc_path(); 2082 path = btrfs_alloc_path();
2032 if (!path) 2083 if (!path)
2033 return -ENOMEM; 2084 return -ENOMEM;
2034 2085
2035 key.objectid = node->bytenr; 2086 key.objectid = node->bytenr;
2036 key.type = BTRFS_EXTENT_ITEM_KEY;
2037 key.offset = node->num_bytes;
2038 2087
2088 if (metadata) {
2089 struct btrfs_delayed_tree_ref *tree_ref;
2090
2091 tree_ref = btrfs_delayed_node_to_tree_ref(node);
2092 key.type = BTRFS_METADATA_ITEM_KEY;
2093 key.offset = tree_ref->level;
2094 } else {
2095 key.type = BTRFS_EXTENT_ITEM_KEY;
2096 key.offset = node->num_bytes;
2097 }
2098
2099again:
2039 path->reada = 1; 2100 path->reada = 1;
2040 path->leave_spinning = 1; 2101 path->leave_spinning = 1;
2041 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 2102 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
@@ -2045,6 +2106,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
2045 goto out; 2106 goto out;
2046 } 2107 }
2047 if (ret > 0) { 2108 if (ret > 0) {
2109 if (metadata) {
2110 btrfs_release_path(path);
2111 metadata = 0;
2112
2113 key.offset = node->num_bytes;
2114 key.type = BTRFS_EXTENT_ITEM_KEY;
2115 goto again;
2116 }
2048 err = -EIO; 2117 err = -EIO;
2049 goto out; 2118 goto out;
2050 } 2119 }
@@ -2084,10 +2153,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2084 struct btrfs_key ins; 2153 struct btrfs_key ins;
2085 u64 parent = 0; 2154 u64 parent = 0;
2086 u64 ref_root = 0; 2155 u64 ref_root = 0;
2087 2156 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
2088 ins.objectid = node->bytenr; 2157 SKINNY_METADATA);
2089 ins.offset = node->num_bytes;
2090 ins.type = BTRFS_EXTENT_ITEM_KEY;
2091 2158
2092 ref = btrfs_delayed_node_to_tree_ref(node); 2159 ref = btrfs_delayed_node_to_tree_ref(node);
2093 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2160 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
@@ -2095,10 +2162,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2095 else 2162 else
2096 ref_root = ref->root; 2163 ref_root = ref->root;
2097 2164
2165 ins.objectid = node->bytenr;
2166 if (skinny_metadata) {
2167 ins.offset = ref->level;
2168 ins.type = BTRFS_METADATA_ITEM_KEY;
2169 } else {
2170 ins.offset = node->num_bytes;
2171 ins.type = BTRFS_EXTENT_ITEM_KEY;
2172 }
2173
2098 BUG_ON(node->ref_mod != 1); 2174 BUG_ON(node->ref_mod != 1);
2099 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2175 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2100 BUG_ON(!extent_op || !extent_op->update_flags || 2176 BUG_ON(!extent_op || !extent_op->update_flags);
2101 !extent_op->update_key);
2102 ret = alloc_reserved_tree_block(trans, root, 2177 ret = alloc_reserved_tree_block(trans, root,
2103 parent, ref_root, 2178 parent, ref_root,
2104 extent_op->flags_to_set, 2179 extent_op->flags_to_set,
@@ -2302,9 +2377,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2302 btrfs_free_delayed_extent_op(extent_op); 2377 btrfs_free_delayed_extent_op(extent_op);
2303 2378
2304 if (ret) { 2379 if (ret) {
2305 printk(KERN_DEBUG 2380 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2306 "btrfs: run_delayed_extent_op "
2307 "returned %d\n", ret);
2308 spin_lock(&delayed_refs->lock); 2381 spin_lock(&delayed_refs->lock);
2309 btrfs_delayed_ref_unlock(locked_ref); 2382 btrfs_delayed_ref_unlock(locked_ref);
2310 return ret; 2383 return ret;
@@ -2343,8 +2416,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2343 if (ret) { 2416 if (ret) {
2344 btrfs_delayed_ref_unlock(locked_ref); 2417 btrfs_delayed_ref_unlock(locked_ref);
2345 btrfs_put_delayed_ref(ref); 2418 btrfs_put_delayed_ref(ref);
2346 printk(KERN_DEBUG 2419 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2347 "btrfs: run_one_delayed_ref returned %d\n", ret);
2348 spin_lock(&delayed_refs->lock); 2420 spin_lock(&delayed_refs->lock);
2349 return ret; 2421 return ret;
2350 } 2422 }
@@ -2421,9 +2493,11 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2421 if (list_empty(&trans->qgroup_ref_list) != 2493 if (list_empty(&trans->qgroup_ref_list) !=
2422 !trans->delayed_ref_elem.seq) { 2494 !trans->delayed_ref_elem.seq) {
2423 /* list without seq or seq without list */ 2495 /* list without seq or seq without list */
2424 printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n", 2496 btrfs_err(fs_info,
2497 "qgroup accounting update error, list is%s empty, seq is %#x.%x",
2425 list_empty(&trans->qgroup_ref_list) ? "" : " not", 2498 list_empty(&trans->qgroup_ref_list) ? "" : " not",
2426 trans->delayed_ref_elem.seq); 2499 (u32)(trans->delayed_ref_elem.seq >> 32),
2500 (u32)trans->delayed_ref_elem.seq);
2427 BUG(); 2501 BUG();
2428 } 2502 }
2429 2503
@@ -3332,7 +3406,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
3332 * progress (either running or paused) picks the target profile (if it's 3406 * progress (either running or paused) picks the target profile (if it's
3333 * already available), otherwise falls back to plain reducing. 3407 * already available), otherwise falls back to plain reducing.
3334 */ 3408 */
3335u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3409static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3336{ 3410{
3337 /* 3411 /*
3338 * we add in the count of missing devices because we want 3412 * we add in the count of missing devices because we want
@@ -3552,6 +3626,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
3552 rcu_read_unlock(); 3626 rcu_read_unlock();
3553} 3627}
3554 3628
3629static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
3630{
3631 return (global->size << 1);
3632}
3633
3555static int should_alloc_chunk(struct btrfs_root *root, 3634static int should_alloc_chunk(struct btrfs_root *root,
3556 struct btrfs_space_info *sinfo, int force) 3635 struct btrfs_space_info *sinfo, int force)
3557{ 3636{
@@ -3569,7 +3648,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
3569 * global_rsv, it doesn't change except when the transaction commits. 3648 * global_rsv, it doesn't change except when the transaction commits.
3570 */ 3649 */
3571 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) 3650 if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
3572 num_allocated += global_rsv->size; 3651 num_allocated += calc_global_rsv_need_space(global_rsv);
3573 3652
3574 /* 3653 /*
3575 * in limited mode, we want to have some free space up to 3654 * in limited mode, we want to have some free space up to
@@ -3622,8 +3701,8 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
3622 3701
3623 thresh = get_system_chunk_thresh(root, type); 3702 thresh = get_system_chunk_thresh(root, type);
3624 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) { 3703 if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
3625 printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n", 3704 btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
3626 left, thresh, type); 3705 left, thresh, type);
3627 dump_space_info(info, 0, 0); 3706 dump_space_info(info, 0, 0);
3628 } 3707 }
3629 3708
@@ -3741,7 +3820,7 @@ static int can_overcommit(struct btrfs_root *root,
3741{ 3820{
3742 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 3821 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3743 u64 profile = btrfs_get_alloc_profile(root, 0); 3822 u64 profile = btrfs_get_alloc_profile(root, 0);
3744 u64 rsv_size = 0; 3823 u64 space_size;
3745 u64 avail; 3824 u64 avail;
3746 u64 used; 3825 u64 used;
3747 u64 to_add; 3826 u64 to_add;
@@ -3749,18 +3828,16 @@ static int can_overcommit(struct btrfs_root *root,
3749 used = space_info->bytes_used + space_info->bytes_reserved + 3828 used = space_info->bytes_used + space_info->bytes_reserved +
3750 space_info->bytes_pinned + space_info->bytes_readonly; 3829 space_info->bytes_pinned + space_info->bytes_readonly;
3751 3830
3752 spin_lock(&global_rsv->lock);
3753 rsv_size = global_rsv->size;
3754 spin_unlock(&global_rsv->lock);
3755
3756 /* 3831 /*
3757 * We only want to allow over committing if we have lots of actual space 3832 * We only want to allow over committing if we have lots of actual space
3758 * free, but if we don't have enough space to handle the global reserve 3833 * free, but if we don't have enough space to handle the global reserve
3759 * space then we could end up having a real enospc problem when trying 3834 * space then we could end up having a real enospc problem when trying
3760 * to allocate a chunk or some other such important allocation. 3835 * to allocate a chunk or some other such important allocation.
3761 */ 3836 */
3762 rsv_size <<= 1; 3837 spin_lock(&global_rsv->lock);
3763 if (used + rsv_size >= space_info->total_bytes) 3838 space_size = calc_global_rsv_need_space(global_rsv);
3839 spin_unlock(&global_rsv->lock);
3840 if (used + space_size >= space_info->total_bytes)
3764 return 0; 3841 return 0;
3765 3842
3766 used += space_info->bytes_may_use; 3843 used += space_info->bytes_may_use;
@@ -3803,8 +3880,8 @@ static int can_overcommit(struct btrfs_root *root,
3803 return 0; 3880 return 0;
3804} 3881}
3805 3882
3806void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, 3883static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
3807 unsigned long nr_pages) 3884 unsigned long nr_pages)
3808{ 3885{
3809 struct super_block *sb = root->fs_info->sb; 3886 struct super_block *sb = root->fs_info->sb;
3810 int started; 3887 int started;
@@ -3821,7 +3898,8 @@ void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
3821 * the disk). 3898 * the disk).
3822 */ 3899 */
3823 btrfs_start_delalloc_inodes(root, 0); 3900 btrfs_start_delalloc_inodes(root, 0);
3824 btrfs_wait_ordered_extents(root, 0); 3901 if (!current->journal_info)
3902 btrfs_wait_ordered_extents(root, 0);
3825 } 3903 }
3826} 3904}
3827 3905
@@ -4438,7 +4516,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4438 spin_lock(&sinfo->lock); 4516 spin_lock(&sinfo->lock);
4439 spin_lock(&block_rsv->lock); 4517 spin_lock(&block_rsv->lock);
4440 4518
4441 block_rsv->size = num_bytes; 4519 block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
4442 4520
4443 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 4521 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
4444 sinfo->bytes_reserved + sinfo->bytes_readonly + 4522 sinfo->bytes_reserved + sinfo->bytes_readonly +
@@ -4793,14 +4871,49 @@ out_fail:
4793 * If the inodes csum_bytes is the same as the original 4871 * If the inodes csum_bytes is the same as the original
4794 * csum_bytes then we know we haven't raced with any free()ers 4872 * csum_bytes then we know we haven't raced with any free()ers
4795 * so we can just reduce our inodes csum bytes and carry on. 4873 * so we can just reduce our inodes csum bytes and carry on.
4796 * Otherwise we have to do the normal free thing to account for
4797 * the case that the free side didn't free up its reserve
4798 * because of this outstanding reservation.
4799 */ 4874 */
4800 if (BTRFS_I(inode)->csum_bytes == csum_bytes) 4875 if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
4801 calc_csum_metadata_size(inode, num_bytes, 0); 4876 calc_csum_metadata_size(inode, num_bytes, 0);
4802 else 4877 } else {
4803 to_free = calc_csum_metadata_size(inode, num_bytes, 0); 4878 u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
4879 u64 bytes;
4880
4881 /*
4882 * This is tricky, but first we need to figure out how much we
4883 * free'd from any free-ers that occured during this
4884 * reservation, so we reset ->csum_bytes to the csum_bytes
4885 * before we dropped our lock, and then call the free for the
4886 * number of bytes that were freed while we were trying our
4887 * reservation.
4888 */
4889 bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
4890 BTRFS_I(inode)->csum_bytes = csum_bytes;
4891 to_free = calc_csum_metadata_size(inode, bytes, 0);
4892
4893
4894 /*
4895 * Now we need to see how much we would have freed had we not
4896 * been making this reservation and our ->csum_bytes were not
4897 * artificially inflated.
4898 */
4899 BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
4900 bytes = csum_bytes - orig_csum_bytes;
4901 bytes = calc_csum_metadata_size(inode, bytes, 0);
4902
4903 /*
4904 * Now reset ->csum_bytes to what it should be. If bytes is
4905 * more than to_free then we would have free'd more space had we
4906 * not had an artificially high ->csum_bytes, so we need to free
4907 * the remainder. If bytes is the same or less then we don't
4908 * need to do anything, the other free-ers did the correct
4909 * thing.
4910 */
4911 BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
4912 if (bytes > to_free)
4913 to_free = bytes - to_free;
4914 else
4915 to_free = 0;
4916 }
4804 spin_unlock(&BTRFS_I(inode)->lock); 4917 spin_unlock(&BTRFS_I(inode)->lock);
4805 if (dropped) 4918 if (dropped)
4806 to_free += btrfs_calc_trans_metadata_size(root, dropped); 4919 to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -5050,9 +5163,11 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
5050 u64 bytenr, u64 num_bytes) 5163 u64 bytenr, u64 num_bytes)
5051{ 5164{
5052 struct btrfs_block_group_cache *cache; 5165 struct btrfs_block_group_cache *cache;
5166 int ret;
5053 5167
5054 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 5168 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
5055 BUG_ON(!cache); /* Logic error */ 5169 if (!cache)
5170 return -EINVAL;
5056 5171
5057 /* 5172 /*
5058 * pull in the free space cache (if any) so that our pin 5173 * pull in the free space cache (if any) so that our pin
@@ -5065,9 +5180,9 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
5065 pin_down_extent(root, cache, bytenr, num_bytes, 0); 5180 pin_down_extent(root, cache, bytenr, num_bytes, 0);
5066 5181
5067 /* remove us from the free space cache (if we're there at all) */ 5182 /* remove us from the free space cache (if we're there at all) */
5068 btrfs_remove_free_space(cache, bytenr, num_bytes); 5183 ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
5069 btrfs_put_block_group(cache); 5184 btrfs_put_block_group(cache);
5070 return 0; 5185 return ret;
5071} 5186}
5072 5187
5073/** 5188/**
@@ -5272,6 +5387,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5272 int num_to_del = 1; 5387 int num_to_del = 1;
5273 u32 item_size; 5388 u32 item_size;
5274 u64 refs; 5389 u64 refs;
5390 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
5391 SKINNY_METADATA);
5275 5392
5276 path = btrfs_alloc_path(); 5393 path = btrfs_alloc_path();
5277 if (!path) 5394 if (!path)
@@ -5283,6 +5400,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5283 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 5400 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
5284 BUG_ON(!is_data && refs_to_drop != 1); 5401 BUG_ON(!is_data && refs_to_drop != 1);
5285 5402
5403 if (is_data)
5404 skinny_metadata = 0;
5405
5286 ret = lookup_extent_backref(trans, extent_root, path, &iref, 5406 ret = lookup_extent_backref(trans, extent_root, path, &iref,
5287 bytenr, num_bytes, parent, 5407 bytenr, num_bytes, parent,
5288 root_objectid, owner_objectid, 5408 root_objectid, owner_objectid,
@@ -5299,6 +5419,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5299 found_extent = 1; 5419 found_extent = 1;
5300 break; 5420 break;
5301 } 5421 }
5422 if (key.type == BTRFS_METADATA_ITEM_KEY &&
5423 key.offset == owner_objectid) {
5424 found_extent = 1;
5425 break;
5426 }
5302 if (path->slots[0] - extent_slot > 5) 5427 if (path->slots[0] - extent_slot > 5)
5303 break; 5428 break;
5304 extent_slot--; 5429 extent_slot--;
@@ -5324,12 +5449,39 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5324 key.type = BTRFS_EXTENT_ITEM_KEY; 5449 key.type = BTRFS_EXTENT_ITEM_KEY;
5325 key.offset = num_bytes; 5450 key.offset = num_bytes;
5326 5451
5452 if (!is_data && skinny_metadata) {
5453 key.type = BTRFS_METADATA_ITEM_KEY;
5454 key.offset = owner_objectid;
5455 }
5456
5327 ret = btrfs_search_slot(trans, extent_root, 5457 ret = btrfs_search_slot(trans, extent_root,
5328 &key, path, -1, 1); 5458 &key, path, -1, 1);
5459 if (ret > 0 && skinny_metadata && path->slots[0]) {
5460 /*
5461 * Couldn't find our skinny metadata item,
5462 * see if we have ye olde extent item.
5463 */
5464 path->slots[0]--;
5465 btrfs_item_key_to_cpu(path->nodes[0], &key,
5466 path->slots[0]);
5467 if (key.objectid == bytenr &&
5468 key.type == BTRFS_EXTENT_ITEM_KEY &&
5469 key.offset == num_bytes)
5470 ret = 0;
5471 }
5472
5473 if (ret > 0 && skinny_metadata) {
5474 skinny_metadata = false;
5475 key.type = BTRFS_EXTENT_ITEM_KEY;
5476 key.offset = num_bytes;
5477 btrfs_release_path(path);
5478 ret = btrfs_search_slot(trans, extent_root,
5479 &key, path, -1, 1);
5480 }
5481
5329 if (ret) { 5482 if (ret) {
5330 printk(KERN_ERR "umm, got %d back from search" 5483 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5331 ", was looking for %llu\n", ret, 5484 ret, (unsigned long long)bytenr);
5332 (unsigned long long)bytenr);
5333 if (ret > 0) 5485 if (ret > 0)
5334 btrfs_print_leaf(extent_root, 5486 btrfs_print_leaf(extent_root,
5335 path->nodes[0]); 5487 path->nodes[0]);
@@ -5343,13 +5495,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5343 } else if (ret == -ENOENT) { 5495 } else if (ret == -ENOENT) {
5344 btrfs_print_leaf(extent_root, path->nodes[0]); 5496 btrfs_print_leaf(extent_root, path->nodes[0]);
5345 WARN_ON(1); 5497 WARN_ON(1);
5346 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 5498 btrfs_err(info,
5347 "parent %llu root %llu owner %llu offset %llu\n", 5499 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
5348 (unsigned long long)bytenr, 5500 (unsigned long long)bytenr,
5349 (unsigned long long)parent, 5501 (unsigned long long)parent,
5350 (unsigned long long)root_objectid, 5502 (unsigned long long)root_objectid,
5351 (unsigned long long)owner_objectid, 5503 (unsigned long long)owner_objectid,
5352 (unsigned long long)owner_offset); 5504 (unsigned long long)owner_offset);
5353 } else { 5505 } else {
5354 btrfs_abort_transaction(trans, extent_root, ret); 5506 btrfs_abort_transaction(trans, extent_root, ret);
5355 goto out; 5507 goto out;
@@ -5377,9 +5529,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5377 ret = btrfs_search_slot(trans, extent_root, &key, path, 5529 ret = btrfs_search_slot(trans, extent_root, &key, path,
5378 -1, 1); 5530 -1, 1);
5379 if (ret) { 5531 if (ret) {
5380 printk(KERN_ERR "umm, got %d back from search" 5532 btrfs_err(info, "umm, got %d back from search, was looking for %llu",
5381 ", was looking for %llu\n", ret, 5533 ret, (unsigned long long)bytenr);
5382 (unsigned long long)bytenr);
5383 btrfs_print_leaf(extent_root, path->nodes[0]); 5534 btrfs_print_leaf(extent_root, path->nodes[0]);
5384 } 5535 }
5385 if (ret < 0) { 5536 if (ret < 0) {
@@ -5395,7 +5546,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5395 BUG_ON(item_size < sizeof(*ei)); 5546 BUG_ON(item_size < sizeof(*ei));
5396 ei = btrfs_item_ptr(leaf, extent_slot, 5547 ei = btrfs_item_ptr(leaf, extent_slot,
5397 struct btrfs_extent_item); 5548 struct btrfs_extent_item);
5398 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 5549 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
5550 key.type == BTRFS_EXTENT_ITEM_KEY) {
5399 struct btrfs_tree_block_info *bi; 5551 struct btrfs_tree_block_info *bi;
5400 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 5552 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
5401 bi = (struct btrfs_tree_block_info *)(ei + 1); 5553 bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -5403,7 +5555,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5403 } 5555 }
5404 5556
5405 refs = btrfs_extent_refs(leaf, ei); 5557 refs = btrfs_extent_refs(leaf, ei);
5406 BUG_ON(refs < refs_to_drop); 5558 if (refs < refs_to_drop) {
5559 btrfs_err(info, "trying to drop %d refs but we only have %Lu "
5560 "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
5561 ret = -EINVAL;
5562 btrfs_abort_transaction(trans, extent_root, ret);
5563 goto out;
5564 }
5407 refs -= refs_to_drop; 5565 refs -= refs_to_drop;
5408 5566
5409 if (refs > 0) { 5567 if (refs > 0) {
@@ -5718,7 +5876,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5718 struct btrfs_root *orig_root, 5876 struct btrfs_root *orig_root,
5719 u64 num_bytes, u64 empty_size, 5877 u64 num_bytes, u64 empty_size,
5720 u64 hint_byte, struct btrfs_key *ins, 5878 u64 hint_byte, struct btrfs_key *ins,
5721 u64 data) 5879 u64 flags)
5722{ 5880{
5723 int ret = 0; 5881 int ret = 0;
5724 struct btrfs_root *root = orig_root->fs_info->extent_root; 5882 struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -5729,8 +5887,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5729 int empty_cluster = 2 * 1024 * 1024; 5887 int empty_cluster = 2 * 1024 * 1024;
5730 struct btrfs_space_info *space_info; 5888 struct btrfs_space_info *space_info;
5731 int loop = 0; 5889 int loop = 0;
5732 int index = __get_raid_index(data); 5890 int index = __get_raid_index(flags);
5733 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5891 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
5734 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 5892 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
5735 bool found_uncached_bg = false; 5893 bool found_uncached_bg = false;
5736 bool failed_cluster_refill = false; 5894 bool failed_cluster_refill = false;
@@ -5743,11 +5901,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5743 ins->objectid = 0; 5901 ins->objectid = 0;
5744 ins->offset = 0; 5902 ins->offset = 0;
5745 5903
5746 trace_find_free_extent(orig_root, num_bytes, empty_size, data); 5904 trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
5747 5905
5748 space_info = __find_space_info(root->fs_info, data); 5906 space_info = __find_space_info(root->fs_info, flags);
5749 if (!space_info) { 5907 if (!space_info) {
5750 printk(KERN_ERR "No space info for %llu\n", data); 5908 btrfs_err(root->fs_info, "No space info for %llu", flags);
5751 return -ENOSPC; 5909 return -ENOSPC;
5752 } 5910 }
5753 5911
@@ -5758,13 +5916,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5758 if (btrfs_mixed_space_info(space_info)) 5916 if (btrfs_mixed_space_info(space_info))
5759 use_cluster = false; 5917 use_cluster = false;
5760 5918
5761 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { 5919 if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
5762 last_ptr = &root->fs_info->meta_alloc_cluster; 5920 last_ptr = &root->fs_info->meta_alloc_cluster;
5763 if (!btrfs_test_opt(root, SSD)) 5921 if (!btrfs_test_opt(root, SSD))
5764 empty_cluster = 64 * 1024; 5922 empty_cluster = 64 * 1024;
5765 } 5923 }
5766 5924
5767 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && 5925 if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
5768 btrfs_test_opt(root, SSD)) { 5926 btrfs_test_opt(root, SSD)) {
5769 last_ptr = &root->fs_info->data_alloc_cluster; 5927 last_ptr = &root->fs_info->data_alloc_cluster;
5770 } 5928 }
@@ -5793,7 +5951,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5793 * However if we are re-searching with an ideal block group 5951 * However if we are re-searching with an ideal block group
5794 * picked out then we don't care that the block group is cached. 5952 * picked out then we don't care that the block group is cached.
5795 */ 5953 */
5796 if (block_group && block_group_bits(block_group, data) && 5954 if (block_group && block_group_bits(block_group, flags) &&
5797 block_group->cached != BTRFS_CACHE_NO) { 5955 block_group->cached != BTRFS_CACHE_NO) {
5798 down_read(&space_info->groups_sem); 5956 down_read(&space_info->groups_sem);
5799 if (list_empty(&block_group->list) || 5957 if (list_empty(&block_group->list) ||
@@ -5831,7 +5989,7 @@ search:
5831 * raid types, but we want to make sure we only allocate 5989 * raid types, but we want to make sure we only allocate
5832 * for the proper type. 5990 * for the proper type.
5833 */ 5991 */
5834 if (!block_group_bits(block_group, data)) { 5992 if (!block_group_bits(block_group, flags)) {
5835 u64 extra = BTRFS_BLOCK_GROUP_DUP | 5993 u64 extra = BTRFS_BLOCK_GROUP_DUP |
5836 BTRFS_BLOCK_GROUP_RAID1 | 5994 BTRFS_BLOCK_GROUP_RAID1 |
5837 BTRFS_BLOCK_GROUP_RAID5 | 5995 BTRFS_BLOCK_GROUP_RAID5 |
@@ -5843,7 +6001,7 @@ search:
5843 * doesn't provide them, bail. This does allow us to 6001 * doesn't provide them, bail. This does allow us to
5844 * fill raid0 from raid1. 6002 * fill raid0 from raid1.
5845 */ 6003 */
5846 if ((data & extra) && !(block_group->flags & extra)) 6004 if ((flags & extra) && !(block_group->flags & extra))
5847 goto loop; 6005 goto loop;
5848 } 6006 }
5849 6007
@@ -5874,7 +6032,7 @@ have_block_group:
5874 if (used_block_group != block_group && 6032 if (used_block_group != block_group &&
5875 (!used_block_group || 6033 (!used_block_group ||
5876 used_block_group->ro || 6034 used_block_group->ro ||
5877 !block_group_bits(used_block_group, data))) { 6035 !block_group_bits(used_block_group, flags))) {
5878 used_block_group = block_group; 6036 used_block_group = block_group;
5879 goto refill_cluster; 6037 goto refill_cluster;
5880 } 6038 }
@@ -6070,7 +6228,7 @@ loop:
6070 index = 0; 6228 index = 0;
6071 loop++; 6229 loop++;
6072 if (loop == LOOP_ALLOC_CHUNK) { 6230 if (loop == LOOP_ALLOC_CHUNK) {
6073 ret = do_chunk_alloc(trans, root, data, 6231 ret = do_chunk_alloc(trans, root, flags,
6074 CHUNK_ALLOC_FORCE); 6232 CHUNK_ALLOC_FORCE);
6075 /* 6233 /*
6076 * Do not bail out on ENOSPC since we 6234 * Do not bail out on ENOSPC since we
@@ -6148,16 +6306,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
6148 struct btrfs_root *root, 6306 struct btrfs_root *root,
6149 u64 num_bytes, u64 min_alloc_size, 6307 u64 num_bytes, u64 min_alloc_size,
6150 u64 empty_size, u64 hint_byte, 6308 u64 empty_size, u64 hint_byte,
6151 struct btrfs_key *ins, u64 data) 6309 struct btrfs_key *ins, int is_data)
6152{ 6310{
6153 bool final_tried = false; 6311 bool final_tried = false;
6312 u64 flags;
6154 int ret; 6313 int ret;
6155 6314
6156 data = btrfs_get_alloc_profile(root, data); 6315 flags = btrfs_get_alloc_profile(root, is_data);
6157again: 6316again:
6158 WARN_ON(num_bytes < root->sectorsize); 6317 WARN_ON(num_bytes < root->sectorsize);
6159 ret = find_free_extent(trans, root, num_bytes, empty_size, 6318 ret = find_free_extent(trans, root, num_bytes, empty_size,
6160 hint_byte, ins, data); 6319 hint_byte, ins, flags);
6161 6320
6162 if (ret == -ENOSPC) { 6321 if (ret == -ENOSPC) {
6163 if (!final_tried) { 6322 if (!final_tried) {
@@ -6170,10 +6329,10 @@ again:
6170 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) { 6329 } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
6171 struct btrfs_space_info *sinfo; 6330 struct btrfs_space_info *sinfo;
6172 6331
6173 sinfo = __find_space_info(root->fs_info, data); 6332 sinfo = __find_space_info(root->fs_info, flags);
6174 printk(KERN_ERR "btrfs allocation failed flags %llu, " 6333 btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
6175 "wanted %llu\n", (unsigned long long)data, 6334 (unsigned long long)flags,
6176 (unsigned long long)num_bytes); 6335 (unsigned long long)num_bytes);
6177 if (sinfo) 6336 if (sinfo)
6178 dump_space_info(sinfo, num_bytes, 1); 6337 dump_space_info(sinfo, num_bytes, 1);
6179 } 6338 }
@@ -6192,8 +6351,8 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
6192 6351
6193 cache = btrfs_lookup_block_group(root->fs_info, start); 6352 cache = btrfs_lookup_block_group(root->fs_info, start);
6194 if (!cache) { 6353 if (!cache) {
6195 printk(KERN_ERR "Unable to find block group for %llu\n", 6354 btrfs_err(root->fs_info, "Unable to find block group for %llu",
6196 (unsigned long long)start); 6355 (unsigned long long)start);
6197 return -ENOSPC; 6356 return -ENOSPC;
6198 } 6357 }
6199 6358
@@ -6288,9 +6447,9 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6288 6447
6289 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6448 ret = update_block_group(root, ins->objectid, ins->offset, 1);
6290 if (ret) { /* -ENOENT, logic error */ 6449 if (ret) { /* -ENOENT, logic error */
6291 printk(KERN_ERR "btrfs update block group failed for %llu " 6450 btrfs_err(fs_info, "update block group failed for %llu %llu",
6292 "%llu\n", (unsigned long long)ins->objectid, 6451 (unsigned long long)ins->objectid,
6293 (unsigned long long)ins->offset); 6452 (unsigned long long)ins->offset);
6294 BUG(); 6453 BUG();
6295 } 6454 }
6296 return ret; 6455 return ret;
@@ -6309,7 +6468,12 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6309 struct btrfs_extent_inline_ref *iref; 6468 struct btrfs_extent_inline_ref *iref;
6310 struct btrfs_path *path; 6469 struct btrfs_path *path;
6311 struct extent_buffer *leaf; 6470 struct extent_buffer *leaf;
6312 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 6471 u32 size = sizeof(*extent_item) + sizeof(*iref);
6472 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6473 SKINNY_METADATA);
6474
6475 if (!skinny_metadata)
6476 size += sizeof(*block_info);
6313 6477
6314 path = btrfs_alloc_path(); 6478 path = btrfs_alloc_path();
6315 if (!path) 6479 if (!path)
@@ -6330,12 +6494,16 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6330 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 6494 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
6331 btrfs_set_extent_flags(leaf, extent_item, 6495 btrfs_set_extent_flags(leaf, extent_item,
6332 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 6496 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
6333 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
6334 6497
6335 btrfs_set_tree_block_key(leaf, block_info, key); 6498 if (skinny_metadata) {
6336 btrfs_set_tree_block_level(leaf, block_info, level); 6499 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
6500 } else {
6501 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
6502 btrfs_set_tree_block_key(leaf, block_info, key);
6503 btrfs_set_tree_block_level(leaf, block_info, level);
6504 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
6505 }
6337 6506
6338 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
6339 if (parent > 0) { 6507 if (parent > 0) {
6340 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 6508 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
6341 btrfs_set_extent_inline_ref_type(leaf, iref, 6509 btrfs_set_extent_inline_ref_type(leaf, iref,
@@ -6350,11 +6518,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6350 btrfs_mark_buffer_dirty(leaf); 6518 btrfs_mark_buffer_dirty(leaf);
6351 btrfs_free_path(path); 6519 btrfs_free_path(path);
6352 6520
6353 ret = update_block_group(root, ins->objectid, ins->offset, 1); 6521 ret = update_block_group(root, ins->objectid, root->leafsize, 1);
6354 if (ret) { /* -ENOENT, logic error */ 6522 if (ret) { /* -ENOENT, logic error */
6355 printk(KERN_ERR "btrfs update block group failed for %llu " 6523 btrfs_err(fs_info, "update block group failed for %llu %llu",
6356 "%llu\n", (unsigned long long)ins->objectid, 6524 (unsigned long long)ins->objectid,
6357 (unsigned long long)ins->offset); 6525 (unsigned long long)ins->offset);
6358 BUG(); 6526 BUG();
6359 } 6527 }
6360 return ret; 6528 return ret;
@@ -6399,47 +6567,48 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
6399 if (!caching_ctl) { 6567 if (!caching_ctl) {
6400 BUG_ON(!block_group_cache_done(block_group)); 6568 BUG_ON(!block_group_cache_done(block_group));
6401 ret = btrfs_remove_free_space(block_group, start, num_bytes); 6569 ret = btrfs_remove_free_space(block_group, start, num_bytes);
6402 BUG_ON(ret); /* -ENOMEM */ 6570 if (ret)
6571 goto out;
6403 } else { 6572 } else {
6404 mutex_lock(&caching_ctl->mutex); 6573 mutex_lock(&caching_ctl->mutex);
6405 6574
6406 if (start >= caching_ctl->progress) { 6575 if (start >= caching_ctl->progress) {
6407 ret = add_excluded_extent(root, start, num_bytes); 6576 ret = add_excluded_extent(root, start, num_bytes);
6408 BUG_ON(ret); /* -ENOMEM */
6409 } else if (start + num_bytes <= caching_ctl->progress) { 6577 } else if (start + num_bytes <= caching_ctl->progress) {
6410 ret = btrfs_remove_free_space(block_group, 6578 ret = btrfs_remove_free_space(block_group,
6411 start, num_bytes); 6579 start, num_bytes);
6412 BUG_ON(ret); /* -ENOMEM */
6413 } else { 6580 } else {
6414 num_bytes = caching_ctl->progress - start; 6581 num_bytes = caching_ctl->progress - start;
6415 ret = btrfs_remove_free_space(block_group, 6582 ret = btrfs_remove_free_space(block_group,
6416 start, num_bytes); 6583 start, num_bytes);
6417 BUG_ON(ret); /* -ENOMEM */ 6584 if (ret)
6585 goto out_lock;
6418 6586
6419 start = caching_ctl->progress; 6587 start = caching_ctl->progress;
6420 num_bytes = ins->objectid + ins->offset - 6588 num_bytes = ins->objectid + ins->offset -
6421 caching_ctl->progress; 6589 caching_ctl->progress;
6422 ret = add_excluded_extent(root, start, num_bytes); 6590 ret = add_excluded_extent(root, start, num_bytes);
6423 BUG_ON(ret); /* -ENOMEM */
6424 } 6591 }
6425 6592out_lock:
6426 mutex_unlock(&caching_ctl->mutex); 6593 mutex_unlock(&caching_ctl->mutex);
6427 put_caching_control(caching_ctl); 6594 put_caching_control(caching_ctl);
6595 if (ret)
6596 goto out;
6428 } 6597 }
6429 6598
6430 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 6599 ret = btrfs_update_reserved_bytes(block_group, ins->offset,
6431 RESERVE_ALLOC_NO_ACCOUNT); 6600 RESERVE_ALLOC_NO_ACCOUNT);
6432 BUG_ON(ret); /* logic error */ 6601 BUG_ON(ret); /* logic error */
6433 btrfs_put_block_group(block_group);
6434 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 6602 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
6435 0, owner, offset, ins, 1); 6603 0, owner, offset, ins, 1);
6604out:
6605 btrfs_put_block_group(block_group);
6436 return ret; 6606 return ret;
6437} 6607}
6438 6608
6439struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 6609static struct extent_buffer *
6440 struct btrfs_root *root, 6610btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
6441 u64 bytenr, u32 blocksize, 6611 u64 bytenr, u32 blocksize, int level)
6442 int level)
6443{ 6612{
6444 struct extent_buffer *buf; 6613 struct extent_buffer *buf;
6445 6614
@@ -6554,7 +6723,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6554 struct extent_buffer *buf; 6723 struct extent_buffer *buf;
6555 u64 flags = 0; 6724 u64 flags = 0;
6556 int ret; 6725 int ret;
6557 6726 bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
6727 SKINNY_METADATA);
6558 6728
6559 block_rsv = use_block_rsv(trans, root, blocksize); 6729 block_rsv = use_block_rsv(trans, root, blocksize);
6560 if (IS_ERR(block_rsv)) 6730 if (IS_ERR(block_rsv))
@@ -6587,7 +6757,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6587 else 6757 else
6588 memset(&extent_op->key, 0, sizeof(extent_op->key)); 6758 memset(&extent_op->key, 0, sizeof(extent_op->key));
6589 extent_op->flags_to_set = flags; 6759 extent_op->flags_to_set = flags;
6590 extent_op->update_key = 1; 6760 if (skinny_metadata)
6761 extent_op->update_key = 0;
6762 else
6763 extent_op->update_key = 1;
6591 extent_op->update_flags = 1; 6764 extent_op->update_flags = 1;
6592 extent_op->is_data = 0; 6765 extent_op->is_data = 0;
6593 6766
@@ -6664,8 +6837,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
6664 continue; 6837 continue;
6665 6838
6666 /* We don't lock the tree block, it's OK to be racy here */ 6839 /* We don't lock the tree block, it's OK to be racy here */
6667 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 6840 ret = btrfs_lookup_extent_info(trans, root, bytenr,
6668 &refs, &flags); 6841 wc->level - 1, 1, &refs,
6842 &flags);
6669 /* We don't care about errors in readahead. */ 6843 /* We don't care about errors in readahead. */
6670 if (ret < 0) 6844 if (ret < 0)
6671 continue; 6845 continue;
@@ -6732,7 +6906,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
6732 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { 6906 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
6733 BUG_ON(!path->locks[level]); 6907 BUG_ON(!path->locks[level]);
6734 ret = btrfs_lookup_extent_info(trans, root, 6908 ret = btrfs_lookup_extent_info(trans, root,
6735 eb->start, eb->len, 6909 eb->start, level, 1,
6736 &wc->refs[level], 6910 &wc->refs[level],
6737 &wc->flags[level]); 6911 &wc->flags[level]);
6738 BUG_ON(ret == -ENOMEM); 6912 BUG_ON(ret == -ENOMEM);
@@ -6830,7 +7004,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6830 btrfs_tree_lock(next); 7004 btrfs_tree_lock(next);
6831 btrfs_set_lock_blocking(next); 7005 btrfs_set_lock_blocking(next);
6832 7006
6833 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 7007 ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
6834 &wc->refs[level - 1], 7008 &wc->refs[level - 1],
6835 &wc->flags[level - 1]); 7009 &wc->flags[level - 1]);
6836 if (ret < 0) { 7010 if (ret < 0) {
@@ -6838,7 +7012,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6838 return ret; 7012 return ret;
6839 } 7013 }
6840 7014
6841 BUG_ON(wc->refs[level - 1] == 0); 7015 if (unlikely(wc->refs[level - 1] == 0)) {
7016 btrfs_err(root->fs_info, "Missing references.");
7017 BUG();
7018 }
6842 *lookup_info = 0; 7019 *lookup_info = 0;
6843 7020
6844 if (wc->stage == DROP_REFERENCE) { 7021 if (wc->stage == DROP_REFERENCE) {
@@ -6877,8 +7054,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6877 if (reada && level == 1) 7054 if (reada && level == 1)
6878 reada_walk_down(trans, root, wc, path); 7055 reada_walk_down(trans, root, wc, path);
6879 next = read_tree_block(root, bytenr, blocksize, generation); 7056 next = read_tree_block(root, bytenr, blocksize, generation);
6880 if (!next) 7057 if (!next || !extent_buffer_uptodate(next)) {
7058 free_extent_buffer(next);
6881 return -EIO; 7059 return -EIO;
7060 }
6882 btrfs_tree_lock(next); 7061 btrfs_tree_lock(next);
6883 btrfs_set_lock_blocking(next); 7062 btrfs_set_lock_blocking(next);
6884 } 7063 }
@@ -6961,7 +7140,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6961 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 7140 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
6962 7141
6963 ret = btrfs_lookup_extent_info(trans, root, 7142 ret = btrfs_lookup_extent_info(trans, root,
6964 eb->start, eb->len, 7143 eb->start, level, 1,
6965 &wc->refs[level], 7144 &wc->refs[level],
6966 &wc->flags[level]); 7145 &wc->flags[level]);
6967 if (ret < 0) { 7146 if (ret < 0) {
@@ -7097,6 +7276,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
7097 * reference count by one. if update_ref is true, this function 7276 * reference count by one. if update_ref is true, this function
7098 * also make sure backrefs for the shared block and all lower level 7277 * also make sure backrefs for the shared block and all lower level
7099 * blocks are properly updated. 7278 * blocks are properly updated.
7279 *
7280 * If called with for_reloc == 0, may exit early with -EAGAIN
7100 */ 7281 */
7101int btrfs_drop_snapshot(struct btrfs_root *root, 7282int btrfs_drop_snapshot(struct btrfs_root *root,
7102 struct btrfs_block_rsv *block_rsv, int update_ref, 7283 struct btrfs_block_rsv *block_rsv, int update_ref,
@@ -7171,8 +7352,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7171 7352
7172 ret = btrfs_lookup_extent_info(trans, root, 7353 ret = btrfs_lookup_extent_info(trans, root,
7173 path->nodes[level]->start, 7354 path->nodes[level]->start,
7174 path->nodes[level]->len, 7355 level, 1, &wc->refs[level],
7175 &wc->refs[level],
7176 &wc->flags[level]); 7356 &wc->flags[level]);
7177 if (ret < 0) { 7357 if (ret < 0) {
7178 err = ret; 7358 err = ret;
@@ -7198,6 +7378,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7198 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 7378 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
7199 7379
7200 while (1) { 7380 while (1) {
7381 if (!for_reloc && btrfs_fs_closing(root->fs_info)) {
7382 pr_debug("btrfs: drop snapshot early exit\n");
7383 err = -EAGAIN;
7384 goto out_end_trans;
7385 }
7386
7201 ret = walk_down_tree(trans, root, path, wc); 7387 ret = walk_down_tree(trans, root, path, wc);
7202 if (ret < 0) { 7388 if (ret < 0) {
7203 err = ret; 7389 err = ret;
@@ -7947,7 +8133,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7947 * info has super bytes accounted for, otherwise we'll think 8133 * info has super bytes accounted for, otherwise we'll think
7948 * we have more space than we actually do. 8134 * we have more space than we actually do.
7949 */ 8135 */
7950 exclude_super_stripes(root, cache); 8136 ret = exclude_super_stripes(root, cache);
8137 if (ret) {
8138 /*
8139 * We may have excluded something, so call this just in
8140 * case.
8141 */
8142 free_excluded_extents(root, cache);
8143 kfree(cache->free_space_ctl);
8144 kfree(cache);
8145 goto error;
8146 }
7951 8147
7952 /* 8148 /*
7953 * check for two cases, either we are full, and therefore 8149 * check for two cases, either we are full, and therefore
@@ -7970,10 +8166,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7970 free_excluded_extents(root, cache); 8166 free_excluded_extents(root, cache);
7971 } 8167 }
7972 8168
8169 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8170 if (ret) {
8171 btrfs_remove_free_space_cache(cache);
8172 btrfs_put_block_group(cache);
8173 goto error;
8174 }
8175
7973 ret = update_space_info(info, cache->flags, found_key.offset, 8176 ret = update_space_info(info, cache->flags, found_key.offset,
7974 btrfs_block_group_used(&cache->item), 8177 btrfs_block_group_used(&cache->item),
7975 &space_info); 8178 &space_info);
7976 BUG_ON(ret); /* -ENOMEM */ 8179 if (ret) {
8180 btrfs_remove_free_space_cache(cache);
8181 spin_lock(&info->block_group_cache_lock);
8182 rb_erase(&cache->cache_node,
8183 &info->block_group_cache_tree);
8184 spin_unlock(&info->block_group_cache_lock);
8185 btrfs_put_block_group(cache);
8186 goto error;
8187 }
8188
7977 cache->space_info = space_info; 8189 cache->space_info = space_info;
7978 spin_lock(&cache->space_info->lock); 8190 spin_lock(&cache->space_info->lock);
7979 cache->space_info->bytes_readonly += cache->bytes_super; 8191 cache->space_info->bytes_readonly += cache->bytes_super;
@@ -7981,9 +8193,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7981 8193
7982 __link_block_group(space_info, cache); 8194 __link_block_group(space_info, cache);
7983 8195
7984 ret = btrfs_add_block_group_cache(root->fs_info, cache);
7985 BUG_ON(ret); /* Logic error */
7986
7987 set_avail_alloc_bits(root->fs_info, cache->flags); 8196 set_avail_alloc_bits(root->fs_info, cache->flags);
7988 if (btrfs_chunk_readonly(root, cache->key.objectid)) 8197 if (btrfs_chunk_readonly(root, cache->key.objectid))
7989 set_block_group_ro(cache, 1); 8198 set_block_group_ro(cache, 1);
@@ -8089,16 +8298,41 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8089 8298
8090 cache->last_byte_to_unpin = (u64)-1; 8299 cache->last_byte_to_unpin = (u64)-1;
8091 cache->cached = BTRFS_CACHE_FINISHED; 8300 cache->cached = BTRFS_CACHE_FINISHED;
8092 exclude_super_stripes(root, cache); 8301 ret = exclude_super_stripes(root, cache);
8302 if (ret) {
8303 /*
8304 * We may have excluded something, so call this just in
8305 * case.
8306 */
8307 free_excluded_extents(root, cache);
8308 kfree(cache->free_space_ctl);
8309 kfree(cache);
8310 return ret;
8311 }
8093 8312
8094 add_new_free_space(cache, root->fs_info, chunk_offset, 8313 add_new_free_space(cache, root->fs_info, chunk_offset,
8095 chunk_offset + size); 8314 chunk_offset + size);
8096 8315
8097 free_excluded_extents(root, cache); 8316 free_excluded_extents(root, cache);
8098 8317
8318 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8319 if (ret) {
8320 btrfs_remove_free_space_cache(cache);
8321 btrfs_put_block_group(cache);
8322 return ret;
8323 }
8324
8099 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 8325 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
8100 &cache->space_info); 8326 &cache->space_info);
8101 BUG_ON(ret); /* -ENOMEM */ 8327 if (ret) {
8328 btrfs_remove_free_space_cache(cache);
8329 spin_lock(&root->fs_info->block_group_cache_lock);
8330 rb_erase(&cache->cache_node,
8331 &root->fs_info->block_group_cache_tree);
8332 spin_unlock(&root->fs_info->block_group_cache_lock);
8333 btrfs_put_block_group(cache);
8334 return ret;
8335 }
8102 update_global_block_rsv(root->fs_info); 8336 update_global_block_rsv(root->fs_info);
8103 8337
8104 spin_lock(&cache->space_info->lock); 8338 spin_lock(&cache->space_info->lock);
@@ -8107,9 +8341,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8107 8341
8108 __link_block_group(cache->space_info, cache); 8342 __link_block_group(cache->space_info, cache);
8109 8343
8110 ret = btrfs_add_block_group_cache(root->fs_info, cache);
8111 BUG_ON(ret); /* Logic error */
8112
8113 list_add_tail(&cache->new_bg_list, &trans->new_bgs); 8344 list_add_tail(&cache->new_bg_list, &trans->new_bgs);
8114 8345
8115 set_avail_alloc_bits(extent_root->fs_info, type); 8346 set_avail_alloc_bits(extent_root->fs_info, type);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..32d67a822e93 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -24,12 +24,62 @@
24static struct kmem_cache *extent_state_cache; 24static struct kmem_cache *extent_state_cache;
25static struct kmem_cache *extent_buffer_cache; 25static struct kmem_cache *extent_buffer_cache;
26 26
27#ifdef CONFIG_BTRFS_DEBUG
27static LIST_HEAD(buffers); 28static LIST_HEAD(buffers);
28static LIST_HEAD(states); 29static LIST_HEAD(states);
29 30
30#define LEAK_DEBUG 0
31#if LEAK_DEBUG
32static DEFINE_SPINLOCK(leak_lock); 31static DEFINE_SPINLOCK(leak_lock);
32
33static inline
34void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
35{
36 unsigned long flags;
37
38 spin_lock_irqsave(&leak_lock, flags);
39 list_add(new, head);
40 spin_unlock_irqrestore(&leak_lock, flags);
41}
42
43static inline
44void btrfs_leak_debug_del(struct list_head *entry)
45{
46 unsigned long flags;
47
48 spin_lock_irqsave(&leak_lock, flags);
49 list_del(entry);
50 spin_unlock_irqrestore(&leak_lock, flags);
51}
52
53static inline
54void btrfs_leak_debug_check(void)
55{
56 struct extent_state *state;
57 struct extent_buffer *eb;
58
59 while (!list_empty(&states)) {
60 state = list_entry(states.next, struct extent_state, leak_list);
61 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
62 "state %lu in tree %p refs %d\n",
63 (unsigned long long)state->start,
64 (unsigned long long)state->end,
65 state->state, state->tree, atomic_read(&state->refs));
66 list_del(&state->leak_list);
67 kmem_cache_free(extent_state_cache, state);
68 }
69
70 while (!list_empty(&buffers)) {
71 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
72 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
73 "refs %d\n", (unsigned long long)eb->start,
74 eb->len, atomic_read(&eb->refs));
75 list_del(&eb->leak_list);
76 kmem_cache_free(extent_buffer_cache, eb);
77 }
78}
79#else
80#define btrfs_leak_debug_add(new, head) do {} while (0)
81#define btrfs_leak_debug_del(entry) do {} while (0)
82#define btrfs_leak_debug_check() do {} while (0)
33#endif 83#endif
34 84
35#define BUFFER_LRU_MAX 64 85#define BUFFER_LRU_MAX 64
@@ -84,29 +134,7 @@ free_state_cache:
84 134
85void extent_io_exit(void) 135void extent_io_exit(void)
86{ 136{
87 struct extent_state *state; 137 btrfs_leak_debug_check();
88 struct extent_buffer *eb;
89
90 while (!list_empty(&states)) {
91 state = list_entry(states.next, struct extent_state, leak_list);
92 printk(KERN_ERR "btrfs state leak: start %llu end %llu "
93 "state %lu in tree %p refs %d\n",
94 (unsigned long long)state->start,
95 (unsigned long long)state->end,
96 state->state, state->tree, atomic_read(&state->refs));
97 list_del(&state->leak_list);
98 kmem_cache_free(extent_state_cache, state);
99
100 }
101
102 while (!list_empty(&buffers)) {
103 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
104 printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
105 "refs %d\n", (unsigned long long)eb->start,
106 eb->len, atomic_read(&eb->refs));
107 list_del(&eb->leak_list);
108 kmem_cache_free(extent_buffer_cache, eb);
109 }
110 138
111 /* 139 /*
112 * Make sure all delayed rcu free are flushed before we 140 * Make sure all delayed rcu free are flushed before we
@@ -134,9 +162,6 @@ void extent_io_tree_init(struct extent_io_tree *tree,
134static struct extent_state *alloc_extent_state(gfp_t mask) 162static struct extent_state *alloc_extent_state(gfp_t mask)
135{ 163{
136 struct extent_state *state; 164 struct extent_state *state;
137#if LEAK_DEBUG
138 unsigned long flags;
139#endif
140 165
141 state = kmem_cache_alloc(extent_state_cache, mask); 166 state = kmem_cache_alloc(extent_state_cache, mask);
142 if (!state) 167 if (!state)
@@ -144,11 +169,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
144 state->state = 0; 169 state->state = 0;
145 state->private = 0; 170 state->private = 0;
146 state->tree = NULL; 171 state->tree = NULL;
147#if LEAK_DEBUG 172 btrfs_leak_debug_add(&state->leak_list, &states);
148 spin_lock_irqsave(&leak_lock, flags);
149 list_add(&state->leak_list, &states);
150 spin_unlock_irqrestore(&leak_lock, flags);
151#endif
152 atomic_set(&state->refs, 1); 173 atomic_set(&state->refs, 1);
153 init_waitqueue_head(&state->wq); 174 init_waitqueue_head(&state->wq);
154 trace_alloc_extent_state(state, mask, _RET_IP_); 175 trace_alloc_extent_state(state, mask, _RET_IP_);
@@ -160,15 +181,8 @@ void free_extent_state(struct extent_state *state)
160 if (!state) 181 if (!state)
161 return; 182 return;
162 if (atomic_dec_and_test(&state->refs)) { 183 if (atomic_dec_and_test(&state->refs)) {
163#if LEAK_DEBUG
164 unsigned long flags;
165#endif
166 WARN_ON(state->tree); 184 WARN_ON(state->tree);
167#if LEAK_DEBUG 185 btrfs_leak_debug_del(&state->leak_list);
168 spin_lock_irqsave(&leak_lock, flags);
169 list_del(&state->leak_list);
170 spin_unlock_irqrestore(&leak_lock, flags);
171#endif
172 trace_free_extent_state(state, _RET_IP_); 186 trace_free_extent_state(state, _RET_IP_);
173 kmem_cache_free(extent_state_cache, state); 187 kmem_cache_free(extent_state_cache, state);
174 } 188 }
@@ -308,21 +322,21 @@ static void merge_state(struct extent_io_tree *tree,
308} 322}
309 323
310static void set_state_cb(struct extent_io_tree *tree, 324static void set_state_cb(struct extent_io_tree *tree,
311 struct extent_state *state, int *bits) 325 struct extent_state *state, unsigned long *bits)
312{ 326{
313 if (tree->ops && tree->ops->set_bit_hook) 327 if (tree->ops && tree->ops->set_bit_hook)
314 tree->ops->set_bit_hook(tree->mapping->host, state, bits); 328 tree->ops->set_bit_hook(tree->mapping->host, state, bits);
315} 329}
316 330
317static void clear_state_cb(struct extent_io_tree *tree, 331static void clear_state_cb(struct extent_io_tree *tree,
318 struct extent_state *state, int *bits) 332 struct extent_state *state, unsigned long *bits)
319{ 333{
320 if (tree->ops && tree->ops->clear_bit_hook) 334 if (tree->ops && tree->ops->clear_bit_hook)
321 tree->ops->clear_bit_hook(tree->mapping->host, state, bits); 335 tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
322} 336}
323 337
324static void set_state_bits(struct extent_io_tree *tree, 338static void set_state_bits(struct extent_io_tree *tree,
325 struct extent_state *state, int *bits); 339 struct extent_state *state, unsigned long *bits);
326 340
327/* 341/*
328 * insert an extent_state struct into the tree. 'bits' are set on the 342 * insert an extent_state struct into the tree. 'bits' are set on the
@@ -336,7 +350,7 @@ static void set_state_bits(struct extent_io_tree *tree,
336 */ 350 */
337static int insert_state(struct extent_io_tree *tree, 351static int insert_state(struct extent_io_tree *tree,
338 struct extent_state *state, u64 start, u64 end, 352 struct extent_state *state, u64 start, u64 end,
339 int *bits) 353 unsigned long *bits)
340{ 354{
341 struct rb_node *node; 355 struct rb_node *node;
342 356
@@ -424,10 +438,10 @@ static struct extent_state *next_state(struct extent_state *state)
424 */ 438 */
425static struct extent_state *clear_state_bit(struct extent_io_tree *tree, 439static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
426 struct extent_state *state, 440 struct extent_state *state,
427 int *bits, int wake) 441 unsigned long *bits, int wake)
428{ 442{
429 struct extent_state *next; 443 struct extent_state *next;
430 int bits_to_clear = *bits & ~EXTENT_CTLBITS; 444 unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
431 445
432 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { 446 if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
433 u64 range = state->end - state->start + 1; 447 u64 range = state->end - state->start + 1;
@@ -463,7 +477,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
463 return prealloc; 477 return prealloc;
464} 478}
465 479
466void extent_io_tree_panic(struct extent_io_tree *tree, int err) 480static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
467{ 481{
468 btrfs_panic(tree_fs_info(tree), err, "Locking error: " 482 btrfs_panic(tree_fs_info(tree), err, "Locking error: "
469 "Extent tree was modified by another " 483 "Extent tree was modified by another "
@@ -483,7 +497,7 @@ void extent_io_tree_panic(struct extent_io_tree *tree, int err)
483 * This takes the tree lock, and returns 0 on success and < 0 on error. 497 * This takes the tree lock, and returns 0 on success and < 0 on error.
484 */ 498 */
485int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 499int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
486 int bits, int wake, int delete, 500 unsigned long bits, int wake, int delete,
487 struct extent_state **cached_state, 501 struct extent_state **cached_state,
488 gfp_t mask) 502 gfp_t mask)
489{ 503{
@@ -644,7 +658,8 @@ static void wait_on_state(struct extent_io_tree *tree,
644 * The range [start, end] is inclusive. 658 * The range [start, end] is inclusive.
645 * The tree lock is taken by this function 659 * The tree lock is taken by this function
646 */ 660 */
647void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) 661static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
662 unsigned long bits)
648{ 663{
649 struct extent_state *state; 664 struct extent_state *state;
650 struct rb_node *node; 665 struct rb_node *node;
@@ -685,9 +700,9 @@ out:
685 700
686static void set_state_bits(struct extent_io_tree *tree, 701static void set_state_bits(struct extent_io_tree *tree,
687 struct extent_state *state, 702 struct extent_state *state,
688 int *bits) 703 unsigned long *bits)
689{ 704{
690 int bits_to_set = *bits & ~EXTENT_CTLBITS; 705 unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
691 706
692 set_state_cb(tree, state, bits); 707 set_state_cb(tree, state, bits);
693 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 708 if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@ -730,8 +745,9 @@ static void uncache_state(struct extent_state **cached_ptr)
730 745
731static int __must_check 746static int __must_check
732__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 747__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
733 int bits, int exclusive_bits, u64 *failed_start, 748 unsigned long bits, unsigned long exclusive_bits,
734 struct extent_state **cached_state, gfp_t mask) 749 u64 *failed_start, struct extent_state **cached_state,
750 gfp_t mask)
735{ 751{
736 struct extent_state *state; 752 struct extent_state *state;
737 struct extent_state *prealloc = NULL; 753 struct extent_state *prealloc = NULL;
@@ -923,9 +939,9 @@ search_again:
923 goto again; 939 goto again;
924} 940}
925 941
926int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, 942int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
927 u64 *failed_start, struct extent_state **cached_state, 943 unsigned long bits, u64 * failed_start,
928 gfp_t mask) 944 struct extent_state **cached_state, gfp_t mask)
929{ 945{
930 return __set_extent_bit(tree, start, end, bits, 0, failed_start, 946 return __set_extent_bit(tree, start, end, bits, 0, failed_start,
931 cached_state, mask); 947 cached_state, mask);
@@ -950,7 +966,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
950 * boundary bits like LOCK. 966 * boundary bits like LOCK.
951 */ 967 */
952int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 968int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
953 int bits, int clear_bits, 969 unsigned long bits, unsigned long clear_bits,
954 struct extent_state **cached_state, gfp_t mask) 970 struct extent_state **cached_state, gfp_t mask)
955{ 971{
956 struct extent_state *state; 972 struct extent_state *state;
@@ -1143,14 +1159,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
1143} 1159}
1144 1160
1145int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1161int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1146 int bits, gfp_t mask) 1162 unsigned long bits, gfp_t mask)
1147{ 1163{
1148 return set_extent_bit(tree, start, end, bits, NULL, 1164 return set_extent_bit(tree, start, end, bits, NULL,
1149 NULL, mask); 1165 NULL, mask);
1150} 1166}
1151 1167
1152int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1168int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1153 int bits, gfp_t mask) 1169 unsigned long bits, gfp_t mask)
1154{ 1170{
1155 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); 1171 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
1156} 1172}
@@ -1189,7 +1205,7 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
1189int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 1205int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1190 struct extent_state **cached_state, gfp_t mask) 1206 struct extent_state **cached_state, gfp_t mask)
1191{ 1207{
1192 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 1208 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
1193 cached_state, mask); 1209 cached_state, mask);
1194} 1210}
1195 1211
@@ -1205,7 +1221,7 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
1205 * us if waiting is desired. 1221 * us if waiting is desired.
1206 */ 1222 */
1207int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 1223int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
1208 int bits, struct extent_state **cached_state) 1224 unsigned long bits, struct extent_state **cached_state)
1209{ 1225{
1210 int err; 1226 int err;
1211 u64 failed_start; 1227 u64 failed_start;
@@ -1257,6 +1273,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
1257 GFP_NOFS); 1273 GFP_NOFS);
1258} 1274}
1259 1275
1276int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
1277{
1278 unsigned long index = start >> PAGE_CACHE_SHIFT;
1279 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1280 struct page *page;
1281
1282 while (index <= end_index) {
1283 page = find_get_page(inode->i_mapping, index);
1284 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1285 clear_page_dirty_for_io(page);
1286 page_cache_release(page);
1287 index++;
1288 }
1289 return 0;
1290}
1291
1292int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
1293{
1294 unsigned long index = start >> PAGE_CACHE_SHIFT;
1295 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1296 struct page *page;
1297
1298 while (index <= end_index) {
1299 page = find_get_page(inode->i_mapping, index);
1300 BUG_ON(!page); /* Pages should be in the extent_io_tree */
1301 account_page_redirty(page);
1302 __set_page_dirty_nobuffers(page);
1303 page_cache_release(page);
1304 index++;
1305 }
1306 return 0;
1307}
1308
1260/* 1309/*
1261 * helper function to set both pages and extents in the tree writeback 1310 * helper function to set both pages and extents in the tree writeback
1262 */ 1311 */
@@ -1280,8 +1329,9 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
1280 * return it. tree->lock must be held. NULL will returned if 1329 * return it. tree->lock must be held. NULL will returned if
1281 * nothing was found after 'start' 1330 * nothing was found after 'start'
1282 */ 1331 */
1283struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 1332static struct extent_state *
1284 u64 start, int bits) 1333find_first_extent_bit_state(struct extent_io_tree *tree,
1334 u64 start, unsigned long bits)
1285{ 1335{
1286 struct rb_node *node; 1336 struct rb_node *node;
1287 struct extent_state *state; 1337 struct extent_state *state;
@@ -1315,7 +1365,7 @@ out:
1315 * If nothing was found, 1 is returned. If found something, return 0. 1365 * If nothing was found, 1 is returned. If found something, return 0.
1316 */ 1366 */
1317int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 1367int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
1318 u64 *start_ret, u64 *end_ret, int bits, 1368 u64 *start_ret, u64 *end_ret, unsigned long bits,
1319 struct extent_state **cached_state) 1369 struct extent_state **cached_state)
1320{ 1370{
1321 struct extent_state *state; 1371 struct extent_state *state;
@@ -1605,7 +1655,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1605 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1655 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1606 unsigned long nr_pages = end_index - index + 1; 1656 unsigned long nr_pages = end_index - index + 1;
1607 int i; 1657 int i;
1608 int clear_bits = 0; 1658 unsigned long clear_bits = 0;
1609 1659
1610 if (op & EXTENT_CLEAR_UNLOCK) 1660 if (op & EXTENT_CLEAR_UNLOCK)
1611 clear_bits |= EXTENT_LOCKED; 1661 clear_bits |= EXTENT_LOCKED;
@@ -1744,6 +1794,64 @@ out:
1744 return ret; 1794 return ret;
1745} 1795}
1746 1796
1797void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
1798 int count)
1799{
1800 struct rb_node *node;
1801 struct extent_state *state;
1802
1803 spin_lock(&tree->lock);
1804 /*
1805 * this search will find all the extents that end after
1806 * our range starts.
1807 */
1808 node = tree_search(tree, start);
1809 BUG_ON(!node);
1810
1811 state = rb_entry(node, struct extent_state, rb_node);
1812 BUG_ON(state->start != start);
1813
1814 while (count) {
1815 state->private = *csums++;
1816 count--;
1817 state = next_state(state);
1818 }
1819 spin_unlock(&tree->lock);
1820}
1821
1822static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
1823{
1824 struct bio_vec *bvec = bio->bi_io_vec + bio_index;
1825
1826 return page_offset(bvec->bv_page) + bvec->bv_offset;
1827}
1828
1829void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
1830 u32 csums[], int count)
1831{
1832 struct rb_node *node;
1833 struct extent_state *state = NULL;
1834 u64 start;
1835
1836 spin_lock(&tree->lock);
1837 do {
1838 start = __btrfs_get_bio_offset(bio, bio_index);
1839 if (state == NULL || state->start != start) {
1840 node = tree_search(tree, start);
1841 BUG_ON(!node);
1842
1843 state = rb_entry(node, struct extent_state, rb_node);
1844 BUG_ON(state->start != start);
1845 }
1846 state->private = *csums++;
1847 count--;
1848 bio_index++;
1849
1850 state = next_state(state);
1851 } while (count);
1852 spin_unlock(&tree->lock);
1853}
1854
1747int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1855int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1748{ 1856{
1749 struct rb_node *node; 1857 struct rb_node *node;
@@ -1778,7 +1886,7 @@ out:
1778 * range is found set. 1886 * range is found set.
1779 */ 1887 */
1780int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 1888int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1781 int bits, int filled, struct extent_state *cached) 1889 unsigned long bits, int filled, struct extent_state *cached)
1782{ 1890{
1783 struct extent_state *state = NULL; 1891 struct extent_state *state = NULL;
1784 struct rb_node *node; 1892 struct rb_node *node;
@@ -2527,8 +2635,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2527 if (old_compressed) 2635 if (old_compressed)
2528 contig = bio->bi_sector == sector; 2636 contig = bio->bi_sector == sector;
2529 else 2637 else
2530 contig = bio->bi_sector + (bio->bi_size >> 9) == 2638 contig = bio_end_sector(bio) == sector;
2531 sector;
2532 2639
2533 if (prev_bio_flags != bio_flags || !contig || 2640 if (prev_bio_flags != bio_flags || !contig ||
2534 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || 2641 merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
@@ -2563,7 +2670,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2563 return ret; 2670 return ret;
2564} 2671}
2565 2672
2566void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page) 2673static void attach_extent_buffer_page(struct extent_buffer *eb,
2674 struct page *page)
2567{ 2675{
2568 if (!PagePrivate(page)) { 2676 if (!PagePrivate(page)) {
2569 SetPagePrivate(page); 2677 SetPagePrivate(page);
@@ -2593,7 +2701,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2593 struct page *page, 2701 struct page *page,
2594 get_extent_t *get_extent, 2702 get_extent_t *get_extent,
2595 struct bio **bio, int mirror_num, 2703 struct bio **bio, int mirror_num,
2596 unsigned long *bio_flags) 2704 unsigned long *bio_flags, int rw)
2597{ 2705{
2598 struct inode *inode = page->mapping->host; 2706 struct inode *inode = page->mapping->host;
2599 u64 start = page_offset(page); 2707 u64 start = page_offset(page);
@@ -2739,7 +2847,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2739 } 2847 }
2740 2848
2741 pnr -= page->index; 2849 pnr -= page->index;
2742 ret = submit_extent_page(READ, tree, page, 2850 ret = submit_extent_page(rw, tree, page,
2743 sector, disk_io_size, pg_offset, 2851 sector, disk_io_size, pg_offset,
2744 bdev, bio, pnr, 2852 bdev, bio, pnr,
2745 end_bio_extent_readpage, mirror_num, 2853 end_bio_extent_readpage, mirror_num,
@@ -2772,7 +2880,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2772 int ret; 2880 int ret;
2773 2881
2774 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num, 2882 ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
2775 &bio_flags); 2883 &bio_flags, READ);
2776 if (bio) 2884 if (bio)
2777 ret = submit_one_bio(READ, bio, mirror_num, bio_flags); 2885 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
2778 return ret; 2886 return ret;
@@ -3071,7 +3179,7 @@ static int eb_wait(void *word)
3071 return 0; 3179 return 0;
3072} 3180}
3073 3181
3074static void wait_on_extent_buffer_writeback(struct extent_buffer *eb) 3182void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
3075{ 3183{
3076 wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait, 3184 wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
3077 TASK_UNINTERRUPTIBLE); 3185 TASK_UNINTERRUPTIBLE);
@@ -3196,7 +3304,7 @@ static int write_one_eb(struct extent_buffer *eb,
3196 u64 offset = eb->start; 3304 u64 offset = eb->start;
3197 unsigned long i, num_pages; 3305 unsigned long i, num_pages;
3198 unsigned long bio_flags = 0; 3306 unsigned long bio_flags = 0;
3199 int rw = (epd->sync_io ? WRITE_SYNC : WRITE); 3307 int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
3200 int ret = 0; 3308 int ret = 0;
3201 3309
3202 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); 3310 clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
@@ -3633,14 +3741,14 @@ int extent_readpages(struct extent_io_tree *tree,
3633 continue; 3741 continue;
3634 for (i = 0; i < nr; i++) { 3742 for (i = 0; i < nr; i++) {
3635 __extent_read_full_page(tree, pagepool[i], get_extent, 3743 __extent_read_full_page(tree, pagepool[i], get_extent,
3636 &bio, 0, &bio_flags); 3744 &bio, 0, &bio_flags, READ);
3637 page_cache_release(pagepool[i]); 3745 page_cache_release(pagepool[i]);
3638 } 3746 }
3639 nr = 0; 3747 nr = 0;
3640 } 3748 }
3641 for (i = 0; i < nr; i++) { 3749 for (i = 0; i < nr; i++) {
3642 __extent_read_full_page(tree, pagepool[i], get_extent, 3750 __extent_read_full_page(tree, pagepool[i], get_extent,
3643 &bio, 0, &bio_flags); 3751 &bio, 0, &bio_flags, READ);
3644 page_cache_release(pagepool[i]); 3752 page_cache_release(pagepool[i]);
3645 } 3753 }
3646 3754
@@ -3681,9 +3789,9 @@ int extent_invalidatepage(struct extent_io_tree *tree,
3681 * are locked or under IO and drops the related state bits if it is safe 3789 * are locked or under IO and drops the related state bits if it is safe
3682 * to drop the page. 3790 * to drop the page.
3683 */ 3791 */
3684int try_release_extent_state(struct extent_map_tree *map, 3792static int try_release_extent_state(struct extent_map_tree *map,
3685 struct extent_io_tree *tree, struct page *page, 3793 struct extent_io_tree *tree,
3686 gfp_t mask) 3794 struct page *page, gfp_t mask)
3687{ 3795{
3688 u64 start = page_offset(page); 3796 u64 start = page_offset(page);
3689 u64 end = start + PAGE_CACHE_SIZE - 1; 3797 u64 end = start + PAGE_CACHE_SIZE - 1;
@@ -3974,12 +4082,7 @@ out:
3974 4082
3975static void __free_extent_buffer(struct extent_buffer *eb) 4083static void __free_extent_buffer(struct extent_buffer *eb)
3976{ 4084{
3977#if LEAK_DEBUG 4085 btrfs_leak_debug_del(&eb->leak_list);
3978 unsigned long flags;
3979 spin_lock_irqsave(&leak_lock, flags);
3980 list_del(&eb->leak_list);
3981 spin_unlock_irqrestore(&leak_lock, flags);
3982#endif
3983 kmem_cache_free(extent_buffer_cache, eb); 4086 kmem_cache_free(extent_buffer_cache, eb);
3984} 4087}
3985 4088
@@ -3989,9 +4092,6 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
3989 gfp_t mask) 4092 gfp_t mask)
3990{ 4093{
3991 struct extent_buffer *eb = NULL; 4094 struct extent_buffer *eb = NULL;
3992#if LEAK_DEBUG
3993 unsigned long flags;
3994#endif
3995 4095
3996 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 4096 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
3997 if (eb == NULL) 4097 if (eb == NULL)
@@ -4011,11 +4111,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
4011 init_waitqueue_head(&eb->write_lock_wq); 4111 init_waitqueue_head(&eb->write_lock_wq);
4012 init_waitqueue_head(&eb->read_lock_wq); 4112 init_waitqueue_head(&eb->read_lock_wq);
4013 4113
4014#if LEAK_DEBUG 4114 btrfs_leak_debug_add(&eb->leak_list, &buffers);
4015 spin_lock_irqsave(&leak_lock, flags); 4115
4016 list_add(&eb->leak_list, &buffers);
4017 spin_unlock_irqrestore(&leak_lock, flags);
4018#endif
4019 spin_lock_init(&eb->refs_lock); 4116 spin_lock_init(&eb->refs_lock);
4020 atomic_set(&eb->refs, 1); 4117 atomic_set(&eb->refs, 1);
4021 atomic_set(&eb->io_pages, 0); 4118 atomic_set(&eb->io_pages, 0);
@@ -4353,7 +4450,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
4353} 4450}
4354 4451
4355/* Expects to have eb->eb_lock already held */ 4452/* Expects to have eb->eb_lock already held */
4356static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) 4453static int release_extent_buffer(struct extent_buffer *eb)
4357{ 4454{
4358 WARN_ON(atomic_read(&eb->refs) == 0); 4455 WARN_ON(atomic_read(&eb->refs) == 0);
4359 if (atomic_dec_and_test(&eb->refs)) { 4456 if (atomic_dec_and_test(&eb->refs)) {
@@ -4411,7 +4508,7 @@ void free_extent_buffer(struct extent_buffer *eb)
4411 * I know this is terrible, but it's temporary until we stop tracking 4508 * I know this is terrible, but it's temporary until we stop tracking
4412 * the uptodate bits and such for the extent buffers. 4509 * the uptodate bits and such for the extent buffers.
4413 */ 4510 */
4414 release_extent_buffer(eb, GFP_ATOMIC); 4511 release_extent_buffer(eb);
4415} 4512}
4416 4513
4417void free_extent_buffer_stale(struct extent_buffer *eb) 4514void free_extent_buffer_stale(struct extent_buffer *eb)
@@ -4425,7 +4522,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
4425 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) && 4522 if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
4426 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) 4523 test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
4427 atomic_dec(&eb->refs); 4524 atomic_dec(&eb->refs);
4428 release_extent_buffer(eb, GFP_NOFS); 4525 release_extent_buffer(eb);
4429} 4526}
4430 4527
4431void clear_extent_buffer_dirty(struct extent_buffer *eb) 4528void clear_extent_buffer_dirty(struct extent_buffer *eb)
@@ -4477,17 +4574,6 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
4477 return was_dirty; 4574 return was_dirty;
4478} 4575}
4479 4576
4480static int range_straddles_pages(u64 start, u64 len)
4481{
4482 if (len < PAGE_CACHE_SIZE)
4483 return 1;
4484 if (start & (PAGE_CACHE_SIZE - 1))
4485 return 1;
4486 if ((start + len) & (PAGE_CACHE_SIZE - 1))
4487 return 1;
4488 return 0;
4489}
4490
4491int clear_extent_buffer_uptodate(struct extent_buffer *eb) 4577int clear_extent_buffer_uptodate(struct extent_buffer *eb)
4492{ 4578{
4493 unsigned long i; 4579 unsigned long i;
@@ -4519,37 +4605,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb)
4519 return 0; 4605 return 0;
4520} 4606}
4521 4607
4522int extent_range_uptodate(struct extent_io_tree *tree,
4523 u64 start, u64 end)
4524{
4525 struct page *page;
4526 int ret;
4527 int pg_uptodate = 1;
4528 int uptodate;
4529 unsigned long index;
4530
4531 if (range_straddles_pages(start, end - start + 1)) {
4532 ret = test_range_bit(tree, start, end,
4533 EXTENT_UPTODATE, 1, NULL);
4534 if (ret)
4535 return 1;
4536 }
4537 while (start <= end) {
4538 index = start >> PAGE_CACHE_SHIFT;
4539 page = find_get_page(tree->mapping, index);
4540 if (!page)
4541 return 1;
4542 uptodate = PageUptodate(page);
4543 page_cache_release(page);
4544 if (!uptodate) {
4545 pg_uptodate = 0;
4546 break;
4547 }
4548 start += PAGE_CACHE_SIZE;
4549 }
4550 return pg_uptodate;
4551}
4552
4553int extent_buffer_uptodate(struct extent_buffer *eb) 4608int extent_buffer_uptodate(struct extent_buffer *eb)
4554{ 4609{
4555 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 4610 return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -4612,7 +4667,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4612 ClearPageError(page); 4667 ClearPageError(page);
4613 err = __extent_read_full_page(tree, page, 4668 err = __extent_read_full_page(tree, page,
4614 get_extent, &bio, 4669 get_extent, &bio,
4615 mirror_num, &bio_flags); 4670 mirror_num, &bio_flags,
4671 READ | REQ_META);
4616 if (err) 4672 if (err)
4617 ret = err; 4673 ret = err;
4618 } else { 4674 } else {
@@ -4621,7 +4677,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
4621 } 4677 }
4622 4678
4623 if (bio) { 4679 if (bio) {
4624 err = submit_one_bio(READ, bio, mirror_num, bio_flags); 4680 err = submit_one_bio(READ | REQ_META, bio, mirror_num,
4681 bio_flags);
4625 if (err) 4682 if (err)
4626 return err; 4683 return err;
4627 } 4684 }
@@ -4985,7 +5042,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
4985 } 5042 }
4986} 5043}
4987 5044
4988int try_release_extent_buffer(struct page *page, gfp_t mask) 5045int try_release_extent_buffer(struct page *page)
4989{ 5046{
4990 struct extent_buffer *eb; 5047 struct extent_buffer *eb;
4991 5048
@@ -5015,9 +5072,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
5015 } 5072 }
5016 spin_unlock(&page->mapping->private_lock); 5073 spin_unlock(&page->mapping->private_lock);
5017 5074
5018 if ((mask & GFP_NOFS) == GFP_NOFS)
5019 mask = GFP_NOFS;
5020
5021 /* 5075 /*
5022 * If tree ref isn't set then we know the ref on this eb is a real ref, 5076 * If tree ref isn't set then we know the ref on this eb is a real ref,
5023 * so just return, this page will likely be freed soon anyway. 5077 * so just return, this page will likely be freed soon anyway.
@@ -5027,5 +5081,5 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
5027 return 0; 5081 return 0;
5028 } 5082 }
5029 5083
5030 return release_extent_buffer(eb, mask); 5084 return release_extent_buffer(eb);
5031} 5085}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6068a1985560..a2c03a175009 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -81,9 +81,9 @@ struct extent_io_ops {
81 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, 81 int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
82 struct extent_state *state, int uptodate); 82 struct extent_state *state, int uptodate);
83 void (*set_bit_hook)(struct inode *inode, struct extent_state *state, 83 void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
84 int *bits); 84 unsigned long *bits);
85 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, 85 void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
86 int *bits); 86 unsigned long *bits);
87 void (*merge_extent_hook)(struct inode *inode, 87 void (*merge_extent_hook)(struct inode *inode,
88 struct extent_state *new, 88 struct extent_state *new,
89 struct extent_state *other); 89 struct extent_state *other);
@@ -116,7 +116,9 @@ struct extent_state {
116 /* for use by the FS */ 116 /* for use by the FS */
117 u64 private; 117 u64 private;
118 118
119#ifdef CONFIG_BTRFS_DEBUG
119 struct list_head leak_list; 120 struct list_head leak_list;
121#endif
120}; 122};
121 123
122#define INLINE_EXTENT_BUFFER_PAGES 16 124#define INLINE_EXTENT_BUFFER_PAGES 16
@@ -132,7 +134,6 @@ struct extent_buffer {
132 atomic_t refs; 134 atomic_t refs;
133 atomic_t io_pages; 135 atomic_t io_pages;
134 int read_mirror; 136 int read_mirror;
135 struct list_head leak_list;
136 struct rcu_head rcu_head; 137 struct rcu_head rcu_head;
137 pid_t lock_owner; 138 pid_t lock_owner;
138 139
@@ -159,6 +160,9 @@ struct extent_buffer {
159 wait_queue_head_t read_lock_wq; 160 wait_queue_head_t read_lock_wq;
160 wait_queue_head_t lock_wq; 161 wait_queue_head_t lock_wq;
161 struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; 162 struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
163#ifdef CONFIG_BTRFS_DEBUG
164 struct list_head leak_list;
165#endif
162}; 166};
163 167
164static inline void extent_set_compress_type(unsigned long *bio_flags, 168static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -185,13 +189,10 @@ void extent_io_tree_init(struct extent_io_tree *tree,
185int try_release_extent_mapping(struct extent_map_tree *map, 189int try_release_extent_mapping(struct extent_map_tree *map,
186 struct extent_io_tree *tree, struct page *page, 190 struct extent_io_tree *tree, struct page *page,
187 gfp_t mask); 191 gfp_t mask);
188int try_release_extent_buffer(struct page *page, gfp_t mask); 192int try_release_extent_buffer(struct page *page);
189int try_release_extent_state(struct extent_map_tree *map,
190 struct extent_io_tree *tree, struct page *page,
191 gfp_t mask);
192int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); 193int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
193int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 194int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
194 int bits, struct extent_state **cached); 195 unsigned long bits, struct extent_state **cached);
195int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); 196int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
196int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, 197int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
197 struct extent_state **cached, gfp_t mask); 198 struct extent_state **cached, gfp_t mask);
@@ -207,16 +208,17 @@ u64 count_range_bits(struct extent_io_tree *tree,
207 208
208void free_extent_state(struct extent_state *state); 209void free_extent_state(struct extent_state *state);
209int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 210int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
210 int bits, int filled, struct extent_state *cached_state); 211 unsigned long bits, int filled,
212 struct extent_state *cached_state);
211int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 213int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
212 int bits, gfp_t mask); 214 unsigned long bits, gfp_t mask);
213int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 215int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
214 int bits, int wake, int delete, struct extent_state **cached, 216 unsigned long bits, int wake, int delete,
215 gfp_t mask); 217 struct extent_state **cached, gfp_t mask);
216int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 218int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
217 int bits, gfp_t mask); 219 unsigned long bits, gfp_t mask);
218int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 220int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
219 int bits, u64 *failed_start, 221 unsigned long bits, u64 *failed_start,
220 struct extent_state **cached_state, gfp_t mask); 222 struct extent_state **cached_state, gfp_t mask);
221int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 223int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
222 struct extent_state **cached_state, gfp_t mask); 224 struct extent_state **cached_state, gfp_t mask);
@@ -229,17 +231,15 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
229int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 231int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
230 gfp_t mask); 232 gfp_t mask);
231int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 233int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
232 int bits, int clear_bits, 234 unsigned long bits, unsigned long clear_bits,
233 struct extent_state **cached_state, gfp_t mask); 235 struct extent_state **cached_state, gfp_t mask);
234int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 236int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
235 struct extent_state **cached_state, gfp_t mask); 237 struct extent_state **cached_state, gfp_t mask);
236int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, 238int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
237 struct extent_state **cached_state, gfp_t mask); 239 struct extent_state **cached_state, gfp_t mask);
238int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 240int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
239 u64 *start_ret, u64 *end_ret, int bits, 241 u64 *start_ret, u64 *end_ret, unsigned long bits,
240 struct extent_state **cached_state); 242 struct extent_state **cached_state);
241struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
242 u64 start, int bits);
243int extent_invalidatepage(struct extent_io_tree *tree, 243int extent_invalidatepage(struct extent_io_tree *tree,
244 struct page *page, unsigned long offset); 244 struct page *page, unsigned long offset);
245int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 245int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
@@ -261,6 +261,10 @@ int extent_readpages(struct extent_io_tree *tree,
261int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 261int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
262 __u64 start, __u64 len, get_extent_t *get_extent); 262 __u64 start, __u64 len, get_extent_t *get_extent);
263int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); 263int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
264void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
265 int count);
266void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
267 int bvec_index, u32 csums[], int count);
264int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 268int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
265void set_page_extent_mapped(struct page *page); 269void set_page_extent_mapped(struct page *page);
266 270
@@ -278,6 +282,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
278int read_extent_buffer_pages(struct extent_io_tree *tree, 282int read_extent_buffer_pages(struct extent_io_tree *tree,
279 struct extent_buffer *eb, u64 start, int wait, 283 struct extent_buffer *eb, u64 start, int wait,
280 get_extent_t *get_extent, int mirror_num); 284 get_extent_t *get_extent, int mirror_num);
285void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
281 286
282static inline unsigned long num_extent_pages(u64 start, u64 len) 287static inline unsigned long num_extent_pages(u64 start, u64 len)
283{ 288{
@@ -313,7 +318,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
313 unsigned long src_offset, unsigned long len); 318 unsigned long src_offset, unsigned long len);
314void memset_extent_buffer(struct extent_buffer *eb, char c, 319void memset_extent_buffer(struct extent_buffer *eb, char c,
315 unsigned long start, unsigned long len); 320 unsigned long start, unsigned long len);
316void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
317void clear_extent_buffer_dirty(struct extent_buffer *eb); 321void clear_extent_buffer_dirty(struct extent_buffer *eb);
318int set_extent_buffer_dirty(struct extent_buffer *eb); 322int set_extent_buffer_dirty(struct extent_buffer *eb);
319int set_extent_buffer_uptodate(struct extent_buffer *eb); 323int set_extent_buffer_uptodate(struct extent_buffer *eb);
@@ -323,8 +327,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
323 unsigned long min_len, char **map, 327 unsigned long min_len, char **map,
324 unsigned long *map_start, 328 unsigned long *map_start,
325 unsigned long *map_len); 329 unsigned long *map_len);
326int extent_range_uptodate(struct extent_io_tree *tree, 330int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
327 u64 start, u64 end); 331int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
328int extent_clear_unlock_delalloc(struct inode *inode, 332int extent_clear_unlock_delalloc(struct inode *inode,
329 struct extent_io_tree *tree, 333 struct extent_io_tree *tree,
330 u64 start, u64 end, struct page *locked_page, 334 u64 start, u64 end, struct page *locked_page,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2834ca5768ea..a4a7a1a8da95 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -174,6 +174,14 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
174 test_bit(EXTENT_FLAG_LOGGING, &next->flags)) 174 test_bit(EXTENT_FLAG_LOGGING, &next->flags))
175 return 0; 175 return 0;
176 176
177 /*
178 * We don't want to merge stuff that hasn't been written to the log yet
179 * since it may not reflect exactly what is on disk, and that would be
180 * bad.
181 */
182 if (!list_empty(&prev->list) || !list_empty(&next->list))
183 return 0;
184
177 if (extent_map_end(prev) == next->start && 185 if (extent_map_end(prev) == next->start &&
178 prev->flags == next->flags && 186 prev->flags == next->flags &&
179 prev->bdev == next->bdev && 187 prev->bdev == next->bdev &&
@@ -209,9 +217,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
209 em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; 217 em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
210 em->mod_start = merge->mod_start; 218 em->mod_start = merge->mod_start;
211 em->generation = max(em->generation, merge->generation); 219 em->generation = max(em->generation, merge->generation);
212 list_move(&em->list, &tree->modified_extents);
213 220
214 list_del_init(&merge->list);
215 rb_erase(&merge->rb_node, &tree->map); 221 rb_erase(&merge->rb_node, &tree->map);
216 free_extent_map(merge); 222 free_extent_map(merge);
217 } 223 }
@@ -227,7 +233,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
227 merge->in_tree = 0; 233 merge->in_tree = 0;
228 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; 234 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
229 em->generation = max(em->generation, merge->generation); 235 em->generation = max(em->generation, merge->generation);
230 list_del_init(&merge->list);
231 free_extent_map(merge); 236 free_extent_map(merge);
232 } 237 }
233} 238}
@@ -302,7 +307,7 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
302 * reference dropped if the merge attempt was successful. 307 * reference dropped if the merge attempt was successful.
303 */ 308 */
304int add_extent_mapping(struct extent_map_tree *tree, 309int add_extent_mapping(struct extent_map_tree *tree,
305 struct extent_map *em) 310 struct extent_map *em, int modified)
306{ 311{
307 int ret = 0; 312 int ret = 0;
308 struct rb_node *rb; 313 struct rb_node *rb;
@@ -324,7 +329,10 @@ int add_extent_mapping(struct extent_map_tree *tree,
324 em->mod_start = em->start; 329 em->mod_start = em->start;
325 em->mod_len = em->len; 330 em->mod_len = em->len;
326 331
327 try_merge_map(tree, em); 332 if (modified)
333 list_move(&em->list, &tree->modified_extents);
334 else
335 try_merge_map(tree, em);
328out: 336out:
329 return ret; 337 return ret;
330} 338}
@@ -337,8 +345,9 @@ static u64 range_end(u64 start, u64 len)
337 return start + len; 345 return start + len;
338} 346}
339 347
340struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree, 348static struct extent_map *
341 u64 start, u64 len, int strict) 349__lookup_extent_mapping(struct extent_map_tree *tree,
350 u64 start, u64 len, int strict)
342{ 351{
343 struct extent_map *em; 352 struct extent_map *em;
344 struct rb_node *rb_node; 353 struct rb_node *rb_node;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index c6598c89cff8..61adc44b7805 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,6 +26,7 @@ struct extent_map {
26 u64 mod_len; 26 u64 mod_len;
27 u64 orig_start; 27 u64 orig_start;
28 u64 orig_block_len; 28 u64 orig_block_len;
29 u64 ram_bytes;
29 u64 block_start; 30 u64 block_start;
30 u64 block_len; 31 u64 block_len;
31 u64 generation; 32 u64 generation;
@@ -61,7 +62,7 @@ void extent_map_tree_init(struct extent_map_tree *tree);
61struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, 62struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
62 u64 start, u64 len); 63 u64 start, u64 len);
63int add_extent_mapping(struct extent_map_tree *tree, 64int add_extent_mapping(struct extent_map_tree *tree,
64 struct extent_map *em); 65 struct extent_map *em, int modified);
65int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); 66int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
66 67
67struct extent_map *alloc_extent_map(void); 68struct extent_map *alloc_extent_map(void);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index ec160202be3e..b193bf324a41 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -83,10 +83,11 @@ out:
83 return ret; 83 return ret;
84} 84}
85 85
86struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, 86static struct btrfs_csum_item *
87 struct btrfs_root *root, 87btrfs_lookup_csum(struct btrfs_trans_handle *trans,
88 struct btrfs_path *path, 88 struct btrfs_root *root,
89 u64 bytenr, int cow) 89 struct btrfs_path *path,
90 u64 bytenr, int cow)
90{ 91{
91 int ret; 92 int ret;
92 struct btrfs_key file_key; 93 struct btrfs_key file_key;
@@ -118,9 +119,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
118 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 119 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
119 csums_in_item /= csum_size; 120 csums_in_item /= csum_size;
120 121
121 if (csum_offset >= csums_in_item) { 122 if (csum_offset == csums_in_item) {
122 ret = -EFBIG; 123 ret = -EFBIG;
123 goto fail; 124 goto fail;
125 } else if (csum_offset > csums_in_item) {
126 goto fail;
124 } 127 }
125 } 128 }
126 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); 129 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
@@ -150,32 +153,12 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
150 return ret; 153 return ret;
151} 154}
152 155
153u64 btrfs_file_extent_length(struct btrfs_path *path)
154{
155 int extent_type;
156 struct btrfs_file_extent_item *fi;
157 u64 len;
158
159 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
160 struct btrfs_file_extent_item);
161 extent_type = btrfs_file_extent_type(path->nodes[0], fi);
162
163 if (extent_type == BTRFS_FILE_EXTENT_REG ||
164 extent_type == BTRFS_FILE_EXTENT_PREALLOC)
165 len = btrfs_file_extent_num_bytes(path->nodes[0], fi);
166 else if (extent_type == BTRFS_FILE_EXTENT_INLINE)
167 len = btrfs_file_extent_inline_len(path->nodes[0], fi);
168 else
169 BUG();
170
171 return len;
172}
173
174static int __btrfs_lookup_bio_sums(struct btrfs_root *root, 156static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
175 struct inode *inode, struct bio *bio, 157 struct inode *inode, struct bio *bio,
176 u64 logical_offset, u32 *dst, int dio) 158 u64 logical_offset, u32 *dst, int dio)
177{ 159{
178 u32 sum; 160 u32 sum[16];
161 int len;
179 struct bio_vec *bvec = bio->bi_io_vec; 162 struct bio_vec *bvec = bio->bi_io_vec;
180 int bio_index = 0; 163 int bio_index = 0;
181 u64 offset = 0; 164 u64 offset = 0;
@@ -184,7 +167,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
184 u64 disk_bytenr; 167 u64 disk_bytenr;
185 u32 diff; 168 u32 diff;
186 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 169 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
187 int ret; 170 int count;
188 struct btrfs_path *path; 171 struct btrfs_path *path;
189 struct btrfs_csum_item *item = NULL; 172 struct btrfs_csum_item *item = NULL;
190 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 173 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -212,10 +195,12 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
212 if (dio) 195 if (dio)
213 offset = logical_offset; 196 offset = logical_offset;
214 while (bio_index < bio->bi_vcnt) { 197 while (bio_index < bio->bi_vcnt) {
198 len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
215 if (!dio) 199 if (!dio)
216 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 200 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
217 ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); 201 count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum,
218 if (ret == 0) 202 len);
203 if (count)
219 goto found; 204 goto found;
220 205
221 if (!item || disk_bytenr < item_start_offset || 206 if (!item || disk_bytenr < item_start_offset ||
@@ -228,10 +213,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
228 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root, 213 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
229 path, disk_bytenr, 0); 214 path, disk_bytenr, 0);
230 if (IS_ERR(item)) { 215 if (IS_ERR(item)) {
231 ret = PTR_ERR(item); 216 count = 1;
232 if (ret == -ENOENT || ret == -EFBIG) 217 sum[0] = 0;
233 ret = 0;
234 sum = 0;
235 if (BTRFS_I(inode)->root->root_key.objectid == 218 if (BTRFS_I(inode)->root->root_key.objectid ==
236 BTRFS_DATA_RELOC_TREE_OBJECTID) { 219 BTRFS_DATA_RELOC_TREE_OBJECTID) {
237 set_extent_bits(io_tree, offset, 220 set_extent_bits(io_tree, offset,
@@ -267,19 +250,29 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
267 diff = disk_bytenr - item_start_offset; 250 diff = disk_bytenr - item_start_offset;
268 diff = diff / root->sectorsize; 251 diff = diff / root->sectorsize;
269 diff = diff * csum_size; 252 diff = diff * csum_size;
270 253 count = min_t(int, len, (item_last_offset - disk_bytenr) >>
271 read_extent_buffer(path->nodes[0], &sum, 254 inode->i_sb->s_blocksize_bits);
255 read_extent_buffer(path->nodes[0], sum,
272 ((unsigned long)item) + diff, 256 ((unsigned long)item) + diff,
273 csum_size); 257 csum_size * count);
274found: 258found:
275 if (dst) 259 if (dst) {
276 *dst++ = sum; 260 memcpy(dst, sum, count * csum_size);
277 else 261 dst += count;
278 set_state_private(io_tree, offset, sum); 262 } else {
279 disk_bytenr += bvec->bv_len; 263 if (dio)
280 offset += bvec->bv_len; 264 extent_cache_csums_dio(io_tree, offset, sum,
281 bio_index++; 265 count);
282 bvec++; 266 else
267 extent_cache_csums(io_tree, bio, bio_index, sum,
268 count);
269 }
270 while (count--) {
271 disk_bytenr += bvec->bv_len;
272 offset += bvec->bv_len;
273 bio_index++;
274 bvec++;
275 }
283 } 276 }
284 btrfs_free_path(path); 277 btrfs_free_path(path);
285 return 0; 278 return 0;
@@ -356,11 +349,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
356 349
357 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 350 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
358 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 351 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
359 key.type != BTRFS_EXTENT_CSUM_KEY) 352 key.type != BTRFS_EXTENT_CSUM_KEY ||
360 break; 353 key.offset > end)
361
362 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
363 if (key.offset > end)
364 break; 354 break;
365 355
366 if (key.offset > start) 356 if (key.offset > start)
@@ -482,8 +472,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
482 472
483 data = kmap_atomic(bvec->bv_page); 473 data = kmap_atomic(bvec->bv_page);
484 sector_sum->sum = ~(u32)0; 474 sector_sum->sum = ~(u32)0;
485 sector_sum->sum = btrfs_csum_data(root, 475 sector_sum->sum = btrfs_csum_data(data + bvec->bv_offset,
486 data + bvec->bv_offset,
487 sector_sum->sum, 476 sector_sum->sum,
488 bvec->bv_len); 477 bvec->bv_len);
489 kunmap_atomic(data); 478 kunmap_atomic(data);
@@ -516,8 +505,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
516 * This calls btrfs_truncate_item with the correct args based on the 505 * This calls btrfs_truncate_item with the correct args based on the
517 * overlap, and fixes up the key as required. 506 * overlap, and fixes up the key as required.
518 */ 507 */
519static noinline void truncate_one_csum(struct btrfs_trans_handle *trans, 508static noinline void truncate_one_csum(struct btrfs_root *root,
520 struct btrfs_root *root,
521 struct btrfs_path *path, 509 struct btrfs_path *path,
522 struct btrfs_key *key, 510 struct btrfs_key *key,
523 u64 bytenr, u64 len) 511 u64 bytenr, u64 len)
@@ -542,7 +530,7 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
542 */ 530 */
543 u32 new_size = (bytenr - key->offset) >> blocksize_bits; 531 u32 new_size = (bytenr - key->offset) >> blocksize_bits;
544 new_size *= csum_size; 532 new_size *= csum_size;
545 btrfs_truncate_item(trans, root, path, new_size, 1); 533 btrfs_truncate_item(root, path, new_size, 1);
546 } else if (key->offset >= bytenr && csum_end > end_byte && 534 } else if (key->offset >= bytenr && csum_end > end_byte &&
547 end_byte > key->offset) { 535 end_byte > key->offset) {
548 /* 536 /*
@@ -554,10 +542,10 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
554 u32 new_size = (csum_end - end_byte) >> blocksize_bits; 542 u32 new_size = (csum_end - end_byte) >> blocksize_bits;
555 new_size *= csum_size; 543 new_size *= csum_size;
556 544
557 btrfs_truncate_item(trans, root, path, new_size, 0); 545 btrfs_truncate_item(root, path, new_size, 0);
558 546
559 key->offset = end_byte; 547 key->offset = end_byte;
560 btrfs_set_item_key_safe(trans, root, path, key); 548 btrfs_set_item_key_safe(root, path, key);
561 } else { 549 } else {
562 BUG(); 550 BUG();
563 } 551 }
@@ -672,7 +660,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
672 660
673 key.offset = end_byte - 1; 661 key.offset = end_byte - 1;
674 } else { 662 } else {
675 truncate_one_csum(trans, root, path, &key, bytenr, len); 663 truncate_one_csum(root, path, &key, bytenr, len);
676 if (key.offset < bytenr) 664 if (key.offset < bytenr)
677 break; 665 break;
678 } 666 }
@@ -728,7 +716,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
728 return -ENOMEM; 716 return -ENOMEM;
729 717
730 sector_sum = sums->sums; 718 sector_sum = sums->sums;
731 trans->adding_csums = 1;
732again: 719again:
733 next_offset = (u64)-1; 720 next_offset = (u64)-1;
734 found_next = 0; 721 found_next = 0;
@@ -834,7 +821,7 @@ again:
834 diff /= csum_size; 821 diff /= csum_size;
835 diff *= csum_size; 822 diff *= csum_size;
836 823
837 btrfs_extend_item(trans, root, path, diff); 824 btrfs_extend_item(root, path, diff);
838 goto csum; 825 goto csum;
839 } 826 }
840 827
@@ -899,7 +886,6 @@ next_sector:
899 goto again; 886 goto again;
900 } 887 }
901out: 888out:
902 trans->adding_csums = 0;
903 btrfs_free_path(path); 889 btrfs_free_path(path);
904 return ret; 890 return ret;
905 891
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5b4ea5f55b8f..4205ba752d40 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
26#include <linux/mpage.h> 26#include <linux/mpage.h>
27#include <linux/aio.h>
27#include <linux/falloc.h> 28#include <linux/falloc.h>
28#include <linux/swap.h> 29#include <linux/swap.h>
29#include <linux/writeback.h> 30#include <linux/writeback.h>
@@ -192,8 +193,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
192 * the same inode in the tree, we will merge them together (by 193 * the same inode in the tree, we will merge them together (by
193 * __btrfs_add_inode_defrag()) and free the one that we want to requeue. 194 * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
194 */ 195 */
195void btrfs_requeue_inode_defrag(struct inode *inode, 196static void btrfs_requeue_inode_defrag(struct inode *inode,
196 struct inode_defrag *defrag) 197 struct inode_defrag *defrag)
197{ 198{
198 struct btrfs_root *root = BTRFS_I(inode)->root; 199 struct btrfs_root *root = BTRFS_I(inode)->root;
199 int ret; 200 int ret;
@@ -473,7 +474,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
473/* 474/*
474 * unlocks pages after btrfs_file_write is done with them 475 * unlocks pages after btrfs_file_write is done with them
475 */ 476 */
476void btrfs_drop_pages(struct page **pages, size_t num_pages) 477static void btrfs_drop_pages(struct page **pages, size_t num_pages)
477{ 478{
478 size_t i; 479 size_t i;
479 for (i = 0; i < num_pages; i++) { 480 for (i = 0; i < num_pages; i++) {
@@ -497,9 +498,9 @@ void btrfs_drop_pages(struct page **pages, size_t num_pages)
497 * doing real data extents, marking pages dirty and delalloc as required. 498 * doing real data extents, marking pages dirty and delalloc as required.
498 */ 499 */
499int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, 500int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
500 struct page **pages, size_t num_pages, 501 struct page **pages, size_t num_pages,
501 loff_t pos, size_t write_bytes, 502 loff_t pos, size_t write_bytes,
502 struct extent_state **cached) 503 struct extent_state **cached)
503{ 504{
504 int err = 0; 505 int err = 0;
505 int i; 506 int i;
@@ -552,6 +553,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
552 int testend = 1; 553 int testend = 1;
553 unsigned long flags; 554 unsigned long flags;
554 int compressed = 0; 555 int compressed = 0;
556 bool modified;
555 557
556 WARN_ON(end < start); 558 WARN_ON(end < start);
557 if (end == (u64)-1) { 559 if (end == (u64)-1) {
@@ -561,6 +563,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
561 while (1) { 563 while (1) {
562 int no_splits = 0; 564 int no_splits = 0;
563 565
566 modified = false;
564 if (!split) 567 if (!split)
565 split = alloc_extent_map(); 568 split = alloc_extent_map();
566 if (!split2) 569 if (!split2)
@@ -592,6 +595,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
592 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 595 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
593 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 596 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
594 clear_bit(EXTENT_FLAG_LOGGING, &flags); 597 clear_bit(EXTENT_FLAG_LOGGING, &flags);
598 modified = !list_empty(&em->list);
595 remove_extent_mapping(em_tree, em); 599 remove_extent_mapping(em_tree, em);
596 if (no_splits) 600 if (no_splits)
597 goto next; 601 goto next;
@@ -607,15 +611,15 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
607 split->block_len = em->block_len; 611 split->block_len = em->block_len;
608 else 612 else
609 split->block_len = split->len; 613 split->block_len = split->len;
614 split->ram_bytes = em->ram_bytes;
610 split->orig_block_len = max(split->block_len, 615 split->orig_block_len = max(split->block_len,
611 em->orig_block_len); 616 em->orig_block_len);
612 split->generation = gen; 617 split->generation = gen;
613 split->bdev = em->bdev; 618 split->bdev = em->bdev;
614 split->flags = flags; 619 split->flags = flags;
615 split->compress_type = em->compress_type; 620 split->compress_type = em->compress_type;
616 ret = add_extent_mapping(em_tree, split); 621 ret = add_extent_mapping(em_tree, split, modified);
617 BUG_ON(ret); /* Logic error */ 622 BUG_ON(ret); /* Logic error */
618 list_move(&split->list, &em_tree->modified_extents);
619 free_extent_map(split); 623 free_extent_map(split);
620 split = split2; 624 split = split2;
621 split2 = NULL; 625 split2 = NULL;
@@ -632,6 +636,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
632 split->generation = gen; 636 split->generation = gen;
633 split->orig_block_len = max(em->block_len, 637 split->orig_block_len = max(em->block_len,
634 em->orig_block_len); 638 em->orig_block_len);
639 split->ram_bytes = em->ram_bytes;
635 640
636 if (compressed) { 641 if (compressed) {
637 split->block_len = em->block_len; 642 split->block_len = em->block_len;
@@ -643,9 +648,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
643 split->orig_start = em->orig_start; 648 split->orig_start = em->orig_start;
644 } 649 }
645 650
646 ret = add_extent_mapping(em_tree, split); 651 ret = add_extent_mapping(em_tree, split, modified);
647 BUG_ON(ret); /* Logic error */ 652 BUG_ON(ret); /* Logic error */
648 list_move(&split->list, &em_tree->modified_extents);
649 free_extent_map(split); 653 free_extent_map(split);
650 split = NULL; 654 split = NULL;
651 } 655 }
@@ -821,7 +825,7 @@ next_slot:
821 825
822 memcpy(&new_key, &key, sizeof(new_key)); 826 memcpy(&new_key, &key, sizeof(new_key));
823 new_key.offset = end; 827 new_key.offset = end;
824 btrfs_set_item_key_safe(trans, root, path, &new_key); 828 btrfs_set_item_key_safe(root, path, &new_key);
825 829
826 extent_offset += end - key.offset; 830 extent_offset += end - key.offset;
827 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 831 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1037,7 +1041,7 @@ again:
1037 ino, bytenr, orig_offset, 1041 ino, bytenr, orig_offset,
1038 &other_start, &other_end)) { 1042 &other_start, &other_end)) {
1039 new_key.offset = end; 1043 new_key.offset = end;
1040 btrfs_set_item_key_safe(trans, root, path, &new_key); 1044 btrfs_set_item_key_safe(root, path, &new_key);
1041 fi = btrfs_item_ptr(leaf, path->slots[0], 1045 fi = btrfs_item_ptr(leaf, path->slots[0],
1042 struct btrfs_file_extent_item); 1046 struct btrfs_file_extent_item);
1043 btrfs_set_file_extent_generation(leaf, fi, 1047 btrfs_set_file_extent_generation(leaf, fi,
@@ -1071,7 +1075,7 @@ again:
1071 trans->transid); 1075 trans->transid);
1072 path->slots[0]++; 1076 path->slots[0]++;
1073 new_key.offset = start; 1077 new_key.offset = start;
1074 btrfs_set_item_key_safe(trans, root, path, &new_key); 1078 btrfs_set_item_key_safe(root, path, &new_key);
1075 1079
1076 fi = btrfs_item_ptr(leaf, path->slots[0], 1080 fi = btrfs_item_ptr(leaf, path->slots[0],
1077 struct btrfs_file_extent_item); 1081 struct btrfs_file_extent_item);
@@ -1514,8 +1518,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1514 size_t count, ocount; 1518 size_t count, ocount;
1515 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); 1519 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1516 1520
1517 sb_start_write(inode->i_sb);
1518
1519 mutex_lock(&inode->i_mutex); 1521 mutex_lock(&inode->i_mutex);
1520 1522
1521 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); 1523 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -1617,7 +1619,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1617 if (sync) 1619 if (sync)
1618 atomic_dec(&BTRFS_I(inode)->sync_writers); 1620 atomic_dec(&BTRFS_I(inode)->sync_writers);
1619out: 1621out:
1620 sb_end_write(inode->i_sb);
1621 current->backing_dev_info = NULL; 1622 current->backing_dev_info = NULL;
1622 return num_written ? num_written : err; 1623 return num_written ? num_written : err;
1623} 1624}
@@ -1885,7 +1886,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
1885 1886
1886 path->slots[0]++; 1887 path->slots[0]++;
1887 key.offset = offset; 1888 key.offset = offset;
1888 btrfs_set_item_key_safe(trans, root, path, &key); 1889 btrfs_set_item_key_safe(root, path, &key);
1889 fi = btrfs_item_ptr(leaf, path->slots[0], 1890 fi = btrfs_item_ptr(leaf, path->slots[0],
1890 struct btrfs_file_extent_item); 1891 struct btrfs_file_extent_item);
1891 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end - 1892 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -1915,6 +1916,7 @@ out:
1915 } else { 1916 } else {
1916 hole_em->start = offset; 1917 hole_em->start = offset;
1917 hole_em->len = end - offset; 1918 hole_em->len = end - offset;
1919 hole_em->ram_bytes = hole_em->len;
1918 hole_em->orig_start = offset; 1920 hole_em->orig_start = offset;
1919 1921
1920 hole_em->block_start = EXTENT_MAP_HOLE; 1922 hole_em->block_start = EXTENT_MAP_HOLE;
@@ -1927,10 +1929,7 @@ out:
1927 do { 1929 do {
1928 btrfs_drop_extent_cache(inode, offset, end - 1, 0); 1930 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
1929 write_lock(&em_tree->lock); 1931 write_lock(&em_tree->lock);
1930 ret = add_extent_mapping(em_tree, hole_em); 1932 ret = add_extent_mapping(em_tree, hole_em, 1);
1931 if (!ret)
1932 list_move(&hole_em->list,
1933 &em_tree->modified_extents);
1934 write_unlock(&em_tree->lock); 1933 write_unlock(&em_tree->lock);
1935 } while (ret == -EEXIST); 1934 } while (ret == -EEXIST);
1936 free_extent_map(hole_em); 1935 free_extent_map(hole_em);
@@ -2142,6 +2141,7 @@ static long btrfs_fallocate(struct file *file, int mode,
2142{ 2141{
2143 struct inode *inode = file_inode(file); 2142 struct inode *inode = file_inode(file);
2144 struct extent_state *cached_state = NULL; 2143 struct extent_state *cached_state = NULL;
2144 struct btrfs_root *root = BTRFS_I(inode)->root;
2145 u64 cur_offset; 2145 u64 cur_offset;
2146 u64 last_byte; 2146 u64 last_byte;
2147 u64 alloc_start; 2147 u64 alloc_start;
@@ -2169,6 +2169,11 @@ static long btrfs_fallocate(struct file *file, int mode,
2169 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); 2169 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
2170 if (ret) 2170 if (ret)
2171 return ret; 2171 return ret;
2172 if (root->fs_info->quota_enabled) {
2173 ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
2174 if (ret)
2175 goto out_reserve_fail;
2176 }
2172 2177
2173 /* 2178 /*
2174 * wait for ordered IO before we have any locks. We'll loop again 2179 * wait for ordered IO before we have any locks. We'll loop again
@@ -2272,6 +2277,9 @@ static long btrfs_fallocate(struct file *file, int mode,
2272 &cached_state, GFP_NOFS); 2277 &cached_state, GFP_NOFS);
2273out: 2278out:
2274 mutex_unlock(&inode->i_mutex); 2279 mutex_unlock(&inode->i_mutex);
2280 if (root->fs_info->quota_enabled)
2281 btrfs_qgroup_free(root, alloc_end - alloc_start);
2282out_reserve_fail:
2275 /* Let go of our reservation. */ 2283 /* Let go of our reservation. */
2276 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); 2284 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
2277 return ret; 2285 return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 1f84fc09c1a8..ecca6c7375a6 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -104,7 +104,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
104 104
105 spin_lock(&block_group->lock); 105 spin_lock(&block_group->lock);
106 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 106 if (!((BTRFS_I(inode)->flags & flags) == flags)) {
107 printk(KERN_INFO "Old style space inode found, converting.\n"); 107 btrfs_info(root->fs_info,
108 "Old style space inode found, converting.");
108 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 109 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
109 BTRFS_INODE_NODATACOW; 110 BTRFS_INODE_NODATACOW;
110 block_group->disk_cache_state = BTRFS_DC_CLEAR; 111 block_group->disk_cache_state = BTRFS_DC_CLEAR;
@@ -119,9 +120,10 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
119 return inode; 120 return inode;
120} 121}
121 122
122int __create_free_space_inode(struct btrfs_root *root, 123static int __create_free_space_inode(struct btrfs_root *root,
123 struct btrfs_trans_handle *trans, 124 struct btrfs_trans_handle *trans,
124 struct btrfs_path *path, u64 ino, u64 offset) 125 struct btrfs_path *path,
126 u64 ino, u64 offset)
125{ 127{
126 struct btrfs_key key; 128 struct btrfs_key key;
127 struct btrfs_disk_key disk_key; 129 struct btrfs_disk_key disk_key;
@@ -431,7 +433,7 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
431 if (index == 0) 433 if (index == 0)
432 offset = sizeof(u32) * io_ctl->num_pages; 434 offset = sizeof(u32) * io_ctl->num_pages;
433 435
434 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 436 crc = btrfs_csum_data(io_ctl->orig + offset, crc,
435 PAGE_CACHE_SIZE - offset); 437 PAGE_CACHE_SIZE - offset);
436 btrfs_csum_final(crc, (char *)&crc); 438 btrfs_csum_final(crc, (char *)&crc);
437 io_ctl_unmap_page(io_ctl); 439 io_ctl_unmap_page(io_ctl);
@@ -461,7 +463,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
461 kunmap(io_ctl->pages[0]); 463 kunmap(io_ctl->pages[0]);
462 464
463 io_ctl_map_page(io_ctl, 0); 465 io_ctl_map_page(io_ctl, 0);
464 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 466 crc = btrfs_csum_data(io_ctl->orig + offset, crc,
465 PAGE_CACHE_SIZE - offset); 467 PAGE_CACHE_SIZE - offset);
466 btrfs_csum_final(crc, (char *)&crc); 468 btrfs_csum_final(crc, (char *)&crc);
467 if (val != crc) { 469 if (val != crc) {
@@ -624,9 +626,9 @@ next:
624 spin_unlock(&ctl->tree_lock); 626 spin_unlock(&ctl->tree_lock);
625} 627}
626 628
627int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 629static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
628 struct btrfs_free_space_ctl *ctl, 630 struct btrfs_free_space_ctl *ctl,
629 struct btrfs_path *path, u64 offset) 631 struct btrfs_path *path, u64 offset)
630{ 632{
631 struct btrfs_free_space_header *header; 633 struct btrfs_free_space_header *header;
632 struct extent_buffer *leaf; 634 struct extent_buffer *leaf;
@@ -669,10 +671,11 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
669 btrfs_release_path(path); 671 btrfs_release_path(path);
670 672
671 if (BTRFS_I(inode)->generation != generation) { 673 if (BTRFS_I(inode)->generation != generation) {
672 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 674 btrfs_err(root->fs_info,
673 " not match free space cache generation (%llu)\n", 675 "free space inode generation (%llu) "
674 (unsigned long long)BTRFS_I(inode)->generation, 676 "did not match free space cache generation (%llu)",
675 (unsigned long long)generation); 677 (unsigned long long)BTRFS_I(inode)->generation,
678 (unsigned long long)generation);
676 return 0; 679 return 0;
677 } 680 }
678 681
@@ -721,8 +724,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
721 ret = link_free_space(ctl, e); 724 ret = link_free_space(ctl, e);
722 spin_unlock(&ctl->tree_lock); 725 spin_unlock(&ctl->tree_lock);
723 if (ret) { 726 if (ret) {
724 printk(KERN_ERR "Duplicate entries in " 727 btrfs_err(root->fs_info,
725 "free space cache, dumping\n"); 728 "Duplicate entries in free space cache, dumping");
726 kmem_cache_free(btrfs_free_space_cachep, e); 729 kmem_cache_free(btrfs_free_space_cachep, e);
727 goto free_cache; 730 goto free_cache;
728 } 731 }
@@ -741,8 +744,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
741 ctl->op->recalc_thresholds(ctl); 744 ctl->op->recalc_thresholds(ctl);
742 spin_unlock(&ctl->tree_lock); 745 spin_unlock(&ctl->tree_lock);
743 if (ret) { 746 if (ret) {
744 printk(KERN_ERR "Duplicate entries in " 747 btrfs_err(root->fs_info,
745 "free space cache, dumping\n"); 748 "Duplicate entries in free space cache, dumping");
746 kmem_cache_free(btrfs_free_space_cachep, e); 749 kmem_cache_free(btrfs_free_space_cachep, e);
747 goto free_cache; 750 goto free_cache;
748 } 751 }
@@ -833,8 +836,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
833 836
834 if (!matched) { 837 if (!matched) {
835 __btrfs_remove_free_space_cache(ctl); 838 __btrfs_remove_free_space_cache(ctl);
836 printk(KERN_ERR "block group %llu has an wrong amount of free " 839 btrfs_err(fs_info, "block group %llu has wrong amount of free space",
837 "space\n", block_group->key.objectid); 840 block_group->key.objectid);
838 ret = -1; 841 ret = -1;
839 } 842 }
840out: 843out:
@@ -845,8 +848,8 @@ out:
845 spin_unlock(&block_group->lock); 848 spin_unlock(&block_group->lock);
846 ret = 0; 849 ret = 0;
847 850
848 printk(KERN_ERR "btrfs: failed to load free space cache " 851 btrfs_err(fs_info, "failed to load free space cache for block group %llu",
849 "for block group %llu\n", block_group->key.objectid); 852 block_group->key.objectid);
850 } 853 }
851 854
852 iput(inode); 855 iput(inode);
@@ -866,11 +869,11 @@ out:
866 * on mount. This will return 0 if it was successfull in writing the cache out, 869 * on mount. This will return 0 if it was successfull in writing the cache out,
867 * and -1 if it was not. 870 * and -1 if it was not.
868 */ 871 */
869int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 872static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
870 struct btrfs_free_space_ctl *ctl, 873 struct btrfs_free_space_ctl *ctl,
871 struct btrfs_block_group_cache *block_group, 874 struct btrfs_block_group_cache *block_group,
872 struct btrfs_trans_handle *trans, 875 struct btrfs_trans_handle *trans,
873 struct btrfs_path *path, u64 offset) 876 struct btrfs_path *path, u64 offset)
874{ 877{
875 struct btrfs_free_space_header *header; 878 struct btrfs_free_space_header *header;
876 struct extent_buffer *leaf; 879 struct extent_buffer *leaf;
@@ -1104,8 +1107,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
1104 spin_unlock(&block_group->lock); 1107 spin_unlock(&block_group->lock);
1105 ret = 0; 1108 ret = 0;
1106#ifdef DEBUG 1109#ifdef DEBUG
1107 printk(KERN_ERR "btrfs: failed to write free space cache " 1110 btrfs_err(root->fs_info,
1108 "for block group %llu\n", block_group->key.objectid); 1111 "failed to write free space cache for block group %llu",
1112 block_group->key.objectid);
1109#endif 1113#endif
1110 } 1114 }
1111 1115
@@ -1564,7 +1568,8 @@ again:
1564 search_bytes = ctl->unit; 1568 search_bytes = ctl->unit;
1565 search_bytes = min(search_bytes, end - search_start + 1); 1569 search_bytes = min(search_bytes, end - search_start + 1);
1566 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); 1570 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1567 BUG_ON(ret < 0 || search_start != *offset); 1571 if (ret < 0 || search_start != *offset)
1572 return -EINVAL;
1568 1573
1569 /* We may have found more bits than what we need */ 1574 /* We may have found more bits than what we need */
1570 search_bytes = min(search_bytes, *bytes); 1575 search_bytes = min(search_bytes, *bytes);
@@ -1970,7 +1975,6 @@ again:
1970 re_search = true; 1975 re_search = true;
1971 goto again; 1976 goto again;
1972 } 1977 }
1973 BUG_ON(ret); /* logic error */
1974out_lock: 1978out_lock:
1975 spin_unlock(&ctl->tree_lock); 1979 spin_unlock(&ctl->tree_lock);
1976out: 1980out:
@@ -2064,7 +2068,8 @@ out:
2064 return 0; 2068 return 0;
2065} 2069}
2066 2070
2067void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) 2071static void __btrfs_remove_free_space_cache_locked(
2072 struct btrfs_free_space_ctl *ctl)
2068{ 2073{
2069 struct btrfs_free_space *info; 2074 struct btrfs_free_space *info;
2070 struct rb_node *node; 2075 struct rb_node *node;
@@ -2931,8 +2936,9 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2931 ret = __load_free_space_cache(root, inode, ctl, path, 0); 2936 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2932 2937
2933 if (ret < 0) 2938 if (ret < 0)
2934 printk(KERN_ERR "btrfs: failed to load free ino cache for " 2939 btrfs_err(fs_info,
2935 "root %llu\n", root->root_key.objectid); 2940 "failed to load free ino cache for root %llu",
2941 root->root_key.objectid);
2936out_put: 2942out_put:
2937 iput(inode); 2943 iput(inode);
2938out: 2944out:
@@ -2959,11 +2965,531 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2959 if (ret) { 2965 if (ret) {
2960 btrfs_delalloc_release_metadata(inode, inode->i_size); 2966 btrfs_delalloc_release_metadata(inode, inode->i_size);
2961#ifdef DEBUG 2967#ifdef DEBUG
2962 printk(KERN_ERR "btrfs: failed to write free ino cache " 2968 btrfs_err(root->fs_info,
2963 "for root %llu\n", root->root_key.objectid); 2969 "failed to write free ino cache for root %llu",
2970 root->root_key.objectid);
2964#endif 2971#endif
2965 } 2972 }
2966 2973
2967 iput(inode); 2974 iput(inode);
2968 return ret; 2975 return ret;
2969} 2976}
2977
2978#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
2979static struct btrfs_block_group_cache *init_test_block_group(void)
2980{
2981 struct btrfs_block_group_cache *cache;
2982
2983 cache = kzalloc(sizeof(*cache), GFP_NOFS);
2984 if (!cache)
2985 return NULL;
2986 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
2987 GFP_NOFS);
2988 if (!cache->free_space_ctl) {
2989 kfree(cache);
2990 return NULL;
2991 }
2992
2993 cache->key.objectid = 0;
2994 cache->key.offset = 1024 * 1024 * 1024;
2995 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
2996 cache->sectorsize = 4096;
2997
2998 spin_lock_init(&cache->lock);
2999 INIT_LIST_HEAD(&cache->list);
3000 INIT_LIST_HEAD(&cache->cluster_list);
3001 INIT_LIST_HEAD(&cache->new_bg_list);
3002
3003 btrfs_init_free_space_ctl(cache);
3004
3005 return cache;
3006}
3007
3008/*
3009 * Checks to see if the given range is in the free space cache. This is really
3010 * just used to check the absence of space, so if there is free space in the
3011 * range at all we will return 1.
3012 */
3013static int check_exists(struct btrfs_block_group_cache *cache, u64 offset,
3014 u64 bytes)
3015{
3016 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3017 struct btrfs_free_space *info;
3018 int ret = 0;
3019
3020 spin_lock(&ctl->tree_lock);
3021 info = tree_search_offset(ctl, offset, 0, 0);
3022 if (!info) {
3023 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3024 1, 0);
3025 if (!info)
3026 goto out;
3027 }
3028
3029have_info:
3030 if (info->bitmap) {
3031 u64 bit_off, bit_bytes;
3032 struct rb_node *n;
3033 struct btrfs_free_space *tmp;
3034
3035 bit_off = offset;
3036 bit_bytes = ctl->unit;
3037 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
3038 if (!ret) {
3039 if (bit_off == offset) {
3040 ret = 1;
3041 goto out;
3042 } else if (bit_off > offset &&
3043 offset + bytes > bit_off) {
3044 ret = 1;
3045 goto out;
3046 }
3047 }
3048
3049 n = rb_prev(&info->offset_index);
3050 while (n) {
3051 tmp = rb_entry(n, struct btrfs_free_space,
3052 offset_index);
3053 if (tmp->offset + tmp->bytes < offset)
3054 break;
3055 if (offset + bytes < tmp->offset) {
3056 n = rb_prev(&info->offset_index);
3057 continue;
3058 }
3059 info = tmp;
3060 goto have_info;
3061 }
3062
3063 n = rb_next(&info->offset_index);
3064 while (n) {
3065 tmp = rb_entry(n, struct btrfs_free_space,
3066 offset_index);
3067 if (offset + bytes < tmp->offset)
3068 break;
3069 if (tmp->offset + tmp->bytes < offset) {
3070 n = rb_next(&info->offset_index);
3071 continue;
3072 }
3073 info = tmp;
3074 goto have_info;
3075 }
3076
3077 goto out;
3078 }
3079
3080 if (info->offset == offset) {
3081 ret = 1;
3082 goto out;
3083 }
3084
3085 if (offset > info->offset && offset < info->offset + info->bytes)
3086 ret = 1;
3087out:
3088 spin_unlock(&ctl->tree_lock);
3089 return ret;
3090}
3091
3092/*
3093 * Use this if you need to make a bitmap or extent entry specifically, it
3094 * doesn't do any of the merging that add_free_space does, this acts a lot like
3095 * how the free space cache loading stuff works, so you can get really weird
3096 * configurations.
3097 */
3098static int add_free_space_entry(struct btrfs_block_group_cache *cache,
3099 u64 offset, u64 bytes, bool bitmap)
3100{
3101 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
3102 struct btrfs_free_space *info = NULL, *bitmap_info;
3103 void *map = NULL;
3104 u64 bytes_added;
3105 int ret;
3106
3107again:
3108 if (!info) {
3109 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
3110 if (!info)
3111 return -ENOMEM;
3112 }
3113
3114 if (!bitmap) {
3115 spin_lock(&ctl->tree_lock);
3116 info->offset = offset;
3117 info->bytes = bytes;
3118 ret = link_free_space(ctl, info);
3119 spin_unlock(&ctl->tree_lock);
3120 if (ret)
3121 kmem_cache_free(btrfs_free_space_cachep, info);
3122 return ret;
3123 }
3124
3125 if (!map) {
3126 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
3127 if (!map) {
3128 kmem_cache_free(btrfs_free_space_cachep, info);
3129 return -ENOMEM;
3130 }
3131 }
3132
3133 spin_lock(&ctl->tree_lock);
3134 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
3135 1, 0);
3136 if (!bitmap_info) {
3137 info->bitmap = map;
3138 map = NULL;
3139 add_new_bitmap(ctl, info, offset);
3140 bitmap_info = info;
3141 }
3142
3143 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
3144 bytes -= bytes_added;
3145 offset += bytes_added;
3146 spin_unlock(&ctl->tree_lock);
3147
3148 if (bytes)
3149 goto again;
3150
3151 if (map)
3152 kfree(map);
3153 return 0;
3154}
3155
3156/*
3157 * This test just does basic sanity checking, making sure we can add an exten
3158 * entry and remove space from either end and the middle, and make sure we can
3159 * remove space that covers adjacent extent entries.
3160 */
3161static int test_extents(struct btrfs_block_group_cache *cache)
3162{
3163 int ret = 0;
3164
3165 printk(KERN_ERR "Running extent only tests\n");
3166
3167 /* First just make sure we can remove an entire entry */
3168 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
3169 if (ret) {
3170 printk(KERN_ERR "Error adding initial extents %d\n", ret);
3171 return ret;
3172 }
3173
3174 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
3175 if (ret) {
3176 printk(KERN_ERR "Error removing extent %d\n", ret);
3177 return ret;
3178 }
3179
3180 if (check_exists(cache, 0, 4 * 1024 * 1024)) {
3181 printk(KERN_ERR "Full remove left some lingering space\n");
3182 return -1;
3183 }
3184
3185 /* Ok edge and middle cases now */
3186 ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
3187 if (ret) {
3188 printk(KERN_ERR "Error adding half extent %d\n", ret);
3189 return ret;
3190 }
3191
3192 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
3193 if (ret) {
3194 printk(KERN_ERR "Error removing tail end %d\n", ret);
3195 return ret;
3196 }
3197
3198 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
3199 if (ret) {
3200 printk(KERN_ERR "Error removing front end %d\n", ret);
3201 return ret;
3202 }
3203
3204 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
3205 if (ret) {
3206 printk(KERN_ERR "Error removing middle peice %d\n", ret);
3207 return ret;
3208 }
3209
3210 if (check_exists(cache, 0, 1 * 1024 * 1024)) {
3211 printk(KERN_ERR "Still have space at the front\n");
3212 return -1;
3213 }
3214
3215 if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
3216 printk(KERN_ERR "Still have space in the middle\n");
3217 return -1;
3218 }
3219
3220 if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
3221 printk(KERN_ERR "Still have space at the end\n");
3222 return -1;
3223 }
3224
3225 /* Cleanup */
3226 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3227
3228 return 0;
3229}
3230
3231static int test_bitmaps(struct btrfs_block_group_cache *cache)
3232{
3233 u64 next_bitmap_offset;
3234 int ret;
3235
3236 printk(KERN_ERR "Running bitmap only tests\n");
3237
3238 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
3239 if (ret) {
3240 printk(KERN_ERR "Couldn't create a bitmap entry %d\n", ret);
3241 return ret;
3242 }
3243
3244 ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
3245 if (ret) {
3246 printk(KERN_ERR "Error removing bitmap full range %d\n", ret);
3247 return ret;
3248 }
3249
3250 if (check_exists(cache, 0, 4 * 1024 * 1024)) {
3251 printk(KERN_ERR "Left some space in bitmap\n");
3252 return -1;
3253 }
3254
3255 ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
3256 if (ret) {
3257 printk(KERN_ERR "Couldn't add to our bitmap entry %d\n", ret);
3258 return ret;
3259 }
3260
3261 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
3262 if (ret) {
3263 printk(KERN_ERR "Couldn't remove middle chunk %d\n", ret);
3264 return ret;
3265 }
3266
3267 /*
3268 * The first bitmap we have starts at offset 0 so the next one is just
3269 * at the end of the first bitmap.
3270 */
3271 next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
3272
3273 /* Test a bit straddling two bitmaps */
3274 ret = add_free_space_entry(cache, next_bitmap_offset -
3275 (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
3276 if (ret) {
3277 printk(KERN_ERR "Couldn't add space that straddles two bitmaps"
3278 " %d\n", ret);
3279 return ret;
3280 }
3281
3282 ret = btrfs_remove_free_space(cache, next_bitmap_offset -
3283 (1 * 1024 * 1024), 2 * 1024 * 1024);
3284 if (ret) {
3285 printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
3286 return ret;
3287 }
3288
3289 if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
3290 2 * 1024 * 1024)) {
3291 printk(KERN_ERR "Left some space when removing overlapping\n");
3292 return -1;
3293 }
3294
3295 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3296
3297 return 0;
3298}
3299
3300/* This is the high grade jackassery */
3301static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
3302{
3303 u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
3304 int ret;
3305
3306 printk(KERN_ERR "Running bitmap and extent tests\n");
3307
3308 /*
3309 * First let's do something simple, an extent at the same offset as the
3310 * bitmap, but the free space completely in the extent and then
3311 * completely in the bitmap.
3312 */
3313 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
3314 if (ret) {
3315 printk(KERN_ERR "Couldn't create bitmap entry %d\n", ret);
3316 return ret;
3317 }
3318
3319 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
3320 if (ret) {
3321 printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
3322 return ret;
3323 }
3324
3325 ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
3326 if (ret) {
3327 printk(KERN_ERR "Couldn't remove extent entry %d\n", ret);
3328 return ret;
3329 }
3330
3331 if (check_exists(cache, 0, 1 * 1024 * 1024)) {
3332 printk(KERN_ERR "Left remnants after our remove\n");
3333 return -1;
3334 }
3335
3336 /* Now to add back the extent entry and remove from the bitmap */
3337 ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
3338 if (ret) {
3339 printk(KERN_ERR "Couldn't re-add extent entry %d\n", ret);
3340 return ret;
3341 }
3342
3343 ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
3344 if (ret) {
3345 printk(KERN_ERR "Couldn't remove from bitmap %d\n", ret);
3346 return ret;
3347 }
3348
3349 if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
3350 printk(KERN_ERR "Left remnants in the bitmap\n");
3351 return -1;
3352 }
3353
3354 /*
3355 * Ok so a little more evil, extent entry and bitmap at the same offset,
3356 * removing an overlapping chunk.
3357 */
3358 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
3359 if (ret) {
3360 printk(KERN_ERR "Couldn't add to a bitmap %d\n", ret);
3361 return ret;
3362 }
3363
3364 ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
3365 if (ret) {
3366 printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
3367 return ret;
3368 }
3369
3370 if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
3371 printk(KERN_ERR "Left over peices after removing "
3372 "overlapping\n");
3373 return -1;
3374 }
3375
3376 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3377
3378 /* Now with the extent entry offset into the bitmap */
3379 ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
3380 if (ret) {
3381 printk(KERN_ERR "Couldn't add space to the bitmap %d\n", ret);
3382 return ret;
3383 }
3384
3385 ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
3386 if (ret) {
3387 printk(KERN_ERR "Couldn't add extent to the cache %d\n", ret);
3388 return ret;
3389 }
3390
3391 ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
3392 if (ret) {
3393 printk(KERN_ERR "Problem removing overlapping space %d\n", ret);
3394 return ret;
3395 }
3396
3397 if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
3398 printk(KERN_ERR "Left something behind when removing space");
3399 return -1;
3400 }
3401
3402 /*
3403 * This has blown up in the past, the extent entry starts before the
3404 * bitmap entry, but we're trying to remove an offset that falls
3405 * completely within the bitmap range and is in both the extent entry
3406 * and the bitmap entry, looks like this
3407 *
3408 * [ extent ]
3409 * [ bitmap ]
3410 * [ del ]
3411 */
3412 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3413 ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
3414 4 * 1024 * 1024, 1);
3415 if (ret) {
3416 printk(KERN_ERR "Couldn't add bitmap %d\n", ret);
3417 return ret;
3418 }
3419
3420 ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
3421 5 * 1024 * 1024, 0);
3422 if (ret) {
3423 printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
3424 return ret;
3425 }
3426
3427 ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
3428 5 * 1024 * 1024);
3429 if (ret) {
3430 printk(KERN_ERR "Failed to free our space %d\n", ret);
3431 return ret;
3432 }
3433
3434 if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
3435 5 * 1024 * 1024)) {
3436 printk(KERN_ERR "Left stuff over\n");
3437 return -1;
3438 }
3439
3440 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3441
3442 /*
3443 * This blew up before, we have part of the free space in a bitmap and
3444 * then the entirety of the rest of the space in an extent. This used
3445 * to return -EAGAIN back from btrfs_remove_extent, make sure this
3446 * doesn't happen.
3447 */
3448 ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
3449 if (ret) {
3450 printk(KERN_ERR "Couldn't add bitmap entry %d\n", ret);
3451 return ret;
3452 }
3453
3454 ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
3455 if (ret) {
3456 printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
3457 return ret;
3458 }
3459
3460 ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
3461 if (ret) {
3462 printk(KERN_ERR "Error removing bitmap and extent "
3463 "overlapping %d\n", ret);
3464 return ret;
3465 }
3466
3467 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3468 return 0;
3469}
3470
3471void btrfs_test_free_space_cache(void)
3472{
3473 struct btrfs_block_group_cache *cache;
3474
3475 printk(KERN_ERR "Running btrfs free space cache tests\n");
3476
3477 cache = init_test_block_group();
3478 if (!cache) {
3479 printk(KERN_ERR "Couldn't run the tests\n");
3480 return;
3481 }
3482
3483 if (test_extents(cache))
3484 goto out;
3485 if (test_bitmaps(cache))
3486 goto out;
3487 if (test_bitmaps_and_extents(cache))
3488 goto out;
3489out:
3490 __btrfs_remove_free_space_cache(cache->free_space_ctl);
3491 kfree(cache->free_space_ctl);
3492 kfree(cache);
3493 printk(KERN_ERR "Free space cache tests finished\n");
3494}
3495#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 8f2613f779ed..4dc17d8809c7 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -110,4 +110,9 @@ int btrfs_return_cluster_to_free_space(
110 struct btrfs_free_cluster *cluster); 110 struct btrfs_free_cluster *cluster);
111int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 111int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
112 u64 *trimmed, u64 start, u64 end, u64 minlen); 112 u64 *trimmed, u64 start, u64 end, u64 minlen);
113
114#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
115void btrfs_test_free_space_cache(void);
116#endif
117
113#endif 118#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 48b8fda93132..e0b7034d6343 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -183,10 +183,11 @@ int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
183 return -ENOENT; 183 return -ENOENT;
184} 184}
185 185
186int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, 186static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
187 struct btrfs_root *root, 187 struct btrfs_root *root,
188 const char *name, int name_len, 188 const char *name, int name_len,
189 u64 inode_objectid, u64 ref_objectid, u64 *index) 189 u64 inode_objectid, u64 ref_objectid,
190 u64 *index)
190{ 191{
191 struct btrfs_path *path; 192 struct btrfs_path *path;
192 struct btrfs_key key; 193 struct btrfs_key key;
@@ -246,7 +247,7 @@ int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
246 memmove_extent_buffer(leaf, ptr, ptr + del_len, 247 memmove_extent_buffer(leaf, ptr, ptr + del_len,
247 item_size - (ptr + del_len - item_start)); 248 item_size - (ptr + del_len - item_start));
248 249
249 btrfs_truncate_item(trans, root, path, item_size - del_len, 1); 250 btrfs_truncate_item(root, path, item_size - del_len, 1);
250 251
251out: 252out:
252 btrfs_free_path(path); 253 btrfs_free_path(path);
@@ -309,7 +310,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
309 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]); 310 item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
310 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, 311 memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
311 item_size - (ptr + sub_item_len - item_start)); 312 item_size - (ptr + sub_item_len - item_start));
312 btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1); 313 btrfs_truncate_item(root, path, item_size - sub_item_len, 1);
313out: 314out:
314 btrfs_free_path(path); 315 btrfs_free_path(path);
315 316
@@ -361,7 +362,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
361 name, name_len, NULL)) 362 name, name_len, NULL))
362 goto out; 363 goto out;
363 364
364 btrfs_extend_item(trans, root, path, ins_len); 365 btrfs_extend_item(root, path, ins_len);
365 ret = 0; 366 ret = 0;
366 } 367 }
367 if (ret < 0) 368 if (ret < 0)
@@ -417,7 +418,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
417 goto out; 418 goto out;
418 419
419 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 420 old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
420 btrfs_extend_item(trans, root, path, ins_len); 421 btrfs_extend_item(root, path, ins_len);
421 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 422 ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
422 struct btrfs_inode_ref); 423 struct btrfs_inode_ref);
423 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size); 424 ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ca1b767d51f7..9b31b3b091fc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,6 +32,7 @@
32#include <linux/writeback.h> 32#include <linux/writeback.h>
33#include <linux/statfs.h> 33#include <linux/statfs.h>
34#include <linux/compat.h> 34#include <linux/compat.h>
35#include <linux/aio.h>
35#include <linux/bit_spinlock.h> 36#include <linux/bit_spinlock.h>
36#include <linux/xattr.h> 37#include <linux/xattr.h>
37#include <linux/posix_acl.h> 38#include <linux/posix_acl.h>
@@ -100,7 +101,10 @@ static noinline int cow_file_range(struct inode *inode,
100static struct extent_map *create_pinned_em(struct inode *inode, u64 start, 101static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
101 u64 len, u64 orig_start, 102 u64 len, u64 orig_start,
102 u64 block_start, u64 block_len, 103 u64 block_start, u64 block_len,
103 u64 orig_block_len, int type); 104 u64 orig_block_len, u64 ram_bytes,
105 int type);
106
107static int btrfs_dirty_inode(struct inode *inode);
104 108
105static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 109static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
106 struct inode *inode, struct inode *dir, 110 struct inode *inode, struct inode *dir,
@@ -353,6 +357,7 @@ static noinline int compress_file_range(struct inode *inode,
353 int i; 357 int i;
354 int will_compress; 358 int will_compress;
355 int compress_type = root->fs_info->compress_type; 359 int compress_type = root->fs_info->compress_type;
360 int redirty = 0;
356 361
357 /* if this is a small write inside eof, kick off a defrag */ 362 /* if this is a small write inside eof, kick off a defrag */
358 if ((end - start + 1) < 16 * 1024 && 363 if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +420,17 @@ again:
415 if (BTRFS_I(inode)->force_compress) 420 if (BTRFS_I(inode)->force_compress)
416 compress_type = BTRFS_I(inode)->force_compress; 421 compress_type = BTRFS_I(inode)->force_compress;
417 422
423 /*
424 * we need to call clear_page_dirty_for_io on each
425 * page in the range. Otherwise applications with the file
426 * mmap'd can wander in and change the page contents while
427 * we are compressing them.
428 *
429 * If the compression fails for any reason, we set the pages
430 * dirty again later on.
431 */
432 extent_range_clear_dirty_for_io(inode, start, end);
433 redirty = 1;
418 ret = btrfs_compress_pages(compress_type, 434 ret = btrfs_compress_pages(compress_type,
419 inode->i_mapping, start, 435 inode->i_mapping, start,
420 total_compressed, pages, 436 total_compressed, pages,
@@ -554,6 +570,8 @@ cleanup_and_bail_uncompressed:
554 __set_page_dirty_nobuffers(locked_page); 570 __set_page_dirty_nobuffers(locked_page);
555 /* unlocked later on in the async handlers */ 571 /* unlocked later on in the async handlers */
556 } 572 }
573 if (redirty)
574 extent_range_redirty_for_io(inode, start, end);
557 add_async_extent(async_cow, start, end - start + 1, 575 add_async_extent(async_cow, start, end - start + 1,
558 0, NULL, 0, BTRFS_COMPRESS_NONE); 576 0, NULL, 0, BTRFS_COMPRESS_NONE);
559 *num_added += 1; 577 *num_added += 1;
@@ -708,6 +726,7 @@ retry:
708 em->block_start = ins.objectid; 726 em->block_start = ins.objectid;
709 em->block_len = ins.offset; 727 em->block_len = ins.offset;
710 em->orig_block_len = ins.offset; 728 em->orig_block_len = ins.offset;
729 em->ram_bytes = async_extent->ram_size;
711 em->bdev = root->fs_info->fs_devices->latest_bdev; 730 em->bdev = root->fs_info->fs_devices->latest_bdev;
712 em->compress_type = async_extent->compress_type; 731 em->compress_type = async_extent->compress_type;
713 set_bit(EXTENT_FLAG_PINNED, &em->flags); 732 set_bit(EXTENT_FLAG_PINNED, &em->flags);
@@ -716,10 +735,7 @@ retry:
716 735
717 while (1) { 736 while (1) {
718 write_lock(&em_tree->lock); 737 write_lock(&em_tree->lock);
719 ret = add_extent_mapping(em_tree, em); 738 ret = add_extent_mapping(em_tree, em, 1);
720 if (!ret)
721 list_move(&em->list,
722 &em_tree->modified_extents);
723 write_unlock(&em_tree->lock); 739 write_unlock(&em_tree->lock);
724 if (ret != -EEXIST) { 740 if (ret != -EEXIST) {
725 free_extent_map(em); 741 free_extent_map(em);
@@ -907,7 +923,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
907 } 923 }
908 924
909 em = alloc_extent_map(); 925 em = alloc_extent_map();
910 BUG_ON(!em); /* -ENOMEM */ 926 if (!em)
927 goto out_reserve;
911 em->start = start; 928 em->start = start;
912 em->orig_start = em->start; 929 em->orig_start = em->start;
913 ram_size = ins.offset; 930 ram_size = ins.offset;
@@ -918,16 +935,14 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
918 em->block_start = ins.objectid; 935 em->block_start = ins.objectid;
919 em->block_len = ins.offset; 936 em->block_len = ins.offset;
920 em->orig_block_len = ins.offset; 937 em->orig_block_len = ins.offset;
938 em->ram_bytes = ram_size;
921 em->bdev = root->fs_info->fs_devices->latest_bdev; 939 em->bdev = root->fs_info->fs_devices->latest_bdev;
922 set_bit(EXTENT_FLAG_PINNED, &em->flags); 940 set_bit(EXTENT_FLAG_PINNED, &em->flags);
923 em->generation = -1; 941 em->generation = -1;
924 942
925 while (1) { 943 while (1) {
926 write_lock(&em_tree->lock); 944 write_lock(&em_tree->lock);
927 ret = add_extent_mapping(em_tree, em); 945 ret = add_extent_mapping(em_tree, em, 1);
928 if (!ret)
929 list_move(&em->list,
930 &em_tree->modified_extents);
931 write_unlock(&em_tree->lock); 946 write_unlock(&em_tree->lock);
932 if (ret != -EEXIST) { 947 if (ret != -EEXIST) {
933 free_extent_map(em); 948 free_extent_map(em);
@@ -936,11 +951,14 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
936 btrfs_drop_extent_cache(inode, start, 951 btrfs_drop_extent_cache(inode, start,
937 start + ram_size - 1, 0); 952 start + ram_size - 1, 0);
938 } 953 }
954 if (ret)
955 goto out_reserve;
939 956
940 cur_alloc_size = ins.offset; 957 cur_alloc_size = ins.offset;
941 ret = btrfs_add_ordered_extent(inode, start, ins.objectid, 958 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
942 ram_size, cur_alloc_size, 0); 959 ram_size, cur_alloc_size, 0);
943 BUG_ON(ret); /* -ENOMEM */ 960 if (ret)
961 goto out_reserve;
944 962
945 if (root->root_key.objectid == 963 if (root->root_key.objectid ==
946 BTRFS_DATA_RELOC_TREE_OBJECTID) { 964 BTRFS_DATA_RELOC_TREE_OBJECTID) {
@@ -948,7 +966,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
948 cur_alloc_size); 966 cur_alloc_size);
949 if (ret) { 967 if (ret) {
950 btrfs_abort_transaction(trans, root, ret); 968 btrfs_abort_transaction(trans, root, ret);
951 goto out_unlock; 969 goto out_reserve;
952 } 970 }
953 } 971 }
954 972
@@ -977,6 +995,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
977out: 995out:
978 return ret; 996 return ret;
979 997
998out_reserve:
999 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
980out_unlock: 1000out_unlock:
981 extent_clear_unlock_delalloc(inode, 1001 extent_clear_unlock_delalloc(inode,
982 &BTRFS_I(inode)->io_tree, 1002 &BTRFS_I(inode)->io_tree,
@@ -1180,6 +1200,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1180 u64 disk_bytenr; 1200 u64 disk_bytenr;
1181 u64 num_bytes; 1201 u64 num_bytes;
1182 u64 disk_num_bytes; 1202 u64 disk_num_bytes;
1203 u64 ram_bytes;
1183 int extent_type; 1204 int extent_type;
1184 int ret, err; 1205 int ret, err;
1185 int type; 1206 int type;
@@ -1276,6 +1297,7 @@ next_slot:
1276 struct btrfs_file_extent_item); 1297 struct btrfs_file_extent_item);
1277 extent_type = btrfs_file_extent_type(leaf, fi); 1298 extent_type = btrfs_file_extent_type(leaf, fi);
1278 1299
1300 ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
1279 if (extent_type == BTRFS_FILE_EXTENT_REG || 1301 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1280 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 1302 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1281 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1303 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -1359,6 +1381,7 @@ out_check:
1359 em->block_len = num_bytes; 1381 em->block_len = num_bytes;
1360 em->block_start = disk_bytenr; 1382 em->block_start = disk_bytenr;
1361 em->orig_block_len = disk_num_bytes; 1383 em->orig_block_len = disk_num_bytes;
1384 em->ram_bytes = ram_bytes;
1362 em->bdev = root->fs_info->fs_devices->latest_bdev; 1385 em->bdev = root->fs_info->fs_devices->latest_bdev;
1363 em->mod_start = em->start; 1386 em->mod_start = em->start;
1364 em->mod_len = em->len; 1387 em->mod_len = em->len;
@@ -1367,10 +1390,7 @@ out_check:
1367 em->generation = -1; 1390 em->generation = -1;
1368 while (1) { 1391 while (1) {
1369 write_lock(&em_tree->lock); 1392 write_lock(&em_tree->lock);
1370 ret = add_extent_mapping(em_tree, em); 1393 ret = add_extent_mapping(em_tree, em, 1);
1371 if (!ret)
1372 list_move(&em->list,
1373 &em_tree->modified_extents);
1374 write_unlock(&em_tree->lock); 1394 write_unlock(&em_tree->lock);
1375 if (ret != -EEXIST) { 1395 if (ret != -EEXIST) {
1376 free_extent_map(em); 1396 free_extent_map(em);
@@ -1511,7 +1531,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1511 * have pending delalloc work to be done. 1531 * have pending delalloc work to be done.
1512 */ 1532 */
1513static void btrfs_set_bit_hook(struct inode *inode, 1533static void btrfs_set_bit_hook(struct inode *inode,
1514 struct extent_state *state, int *bits) 1534 struct extent_state *state, unsigned long *bits)
1515{ 1535{
1516 1536
1517 /* 1537 /*
@@ -1555,7 +1575,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
1555 * extent_io.c clear_bit_hook, see set_bit_hook for why 1575 * extent_io.c clear_bit_hook, see set_bit_hook for why
1556 */ 1576 */
1557static void btrfs_clear_bit_hook(struct inode *inode, 1577static void btrfs_clear_bit_hook(struct inode *inode,
1558 struct extent_state *state, int *bits) 1578 struct extent_state *state,
1579 unsigned long *bits)
1559{ 1580{
1560 /* 1581 /*
1561 * set_bit and clear bit hooks normally require _irqsave/restore 1582 * set_bit and clear bit hooks normally require _irqsave/restore
@@ -1743,8 +1764,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1743 struct btrfs_ordered_sum *sum; 1764 struct btrfs_ordered_sum *sum;
1744 1765
1745 list_for_each_entry(sum, list, list) { 1766 list_for_each_entry(sum, list, list) {
1767 trans->adding_csums = 1;
1746 btrfs_csum_file_blocks(trans, 1768 btrfs_csum_file_blocks(trans,
1747 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1769 BTRFS_I(inode)->root->fs_info->csum_root, sum);
1770 trans->adding_csums = 0;
1748 } 1771 }
1749 return 0; 1772 return 0;
1750} 1773}
@@ -2777,6 +2800,8 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2777 int ret; 2800 int ret;
2778 struct btrfs_root *root = BTRFS_I(inode)->root; 2801 struct btrfs_root *root = BTRFS_I(inode)->root;
2779 u32 csum = ~(u32)0; 2802 u32 csum = ~(u32)0;
2803 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
2804 DEFAULT_RATELIMIT_BURST);
2780 2805
2781 if (PageChecked(page)) { 2806 if (PageChecked(page)) {
2782 ClearPageChecked(page); 2807 ClearPageChecked(page);
@@ -2803,7 +2828,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2803 if (ret) 2828 if (ret)
2804 goto zeroit; 2829 goto zeroit;
2805 2830
2806 csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1); 2831 csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
2807 btrfs_csum_final(csum, (char *)&csum); 2832 btrfs_csum_final(csum, (char *)&csum);
2808 if (csum != private) 2833 if (csum != private)
2809 goto zeroit; 2834 goto zeroit;
@@ -2813,11 +2838,11 @@ good:
2813 return 0; 2838 return 0;
2814 2839
2815zeroit: 2840zeroit:
2816 printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u " 2841 if (__ratelimit(&_rs))
2817 "private %llu\n", 2842 btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
2818 (unsigned long long)btrfs_ino(page->mapping->host), 2843 (unsigned long long)btrfs_ino(page->mapping->host),
2819 (unsigned long long)start, csum, 2844 (unsigned long long)start, csum,
2820 (unsigned long long)private); 2845 (unsigned long long)private);
2821 memset(kaddr + offset, 1, end - start + 1); 2846 memset(kaddr + offset, 1, end - start + 1);
2822 flush_dcache_page(page); 2847 flush_dcache_page(page);
2823 kunmap_atomic(kaddr); 2848 kunmap_atomic(kaddr);
@@ -3003,7 +3028,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
3003 * We have done the truncate/delete so we can go ahead and remove the orphan 3028 * We have done the truncate/delete so we can go ahead and remove the orphan
3004 * item for this particular inode. 3029 * item for this particular inode.
3005 */ 3030 */
3006int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) 3031static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3032 struct inode *inode)
3007{ 3033{
3008 struct btrfs_root *root = BTRFS_I(inode)->root; 3034 struct btrfs_root *root = BTRFS_I(inode)->root;
3009 int delete_item = 0; 3035 int delete_item = 0;
@@ -3098,8 +3124,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3098 */ 3124 */
3099 3125
3100 if (found_key.offset == last_objectid) { 3126 if (found_key.offset == last_objectid) {
3101 printk(KERN_ERR "btrfs: Error removing orphan entry, " 3127 btrfs_err(root->fs_info,
3102 "stopping orphan cleanup\n"); 3128 "Error removing orphan entry, stopping orphan cleanup");
3103 ret = -EINVAL; 3129 ret = -EINVAL;
3104 goto out; 3130 goto out;
3105 } 3131 }
@@ -3156,8 +3182,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3156 ret = PTR_ERR(trans); 3182 ret = PTR_ERR(trans);
3157 goto out; 3183 goto out;
3158 } 3184 }
3159 printk(KERN_ERR "auto deleting %Lu\n", 3185 btrfs_debug(root->fs_info, "auto deleting %Lu",
3160 found_key.objectid); 3186 found_key.objectid);
3161 ret = btrfs_del_orphan_item(trans, root, 3187 ret = btrfs_del_orphan_item(trans, root,
3162 found_key.objectid); 3188 found_key.objectid);
3163 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */ 3189 BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@@ -3221,13 +3247,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
3221 } 3247 }
3222 3248
3223 if (nr_unlink) 3249 if (nr_unlink)
3224 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); 3250 btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
3225 if (nr_truncate) 3251 if (nr_truncate)
3226 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); 3252 btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);
3227 3253
3228out: 3254out:
3229 if (ret) 3255 if (ret)
3230 printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); 3256 btrfs_crit(root->fs_info,
3257 "could not do orphan cleanup %d", ret);
3231 btrfs_free_path(path); 3258 btrfs_free_path(path);
3232 return ret; 3259 return ret;
3233} 3260}
@@ -3575,9 +3602,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3575 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, 3602 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
3576 dir_ino, &index); 3603 dir_ino, &index);
3577 if (ret) { 3604 if (ret) {
3578 printk(KERN_INFO "btrfs failed to delete reference to %.*s, " 3605 btrfs_info(root->fs_info,
3579 "inode %llu parent %llu\n", name_len, name, 3606 "failed to delete reference to %.*s, inode %llu parent %llu",
3580 (unsigned long long)ino, (unsigned long long)dir_ino); 3607 name_len, name,
3608 (unsigned long long)ino, (unsigned long long)dir_ino);
3581 btrfs_abort_transaction(trans, root, ret); 3609 btrfs_abort_transaction(trans, root, ret);
3582 goto err; 3610 goto err;
3583 } 3611 }
@@ -3599,6 +3627,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3599 dir, index); 3627 dir, index);
3600 if (ret == -ENOENT) 3628 if (ret == -ENOENT)
3601 ret = 0; 3629 ret = 0;
3630 else if (ret)
3631 btrfs_abort_transaction(trans, root, ret);
3602err: 3632err:
3603 btrfs_free_path(path); 3633 btrfs_free_path(path);
3604 if (ret) 3634 if (ret)
@@ -3644,7 +3674,7 @@ static int check_path_shared(struct btrfs_root *root,
3644 eb = path->nodes[level]; 3674 eb = path->nodes[level];
3645 if (!btrfs_block_can_be_shared(root, eb)) 3675 if (!btrfs_block_can_be_shared(root, eb))
3646 continue; 3676 continue;
3647 ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, 3677 ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
3648 &refs, NULL); 3678 &refs, NULL);
3649 if (refs > 1) 3679 if (refs > 1)
3650 return 1; 3680 return 1;
@@ -3679,11 +3709,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
3679 * 1 for the dir item 3709 * 1 for the dir item
3680 * 1 for the dir index 3710 * 1 for the dir index
3681 * 1 for the inode ref 3711 * 1 for the inode ref
3682 * 1 for the inode ref in the tree log
3683 * 2 for the dir entries in the log
3684 * 1 for the inode 3712 * 1 for the inode
3685 */ 3713 */
3686 trans = btrfs_start_transaction(root, 8); 3714 trans = btrfs_start_transaction(root, 5);
3687 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) 3715 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
3688 return trans; 3716 return trans;
3689 3717
@@ -4161,8 +4189,7 @@ search_again:
4161 } 4189 }
4162 size = 4190 size =
4163 btrfs_file_extent_calc_inline_size(size); 4191 btrfs_file_extent_calc_inline_size(size);
4164 btrfs_truncate_item(trans, root, path, 4192 btrfs_truncate_item(root, path, size, 1);
4165 size, 1);
4166 } else if (root->ref_cows) { 4193 } else if (root->ref_cows) {
4167 inode_sub_bytes(inode, item_end + 1 - 4194 inode_sub_bytes(inode, item_end + 1 -
4168 found_key.offset); 4195 found_key.offset);
@@ -4436,16 +4463,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4436 hole_em->block_start = EXTENT_MAP_HOLE; 4463 hole_em->block_start = EXTENT_MAP_HOLE;
4437 hole_em->block_len = 0; 4464 hole_em->block_len = 0;
4438 hole_em->orig_block_len = 0; 4465 hole_em->orig_block_len = 0;
4466 hole_em->ram_bytes = hole_size;
4439 hole_em->bdev = root->fs_info->fs_devices->latest_bdev; 4467 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
4440 hole_em->compress_type = BTRFS_COMPRESS_NONE; 4468 hole_em->compress_type = BTRFS_COMPRESS_NONE;
4441 hole_em->generation = trans->transid; 4469 hole_em->generation = trans->transid;
4442 4470
4443 while (1) { 4471 while (1) {
4444 write_lock(&em_tree->lock); 4472 write_lock(&em_tree->lock);
4445 err = add_extent_mapping(em_tree, hole_em); 4473 err = add_extent_mapping(em_tree, hole_em, 1);
4446 if (!err)
4447 list_move(&hole_em->list,
4448 &em_tree->modified_extents);
4449 write_unlock(&em_tree->lock); 4474 write_unlock(&em_tree->lock);
4450 if (err != -EEXIST) 4475 if (err != -EEXIST)
4451 break; 4476 break;
@@ -4656,8 +4681,9 @@ void btrfs_evict_inode(struct inode *inode)
4656 ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); 4681 ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
4657 4682
4658 if (ret) { 4683 if (ret) {
4659 printk(KERN_WARNING "Could not get space for a " 4684 btrfs_warn(root->fs_info,
4660 "delete, will truncate on mount %d\n", ret); 4685 "Could not get space for a delete, will truncate on mount %d",
4686 ret);
4661 btrfs_orphan_del(NULL, inode); 4687 btrfs_orphan_del(NULL, inode);
4662 btrfs_free_block_rsv(root, rsv); 4688 btrfs_free_block_rsv(root, rsv);
4663 goto no_delete; 4689 goto no_delete;
@@ -5321,7 +5347,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
5321 * FIXME, needs more benchmarking...there are no reasons other than performance 5347 * FIXME, needs more benchmarking...there are no reasons other than performance
5322 * to keep or drop this code. 5348 * to keep or drop this code.
5323 */ 5349 */
5324int btrfs_dirty_inode(struct inode *inode) 5350static int btrfs_dirty_inode(struct inode *inode)
5325{ 5351{
5326 struct btrfs_root *root = BTRFS_I(inode)->root; 5352 struct btrfs_root *root = BTRFS_I(inode)->root;
5327 struct btrfs_trans_handle *trans; 5353 struct btrfs_trans_handle *trans;
@@ -5963,7 +5989,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree,
5963 em->block_start += start_diff; 5989 em->block_start += start_diff;
5964 em->block_len -= start_diff; 5990 em->block_len -= start_diff;
5965 } 5991 }
5966 return add_extent_mapping(em_tree, em); 5992 return add_extent_mapping(em_tree, em, 0);
5967} 5993}
5968 5994
5969static noinline int uncompress_inline(struct btrfs_path *path, 5995static noinline int uncompress_inline(struct btrfs_path *path,
@@ -6137,6 +6163,7 @@ again:
6137 goto not_found_em; 6163 goto not_found_em;
6138 } 6164 }
6139 6165
6166 em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
6140 if (found_type == BTRFS_FILE_EXTENT_REG || 6167 if (found_type == BTRFS_FILE_EXTENT_REG ||
6141 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 6168 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
6142 em->start = extent_start; 6169 em->start = extent_start;
@@ -6245,18 +6272,18 @@ not_found_em:
6245insert: 6272insert:
6246 btrfs_release_path(path); 6273 btrfs_release_path(path);
6247 if (em->start > start || extent_map_end(em) <= start) { 6274 if (em->start > start || extent_map_end(em) <= start) {
6248 printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed " 6275 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
6249 "[%llu %llu]\n", (unsigned long long)em->start, 6276 (unsigned long long)em->start,
6250 (unsigned long long)em->len, 6277 (unsigned long long)em->len,
6251 (unsigned long long)start, 6278 (unsigned long long)start,
6252 (unsigned long long)len); 6279 (unsigned long long)len);
6253 err = -EIO; 6280 err = -EIO;
6254 goto out; 6281 goto out;
6255 } 6282 }
6256 6283
6257 err = 0; 6284 err = 0;
6258 write_lock(&em_tree->lock); 6285 write_lock(&em_tree->lock);
6259 ret = add_extent_mapping(em_tree, em); 6286 ret = add_extent_mapping(em_tree, em, 0);
6260 /* it is possible that someone inserted the extent into the tree 6287 /* it is possible that someone inserted the extent into the tree
6261 * while we had the lock dropped. It is also possible that 6288 * while we had the lock dropped. It is also possible that
6262 * an overlapping map exists in the tree 6289 * an overlapping map exists in the tree
@@ -6468,7 +6495,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6468 } 6495 }
6469 6496
6470 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 6497 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6471 ins.offset, ins.offset, 0); 6498 ins.offset, ins.offset, ins.offset, 0);
6472 if (IS_ERR(em)) 6499 if (IS_ERR(em))
6473 goto out; 6500 goto out;
6474 6501
@@ -6488,7 +6515,9 @@ out:
6488 * block must be cow'd 6515 * block must be cow'd
6489 */ 6516 */
6490static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, 6517static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6491 struct inode *inode, u64 offset, u64 len) 6518 struct inode *inode, u64 offset, u64 *len,
6519 u64 *orig_start, u64 *orig_block_len,
6520 u64 *ram_bytes)
6492{ 6521{
6493 struct btrfs_path *path; 6522 struct btrfs_path *path;
6494 int ret; 6523 int ret;
@@ -6545,8 +6574,12 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6545 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6574 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6546 backref_offset = btrfs_file_extent_offset(leaf, fi); 6575 backref_offset = btrfs_file_extent_offset(leaf, fi);
6547 6576
6577 *orig_start = key.offset - backref_offset;
6578 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
6579 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6580
6548 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); 6581 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6549 if (extent_end < offset + len) { 6582 if (extent_end < offset + *len) {
6550 /* extent doesn't include our full range, must cow */ 6583 /* extent doesn't include our full range, must cow */
6551 goto out; 6584 goto out;
6552 } 6585 }
@@ -6570,13 +6603,14 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
6570 */ 6603 */
6571 disk_bytenr += backref_offset; 6604 disk_bytenr += backref_offset;
6572 disk_bytenr += offset - key.offset; 6605 disk_bytenr += offset - key.offset;
6573 num_bytes = min(offset + len, extent_end) - offset; 6606 num_bytes = min(offset + *len, extent_end) - offset;
6574 if (csum_exist_in_range(root, disk_bytenr, num_bytes)) 6607 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
6575 goto out; 6608 goto out;
6576 /* 6609 /*
6577 * all of the above have passed, it is safe to overwrite this extent 6610 * all of the above have passed, it is safe to overwrite this extent
6578 * without cow 6611 * without cow
6579 */ 6612 */
6613 *len = num_bytes;
6580 ret = 1; 6614 ret = 1;
6581out: 6615out:
6582 btrfs_free_path(path); 6616 btrfs_free_path(path);
@@ -6647,7 +6681,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
6647static struct extent_map *create_pinned_em(struct inode *inode, u64 start, 6681static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
6648 u64 len, u64 orig_start, 6682 u64 len, u64 orig_start,
6649 u64 block_start, u64 block_len, 6683 u64 block_start, u64 block_len,
6650 u64 orig_block_len, int type) 6684 u64 orig_block_len, u64 ram_bytes,
6685 int type)
6651{ 6686{
6652 struct extent_map_tree *em_tree; 6687 struct extent_map_tree *em_tree;
6653 struct extent_map *em; 6688 struct extent_map *em;
@@ -6668,6 +6703,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
6668 em->block_start = block_start; 6703 em->block_start = block_start;
6669 em->bdev = root->fs_info->fs_devices->latest_bdev; 6704 em->bdev = root->fs_info->fs_devices->latest_bdev;
6670 em->orig_block_len = orig_block_len; 6705 em->orig_block_len = orig_block_len;
6706 em->ram_bytes = ram_bytes;
6671 em->generation = -1; 6707 em->generation = -1;
6672 set_bit(EXTENT_FLAG_PINNED, &em->flags); 6708 set_bit(EXTENT_FLAG_PINNED, &em->flags);
6673 if (type == BTRFS_ORDERED_PREALLOC) 6709 if (type == BTRFS_ORDERED_PREALLOC)
@@ -6677,10 +6713,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
6677 btrfs_drop_extent_cache(inode, em->start, 6713 btrfs_drop_extent_cache(inode, em->start,
6678 em->start + em->len - 1, 0); 6714 em->start + em->len - 1, 0);
6679 write_lock(&em_tree->lock); 6715 write_lock(&em_tree->lock);
6680 ret = add_extent_mapping(em_tree, em); 6716 ret = add_extent_mapping(em_tree, em, 1);
6681 if (!ret)
6682 list_move(&em->list,
6683 &em_tree->modified_extents);
6684 write_unlock(&em_tree->lock); 6717 write_unlock(&em_tree->lock);
6685 } while (ret == -EEXIST); 6718 } while (ret == -EEXIST);
6686 6719
@@ -6775,7 +6808,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6775 em->block_start != EXTENT_MAP_HOLE)) { 6808 em->block_start != EXTENT_MAP_HOLE)) {
6776 int type; 6809 int type;
6777 int ret; 6810 int ret;
6778 u64 block_start; 6811 u64 block_start, orig_start, orig_block_len, ram_bytes;
6779 6812
6780 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 6813 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
6781 type = BTRFS_ORDERED_PREALLOC; 6814 type = BTRFS_ORDERED_PREALLOC;
@@ -6793,16 +6826,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6793 if (IS_ERR(trans)) 6826 if (IS_ERR(trans))
6794 goto must_cow; 6827 goto must_cow;
6795 6828
6796 if (can_nocow_odirect(trans, inode, start, len) == 1) { 6829 if (can_nocow_odirect(trans, inode, start, &len, &orig_start,
6797 u64 orig_start = em->orig_start; 6830 &orig_block_len, &ram_bytes) == 1) {
6798 u64 orig_block_len = em->orig_block_len;
6799
6800 if (type == BTRFS_ORDERED_PREALLOC) { 6831 if (type == BTRFS_ORDERED_PREALLOC) {
6801 free_extent_map(em); 6832 free_extent_map(em);
6802 em = create_pinned_em(inode, start, len, 6833 em = create_pinned_em(inode, start, len,
6803 orig_start, 6834 orig_start,
6804 block_start, len, 6835 block_start, len,
6805 orig_block_len, type); 6836 orig_block_len,
6837 ram_bytes, type);
6806 if (IS_ERR(em)) { 6838 if (IS_ERR(em)) {
6807 btrfs_end_transaction(trans, root); 6839 btrfs_end_transaction(trans, root);
6808 goto unlock_err; 6840 goto unlock_err;
@@ -6922,7 +6954,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6922 goto failed; 6954 goto failed;
6923 local_irq_save(flags); 6955 local_irq_save(flags);
6924 kaddr = kmap_atomic(page); 6956 kaddr = kmap_atomic(page);
6925 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, 6957 csum = btrfs_csum_data(kaddr + bvec->bv_offset,
6926 csum, bvec->bv_len); 6958 csum, bvec->bv_len);
6927 btrfs_csum_final(csum, (char *)&csum); 6959 btrfs_csum_final(csum, (char *)&csum);
6928 kunmap_atomic(kaddr); 6960 kunmap_atomic(kaddr);
@@ -6931,11 +6963,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6931 flush_dcache_page(bvec->bv_page); 6963 flush_dcache_page(bvec->bv_page);
6932 if (csum != private) { 6964 if (csum != private) {
6933failed: 6965failed:
6934 printk(KERN_ERR "btrfs csum failed ino %llu off" 6966 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
6935 " %llu csum %u private %u\n", 6967 (unsigned long long)btrfs_ino(inode),
6936 (unsigned long long)btrfs_ino(inode), 6968 (unsigned long long)start,
6937 (unsigned long long)start, 6969 csum, (unsigned)private);
6938 csum, (unsigned)private);
6939 err = -EIO; 6970 err = -EIO;
6940 } 6971 }
6941 } 6972 }
@@ -7411,8 +7442,8 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
7411 return extent_write_full_page(tree, page, btrfs_get_extent, wbc); 7442 return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
7412} 7443}
7413 7444
7414int btrfs_writepages(struct address_space *mapping, 7445static int btrfs_writepages(struct address_space *mapping,
7415 struct writeback_control *wbc) 7446 struct writeback_control *wbc)
7416{ 7447{
7417 struct extent_io_tree *tree; 7448 struct extent_io_tree *tree;
7418 7449
@@ -7927,8 +7958,8 @@ void btrfs_destroy_inode(struct inode *inode)
7927 7958
7928 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 7959 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
7929 &BTRFS_I(inode)->runtime_flags)) { 7960 &BTRFS_I(inode)->runtime_flags)) {
7930 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 7961 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
7931 (unsigned long long)btrfs_ino(inode)); 7962 (unsigned long long)btrfs_ino(inode));
7932 atomic_dec(&root->orphan_inodes); 7963 atomic_dec(&root->orphan_inodes);
7933 } 7964 }
7934 7965
@@ -7937,10 +7968,9 @@ void btrfs_destroy_inode(struct inode *inode)
7937 if (!ordered) 7968 if (!ordered)
7938 break; 7969 break;
7939 else { 7970 else {
7940 printk(KERN_ERR "btrfs found ordered " 7971 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
7941 "extent %llu %llu on inode cleanup\n", 7972 (unsigned long long)ordered->file_offset,
7942 (unsigned long long)ordered->file_offset, 7973 (unsigned long long)ordered->len);
7943 (unsigned long long)ordered->len);
7944 btrfs_remove_ordered_extent(inode, ordered); 7974 btrfs_remove_ordered_extent(inode, ordered);
7945 btrfs_put_ordered_extent(ordered); 7975 btrfs_put_ordered_extent(ordered);
7946 btrfs_put_ordered_extent(ordered); 7976 btrfs_put_ordered_extent(ordered);
@@ -8127,7 +8157,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8127 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items 8157 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
8128 * should cover the worst case number of items we'll modify. 8158 * should cover the worst case number of items we'll modify.
8129 */ 8159 */
8130 trans = btrfs_start_transaction(root, 20); 8160 trans = btrfs_start_transaction(root, 11);
8131 if (IS_ERR(trans)) { 8161 if (IS_ERR(trans)) {
8132 ret = PTR_ERR(trans); 8162 ret = PTR_ERR(trans);
8133 goto out_notrans; 8163 goto out_notrans;
@@ -8557,16 +8587,14 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
8557 em->block_start = ins.objectid; 8587 em->block_start = ins.objectid;
8558 em->block_len = ins.offset; 8588 em->block_len = ins.offset;
8559 em->orig_block_len = ins.offset; 8589 em->orig_block_len = ins.offset;
8590 em->ram_bytes = ins.offset;
8560 em->bdev = root->fs_info->fs_devices->latest_bdev; 8591 em->bdev = root->fs_info->fs_devices->latest_bdev;
8561 set_bit(EXTENT_FLAG_PREALLOC, &em->flags); 8592 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
8562 em->generation = trans->transid; 8593 em->generation = trans->transid;
8563 8594
8564 while (1) { 8595 while (1) {
8565 write_lock(&em_tree->lock); 8596 write_lock(&em_tree->lock);
8566 ret = add_extent_mapping(em_tree, em); 8597 ret = add_extent_mapping(em_tree, em, 1);
8567 if (!ret)
8568 list_move(&em->list,
8569 &em_tree->modified_extents);
8570 write_unlock(&em_tree->lock); 8598 write_unlock(&em_tree->lock);
8571 if (ret != -EEXIST) 8599 if (ret != -EEXIST)
8572 break; 8600 break;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2c02310ff2d9..0de4a2fcfb24 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -723,7 +723,9 @@ static noinline int btrfs_mksubvol(struct path *parent,
723 struct dentry *dentry; 723 struct dentry *dentry;
724 int error; 724 int error;
725 725
726 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 726 error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
727 if (error == -EINTR)
728 return error;
727 729
728 dentry = lookup_one_len(name, parent->dentry, namelen); 730 dentry = lookup_one_len(name, parent->dentry, namelen);
729 error = PTR_ERR(dentry); 731 error = PTR_ERR(dentry);
@@ -1152,8 +1154,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1152 u64 new_align = ~((u64)128 * 1024 - 1); 1154 u64 new_align = ~((u64)128 * 1024 - 1);
1153 struct page **pages = NULL; 1155 struct page **pages = NULL;
1154 1156
1155 if (extent_thresh == 0) 1157 if (isize == 0)
1156 extent_thresh = 256 * 1024; 1158 return 0;
1159
1160 if (range->start >= isize)
1161 return -EINVAL;
1157 1162
1158 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1163 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
1159 if (range->compress_type > BTRFS_COMPRESS_TYPES) 1164 if (range->compress_type > BTRFS_COMPRESS_TYPES)
@@ -1162,8 +1167,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1162 compress_type = range->compress_type; 1167 compress_type = range->compress_type;
1163 } 1168 }
1164 1169
1165 if (isize == 0) 1170 if (extent_thresh == 0)
1166 return 0; 1171 extent_thresh = 256 * 1024;
1167 1172
1168 /* 1173 /*
1169 * if we were not given a file, allocate a readahead 1174 * if we were not given a file, allocate a readahead
@@ -2086,7 +2091,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2086 if (err) 2091 if (err)
2087 goto out; 2092 goto out;
2088 2093
2089 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 2094 err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
2095 if (err == -EINTR)
2096 goto out;
2090 dentry = lookup_one_len(vol_args->name, parent, namelen); 2097 dentry = lookup_one_len(vol_args->name, parent, namelen);
2091 if (IS_ERR(dentry)) { 2098 if (IS_ERR(dentry)) {
2092 err = PTR_ERR(dentry); 2099 err = PTR_ERR(dentry);
@@ -2425,7 +2432,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2425 2432
2426 mutex_lock(&fs_devices->device_list_mutex); 2433 mutex_lock(&fs_devices->device_list_mutex);
2427 dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL); 2434 dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL);
2428 mutex_unlock(&fs_devices->device_list_mutex);
2429 2435
2430 if (!dev) { 2436 if (!dev) {
2431 ret = -ENODEV; 2437 ret = -ENODEV;
@@ -2449,6 +2455,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
2449 } 2455 }
2450 2456
2451out: 2457out:
2458 mutex_unlock(&fs_devices->device_list_mutex);
2452 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2459 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
2453 ret = -EFAULT; 2460 ret = -EFAULT;
2454 2461
@@ -3003,7 +3010,7 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
3003 } 3010 }
3004} 3011}
3005 3012
3006long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 3013static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
3007{ 3014{
3008 struct btrfs_ioctl_space_args space_args; 3015 struct btrfs_ioctl_space_args space_args;
3009 struct btrfs_ioctl_space_info space; 3016 struct btrfs_ioctl_space_info space;
@@ -3693,12 +3700,11 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
3693 goto drop_write; 3700 goto drop_write;
3694 } 3701 }
3695 3702
3696 if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { 3703 down_write(&root->fs_info->subvol_sem);
3697 trans = btrfs_start_transaction(root, 2); 3704 trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
3698 if (IS_ERR(trans)) { 3705 if (IS_ERR(trans)) {
3699 ret = PTR_ERR(trans); 3706 ret = PTR_ERR(trans);
3700 goto out; 3707 goto out;
3701 }
3702 } 3708 }
3703 3709
3704 switch (sa->cmd) { 3710 switch (sa->cmd) {
@@ -3708,9 +3714,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
3708 case BTRFS_QUOTA_CTL_DISABLE: 3714 case BTRFS_QUOTA_CTL_DISABLE:
3709 ret = btrfs_quota_disable(trans, root->fs_info); 3715 ret = btrfs_quota_disable(trans, root->fs_info);
3710 break; 3716 break;
3711 case BTRFS_QUOTA_CTL_RESCAN:
3712 ret = btrfs_quota_rescan(root->fs_info);
3713 break;
3714 default: 3717 default:
3715 ret = -EINVAL; 3718 ret = -EINVAL;
3716 break; 3719 break;
@@ -3719,13 +3722,12 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
3719 if (copy_to_user(arg, sa, sizeof(*sa))) 3722 if (copy_to_user(arg, sa, sizeof(*sa)))
3720 ret = -EFAULT; 3723 ret = -EFAULT;
3721 3724
3722 if (trans) { 3725 err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
3723 err = btrfs_commit_transaction(trans, root); 3726 if (err && !ret)
3724 if (err && !ret) 3727 ret = err;
3725 ret = err;
3726 }
3727out: 3728out:
3728 kfree(sa); 3729 kfree(sa);
3730 up_write(&root->fs_info->subvol_sem);
3729drop_write: 3731drop_write:
3730 mnt_drop_write_file(file); 3732 mnt_drop_write_file(file);
3731 return ret; 3733 return ret;
@@ -3877,6 +3879,64 @@ drop_write:
3877 return ret; 3879 return ret;
3878} 3880}
3879 3881
3882static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
3883{
3884 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3885 struct btrfs_ioctl_quota_rescan_args *qsa;
3886 int ret;
3887
3888 if (!capable(CAP_SYS_ADMIN))
3889 return -EPERM;
3890
3891 ret = mnt_want_write_file(file);
3892 if (ret)
3893 return ret;
3894
3895 qsa = memdup_user(arg, sizeof(*qsa));
3896 if (IS_ERR(qsa)) {
3897 ret = PTR_ERR(qsa);
3898 goto drop_write;
3899 }
3900
3901 if (qsa->flags) {
3902 ret = -EINVAL;
3903 goto out;
3904 }
3905
3906 ret = btrfs_qgroup_rescan(root->fs_info);
3907
3908out:
3909 kfree(qsa);
3910drop_write:
3911 mnt_drop_write_file(file);
3912 return ret;
3913}
3914
3915static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
3916{
3917 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3918 struct btrfs_ioctl_quota_rescan_args *qsa;
3919 int ret = 0;
3920
3921 if (!capable(CAP_SYS_ADMIN))
3922 return -EPERM;
3923
3924 qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
3925 if (!qsa)
3926 return -ENOMEM;
3927
3928 if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3929 qsa->flags = 1;
3930 qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
3931 }
3932
3933 if (copy_to_user(arg, qsa, sizeof(*qsa)))
3934 ret = -EFAULT;
3935
3936 kfree(qsa);
3937 return ret;
3938}
3939
3880static long btrfs_ioctl_set_received_subvol(struct file *file, 3940static long btrfs_ioctl_set_received_subvol(struct file *file,
3881 void __user *arg) 3941 void __user *arg)
3882{ 3942{
@@ -4115,6 +4175,10 @@ long btrfs_ioctl(struct file *file, unsigned int
4115 return btrfs_ioctl_qgroup_create(file, argp); 4175 return btrfs_ioctl_qgroup_create(file, argp);
4116 case BTRFS_IOC_QGROUP_LIMIT: 4176 case BTRFS_IOC_QGROUP_LIMIT:
4117 return btrfs_ioctl_qgroup_limit(file, argp); 4177 return btrfs_ioctl_qgroup_limit(file, argp);
4178 case BTRFS_IOC_QUOTA_RESCAN:
4179 return btrfs_ioctl_quota_rescan(file, argp);
4180 case BTRFS_IOC_QUOTA_RESCAN_STATUS:
4181 return btrfs_ioctl_quota_rescan_status(file, argp);
4118 case BTRFS_IOC_DEV_REPLACE: 4182 case BTRFS_IOC_DEV_REPLACE:
4119 return btrfs_ioctl_dev_replace(root, argp); 4183 return btrfs_ioctl_dev_replace(root, argp);
4120 case BTRFS_IOC_GET_FSLABEL: 4184 case BTRFS_IOC_GET_FSLABEL:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index e95df435d897..01277b8f2373 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,7 +24,7 @@
24#include "extent_io.h" 24#include "extent_io.h"
25#include "locking.h" 25#include "locking.h"
26 26
27void btrfs_assert_tree_read_locked(struct extent_buffer *eb); 27static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
28 28
29/* 29/*
30 * if we currently have a spinning reader or writer lock 30 * if we currently have a spinning reader or writer lock
@@ -264,7 +264,7 @@ void btrfs_assert_tree_locked(struct extent_buffer *eb)
264 BUG_ON(!atomic_read(&eb->write_locks)); 264 BUG_ON(!atomic_read(&eb->write_locks));
265} 265}
266 266
267void btrfs_assert_tree_read_locked(struct extent_buffer *eb) 267static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
268{ 268{
269 BUG_ON(!atomic_read(&eb->read_locks)); 269 BUG_ON(!atomic_read(&eb->read_locks));
270} 270}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index dc08d77b717e..1ddd728541ee 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
557 INIT_LIST_HEAD(&splice); 557 INIT_LIST_HEAD(&splice);
558 INIT_LIST_HEAD(&works); 558 INIT_LIST_HEAD(&works);
559 559
560 mutex_lock(&root->fs_info->ordered_operations_mutex);
560 spin_lock(&root->fs_info->ordered_extent_lock); 561 spin_lock(&root->fs_info->ordered_extent_lock);
561 list_splice_init(&root->fs_info->ordered_extents, &splice); 562 list_splice_init(&root->fs_info->ordered_extents, &splice);
562 while (!list_empty(&splice)) { 563 while (!list_empty(&splice)) {
@@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
600 601
601 cond_resched(); 602 cond_resched();
602 } 603 }
604 mutex_unlock(&root->fs_info->ordered_operations_mutex);
603} 605}
604 606
605/* 607/*
@@ -984,7 +986,7 @@ out:
984 * be reclaimed before their checksum is actually put into the btree 986 * be reclaimed before their checksum is actually put into the btree
985 */ 987 */
986int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 988int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
987 u32 *sum) 989 u32 *sum, int len)
988{ 990{
989 struct btrfs_ordered_sum *ordered_sum; 991 struct btrfs_ordered_sum *ordered_sum;
990 struct btrfs_sector_sum *sector_sums; 992 struct btrfs_sector_sum *sector_sums;
@@ -993,22 +995,28 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
993 unsigned long num_sectors; 995 unsigned long num_sectors;
994 unsigned long i; 996 unsigned long i;
995 u32 sectorsize = BTRFS_I(inode)->root->sectorsize; 997 u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
996 int ret = 1; 998 int index = 0;
997 999
998 ordered = btrfs_lookup_ordered_extent(inode, offset); 1000 ordered = btrfs_lookup_ordered_extent(inode, offset);
999 if (!ordered) 1001 if (!ordered)
1000 return 1; 1002 return 0;
1001 1003
1002 spin_lock_irq(&tree->lock); 1004 spin_lock_irq(&tree->lock);
1003 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 1005 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
1004 if (disk_bytenr >= ordered_sum->bytenr) { 1006 if (disk_bytenr >= ordered_sum->bytenr &&
1005 num_sectors = ordered_sum->len / sectorsize; 1007 disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
1006 sector_sums = ordered_sum->sums; 1008 i = (disk_bytenr - ordered_sum->bytenr) >>
1007 for (i = 0; i < num_sectors; i++) { 1009 inode->i_sb->s_blocksize_bits;
1010 sector_sums = ordered_sum->sums + i;
1011 num_sectors = ordered_sum->len >>
1012 inode->i_sb->s_blocksize_bits;
1013 for (; i < num_sectors; i++) {
1008 if (sector_sums[i].bytenr == disk_bytenr) { 1014 if (sector_sums[i].bytenr == disk_bytenr) {
1009 *sum = sector_sums[i].sum; 1015 sum[index] = sector_sums[i].sum;
1010 ret = 0; 1016 index++;
1011 goto out; 1017 if (index == len)
1018 goto out;
1019 disk_bytenr += sectorsize;
1012 } 1020 }
1013 } 1021 }
1014 } 1022 }
@@ -1016,7 +1024,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
1016out: 1024out:
1017 spin_unlock_irq(&tree->lock); 1025 spin_unlock_irq(&tree->lock);
1018 btrfs_put_ordered_extent(ordered); 1026 btrfs_put_ordered_extent(ordered);
1019 return ret; 1027 return index;
1020} 1028}
1021 1029
1022 1030
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8eadfe406cdd..58b0e3b0ebad 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -196,7 +196,8 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
196 u64 len); 196 u64 len);
197int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 197int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
198 struct btrfs_ordered_extent *ordered); 198 struct btrfs_ordered_extent *ordered);
199int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 199int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
200 u32 *sum, int len);
200int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, 201int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
201 struct btrfs_root *root, int wait); 202 struct btrfs_root *root, int wait);
202void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 203void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 920957ecb27e..dc0024f17c1f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -176,7 +176,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
176 176
177 nr = btrfs_header_nritems(l); 177 nr = btrfs_header_nritems(l);
178 178
179 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", 179 btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
180 (unsigned long long)btrfs_header_bytenr(l), nr, 180 (unsigned long long)btrfs_header_bytenr(l), nr,
181 btrfs_leaf_free_space(root, l)); 181 btrfs_leaf_free_space(root, l));
182 for (i = 0 ; i < nr ; i++) { 182 for (i = 0 ; i < nr ; i++) {
@@ -319,10 +319,9 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
319 btrfs_print_leaf(root, c); 319 btrfs_print_leaf(root, c);
320 return; 320 return;
321 } 321 }
322 printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n", 322 btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
323 (unsigned long long)btrfs_header_bytenr(c), 323 (unsigned long long)btrfs_header_bytenr(c),
324 level, nr, 324 level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
325 (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
326 for (i = 0; i < nr; i++) { 325 for (i = 0; i < nr; i++) {
327 btrfs_node_key_to_cpu(c, &key, i); 326 btrfs_node_key_to_cpu(c, &key, i);
328 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n", 327 printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index da75efe534d5..7faddfacc5bd 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -19,5 +19,5 @@
19#ifndef __PRINT_TREE_ 19#ifndef __PRINT_TREE_
20#define __PRINT_TREE_ 20#define __PRINT_TREE_
21void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); 21void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
22void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); 22void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c);
23#endif 23#endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5471e47d6559..9d49c586995a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -31,13 +31,13 @@
31#include "locking.h" 31#include "locking.h"
32#include "ulist.h" 32#include "ulist.h"
33#include "backref.h" 33#include "backref.h"
34#include "extent_io.h"
34 35
35/* TODO XXX FIXME 36/* TODO XXX FIXME
36 * - subvol delete -> delete when ref goes to 0? delete limits also? 37 * - subvol delete -> delete when ref goes to 0? delete limits also?
37 * - reorganize keys 38 * - reorganize keys
38 * - compressed 39 * - compressed
39 * - sync 40 * - sync
40 * - rescan
41 * - copy also limits on subvol creation 41 * - copy also limits on subvol creation
42 * - limit 42 * - limit
43 * - caches fuer ulists 43 * - caches fuer ulists
@@ -98,7 +98,15 @@ struct btrfs_qgroup_list {
98 struct btrfs_qgroup *member; 98 struct btrfs_qgroup *member;
99}; 99};
100 100
101/* must be called with qgroup_lock held */ 101struct qgroup_rescan {
102 struct btrfs_work work;
103 struct btrfs_fs_info *fs_info;
104};
105
106static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
107 struct qgroup_rescan *qscan);
108
109/* must be called with qgroup_ioctl_lock held */
102static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 110static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
103 u64 qgroupid) 111 u64 qgroupid)
104{ 112{
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
298 } 306 }
299 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 307 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
300 ptr); 308 ptr);
301 /* FIXME read scan element */ 309 fs_info->qgroup_rescan_progress.objectid =
310 btrfs_qgroup_status_rescan(l, ptr);
311 if (fs_info->qgroup_flags &
312 BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
313 struct qgroup_rescan *qscan =
314 kmalloc(sizeof(*qscan), GFP_NOFS);
315 if (!qscan) {
316 ret = -ENOMEM;
317 goto out;
318 }
319 fs_info->qgroup_rescan_progress.type = 0;
320 fs_info->qgroup_rescan_progress.offset = 0;
321 qgroup_rescan_start(fs_info, qscan);
322 }
302 goto next1; 323 goto next1;
303 } 324 }
304 325
@@ -420,8 +441,6 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
420 qgroup = rb_entry(n, struct btrfs_qgroup, node); 441 qgroup = rb_entry(n, struct btrfs_qgroup, node);
421 rb_erase(n, &fs_info->qgroup_tree); 442 rb_erase(n, &fs_info->qgroup_tree);
422 443
423 WARN_ON(!list_empty(&qgroup->dirty));
424
425 while (!list_empty(&qgroup->groups)) { 444 while (!list_empty(&qgroup->groups)) {
426 list = list_first_entry(&qgroup->groups, 445 list = list_first_entry(&qgroup->groups,
427 struct btrfs_qgroup_list, 446 struct btrfs_qgroup_list,
@@ -721,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
721 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 740 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
722 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 741 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
723 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 742 btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
724 /* XXX scan */ 743 btrfs_set_qgroup_status_rescan(l, ptr,
744 fs_info->qgroup_rescan_progress.objectid);
725 745
726 btrfs_mark_buffer_dirty(l); 746 btrfs_mark_buffer_dirty(l);
727 747
@@ -783,19 +803,21 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
783 struct btrfs_fs_info *fs_info) 803 struct btrfs_fs_info *fs_info)
784{ 804{
785 struct btrfs_root *quota_root; 805 struct btrfs_root *quota_root;
806 struct btrfs_root *tree_root = fs_info->tree_root;
786 struct btrfs_path *path = NULL; 807 struct btrfs_path *path = NULL;
787 struct btrfs_qgroup_status_item *ptr; 808 struct btrfs_qgroup_status_item *ptr;
788 struct extent_buffer *leaf; 809 struct extent_buffer *leaf;
789 struct btrfs_key key; 810 struct btrfs_key key;
811 struct btrfs_key found_key;
812 struct btrfs_qgroup *qgroup = NULL;
790 int ret = 0; 813 int ret = 0;
814 int slot;
791 815
792 spin_lock(&fs_info->qgroup_lock); 816 mutex_lock(&fs_info->qgroup_ioctl_lock);
793 if (fs_info->quota_root) { 817 if (fs_info->quota_root) {
794 fs_info->pending_quota_state = 1; 818 fs_info->pending_quota_state = 1;
795 spin_unlock(&fs_info->qgroup_lock);
796 goto out; 819 goto out;
797 } 820 }
798 spin_unlock(&fs_info->qgroup_lock);
799 821
800 /* 822 /*
801 * initially create the quota tree 823 * initially create the quota tree
@@ -830,10 +852,57 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
830 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 852 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
831 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 853 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
832 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 854 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
833 btrfs_set_qgroup_status_scan(leaf, ptr, 0); 855 btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
834 856
835 btrfs_mark_buffer_dirty(leaf); 857 btrfs_mark_buffer_dirty(leaf);
836 858
859 key.objectid = 0;
860 key.type = BTRFS_ROOT_REF_KEY;
861 key.offset = 0;
862
863 btrfs_release_path(path);
864 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
865 if (ret > 0)
866 goto out_add_root;
867 if (ret < 0)
868 goto out_free_path;
869
870
871 while (1) {
872 slot = path->slots[0];
873 leaf = path->nodes[0];
874 btrfs_item_key_to_cpu(leaf, &found_key, slot);
875
876 if (found_key.type == BTRFS_ROOT_REF_KEY) {
877 ret = add_qgroup_item(trans, quota_root,
878 found_key.offset);
879 if (ret)
880 goto out_free_path;
881
882 qgroup = add_qgroup_rb(fs_info, found_key.offset);
883 if (IS_ERR(qgroup)) {
884 ret = PTR_ERR(qgroup);
885 goto out_free_path;
886 }
887 }
888 ret = btrfs_next_item(tree_root, path);
889 if (ret < 0)
890 goto out_free_path;
891 if (ret)
892 break;
893 }
894
895out_add_root:
896 btrfs_release_path(path);
897 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
898 if (ret)
899 goto out_free_path;
900
901 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
902 if (IS_ERR(qgroup)) {
903 ret = PTR_ERR(qgroup);
904 goto out_free_path;
905 }
837 spin_lock(&fs_info->qgroup_lock); 906 spin_lock(&fs_info->qgroup_lock);
838 fs_info->quota_root = quota_root; 907 fs_info->quota_root = quota_root;
839 fs_info->pending_quota_state = 1; 908 fs_info->pending_quota_state = 1;
@@ -847,6 +916,7 @@ out_free_root:
847 kfree(quota_root); 916 kfree(quota_root);
848 } 917 }
849out: 918out:
919 mutex_unlock(&fs_info->qgroup_ioctl_lock);
850 return ret; 920 return ret;
851} 921}
852 922
@@ -857,11 +927,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
857 struct btrfs_root *quota_root; 927 struct btrfs_root *quota_root;
858 int ret = 0; 928 int ret = 0;
859 929
930 mutex_lock(&fs_info->qgroup_ioctl_lock);
931 if (!fs_info->quota_root)
932 goto out;
860 spin_lock(&fs_info->qgroup_lock); 933 spin_lock(&fs_info->qgroup_lock);
861 if (!fs_info->quota_root) {
862 spin_unlock(&fs_info->qgroup_lock);
863 return 0;
864 }
865 fs_info->quota_enabled = 0; 934 fs_info->quota_enabled = 0;
866 fs_info->pending_quota_state = 0; 935 fs_info->pending_quota_state = 0;
867 quota_root = fs_info->quota_root; 936 quota_root = fs_info->quota_root;
@@ -869,8 +938,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
869 btrfs_free_qgroup_config(fs_info); 938 btrfs_free_qgroup_config(fs_info);
870 spin_unlock(&fs_info->qgroup_lock); 939 spin_unlock(&fs_info->qgroup_lock);
871 940
872 if (!quota_root) 941 if (!quota_root) {
873 return -EINVAL; 942 ret = -EINVAL;
943 goto out;
944 }
874 945
875 ret = btrfs_clean_quota_tree(trans, quota_root); 946 ret = btrfs_clean_quota_tree(trans, quota_root);
876 if (ret) 947 if (ret)
@@ -891,39 +962,62 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
891 free_extent_buffer(quota_root->commit_root); 962 free_extent_buffer(quota_root->commit_root);
892 kfree(quota_root); 963 kfree(quota_root);
893out: 964out:
965 mutex_unlock(&fs_info->qgroup_ioctl_lock);
894 return ret; 966 return ret;
895} 967}
896 968
897int btrfs_quota_rescan(struct btrfs_fs_info *fs_info) 969static void qgroup_dirty(struct btrfs_fs_info *fs_info,
970 struct btrfs_qgroup *qgroup)
898{ 971{
899 /* FIXME */ 972 if (list_empty(&qgroup->dirty))
900 return 0; 973 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
901} 974}
902 975
903int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 976int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
904 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 977 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
905{ 978{
906 struct btrfs_root *quota_root; 979 struct btrfs_root *quota_root;
980 struct btrfs_qgroup *parent;
981 struct btrfs_qgroup *member;
982 struct btrfs_qgroup_list *list;
907 int ret = 0; 983 int ret = 0;
908 984
985 mutex_lock(&fs_info->qgroup_ioctl_lock);
909 quota_root = fs_info->quota_root; 986 quota_root = fs_info->quota_root;
910 if (!quota_root) 987 if (!quota_root) {
911 return -EINVAL; 988 ret = -EINVAL;
989 goto out;
990 }
991 member = find_qgroup_rb(fs_info, src);
992 parent = find_qgroup_rb(fs_info, dst);
993 if (!member || !parent) {
994 ret = -EINVAL;
995 goto out;
996 }
997
998 /* check if such qgroup relation exist firstly */
999 list_for_each_entry(list, &member->groups, next_group) {
1000 if (list->group == parent) {
1001 ret = -EEXIST;
1002 goto out;
1003 }
1004 }
912 1005
913 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1006 ret = add_qgroup_relation_item(trans, quota_root, src, dst);
914 if (ret) 1007 if (ret)
915 return ret; 1008 goto out;
916 1009
917 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1010 ret = add_qgroup_relation_item(trans, quota_root, dst, src);
918 if (ret) { 1011 if (ret) {
919 del_qgroup_relation_item(trans, quota_root, src, dst); 1012 del_qgroup_relation_item(trans, quota_root, src, dst);
920 return ret; 1013 goto out;
921 } 1014 }
922 1015
923 spin_lock(&fs_info->qgroup_lock); 1016 spin_lock(&fs_info->qgroup_lock);
924 ret = add_relation_rb(quota_root->fs_info, src, dst); 1017 ret = add_relation_rb(quota_root->fs_info, src, dst);
925 spin_unlock(&fs_info->qgroup_lock); 1018 spin_unlock(&fs_info->qgroup_lock);
926 1019out:
1020 mutex_unlock(&fs_info->qgroup_ioctl_lock);
927 return ret; 1021 return ret;
928} 1022}
929 1023
@@ -931,13 +1025,34 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
931 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1025 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
932{ 1026{
933 struct btrfs_root *quota_root; 1027 struct btrfs_root *quota_root;
1028 struct btrfs_qgroup *parent;
1029 struct btrfs_qgroup *member;
1030 struct btrfs_qgroup_list *list;
934 int ret = 0; 1031 int ret = 0;
935 int err; 1032 int err;
936 1033
1034 mutex_lock(&fs_info->qgroup_ioctl_lock);
937 quota_root = fs_info->quota_root; 1035 quota_root = fs_info->quota_root;
938 if (!quota_root) 1036 if (!quota_root) {
939 return -EINVAL; 1037 ret = -EINVAL;
1038 goto out;
1039 }
1040
1041 member = find_qgroup_rb(fs_info, src);
1042 parent = find_qgroup_rb(fs_info, dst);
1043 if (!member || !parent) {
1044 ret = -EINVAL;
1045 goto out;
1046 }
940 1047
1048 /* check if such qgroup relation exist firstly */
1049 list_for_each_entry(list, &member->groups, next_group) {
1050 if (list->group == parent)
1051 goto exist;
1052 }
1053 ret = -ENOENT;
1054 goto out;
1055exist:
941 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1056 ret = del_qgroup_relation_item(trans, quota_root, src, dst);
942 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1057 err = del_qgroup_relation_item(trans, quota_root, dst, src);
943 if (err && !ret) 1058 if (err && !ret)
@@ -945,9 +1060,9 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
945 1060
946 spin_lock(&fs_info->qgroup_lock); 1061 spin_lock(&fs_info->qgroup_lock);
947 del_relation_rb(fs_info, src, dst); 1062 del_relation_rb(fs_info, src, dst);
948
949 spin_unlock(&fs_info->qgroup_lock); 1063 spin_unlock(&fs_info->qgroup_lock);
950 1064out:
1065 mutex_unlock(&fs_info->qgroup_ioctl_lock);
951 return ret; 1066 return ret;
952} 1067}
953 1068
@@ -958,11 +1073,21 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
958 struct btrfs_qgroup *qgroup; 1073 struct btrfs_qgroup *qgroup;
959 int ret = 0; 1074 int ret = 0;
960 1075
1076 mutex_lock(&fs_info->qgroup_ioctl_lock);
961 quota_root = fs_info->quota_root; 1077 quota_root = fs_info->quota_root;
962 if (!quota_root) 1078 if (!quota_root) {
963 return -EINVAL; 1079 ret = -EINVAL;
1080 goto out;
1081 }
1082 qgroup = find_qgroup_rb(fs_info, qgroupid);
1083 if (qgroup) {
1084 ret = -EEXIST;
1085 goto out;
1086 }
964 1087
965 ret = add_qgroup_item(trans, quota_root, qgroupid); 1088 ret = add_qgroup_item(trans, quota_root, qgroupid);
1089 if (ret)
1090 goto out;
966 1091
967 spin_lock(&fs_info->qgroup_lock); 1092 spin_lock(&fs_info->qgroup_lock);
968 qgroup = add_qgroup_rb(fs_info, qgroupid); 1093 qgroup = add_qgroup_rb(fs_info, qgroupid);
@@ -970,7 +1095,8 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
970 1095
971 if (IS_ERR(qgroup)) 1096 if (IS_ERR(qgroup))
972 ret = PTR_ERR(qgroup); 1097 ret = PTR_ERR(qgroup);
973 1098out:
1099 mutex_unlock(&fs_info->qgroup_ioctl_lock);
974 return ret; 1100 return ret;
975} 1101}
976 1102
@@ -981,27 +1107,32 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
981 struct btrfs_qgroup *qgroup; 1107 struct btrfs_qgroup *qgroup;
982 int ret = 0; 1108 int ret = 0;
983 1109
1110 mutex_lock(&fs_info->qgroup_ioctl_lock);
984 quota_root = fs_info->quota_root; 1111 quota_root = fs_info->quota_root;
985 if (!quota_root) 1112 if (!quota_root) {
986 return -EINVAL; 1113 ret = -EINVAL;
1114 goto out;
1115 }
987 1116
988 /* check if there are no relations to this qgroup */
989 spin_lock(&fs_info->qgroup_lock);
990 qgroup = find_qgroup_rb(fs_info, qgroupid); 1117 qgroup = find_qgroup_rb(fs_info, qgroupid);
991 if (qgroup) { 1118 if (!qgroup) {
992 if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) { 1119 ret = -ENOENT;
993 spin_unlock(&fs_info->qgroup_lock); 1120 goto out;
994 return -EBUSY; 1121 } else {
1122 /* check if there are no relations to this qgroup */
1123 if (!list_empty(&qgroup->groups) ||
1124 !list_empty(&qgroup->members)) {
1125 ret = -EBUSY;
1126 goto out;
995 } 1127 }
996 } 1128 }
997 spin_unlock(&fs_info->qgroup_lock);
998
999 ret = del_qgroup_item(trans, quota_root, qgroupid); 1129 ret = del_qgroup_item(trans, quota_root, qgroupid);
1000 1130
1001 spin_lock(&fs_info->qgroup_lock); 1131 spin_lock(&fs_info->qgroup_lock);
1002 del_qgroup_rb(quota_root->fs_info, qgroupid); 1132 del_qgroup_rb(quota_root->fs_info, qgroupid);
1003 spin_unlock(&fs_info->qgroup_lock); 1133 spin_unlock(&fs_info->qgroup_lock);
1004 1134out:
1135 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1005 return ret; 1136 return ret;
1006} 1137}
1007 1138
@@ -1009,13 +1140,22 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1009 struct btrfs_fs_info *fs_info, u64 qgroupid, 1140 struct btrfs_fs_info *fs_info, u64 qgroupid,
1010 struct btrfs_qgroup_limit *limit) 1141 struct btrfs_qgroup_limit *limit)
1011{ 1142{
1012 struct btrfs_root *quota_root = fs_info->quota_root; 1143 struct btrfs_root *quota_root;
1013 struct btrfs_qgroup *qgroup; 1144 struct btrfs_qgroup *qgroup;
1014 int ret = 0; 1145 int ret = 0;
1015 1146
1016 if (!quota_root) 1147 mutex_lock(&fs_info->qgroup_ioctl_lock);
1017 return -EINVAL; 1148 quota_root = fs_info->quota_root;
1149 if (!quota_root) {
1150 ret = -EINVAL;
1151 goto out;
1152 }
1018 1153
1154 qgroup = find_qgroup_rb(fs_info, qgroupid);
1155 if (!qgroup) {
1156 ret = -ENOENT;
1157 goto out;
1158 }
1019 ret = update_qgroup_limit_item(trans, quota_root, qgroupid, 1159 ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1020 limit->flags, limit->max_rfer, 1160 limit->flags, limit->max_rfer,
1021 limit->max_excl, limit->rsv_rfer, 1161 limit->max_excl, limit->rsv_rfer,
@@ -1027,31 +1167,17 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1027 } 1167 }
1028 1168
1029 spin_lock(&fs_info->qgroup_lock); 1169 spin_lock(&fs_info->qgroup_lock);
1030
1031 qgroup = find_qgroup_rb(fs_info, qgroupid);
1032 if (!qgroup) {
1033 ret = -ENOENT;
1034 goto unlock;
1035 }
1036 qgroup->lim_flags = limit->flags; 1170 qgroup->lim_flags = limit->flags;
1037 qgroup->max_rfer = limit->max_rfer; 1171 qgroup->max_rfer = limit->max_rfer;
1038 qgroup->max_excl = limit->max_excl; 1172 qgroup->max_excl = limit->max_excl;
1039 qgroup->rsv_rfer = limit->rsv_rfer; 1173 qgroup->rsv_rfer = limit->rsv_rfer;
1040 qgroup->rsv_excl = limit->rsv_excl; 1174 qgroup->rsv_excl = limit->rsv_excl;
1041
1042unlock:
1043 spin_unlock(&fs_info->qgroup_lock); 1175 spin_unlock(&fs_info->qgroup_lock);
1044 1176out:
1177 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1045 return ret; 1178 return ret;
1046} 1179}
1047 1180
1048static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1049 struct btrfs_qgroup *qgroup)
1050{
1051 if (list_empty(&qgroup->dirty))
1052 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1053}
1054
1055/* 1181/*
1056 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts 1182 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1057 * the modification into a list that's later used by btrfs_end_transaction to 1183 * the modification into a list that's later used by btrfs_end_transaction to
@@ -1075,6 +1201,144 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1075 return 0; 1201 return 0;
1076} 1202}
1077 1203
1204static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1205 struct ulist *roots, struct ulist *tmp,
1206 u64 seq)
1207{
1208 struct ulist_node *unode;
1209 struct ulist_iterator uiter;
1210 struct ulist_node *tmp_unode;
1211 struct ulist_iterator tmp_uiter;
1212 struct btrfs_qgroup *qg;
1213 int ret;
1214
1215 ULIST_ITER_INIT(&uiter);
1216 while ((unode = ulist_next(roots, &uiter))) {
1217 qg = find_qgroup_rb(fs_info, unode->val);
1218 if (!qg)
1219 continue;
1220
1221 ulist_reinit(tmp);
1222 /* XXX id not needed */
1223 ret = ulist_add(tmp, qg->qgroupid,
1224 (u64)(uintptr_t)qg, GFP_ATOMIC);
1225 if (ret < 0)
1226 return ret;
1227 ULIST_ITER_INIT(&tmp_uiter);
1228 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1229 struct btrfs_qgroup_list *glist;
1230
1231 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1232 if (qg->refcnt < seq)
1233 qg->refcnt = seq + 1;
1234 else
1235 ++qg->refcnt;
1236
1237 list_for_each_entry(glist, &qg->groups, next_group) {
1238 ret = ulist_add(tmp, glist->group->qgroupid,
1239 (u64)(uintptr_t)glist->group,
1240 GFP_ATOMIC);
1241 if (ret < 0)
1242 return ret;
1243 }
1244 }
1245 }
1246
1247 return 0;
1248}
1249
1250static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
1251 struct ulist *roots, struct ulist *tmp,
1252 u64 seq, int sgn, u64 num_bytes,
1253 struct btrfs_qgroup *qgroup)
1254{
1255 struct ulist_node *unode;
1256 struct ulist_iterator uiter;
1257 struct btrfs_qgroup *qg;
1258 struct btrfs_qgroup_list *glist;
1259 int ret;
1260
1261 ulist_reinit(tmp);
1262 ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1263 if (ret < 0)
1264 return ret;
1265
1266 ULIST_ITER_INIT(&uiter);
1267 while ((unode = ulist_next(tmp, &uiter))) {
1268 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1269 if (qg->refcnt < seq) {
1270 /* not visited by step 1 */
1271 qg->rfer += sgn * num_bytes;
1272 qg->rfer_cmpr += sgn * num_bytes;
1273 if (roots->nnodes == 0) {
1274 qg->excl += sgn * num_bytes;
1275 qg->excl_cmpr += sgn * num_bytes;
1276 }
1277 qgroup_dirty(fs_info, qg);
1278 }
1279 WARN_ON(qg->tag >= seq);
1280 qg->tag = seq;
1281
1282 list_for_each_entry(glist, &qg->groups, next_group) {
1283 ret = ulist_add(tmp, glist->group->qgroupid,
1284 (uintptr_t)glist->group, GFP_ATOMIC);
1285 if (ret < 0)
1286 return ret;
1287 }
1288 }
1289
1290 return 0;
1291}
1292
1293static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
1294 struct ulist *roots, struct ulist *tmp,
1295 u64 seq, int sgn, u64 num_bytes)
1296{
1297 struct ulist_node *unode;
1298 struct ulist_iterator uiter;
1299 struct btrfs_qgroup *qg;
1300 struct ulist_node *tmp_unode;
1301 struct ulist_iterator tmp_uiter;
1302 int ret;
1303
1304 ULIST_ITER_INIT(&uiter);
1305 while ((unode = ulist_next(roots, &uiter))) {
1306 qg = find_qgroup_rb(fs_info, unode->val);
1307 if (!qg)
1308 continue;
1309
1310 ulist_reinit(tmp);
1311 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1312 if (ret < 0)
1313 return ret;
1314
1315 ULIST_ITER_INIT(&tmp_uiter);
1316 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1317 struct btrfs_qgroup_list *glist;
1318
1319 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1320 if (qg->tag == seq)
1321 continue;
1322
1323 if (qg->refcnt - seq == roots->nnodes) {
1324 qg->excl -= sgn * num_bytes;
1325 qg->excl_cmpr -= sgn * num_bytes;
1326 qgroup_dirty(fs_info, qg);
1327 }
1328
1329 list_for_each_entry(glist, &qg->groups, next_group) {
1330 ret = ulist_add(tmp, glist->group->qgroupid,
1331 (uintptr_t)glist->group,
1332 GFP_ATOMIC);
1333 if (ret < 0)
1334 return ret;
1335 }
1336 }
1337 }
1338
1339 return 0;
1340}
1341
1078/* 1342/*
1079 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 1343 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1080 * from the fs. First, all roots referencing the extent are searched, and 1344 * from the fs. First, all roots referencing the extent are searched, and
@@ -1090,10 +1354,8 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1090 struct btrfs_root *quota_root; 1354 struct btrfs_root *quota_root;
1091 u64 ref_root; 1355 u64 ref_root;
1092 struct btrfs_qgroup *qgroup; 1356 struct btrfs_qgroup *qgroup;
1093 struct ulist_node *unode;
1094 struct ulist *roots = NULL; 1357 struct ulist *roots = NULL;
1095 struct ulist *tmp = NULL; 1358 struct ulist *tmp = NULL;
1096 struct ulist_iterator uiter;
1097 u64 seq; 1359 u64 seq;
1098 int ret = 0; 1360 int ret = 0;
1099 int sgn; 1361 int sgn;
@@ -1132,9 +1394,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1132 case BTRFS_ADD_DELAYED_REF: 1394 case BTRFS_ADD_DELAYED_REF:
1133 case BTRFS_ADD_DELAYED_EXTENT: 1395 case BTRFS_ADD_DELAYED_EXTENT:
1134 sgn = 1; 1396 sgn = 1;
1397 seq = btrfs_tree_mod_seq_prev(node->seq);
1135 break; 1398 break;
1136 case BTRFS_DROP_DELAYED_REF: 1399 case BTRFS_DROP_DELAYED_REF:
1137 sgn = -1; 1400 sgn = -1;
1401 seq = node->seq;
1138 break; 1402 break;
1139 case BTRFS_UPDATE_DELAYED_HEAD: 1403 case BTRFS_UPDATE_DELAYED_HEAD:
1140 return 0; 1404 return 0;
@@ -1142,20 +1406,37 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1142 BUG(); 1406 BUG();
1143 } 1407 }
1144 1408
1409 mutex_lock(&fs_info->qgroup_rescan_lock);
1410 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1411 if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1412 mutex_unlock(&fs_info->qgroup_rescan_lock);
1413 return 0;
1414 }
1415 }
1416 mutex_unlock(&fs_info->qgroup_rescan_lock);
1417
1145 /* 1418 /*
1146 * the delayed ref sequence number we pass depends on the direction of 1419 * the delayed ref sequence number we pass depends on the direction of
1147 * the operation. for add operations, we pass (node->seq - 1) to skip 1420 * the operation. for add operations, we pass
1421 * tree_mod_log_prev_seq(node->seq) to skip
1148 * the delayed ref's current sequence number, because we need the state 1422 * the delayed ref's current sequence number, because we need the state
1149 * of the tree before the add operation. for delete operations, we pass 1423 * of the tree before the add operation. for delete operations, we pass
1150 * (node->seq) to include the delayed ref's current sequence number, 1424 * (node->seq) to include the delayed ref's current sequence number,
1151 * because we need the state of the tree after the delete operation. 1425 * because we need the state of the tree after the delete operation.
1152 */ 1426 */
1153 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, 1427 ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
1154 sgn > 0 ? node->seq - 1 : node->seq, &roots);
1155 if (ret < 0) 1428 if (ret < 0)
1156 goto out; 1429 return ret;
1157 1430
1431 mutex_lock(&fs_info->qgroup_rescan_lock);
1158 spin_lock(&fs_info->qgroup_lock); 1432 spin_lock(&fs_info->qgroup_lock);
1433 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1434 if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1435 ret = 0;
1436 goto unlock;
1437 }
1438 }
1439
1159 quota_root = fs_info->quota_root; 1440 quota_root = fs_info->quota_root;
1160 if (!quota_root) 1441 if (!quota_root)
1161 goto unlock; 1442 goto unlock;
@@ -1175,107 +1456,29 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1175 seq = fs_info->qgroup_seq; 1456 seq = fs_info->qgroup_seq;
1176 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 1457 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1177 1458
1178 ULIST_ITER_INIT(&uiter); 1459 ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1179 while ((unode = ulist_next(roots, &uiter))) { 1460 if (ret)
1180 struct ulist_node *tmp_unode; 1461 goto unlock;
1181 struct ulist_iterator tmp_uiter;
1182 struct btrfs_qgroup *qg;
1183
1184 qg = find_qgroup_rb(fs_info, unode->val);
1185 if (!qg)
1186 continue;
1187
1188 ulist_reinit(tmp);
1189 /* XXX id not needed */
1190 ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
1191 ULIST_ITER_INIT(&tmp_uiter);
1192 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1193 struct btrfs_qgroup_list *glist;
1194
1195 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1196 if (qg->refcnt < seq)
1197 qg->refcnt = seq + 1;
1198 else
1199 ++qg->refcnt;
1200
1201 list_for_each_entry(glist, &qg->groups, next_group) {
1202 ulist_add(tmp, glist->group->qgroupid,
1203 (u64)(uintptr_t)glist->group,
1204 GFP_ATOMIC);
1205 }
1206 }
1207 }
1208 1462
1209 /* 1463 /*
1210 * step 2: walk from the new root 1464 * step 2: walk from the new root
1211 */ 1465 */
1212 ulist_reinit(tmp); 1466 ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
1213 ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1467 node->num_bytes, qgroup);
1214 ULIST_ITER_INIT(&uiter); 1468 if (ret)
1215 while ((unode = ulist_next(tmp, &uiter))) { 1469 goto unlock;
1216 struct btrfs_qgroup *qg;
1217 struct btrfs_qgroup_list *glist;
1218
1219 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1220 if (qg->refcnt < seq) {
1221 /* not visited by step 1 */
1222 qg->rfer += sgn * node->num_bytes;
1223 qg->rfer_cmpr += sgn * node->num_bytes;
1224 if (roots->nnodes == 0) {
1225 qg->excl += sgn * node->num_bytes;
1226 qg->excl_cmpr += sgn * node->num_bytes;
1227 }
1228 qgroup_dirty(fs_info, qg);
1229 }
1230 WARN_ON(qg->tag >= seq);
1231 qg->tag = seq;
1232
1233 list_for_each_entry(glist, &qg->groups, next_group) {
1234 ulist_add(tmp, glist->group->qgroupid,
1235 (uintptr_t)glist->group, GFP_ATOMIC);
1236 }
1237 }
1238 1470
1239 /* 1471 /*
1240 * step 3: walk again from old refs 1472 * step 3: walk again from old refs
1241 */ 1473 */
1242 ULIST_ITER_INIT(&uiter); 1474 ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
1243 while ((unode = ulist_next(roots, &uiter))) { 1475 node->num_bytes);
1244 struct btrfs_qgroup *qg; 1476 if (ret)
1245 struct ulist_node *tmp_unode; 1477 goto unlock;
1246 struct ulist_iterator tmp_uiter;
1247
1248 qg = find_qgroup_rb(fs_info, unode->val);
1249 if (!qg)
1250 continue;
1251
1252 ulist_reinit(tmp);
1253 ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1254 ULIST_ITER_INIT(&tmp_uiter);
1255 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1256 struct btrfs_qgroup_list *glist;
1257
1258 qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1259 if (qg->tag == seq)
1260 continue;
1261
1262 if (qg->refcnt - seq == roots->nnodes) {
1263 qg->excl -= sgn * node->num_bytes;
1264 qg->excl_cmpr -= sgn * node->num_bytes;
1265 qgroup_dirty(fs_info, qg);
1266 }
1267 1478
1268 list_for_each_entry(glist, &qg->groups, next_group) {
1269 ulist_add(tmp, glist->group->qgroupid,
1270 (uintptr_t)glist->group,
1271 GFP_ATOMIC);
1272 }
1273 }
1274 }
1275 ret = 0;
1276unlock: 1479unlock:
1277 spin_unlock(&fs_info->qgroup_lock); 1480 spin_unlock(&fs_info->qgroup_lock);
1278out: 1481 mutex_unlock(&fs_info->qgroup_rescan_lock);
1279 ulist_free(roots); 1482 ulist_free(roots);
1280 ulist_free(tmp); 1483 ulist_free(tmp);
1281 1484
@@ -1290,10 +1493,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1290{ 1493{
1291 struct btrfs_root *quota_root = fs_info->quota_root; 1494 struct btrfs_root *quota_root = fs_info->quota_root;
1292 int ret = 0; 1495 int ret = 0;
1496 int start_rescan_worker = 0;
1293 1497
1294 if (!quota_root) 1498 if (!quota_root)
1295 goto out; 1499 goto out;
1296 1500
1501 if (!fs_info->quota_enabled && fs_info->pending_quota_state)
1502 start_rescan_worker = 1;
1503
1297 fs_info->quota_enabled = fs_info->pending_quota_state; 1504 fs_info->quota_enabled = fs_info->pending_quota_state;
1298 1505
1299 spin_lock(&fs_info->qgroup_lock); 1506 spin_lock(&fs_info->qgroup_lock);
@@ -1319,6 +1526,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1319 if (ret) 1526 if (ret)
1320 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1527 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1321 1528
1529 if (!ret && start_rescan_worker) {
1530 ret = btrfs_qgroup_rescan(fs_info);
1531 if (ret)
1532 pr_err("btrfs: start rescan quota failed: %d\n", ret);
1533 ret = 0;
1534 }
1535
1322out: 1536out:
1323 1537
1324 return ret; 1538 return ret;
@@ -1339,12 +1553,30 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1339 struct btrfs_qgroup *srcgroup; 1553 struct btrfs_qgroup *srcgroup;
1340 struct btrfs_qgroup *dstgroup; 1554 struct btrfs_qgroup *dstgroup;
1341 u32 level_size = 0; 1555 u32 level_size = 0;
1556 u64 nums;
1342 1557
1558 mutex_lock(&fs_info->qgroup_ioctl_lock);
1343 if (!fs_info->quota_enabled) 1559 if (!fs_info->quota_enabled)
1344 return 0; 1560 goto out;
1345 1561
1346 if (!quota_root) 1562 if (!quota_root) {
1347 return -EINVAL; 1563 ret = -EINVAL;
1564 goto out;
1565 }
1566
1567 if (inherit) {
1568 i_qgroups = (u64 *)(inherit + 1);
1569 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
1570 2 * inherit->num_excl_copies;
1571 for (i = 0; i < nums; ++i) {
1572 srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
1573 if (!srcgroup) {
1574 ret = -EINVAL;
1575 goto out;
1576 }
1577 ++i_qgroups;
1578 }
1579 }
1348 1580
1349 /* 1581 /*
1350 * create a tracking group for the subvol itself 1582 * create a tracking group for the subvol itself
@@ -1471,6 +1703,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1471unlock: 1703unlock:
1472 spin_unlock(&fs_info->qgroup_lock); 1704 spin_unlock(&fs_info->qgroup_lock);
1473out: 1705out:
1706 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1474 return ret; 1707 return ret;
1475} 1708}
1476 1709
@@ -1515,7 +1748,10 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1515 ret = -ENOMEM; 1748 ret = -ENOMEM;
1516 goto out; 1749 goto out;
1517 } 1750 }
1518 ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1751 ret = ulist_add(ulist, qgroup->qgroupid,
1752 (uintptr_t)qgroup, GFP_ATOMIC);
1753 if (ret < 0)
1754 goto out;
1519 ULIST_ITER_INIT(&uiter); 1755 ULIST_ITER_INIT(&uiter);
1520 while ((unode = ulist_next(ulist, &uiter))) { 1756 while ((unode = ulist_next(ulist, &uiter))) {
1521 struct btrfs_qgroup *qg; 1757 struct btrfs_qgroup *qg;
@@ -1524,25 +1760,27 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1524 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux; 1760 qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1525 1761
1526 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 1762 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1527 qg->reserved + qg->rfer + num_bytes > 1763 qg->reserved + (s64)qg->rfer + num_bytes >
1528 qg->max_rfer) { 1764 qg->max_rfer) {
1529 ret = -EDQUOT; 1765 ret = -EDQUOT;
1530 goto out; 1766 goto out;
1531 } 1767 }
1532 1768
1533 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 1769 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1534 qg->reserved + qg->excl + num_bytes > 1770 qg->reserved + (s64)qg->excl + num_bytes >
1535 qg->max_excl) { 1771 qg->max_excl) {
1536 ret = -EDQUOT; 1772 ret = -EDQUOT;
1537 goto out; 1773 goto out;
1538 } 1774 }
1539 1775
1540 list_for_each_entry(glist, &qg->groups, next_group) { 1776 list_for_each_entry(glist, &qg->groups, next_group) {
1541 ulist_add(ulist, glist->group->qgroupid, 1777 ret = ulist_add(ulist, glist->group->qgroupid,
1542 (uintptr_t)glist->group, GFP_ATOMIC); 1778 (uintptr_t)glist->group, GFP_ATOMIC);
1779 if (ret < 0)
1780 goto out;
1543 } 1781 }
1544 } 1782 }
1545 1783 ret = 0;
1546 /* 1784 /*
1547 * no limits exceeded, now record the reservation into all qgroups 1785 * no limits exceeded, now record the reservation into all qgroups
1548 */ 1786 */
@@ -1571,6 +1809,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1571 struct ulist_node *unode; 1809 struct ulist_node *unode;
1572 struct ulist_iterator uiter; 1810 struct ulist_iterator uiter;
1573 u64 ref_root = root->root_key.objectid; 1811 u64 ref_root = root->root_key.objectid;
1812 int ret = 0;
1574 1813
1575 if (!is_fstree(ref_root)) 1814 if (!is_fstree(ref_root))
1576 return; 1815 return;
@@ -1593,7 +1832,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1593 btrfs_std_error(fs_info, -ENOMEM); 1832 btrfs_std_error(fs_info, -ENOMEM);
1594 goto out; 1833 goto out;
1595 } 1834 }
1596 ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC); 1835 ret = ulist_add(ulist, qgroup->qgroupid,
1836 (uintptr_t)qgroup, GFP_ATOMIC);
1837 if (ret < 0)
1838 goto out;
1597 ULIST_ITER_INIT(&uiter); 1839 ULIST_ITER_INIT(&uiter);
1598 while ((unode = ulist_next(ulist, &uiter))) { 1840 while ((unode = ulist_next(ulist, &uiter))) {
1599 struct btrfs_qgroup *qg; 1841 struct btrfs_qgroup *qg;
@@ -1604,8 +1846,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1604 qg->reserved -= num_bytes; 1846 qg->reserved -= num_bytes;
1605 1847
1606 list_for_each_entry(glist, &qg->groups, next_group) { 1848 list_for_each_entry(glist, &qg->groups, next_group) {
1607 ulist_add(ulist, glist->group->qgroupid, 1849 ret = ulist_add(ulist, glist->group->qgroupid,
1608 (uintptr_t)glist->group, GFP_ATOMIC); 1850 (uintptr_t)glist->group, GFP_ATOMIC);
1851 if (ret < 0)
1852 goto out;
1609 } 1853 }
1610 } 1854 }
1611 1855
@@ -1618,8 +1862,265 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1618{ 1862{
1619 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 1863 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1620 return; 1864 return;
1621 printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n", 1865 pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
1622 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 1866 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1623 trans->delayed_ref_elem.seq); 1867 (u32)(trans->delayed_ref_elem.seq >> 32),
1868 (u32)trans->delayed_ref_elem.seq);
1624 BUG(); 1869 BUG();
1625} 1870}
1871
1872/*
1873 * returns < 0 on error, 0 when more leafs are to be scanned.
1874 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1875 */
1876static int
1877qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
1878 struct btrfs_trans_handle *trans, struct ulist *tmp,
1879 struct extent_buffer *scratch_leaf)
1880{
1881 struct btrfs_key found;
1882 struct btrfs_fs_info *fs_info = qscan->fs_info;
1883 struct ulist *roots = NULL;
1884 struct ulist_node *unode;
1885 struct ulist_iterator uiter;
1886 struct seq_list tree_mod_seq_elem = {};
1887 u64 seq;
1888 int slot;
1889 int ret;
1890
1891 path->leave_spinning = 1;
1892 mutex_lock(&fs_info->qgroup_rescan_lock);
1893 ret = btrfs_search_slot_for_read(fs_info->extent_root,
1894 &fs_info->qgroup_rescan_progress,
1895 path, 1, 0);
1896
1897 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1898 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
1899 fs_info->qgroup_rescan_progress.type,
1900 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
1901 ret);
1902
1903 if (ret) {
1904 /*
1905 * The rescan is about to end, we will not be scanning any
1906 * further blocks. We cannot unset the RESCAN flag here, because
1907 * we want to commit the transaction if everything went well.
1908 * To make the live accounting work in this phase, we set our
1909 * scan progress pointer such that every real extent objectid
1910 * will be smaller.
1911 */
1912 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
1913 btrfs_release_path(path);
1914 mutex_unlock(&fs_info->qgroup_rescan_lock);
1915 return ret;
1916 }
1917
1918 btrfs_item_key_to_cpu(path->nodes[0], &found,
1919 btrfs_header_nritems(path->nodes[0]) - 1);
1920 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
1921
1922 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1923 memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
1924 slot = path->slots[0];
1925 btrfs_release_path(path);
1926 mutex_unlock(&fs_info->qgroup_rescan_lock);
1927
1928 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1929 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1930 if (found.type != BTRFS_EXTENT_ITEM_KEY)
1931 continue;
1932 ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1933 tree_mod_seq_elem.seq, &roots);
1934 if (ret < 0)
1935 goto out;
1936 spin_lock(&fs_info->qgroup_lock);
1937 seq = fs_info->qgroup_seq;
1938 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1939
1940 ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1941 if (ret) {
1942 spin_unlock(&fs_info->qgroup_lock);
1943 ulist_free(roots);
1944 goto out;
1945 }
1946
1947 /*
1948 * step2 of btrfs_qgroup_account_ref works from a single root,
1949 * we're doing all at once here.
1950 */
1951 ulist_reinit(tmp);
1952 ULIST_ITER_INIT(&uiter);
1953 while ((unode = ulist_next(roots, &uiter))) {
1954 struct btrfs_qgroup *qg;
1955
1956 qg = find_qgroup_rb(fs_info, unode->val);
1957 if (!qg)
1958 continue;
1959
1960 ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1961 GFP_ATOMIC);
1962 if (ret < 0) {
1963 spin_unlock(&fs_info->qgroup_lock);
1964 ulist_free(roots);
1965 goto out;
1966 }
1967 }
1968
1969 /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1970 ULIST_ITER_INIT(&uiter);
1971 while ((unode = ulist_next(tmp, &uiter))) {
1972 struct btrfs_qgroup *qg;
1973 struct btrfs_qgroup_list *glist;
1974
1975 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1976 qg->rfer += found.offset;
1977 qg->rfer_cmpr += found.offset;
1978 WARN_ON(qg->tag >= seq);
1979 if (qg->refcnt - seq == roots->nnodes) {
1980 qg->excl += found.offset;
1981 qg->excl_cmpr += found.offset;
1982 }
1983 qgroup_dirty(fs_info, qg);
1984
1985 list_for_each_entry(glist, &qg->groups, next_group) {
1986 ret = ulist_add(tmp, glist->group->qgroupid,
1987 (uintptr_t)glist->group,
1988 GFP_ATOMIC);
1989 if (ret < 0) {
1990 spin_unlock(&fs_info->qgroup_lock);
1991 ulist_free(roots);
1992 goto out;
1993 }
1994 }
1995 }
1996
1997 spin_unlock(&fs_info->qgroup_lock);
1998 ulist_free(roots);
1999 ret = 0;
2000 }
2001
2002out:
2003 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2004
2005 return ret;
2006}
2007
2008static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2009{
2010 struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
2011 work);
2012 struct btrfs_path *path;
2013 struct btrfs_trans_handle *trans = NULL;
2014 struct btrfs_fs_info *fs_info = qscan->fs_info;
2015 struct ulist *tmp = NULL;
2016 struct extent_buffer *scratch_leaf = NULL;
2017 int err = -ENOMEM;
2018
2019 path = btrfs_alloc_path();
2020 if (!path)
2021 goto out;
2022 tmp = ulist_alloc(GFP_NOFS);
2023 if (!tmp)
2024 goto out;
2025 scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2026 if (!scratch_leaf)
2027 goto out;
2028
2029 err = 0;
2030 while (!err) {
2031 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2032 if (IS_ERR(trans)) {
2033 err = PTR_ERR(trans);
2034 break;
2035 }
2036 if (!fs_info->quota_enabled) {
2037 err = -EINTR;
2038 } else {
2039 err = qgroup_rescan_leaf(qscan, path, trans,
2040 tmp, scratch_leaf);
2041 }
2042 if (err > 0)
2043 btrfs_commit_transaction(trans, fs_info->fs_root);
2044 else
2045 btrfs_end_transaction(trans, fs_info->fs_root);
2046 }
2047
2048out:
2049 kfree(scratch_leaf);
2050 ulist_free(tmp);
2051 btrfs_free_path(path);
2052 kfree(qscan);
2053
2054 mutex_lock(&fs_info->qgroup_rescan_lock);
2055 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2056
2057 if (err == 2 &&
2058 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2059 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2060 } else if (err < 0) {
2061 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2062 }
2063 mutex_unlock(&fs_info->qgroup_rescan_lock);
2064
2065 if (err >= 0) {
2066 pr_info("btrfs: qgroup scan completed%s\n",
2067 err == 2 ? " (inconsistency flag cleared)" : "");
2068 } else {
2069 pr_err("btrfs: qgroup scan failed with %d\n", err);
2070 }
2071}
2072
2073static void
2074qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
2075{
2076 memset(&qscan->work, 0, sizeof(qscan->work));
2077 qscan->work.func = btrfs_qgroup_rescan_worker;
2078 qscan->fs_info = fs_info;
2079
2080 pr_info("btrfs: qgroup scan started\n");
2081 btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
2082}
2083
2084int
2085btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2086{
2087 int ret = 0;
2088 struct rb_node *n;
2089 struct btrfs_qgroup *qgroup;
2090 struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
2091
2092 if (!qscan)
2093 return -ENOMEM;
2094
2095 mutex_lock(&fs_info->qgroup_rescan_lock);
2096 spin_lock(&fs_info->qgroup_lock);
2097 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2098 ret = -EINPROGRESS;
2099 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2100 ret = -EINVAL;
2101 if (ret) {
2102 spin_unlock(&fs_info->qgroup_lock);
2103 mutex_unlock(&fs_info->qgroup_rescan_lock);
2104 kfree(qscan);
2105 return ret;
2106 }
2107
2108 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2109 memset(&fs_info->qgroup_rescan_progress, 0,
2110 sizeof(fs_info->qgroup_rescan_progress));
2111
2112 /* clear all current qgroup tracking information */
2113 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2114 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2115 qgroup->rfer = 0;
2116 qgroup->rfer_cmpr = 0;
2117 qgroup->excl = 0;
2118 qgroup->excl_cmpr = 0;
2119 }
2120 spin_unlock(&fs_info->qgroup_lock);
2121 mutex_unlock(&fs_info->qgroup_rescan_lock);
2122
2123 qgroup_rescan_start(fs_info, qscan);
2124
2125 return 0;
2126}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 9a79fb790adb..0740621daf6c 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -410,7 +410,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
410/* 410/*
411 * remove everything in the cache 411 * remove everything in the cache
412 */ 412 */
413void btrfs_clear_rbio_cache(struct btrfs_fs_info *info) 413static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
414{ 414{
415 struct btrfs_stripe_hash_table *table; 415 struct btrfs_stripe_hash_table *table;
416 unsigned long flags; 416 unsigned long flags;
@@ -1010,12 +1010,12 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
1010 * this will try to merge into existing bios if possible, and returns 1010 * this will try to merge into existing bios if possible, and returns
1011 * zero if all went well. 1011 * zero if all went well.
1012 */ 1012 */
1013int rbio_add_io_page(struct btrfs_raid_bio *rbio, 1013static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1014 struct bio_list *bio_list, 1014 struct bio_list *bio_list,
1015 struct page *page, 1015 struct page *page,
1016 int stripe_nr, 1016 int stripe_nr,
1017 unsigned long page_index, 1017 unsigned long page_index,
1018 unsigned long bio_max_len) 1018 unsigned long bio_max_len)
1019{ 1019{
1020 struct bio *last = bio_list->tail; 1020 struct bio *last = bio_list->tail;
1021 u64 last_end = 0; 1021 u64 last_end = 0;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 96b93daa0bbb..1031b69252c5 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -955,10 +955,11 @@ int btrfs_reada_wait(void *handle)
955 while (atomic_read(&rc->elems)) { 955 while (atomic_read(&rc->elems)) {
956 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0, 956 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
957 5 * HZ); 957 5 * HZ);
958 dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0); 958 dump_devs(rc->root->fs_info,
959 atomic_read(&rc->elems) < 10 ? 1 : 0);
959 } 960 }
960 961
961 dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0); 962 dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
962 963
963 kref_put(&rc->refcnt, reada_control_release); 964 kref_put(&rc->refcnt, reada_control_release);
964 965
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b67171e6d688..704a1b8d2a2b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -326,8 +326,7 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
326 return NULL; 326 return NULL;
327} 327}
328 328
329void backref_tree_panic(struct rb_node *rb_node, int errno, 329static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
330 u64 bytenr)
331{ 330{
332 331
333 struct btrfs_fs_info *fs_info = NULL; 332 struct btrfs_fs_info *fs_info = NULL;
@@ -619,10 +618,13 @@ static noinline_for_stack
619int find_inline_backref(struct extent_buffer *leaf, int slot, 618int find_inline_backref(struct extent_buffer *leaf, int slot,
620 unsigned long *ptr, unsigned long *end) 619 unsigned long *ptr, unsigned long *end)
621{ 620{
621 struct btrfs_key key;
622 struct btrfs_extent_item *ei; 622 struct btrfs_extent_item *ei;
623 struct btrfs_tree_block_info *bi; 623 struct btrfs_tree_block_info *bi;
624 u32 item_size; 624 u32 item_size;
625 625
626 btrfs_item_key_to_cpu(leaf, &key, slot);
627
626 item_size = btrfs_item_size_nr(leaf, slot); 628 item_size = btrfs_item_size_nr(leaf, slot);
627#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 629#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
628 if (item_size < sizeof(*ei)) { 630 if (item_size < sizeof(*ei)) {
@@ -634,13 +636,18 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
634 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 636 WARN_ON(!(btrfs_extent_flags(leaf, ei) &
635 BTRFS_EXTENT_FLAG_TREE_BLOCK)); 637 BTRFS_EXTENT_FLAG_TREE_BLOCK));
636 638
637 if (item_size <= sizeof(*ei) + sizeof(*bi)) { 639 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
640 item_size <= sizeof(*ei) + sizeof(*bi)) {
638 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 641 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
639 return 1; 642 return 1;
640 } 643 }
641 644
642 bi = (struct btrfs_tree_block_info *)(ei + 1); 645 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
643 *ptr = (unsigned long)(bi + 1); 646 bi = (struct btrfs_tree_block_info *)(ei + 1);
647 *ptr = (unsigned long)(bi + 1);
648 } else {
649 *ptr = (unsigned long)(ei + 1);
650 }
644 *end = (unsigned long)ei + item_size; 651 *end = (unsigned long)ei + item_size;
645 return 0; 652 return 0;
646} 653}
@@ -708,7 +715,7 @@ again:
708 end = 0; 715 end = 0;
709 ptr = 0; 716 ptr = 0;
710 key.objectid = cur->bytenr; 717 key.objectid = cur->bytenr;
711 key.type = BTRFS_EXTENT_ITEM_KEY; 718 key.type = BTRFS_METADATA_ITEM_KEY;
712 key.offset = (u64)-1; 719 key.offset = (u64)-1;
713 720
714 path1->search_commit_root = 1; 721 path1->search_commit_root = 1;
@@ -766,7 +773,8 @@ again:
766 break; 773 break;
767 } 774 }
768 775
769 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 776 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
777 key.type == BTRFS_METADATA_ITEM_KEY) {
770 ret = find_inline_backref(eb, path1->slots[0], 778 ret = find_inline_backref(eb, path1->slots[0],
771 &ptr, &end); 779 &ptr, &end);
772 if (ret) 780 if (ret)
@@ -1762,7 +1770,11 @@ again:
1762 1770
1763 eb = read_tree_block(dest, old_bytenr, blocksize, 1771 eb = read_tree_block(dest, old_bytenr, blocksize,
1764 old_ptr_gen); 1772 old_ptr_gen);
1765 BUG_ON(!eb); 1773 if (!eb || !extent_buffer_uptodate(eb)) {
1774 ret = (!eb) ? -ENOMEM : -EIO;
1775 free_extent_buffer(eb);
1776 return ret;
1777 }
1766 btrfs_tree_lock(eb); 1778 btrfs_tree_lock(eb);
1767 if (cow) { 1779 if (cow) {
1768 ret = btrfs_cow_block(trans, dest, eb, parent, 1780 ret = btrfs_cow_block(trans, dest, eb, parent,
@@ -1915,6 +1927,10 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1915 bytenr = btrfs_node_blockptr(eb, path->slots[i]); 1927 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1916 blocksize = btrfs_level_size(root, i - 1); 1928 blocksize = btrfs_level_size(root, i - 1);
1917 eb = read_tree_block(root, bytenr, blocksize, ptr_gen); 1929 eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1930 if (!eb || !extent_buffer_uptodate(eb)) {
1931 free_extent_buffer(eb);
1932 return -EIO;
1933 }
1918 BUG_ON(btrfs_header_level(eb) != i - 1); 1934 BUG_ON(btrfs_header_level(eb) != i - 1);
1919 path->nodes[i - 1] = eb; 1935 path->nodes[i - 1] = eb;
1920 path->slots[i - 1] = 0; 1936 path->slots[i - 1] = 0;
@@ -2592,7 +2608,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2592 blocksize = btrfs_level_size(root, node->level); 2608 blocksize = btrfs_level_size(root, node->level);
2593 generation = btrfs_node_ptr_generation(upper->eb, slot); 2609 generation = btrfs_node_ptr_generation(upper->eb, slot);
2594 eb = read_tree_block(root, bytenr, blocksize, generation); 2610 eb = read_tree_block(root, bytenr, blocksize, generation);
2595 if (!eb) { 2611 if (!eb || !extent_buffer_uptodate(eb)) {
2612 free_extent_buffer(eb);
2596 err = -EIO; 2613 err = -EIO;
2597 goto next; 2614 goto next;
2598 } 2615 }
@@ -2753,7 +2770,10 @@ static int get_tree_block_key(struct reloc_control *rc,
2753 BUG_ON(block->key_ready); 2770 BUG_ON(block->key_ready);
2754 eb = read_tree_block(rc->extent_root, block->bytenr, 2771 eb = read_tree_block(rc->extent_root, block->bytenr,
2755 block->key.objectid, block->key.offset); 2772 block->key.objectid, block->key.offset);
2756 BUG_ON(!eb); 2773 if (!eb || !extent_buffer_uptodate(eb)) {
2774 free_extent_buffer(eb);
2775 return -EIO;
2776 }
2757 WARN_ON(btrfs_header_level(eb) != block->level); 2777 WARN_ON(btrfs_header_level(eb) != block->level);
2758 if (block->level == 0) 2778 if (block->level == 0)
2759 btrfs_item_key_to_cpu(eb, &block->key, 0); 2779 btrfs_item_key_to_cpu(eb, &block->key, 0);
@@ -2768,8 +2788,13 @@ static int reada_tree_block(struct reloc_control *rc,
2768 struct tree_block *block) 2788 struct tree_block *block)
2769{ 2789{
2770 BUG_ON(block->key_ready); 2790 BUG_ON(block->key_ready);
2771 readahead_tree_block(rc->extent_root, block->bytenr, 2791 if (block->key.type == BTRFS_METADATA_ITEM_KEY)
2772 block->key.objectid, block->key.offset); 2792 readahead_tree_block(rc->extent_root, block->bytenr,
2793 block->key.objectid,
2794 rc->extent_root->leafsize);
2795 else
2796 readahead_tree_block(rc->extent_root, block->bytenr,
2797 block->key.objectid, block->key.offset);
2773 return 0; 2798 return 0;
2774} 2799}
2775 2800
@@ -2850,7 +2875,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2850 path = btrfs_alloc_path(); 2875 path = btrfs_alloc_path();
2851 if (!path) { 2876 if (!path) {
2852 err = -ENOMEM; 2877 err = -ENOMEM;
2853 goto out_path; 2878 goto out_free_blocks;
2854 } 2879 }
2855 2880
2856 rb_node = rb_first(blocks); 2881 rb_node = rb_first(blocks);
@@ -2864,8 +2889,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2864 rb_node = rb_first(blocks); 2889 rb_node = rb_first(blocks);
2865 while (rb_node) { 2890 while (rb_node) {
2866 block = rb_entry(rb_node, struct tree_block, rb_node); 2891 block = rb_entry(rb_node, struct tree_block, rb_node);
2867 if (!block->key_ready) 2892 if (!block->key_ready) {
2868 get_tree_block_key(rc, block); 2893 err = get_tree_block_key(rc, block);
2894 if (err)
2895 goto out_free_path;
2896 }
2869 rb_node = rb_next(rb_node); 2897 rb_node = rb_next(rb_node);
2870 } 2898 }
2871 2899
@@ -2892,8 +2920,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2892out: 2920out:
2893 err = finish_pending_nodes(trans, rc, path, err); 2921 err = finish_pending_nodes(trans, rc, path, err);
2894 2922
2923out_free_path:
2895 btrfs_free_path(path); 2924 btrfs_free_path(path);
2896out_path: 2925out_free_blocks:
2897 free_block_list(blocks); 2926 free_block_list(blocks);
2898 return err; 2927 return err;
2899} 2928}
@@ -2965,7 +2994,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2965 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 2994 lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2966 while (1) { 2995 while (1) {
2967 write_lock(&em_tree->lock); 2996 write_lock(&em_tree->lock);
2968 ret = add_extent_mapping(em_tree, em); 2997 ret = add_extent_mapping(em_tree, em, 0);
2969 write_unlock(&em_tree->lock); 2998 write_unlock(&em_tree->lock);
2970 if (ret != -EEXIST) { 2999 if (ret != -EEXIST) {
2971 free_extent_map(em); 3000 free_extent_map(em);
@@ -3176,12 +3205,17 @@ static int add_tree_block(struct reloc_control *rc,
3176 eb = path->nodes[0]; 3205 eb = path->nodes[0];
3177 item_size = btrfs_item_size_nr(eb, path->slots[0]); 3206 item_size = btrfs_item_size_nr(eb, path->slots[0]);
3178 3207
3179 if (item_size >= sizeof(*ei) + sizeof(*bi)) { 3208 if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
3209 item_size >= sizeof(*ei) + sizeof(*bi)) {
3180 ei = btrfs_item_ptr(eb, path->slots[0], 3210 ei = btrfs_item_ptr(eb, path->slots[0],
3181 struct btrfs_extent_item); 3211 struct btrfs_extent_item);
3182 bi = (struct btrfs_tree_block_info *)(ei + 1); 3212 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
3213 bi = (struct btrfs_tree_block_info *)(ei + 1);
3214 level = btrfs_tree_block_level(eb, bi);
3215 } else {
3216 level = (int)extent_key->offset;
3217 }
3183 generation = btrfs_extent_generation(eb, ei); 3218 generation = btrfs_extent_generation(eb, ei);
3184 level = btrfs_tree_block_level(eb, bi);
3185 } else { 3219 } else {
3186#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3220#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3187 u64 ref_owner; 3221 u64 ref_owner;
@@ -3210,7 +3244,7 @@ static int add_tree_block(struct reloc_control *rc,
3210 return -ENOMEM; 3244 return -ENOMEM;
3211 3245
3212 block->bytenr = extent_key->objectid; 3246 block->bytenr = extent_key->objectid;
3213 block->key.objectid = extent_key->offset; 3247 block->key.objectid = rc->extent_root->leafsize;
3214 block->key.offset = generation; 3248 block->key.offset = generation;
3215 block->level = level; 3249 block->level = level;
3216 block->key_ready = 0; 3250 block->key_ready = 0;
@@ -3252,9 +3286,15 @@ static int __add_tree_block(struct reloc_control *rc,
3252 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 3286 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3253 if (ret < 0) 3287 if (ret < 0)
3254 goto out; 3288 goto out;
3255 BUG_ON(ret);
3256 3289
3257 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 3290 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3291 if (ret > 0) {
3292 if (key.objectid == bytenr &&
3293 key.type == BTRFS_METADATA_ITEM_KEY)
3294 ret = 0;
3295 }
3296 BUG_ON(ret);
3297
3258 ret = add_tree_block(rc, &key, path, blocks); 3298 ret = add_tree_block(rc, &key, path, blocks);
3259out: 3299out:
3260 btrfs_free_path(path); 3300 btrfs_free_path(path);
@@ -3275,7 +3315,8 @@ static int block_use_full_backref(struct reloc_control *rc,
3275 return 1; 3315 return 1;
3276 3316
3277 ret = btrfs_lookup_extent_info(NULL, rc->extent_root, 3317 ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
3278 eb->start, eb->len, NULL, &flags); 3318 eb->start, btrfs_header_level(eb), 1,
3319 NULL, &flags);
3279 BUG_ON(ret); 3320 BUG_ON(ret);
3280 3321
3281 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) 3322 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
@@ -3644,12 +3685,25 @@ next:
3644 break; 3685 break;
3645 } 3686 }
3646 3687
3647 if (key.type != BTRFS_EXTENT_ITEM_KEY || 3688 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3689 key.type != BTRFS_METADATA_ITEM_KEY) {
3690 path->slots[0]++;
3691 goto next;
3692 }
3693
3694 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
3648 key.objectid + key.offset <= rc->search_start) { 3695 key.objectid + key.offset <= rc->search_start) {
3649 path->slots[0]++; 3696 path->slots[0]++;
3650 goto next; 3697 goto next;
3651 } 3698 }
3652 3699
3700 if (key.type == BTRFS_METADATA_ITEM_KEY &&
3701 key.objectid + rc->extent_root->leafsize <=
3702 rc->search_start) {
3703 path->slots[0]++;
3704 goto next;
3705 }
3706
3653 ret = find_first_extent_bit(&rc->processed_blocks, 3707 ret = find_first_extent_bit(&rc->processed_blocks,
3654 key.objectid, &start, &end, 3708 key.objectid, &start, &end,
3655 EXTENT_DIRTY, NULL); 3709 EXTENT_DIRTY, NULL);
@@ -3658,7 +3712,11 @@ next:
3658 btrfs_release_path(path); 3712 btrfs_release_path(path);
3659 rc->search_start = end + 1; 3713 rc->search_start = end + 1;
3660 } else { 3714 } else {
3661 rc->search_start = key.objectid + key.offset; 3715 if (key.type == BTRFS_EXTENT_ITEM_KEY)
3716 rc->search_start = key.objectid + key.offset;
3717 else
3718 rc->search_start = key.objectid +
3719 rc->extent_root->leafsize;
3662 memcpy(extent_key, &key, sizeof(key)); 3720 memcpy(extent_key, &key, sizeof(key));
3663 return 0; 3721 return 0;
3664 } 3722 }
@@ -4105,10 +4163,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4105 4163
4106 while (1) { 4164 while (1) {
4107 mutex_lock(&fs_info->cleaner_mutex); 4165 mutex_lock(&fs_info->cleaner_mutex);
4108
4109 btrfs_clean_old_snapshots(fs_info->tree_root);
4110 ret = relocate_block_group(rc); 4166 ret = relocate_block_group(rc);
4111
4112 mutex_unlock(&fs_info->cleaner_mutex); 4167 mutex_unlock(&fs_info->cleaner_mutex);
4113 if (ret < 0) { 4168 if (ret < 0) {
4114 err = ret; 4169 err = ret;
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 668af537a3ea..5bf1ed57f178 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -29,9 +29,8 @@
29 * generation numbers as then we know the root was once mounted with an older 29 * generation numbers as then we know the root was once mounted with an older
30 * kernel that was not aware of the root item structure change. 30 * kernel that was not aware of the root item structure change.
31 */ 31 */
32void btrfs_read_root_item(struct btrfs_root *root, 32void btrfs_read_root_item(struct extent_buffer *eb, int slot,
33 struct extent_buffer *eb, int slot, 33 struct btrfs_root_item *item)
34 struct btrfs_root_item *item)
35{ 34{
36 uuid_le uuid; 35 uuid_le uuid;
37 int len; 36 int len;
@@ -104,7 +103,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
104 goto out; 103 goto out;
105 } 104 }
106 if (item) 105 if (item)
107 btrfs_read_root_item(root, l, slot, item); 106 btrfs_read_root_item(l, slot, item);
108 if (key) 107 if (key)
109 memcpy(key, &found_key, sizeof(found_key)); 108 memcpy(key, &found_key, sizeof(found_key));
110 109
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 53c3501fa4ca..f489e24659a4 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
542 eb = path->nodes[0]; 542 eb = path->nodes[0];
543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); 543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
544 item_size = btrfs_item_size_nr(eb, path->slots[0]); 544 item_size = btrfs_item_size_nr(eb, path->slots[0]);
545 btrfs_release_path(path);
546 545
547 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 546 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
548 do { 547 do {
@@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
558 ret < 0 ? -1 : ref_level, 557 ret < 0 ? -1 : ref_level,
559 ret < 0 ? -1 : ref_root); 558 ret < 0 ? -1 : ref_root);
560 } while (ret != 1); 559 } while (ret != 1);
560 btrfs_release_path(path);
561 } else { 561 } else {
562 btrfs_release_path(path);
562 swarn.path = path; 563 swarn.path = path;
563 swarn.dev = dev; 564 swarn.dev = dev;
564 iterate_extent_inodes(fs_info, found_key.objectid, 565 iterate_extent_inodes(fs_info, found_key.objectid,
@@ -1335,7 +1336,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1335 int page_num; 1336 int page_num;
1336 u8 calculated_csum[BTRFS_CSUM_SIZE]; 1337 u8 calculated_csum[BTRFS_CSUM_SIZE];
1337 u32 crc = ~(u32)0; 1338 u32 crc = ~(u32)0;
1338 struct btrfs_root *root = fs_info->extent_root;
1339 void *mapped_buffer; 1339 void *mapped_buffer;
1340 1340
1341 WARN_ON(!sblock->pagev[0]->page); 1341 WARN_ON(!sblock->pagev[0]->page);
@@ -1364,12 +1364,11 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1364 1364
1365 for (page_num = 0;;) { 1365 for (page_num = 0;;) {
1366 if (page_num == 0 && is_metadata) 1366 if (page_num == 0 && is_metadata)
1367 crc = btrfs_csum_data(root, 1367 crc = btrfs_csum_data(
1368 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE, 1368 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1369 crc, PAGE_SIZE - BTRFS_CSUM_SIZE); 1369 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1370 else 1370 else
1371 crc = btrfs_csum_data(root, mapped_buffer, crc, 1371 crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
1372 PAGE_SIZE);
1373 1372
1374 kunmap_atomic(mapped_buffer); 1373 kunmap_atomic(mapped_buffer);
1375 page_num++; 1374 page_num++;
@@ -1656,7 +1655,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1656 void *buffer; 1655 void *buffer;
1657 u32 crc = ~(u32)0; 1656 u32 crc = ~(u32)0;
1658 int fail = 0; 1657 int fail = 0;
1659 struct btrfs_root *root = sctx->dev_root;
1660 u64 len; 1658 u64 len;
1661 int index; 1659 int index;
1662 1660
@@ -1673,7 +1671,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
1673 for (;;) { 1671 for (;;) {
1674 u64 l = min_t(u64, len, PAGE_SIZE); 1672 u64 l = min_t(u64, len, PAGE_SIZE);
1675 1673
1676 crc = btrfs_csum_data(root, buffer, crc, l); 1674 crc = btrfs_csum_data(buffer, crc, l);
1677 kunmap_atomic(buffer); 1675 kunmap_atomic(buffer);
1678 len -= l; 1676 len -= l;
1679 if (len == 0) 1677 if (len == 0)
@@ -1743,7 +1741,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
1743 for (;;) { 1741 for (;;) {
1744 u64 l = min_t(u64, len, mapped_size); 1742 u64 l = min_t(u64, len, mapped_size);
1745 1743
1746 crc = btrfs_csum_data(root, p, crc, l); 1744 crc = btrfs_csum_data(p, crc, l);
1747 kunmap_atomic(mapped_buffer); 1745 kunmap_atomic(mapped_buffer);
1748 len -= l; 1746 len -= l;
1749 if (len == 0) 1747 if (len == 0)
@@ -1804,7 +1802,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
1804 for (;;) { 1802 for (;;) {
1805 u64 l = min_t(u64, len, mapped_size); 1803 u64 l = min_t(u64, len, mapped_size);
1806 1804
1807 crc = btrfs_csum_data(root, p, crc, l); 1805 crc = btrfs_csum_data(p, crc, l);
1808 kunmap_atomic(mapped_buffer); 1806 kunmap_atomic(mapped_buffer);
1809 len -= l; 1807 len -= l;
1810 if (len == 0) 1808 if (len == 0)
@@ -2235,12 +2233,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2235 u64 flags; 2233 u64 flags;
2236 int ret; 2234 int ret;
2237 int slot; 2235 int slot;
2238 int i;
2239 u64 nstripes; 2236 u64 nstripes;
2240 struct extent_buffer *l; 2237 struct extent_buffer *l;
2241 struct btrfs_key key; 2238 struct btrfs_key key;
2242 u64 physical; 2239 u64 physical;
2243 u64 logical; 2240 u64 logical;
2241 u64 logic_end;
2244 u64 generation; 2242 u64 generation;
2245 int mirror_num; 2243 int mirror_num;
2246 struct reada_control *reada1; 2244 struct reada_control *reada1;
@@ -2254,6 +2252,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2254 u64 extent_len; 2252 u64 extent_len;
2255 struct btrfs_device *extent_dev; 2253 struct btrfs_device *extent_dev;
2256 int extent_mirror_num; 2254 int extent_mirror_num;
2255 int stop_loop;
2257 2256
2258 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | 2257 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2259 BTRFS_BLOCK_GROUP_RAID6)) { 2258 BTRFS_BLOCK_GROUP_RAID6)) {
@@ -2314,8 +2313,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2314 key_start.type = BTRFS_EXTENT_ITEM_KEY; 2313 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2315 key_start.offset = (u64)0; 2314 key_start.offset = (u64)0;
2316 key_end.objectid = base + offset + nstripes * increment; 2315 key_end.objectid = base + offset + nstripes * increment;
2317 key_end.type = BTRFS_EXTENT_ITEM_KEY; 2316 key_end.type = BTRFS_METADATA_ITEM_KEY;
2318 key_end.offset = (u64)0; 2317 key_end.offset = (u64)-1;
2319 reada1 = btrfs_reada_add(root, &key_start, &key_end); 2318 reada1 = btrfs_reada_add(root, &key_start, &key_end);
2320 2319
2321 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID; 2320 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -2353,8 +2352,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2353 */ 2352 */
2354 logical = base + offset; 2353 logical = base + offset;
2355 physical = map->stripes[num].physical; 2354 physical = map->stripes[num].physical;
2355 logic_end = logical + increment * nstripes;
2356 ret = 0; 2356 ret = 0;
2357 for (i = 0; i < nstripes; ++i) { 2357 while (logical < logic_end) {
2358 /* 2358 /*
2359 * canceled? 2359 * canceled?
2360 */ 2360 */
@@ -2390,19 +2390,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2390 wake_up(&fs_info->scrub_pause_wait); 2390 wake_up(&fs_info->scrub_pause_wait);
2391 } 2391 }
2392 2392
2393 ret = btrfs_lookup_csums_range(csum_root, logical,
2394 logical + map->stripe_len - 1,
2395 &sctx->csum_list, 1);
2396 if (ret)
2397 goto out;
2398
2399 key.objectid = logical; 2393 key.objectid = logical;
2400 key.type = BTRFS_EXTENT_ITEM_KEY; 2394 key.type = BTRFS_EXTENT_ITEM_KEY;
2401 key.offset = (u64)0; 2395 key.offset = (u64)-1;
2402 2396
2403 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2397 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2404 if (ret < 0) 2398 if (ret < 0)
2405 goto out; 2399 goto out;
2400
2406 if (ret > 0) { 2401 if (ret > 0) {
2407 ret = btrfs_previous_item(root, path, 0, 2402 ret = btrfs_previous_item(root, path, 0,
2408 BTRFS_EXTENT_ITEM_KEY); 2403 BTRFS_EXTENT_ITEM_KEY);
@@ -2419,7 +2414,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2419 } 2414 }
2420 } 2415 }
2421 2416
2417 stop_loop = 0;
2422 while (1) { 2418 while (1) {
2419 u64 bytes;
2420
2423 l = path->nodes[0]; 2421 l = path->nodes[0];
2424 slot = path->slots[0]; 2422 slot = path->slots[0];
2425 if (slot >= btrfs_header_nritems(l)) { 2423 if (slot >= btrfs_header_nritems(l)) {
@@ -2429,19 +2427,30 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2429 if (ret < 0) 2427 if (ret < 0)
2430 goto out; 2428 goto out;
2431 2429
2430 stop_loop = 1;
2432 break; 2431 break;
2433 } 2432 }
2434 btrfs_item_key_to_cpu(l, &key, slot); 2433 btrfs_item_key_to_cpu(l, &key, slot);
2435 2434
2436 if (key.objectid + key.offset <= logical) 2435 if (key.type == BTRFS_METADATA_ITEM_KEY)
2437 goto next; 2436 bytes = root->leafsize;
2437 else
2438 bytes = key.offset;
2438 2439
2439 if (key.objectid >= logical + map->stripe_len) 2440 if (key.objectid + bytes <= logical)
2440 break; 2441 goto next;
2441 2442
2442 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) 2443 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2444 key.type != BTRFS_METADATA_ITEM_KEY)
2443 goto next; 2445 goto next;
2444 2446
2447 if (key.objectid >= logical + map->stripe_len) {
2448 /* out of this device extent */
2449 if (key.objectid >= logic_end)
2450 stop_loop = 1;
2451 break;
2452 }
2453
2445 extent = btrfs_item_ptr(l, slot, 2454 extent = btrfs_item_ptr(l, slot,
2446 struct btrfs_extent_item); 2455 struct btrfs_extent_item);
2447 flags = btrfs_extent_flags(l, extent); 2456 flags = btrfs_extent_flags(l, extent);
@@ -2457,22 +2466,24 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2457 goto next; 2466 goto next;
2458 } 2467 }
2459 2468
2469again:
2470 extent_logical = key.objectid;
2471 extent_len = bytes;
2472
2460 /* 2473 /*
2461 * trim extent to this stripe 2474 * trim extent to this stripe
2462 */ 2475 */
2463 if (key.objectid < logical) { 2476 if (extent_logical < logical) {
2464 key.offset -= logical - key.objectid; 2477 extent_len -= logical - extent_logical;
2465 key.objectid = logical; 2478 extent_logical = logical;
2466 } 2479 }
2467 if (key.objectid + key.offset > 2480 if (extent_logical + extent_len >
2468 logical + map->stripe_len) { 2481 logical + map->stripe_len) {
2469 key.offset = logical + map->stripe_len - 2482 extent_len = logical + map->stripe_len -
2470 key.objectid; 2483 extent_logical;
2471 } 2484 }
2472 2485
2473 extent_logical = key.objectid; 2486 extent_physical = extent_logical - logical + physical;
2474 extent_physical = key.objectid - logical + physical;
2475 extent_len = key.offset;
2476 extent_dev = scrub_dev; 2487 extent_dev = scrub_dev;
2477 extent_mirror_num = mirror_num; 2488 extent_mirror_num = mirror_num;
2478 if (is_dev_replace) 2489 if (is_dev_replace)
@@ -2480,13 +2491,35 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2480 extent_len, &extent_physical, 2491 extent_len, &extent_physical,
2481 &extent_dev, 2492 &extent_dev,
2482 &extent_mirror_num); 2493 &extent_mirror_num);
2494
2495 ret = btrfs_lookup_csums_range(csum_root, logical,
2496 logical + map->stripe_len - 1,
2497 &sctx->csum_list, 1);
2498 if (ret)
2499 goto out;
2500
2483 ret = scrub_extent(sctx, extent_logical, extent_len, 2501 ret = scrub_extent(sctx, extent_logical, extent_len,
2484 extent_physical, extent_dev, flags, 2502 extent_physical, extent_dev, flags,
2485 generation, extent_mirror_num, 2503 generation, extent_mirror_num,
2486 key.objectid - logical + physical); 2504 extent_physical);
2487 if (ret) 2505 if (ret)
2488 goto out; 2506 goto out;
2489 2507
2508 if (extent_logical + extent_len <
2509 key.objectid + bytes) {
2510 logical += increment;
2511 physical += map->stripe_len;
2512
2513 if (logical < key.objectid + bytes) {
2514 cond_resched();
2515 goto again;
2516 }
2517
2518 if (logical >= logic_end) {
2519 stop_loop = 1;
2520 break;
2521 }
2522 }
2490next: 2523next:
2491 path->slots[0]++; 2524 path->slots[0]++;
2492 } 2525 }
@@ -2494,8 +2527,14 @@ next:
2494 logical += increment; 2527 logical += increment;
2495 physical += map->stripe_len; 2528 physical += map->stripe_len;
2496 spin_lock(&sctx->stat_lock); 2529 spin_lock(&sctx->stat_lock);
2497 sctx->stat.last_physical = physical; 2530 if (stop_loop)
2531 sctx->stat.last_physical = map->stripes[num].physical +
2532 length;
2533 else
2534 sctx->stat.last_physical = physical;
2498 spin_unlock(&sctx->stat_lock); 2535 spin_unlock(&sctx->stat_lock);
2536 if (stop_loop)
2537 break;
2499 } 2538 }
2500out: 2539out:
2501 /* push queued extents */ 2540 /* push queued extents */
@@ -3004,28 +3043,6 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
3004 return 0; 3043 return 0;
3005} 3044}
3006 3045
3007int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
3008{
3009 struct btrfs_fs_info *fs_info = root->fs_info;
3010 struct btrfs_device *dev;
3011 int ret;
3012
3013 /*
3014 * we have to hold the device_list_mutex here so the device
3015 * does not go away in cancel_dev. FIXME: find a better solution
3016 */
3017 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3018 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
3019 if (!dev) {
3020 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3021 return -ENODEV;
3022 }
3023 ret = btrfs_scrub_cancel_dev(fs_info, dev);
3024 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3025
3026 return ret;
3027}
3028
3029int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, 3046int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3030 struct btrfs_scrub_progress *progress) 3047 struct btrfs_scrub_progress *progress)
3031{ 3048{
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f7a8b861058b..ff40f1c00ce3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -387,7 +387,7 @@ static struct btrfs_path *alloc_path_for_send(void)
387 return path; 387 return path;
388} 388}
389 389
390int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off) 390static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
391{ 391{
392 int ret; 392 int ret;
393 mm_segment_t old_fs; 393 mm_segment_t old_fs;
@@ -3479,7 +3479,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3479 struct send_ctx *sctx = ctx; 3479 struct send_ctx *sctx = ctx;
3480 char *found_data = NULL; 3480 char *found_data = NULL;
3481 int found_data_len = 0; 3481 int found_data_len = 0;
3482 struct fs_path *p = NULL;
3483 3482
3484 ret = find_xattr(sctx, sctx->parent_root, sctx->right_path, 3483 ret = find_xattr(sctx, sctx->parent_root, sctx->right_path,
3485 sctx->cmp_key, name, name_len, &found_data, 3484 sctx->cmp_key, name, name_len, &found_data,
@@ -3498,7 +3497,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3498 } 3497 }
3499 3498
3500 kfree(found_data); 3499 kfree(found_data);
3501 fs_path_free(sctx, p);
3502 return ret; 3500 return ret;
3503} 3501}
3504 3502
@@ -3945,12 +3943,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,
3945 found_key.type != key.type) { 3943 found_key.type != key.type) {
3946 key.offset += right_len; 3944 key.offset += right_len;
3947 break; 3945 break;
3948 } else { 3946 }
3949 if (found_key.offset != key.offset + right_len) { 3947 if (found_key.offset != key.offset + right_len) {
3950 /* Should really not happen */ 3948 ret = 0;
3951 ret = -EIO; 3949 goto out;
3952 goto out;
3953 }
3954 } 3950 }
3955 key = found_key; 3951 key = found_key;
3956 } 3952 }
@@ -4531,9 +4527,11 @@ static int send_subvol(struct send_ctx *sctx)
4531{ 4527{
4532 int ret; 4528 int ret;
4533 4529
4534 ret = send_header(sctx); 4530 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
4535 if (ret < 0) 4531 ret = send_header(sctx);
4536 goto out; 4532 if (ret < 0)
4533 goto out;
4534 }
4537 4535
4538 ret = send_subvol_begin(sctx); 4536 ret = send_subvol_begin(sctx);
4539 if (ret < 0) 4537 if (ret < 0)
@@ -4595,7 +4593,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4595 goto out; 4593 goto out;
4596 } 4594 }
4597 4595
4598 if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) { 4596 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
4599 ret = -EINVAL; 4597 ret = -EINVAL;
4600 goto out; 4598 goto out;
4601 } 4599 }
@@ -4614,8 +4612,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4614 sctx->flags = arg->flags; 4612 sctx->flags = arg->flags;
4615 4613
4616 sctx->send_filp = fget(arg->send_fd); 4614 sctx->send_filp = fget(arg->send_fd);
4617 if (IS_ERR(sctx->send_filp)) { 4615 if (!sctx->send_filp) {
4618 ret = PTR_ERR(sctx->send_filp); 4616 ret = -EBADF;
4619 goto out; 4617 goto out;
4620 } 4618 }
4621 4619
@@ -4706,12 +4704,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4706 if (ret < 0) 4704 if (ret < 0)
4707 goto out; 4705 goto out;
4708 4706
4709 ret = begin_cmd(sctx, BTRFS_SEND_C_END); 4707 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
4710 if (ret < 0) 4708 ret = begin_cmd(sctx, BTRFS_SEND_C_END);
4711 goto out; 4709 if (ret < 0)
4712 ret = send_cmd(sctx); 4710 goto out;
4713 if (ret < 0) 4711 ret = send_cmd(sctx);
4714 goto out; 4712 if (ret < 0)
4713 goto out;
4714 }
4715 4715
4716out: 4716out:
4717 kfree(arg); 4717 kfree(arg);
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 8bb18f7ccaa6..48d425aef05b 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -131,5 +131,4 @@ enum {
131 131
132#ifdef __KERNEL__ 132#ifdef __KERNEL__
133long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); 133long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
134int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off);
135#endif 134#endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f6b88595f858..a4807ced23cc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -56,6 +56,7 @@
56#include "compression.h" 56#include "compression.h"
57#include "rcu-string.h" 57#include "rcu-string.h"
58#include "dev-replace.h" 58#include "dev-replace.h"
59#include "free-space-cache.h"
59 60
60#define CREATE_TRACE_POINTS 61#define CREATE_TRACE_POINTS
61#include <trace/events/btrfs.h> 62#include <trace/events/btrfs.h>
@@ -63,9 +64,9 @@
63static const struct super_operations btrfs_super_ops; 64static const struct super_operations btrfs_super_ops;
64static struct file_system_type btrfs_fs_type; 65static struct file_system_type btrfs_fs_type;
65 66
66static const char *btrfs_decode_error(int errno, char nbuf[16]) 67static const char *btrfs_decode_error(int errno)
67{ 68{
68 char *errstr = NULL; 69 char *errstr = "unknown";
69 70
70 switch (errno) { 71 switch (errno) {
71 case -EIO: 72 case -EIO:
@@ -80,18 +81,18 @@ static const char *btrfs_decode_error(int errno, char nbuf[16])
80 case -EEXIST: 81 case -EEXIST:
81 errstr = "Object already exists"; 82 errstr = "Object already exists";
82 break; 83 break;
83 default: 84 case -ENOSPC:
84 if (nbuf) { 85 errstr = "No space left";
85 if (snprintf(nbuf, 16, "error %d", -errno) >= 0) 86 break;
86 errstr = nbuf; 87 case -ENOENT:
87 } 88 errstr = "No such entry";
88 break; 89 break;
89 } 90 }
90 91
91 return errstr; 92 return errstr;
92} 93}
93 94
94static void __save_error_info(struct btrfs_fs_info *fs_info) 95static void save_error_info(struct btrfs_fs_info *fs_info)
95{ 96{
96 /* 97 /*
97 * today we only save the error info into ram. Long term we'll 98 * today we only save the error info into ram. Long term we'll
@@ -100,11 +101,6 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
100 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); 101 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
101} 102}
102 103
103static void save_error_info(struct btrfs_fs_info *fs_info)
104{
105 __save_error_info(fs_info);
106}
107
108/* btrfs handle error by forcing the filesystem readonly */ 104/* btrfs handle error by forcing the filesystem readonly */
109static void btrfs_handle_error(struct btrfs_fs_info *fs_info) 105static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
110{ 106{
@@ -115,7 +111,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
115 111
116 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 112 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
117 sb->s_flags |= MS_RDONLY; 113 sb->s_flags |= MS_RDONLY;
118 printk(KERN_INFO "btrfs is forced readonly\n"); 114 btrfs_info(fs_info, "forced readonly");
119 /* 115 /*
120 * Note that a running device replace operation is not 116 * Note that a running device replace operation is not
121 * canceled here although there is no way to update 117 * canceled here although there is no way to update
@@ -126,7 +122,6 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
126 * mounted writeable again, the device replace 122 * mounted writeable again, the device replace
127 * operation continues. 123 * operation continues.
128 */ 124 */
129// WARN_ON(1);
130 } 125 }
131} 126}
132 127
@@ -139,7 +134,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
139 unsigned int line, int errno, const char *fmt, ...) 134 unsigned int line, int errno, const char *fmt, ...)
140{ 135{
141 struct super_block *sb = fs_info->sb; 136 struct super_block *sb = fs_info->sb;
142 char nbuf[16];
143 const char *errstr; 137 const char *errstr;
144 138
145 /* 139 /*
@@ -149,7 +143,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
149 if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) 143 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
150 return; 144 return;
151 145
152 errstr = btrfs_decode_error(errno, nbuf); 146 errstr = btrfs_decode_error(errno);
153 if (fmt) { 147 if (fmt) {
154 struct va_format vaf; 148 struct va_format vaf;
155 va_list args; 149 va_list args;
@@ -158,19 +152,18 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
158 vaf.fmt = fmt; 152 vaf.fmt = fmt;
159 vaf.va = &args; 153 vaf.va = &args;
160 154
161 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n", 155 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s (%pV)\n",
162 sb->s_id, function, line, errstr, &vaf); 156 sb->s_id, function, line, errno, errstr, &vaf);
163 va_end(args); 157 va_end(args);
164 } else { 158 } else {
165 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", 159 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s\n",
166 sb->s_id, function, line, errstr); 160 sb->s_id, function, line, errno, errstr);
167 } 161 }
168 162
169 /* Don't go through full error handling during mount */ 163 /* Don't go through full error handling during mount */
170 if (sb->s_flags & MS_BORN) { 164 save_error_info(fs_info);
171 save_error_info(fs_info); 165 if (sb->s_flags & MS_BORN)
172 btrfs_handle_error(fs_info); 166 btrfs_handle_error(fs_info);
173 }
174} 167}
175 168
176static const char * const logtypes[] = { 169static const char * const logtypes[] = {
@@ -184,7 +177,7 @@ static const char * const logtypes[] = {
184 "debug", 177 "debug",
185}; 178};
186 179
187void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...) 180void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
188{ 181{
189 struct super_block *sb = fs_info->sb; 182 struct super_block *sb = fs_info->sb;
190 char lvl[4]; 183 char lvl[4];
@@ -208,7 +201,7 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
208 vaf.fmt = fmt; 201 vaf.fmt = fmt;
209 vaf.va = &args; 202 vaf.va = &args;
210 203
211 printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf); 204 printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
212 205
213 va_end(args); 206 va_end(args);
214} 207}
@@ -252,18 +245,24 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
252 struct btrfs_root *root, const char *function, 245 struct btrfs_root *root, const char *function,
253 unsigned int line, int errno) 246 unsigned int line, int errno)
254{ 247{
255 WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted\n"); 248 /*
249 * Report first abort since mount
250 */
251 if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
252 &root->fs_info->fs_state)) {
253 WARN(1, KERN_DEBUG "btrfs: Transaction aborted (error %d)\n",
254 errno);
255 }
256 trans->aborted = errno; 256 trans->aborted = errno;
257 /* Nothing used. The other threads that have joined this 257 /* Nothing used. The other threads that have joined this
258 * transaction may be able to continue. */ 258 * transaction may be able to continue. */
259 if (!trans->blocks_used) { 259 if (!trans->blocks_used) {
260 char nbuf[16];
261 const char *errstr; 260 const char *errstr;
262 261
263 errstr = btrfs_decode_error(errno, nbuf); 262 errstr = btrfs_decode_error(errno);
264 btrfs_printk(root->fs_info, 263 btrfs_warn(root->fs_info,
265 "%s:%d: Aborting unused transaction(%s).\n", 264 "%s:%d: Aborting unused transaction(%s).",
266 function, line, errstr); 265 function, line, errstr);
267 return; 266 return;
268 } 267 }
269 ACCESS_ONCE(trans->transaction->aborted) = errno; 268 ACCESS_ONCE(trans->transaction->aborted) = errno;
@@ -276,7 +275,6 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
276void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, 275void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
277 unsigned int line, int errno, const char *fmt, ...) 276 unsigned int line, int errno, const char *fmt, ...)
278{ 277{
279 char nbuf[16];
280 char *s_id = "<unknown>"; 278 char *s_id = "<unknown>";
281 const char *errstr; 279 const char *errstr;
282 struct va_format vaf = { .fmt = fmt }; 280 struct va_format vaf = { .fmt = fmt };
@@ -288,13 +286,13 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
288 va_start(args, fmt); 286 va_start(args, fmt);
289 vaf.va = &args; 287 vaf.va = &args;
290 288
291 errstr = btrfs_decode_error(errno, nbuf); 289 errstr = btrfs_decode_error(errno);
292 if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)) 290 if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
293 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", 291 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
294 s_id, function, line, &vaf, errstr); 292 s_id, function, line, &vaf, errno, errstr);
295 293
296 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", 294 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
297 s_id, function, line, &vaf, errstr); 295 s_id, function, line, &vaf, errno, errstr);
298 va_end(args); 296 va_end(args);
299 /* Caller calls BUG() */ 297 /* Caller calls BUG() */
300} 298}
@@ -650,7 +648,7 @@ out:
650 */ 648 */
651static int btrfs_parse_early_options(const char *options, fmode_t flags, 649static int btrfs_parse_early_options(const char *options, fmode_t flags,
652 void *holder, char **subvol_name, u64 *subvol_objectid, 650 void *holder, char **subvol_name, u64 *subvol_objectid,
653 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices) 651 struct btrfs_fs_devices **fs_devices)
654{ 652{
655 substring_t args[MAX_OPT_ARGS]; 653 substring_t args[MAX_OPT_ARGS];
656 char *device_name, *opts, *orig, *p; 654 char *device_name, *opts, *orig, *p;
@@ -693,16 +691,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
693 } 691 }
694 break; 692 break;
695 case Opt_subvolrootid: 693 case Opt_subvolrootid:
696 intarg = 0; 694 printk(KERN_WARNING
697 error = match_int(&args[0], &intarg); 695 "btrfs: 'subvolrootid' mount option is deprecated and has no effect\n");
698 if (!error) {
699 /* we want the original fs_tree */
700 if (!intarg)
701 *subvol_rootid =
702 BTRFS_FS_TREE_OBJECTID;
703 else
704 *subvol_rootid = intarg;
705 }
706 break; 696 break;
707 case Opt_device: 697 case Opt_device:
708 device_name = match_strdup(&args[0]); 698 device_name = match_strdup(&args[0]);
@@ -876,7 +866,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
876 return 0; 866 return 0;
877 } 867 }
878 868
879 btrfs_wait_ordered_extents(root, 0); 869 btrfs_wait_ordered_extents(root, 1);
880 870
881 trans = btrfs_attach_transaction_barrier(root); 871 trans = btrfs_attach_transaction_barrier(root);
882 if (IS_ERR(trans)) { 872 if (IS_ERR(trans)) {
@@ -1080,7 +1070,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1080 fmode_t mode = FMODE_READ; 1070 fmode_t mode = FMODE_READ;
1081 char *subvol_name = NULL; 1071 char *subvol_name = NULL;
1082 u64 subvol_objectid = 0; 1072 u64 subvol_objectid = 0;
1083 u64 subvol_rootid = 0;
1084 int error = 0; 1073 int error = 0;
1085 1074
1086 if (!(flags & MS_RDONLY)) 1075 if (!(flags & MS_RDONLY))
@@ -1088,7 +1077,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1088 1077
1089 error = btrfs_parse_early_options(data, mode, fs_type, 1078 error = btrfs_parse_early_options(data, mode, fs_type,
1090 &subvol_name, &subvol_objectid, 1079 &subvol_name, &subvol_objectid,
1091 &subvol_rootid, &fs_devices); 1080 &fs_devices);
1092 if (error) { 1081 if (error) {
1093 kfree(subvol_name); 1082 kfree(subvol_name);
1094 return ERR_PTR(error); 1083 return ERR_PTR(error);
@@ -1202,11 +1191,14 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1202 new_pool_size); 1191 new_pool_size);
1203} 1192}
1204 1193
1205static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info, 1194static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
1206 unsigned long old_opts, int flags)
1207{ 1195{
1208 set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 1196 set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1197}
1209 1198
1199static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
1200 unsigned long old_opts, int flags)
1201{
1210 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && 1202 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1211 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || 1203 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1212 (flags & MS_RDONLY))) { 1204 (flags & MS_RDONLY))) {
@@ -1247,7 +1239,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1247 unsigned int old_metadata_ratio = fs_info->metadata_ratio; 1239 unsigned int old_metadata_ratio = fs_info->metadata_ratio;
1248 int ret; 1240 int ret;
1249 1241
1250 btrfs_remount_prepare(fs_info, old_opts, *flags); 1242 btrfs_remount_prepare(fs_info);
1251 1243
1252 ret = btrfs_parse_options(root, data); 1244 ret = btrfs_parse_options(root, data);
1253 if (ret) { 1245 if (ret) {
@@ -1255,6 +1247,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1255 goto restore; 1247 goto restore;
1256 } 1248 }
1257 1249
1250 btrfs_remount_begin(fs_info, old_opts, *flags);
1258 btrfs_resize_thread_pool(fs_info, 1251 btrfs_resize_thread_pool(fs_info,
1259 fs_info->thread_pool_size, old_thread_pool_size); 1252 fs_info->thread_pool_size, old_thread_pool_size);
1260 1253
@@ -1739,6 +1732,10 @@ static int __init init_btrfs_fs(void)
1739 1732
1740 btrfs_init_lockdep(); 1733 btrfs_init_lockdep();
1741 1734
1735#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
1736 btrfs_test_free_space_cache();
1737#endif
1738
1742 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); 1739 printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
1743 return 0; 1740 return 0;
1744 1741
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 50767bbaad6c..0544587d74f4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -34,7 +34,7 @@
34 34
35#define BTRFS_ROOT_TRANS_TAG 0 35#define BTRFS_ROOT_TRANS_TAG 0
36 36
37void put_transaction(struct btrfs_transaction *transaction) 37static void put_transaction(struct btrfs_transaction *transaction)
38{ 38{
39 WARN_ON(atomic_read(&transaction->use_count) == 0); 39 WARN_ON(atomic_read(&transaction->use_count) == 0);
40 if (atomic_dec_and_test(&transaction->use_count)) { 40 if (atomic_dec_and_test(&transaction->use_count)) {
@@ -162,7 +162,7 @@ loop:
162 if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) 162 if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
163 WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " 163 WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
164 "creating a fresh transaction\n"); 164 "creating a fresh transaction\n");
165 atomic_set(&fs_info->tree_mod_seq, 0); 165 atomic64_set(&fs_info->tree_mod_seq, 0);
166 166
167 spin_lock_init(&cur_trans->commit_lock); 167 spin_lock_init(&cur_trans->commit_lock);
168 spin_lock_init(&cur_trans->delayed_refs.lock); 168 spin_lock_init(&cur_trans->delayed_refs.lock);
@@ -707,23 +707,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
707int btrfs_end_transaction(struct btrfs_trans_handle *trans, 707int btrfs_end_transaction(struct btrfs_trans_handle *trans,
708 struct btrfs_root *root) 708 struct btrfs_root *root)
709{ 709{
710 int ret; 710 return __btrfs_end_transaction(trans, root, 0);
711
712 ret = __btrfs_end_transaction(trans, root, 0);
713 if (ret)
714 return ret;
715 return 0;
716} 711}
717 712
718int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 713int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
719 struct btrfs_root *root) 714 struct btrfs_root *root)
720{ 715{
721 int ret; 716 return __btrfs_end_transaction(trans, root, 1);
722
723 ret = __btrfs_end_transaction(trans, root, 1);
724 if (ret)
725 return ret;
726 return 0;
727} 717}
728 718
729int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, 719int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
@@ -948,7 +938,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
948int btrfs_add_dead_root(struct btrfs_root *root) 938int btrfs_add_dead_root(struct btrfs_root *root)
949{ 939{
950 spin_lock(&root->fs_info->trans_lock); 940 spin_lock(&root->fs_info->trans_lock);
951 list_add(&root->root_list, &root->fs_info->dead_roots); 941 list_add_tail(&root->root_list, &root->fs_info->dead_roots);
952 spin_unlock(&root->fs_info->trans_lock); 942 spin_unlock(&root->fs_info->trans_lock);
953 return 0; 943 return 0;
954} 944}
@@ -1179,13 +1169,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
1179 memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); 1169 memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
1180 memcpy(new_root_item->parent_uuid, root->root_item.uuid, 1170 memcpy(new_root_item->parent_uuid, root->root_item.uuid,
1181 BTRFS_UUID_SIZE); 1171 BTRFS_UUID_SIZE);
1172 if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
1173 memset(new_root_item->received_uuid, 0,
1174 sizeof(new_root_item->received_uuid));
1175 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
1176 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
1177 btrfs_set_root_stransid(new_root_item, 0);
1178 btrfs_set_root_rtransid(new_root_item, 0);
1179 }
1182 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec); 1180 new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
1183 new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec); 1181 new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
1184 btrfs_set_root_otransid(new_root_item, trans->transid); 1182 btrfs_set_root_otransid(new_root_item, trans->transid);
1185 memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
1186 memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
1187 btrfs_set_root_stransid(new_root_item, 0);
1188 btrfs_set_root_rtransid(new_root_item, 0);
1189 1183
1190 old = btrfs_lock_root_node(root); 1184 old = btrfs_lock_root_node(root);
1191 ret = btrfs_cow_block(trans, root, old, NULL, 0, &old); 1185 ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
@@ -1487,6 +1481,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
1487 current->journal_info = NULL; 1481 current->journal_info = NULL;
1488 1482
1489 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1483 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1484
1485 spin_lock(&root->fs_info->trans_lock);
1486 root->fs_info->trans_no_join = 0;
1487 spin_unlock(&root->fs_info->trans_lock);
1490} 1488}
1491 1489
1492static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, 1490static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
@@ -1808,7 +1806,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1808 ret = btrfs_write_and_wait_transaction(trans, root); 1806 ret = btrfs_write_and_wait_transaction(trans, root);
1809 if (ret) { 1807 if (ret) {
1810 btrfs_error(root->fs_info, ret, 1808 btrfs_error(root->fs_info, ret,
1811 "Error while writing out transaction."); 1809 "Error while writing out transaction");
1812 mutex_unlock(&root->fs_info->tree_log_mutex); 1810 mutex_unlock(&root->fs_info->tree_log_mutex);
1813 goto cleanup_transaction; 1811 goto cleanup_transaction;
1814 } 1812 }
@@ -1864,8 +1862,7 @@ cleanup_transaction:
1864 btrfs_qgroup_free(root, trans->qgroup_reserved); 1862 btrfs_qgroup_free(root, trans->qgroup_reserved);
1865 trans->qgroup_reserved = 0; 1863 trans->qgroup_reserved = 0;
1866 } 1864 }
1867 btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); 1865 btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
1868// WARN_ON(1);
1869 if (current->journal_info == trans) 1866 if (current->journal_info == trans)
1870 current->journal_info = NULL; 1867 current->journal_info = NULL;
1871 cleanup_transaction(trans, root, ret); 1868 cleanup_transaction(trans, root, ret);
@@ -1874,31 +1871,49 @@ cleanup_transaction:
1874} 1871}
1875 1872
1876/* 1873/*
1877 * interface function to delete all the snapshots we have scheduled for deletion 1874 * return < 0 if error
1875 * 0 if there are no more dead_roots at the time of call
1876 * 1 there are more to be processed, call me again
1877 *
1878 * The return value indicates there are certainly more snapshots to delete, but
1879 * if there comes a new one during processing, it may return 0. We don't mind,
1880 * because btrfs_commit_super will poke cleaner thread and it will process it a
1881 * few seconds later.
1878 */ 1882 */
1879int btrfs_clean_old_snapshots(struct btrfs_root *root) 1883int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1880{ 1884{
1881 LIST_HEAD(list); 1885 int ret;
1882 struct btrfs_fs_info *fs_info = root->fs_info; 1886 struct btrfs_fs_info *fs_info = root->fs_info;
1883 1887
1888 if (fs_info->sb->s_flags & MS_RDONLY) {
1889 pr_debug("btrfs: cleaner called for RO fs!\n");
1890 return 0;
1891 }
1892
1884 spin_lock(&fs_info->trans_lock); 1893 spin_lock(&fs_info->trans_lock);
1885 list_splice_init(&fs_info->dead_roots, &list); 1894 if (list_empty(&fs_info->dead_roots)) {
1895 spin_unlock(&fs_info->trans_lock);
1896 return 0;
1897 }
1898 root = list_first_entry(&fs_info->dead_roots,
1899 struct btrfs_root, root_list);
1900 list_del(&root->root_list);
1886 spin_unlock(&fs_info->trans_lock); 1901 spin_unlock(&fs_info->trans_lock);
1887 1902
1888 while (!list_empty(&list)) { 1903 pr_debug("btrfs: cleaner removing %llu\n",
1889 int ret; 1904 (unsigned long long)root->objectid);
1890
1891 root = list_entry(list.next, struct btrfs_root, root_list);
1892 list_del(&root->root_list);
1893 1905
1894 btrfs_kill_all_delayed_nodes(root); 1906 btrfs_kill_all_delayed_nodes(root);
1895 1907
1896 if (btrfs_header_backref_rev(root->node) < 1908 if (btrfs_header_backref_rev(root->node) <
1897 BTRFS_MIXED_BACKREF_REV) 1909 BTRFS_MIXED_BACKREF_REV)
1898 ret = btrfs_drop_snapshot(root, NULL, 0, 0); 1910 ret = btrfs_drop_snapshot(root, NULL, 0, 0);
1899 else 1911 else
1900 ret =btrfs_drop_snapshot(root, NULL, 1, 0); 1912 ret = btrfs_drop_snapshot(root, NULL, 1, 0);
1901 BUG_ON(ret < 0); 1913 /*
1902 } 1914 * If we encounter a transaction abort during snapshot cleaning, we
1903 return 0; 1915 * don't want to crash here
1916 */
1917 BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS);
1918 return 1;
1904} 1919}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 3c8e0d25c8e4..24c97335a59f 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -123,7 +123,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
123 123
124int btrfs_add_dead_root(struct btrfs_root *root); 124int btrfs_add_dead_root(struct btrfs_root *root);
125int btrfs_defrag_root(struct btrfs_root *root); 125int btrfs_defrag_root(struct btrfs_root *root);
126int btrfs_clean_old_snapshots(struct btrfs_root *root); 126int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
127int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 127int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
128 struct btrfs_root *root); 128 struct btrfs_root *root);
129int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, 129int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
@@ -146,5 +146,4 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
146 struct extent_io_tree *dirty_pages, int mark); 146 struct extent_io_tree *dirty_pages, int mark);
147int btrfs_transaction_blocked(struct btrfs_fs_info *info); 147int btrfs_transaction_blocked(struct btrfs_fs_info *info);
148int btrfs_transaction_in_commit(struct btrfs_fs_info *info); 148int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
149void put_transaction(struct btrfs_transaction *transaction);
150#endif 149#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 451fad96ecd1..c276ac9a0ec3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -277,17 +277,19 @@ static int process_one_buffer(struct btrfs_root *log,
277 struct extent_buffer *eb, 277 struct extent_buffer *eb,
278 struct walk_control *wc, u64 gen) 278 struct walk_control *wc, u64 gen)
279{ 279{
280 int ret = 0;
281
280 if (wc->pin) 282 if (wc->pin)
281 btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, 283 ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
282 eb->start, eb->len); 284 eb->start, eb->len);
283 285
284 if (btrfs_buffer_uptodate(eb, gen, 0)) { 286 if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
285 if (wc->write) 287 if (wc->write)
286 btrfs_write_tree_block(eb); 288 btrfs_write_tree_block(eb);
287 if (wc->wait) 289 if (wc->wait)
288 btrfs_wait_tree_block_writeback(eb); 290 btrfs_wait_tree_block_writeback(eb);
289 } 291 }
290 return 0; 292 return ret;
291} 293}
292 294
293/* 295/*
@@ -317,6 +319,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
317 unsigned long src_ptr; 319 unsigned long src_ptr;
318 unsigned long dst_ptr; 320 unsigned long dst_ptr;
319 int overwrite_root = 0; 321 int overwrite_root = 0;
322 bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
320 323
321 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 324 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
322 overwrite_root = 1; 325 overwrite_root = 1;
@@ -326,6 +329,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
326 329
327 /* look for the key in the destination tree */ 330 /* look for the key in the destination tree */
328 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 331 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
332 if (ret < 0)
333 return ret;
334
329 if (ret == 0) { 335 if (ret == 0) {
330 char *src_copy; 336 char *src_copy;
331 char *dst_copy; 337 char *dst_copy;
@@ -367,6 +373,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
367 return 0; 373 return 0;
368 } 374 }
369 375
376 /*
377 * We need to load the old nbytes into the inode so when we
378 * replay the extents we've logged we get the right nbytes.
379 */
380 if (inode_item) {
381 struct btrfs_inode_item *item;
382 u64 nbytes;
383
384 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
385 struct btrfs_inode_item);
386 nbytes = btrfs_inode_nbytes(path->nodes[0], item);
387 item = btrfs_item_ptr(eb, slot,
388 struct btrfs_inode_item);
389 btrfs_set_inode_nbytes(eb, item, nbytes);
390 }
391 } else if (inode_item) {
392 struct btrfs_inode_item *item;
393
394 /*
395 * New inode, set nbytes to 0 so that the nbytes comes out
396 * properly when we replay the extents.
397 */
398 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
399 btrfs_set_inode_nbytes(eb, item, 0);
370 } 400 }
371insert: 401insert:
372 btrfs_release_path(path); 402 btrfs_release_path(path);
@@ -380,9 +410,9 @@ insert:
380 found_size = btrfs_item_size_nr(path->nodes[0], 410 found_size = btrfs_item_size_nr(path->nodes[0],
381 path->slots[0]); 411 path->slots[0]);
382 if (found_size > item_size) 412 if (found_size > item_size)
383 btrfs_truncate_item(trans, root, path, item_size, 1); 413 btrfs_truncate_item(root, path, item_size, 1);
384 else if (found_size < item_size) 414 else if (found_size < item_size)
385 btrfs_extend_item(trans, root, path, 415 btrfs_extend_item(root, path,
386 item_size - found_size); 416 item_size - found_size);
387 } else if (ret) { 417 } else if (ret) {
388 return ret; 418 return ret;
@@ -486,7 +516,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
486 int found_type; 516 int found_type;
487 u64 extent_end; 517 u64 extent_end;
488 u64 start = key->offset; 518 u64 start = key->offset;
489 u64 saved_nbytes; 519 u64 nbytes = 0;
490 struct btrfs_file_extent_item *item; 520 struct btrfs_file_extent_item *item;
491 struct inode *inode = NULL; 521 struct inode *inode = NULL;
492 unsigned long size; 522 unsigned long size;
@@ -496,10 +526,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
496 found_type = btrfs_file_extent_type(eb, item); 526 found_type = btrfs_file_extent_type(eb, item);
497 527
498 if (found_type == BTRFS_FILE_EXTENT_REG || 528 if (found_type == BTRFS_FILE_EXTENT_REG ||
499 found_type == BTRFS_FILE_EXTENT_PREALLOC) 529 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
500 extent_end = start + btrfs_file_extent_num_bytes(eb, item); 530 nbytes = btrfs_file_extent_num_bytes(eb, item);
501 else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 531 extent_end = start + nbytes;
532
533 /*
534 * We don't add to the inodes nbytes if we are prealloc or a
535 * hole.
536 */
537 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
538 nbytes = 0;
539 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
502 size = btrfs_file_extent_inline_len(eb, item); 540 size = btrfs_file_extent_inline_len(eb, item);
541 nbytes = btrfs_file_extent_ram_bytes(eb, item);
503 extent_end = ALIGN(start + size, root->sectorsize); 542 extent_end = ALIGN(start + size, root->sectorsize);
504 } else { 543 } else {
505 ret = 0; 544 ret = 0;
@@ -548,10 +587,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
548 } 587 }
549 btrfs_release_path(path); 588 btrfs_release_path(path);
550 589
551 saved_nbytes = inode_get_bytes(inode);
552 /* drop any overlapping extents */ 590 /* drop any overlapping extents */
553 ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1); 591 ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
554 BUG_ON(ret); 592 if (ret)
593 goto out;
555 594
556 if (found_type == BTRFS_FILE_EXTENT_REG || 595 if (found_type == BTRFS_FILE_EXTENT_REG ||
557 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 596 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -561,7 +600,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
561 600
562 ret = btrfs_insert_empty_item(trans, root, path, key, 601 ret = btrfs_insert_empty_item(trans, root, path, key,
563 sizeof(*item)); 602 sizeof(*item));
564 BUG_ON(ret); 603 if (ret)
604 goto out;
565 dest_offset = btrfs_item_ptr_offset(path->nodes[0], 605 dest_offset = btrfs_item_ptr_offset(path->nodes[0],
566 path->slots[0]); 606 path->slots[0]);
567 copy_extent_buffer(path->nodes[0], eb, dest_offset, 607 copy_extent_buffer(path->nodes[0], eb, dest_offset,
@@ -587,7 +627,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
587 ins.objectid, ins.offset, 627 ins.objectid, ins.offset,
588 0, root->root_key.objectid, 628 0, root->root_key.objectid,
589 key->objectid, offset, 0); 629 key->objectid, offset, 0);
590 BUG_ON(ret); 630 if (ret)
631 goto out;
591 } else { 632 } else {
592 /* 633 /*
593 * insert the extent pointer in the extent 634 * insert the extent pointer in the extent
@@ -596,7 +637,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
596 ret = btrfs_alloc_logged_file_extent(trans, 637 ret = btrfs_alloc_logged_file_extent(trans,
597 root, root->root_key.objectid, 638 root, root->root_key.objectid,
598 key->objectid, offset, &ins); 639 key->objectid, offset, &ins);
599 BUG_ON(ret); 640 if (ret)
641 goto out;
600 } 642 }
601 btrfs_release_path(path); 643 btrfs_release_path(path);
602 644
@@ -613,29 +655,33 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
613 ret = btrfs_lookup_csums_range(root->log_root, 655 ret = btrfs_lookup_csums_range(root->log_root,
614 csum_start, csum_end - 1, 656 csum_start, csum_end - 1,
615 &ordered_sums, 0); 657 &ordered_sums, 0);
616 BUG_ON(ret); 658 if (ret)
659 goto out;
617 while (!list_empty(&ordered_sums)) { 660 while (!list_empty(&ordered_sums)) {
618 struct btrfs_ordered_sum *sums; 661 struct btrfs_ordered_sum *sums;
619 sums = list_entry(ordered_sums.next, 662 sums = list_entry(ordered_sums.next,
620 struct btrfs_ordered_sum, 663 struct btrfs_ordered_sum,
621 list); 664 list);
622 ret = btrfs_csum_file_blocks(trans, 665 if (!ret)
666 ret = btrfs_csum_file_blocks(trans,
623 root->fs_info->csum_root, 667 root->fs_info->csum_root,
624 sums); 668 sums);
625 BUG_ON(ret);
626 list_del(&sums->list); 669 list_del(&sums->list);
627 kfree(sums); 670 kfree(sums);
628 } 671 }
672 if (ret)
673 goto out;
629 } else { 674 } else {
630 btrfs_release_path(path); 675 btrfs_release_path(path);
631 } 676 }
632 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 677 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
633 /* inline extents are easy, we just overwrite them */ 678 /* inline extents are easy, we just overwrite them */
634 ret = overwrite_item(trans, root, path, eb, slot, key); 679 ret = overwrite_item(trans, root, path, eb, slot, key);
635 BUG_ON(ret); 680 if (ret)
681 goto out;
636 } 682 }
637 683
638 inode_set_bytes(inode, saved_nbytes); 684 inode_add_bytes(inode, nbytes);
639 ret = btrfs_update_inode(trans, root, inode); 685 ret = btrfs_update_inode(trans, root, inode);
640out: 686out:
641 if (inode) 687 if (inode)
@@ -677,20 +723,21 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
677 723
678 inode = read_one_inode(root, location.objectid); 724 inode = read_one_inode(root, location.objectid);
679 if (!inode) { 725 if (!inode) {
680 kfree(name); 726 ret = -EIO;
681 return -EIO; 727 goto out;
682 } 728 }
683 729
684 ret = link_to_fixup_dir(trans, root, path, location.objectid); 730 ret = link_to_fixup_dir(trans, root, path, location.objectid);
685 BUG_ON(ret); 731 if (ret)
732 goto out;
686 733
687 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); 734 ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
688 BUG_ON(ret); 735 if (ret)
736 goto out;
737 btrfs_run_delayed_items(trans, root);
738out:
689 kfree(name); 739 kfree(name);
690
691 iput(inode); 740 iput(inode);
692
693 btrfs_run_delayed_items(trans, root);
694 return ret; 741 return ret;
695} 742}
696 743
@@ -843,7 +890,8 @@ again:
843 victim_name_len = btrfs_inode_ref_name_len(leaf, 890 victim_name_len = btrfs_inode_ref_name_len(leaf,
844 victim_ref); 891 victim_ref);
845 victim_name = kmalloc(victim_name_len, GFP_NOFS); 892 victim_name = kmalloc(victim_name_len, GFP_NOFS);
846 BUG_ON(!victim_name); 893 if (!victim_name)
894 return -ENOMEM;
847 895
848 read_extent_buffer(leaf, victim_name, 896 read_extent_buffer(leaf, victim_name,
849 (unsigned long)(victim_ref + 1), 897 (unsigned long)(victim_ref + 1),
@@ -859,9 +907,10 @@ again:
859 ret = btrfs_unlink_inode(trans, root, dir, 907 ret = btrfs_unlink_inode(trans, root, dir,
860 inode, victim_name, 908 inode, victim_name,
861 victim_name_len); 909 victim_name_len);
862 BUG_ON(ret);
863 btrfs_run_delayed_items(trans, root);
864 kfree(victim_name); 910 kfree(victim_name);
911 if (ret)
912 return ret;
913 btrfs_run_delayed_items(trans, root);
865 *search_done = 1; 914 *search_done = 1;
866 goto again; 915 goto again;
867 } 916 }
@@ -869,7 +918,6 @@ again:
869 918
870 ptr = (unsigned long)(victim_ref + 1) + victim_name_len; 919 ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
871 } 920 }
872 BUG_ON(ret);
873 921
874 /* 922 /*
875 * NOTE: we have searched root tree and checked the 923 * NOTE: we have searched root tree and checked the
@@ -903,6 +951,8 @@ again:
903 goto next; 951 goto next;
904 952
905 victim_name = kmalloc(victim_name_len, GFP_NOFS); 953 victim_name = kmalloc(victim_name_len, GFP_NOFS);
954 if (!victim_name)
955 return -ENOMEM;
906 read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name, 956 read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
907 victim_name_len); 957 victim_name_len);
908 958
@@ -929,14 +979,16 @@ again:
929 victim_name_len); 979 victim_name_len);
930 btrfs_run_delayed_items(trans, root); 980 btrfs_run_delayed_items(trans, root);
931 } 981 }
932 BUG_ON(ret);
933 iput(victim_parent); 982 iput(victim_parent);
934 kfree(victim_name); 983 kfree(victim_name);
984 if (ret)
985 return ret;
935 *search_done = 1; 986 *search_done = 1;
936 goto again; 987 goto again;
937 } 988 }
938 kfree(victim_name); 989 kfree(victim_name);
939 BUG_ON(ret); 990 if (ret)
991 return ret;
940next: 992next:
941 cur_offset += victim_name_len + sizeof(*extref); 993 cur_offset += victim_name_len + sizeof(*extref);
942 } 994 }
@@ -949,7 +1001,8 @@ next:
949 ref_index, name, namelen, 0); 1001 ref_index, name, namelen, 0);
950 if (di && !IS_ERR(di)) { 1002 if (di && !IS_ERR(di)) {
951 ret = drop_one_dir_item(trans, root, path, dir, di); 1003 ret = drop_one_dir_item(trans, root, path, dir, di);
952 BUG_ON(ret); 1004 if (ret)
1005 return ret;
953 } 1006 }
954 btrfs_release_path(path); 1007 btrfs_release_path(path);
955 1008
@@ -958,7 +1011,8 @@ next:
958 name, namelen, 0); 1011 name, namelen, 0);
959 if (di && !IS_ERR(di)) { 1012 if (di && !IS_ERR(di)) {
960 ret = drop_one_dir_item(trans, root, path, dir, di); 1013 ret = drop_one_dir_item(trans, root, path, dir, di);
961 BUG_ON(ret); 1014 if (ret)
1015 return ret;
962 } 1016 }
963 btrfs_release_path(path); 1017 btrfs_release_path(path);
964 1018
@@ -1103,15 +1157,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1103 parent_objectid, 1157 parent_objectid,
1104 ref_index, name, namelen, 1158 ref_index, name, namelen,
1105 &search_done); 1159 &search_done);
1106 if (ret == 1) 1160 if (ret == 1) {
1161 ret = 0;
1162 goto out;
1163 }
1164 if (ret)
1107 goto out; 1165 goto out;
1108 BUG_ON(ret);
1109 } 1166 }
1110 1167
1111 /* insert our name */ 1168 /* insert our name */
1112 ret = btrfs_add_link(trans, dir, inode, name, namelen, 1169 ret = btrfs_add_link(trans, dir, inode, name, namelen,
1113 0, ref_index); 1170 0, ref_index);
1114 BUG_ON(ret); 1171 if (ret)
1172 goto out;
1115 1173
1116 btrfs_update_inode(trans, root, inode); 1174 btrfs_update_inode(trans, root, inode);
1117 } 1175 }
@@ -1126,13 +1184,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
1126 1184
1127 /* finally write the back reference in the inode */ 1185 /* finally write the back reference in the inode */
1128 ret = overwrite_item(trans, root, path, eb, slot, key); 1186 ret = overwrite_item(trans, root, path, eb, slot, key);
1129 BUG_ON(ret);
1130
1131out: 1187out:
1132 btrfs_release_path(path); 1188 btrfs_release_path(path);
1133 iput(dir); 1189 iput(dir);
1134 iput(inode); 1190 iput(inode);
1135 return 0; 1191 return ret;
1136} 1192}
1137 1193
1138static int insert_orphan_item(struct btrfs_trans_handle *trans, 1194static int insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -1290,10 +1346,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1290 if (S_ISDIR(inode->i_mode)) { 1346 if (S_ISDIR(inode->i_mode)) {
1291 ret = replay_dir_deletes(trans, root, NULL, path, 1347 ret = replay_dir_deletes(trans, root, NULL, path,
1292 ino, 1); 1348 ino, 1);
1293 BUG_ON(ret); 1349 if (ret)
1350 goto out;
1294 } 1351 }
1295 ret = insert_orphan_item(trans, root, ino); 1352 ret = insert_orphan_item(trans, root, ino);
1296 BUG_ON(ret);
1297 } 1353 }
1298 1354
1299out: 1355out:
@@ -1338,9 +1394,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
1338 return -EIO; 1394 return -EIO;
1339 1395
1340 ret = fixup_inode_link_count(trans, root, inode); 1396 ret = fixup_inode_link_count(trans, root, inode);
1341 BUG_ON(ret);
1342
1343 iput(inode); 1397 iput(inode);
1398 if (ret)
1399 goto out;
1344 1400
1345 /* 1401 /*
1346 * fixup on a directory may create new entries, 1402 * fixup on a directory may create new entries,
@@ -1390,7 +1446,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
1390 } else if (ret == -EEXIST) { 1446 } else if (ret == -EEXIST) {
1391 ret = 0; 1447 ret = 0;
1392 } else { 1448 } else {
1393 BUG(); 1449 BUG(); /* Logic Error */
1394 } 1450 }
1395 iput(inode); 1451 iput(inode);
1396 1452
@@ -1459,7 +1515,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1459 struct inode *dir; 1515 struct inode *dir;
1460 u8 log_type; 1516 u8 log_type;
1461 int exists; 1517 int exists;
1462 int ret; 1518 int ret = 0;
1463 1519
1464 dir = read_one_inode(root, key->objectid); 1520 dir = read_one_inode(root, key->objectid);
1465 if (!dir) 1521 if (!dir)
@@ -1491,7 +1547,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1491 key->offset, name, 1547 key->offset, name,
1492 name_len, 1); 1548 name_len, 1);
1493 } else { 1549 } else {
1494 BUG(); 1550 /* Corruption */
1551 ret = -EINVAL;
1552 goto out;
1495 } 1553 }
1496 if (IS_ERR_OR_NULL(dst_di)) { 1554 if (IS_ERR_OR_NULL(dst_di)) {
1497 /* we need a sequence number to insert, so we only 1555 /* we need a sequence number to insert, so we only
@@ -1519,7 +1577,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1519 goto out; 1577 goto out;
1520 1578
1521 ret = drop_one_dir_item(trans, root, path, dir, dst_di); 1579 ret = drop_one_dir_item(trans, root, path, dir, dst_di);
1522 BUG_ON(ret); 1580 if (ret)
1581 goto out;
1523 1582
1524 if (key->type == BTRFS_DIR_INDEX_KEY) 1583 if (key->type == BTRFS_DIR_INDEX_KEY)
1525 goto insert; 1584 goto insert;
@@ -1527,14 +1586,15 @@ out:
1527 btrfs_release_path(path); 1586 btrfs_release_path(path);
1528 kfree(name); 1587 kfree(name);
1529 iput(dir); 1588 iput(dir);
1530 return 0; 1589 return ret;
1531 1590
1532insert: 1591insert:
1533 btrfs_release_path(path); 1592 btrfs_release_path(path);
1534 ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1593 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1535 name, name_len, log_type, &log_key); 1594 name, name_len, log_type, &log_key);
1536 1595 if (ret && ret != -ENOENT)
1537 BUG_ON(ret && ret != -ENOENT); 1596 goto out;
1597 ret = 0;
1538 goto out; 1598 goto out;
1539} 1599}
1540 1600
@@ -1565,7 +1625,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1565 return -EIO; 1625 return -EIO;
1566 name_len = btrfs_dir_name_len(eb, di); 1626 name_len = btrfs_dir_name_len(eb, di);
1567 ret = replay_one_name(trans, root, path, eb, di, key); 1627 ret = replay_one_name(trans, root, path, eb, di, key);
1568 BUG_ON(ret); 1628 if (ret)
1629 return ret;
1569 ptr = (unsigned long)(di + 1); 1630 ptr = (unsigned long)(di + 1);
1570 ptr += name_len; 1631 ptr += name_len;
1571 } 1632 }
@@ -1726,16 +1787,21 @@ again:
1726 1787
1727 ret = link_to_fixup_dir(trans, root, 1788 ret = link_to_fixup_dir(trans, root,
1728 path, location.objectid); 1789 path, location.objectid);
1729 BUG_ON(ret); 1790 if (ret) {
1791 kfree(name);
1792 iput(inode);
1793 goto out;
1794 }
1795
1730 btrfs_inc_nlink(inode); 1796 btrfs_inc_nlink(inode);
1731 ret = btrfs_unlink_inode(trans, root, dir, inode, 1797 ret = btrfs_unlink_inode(trans, root, dir, inode,
1732 name, name_len); 1798 name, name_len);
1733 BUG_ON(ret); 1799 if (!ret)
1734 1800 btrfs_run_delayed_items(trans, root);
1735 btrfs_run_delayed_items(trans, root);
1736
1737 kfree(name); 1801 kfree(name);
1738 iput(inode); 1802 iput(inode);
1803 if (ret)
1804 goto out;
1739 1805
1740 /* there might still be more names under this key 1806 /* there might still be more names under this key
1741 * check and repeat if required 1807 * check and repeat if required
@@ -1839,7 +1905,8 @@ again:
1839 ret = check_item_in_log(trans, root, log, path, 1905 ret = check_item_in_log(trans, root, log, path,
1840 log_path, dir, 1906 log_path, dir,
1841 &found_key); 1907 &found_key);
1842 BUG_ON(ret); 1908 if (ret)
1909 goto out;
1843 if (found_key.offset == (u64)-1) 1910 if (found_key.offset == (u64)-1)
1844 break; 1911 break;
1845 dir_key.offset = found_key.offset + 1; 1912 dir_key.offset = found_key.offset + 1;
@@ -1916,11 +1983,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1916 if (S_ISDIR(mode)) { 1983 if (S_ISDIR(mode)) {
1917 ret = replay_dir_deletes(wc->trans, 1984 ret = replay_dir_deletes(wc->trans,
1918 root, log, path, key.objectid, 0); 1985 root, log, path, key.objectid, 0);
1919 BUG_ON(ret); 1986 if (ret)
1987 break;
1920 } 1988 }
1921 ret = overwrite_item(wc->trans, root, path, 1989 ret = overwrite_item(wc->trans, root, path,
1922 eb, i, &key); 1990 eb, i, &key);
1923 BUG_ON(ret); 1991 if (ret)
1992 break;
1924 1993
1925 /* for regular files, make sure corresponding 1994 /* for regular files, make sure corresponding
1926 * orhpan item exist. extents past the new EOF 1995 * orhpan item exist. extents past the new EOF
@@ -1929,12 +1998,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1929 if (S_ISREG(mode)) { 1998 if (S_ISREG(mode)) {
1930 ret = insert_orphan_item(wc->trans, root, 1999 ret = insert_orphan_item(wc->trans, root,
1931 key.objectid); 2000 key.objectid);
1932 BUG_ON(ret); 2001 if (ret)
2002 break;
1933 } 2003 }
1934 2004
1935 ret = link_to_fixup_dir(wc->trans, root, 2005 ret = link_to_fixup_dir(wc->trans, root,
1936 path, key.objectid); 2006 path, key.objectid);
1937 BUG_ON(ret); 2007 if (ret)
2008 break;
1938 } 2009 }
1939 if (wc->stage < LOG_WALK_REPLAY_ALL) 2010 if (wc->stage < LOG_WALK_REPLAY_ALL)
1940 continue; 2011 continue;
@@ -1943,28 +2014,35 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1943 if (key.type == BTRFS_XATTR_ITEM_KEY) { 2014 if (key.type == BTRFS_XATTR_ITEM_KEY) {
1944 ret = overwrite_item(wc->trans, root, path, 2015 ret = overwrite_item(wc->trans, root, path,
1945 eb, i, &key); 2016 eb, i, &key);
1946 BUG_ON(ret); 2017 if (ret)
2018 break;
1947 } else if (key.type == BTRFS_INODE_REF_KEY) { 2019 } else if (key.type == BTRFS_INODE_REF_KEY) {
1948 ret = add_inode_ref(wc->trans, root, log, path, 2020 ret = add_inode_ref(wc->trans, root, log, path,
1949 eb, i, &key); 2021 eb, i, &key);
1950 BUG_ON(ret && ret != -ENOENT); 2022 if (ret && ret != -ENOENT)
2023 break;
2024 ret = 0;
1951 } else if (key.type == BTRFS_INODE_EXTREF_KEY) { 2025 } else if (key.type == BTRFS_INODE_EXTREF_KEY) {
1952 ret = add_inode_ref(wc->trans, root, log, path, 2026 ret = add_inode_ref(wc->trans, root, log, path,
1953 eb, i, &key); 2027 eb, i, &key);
1954 BUG_ON(ret && ret != -ENOENT); 2028 if (ret && ret != -ENOENT)
2029 break;
2030 ret = 0;
1955 } else if (key.type == BTRFS_EXTENT_DATA_KEY) { 2031 } else if (key.type == BTRFS_EXTENT_DATA_KEY) {
1956 ret = replay_one_extent(wc->trans, root, path, 2032 ret = replay_one_extent(wc->trans, root, path,
1957 eb, i, &key); 2033 eb, i, &key);
1958 BUG_ON(ret); 2034 if (ret)
2035 break;
1959 } else if (key.type == BTRFS_DIR_ITEM_KEY || 2036 } else if (key.type == BTRFS_DIR_ITEM_KEY ||
1960 key.type == BTRFS_DIR_INDEX_KEY) { 2037 key.type == BTRFS_DIR_INDEX_KEY) {
1961 ret = replay_one_dir_item(wc->trans, root, path, 2038 ret = replay_one_dir_item(wc->trans, root, path,
1962 eb, i, &key); 2039 eb, i, &key);
1963 BUG_ON(ret); 2040 if (ret)
2041 break;
1964 } 2042 }
1965 } 2043 }
1966 btrfs_free_path(path); 2044 btrfs_free_path(path);
1967 return 0; 2045 return ret;
1968} 2046}
1969 2047
1970static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, 2048static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
@@ -2009,8 +2087,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2009 2087
2010 if (*level == 1) { 2088 if (*level == 1) {
2011 ret = wc->process_func(root, next, wc, ptr_gen); 2089 ret = wc->process_func(root, next, wc, ptr_gen);
2012 if (ret) 2090 if (ret) {
2091 free_extent_buffer(next);
2013 return ret; 2092 return ret;
2093 }
2014 2094
2015 path->slots[*level]++; 2095 path->slots[*level]++;
2016 if (wc->free) { 2096 if (wc->free) {
@@ -2030,7 +2110,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2030 BTRFS_TREE_LOG_OBJECTID); 2110 BTRFS_TREE_LOG_OBJECTID);
2031 ret = btrfs_free_and_pin_reserved_extent(root, 2111 ret = btrfs_free_and_pin_reserved_extent(root,
2032 bytenr, blocksize); 2112 bytenr, blocksize);
2033 BUG_ON(ret); /* -ENOMEM or logic errors */ 2113 if (ret) {
2114 free_extent_buffer(next);
2115 return ret;
2116 }
2034 } 2117 }
2035 free_extent_buffer(next); 2118 free_extent_buffer(next);
2036 continue; 2119 continue;
@@ -2103,7 +2186,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2103 ret = btrfs_free_and_pin_reserved_extent(root, 2186 ret = btrfs_free_and_pin_reserved_extent(root,
2104 path->nodes[*level]->start, 2187 path->nodes[*level]->start,
2105 path->nodes[*level]->len); 2188 path->nodes[*level]->len);
2106 BUG_ON(ret); 2189 if (ret)
2190 return ret;
2107 } 2191 }
2108 free_extent_buffer(path->nodes[*level]); 2192 free_extent_buffer(path->nodes[*level]);
2109 path->nodes[*level] = NULL; 2193 path->nodes[*level] = NULL;
@@ -2125,7 +2209,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2125 int wret; 2209 int wret;
2126 int level; 2210 int level;
2127 struct btrfs_path *path; 2211 struct btrfs_path *path;
2128 int i;
2129 int orig_level; 2212 int orig_level;
2130 2213
2131 path = btrfs_alloc_path(); 2214 path = btrfs_alloc_path();
@@ -2177,17 +2260,12 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2177 BTRFS_TREE_LOG_OBJECTID); 2260 BTRFS_TREE_LOG_OBJECTID);
2178 ret = btrfs_free_and_pin_reserved_extent(log, next->start, 2261 ret = btrfs_free_and_pin_reserved_extent(log, next->start,
2179 next->len); 2262 next->len);
2180 BUG_ON(ret); /* -ENOMEM or logic errors */ 2263 if (ret)
2264 goto out;
2181 } 2265 }
2182 } 2266 }
2183 2267
2184out: 2268out:
2185 for (i = 0; i <= orig_level; i++) {
2186 if (path->nodes[i]) {
2187 free_extent_buffer(path->nodes[i]);
2188 path->nodes[i] = NULL;
2189 }
2190 }
2191 btrfs_free_path(path); 2269 btrfs_free_path(path);
2192 return ret; 2270 return ret;
2193} 2271}
@@ -2471,7 +2549,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2471 2549
2472 if (trans) { 2550 if (trans) {
2473 ret = walk_log_tree(trans, log, &wc); 2551 ret = walk_log_tree(trans, log, &wc);
2474 BUG_ON(ret); 2552
2553 /* I don't think this can happen but just in case */
2554 if (ret)
2555 btrfs_abort_transaction(trans, log, ret);
2475 } 2556 }
2476 2557
2477 while (1) { 2558 while (1) {
@@ -2579,7 +2660,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2579 if (di) { 2660 if (di) {
2580 ret = btrfs_delete_one_dir_name(trans, log, path, di); 2661 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2581 bytes_del += name_len; 2662 bytes_del += name_len;
2582 BUG_ON(ret); 2663 if (ret) {
2664 err = ret;
2665 goto fail;
2666 }
2583 } 2667 }
2584 btrfs_release_path(path); 2668 btrfs_release_path(path);
2585 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, 2669 di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
@@ -2591,7 +2675,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
2591 if (di) { 2675 if (di) {
2592 ret = btrfs_delete_one_dir_name(trans, log, path, di); 2676 ret = btrfs_delete_one_dir_name(trans, log, path, di);
2593 bytes_del += name_len; 2677 bytes_del += name_len;
2594 BUG_ON(ret); 2678 if (ret) {
2679 err = ret;
2680 goto fail;
2681 }
2595 } 2682 }
2596 2683
2597 /* update the directory size in the log to reflect the names 2684 /* update the directory size in the log to reflect the names
@@ -2930,7 +3017,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2930 3017
2931 while (1) { 3018 while (1) {
2932 ret = btrfs_search_slot(trans, log, &key, path, -1, 1); 3019 ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
2933 BUG_ON(ret == 0); 3020 BUG_ON(ret == 0); /* Logic error */
2934 if (ret < 0) 3021 if (ret < 0)
2935 break; 3022 break;
2936 3023
@@ -3133,7 +3220,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3133 log->fs_info->csum_root, 3220 log->fs_info->csum_root,
3134 ds + cs, ds + cs + cl - 1, 3221 ds + cs, ds + cs + cl - 1,
3135 &ordered_sums, 0); 3222 &ordered_sums, 0);
3136 BUG_ON(ret); 3223 if (ret) {
3224 btrfs_release_path(dst_path);
3225 kfree(ins_data);
3226 return ret;
3227 }
3137 } 3228 }
3138 } 3229 }
3139 } 3230 }
@@ -3173,115 +3264,6 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
3173 return 0; 3264 return 0;
3174} 3265}
3175 3266
3176static int drop_adjacent_extents(struct btrfs_trans_handle *trans,
3177 struct btrfs_root *root, struct inode *inode,
3178 struct extent_map *em,
3179 struct btrfs_path *path)
3180{
3181 struct btrfs_file_extent_item *fi;
3182 struct extent_buffer *leaf;
3183 struct btrfs_key key, new_key;
3184 struct btrfs_map_token token;
3185 u64 extent_end;
3186 u64 extent_offset = 0;
3187 int extent_type;
3188 int del_slot = 0;
3189 int del_nr = 0;
3190 int ret = 0;
3191
3192 while (1) {
3193 btrfs_init_map_token(&token);
3194 leaf = path->nodes[0];
3195 path->slots[0]++;
3196 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3197 if (del_nr) {
3198 ret = btrfs_del_items(trans, root, path,
3199 del_slot, del_nr);
3200 if (ret)
3201 return ret;
3202 del_nr = 0;
3203 }
3204
3205 ret = btrfs_next_leaf_write(trans, root, path, 1);
3206 if (ret < 0)
3207 return ret;
3208 if (ret > 0)
3209 return 0;
3210 leaf = path->nodes[0];
3211 }
3212
3213 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3214 if (key.objectid != btrfs_ino(inode) ||
3215 key.type != BTRFS_EXTENT_DATA_KEY ||
3216 key.offset >= em->start + em->len)
3217 break;
3218
3219 fi = btrfs_item_ptr(leaf, path->slots[0],
3220 struct btrfs_file_extent_item);
3221 extent_type = btrfs_token_file_extent_type(leaf, fi, &token);
3222 if (extent_type == BTRFS_FILE_EXTENT_REG ||
3223 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
3224 extent_offset = btrfs_token_file_extent_offset(leaf,
3225 fi, &token);
3226 extent_end = key.offset +
3227 btrfs_token_file_extent_num_bytes(leaf, fi,
3228 &token);
3229 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
3230 extent_end = key.offset +
3231 btrfs_file_extent_inline_len(leaf, fi);
3232 } else {
3233 BUG();
3234 }
3235
3236 if (extent_end <= em->len + em->start) {
3237 if (!del_nr) {
3238 del_slot = path->slots[0];
3239 }
3240 del_nr++;
3241 continue;
3242 }
3243
3244 /*
3245 * Ok so we'll ignore previous items if we log a new extent,
3246 * which can lead to overlapping extents, so if we have an
3247 * existing extent we want to adjust we _have_ to check the next
3248 * guy to make sure we even need this extent anymore, this keeps
3249 * us from panicing in set_item_key_safe.
3250 */
3251 if (path->slots[0] < btrfs_header_nritems(leaf) - 1) {
3252 struct btrfs_key tmp_key;
3253
3254 btrfs_item_key_to_cpu(leaf, &tmp_key,
3255 path->slots[0] + 1);
3256 if (tmp_key.objectid == btrfs_ino(inode) &&
3257 tmp_key.type == BTRFS_EXTENT_DATA_KEY &&
3258 tmp_key.offset <= em->start + em->len) {
3259 if (!del_nr)
3260 del_slot = path->slots[0];
3261 del_nr++;
3262 continue;
3263 }
3264 }
3265
3266 BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
3267 memcpy(&new_key, &key, sizeof(new_key));
3268 new_key.offset = em->start + em->len;
3269 btrfs_set_item_key_safe(trans, root, path, &new_key);
3270 extent_offset += em->start + em->len - key.offset;
3271 btrfs_set_token_file_extent_offset(leaf, fi, extent_offset,
3272 &token);
3273 btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end -
3274 (em->start + em->len),
3275 &token);
3276 btrfs_mark_buffer_dirty(leaf);
3277 }
3278
3279 if (del_nr)
3280 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
3281
3282 return ret;
3283}
3284
3285static int log_one_extent(struct btrfs_trans_handle *trans, 3267static int log_one_extent(struct btrfs_trans_handle *trans,
3286 struct inode *inode, struct btrfs_root *root, 3268 struct inode *inode, struct btrfs_root *root,
3287 struct extent_map *em, struct btrfs_path *path) 3269 struct extent_map *em, struct btrfs_path *path)
@@ -3303,39 +3285,24 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3303 int index = log->log_transid % 2; 3285 int index = log->log_transid % 2;
3304 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3286 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3305 3287
3306insert: 3288 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3289 em->start + em->len, NULL, 0);
3290 if (ret)
3291 return ret;
3292
3307 INIT_LIST_HEAD(&ordered_sums); 3293 INIT_LIST_HEAD(&ordered_sums);
3308 btrfs_init_map_token(&token); 3294 btrfs_init_map_token(&token);
3309 key.objectid = btrfs_ino(inode); 3295 key.objectid = btrfs_ino(inode);
3310 key.type = BTRFS_EXTENT_DATA_KEY; 3296 key.type = BTRFS_EXTENT_DATA_KEY;
3311 key.offset = em->start; 3297 key.offset = em->start;
3312 path->really_keep_locks = 1;
3313 3298
3314 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); 3299 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
3315 if (ret && ret != -EEXIST) { 3300 if (ret)
3316 path->really_keep_locks = 0;
3317 return ret; 3301 return ret;
3318 }
3319 leaf = path->nodes[0]; 3302 leaf = path->nodes[0];
3320 fi = btrfs_item_ptr(leaf, path->slots[0], 3303 fi = btrfs_item_ptr(leaf, path->slots[0],
3321 struct btrfs_file_extent_item); 3304 struct btrfs_file_extent_item);
3322 3305
3323 /*
3324 * If we are overwriting an inline extent with a real one then we need
3325 * to just delete the inline extent as it may not be large enough to
3326 * have the entire file_extent_item.
3327 */
3328 if (ret && btrfs_token_file_extent_type(leaf, fi, &token) ==
3329 BTRFS_FILE_EXTENT_INLINE) {
3330 ret = btrfs_del_item(trans, log, path);
3331 btrfs_release_path(path);
3332 if (ret) {
3333 path->really_keep_locks = 0;
3334 return ret;
3335 }
3336 goto insert;
3337 }
3338
3339 btrfs_set_token_file_extent_generation(leaf, fi, em->generation, 3306 btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
3340 &token); 3307 &token);
3341 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { 3308 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3374,22 +3341,14 @@ insert:
3374 em->start - em->orig_start, 3341 em->start - em->orig_start,
3375 &token); 3342 &token);
3376 btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); 3343 btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
3377 btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->len, &token); 3344 btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
3378 btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, 3345 btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
3379 &token); 3346 &token);
3380 btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); 3347 btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
3381 btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); 3348 btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
3382 btrfs_mark_buffer_dirty(leaf); 3349 btrfs_mark_buffer_dirty(leaf);
3383 3350
3384 /*
3385 * Have to check the extent to the right of us to make sure it doesn't
3386 * fall in our current range. We're ok if the previous extent is in our
3387 * range since the recovery stuff will run us in key order and thus just
3388 * drop the part we overwrote.
3389 */
3390 ret = drop_adjacent_extents(trans, log, inode, em, path);
3391 btrfs_release_path(path); 3351 btrfs_release_path(path);
3392 path->really_keep_locks = 0;
3393 if (ret) { 3352 if (ret) {
3394 return ret; 3353 return ret;
3395 } 3354 }
@@ -3614,8 +3573,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3614 bool fast_search = false; 3573 bool fast_search = false;
3615 u64 ino = btrfs_ino(inode); 3574 u64 ino = btrfs_ino(inode);
3616 3575
3617 log = root->log_root;
3618
3619 path = btrfs_alloc_path(); 3576 path = btrfs_alloc_path();
3620 if (!path) 3577 if (!path)
3621 return -ENOMEM; 3578 return -ENOMEM;
@@ -3882,9 +3839,9 @@ out:
3882 * only logging is done of any parent directories that are older than 3839 * only logging is done of any parent directories that are older than
3883 * the last committed transaction 3840 * the last committed transaction
3884 */ 3841 */
3885int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 3842static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3886 struct btrfs_root *root, struct inode *inode, 3843 struct btrfs_root *root, struct inode *inode,
3887 struct dentry *parent, int exists_only) 3844 struct dentry *parent, int exists_only)
3888{ 3845{
3889 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 3846 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
3890 struct super_block *sb; 3847 struct super_block *sb;
@@ -4075,6 +4032,9 @@ again:
4075 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); 4032 wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
4076 if (IS_ERR(wc.replay_dest)) { 4033 if (IS_ERR(wc.replay_dest)) {
4077 ret = PTR_ERR(wc.replay_dest); 4034 ret = PTR_ERR(wc.replay_dest);
4035 free_extent_buffer(log->node);
4036 free_extent_buffer(log->commit_root);
4037 kfree(log);
4078 btrfs_error(fs_info, ret, "Couldn't read target root " 4038 btrfs_error(fs_info, ret, "Couldn't read target root "
4079 "for tree log recovery."); 4039 "for tree log recovery.");
4080 goto error; 4040 goto error;
@@ -4083,12 +4043,10 @@ again:
4083 wc.replay_dest->log_root = log; 4043 wc.replay_dest->log_root = log;
4084 btrfs_record_root_in_trans(trans, wc.replay_dest); 4044 btrfs_record_root_in_trans(trans, wc.replay_dest);
4085 ret = walk_log_tree(trans, log, &wc); 4045 ret = walk_log_tree(trans, log, &wc);
4086 BUG_ON(ret);
4087 4046
4088 if (wc.stage == LOG_WALK_REPLAY_ALL) { 4047 if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
4089 ret = fixup_inode_link_counts(trans, wc.replay_dest, 4048 ret = fixup_inode_link_counts(trans, wc.replay_dest,
4090 path); 4049 path);
4091 BUG_ON(ret);
4092 } 4050 }
4093 4051
4094 key.offset = found_key.offset - 1; 4052 key.offset = found_key.offset - 1;
@@ -4097,6 +4055,9 @@ again:
4097 free_extent_buffer(log->commit_root); 4055 free_extent_buffer(log->commit_root);
4098 kfree(log); 4056 kfree(log);
4099 4057
4058 if (ret)
4059 goto error;
4060
4100 if (found_key.offset == 0) 4061 if (found_key.offset == 0)
4101 break; 4062 break;
4102 } 4063 }
@@ -4117,17 +4078,20 @@ again:
4117 4078
4118 btrfs_free_path(path); 4079 btrfs_free_path(path);
4119 4080
4081 /* step 4: commit the transaction, which also unpins the blocks */
4082 ret = btrfs_commit_transaction(trans, fs_info->tree_root);
4083 if (ret)
4084 return ret;
4085
4120 free_extent_buffer(log_root_tree->node); 4086 free_extent_buffer(log_root_tree->node);
4121 log_root_tree->log_root = NULL; 4087 log_root_tree->log_root = NULL;
4122 fs_info->log_root_recovering = 0; 4088 fs_info->log_root_recovering = 0;
4123
4124 /* step 4: commit the transaction, which also unpins the blocks */
4125 btrfs_commit_transaction(trans, fs_info->tree_root);
4126
4127 kfree(log_root_tree); 4089 kfree(log_root_tree);
4128 return 0;
4129 4090
4091 return 0;
4130error: 4092error:
4093 if (wc.trans)
4094 btrfs_end_transaction(wc.trans, fs_info->tree_root);
4131 btrfs_free_path(path); 4095 btrfs_free_path(path);
4132 return ret; 4096 return ret;
4133} 4097}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 862ac813f6b8..1d4ae0d15a70 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -40,9 +40,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
40 struct inode *inode, u64 dirid); 40 struct inode *inode, u64 dirid);
41void btrfs_end_log_trans(struct btrfs_root *root); 41void btrfs_end_log_trans(struct btrfs_root *root);
42int btrfs_pin_log_trans(struct btrfs_root *root); 42int btrfs_pin_log_trans(struct btrfs_root *root);
43int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
44 struct btrfs_root *root, struct inode *inode,
45 struct dentry *parent, int exists_only);
46void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, 43void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
47 struct inode *dir, struct inode *inode, 44 struct inode *dir, struct inode *inode,
48 int for_rename); 45 int for_rename);
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index ddc61cad0080..7b417e20efe2 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -53,6 +53,7 @@ void ulist_init(struct ulist *ulist)
53 ulist->nnodes = 0; 53 ulist->nnodes = 0;
54 ulist->nodes = ulist->int_nodes; 54 ulist->nodes = ulist->int_nodes;
55 ulist->nodes_alloced = ULIST_SIZE; 55 ulist->nodes_alloced = ULIST_SIZE;
56 ulist->root = RB_ROOT;
56} 57}
57EXPORT_SYMBOL(ulist_init); 58EXPORT_SYMBOL(ulist_init);
58 59
@@ -72,6 +73,7 @@ void ulist_fini(struct ulist *ulist)
72 if (ulist->nodes_alloced > ULIST_SIZE) 73 if (ulist->nodes_alloced > ULIST_SIZE)
73 kfree(ulist->nodes); 74 kfree(ulist->nodes);
74 ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */ 75 ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
76 ulist->root = RB_ROOT;
75} 77}
76EXPORT_SYMBOL(ulist_fini); 78EXPORT_SYMBOL(ulist_fini);
77 79
@@ -123,6 +125,45 @@ void ulist_free(struct ulist *ulist)
123} 125}
124EXPORT_SYMBOL(ulist_free); 126EXPORT_SYMBOL(ulist_free);
125 127
128static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
129{
130 struct rb_node *n = ulist->root.rb_node;
131 struct ulist_node *u = NULL;
132
133 while (n) {
134 u = rb_entry(n, struct ulist_node, rb_node);
135 if (u->val < val)
136 n = n->rb_right;
137 else if (u->val > val)
138 n = n->rb_left;
139 else
140 return u;
141 }
142 return NULL;
143}
144
145static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
146{
147 struct rb_node **p = &ulist->root.rb_node;
148 struct rb_node *parent = NULL;
149 struct ulist_node *cur = NULL;
150
151 while (*p) {
152 parent = *p;
153 cur = rb_entry(parent, struct ulist_node, rb_node);
154
155 if (cur->val < ins->val)
156 p = &(*p)->rb_right;
157 else if (cur->val > ins->val)
158 p = &(*p)->rb_left;
159 else
160 return -EEXIST;
161 }
162 rb_link_node(&ins->rb_node, parent, p);
163 rb_insert_color(&ins->rb_node, &ulist->root);
164 return 0;
165}
166
126/** 167/**
127 * ulist_add - add an element to the ulist 168 * ulist_add - add an element to the ulist
128 * @ulist: ulist to add the element to 169 * @ulist: ulist to add the element to
@@ -151,14 +192,13 @@ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
151int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, 192int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
152 u64 *old_aux, gfp_t gfp_mask) 193 u64 *old_aux, gfp_t gfp_mask)
153{ 194{
154 int i; 195 int ret = 0;
155 196 struct ulist_node *node = NULL;
156 for (i = 0; i < ulist->nnodes; ++i) { 197 node = ulist_rbtree_search(ulist, val);
157 if (ulist->nodes[i].val == val) { 198 if (node) {
158 if (old_aux) 199 if (old_aux)
159 *old_aux = ulist->nodes[i].aux; 200 *old_aux = node->aux;
160 return 0; 201 return 0;
161 }
162 } 202 }
163 203
164 if (ulist->nnodes >= ulist->nodes_alloced) { 204 if (ulist->nnodes >= ulist->nodes_alloced) {
@@ -187,6 +227,8 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
187 } 227 }
188 ulist->nodes[ulist->nnodes].val = val; 228 ulist->nodes[ulist->nnodes].val = val;
189 ulist->nodes[ulist->nnodes].aux = aux; 229 ulist->nodes[ulist->nnodes].aux = aux;
230 ret = ulist_rbtree_insert(ulist, &ulist->nodes[ulist->nnodes]);
231 BUG_ON(ret);
190 ++ulist->nnodes; 232 ++ulist->nnodes;
191 233
192 return 1; 234 return 1;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 21a1963439c3..fb36731074b5 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -8,6 +8,9 @@
8#ifndef __ULIST__ 8#ifndef __ULIST__
9#define __ULIST__ 9#define __ULIST__
10 10
11#include <linux/list.h>
12#include <linux/rbtree.h>
13
11/* 14/*
12 * ulist is a generic data structure to hold a collection of unique u64 15 * ulist is a generic data structure to hold a collection of unique u64
13 * values. The only operations it supports is adding to the list and 16 * values. The only operations it supports is adding to the list and
@@ -34,6 +37,7 @@ struct ulist_iterator {
34struct ulist_node { 37struct ulist_node {
35 u64 val; /* value to store */ 38 u64 val; /* value to store */
36 u64 aux; /* auxiliary value saved along with the val */ 39 u64 aux; /* auxiliary value saved along with the val */
40 struct rb_node rb_node; /* used to speed up search */
37}; 41};
38 42
39struct ulist { 43struct ulist {
@@ -54,6 +58,8 @@ struct ulist {
54 */ 58 */
55 struct ulist_node *nodes; 59 struct ulist_node *nodes;
56 60
61 struct rb_root root;
62
57 /* 63 /*
58 * inline storage space for the first ULIST_SIZE entries 64 * inline storage space for the first ULIST_SIZE entries
59 */ 65 */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5989a92236f7..0e925ced971b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -46,6 +46,7 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
46 struct btrfs_device *device); 46 struct btrfs_device *device);
47static int btrfs_relocate_sys_chunks(struct btrfs_root *root); 47static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
48static void __btrfs_reset_dev_stats(struct btrfs_device *dev); 48static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
49static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
49static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); 50static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
50 51
51static DEFINE_MUTEX(uuid_mutex); 52static DEFINE_MUTEX(uuid_mutex);
@@ -717,9 +718,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
717 if (!device->name) 718 if (!device->name)
718 continue; 719 continue;
719 720
720 ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, 721 /* Just open everything we can; ignore failures here */
721 &bdev, &bh); 722 if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
722 if (ret) 723 &bdev, &bh))
723 continue; 724 continue;
724 725
725 disk_super = (struct btrfs_super_block *)bh->b_data; 726 disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -1199,10 +1200,10 @@ out:
1199 return ret; 1200 return ret;
1200} 1201}
1201 1202
1202int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, 1203static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1203 struct btrfs_device *device, 1204 struct btrfs_device *device,
1204 u64 chunk_tree, u64 chunk_objectid, 1205 u64 chunk_tree, u64 chunk_objectid,
1205 u64 chunk_offset, u64 start, u64 num_bytes) 1206 u64 chunk_offset, u64 start, u64 num_bytes)
1206{ 1207{
1207 int ret; 1208 int ret;
1208 struct btrfs_path *path; 1209 struct btrfs_path *path;
@@ -1329,9 +1330,9 @@ error:
1329 * the device information is stored in the chunk root 1330 * the device information is stored in the chunk root
1330 * the btrfs_device struct should be fully filled in 1331 * the btrfs_device struct should be fully filled in
1331 */ 1332 */
1332int btrfs_add_device(struct btrfs_trans_handle *trans, 1333static int btrfs_add_device(struct btrfs_trans_handle *trans,
1333 struct btrfs_root *root, 1334 struct btrfs_root *root,
1334 struct btrfs_device *device) 1335 struct btrfs_device *device)
1335{ 1336{
1336 int ret; 1337 int ret;
1337 struct btrfs_path *path; 1338 struct btrfs_path *path;
@@ -1710,8 +1711,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
1710 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 1711 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1711} 1712}
1712 1713
1713int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, 1714static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
1714 struct btrfs_device **device) 1715 struct btrfs_device **device)
1715{ 1716{
1716 int ret = 0; 1717 int ret = 0;
1717 struct btrfs_super_block *disk_super; 1718 struct btrfs_super_block *disk_super;
@@ -3607,7 +3608,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
3607 return 0; 3608 return 0;
3608} 3609}
3609 3610
3610struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { 3611static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3611 [BTRFS_RAID_RAID10] = { 3612 [BTRFS_RAID_RAID10] = {
3612 .sub_stripes = 2, 3613 .sub_stripes = 2,
3613 .dev_stripes = 1, 3614 .dev_stripes = 1,
@@ -3674,18 +3675,10 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
3674 3675
3675static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) 3676static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
3676{ 3677{
3677 u64 features;
3678
3679 if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))) 3678 if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
3680 return; 3679 return;
3681 3680
3682 features = btrfs_super_incompat_flags(info->super_copy); 3681 btrfs_set_fs_incompat(info, RAID56);
3683 if (features & BTRFS_FEATURE_INCOMPAT_RAID56)
3684 return;
3685
3686 features |= BTRFS_FEATURE_INCOMPAT_RAID56;
3687 btrfs_set_super_incompat_flags(info->super_copy, features);
3688 printk(KERN_INFO "btrfs: setting RAID5/6 feature flag\n");
3689} 3682}
3690 3683
3691static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3684static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -3932,7 +3925,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3932 3925
3933 em_tree = &extent_root->fs_info->mapping_tree.map_tree; 3926 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
3934 write_lock(&em_tree->lock); 3927 write_lock(&em_tree->lock);
3935 ret = add_extent_mapping(em_tree, em); 3928 ret = add_extent_mapping(em_tree, em, 0);
3936 write_unlock(&em_tree->lock); 3929 write_unlock(&em_tree->lock);
3937 if (ret) { 3930 if (ret) {
3938 free_extent_map(em); 3931 free_extent_map(em);
@@ -4240,9 +4233,25 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
4240 read_lock(&em_tree->lock); 4233 read_lock(&em_tree->lock);
4241 em = lookup_extent_mapping(em_tree, logical, len); 4234 em = lookup_extent_mapping(em_tree, logical, len);
4242 read_unlock(&em_tree->lock); 4235 read_unlock(&em_tree->lock);
4243 BUG_ON(!em);
4244 4236
4245 BUG_ON(em->start > logical || em->start + em->len < logical); 4237 /*
4238 * We could return errors for these cases, but that could get ugly and
4239 * we'd probably do the same thing which is just not do anything else
4240 * and exit, so return 1 so the callers don't try to use other copies.
4241 */
4242 if (!em) {
4243 btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
4244 logical+len);
4245 return 1;
4246 }
4247
4248 if (em->start > logical || em->start + em->len < logical) {
4249 btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
4250 "%Lu-%Lu\n", logical, logical+len, em->start,
4251 em->start + em->len);
4252 return 1;
4253 }
4254
4246 map = (struct map_lookup *)em->bdev; 4255 map = (struct map_lookup *)em->bdev;
4247 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1)) 4256 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
4248 ret = map->num_stripes; 4257 ret = map->num_stripes;
@@ -4411,13 +4420,19 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4411 read_unlock(&em_tree->lock); 4420 read_unlock(&em_tree->lock);
4412 4421
4413 if (!em) { 4422 if (!em) {
4414 printk(KERN_CRIT "btrfs: unable to find logical %llu len %llu\n", 4423 btrfs_crit(fs_info, "unable to find logical %llu len %llu",
4415 (unsigned long long)logical, 4424 (unsigned long long)logical,
4416 (unsigned long long)*length); 4425 (unsigned long long)*length);
4417 BUG(); 4426 return -EINVAL;
4427 }
4428
4429 if (em->start > logical || em->start + em->len < logical) {
4430 btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
4431 "found %Lu-%Lu\n", logical, em->start,
4432 em->start + em->len);
4433 return -EINVAL;
4418 } 4434 }
4419 4435
4420 BUG_ON(em->start > logical || em->start + em->len < logical);
4421 map = (struct map_lookup *)em->bdev; 4436 map = (struct map_lookup *)em->bdev;
4422 offset = logical - em->start; 4437 offset = logical - em->start;
4423 4438
@@ -4935,7 +4950,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
4935 em = lookup_extent_mapping(em_tree, chunk_start, 1); 4950 em = lookup_extent_mapping(em_tree, chunk_start, 1);
4936 read_unlock(&em_tree->lock); 4951 read_unlock(&em_tree->lock);
4937 4952
4938 BUG_ON(!em || em->start != chunk_start); 4953 if (!em) {
4954 printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
4955 chunk_start);
4956 return -EIO;
4957 }
4958
4959 if (em->start != chunk_start) {
4960 printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
4961 em->start, chunk_start);
4962 free_extent_map(em);
4963 return -EIO;
4964 }
4939 map = (struct map_lookup *)em->bdev; 4965 map = (struct map_lookup *)em->bdev;
4940 4966
4941 length = em->len; 4967 length = em->len;
@@ -5095,9 +5121,9 @@ struct async_sched {
5095 * This will add one bio to the pending list for a device and make sure 5121 * This will add one bio to the pending list for a device and make sure
5096 * the work struct is scheduled. 5122 * the work struct is scheduled.
5097 */ 5123 */
5098noinline void btrfs_schedule_bio(struct btrfs_root *root, 5124static noinline void btrfs_schedule_bio(struct btrfs_root *root,
5099 struct btrfs_device *device, 5125 struct btrfs_device *device,
5100 int rw, struct bio *bio) 5126 int rw, struct bio *bio)
5101{ 5127{
5102 int should_queue = 1; 5128 int should_queue = 1;
5103 struct btrfs_pending_bios *pending_bios; 5129 struct btrfs_pending_bios *pending_bios;
@@ -5166,7 +5192,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
5166 } 5192 }
5167 5193
5168 prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; 5194 prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
5169 if ((bio->bi_size >> 9) > max_sectors) 5195 if (bio_sectors(bio) > max_sectors)
5170 return 0; 5196 return 0;
5171 5197
5172 if (!q->merge_bvec_fn) 5198 if (!q->merge_bvec_fn)
@@ -5297,10 +5323,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5297 } 5323 }
5298 5324
5299 if (map_length < length) { 5325 if (map_length < length) {
5300 printk(KERN_CRIT "btrfs: mapping failed logical %llu bio len %llu " 5326 btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
5301 "len %llu\n", (unsigned long long)logical, 5327 (unsigned long long)logical,
5302 (unsigned long long)length, 5328 (unsigned long long)length,
5303 (unsigned long long)map_length); 5329 (unsigned long long)map_length);
5304 BUG(); 5330 BUG();
5305 } 5331 }
5306 5332
@@ -5465,7 +5491,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
5465 } 5491 }
5466 5492
5467 write_lock(&map_tree->map_tree.lock); 5493 write_lock(&map_tree->map_tree.lock);
5468 ret = add_extent_mapping(&map_tree->map_tree, em); 5494 ret = add_extent_mapping(&map_tree->map_tree, em, 0);
5469 write_unlock(&map_tree->map_tree.lock); 5495 write_unlock(&map_tree->map_tree.lock);
5470 BUG_ON(ret); /* Tree corruption */ 5496 BUG_ON(ret); /* Tree corruption */
5471 free_extent_map(em); 5497 free_extent_map(em);
@@ -5572,8 +5598,8 @@ static int read_one_dev(struct btrfs_root *root,
5572 return -EIO; 5598 return -EIO;
5573 5599
5574 if (!device) { 5600 if (!device) {
5575 printk(KERN_WARNING "warning devid %llu missing\n", 5601 btrfs_warn(root->fs_info, "devid %llu missing",
5576 (unsigned long long)devid); 5602 (unsigned long long)devid);
5577 device = add_missing_dev(root, devid, dev_uuid); 5603 device = add_missing_dev(root, devid, dev_uuid);
5578 if (!device) 5604 if (!device)
5579 return -ENOMEM; 5605 return -ENOMEM;
@@ -5915,7 +5941,7 @@ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
5915 btrfs_dev_stat_print_on_error(dev); 5941 btrfs_dev_stat_print_on_error(dev);
5916} 5942}
5917 5943
5918void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) 5944static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
5919{ 5945{
5920 if (!dev->dev_stats_valid) 5946 if (!dev->dev_stats_valid)
5921 return; 5947 return;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 062d8604d35b..845ccbb0d2e3 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -254,10 +254,6 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
254#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \ 254#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \
255 (sizeof(struct btrfs_bio_stripe) * (n))) 255 (sizeof(struct btrfs_bio_stripe) * (n)))
256 256
257int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
258 struct btrfs_device *device,
259 u64 chunk_tree, u64 chunk_objectid,
260 u64 chunk_offset, u64 start, u64 num_bytes);
261int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, 257int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
262 u64 logical, u64 *length, 258 u64 logical, u64 *length,
263 struct btrfs_bio **bbio_ret, int mirror_num); 259 struct btrfs_bio **bbio_ret, int mirror_num);
@@ -282,11 +278,6 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
282int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, 278int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
283 char *device_path, 279 char *device_path,
284 struct btrfs_device **device); 280 struct btrfs_device **device);
285int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
286 struct btrfs_device **device);
287int btrfs_add_device(struct btrfs_trans_handle *trans,
288 struct btrfs_root *root,
289 struct btrfs_device *device);
290int btrfs_rm_device(struct btrfs_root *root, char *device_path); 281int btrfs_rm_device(struct btrfs_root *root, char *device_path);
291void btrfs_cleanup_fs_uuids(void); 282void btrfs_cleanup_fs_uuids(void);
292int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); 283int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
@@ -307,7 +298,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
307int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); 298int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
308int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, 299int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
309 u64 *start, u64 *max_avail); 300 u64 *start, u64 *max_avail);
310void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
311void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index); 301void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
312int btrfs_get_dev_stats(struct btrfs_root *root, 302int btrfs_get_dev_stats(struct btrfs_root *root,
313 struct btrfs_ioctl_get_dev_stats *stats); 303 struct btrfs_ioctl_get_dev_stats *stats);
@@ -321,9 +311,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
321void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, 311void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
322 struct btrfs_device *tgtdev); 312 struct btrfs_device *tgtdev);
323int btrfs_scratch_superblock(struct btrfs_device *device); 313int btrfs_scratch_superblock(struct btrfs_device *device);
324void btrfs_schedule_bio(struct btrfs_root *root,
325 struct btrfs_device *device,
326 int rw, struct bio *bio);
327int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, 314int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
328 u64 logical, u64 len, int mirror_num); 315 u64 logical, u64 len, int mirror_num);
329unsigned long btrfs_full_stripe_len(struct btrfs_root *root, 316unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 446a6848c554..05740b9789e4 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -406,8 +406,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
406 XATTR_REPLACE); 406 XATTR_REPLACE);
407} 407}
408 408
409int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, 409static int btrfs_initxattrs(struct inode *inode,
410 void *fs_info) 410 const struct xattr *xattr_array, void *fs_info)
411{ 411{
412 const struct xattr *xattr; 412 const struct xattr *xattr;
413 struct btrfs_trans_handle *trans = fs_info; 413 struct btrfs_trans_handle *trans = fs_info;