author		Linus Torvalds <torvalds@linux-foundation.org>	2014-01-30 23:08:20 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-30 23:08:20 -0500
commit		e7651b819e90da924991d727d3c007200a18670d (patch)
tree		e7a943b5bb56c384972944fd86767a3f079b8a98
parent		060e8e3b6f8fc0ba97de2276249fbd80fa25b0a2 (diff)
parent		cf93da7bcf450cb4595055d491a0519cb39e68ed (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This is a pretty big pull, and most of these changes have been
  floating in btrfs-next for a long time. Filipe's properties work is a
  cool building block for inheriting attributes like compression down
  on a per inode basis.

  Jeff Mahoney kicked in code to export filesystem info into sysfs.

  Otherwise, lots of performance improvements, cleanups and bug fixes.

  Looks like there are still a few other small pending incrementals,
  but I wanted to get the bulk of this in first"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (149 commits)
  Btrfs: fix spin_unlock in check_ref_cleanup
  Btrfs: setup inode location during btrfs_init_inode_locked
  Btrfs: don't use ram_bytes for uncompressed inline items
  Btrfs: fix btrfs_search_slot_for_read backwards iteration
  Btrfs: do not export ulist functions
  Btrfs: rework ulist with list+rb_tree
  Btrfs: fix memory leaks on walking backrefs failure
  Btrfs: fix send file hole detection leading to data corruption
  Btrfs: add a reschedule point in btrfs_find_all_roots()
  Btrfs: make send's file extent item search more efficient
  Btrfs: fix to catch all errors when resolving indirect ref
  Btrfs: fix protection between walking backrefs and root deletion
  btrfs: fix warning while merging two adjacent extents
  Btrfs: fix infinite path build loops in incremental send
  btrfs: undo sysfs when open_ctree() fails
  Btrfs: fix snprintf usage by send's gen_unique_name
  btrfs: fix defrag 32-bit integer overflow
  btrfs: sysfs: list the NO_HOLES feature
  btrfs: sysfs: don't show reserved incompat feature
  btrfs: call permission checks earlier in ioctls and return EPERM
  ...
-rw-r--r--	Documentation/filesystems/btrfs.txt	47
-rw-r--r--	fs/btrfs/Kconfig	3
-rw-r--r--	fs/btrfs/Makefile	2
-rw-r--r--	fs/btrfs/backref.c	195
-rw-r--r--	fs/btrfs/btrfs_inode.h	4
-rw-r--r--	fs/btrfs/check-integrity.c	12
-rw-r--r--	fs/btrfs/compression.c	12
-rw-r--r--	fs/btrfs/ctree.c	552
-rw-r--r--	fs/btrfs/ctree.h	134
-rw-r--r--	fs/btrfs/delayed-inode.c	208
-rw-r--r--	fs/btrfs/delayed-inode.h	8
-rw-r--r--	fs/btrfs/delayed-ref.c	300
-rw-r--r--	fs/btrfs/delayed-ref.h	26
-rw-r--r--	fs/btrfs/dev-replace.c	56
-rw-r--r--	fs/btrfs/dir-item.c	8
-rw-r--r--	fs/btrfs/disk-io.c	253
-rw-r--r--	fs/btrfs/extent-tree.c	617
-rw-r--r--	fs/btrfs/extent_io.c	228
-rw-r--r--	fs/btrfs/extent_io.h	9
-rw-r--r--	fs/btrfs/extent_map.c	74
-rw-r--r--	fs/btrfs/file-item.c	4
-rw-r--r--	fs/btrfs/file.c	216
-rw-r--r--	fs/btrfs/free-space-cache.c	23
-rw-r--r--	fs/btrfs/hash.c	50
-rw-r--r--	fs/btrfs/hash.h	11
-rw-r--r--	fs/btrfs/inode-item.c	65
-rw-r--r--	fs/btrfs/inode.c	446
-rw-r--r--	fs/btrfs/ioctl.c	348
-rw-r--r--	fs/btrfs/lzo.c	6
-rw-r--r--	fs/btrfs/ordered-data.c	15
-rw-r--r--	fs/btrfs/orphan.c	20
-rw-r--r--	fs/btrfs/print-tree.c	4
-rw-r--r--	fs/btrfs/props.c	427
-rw-r--r--	fs/btrfs/props.h	42
-rw-r--r--	fs/btrfs/qgroup.c	57
-rw-r--r--	fs/btrfs/reada.c	9
-rw-r--r--	fs/btrfs/relocation.c	105
-rw-r--r--	fs/btrfs/root-tree.c	19
-rw-r--r--	fs/btrfs/scrub.c	134
-rw-r--r--	fs/btrfs/send.c	961
-rw-r--r--	fs/btrfs/super.c	241
-rw-r--r--	fs/btrfs/sysfs.c	617
-rw-r--r--	fs/btrfs/sysfs.h	64
-rw-r--r--	fs/btrfs/tests/btrfs-tests.h	2
-rw-r--r--	fs/btrfs/transaction.c	55
-rw-r--r--	fs/btrfs/transaction.h	3
-rw-r--r--	fs/btrfs/tree-log.c	209
-rw-r--r--	fs/btrfs/ulist.c	117
-rw-r--r--	fs/btrfs/ulist.h	39
-rw-r--r--	fs/btrfs/uuid-tree.c	13
-rw-r--r--	fs/btrfs/volumes.c	89
-rw-r--r--	fs/btrfs/xattr.c	12
-rw-r--r--	fs/btrfs/zlib.c	8
-rw-r--r--	include/linux/rwsem.h	11
-rw-r--r--	include/trace/events/btrfs.h	23
-rw-r--r--	include/uapi/linux/btrfs.h	13
-rw-r--r--	include/uapi/linux/xattr.h	3
-rw-r--r--	lib/kobject.c	1
58 files changed, 5173 insertions, 2057 deletions
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
index 5dd282dda55c..d11cc2f8077b 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.txt
@@ -38,7 +38,7 @@ Mount Options
 =============
 
 When mounting a btrfs filesystem, the following option are accepted.
-Unless otherwise specified, all options default to off.
+Options with (*) are default options and will not show in the mount options.
 
   alloc_start=<bytes>
 	Debugging option to force all block allocations above a certain
@@ -46,10 +46,12 @@ Unless otherwise specified, all options default to off.
 	bytes, optionally with a K, M, or G suffix, case insensitive.
 	Default is 1MB.
 
+  noautodefrag(*)
   autodefrag
-	Detect small random writes into files and queue them up for the
-	defrag process. Works best for small files; Not well suited for
-	large database workloads.
+	Disable/enable auto defragmentation.
+	Auto defragmentation detects small random writes into files and queue
+	them up for the defrag process. Works best for small files;
+	Not well suited for large database workloads.
 
   check_int
   check_int_data
@@ -96,21 +98,26 @@ Unless otherwise specified, all options default to off.
 	can be avoided. Especially useful when trying to mount a multi-device
 	setup as root. May be specified multiple times for multiple devices.
 
+  nodiscard(*)
   discard
-	Issue frequent commands to let the block device reclaim space freed by
-	the filesystem. This is useful for SSD devices, thinly provisioned
+	Disable/enable discard mount option.
+	Discard issues frequent commands to let the block device reclaim space
+	freed by the filesystem.
+	This is useful for SSD devices, thinly provisioned
 	LUNs and virtual machine images, but may have a significant
 	performance impact. (The fstrim command is also available to
 	initiate batch trims from userspace).
 
+  noenospc_debug(*)
   enospc_debug
-	Debugging option to be more verbose in some ENOSPC conditions.
+	Disable/enable debugging option to be more verbose in some ENOSPC conditions.
 
   fatal_errors=<action>
 	Action to take when encountering a fatal error:
 	  "bug" - BUG() on a fatal error. This is the default.
 	  "panic" - panic() on a fatal error.
 
+  noflushoncommit(*)
   flushoncommit
 	The 'flushoncommit' mount option forces any data dirtied by a write in a
 	prior transaction to commit as part of the current commit. This makes
@@ -134,26 +141,32 @@ Unless otherwise specified, all options default to off.
 	Specify that 1 metadata chunk should be allocated after every <value>
 	data chunks. Off by default.
 
+  acl(*)
   noacl
-	Disable support for Posix Access Control Lists (ACLs). See the
+	Enable/disable support for Posix Access Control Lists (ACLs). See the
 	acl(5) manual page for more information about ACLs.
 
+  barrier(*)
   nobarrier
-	Disables the use of block layer write barriers. Write barriers ensure
-	that certain IOs make it through the device cache and are on persistent
-	storage. If used on a device with a volatile (non-battery-backed)
-	write-back cache, this option will lead to filesystem corruption on a
-	system crash or power loss.
+	Enable/disable the use of block layer write barriers. Write barriers
+	ensure that certain IOs make it through the device cache and are on
+	persistent storage. If disabled on a device with a volatile
+	(non-battery-backed) write-back cache, nobarrier option will lead to
+	filesystem corruption on a system crash or power loss.
 
+  datacow(*)
   nodatacow
-	Disable data copy-on-write for newly created files. Implies nodatasum,
-	and disables all compression.
+	Enable/disable data copy-on-write for newly created files.
+	Nodatacow implies nodatasum, and disables all compression.
 
+  datasum(*)
   nodatasum
-	Disable data checksumming for newly created files.
+	Enable/disable data checksumming for newly created files.
+	Datasum implies datacow.
 
+  treelog(*)
   notreelog
-	Disable the tree logging used for fsync and O_SYNC writes.
+	Enable/disable the tree logging used for fsync and O_SYNC writes.
 
   recovery
 	Enable autorecovery attempts if a bad tree root is found at mount time.
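
The (*) markers introduced above denote the compiled-in defaults, and each toggle has a matching "no"-prefixed twin, so a non-default choice can be reverted on remount. A minimal userspace sketch of that round trip, assuming a hypothetical device /dev/sdb1 and mountpoint /mnt (illustrative, not part of the patch):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* opt in to autodefrag at mount time */
	if (mount("/dev/sdb1", "/mnt", "btrfs", 0, "autodefrag"))
		perror("mount");

	/* revert to the (*) default via the "no" twin on remount */
	if (mount("/dev/sdb1", "/mnt", "btrfs", MS_REMOUNT, "noautodefrag"))
		perror("remount");

	return 0;
}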
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index aa976eced2d2..a66768ebc8d1 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,6 +1,7 @@
 config BTRFS_FS
 	tristate "Btrfs filesystem support"
-	select LIBCRC32C
+	select CRYPTO
+	select CRYPTO_CRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
 	select LZO_COMPRESS
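
This Kconfig change moves btrfs from LIBCRC32C to the generic crypto layer's crc32c; the Makefile hunk below adds the hash.o wrapper that goes with it. A hedged sketch of what such a wrapper looks like on the kernel shash API (the actual fs/btrfs/hash.c is not reproduced in this diff):

#include <crypto/hash.h>
#include <linux/err.h>

static struct crypto_shash *tfm;

int btrfs_hash_init(void)
{
	tfm = crypto_alloc_shash("crc32c", 0, 0);
	return PTR_ERR_OR_ZERO(tfm);
}

u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
{
	struct {
		struct shash_desc shash;
		char ctx[crypto_shash_descsize(tfm)];	/* crc32c state */
	} desc;

	desc.shash.tfm = tfm;
	desc.shash.flags = 0;
	*(u32 *)desc.ctx = crc;		/* seed the running checksum */

	if (crypto_shash_update(&desc.shash, address, length))
		return crc;

	return *(u32 *)desc.ctx;	/* updated checksum */
}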
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 1a44e42d602a..f341a98031d2 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
-	   uuid-tree.o
+	   uuid-tree.o props.o hash.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3775947429b2..aded3ef3d3d4 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -66,6 +66,16 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
 	return 0;
 }
 
+static void free_inode_elem_list(struct extent_inode_elem *eie)
+{
+	struct extent_inode_elem *eie_next;
+
+	for (; eie; eie = eie_next) {
+		eie_next = eie->next;
+		kfree(eie);
+	}
+}
+
 static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
 			     u64 extent_item_pos,
 			     struct extent_inode_elem **eie)
@@ -209,18 +219,19 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 }
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
-			   struct ulist *parents, int level,
-			   struct btrfs_key *key_for_search, u64 time_seq,
-			   u64 wanted_disk_byte,
-			   const u64 *extent_item_pos)
+			   struct ulist *parents, struct __prelim_ref *ref,
+			   int level, u64 time_seq, const u64 *extent_item_pos)
 {
 	int ret = 0;
 	int slot;
 	struct extent_buffer *eb;
 	struct btrfs_key key;
+	struct btrfs_key *key_for_search = &ref->key_for_search;
 	struct btrfs_file_extent_item *fi;
 	struct extent_inode_elem *eie = NULL, *old = NULL;
 	u64 disk_byte;
+	u64 wanted_disk_byte = ref->wanted_disk_byte;
+	u64 count = 0;
 
 	if (level != 0) {
 		eb = path->nodes[level];
@@ -238,7 +249,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
 		ret = btrfs_next_old_leaf(root, path, time_seq);
 
-	while (!ret) {
+	while (!ret && count < ref->count) {
 		eb = path->nodes[0];
 		slot = path->slots[0];
 
@@ -254,6 +265,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 		if (disk_byte == wanted_disk_byte) {
 			eie = NULL;
 			old = NULL;
+			count++;
 			if (extent_item_pos) {
 				ret = check_extent_in_eb(&key, eb, fi,
 						*extent_item_pos,
@@ -273,6 +285,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 					old = old->next;
 				old->next = eie;
 			}
+			eie = NULL;
 		}
 next:
 		ret = btrfs_next_old_item(root, path, time_seq);
@@ -280,6 +293,8 @@ next:
 
 	if (ret > 0)
 		ret = 0;
+	else if (ret < 0)
+		free_inode_elem_list(eie);
 	return ret;
 }
 
@@ -299,23 +314,34 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	int ret = 0;
 	int root_level;
 	int level = ref->level;
+	int index;
 
 	root_key.objectid = ref->root_id;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
 	root_key.offset = (u64)-1;
+
+	index = srcu_read_lock(&fs_info->subvol_srcu);
+
 	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
 	if (IS_ERR(root)) {
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
 		ret = PTR_ERR(root);
 		goto out;
 	}
 
 	root_level = btrfs_old_root_level(root, time_seq);
 
-	if (root_level + 1 == level)
+	if (root_level + 1 == level) {
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
 		goto out;
+	}
 
 	path->lowest_level = level;
 	ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+
+	/* root node has been locked, we can release @subvol_srcu safely here */
+	srcu_read_unlock(&fs_info->subvol_srcu, index);
+
 	pr_debug("search slot in root %llu (level %d, ref count %d) returned "
 		 "%d for key (%llu %u %llu)\n",
 		 ref->root_id, level, ref->count, ret,
@@ -334,9 +360,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		eb = path->nodes[level];
 	}
 
-	ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
-			      time_seq, ref->wanted_disk_byte,
-			      extent_item_pos);
+	ret = add_all_parents(root, path, parents, ref, level, time_seq,
+			      extent_item_pos);
 out:
 	path->lowest_level = 0;
 	btrfs_release_path(path);
@@ -376,10 +401,16 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			continue;
 		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
 					     parents, extent_item_pos);
-		if (err == -ENOMEM)
-			goto out;
-		if (err)
+		/*
+		 * we can only tolerate ENOENT,otherwise,we should catch error
+		 * and return directly.
+		 */
+		if (err == -ENOENT) {
 			continue;
+		} else if (err) {
+			ret = err;
+			goto out;
+		}
 
 		/* we put the first parent into the ref at hand */
 		ULIST_ITER_INIT(&uiter);
@@ -538,14 +569,13 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 	if (extent_op && extent_op->update_key)
 		btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
-	while ((n = rb_prev(n))) {
+	spin_lock(&head->lock);
+	n = rb_first(&head->ref_root);
+	while (n) {
 		struct btrfs_delayed_ref_node *node;
 		node = rb_entry(n, struct btrfs_delayed_ref_node,
 				rb_node);
-		if (node->bytenr != head->node.bytenr)
-			break;
-		WARN_ON(node->is_head);
-
+		n = rb_next(n);
 		if (node->seq > seq)
 			continue;
 
@@ -612,10 +642,10 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 			WARN_ON(1);
 		}
 		if (ret)
-			return ret;
+			break;
 	}
-
-	return 0;
+	spin_unlock(&head->lock);
+	return ret;
 }
 
 /*
@@ -828,6 +858,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct list_head prefs_delayed;
 	struct list_head prefs;
 	struct __prelim_ref *ref;
+	struct extent_inode_elem *eie = NULL;
 
 	INIT_LIST_HEAD(&prefs);
 	INIT_LIST_HEAD(&prefs_delayed);
@@ -882,15 +913,15 @@ again:
 				btrfs_put_delayed_ref(&head->node);
 				goto again;
 			}
+			spin_unlock(&delayed_refs->lock);
 			ret = __add_delayed_refs(head, time_seq,
 						 &prefs_delayed);
 			mutex_unlock(&head->mutex);
-			if (ret) {
-				spin_unlock(&delayed_refs->lock);
+			if (ret)
 				goto out;
-			}
+		} else {
+			spin_unlock(&delayed_refs->lock);
 		}
-		spin_unlock(&delayed_refs->lock);
 	}
 
 	if (path->slots[0]) {
@@ -941,7 +972,6 @@ again:
 			goto out;
 		}
 		if (ref->count && ref->parent) {
-			struct extent_inode_elem *eie = NULL;
 			if (extent_item_pos && !ref->inode_list) {
 				u32 bsz;
 				struct extent_buffer *eb;
@@ -976,6 +1006,7 @@ again:
 					eie = eie->next;
 				eie->next = ref->inode_list;
 			}
+			eie = NULL;
 		}
 		list_del(&ref->list);
 		kmem_cache_free(btrfs_prelim_ref_cache, ref);
@@ -994,7 +1025,8 @@ out:
 		list_del(&ref->list);
 		kmem_cache_free(btrfs_prelim_ref_cache, ref);
 	}
-
+	if (ret < 0)
+		free_inode_elem_list(eie);
 	return ret;
 }
 
@@ -1002,7 +1034,6 @@ static void free_leaf_list(struct ulist *blocks)
 {
 	struct ulist_node *node = NULL;
 	struct extent_inode_elem *eie;
-	struct extent_inode_elem *eie_next;
 	struct ulist_iterator uiter;
 
 	ULIST_ITER_INIT(&uiter);
@@ -1010,10 +1041,7 @@ static void free_leaf_list(struct ulist *blocks)
 		if (!node->aux)
 			continue;
 		eie = (struct extent_inode_elem *)(uintptr_t)node->aux;
-		for (; eie; eie = eie_next) {
-			eie_next = eie->next;
-			kfree(eie);
-		}
+		free_inode_elem_list(eie);
 		node->aux = 0;
 	}
 
@@ -1101,44 +1129,13 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 		if (!node)
 			break;
 		bytenr = node->val;
+		cond_resched();
 	}
 
 	ulist_free(tmp);
 	return 0;
 }
 
-
-static int __inode_info(u64 inum, u64 ioff, u8 key_type,
-			struct btrfs_root *fs_root, struct btrfs_path *path,
-			struct btrfs_key *found_key)
-{
-	int ret;
-	struct btrfs_key key;
-	struct extent_buffer *eb;
-
-	key.type = key_type;
-	key.objectid = inum;
-	key.offset = ioff;
-
-	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	eb = path->nodes[0];
-	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
-		ret = btrfs_next_leaf(fs_root, path);
-		if (ret)
-			return ret;
-		eb = path->nodes[0];
-	}
-
-	btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
-	if (found_key->type != key.type || found_key->objectid != key.objectid)
-		return 1;
-
-	return 0;
-}
-
 /*
  * this makes the path point to (inum INODE_ITEM ioff)
  */
@@ -1146,16 +1143,16 @@ int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
 		  struct btrfs_path *path)
 {
 	struct btrfs_key key;
-	return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path,
-			    &key);
+	return btrfs_find_item(fs_root, path, inum, ioff,
+			BTRFS_INODE_ITEM_KEY, &key);
 }
 
 static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
 			  struct btrfs_path *path,
 			  struct btrfs_key *found_key)
 {
-	return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path,
-			    found_key);
+	return btrfs_find_item(fs_root, path, inum, ioff,
+			BTRFS_INODE_REF_KEY, found_key);
 }
 
 int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
@@ -1335,20 +1332,45 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		return ret;
-	ret = btrfs_previous_item(fs_info->extent_root, path,
-					0, BTRFS_EXTENT_ITEM_KEY);
-	if (ret < 0)
-		return ret;
 
-	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
+	while (1) {
+		u32 nritems;
+		if (path->slots[0] == 0) {
+			btrfs_set_path_blocking(path);
+			ret = btrfs_prev_leaf(fs_info->extent_root, path);
+			if (ret != 0) {
+				if (ret > 0) {
+					pr_debug("logical %llu is not within "
+						 "any extent\n", logical);
+					ret = -ENOENT;
+				}
+				return ret;
+			}
+		} else {
+			path->slots[0]--;
+		}
+		nritems = btrfs_header_nritems(path->nodes[0]);
+		if (nritems == 0) {
+			pr_debug("logical %llu is not within any extent\n",
				 logical);
+			return -ENOENT;
+		}
+		if (path->slots[0] == nritems)
+			path->slots[0]--;
+
+		btrfs_item_key_to_cpu(path->nodes[0], found_key,
+				      path->slots[0]);
+		if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
+		    found_key->type == BTRFS_METADATA_ITEM_KEY)
+			break;
+	}
+
 	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
 		size = fs_info->extent_root->leafsize;
 	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
 		size = found_key->offset;
 
-	if ((found_key->type != BTRFS_EXTENT_ITEM_KEY &&
-	     found_key->type != BTRFS_METADATA_ITEM_KEY) ||
-	    found_key->objectid > logical ||
+	if (found_key->objectid > logical ||
 	    found_key->objectid + size <= logical) {
 		pr_debug("logical %llu is not within any extent\n", logical);
 		return -ENOENT;
@@ -1601,7 +1623,6 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 	struct btrfs_key found_key;
 
 	while (!ret) {
-		path->leave_spinning = 1;
 		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
 				     &found_key);
 		if (ret < 0)
@@ -1614,9 +1635,12 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 
 		parent = found_key.offset;
 		slot = path->slots[0];
-		eb = path->nodes[0];
-		/* make sure we can use eb after releasing the path */
-		atomic_inc(&eb->refs);
+		eb = btrfs_clone_extent_buffer(path->nodes[0]);
+		if (!eb) {
+			ret = -ENOMEM;
+			break;
+		}
+		extent_buffer_get(eb);
 		btrfs_tree_read_lock(eb);
 		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 		btrfs_release_path(path);
@@ -1674,17 +1698,20 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
 		++found;
 
 		slot = path->slots[0];
-		eb = path->nodes[0];
-		/* make sure we can use eb after releasing the path */
-		atomic_inc(&eb->refs);
+		eb = btrfs_clone_extent_buffer(path->nodes[0]);
+		if (!eb) {
+			ret = -ENOMEM;
+			break;
+		}
+		extent_buffer_get(eb);
 
 		btrfs_tree_read_lock(eb);
 		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 		btrfs_release_path(path);
 
 		leaf = path->nodes[0];
-		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		item_size = btrfs_item_size_nr(leaf, slot);
+		ptr = btrfs_item_ptr_offset(leaf, slot);
 		cur_offset = 0;
 
 		while (cur_offset < item_size) {
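
The srcu_read_lock()/srcu_read_unlock() pairs added to __resolve_indirect_ref() above pin the subvolume root against concurrent deletion until btrfs_search_old_slot() has locked the root node; note that every early-exit path must drop the same reader index. Reduced to its skeleton, the kernel SRCU pattern in play looks like this (a sketch, not code from the patch):

#include <linux/srcu.h>

/* one srcu_struct guards all subvolume roots, as fs_info->subvol_srcu does */
static struct srcu_struct subvol_srcu;

static void resolve_under_srcu(void)
{
	int index;

	index = srcu_read_lock(&subvol_srcu);
	/*
	 * Look up and lock the root here. A deleter calling
	 * synchronize_srcu(&subvol_srcu) blocks until this
	 * read-side section ends, so the root stays valid.
	 */
	srcu_read_unlock(&subvol_srcu, index);
}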
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ac0b39db27d1..8fed2125689e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -43,6 +43,7 @@
 #define BTRFS_INODE_COPY_EVERYTHING		8
 #define BTRFS_INODE_IN_DELALLOC_LIST		9
 #define BTRFS_INODE_READDIO_NEED_LOCK		10
+#define BTRFS_INODE_HAS_PROPS			11
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -135,6 +136,9 @@ struct btrfs_inode {
 	 */
 	u64 index_cnt;
 
+	/* Cache the directory index number to speed the dir/file remove */
+	u64 dir_index;
+
 	/* the fsync log has some corner cases that mean we have to check
 	 * directories to see if any unlinks have been done before
 	 * the directory was logged. See tree-log.c for all the
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index cb05e1c842c5..49a62b4dda3b 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1456,10 +1456,14 @@ static int btrfsic_handle_extent_data(
 		btrfsic_read_from_block_data(block_ctx, &file_extent_item,
 			file_extent_item_offset,
 			sizeof(struct btrfs_file_extent_item));
-		next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
-			btrfs_stack_file_extent_offset(&file_extent_item);
-		generation = btrfs_stack_file_extent_generation(&file_extent_item);
-		num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+		next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
+		if (btrfs_stack_file_extent_compression(&file_extent_item) ==
+		    BTRFS_COMPRESS_NONE) {
+			next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
+			num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+		} else {
+			num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
+		}
 		generation = btrfs_stack_file_extent_generation(&file_extent_item);
 
 		if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
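
The hunk above encodes one rule: for an uncompressed file extent the extent offset addresses on-disk bytes directly, while for a compressed extent the offset is in uncompressed space, so the whole compressed blob (disk_num_bytes starting at disk_bytenr) is the on-disk range. Stated as a small illustrative helper (not part of the patch):

#include <linux/types.h>

static void file_extent_disk_range(u64 disk_bytenr, u64 offset,
				   u64 num_bytes, u64 disk_num_bytes,
				   bool compressed, u64 *start, u64 *len)
{
	if (!compressed) {
		/* offset counts bytes into the on-disk extent */
		*start = disk_bytenr + offset;
		*len = num_bytes;
	} else {
		/* offset is in uncompressed space; scan the whole blob */
		*start = disk_bytenr;
		*len = disk_num_bytes;
	}
}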
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f5cdeb4b5538..e2600cdb6c25 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -128,11 +128,10 @@ static int check_compressed_csum(struct inode *inode,
 		kunmap_atomic(kaddr);
 
 		if (csum != *cb_sum) {
-			printk(KERN_INFO "btrfs csum failed ino %llu "
-			       "extent %llu csum %u "
-			       "wanted %u mirror %d\n",
-			       btrfs_ino(inode), disk_start, csum, *cb_sum,
-			       cb->mirror_num);
+			btrfs_info(BTRFS_I(inode)->root->fs_info,
+			   "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
+			   btrfs_ino(inode), disk_start, csum, *cb_sum,
+			   cb->mirror_num);
 			ret = -EIO;
 			goto fail;
 		}
@@ -411,7 +410,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
 	}
 	if (bytes_left < PAGE_CACHE_SIZE) {
-		printk("bytes left %lu compress len %lu nr %lu\n",
+		btrfs_info(BTRFS_I(inode)->root->fs_info,
+			"bytes left %lu compress len %lu nr %lu",
 		       bytes_left, cb->compressed_len, cb->nr_pages);
 	}
 	bytes_left -= PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 316136bd6dd7..cbd3a7d6fa68 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -39,9 +39,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct extent_buffer *src_buf);
 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
 		    int level, int slot);
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
-				 struct extent_buffer *eb);
-static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+				struct extent_buffer *eb);
 
 struct btrfs_path *btrfs_alloc_path(void)
 {
@@ -475,6 +474,8 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
  * the index is the shifted logical of the *new* root node for root replace
  * operations, or the shifted logical of the affected block for all other
  * operations.
+ *
+ * Note: must be called with write lock (tree_mod_log_write_lock).
  */
 static noinline int
 __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -483,24 +484,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 	struct rb_node **new;
 	struct rb_node *parent = NULL;
 	struct tree_mod_elem *cur;
-	int ret = 0;
 
 	BUG_ON(!tm);
 
-	tree_mod_log_write_lock(fs_info);
-	if (list_empty(&fs_info->tree_mod_seq_list)) {
-		tree_mod_log_write_unlock(fs_info);
-		/*
-		 * Ok we no longer care about logging modifications, free up tm
-		 * and return 0. Any callers shouldn't be using tm after
-		 * calling tree_mod_log_insert, but if they do we can just
-		 * change this to return a special error code to let the callers
-		 * do their own thing.
-		 */
-		kfree(tm);
-		return 0;
-	}
-
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
@@ -518,18 +504,13 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 			new = &((*new)->rb_left);
 		else if (cur->seq > tm->seq)
 			new = &((*new)->rb_right);
-		else {
-			ret = -EEXIST;
-			kfree(tm);
-			goto out;
-		}
+		else
+			return -EEXIST;
 	}
 
 	rb_link_node(&tm->node, parent, new);
 	rb_insert_color(&tm->node, tm_root);
-out:
-	tree_mod_log_write_unlock(fs_info);
-	return ret;
+	return 0;
 }
 
 /*
@@ -545,19 +526,38 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
 		return 1;
 	if (eb && btrfs_header_level(eb) == 0)
 		return 1;
+
+	tree_mod_log_write_lock(fs_info);
+	if (list_empty(&(fs_info)->tree_mod_seq_list)) {
+		tree_mod_log_write_unlock(fs_info);
+		return 1;
+	}
+
 	return 0;
 }
 
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
-			  struct extent_buffer *eb, int slot,
-			  enum mod_log_op op, gfp_t flags)
+/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+				    struct extent_buffer *eb)
+{
+	smp_mb();
+	if (list_empty(&(fs_info)->tree_mod_seq_list))
+		return 0;
+	if (eb && btrfs_header_level(eb) == 0)
+		return 0;
+
+	return 1;
+}
+
+static struct tree_mod_elem *
+alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
+		    enum mod_log_op op, gfp_t flags)
 {
 	struct tree_mod_elem *tm;
 
 	tm = kzalloc(sizeof(*tm), flags);
 	if (!tm)
-		return -ENOMEM;
+		return NULL;
 
 	tm->index = eb->start >> PAGE_CACHE_SHIFT;
 	if (op != MOD_LOG_KEY_ADD) {
@@ -567,8 +567,9 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
 	tm->op = op;
 	tm->slot = slot;
 	tm->generation = btrfs_node_ptr_generation(eb, slot);
+	RB_CLEAR_NODE(&tm->node);
 
-	return __tree_mod_log_insert(fs_info, tm);
+	return tm;
 }
 
 static noinline int
@@ -576,10 +577,27 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
 			struct extent_buffer *eb, int slot,
 			enum mod_log_op op, gfp_t flags)
 {
-	if (tree_mod_dont_log(fs_info, eb))
+	struct tree_mod_elem *tm;
+	int ret;
+
+	if (!tree_mod_need_log(fs_info, eb))
 		return 0;
 
-	return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+	tm = alloc_tree_mod_elem(eb, slot, op, flags);
+	if (!tm)
+		return -ENOMEM;
+
+	if (tree_mod_dont_log(fs_info, eb)) {
+		kfree(tm);
+		return 0;
+	}
+
+	ret = __tree_mod_log_insert(fs_info, tm);
+	tree_mod_log_write_unlock(fs_info);
+	if (ret)
+		kfree(tm);
+
+	return ret;
 }
 
 static noinline int
@@ -587,53 +605,95 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *eb, int dst_slot, int src_slot,
 			 int nr_items, gfp_t flags)
 {
-	struct tree_mod_elem *tm;
-	int ret;
+	struct tree_mod_elem *tm = NULL;
+	struct tree_mod_elem **tm_list = NULL;
+	int ret = 0;
 	int i;
+	int locked = 0;
 
-	if (tree_mod_dont_log(fs_info, eb))
+	if (!tree_mod_need_log(fs_info, eb))
 		return 0;
 
+	tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+	if (!tm_list)
+		return -ENOMEM;
+
+	tm = kzalloc(sizeof(*tm), flags);
+	if (!tm) {
+		ret = -ENOMEM;
+		goto free_tms;
+	}
+
+	tm->index = eb->start >> PAGE_CACHE_SHIFT;
+	tm->slot = src_slot;
+	tm->move.dst_slot = dst_slot;
+	tm->move.nr_items = nr_items;
+	tm->op = MOD_LOG_MOVE_KEYS;
+
+	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+		tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
+		    MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+		if (!tm_list[i]) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
+	}
+
+	if (tree_mod_dont_log(fs_info, eb))
+		goto free_tms;
+	locked = 1;
+
 	/*
 	 * When we override something during the move, we log these removals.
 	 * This can only happen when we move towards the beginning of the
 	 * buffer, i.e. dst_slot < src_slot.
 	 */
 	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
-		ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
-				MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
-		BUG_ON(ret < 0);
+		ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+		if (ret)
+			goto free_tms;
 	}
 
-	tm = kzalloc(sizeof(*tm), flags);
-	if (!tm)
-		return -ENOMEM;
+	ret = __tree_mod_log_insert(fs_info, tm);
+	if (ret)
+		goto free_tms;
+	tree_mod_log_write_unlock(fs_info);
+	kfree(tm_list);
 
-	tm->index = eb->start >> PAGE_CACHE_SHIFT;
-	tm->slot = src_slot;
-	tm->move.dst_slot = dst_slot;
-	tm->move.nr_items = nr_items;
-	tm->op = MOD_LOG_MOVE_KEYS;
+	return 0;
+free_tms:
+	for (i = 0; i < nr_items; i++) {
+		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+			rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+		kfree(tm_list[i]);
+	}
+	if (locked)
+		tree_mod_log_write_unlock(fs_info);
+	kfree(tm_list);
+	kfree(tm);
 
-	return __tree_mod_log_insert(fs_info, tm);
+	return ret;
 }
 
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static inline int
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+			struct tree_mod_elem **tm_list,
+			int nritems)
 {
-	int i;
-	u32 nritems;
+	int i, j;
 	int ret;
 
-	if (btrfs_header_level(eb) == 0)
-		return;
-
-	nritems = btrfs_header_nritems(eb);
 	for (i = nritems - 1; i >= 0; i--) {
-		ret = __tree_mod_log_insert_key(fs_info, eb, i,
-				MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
-		BUG_ON(ret < 0);
+		ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+		if (ret) {
+			for (j = nritems - 1; j > i; j--)
+				rb_erase(&tm_list[j]->node,
+					 &fs_info->tree_mod_log);
+			return ret;
+		}
 	}
+
+	return 0;
 }
 
 static noinline int
@@ -642,17 +702,38 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *new_root, gfp_t flags,
 			 int log_removal)
 {
-	struct tree_mod_elem *tm;
+	struct tree_mod_elem *tm = NULL;
+	struct tree_mod_elem **tm_list = NULL;
+	int nritems = 0;
+	int ret = 0;
+	int i;
 
-	if (tree_mod_dont_log(fs_info, NULL))
+	if (!tree_mod_need_log(fs_info, NULL))
 		return 0;
 
-	if (log_removal)
-		__tree_mod_log_free_eb(fs_info, old_root);
+	if (log_removal && btrfs_header_level(old_root) > 0) {
+		nritems = btrfs_header_nritems(old_root);
+		tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+				  flags);
+		if (!tm_list) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
+		for (i = 0; i < nritems; i++) {
+			tm_list[i] = alloc_tree_mod_elem(old_root, i,
+			    MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+			if (!tm_list[i]) {
+				ret = -ENOMEM;
+				goto free_tms;
+			}
+		}
+	}
 
 	tm = kzalloc(sizeof(*tm), flags);
-	if (!tm)
-		return -ENOMEM;
+	if (!tm) {
+		ret = -ENOMEM;
+		goto free_tms;
+	}
 
 	tm->index = new_root->start >> PAGE_CACHE_SHIFT;
 	tm->old_root.logical = old_root->start;
@@ -660,7 +741,30 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	tm->generation = btrfs_header_generation(old_root);
 	tm->op = MOD_LOG_ROOT_REPLACE;
 
-	return __tree_mod_log_insert(fs_info, tm);
+	if (tree_mod_dont_log(fs_info, NULL))
+		goto free_tms;
+
+	if (tm_list)
+		ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+	if (!ret)
+		ret = __tree_mod_log_insert(fs_info, tm);
+
+	tree_mod_log_write_unlock(fs_info);
+	if (ret)
+		goto free_tms;
+	kfree(tm_list);
+
+	return ret;
+
+free_tms:
+	if (tm_list) {
+		for (i = 0; i < nritems; i++)
+			kfree(tm_list[i]);
+		kfree(tm_list);
+	}
+	kfree(tm);
+
+	return ret;
 }
 
 static struct tree_mod_elem *
@@ -729,31 +833,75 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
 	return __tree_mod_log_search(fs_info, start, min_seq, 0);
 }
 
-static noinline void
+static noinline int
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 		     struct extent_buffer *src, unsigned long dst_offset,
 		     unsigned long src_offset, int nr_items)
 {
-	int ret;
+	int ret = 0;
+	struct tree_mod_elem **tm_list = NULL;
+	struct tree_mod_elem **tm_list_add, **tm_list_rem;
 	int i;
+	int locked = 0;
 
-	if (tree_mod_dont_log(fs_info, NULL))
-		return;
+	if (!tree_mod_need_log(fs_info, NULL))
		return 0;
 
 	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
-		return;
+		return 0;
+
+	tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+			  GFP_NOFS);
+	if (!tm_list)
+		return -ENOMEM;
 
+	tm_list_add = tm_list;
+	tm_list_rem = tm_list + nr_items;
 	for (i = 0; i < nr_items; i++) {
-		ret = __tree_mod_log_insert_key(fs_info, src,
-						i + src_offset,
-						MOD_LOG_KEY_REMOVE, GFP_NOFS);
-		BUG_ON(ret < 0);
-		ret = __tree_mod_log_insert_key(fs_info, dst,
-						i + dst_offset,
-						MOD_LOG_KEY_ADD,
-						GFP_NOFS);
-		BUG_ON(ret < 0);
+		tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
+		    MOD_LOG_KEY_REMOVE, GFP_NOFS);
+		if (!tm_list_rem[i]) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
+
+		tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
+		    MOD_LOG_KEY_ADD, GFP_NOFS);
+		if (!tm_list_add[i]) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
 	}
+
+	if (tree_mod_dont_log(fs_info, NULL))
+		goto free_tms;
+	locked = 1;
+
+	for (i = 0; i < nr_items; i++) {
+		ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
+		if (ret)
+			goto free_tms;
+		ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
+		if (ret)
+			goto free_tms;
+	}
+
+	tree_mod_log_write_unlock(fs_info);
+	kfree(tm_list);
+
+	return 0;
+
+free_tms:
+	for (i = 0; i < nr_items * 2; i++) {
+		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+			rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+		kfree(tm_list[i]);
+	}
+	if (locked)
+		tree_mod_log_write_unlock(fs_info);
+	kfree(tm_list);
+
+	return ret;
 }
 
 static inline void
@@ -772,18 +920,58 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 {
 	int ret;
 
-	ret = __tree_mod_log_insert_key(fs_info, eb, slot,
-					MOD_LOG_KEY_REPLACE,
-					atomic ? GFP_ATOMIC : GFP_NOFS);
+	ret = tree_mod_log_insert_key(fs_info, eb, slot,
+				      MOD_LOG_KEY_REPLACE,
+				      atomic ? GFP_ATOMIC : GFP_NOFS);
 	BUG_ON(ret < 0);
 }
 
-static noinline void
+static noinline int
 tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 {
+	struct tree_mod_elem **tm_list = NULL;
+	int nritems = 0;
+	int i;
+	int ret = 0;
+
+	if (btrfs_header_level(eb) == 0)
+		return 0;
+
+	if (!tree_mod_need_log(fs_info, NULL))
+		return 0;
+
+	nritems = btrfs_header_nritems(eb);
+	tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+			  GFP_NOFS);
+	if (!tm_list)
+		return -ENOMEM;
+
+	for (i = 0; i < nritems; i++) {
+		tm_list[i] = alloc_tree_mod_elem(eb, i,
+		    MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+		if (!tm_list[i]) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
+	}
+
 	if (tree_mod_dont_log(fs_info, eb))
-		return;
-	__tree_mod_log_free_eb(fs_info, eb);
+		goto free_tms;
+
+	ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+	tree_mod_log_write_unlock(fs_info);
+	if (ret)
+		goto free_tms;
+	kfree(tm_list);
+
+	return 0;
+
+free_tms:
+	for (i = 0; i < nritems; i++)
+		kfree(tm_list[i]);
+	kfree(tm_list);
+
+	return ret;
 }
 
 static noinline void
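
The tree-mod-log hunks above all apply one conversion: elements used to be allocated and inserted inside __tree_mod_log_insert(), which also took the write lock and papered over failures with BUG_ON(); now each caller pre-allocates with alloc_tree_mod_elem() outside the lock, enters the lock through tree_mod_dont_log(), inserts, and unwinds with rb_erase()/kfree() on error. The single-element case, condensed (names from the patch; multi-element callers add the tm_list rollback loop):

	if (!tree_mod_need_log(fs_info, eb))	/* cheap unlocked check */
		return 0;
	tm = alloc_tree_mod_elem(eb, slot, op, flags);	/* no lock held */
	if (!tm)
		return -ENOMEM;
	if (tree_mod_dont_log(fs_info, eb)) {	/* takes write lock on 0 */
		kfree(tm);		/* logging turned off meanwhile */
		return 0;
	}
	ret = __tree_mod_log_insert(fs_info, tm);	/* write lock held */
	tree_mod_log_write_unlock(fs_info);
	if (ret)
		kfree(tm);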
@@ -1041,8 +1229,13 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1041 btrfs_set_node_ptr_generation(parent, parent_slot, 1229 btrfs_set_node_ptr_generation(parent, parent_slot,
1042 trans->transid); 1230 trans->transid);
1043 btrfs_mark_buffer_dirty(parent); 1231 btrfs_mark_buffer_dirty(parent);
1044 if (last_ref) 1232 if (last_ref) {
1045 tree_mod_log_free_eb(root->fs_info, buf); 1233 ret = tree_mod_log_free_eb(root->fs_info, buf);
1234 if (ret) {
1235 btrfs_abort_transaction(trans, root, ret);
1236 return ret;
1237 }
1238 }
1046 btrfs_free_tree_block(trans, root, buf, parent_start, 1239 btrfs_free_tree_block(trans, root, buf, parent_start,
1047 last_ref); 1240 last_ref);
1048 } 1241 }
@@ -1287,8 +1480,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
1287 old = read_tree_block(root, logical, blocksize, 0); 1480 old = read_tree_block(root, logical, blocksize, 0);
1288 if (WARN_ON(!old || !extent_buffer_uptodate(old))) { 1481 if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
1289 free_extent_buffer(old); 1482 free_extent_buffer(old);
1290 pr_warn("btrfs: failed to read tree block %llu from get_old_root\n", 1483 btrfs_warn(root->fs_info,
1291 logical); 1484 "failed to read tree block %llu from get_old_root", logical);
1292 } else { 1485 } else {
1293 eb = btrfs_clone_extent_buffer(old); 1486 eb = btrfs_clone_extent_buffer(old);
1294 free_extent_buffer(old); 1487 free_extent_buffer(old);
@@ -2462,6 +2655,49 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
2462 return 0; 2655 return 0;
2463} 2656}
2464 2657
2658int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
2659 u64 iobjectid, u64 ioff, u8 key_type,
2660 struct btrfs_key *found_key)
2661{
2662 int ret;
2663 struct btrfs_key key;
2664 struct extent_buffer *eb;
2665 struct btrfs_path *path;
2666
2667 key.type = key_type;
2668 key.objectid = iobjectid;
2669 key.offset = ioff;
2670
2671 if (found_path == NULL) {
2672 path = btrfs_alloc_path();
2673 if (!path)
2674 return -ENOMEM;
2675 } else
2676 path = found_path;
2677
2678 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
2679 if ((ret < 0) || (found_key == NULL)) {
2680 if (path != found_path)
2681 btrfs_free_path(path);
2682 return ret;
2683 }
2684
2685 eb = path->nodes[0];
2686 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
2687 ret = btrfs_next_leaf(fs_root, path);
2688 if (ret)
2689 return ret;
2690 eb = path->nodes[0];
2691 }
2692
2693 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
2694 if (found_key->type != key.type ||
2695 found_key->objectid != key.objectid)
2696 return 1;
2697
2698 return 0;
2699}
2700
2465/* 2701/*
2466 * look for key in the tree. path is filled in with nodes along the way 2702 * look for key in the tree. path is filled in with nodes along the way
2467 * if key is found, we return zero and you can find the item in the leaf 2703 * if key is found, we return zero and you can find the item in the leaf
@@ -2495,6 +2731,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2495 lowest_level = p->lowest_level; 2731 lowest_level = p->lowest_level;
2496 WARN_ON(lowest_level && ins_len > 0); 2732 WARN_ON(lowest_level && ins_len > 0);
2497 WARN_ON(p->nodes[0] != NULL); 2733 WARN_ON(p->nodes[0] != NULL);
2734 BUG_ON(!cow && ins_len);
2498 2735
2499 if (ins_len < 0) { 2736 if (ins_len < 0) {
2500 lowest_unlock = 2; 2737 lowest_unlock = 2;
@@ -2603,8 +2840,6 @@ again:
2603 } 2840 }
2604 } 2841 }
2605cow_done: 2842cow_done:
2606 BUG_ON(!cow && ins_len);
2607
2608 p->nodes[level] = b; 2843 p->nodes[level] = b;
2609 btrfs_clear_path_blocking(p, NULL, 0); 2844 btrfs_clear_path_blocking(p, NULL, 0);
2610 2845
@@ -2614,13 +2849,19 @@ cow_done:
2614 * It is safe to drop the lock on our parent before we 2849 * It is safe to drop the lock on our parent before we
2615 * go through the expensive btree search on b. 2850 * go through the expensive btree search on b.
2616 * 2851 *
2617 * If cow is true, then we might be changing slot zero, 2852 * If we're inserting or deleting (ins_len != 0), then we might
2618 * which may require changing the parent. So, we can't 2853 * be changing slot zero, which may require changing the parent.
2619 * drop the lock until after we know which slot we're 2854 * So, we can't drop the lock until after we know which slot
2620 * operating on. 2855 * we're operating on.
2621 */ 2856 */
2622 if (!cow) 2857 if (!ins_len && !p->keep_locks) {
2623 btrfs_unlock_up_safe(p, level + 1); 2858 int u = level + 1;
2859
2860 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
2861 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
2862 p->locks[u] = 0;
2863 }
2864 }
2624 2865
2625 ret = key_search(b, key, level, &prev_cmp, &slot); 2866 ret = key_search(b, key, level, &prev_cmp, &slot);
2626 2867
@@ -2648,7 +2889,7 @@ cow_done:
2648 * which means we must have a write lock 2889 * which means we must have a write lock
2649 * on the parent 2890 * on the parent
2650 */ 2891 */
2651 if (slot == 0 && cow && 2892 if (slot == 0 && ins_len &&
2652 write_lock_level < level + 1) { 2893 write_lock_level < level + 1) {
2653 write_lock_level = level + 1; 2894 write_lock_level = level + 1;
2654 btrfs_release_path(p); 2895 btrfs_release_path(p);
@@ -2901,7 +3142,9 @@ again:
2901 if (ret < 0) 3142 if (ret < 0)
2902 return ret; 3143 return ret;
2903 if (!ret) { 3144 if (!ret) {
2904 p->slots[0] = btrfs_header_nritems(leaf) - 1; 3145 leaf = p->nodes[0];
3146 if (p->slots[0] == btrfs_header_nritems(leaf))
3147 p->slots[0]--;
2905 return 0; 3148 return 0;
2906 } 3149 }
2907 if (!return_any) 3150 if (!return_any)
@@ -3022,8 +3265,12 @@ static int push_node_left(struct btrfs_trans_handle *trans,
3022 } else 3265 } else
3023 push_items = min(src_nritems - 8, push_items); 3266 push_items = min(src_nritems - 8, push_items);
3024 3267
3025 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, 3268 ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
3026 push_items); 3269 push_items);
3270 if (ret) {
3271 btrfs_abort_transaction(trans, root, ret);
3272 return ret;
3273 }
3027 copy_extent_buffer(dst, src, 3274 copy_extent_buffer(dst, src,
3028 btrfs_node_key_ptr_offset(dst_nritems), 3275 btrfs_node_key_ptr_offset(dst_nritems),
3029 btrfs_node_key_ptr_offset(0), 3276 btrfs_node_key_ptr_offset(0),
@@ -3093,8 +3340,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
3093 (dst_nritems) * 3340 (dst_nritems) *
3094 sizeof(struct btrfs_key_ptr)); 3341 sizeof(struct btrfs_key_ptr));
3095 3342
3096 tree_mod_log_eb_copy(root->fs_info, dst, src, 0, 3343 ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
3097 src_nritems - push_items, push_items); 3344 src_nritems - push_items, push_items);
3345 if (ret) {
3346 btrfs_abort_transaction(trans, root, ret);
3347 return ret;
3348 }
3098 copy_extent_buffer(dst, src, 3349 copy_extent_buffer(dst, src,
3099 btrfs_node_key_ptr_offset(0), 3350 btrfs_node_key_ptr_offset(0),
3100 btrfs_node_key_ptr_offset(src_nritems - push_items), 3351 btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3295,7 +3546,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3295 btrfs_header_chunk_tree_uuid(split), 3546 btrfs_header_chunk_tree_uuid(split),
3296 BTRFS_UUID_SIZE); 3547 BTRFS_UUID_SIZE);
3297 3548
3298 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); 3549 ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
3550 mid, c_nritems - mid);
3551 if (ret) {
3552 btrfs_abort_transaction(trans, root, ret);
3553 return ret;
3554 }
3299 copy_extent_buffer(split, c, 3555 copy_extent_buffer(split, c,
3300 btrfs_node_key_ptr_offset(0), 3556 btrfs_node_key_ptr_offset(0),
3301 btrfs_node_key_ptr_offset(mid), 3557 btrfs_node_key_ptr_offset(mid),
@@ -3362,8 +3618,8 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
3362 int ret; 3618 int ret;
3363 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 3619 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
3364 if (ret < 0) { 3620 if (ret < 0) {
3365 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " 3621 btrfs_crit(root->fs_info,
3366 "used %d nritems %d\n", 3622 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
3367 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), 3623 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
3368 leaf_space_used(leaf, 0, nritems), nritems); 3624 leaf_space_used(leaf, 0, nritems), nritems);
3369 } 3625 }
@@ -3571,6 +3827,19 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3571 if (left_nritems == 0) 3827 if (left_nritems == 0)
3572 goto out_unlock; 3828 goto out_unlock;
3573 3829
3830 if (path->slots[0] == left_nritems && !empty) {
 3831 /* The key is greater than all keys in the leaf, the right neighbor
 3832 * has enough room for it, and we're not emptying our leaf to delete
 3833 * it; therefore use the right neighbor to insert the new item, with
 3834 * no need to touch/dirty our left leaf. */
3835 btrfs_tree_unlock(left);
3836 free_extent_buffer(left);
3837 path->nodes[0] = right;
3838 path->slots[0] = 0;
3839 path->slots[1]++;
3840 return 0;
3841 }
3842
3574 return __push_leaf_right(trans, root, path, min_data_size, empty, 3843 return __push_leaf_right(trans, root, path, min_data_size, empty,
3575 right, free_space, left_nritems, min_slot); 3844 right, free_space, left_nritems, min_slot);
3576out_unlock: 3845out_unlock:
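A minimal userspace sketch of the fast path added above (struct and function names here are made up; the real code operates on extent buffers and path slots). It models the decision to insert into the right sibling at slot 0 instead of dirtying the left leaf:

#include <stdbool.h>
#include <stdio.h>

struct leaf {
	int nritems;       /* items currently in the leaf */
	int free_space;    /* bytes left in the leaf */
};

/*
 * Model of the push_leaf_right() fast path: if the new key sorts after
 * every key in "left" (slot == left->nritems), we are not emptying the
 * leaf for deletion, and the right sibling has room, insert into
 * "right" at slot 0 instead of dirtying "left".
 */
static bool append_goes_right(const struct leaf *left,
			      const struct leaf *right,
			      int slot, int item_size, bool empty)
{
	return slot == left->nritems && !empty &&
	       right->free_space >= item_size;
}

int main(void)
{
	struct leaf left = { .nritems = 10, .free_space = 8 };
	struct leaf right = { .nritems = 3, .free_space = 512 };

	/* inserting past the last key of "left": take the fast path */
	printf("use right sibling: %d\n",
	       append_goes_right(&left, &right, 10, 64, false));
	return 0;
}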
@@ -3887,14 +4156,17 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3887 int progress = 0; 4156 int progress = 0;
3888 int slot; 4157 int slot;
3889 u32 nritems; 4158 u32 nritems;
4159 int space_needed = data_size;
3890 4160
3891 slot = path->slots[0]; 4161 slot = path->slots[0];
4162 if (slot < btrfs_header_nritems(path->nodes[0]))
4163 space_needed -= btrfs_leaf_free_space(root, path->nodes[0]);
3892 4164
3893 /* 4165 /*
3894 * try to push all the items after our slot into the 4166 * try to push all the items after our slot into the
3895 * right leaf 4167 * right leaf
3896 */ 4168 */
3897 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); 4169 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
3898 if (ret < 0) 4170 if (ret < 0)
3899 return ret; 4171 return ret;
3900 4172
@@ -3914,7 +4186,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3914 4186
3915 /* try to push all the items before our slot into the next leaf */ 4187 /* try to push all the items before our slot into the next leaf */
3916 slot = path->slots[0]; 4188 slot = path->slots[0];
3917 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); 4189 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
3918 if (ret < 0) 4190 if (ret < 0)
3919 return ret; 4191 return ret;
3920 4192
@@ -3958,13 +4230,18 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
3958 4230
3959 /* first try to make some room by pushing left and right */ 4231 /* first try to make some room by pushing left and right */
3960 if (data_size && path->nodes[1]) { 4232 if (data_size && path->nodes[1]) {
3961 wret = push_leaf_right(trans, root, path, data_size, 4233 int space_needed = data_size;
3962 data_size, 0, 0); 4234
4235 if (slot < btrfs_header_nritems(l))
4236 space_needed -= btrfs_leaf_free_space(root, l);
4237
4238 wret = push_leaf_right(trans, root, path, space_needed,
4239 space_needed, 0, 0);
3963 if (wret < 0) 4240 if (wret < 0)
3964 return wret; 4241 return wret;
3965 if (wret) { 4242 if (wret) {
3966 wret = push_leaf_left(trans, root, path, data_size, 4243 wret = push_leaf_left(trans, root, path, space_needed,
3967 data_size, 0, (u32)-1); 4244 space_needed, 0, (u32)-1);
3968 if (wret < 0) 4245 if (wret < 0)
3969 return wret; 4246 return wret;
3970 } 4247 }
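Both hunks above apply the same accounting: when the insertion slot is inside the leaf, the push only has to make up the difference between the item size and the space the leaf already has free; appending past the last slot still needs the full data_size in the neighbor. A minimal sketch of the arithmetic, with made-up numbers:

#include <stdio.h>

static int space_needed(int data_size, int slot, int nritems,
			int leaf_free_space)
{
	int needed = data_size;

	/* slot inside the leaf: the leaf's free space already helps */
	if (slot < nritems)
		needed -= leaf_free_space;
	return needed;
}

int main(void)
{
	/* 300-byte item, slot 5 of 10, 120 bytes already free: need 180 */
	printf("need %d bytes\n", space_needed(300, 5, 10, 120));
	/* appending at the end still needs all 300 bytes */
	printf("need %d bytes\n", space_needed(300, 10, 10, 120));
	return 0;
}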
@@ -4432,7 +4709,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
4432 BUG_ON(slot < 0); 4709 BUG_ON(slot < 0);
4433 if (slot >= nritems) { 4710 if (slot >= nritems) {
4434 btrfs_print_leaf(root, leaf); 4711 btrfs_print_leaf(root, leaf);
4435 printk(KERN_CRIT "slot %d too large, nritems %d\n", 4712 btrfs_crit(root->fs_info, "slot %d too large, nritems %d",
4436 slot, nritems); 4713 slot, nritems);
4437 BUG_ON(1); 4714 BUG_ON(1);
4438 } 4715 }
@@ -4495,7 +4772,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4495 4772
4496 if (btrfs_leaf_free_space(root, leaf) < total_size) { 4773 if (btrfs_leaf_free_space(root, leaf) < total_size) {
4497 btrfs_print_leaf(root, leaf); 4774 btrfs_print_leaf(root, leaf);
4498 printk(KERN_CRIT "not enough freespace need %u have %d\n", 4775 btrfs_crit(root->fs_info, "not enough freespace need %u have %d",
4499 total_size, btrfs_leaf_free_space(root, leaf)); 4776 total_size, btrfs_leaf_free_space(root, leaf));
4500 BUG(); 4777 BUG();
4501 } 4778 }
@@ -4505,7 +4782,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4505 4782
4506 if (old_data < data_end) { 4783 if (old_data < data_end) {
4507 btrfs_print_leaf(root, leaf); 4784 btrfs_print_leaf(root, leaf);
4508 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 4785 btrfs_crit(root->fs_info, "slot %d old_data %d data_end %d",
4509 slot, old_data, data_end); 4786 slot, old_data, data_end);
4510 BUG_ON(1); 4787 BUG_ON(1);
4511 } 4788 }
@@ -4817,7 +5094,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4817 * This may release the path, and so you may lose any locks held at the 5094 * This may release the path, and so you may lose any locks held at the
4818 * time you call it. 5095 * time you call it.
4819 */ 5096 */
4820static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 5097int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4821{ 5098{
4822 struct btrfs_key key; 5099 struct btrfs_key key;
4823 struct btrfs_disk_key found_key; 5100 struct btrfs_disk_key found_key;
@@ -5240,7 +5517,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5240 5517
5241 if (!left_start_ctransid || !right_start_ctransid) { 5518 if (!left_start_ctransid || !right_start_ctransid) {
5242 WARN(1, KERN_WARNING 5519 WARN(1, KERN_WARNING
5243 "btrfs: btrfs_compare_tree detected " 5520 "BTRFS: btrfs_compare_tree detected "
5244 "a change in one of the trees while " 5521 "a change in one of the trees while "
5245 "iterating. This is probably a " 5522 "iterating. This is probably a "
5246 "bug.\n"); 5523 "bug.\n");
@@ -5680,3 +5957,46 @@ int btrfs_previous_item(struct btrfs_root *root,
5680 } 5957 }
5681 return 1; 5958 return 1;
5682} 5959}
5960
5961/*
 5962 * search the extent tree to find a previous Metadata/Data extent item with
 5963 * min objectid.
5964 *
5965 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5966 */
5967int btrfs_previous_extent_item(struct btrfs_root *root,
5968 struct btrfs_path *path, u64 min_objectid)
5969{
5970 struct btrfs_key found_key;
5971 struct extent_buffer *leaf;
5972 u32 nritems;
5973 int ret;
5974
5975 while (1) {
5976 if (path->slots[0] == 0) {
5977 btrfs_set_path_blocking(path);
5978 ret = btrfs_prev_leaf(root, path);
5979 if (ret != 0)
5980 return ret;
5981 } else {
5982 path->slots[0]--;
5983 }
5984 leaf = path->nodes[0];
5985 nritems = btrfs_header_nritems(leaf);
5986 if (nritems == 0)
5987 return 1;
5988 if (path->slots[0] == nritems)
5989 path->slots[0]--;
5990
5991 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5992 if (found_key.objectid < min_objectid)
5993 break;
5994 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
5995 found_key.type == BTRFS_METADATA_ITEM_KEY)
5996 return 0;
5997 if (found_key.objectid == min_objectid &&
5998 found_key.type < BTRFS_EXTENT_ITEM_KEY)
5999 break;
6000 }
6001 return 1;
6002}
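A userspace model of the walk in btrfs_previous_extent_item(), with a sorted key array standing in for btree leaves and stand-in key-type constants; the loop structure (step back one slot, stop below min_objectid, return on the first extent or metadata item) is the same, though the real function also crosses leaf boundaries via btrfs_prev_leaf():

#include <stdio.h>

#define EXTENT_ITEM_KEY   168   /* stand-ins for the btrfs key types */
#define METADATA_ITEM_KEY 169

struct key { unsigned long long objectid; int type; };

/* returns the slot of the previous extent/metadata item, or -1 */
static int previous_extent_item(const struct key *keys, int slot,
				unsigned long long min_objectid)
{
	while (slot > 0) {
		slot--;
		if (keys[slot].objectid < min_objectid)
			break;
		if (keys[slot].type == EXTENT_ITEM_KEY ||
		    keys[slot].type == METADATA_ITEM_KEY)
			return slot;
		if (keys[slot].objectid == min_objectid &&
		    keys[slot].type < EXTENT_ITEM_KEY)
			break;
	}
	return -1;
}

int main(void)
{
	struct key keys[] = {
		{ 100, EXTENT_ITEM_KEY },
		{ 200, 1 },             /* some other item type */
		{ 300, METADATA_ITEM_KEY },
		{ 400, 1 },
	};

	printf("found slot %d\n", previous_extent_item(keys, 4, 150));
	return 0;
}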
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7506825211a2..2c1a42ca519f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -521,9 +521,15 @@ struct btrfs_super_block {
521#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) 521#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
522#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) 522#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
523#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) 523#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
524#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
524 525
525#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 526#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
527#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL
528#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
526#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 529#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
530#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
531#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
532
527#define BTRFS_FEATURE_INCOMPAT_SUPP \ 533#define BTRFS_FEATURE_INCOMPAT_SUPP \
528 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 534 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
529 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 535 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
@@ -532,7 +538,12 @@ struct btrfs_super_block {
532 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ 538 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
533 BTRFS_FEATURE_INCOMPAT_RAID56 | \ 539 BTRFS_FEATURE_INCOMPAT_RAID56 | \
534 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ 540 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
535 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) 541 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
542 BTRFS_FEATURE_INCOMPAT_NO_HOLES)
543
544#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
545 (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
546#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL
536 547
537/* 548/*
538 * A leaf is full of items. offset and size tell us where to find 549 * A leaf is full of items. offset and size tell us where to find
@@ -1094,7 +1105,7 @@ struct btrfs_qgroup_limit_item {
1094} __attribute__ ((__packed__)); 1105} __attribute__ ((__packed__));
1095 1106
1096struct btrfs_space_info { 1107struct btrfs_space_info {
1097 u64 flags; 1108 spinlock_t lock;
1098 1109
1099 u64 total_bytes; /* total bytes in the space, 1110 u64 total_bytes; /* total bytes in the space,
1100 this doesn't take mirrors into account */ 1111 this doesn't take mirrors into account */
@@ -1104,14 +1115,25 @@ struct btrfs_space_info {
1104 transaction finishes */ 1115 transaction finishes */
1105 u64 bytes_reserved; /* total bytes the allocator has reserved for 1116 u64 bytes_reserved; /* total bytes the allocator has reserved for
1106 current allocations */ 1117 current allocations */
1107 u64 bytes_readonly; /* total bytes that are read only */
1108
1109 u64 bytes_may_use; /* number of bytes that may be used for 1118 u64 bytes_may_use; /* number of bytes that may be used for
1110 delalloc/allocations */ 1119 delalloc/allocations */
1120 u64 bytes_readonly; /* total bytes that are read only */
1121
1122 unsigned int full:1; /* indicates that we cannot allocate any more
1123 chunks for this space */
1124 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
1125
1126 unsigned int flush:1; /* set if we are trying to make space */
1127
1128 unsigned int force_alloc; /* set if we need to force a chunk
1129 alloc for this space */
1130
1111 u64 disk_used; /* total bytes used on disk */ 1131 u64 disk_used; /* total bytes used on disk */
1112 u64 disk_total; /* total bytes on disk, takes mirrors into 1132 u64 disk_total; /* total bytes on disk, takes mirrors into
1113 account */ 1133 account */
1114 1134
1135 u64 flags;
1136
1115 /* 1137 /*
1116 * bytes_pinned is kept in line with what is actually pinned, as in 1138 * bytes_pinned is kept in line with what is actually pinned, as in
1117 * we've called update_block_group and dropped the bytes_used counter 1139 * we've called update_block_group and dropped the bytes_used counter
@@ -1124,22 +1146,15 @@ struct btrfs_space_info {
1124 */ 1146 */
1125 struct percpu_counter total_bytes_pinned; 1147 struct percpu_counter total_bytes_pinned;
1126 1148
1127 unsigned int full:1; /* indicates that we cannot allocate any more
1128 chunks for this space */
1129 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
1130
1131 unsigned int flush:1; /* set if we are trying to make space */
1132
1133 unsigned int force_alloc; /* set if we need to force a chunk
1134 alloc for this space */
1135
1136 struct list_head list; 1149 struct list_head list;
1137 1150
1151 struct rw_semaphore groups_sem;
1138 /* for block groups in our same type */ 1152 /* for block groups in our same type */
1139 struct list_head block_groups[BTRFS_NR_RAID_TYPES]; 1153 struct list_head block_groups[BTRFS_NR_RAID_TYPES];
1140 spinlock_t lock;
1141 struct rw_semaphore groups_sem;
1142 wait_queue_head_t wait; 1154 wait_queue_head_t wait;
1155
1156 struct kobject kobj;
1157 struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
1143}; 1158};
1144 1159
1145#define BTRFS_BLOCK_RSV_GLOBAL 1 1160#define BTRFS_BLOCK_RSV_GLOBAL 1
@@ -1346,6 +1361,7 @@ struct btrfs_fs_info {
1346 1361
1347 u64 generation; 1362 u64 generation;
1348 u64 last_trans_committed; 1363 u64 last_trans_committed;
1364 u64 avg_delayed_ref_runtime;
1349 1365
1350 /* 1366 /*
1351 * this is updated to the current trans every time a full commit 1367 * this is updated to the current trans every time a full commit
@@ -1448,7 +1464,6 @@ struct btrfs_fs_info {
1448 spinlock_t tree_mod_seq_lock; 1464 spinlock_t tree_mod_seq_lock;
1449 atomic64_t tree_mod_seq; 1465 atomic64_t tree_mod_seq;
1450 struct list_head tree_mod_seq_list; 1466 struct list_head tree_mod_seq_list;
1451 struct seq_list tree_mod_seq_elem;
1452 1467
1453 /* this protects tree_mod_log */ 1468 /* this protects tree_mod_log */
1454 rwlock_t tree_mod_log_lock; 1469 rwlock_t tree_mod_log_lock;
@@ -1515,6 +1530,8 @@ struct btrfs_fs_info {
1515 int thread_pool_size; 1530 int thread_pool_size;
1516 1531
1517 struct kobject super_kobj; 1532 struct kobject super_kobj;
1533 struct kobject *space_info_kobj;
1534 struct kobject *device_dir_kobj;
1518 struct completion kobj_unregister; 1535 struct completion kobj_unregister;
1519 int do_barriers; 1536 int do_barriers;
1520 int closing; 1537 int closing;
@@ -1643,6 +1660,10 @@ struct btrfs_fs_info {
1643 spinlock_t reada_lock; 1660 spinlock_t reada_lock;
1644 struct radix_tree_root reada_tree; 1661 struct radix_tree_root reada_tree;
1645 1662
1663 /* Extent buffer radix tree */
1664 spinlock_t buffer_lock;
1665 struct radix_tree_root buffer_radix;
1666
1646 /* next backup root to be overwritten */ 1667 /* next backup root to be overwritten */
1647 int backup_root_index; 1668 int backup_root_index;
1648 1669
@@ -1795,6 +1816,12 @@ struct btrfs_root {
1795 struct list_head ordered_extents; 1816 struct list_head ordered_extents;
1796 struct list_head ordered_root; 1817 struct list_head ordered_root;
1797 u64 nr_ordered_extents; 1818 u64 nr_ordered_extents;
1819
1820 /*
1821 * Number of currently running SEND ioctls to prevent
1822 * manipulation with the read-only status via SUBVOL_SETFLAGS
1823 */
1824 int send_in_progress;
1798}; 1825};
1799 1826
1800struct btrfs_ioctl_defrag_range_args { 1827struct btrfs_ioctl_defrag_range_args {
@@ -1997,6 +2024,7 @@ struct btrfs_ioctl_defrag_range_args {
1997#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) 2024#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
1998#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) 2025#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
1999#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) 2026#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
2027#define BTRFS_MOUNT_CHANGE_INODE_CACHE (1 << 24)
2000 2028
2001#define BTRFS_DEFAULT_COMMIT_INTERVAL (30) 2029#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
2002 2030
@@ -2925,6 +2953,10 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
2925 struct btrfs_file_extent_item, generation, 64); 2953 struct btrfs_file_extent_item, generation, 64);
2926BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, 2954BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
2927 struct btrfs_file_extent_item, num_bytes, 64); 2955 struct btrfs_file_extent_item, num_bytes, 64);
2956BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes,
2957 struct btrfs_file_extent_item, disk_num_bytes, 64);
2958BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
2959 struct btrfs_file_extent_item, compression, 8);
2928 2960
2929static inline unsigned long 2961static inline unsigned long
2930btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) 2962btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@ -2958,15 +2990,6 @@ BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
2958BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, 2990BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
2959 other_encoding, 16); 2991 other_encoding, 16);
2960 2992
2961/* this returns the number of file bytes represented by the inline item.
2962 * If an item is compressed, this is the uncompressed size
2963 */
2964static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
2965 struct btrfs_file_extent_item *e)
2966{
2967 return btrfs_file_extent_ram_bytes(eb, e);
2968}
2969
2970/* 2993/*
2971 * this returns the number of bytes used by the item on disk, minus the 2994 * this returns the number of bytes used by the item on disk, minus the
2972 * size of any extent headers. If a file is compressed on disk, this is 2995 * size of any extent headers. If a file is compressed on disk, this is
@@ -2980,6 +3003,32 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
2980 return btrfs_item_size(eb, e) - offset; 3003 return btrfs_item_size(eb, e) - offset;
2981} 3004}
2982 3005
3006/* this returns the number of file bytes represented by the inline item.
3007 * If an item is compressed, this is the uncompressed size
3008 */
3009static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
3010 int slot,
3011 struct btrfs_file_extent_item *fi)
3012{
3013 struct btrfs_map_token token;
3014
3015 btrfs_init_map_token(&token);
3016 /*
3017 * return the space used on disk if this item isn't
3018 * compressed or encoded
3019 */
3020 if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
3021 btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
3022 btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
3023 return btrfs_file_extent_inline_item_len(eb,
3024 btrfs_item_nr(slot));
3025 }
3026
3027 /* otherwise use the ram bytes field */
3028 return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
3029}
3030
3031
2983/* btrfs_dev_stats_item */ 3032/* btrfs_dev_stats_item */
2984static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, 3033static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
2985 struct btrfs_dev_stats_item *ptr, 3034 struct btrfs_dev_stats_item *ptr,
@@ -3143,6 +3192,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
3143 3192
3144int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, 3193int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
3145 struct btrfs_root *root); 3194 struct btrfs_root *root);
3195int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
3196 struct btrfs_root *root);
3146void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 3197void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
3147int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 3198int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
3148 struct btrfs_root *root, unsigned long count); 3199 struct btrfs_root *root, unsigned long count);
@@ -3163,6 +3214,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
3163 struct btrfs_fs_info *info, 3214 struct btrfs_fs_info *info,
3164 u64 bytenr); 3215 u64 bytenr);
3165void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 3216void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
3217int get_block_group_index(struct btrfs_block_group_cache *cache);
3166struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 3218struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3167 struct btrfs_root *root, u32 blocksize, 3219 struct btrfs_root *root, u32 blocksize,
3168 u64 parent, u64 root_objectid, 3220 u64 parent, u64 root_objectid,
@@ -3301,6 +3353,8 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
3301int btrfs_previous_item(struct btrfs_root *root, 3353int btrfs_previous_item(struct btrfs_root *root,
3302 struct btrfs_path *path, u64 min_objectid, 3354 struct btrfs_path *path, u64 min_objectid,
3303 int type); 3355 int type);
3356int btrfs_previous_extent_item(struct btrfs_root *root,
3357 struct btrfs_path *path, u64 min_objectid);
3304void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path, 3358void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
3305 struct btrfs_key *new_key); 3359 struct btrfs_key *new_key);
3306struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 3360struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
@@ -3350,6 +3404,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3350 struct btrfs_root *root, 3404 struct btrfs_root *root,
3351 struct btrfs_path *path, 3405 struct btrfs_path *path,
3352 struct btrfs_key *new_key); 3406 struct btrfs_key *new_key);
3407int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
3408 u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
3353int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 3409int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
3354 *root, struct btrfs_key *key, struct btrfs_path *p, int 3410 *root, struct btrfs_key *key, struct btrfs_path *p, int
3355 ins_len, int cow); 3411 ins_len, int cow);
@@ -3399,6 +3455,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
3399} 3455}
3400 3456
3401int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 3457int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
3458int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
3402int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 3459int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
3403 u64 time_seq); 3460 u64 time_seq);
3404static inline int btrfs_next_old_item(struct btrfs_root *root, 3461static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3563,12 +3620,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
3563 struct btrfs_root *root, 3620 struct btrfs_root *root,
3564 const char *name, int name_len, 3621 const char *name, int name_len,
3565 u64 inode_objectid, u64 ref_objectid, u64 *index); 3622 u64 inode_objectid, u64 ref_objectid, u64 *index);
3566int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
3567 struct btrfs_root *root,
3568 struct btrfs_path *path,
3569 const char *name, int name_len,
3570 u64 inode_objectid, u64 ref_objectid, int mod,
3571 u64 *ret_index);
3572int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, 3623int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
3573 struct btrfs_root *root, 3624 struct btrfs_root *root,
3574 struct btrfs_path *path, u64 objectid); 3625 struct btrfs_path *path, u64 objectid);
@@ -3676,7 +3727,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput);
3676int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 3727int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
3677 struct extent_state **cached_state); 3728 struct extent_state **cached_state);
3678int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 3729int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
3679 struct btrfs_root *new_root, u64 new_dirid); 3730 struct btrfs_root *new_root,
3731 struct btrfs_root *parent_root,
3732 u64 new_dirid);
3680int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, 3733int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
3681 size_t size, struct bio *bio, 3734 size_t size, struct bio *bio,
3682 unsigned long bio_flags); 3735 unsigned long bio_flags);
@@ -3745,7 +3798,10 @@ extern const struct file_operations btrfs_file_operations;
3745int __btrfs_drop_extents(struct btrfs_trans_handle *trans, 3798int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
3746 struct btrfs_root *root, struct inode *inode, 3799 struct btrfs_root *root, struct inode *inode,
3747 struct btrfs_path *path, u64 start, u64 end, 3800 struct btrfs_path *path, u64 start, u64 end,
3748 u64 *drop_end, int drop_cache); 3801 u64 *drop_end, int drop_cache,
3802 int replace_extent,
3803 u32 extent_item_size,
3804 int *key_inserted);
3749int btrfs_drop_extents(struct btrfs_trans_handle *trans, 3805int btrfs_drop_extents(struct btrfs_trans_handle *trans,
3750 struct btrfs_root *root, struct inode *inode, u64 start, 3806 struct btrfs_root *root, struct inode *inode, u64 start,
3751 u64 end, int drop_cache); 3807 u64 end, int drop_cache);
@@ -3764,6 +3820,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
3764/* sysfs.c */ 3820/* sysfs.c */
3765int btrfs_init_sysfs(void); 3821int btrfs_init_sysfs(void);
3766void btrfs_exit_sysfs(void); 3822void btrfs_exit_sysfs(void);
3823int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info);
3824void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info);
3767 3825
3768/* xattr.c */ 3826/* xattr.c */
3769ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); 3827ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@ -3796,14 +3854,20 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
3796 btrfs_printk(fs_info, KERN_NOTICE fmt, ##args) 3854 btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
3797#define btrfs_info(fs_info, fmt, args...) \ 3855#define btrfs_info(fs_info, fmt, args...) \
3798 btrfs_printk(fs_info, KERN_INFO fmt, ##args) 3856 btrfs_printk(fs_info, KERN_INFO fmt, ##args)
3857
3858#ifdef DEBUG
3799#define btrfs_debug(fs_info, fmt, args...) \ 3859#define btrfs_debug(fs_info, fmt, args...) \
3800 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) 3860 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
3861#else
3862#define btrfs_debug(fs_info, fmt, args...) \
3863 no_printk(KERN_DEBUG fmt, ##args)
3864#endif
3801 3865
3802#ifdef CONFIG_BTRFS_ASSERT 3866#ifdef CONFIG_BTRFS_ASSERT
3803 3867
3804static inline void assfail(char *expr, char *file, int line) 3868static inline void assfail(char *expr, char *file, int line)
3805{ 3869{
3806 printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d", 3870 pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
3807 expr, file, line); 3871 expr, file, line);
3808 BUG(); 3872 BUG();
3809} 3873}
@@ -3841,7 +3905,7 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
3841 if (!(features & flag)) { 3905 if (!(features & flag)) {
3842 features |= flag; 3906 features |= flag;
3843 btrfs_set_super_incompat_flags(disk_super, features); 3907 btrfs_set_super_incompat_flags(disk_super, features);
3844 printk(KERN_INFO "btrfs: setting %llu feature flag\n", 3908 btrfs_info(fs_info, "setting %llu feature flag",
3845 flag); 3909 flag);
3846 } 3910 }
3847 spin_unlock(&fs_info->super_lock); 3911 spin_unlock(&fs_info->super_lock);
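The new *_SAFE_SET/*_SAFE_CLEAR masks declare which feature bits may be toggled on a mounted filesystem; in this series only EXTENDED_IREF is safe to set online. A sketch of the gating a sysfs write could perform, assuming an invented change_features() helper (the real checks live in the sysfs code):

#include <stdio.h>

#define FEAT_EXTENDED_IREF (1ULL << 6)

#define INCOMPAT_SAFE_SET   FEAT_EXTENDED_IREF
#define INCOMPAT_SAFE_CLEAR 0ULL

/* 0 on success, -1 if the requested change is not allowed online */
static int change_features(unsigned long long *super_flags,
			   unsigned long long set,
			   unsigned long long clear)
{
	if (set & ~INCOMPAT_SAFE_SET)
		return -1;   /* bit cannot be turned on while mounted */
	if (clear & ~INCOMPAT_SAFE_CLEAR)
		return -1;   /* bit cannot be turned off while mounted */
	*super_flags = (*super_flags | set) & ~clear;
	return 0;
}

int main(void)
{
	unsigned long long flags = 0;

	printf("set extended_iref: %d\n",
	       change_features(&flags, FEAT_EXTENDED_IREF, 0)); /* ok */
	printf("clear it again:    %d\n",
	       change_features(&flags, 0, FEAT_EXTENDED_IREF)); /* refused */
	return 0;
}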
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 8d292fbae659..451b00c86f6c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -55,8 +55,7 @@ static inline void btrfs_init_delayed_node(
55 delayed_node->inode_id = inode_id; 55 delayed_node->inode_id = inode_id;
56 atomic_set(&delayed_node->refs, 0); 56 atomic_set(&delayed_node->refs, 0);
57 delayed_node->count = 0; 57 delayed_node->count = 0;
58 delayed_node->in_list = 0; 58 delayed_node->flags = 0;
59 delayed_node->inode_dirty = 0;
60 delayed_node->ins_root = RB_ROOT; 59 delayed_node->ins_root = RB_ROOT;
61 delayed_node->del_root = RB_ROOT; 60 delayed_node->del_root = RB_ROOT;
62 mutex_init(&delayed_node->mutex); 61 mutex_init(&delayed_node->mutex);
@@ -172,7 +171,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
172 int mod) 171 int mod)
173{ 172{
174 spin_lock(&root->lock); 173 spin_lock(&root->lock);
175 if (node->in_list) { 174 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
176 if (!list_empty(&node->p_list)) 175 if (!list_empty(&node->p_list))
177 list_move_tail(&node->p_list, &root->prepare_list); 176 list_move_tail(&node->p_list, &root->prepare_list);
178 else if (mod) 177 else if (mod)
@@ -182,7 +181,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
182 list_add_tail(&node->p_list, &root->prepare_list); 181 list_add_tail(&node->p_list, &root->prepare_list);
183 atomic_inc(&node->refs); /* inserted into list */ 182 atomic_inc(&node->refs); /* inserted into list */
184 root->nodes++; 183 root->nodes++;
185 node->in_list = 1; 184 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
186 } 185 }
187 spin_unlock(&root->lock); 186 spin_unlock(&root->lock);
188} 187}
@@ -192,13 +191,13 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
192 struct btrfs_delayed_node *node) 191 struct btrfs_delayed_node *node)
193{ 192{
194 spin_lock(&root->lock); 193 spin_lock(&root->lock);
195 if (node->in_list) { 194 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
196 root->nodes--; 195 root->nodes--;
197 atomic_dec(&node->refs); /* not in the list */ 196 atomic_dec(&node->refs); /* not in the list */
198 list_del_init(&node->n_list); 197 list_del_init(&node->n_list);
199 if (!list_empty(&node->p_list)) 198 if (!list_empty(&node->p_list))
200 list_del_init(&node->p_list); 199 list_del_init(&node->p_list);
201 node->in_list = 0; 200 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
202 } 201 }
203 spin_unlock(&root->lock); 202 spin_unlock(&root->lock);
204} 203}
@@ -231,7 +230,8 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
231 230
232 delayed_root = node->root->fs_info->delayed_root; 231 delayed_root = node->root->fs_info->delayed_root;
233 spin_lock(&delayed_root->lock); 232 spin_lock(&delayed_root->lock);
234 if (!node->in_list) { /* not in the list */ 233 if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
234 /* not in the list */
235 if (list_empty(&delayed_root->node_list)) 235 if (list_empty(&delayed_root->node_list))
236 goto out; 236 goto out;
237 p = delayed_root->node_list.next; 237 p = delayed_root->node_list.next;
@@ -1004,9 +1004,10 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1004{ 1004{
1005 struct btrfs_delayed_root *delayed_root; 1005 struct btrfs_delayed_root *delayed_root;
1006 1006
1007 if (delayed_node && delayed_node->inode_dirty) { 1007 if (delayed_node &&
1008 test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1008 BUG_ON(!delayed_node->root); 1009 BUG_ON(!delayed_node->root);
1009 delayed_node->inode_dirty = 0; 1010 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1010 delayed_node->count--; 1011 delayed_node->count--;
1011 1012
1012 delayed_root = delayed_node->root->fs_info->delayed_root; 1013 delayed_root = delayed_node->root->fs_info->delayed_root;
@@ -1014,6 +1015,18 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1014 } 1015 }
1015} 1016}
1016 1017
1018static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
1019{
1020 struct btrfs_delayed_root *delayed_root;
1021
1022 ASSERT(delayed_node->root);
1023 clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1024 delayed_node->count--;
1025
1026 delayed_root = delayed_node->root->fs_info->delayed_root;
1027 finish_one_item(delayed_root);
1028}
1029
1017static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, 1030static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1018 struct btrfs_root *root, 1031 struct btrfs_root *root,
1019 struct btrfs_path *path, 1032 struct btrfs_path *path,
@@ -1022,13 +1035,19 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1022 struct btrfs_key key; 1035 struct btrfs_key key;
1023 struct btrfs_inode_item *inode_item; 1036 struct btrfs_inode_item *inode_item;
1024 struct extent_buffer *leaf; 1037 struct extent_buffer *leaf;
1038 int mod;
1025 int ret; 1039 int ret;
1026 1040
1027 key.objectid = node->inode_id; 1041 key.objectid = node->inode_id;
1028 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 1042 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1029 key.offset = 0; 1043 key.offset = 0;
1030 1044
1031 ret = btrfs_lookup_inode(trans, root, path, &key, 1); 1045 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1046 mod = -1;
1047 else
1048 mod = 1;
1049
1050 ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1032 if (ret > 0) { 1051 if (ret > 0) {
1033 btrfs_release_path(path); 1052 btrfs_release_path(path);
1034 return -ENOENT; 1053 return -ENOENT;
@@ -1036,19 +1055,58 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1036 return ret; 1055 return ret;
1037 } 1056 }
1038 1057
1039 btrfs_unlock_up_safe(path, 1);
1040 leaf = path->nodes[0]; 1058 leaf = path->nodes[0];
1041 inode_item = btrfs_item_ptr(leaf, path->slots[0], 1059 inode_item = btrfs_item_ptr(leaf, path->slots[0],
1042 struct btrfs_inode_item); 1060 struct btrfs_inode_item);
1043 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item, 1061 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1044 sizeof(struct btrfs_inode_item)); 1062 sizeof(struct btrfs_inode_item));
1045 btrfs_mark_buffer_dirty(leaf); 1063 btrfs_mark_buffer_dirty(leaf);
1046 btrfs_release_path(path);
1047 1064
1065 if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1066 goto no_iref;
1067
1068 path->slots[0]++;
1069 if (path->slots[0] >= btrfs_header_nritems(leaf))
1070 goto search;
1071again:
1072 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1073 if (key.objectid != node->inode_id)
1074 goto out;
1075
1076 if (key.type != BTRFS_INODE_REF_KEY &&
1077 key.type != BTRFS_INODE_EXTREF_KEY)
1078 goto out;
1079
1080 /*
1081 * Delayed iref deletion is for the inode who has only one link,
1082 * so there is only one iref. The case that several irefs are
1083 * in the same item doesn't exist.
1084 */
1085 btrfs_del_item(trans, root, path);
1086out:
1087 btrfs_release_delayed_iref(node);
1088no_iref:
1089 btrfs_release_path(path);
1090err_out:
1048 btrfs_delayed_inode_release_metadata(root, node); 1091 btrfs_delayed_inode_release_metadata(root, node);
1049 btrfs_release_delayed_inode(node); 1092 btrfs_release_delayed_inode(node);
1050 1093
1051 return 0; 1094 return ret;
1095
1096search:
1097 btrfs_release_path(path);
1098
1099 btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
1100 key.offset = -1;
1101 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1102 if (ret < 0)
1103 goto err_out;
1104 ASSERT(ret);
1105
1106 ret = 0;
1107 leaf = path->nodes[0];
1108 path->slots[0]--;
1109 goto again;
1052} 1110}
1053 1111
1054static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, 1112static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
@@ -1059,7 +1117,7 @@ static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1059 int ret; 1117 int ret;
1060 1118
1061 mutex_lock(&node->mutex); 1119 mutex_lock(&node->mutex);
1062 if (!node->inode_dirty) { 1120 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1063 mutex_unlock(&node->mutex); 1121 mutex_unlock(&node->mutex);
1064 return 0; 1122 return 0;
1065 } 1123 }
@@ -1203,7 +1261,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
1203 return 0; 1261 return 0;
1204 1262
1205 mutex_lock(&delayed_node->mutex); 1263 mutex_lock(&delayed_node->mutex);
1206 if (!delayed_node->inode_dirty) { 1264 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1207 mutex_unlock(&delayed_node->mutex); 1265 mutex_unlock(&delayed_node->mutex);
1208 btrfs_release_delayed_node(delayed_node); 1266 btrfs_release_delayed_node(delayed_node);
1209 return 0; 1267 return 0;
@@ -1227,7 +1285,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
1227 trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv; 1285 trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1228 1286
1229 mutex_lock(&delayed_node->mutex); 1287 mutex_lock(&delayed_node->mutex);
1230 if (delayed_node->inode_dirty) 1288 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1231 ret = __btrfs_update_delayed_inode(trans, delayed_node->root, 1289 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1232 path, delayed_node); 1290 path, delayed_node);
1233 else 1291 else
@@ -1300,36 +1358,9 @@ again:
1300 trans->block_rsv = &root->fs_info->delayed_block_rsv; 1358 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1301 1359
1302 __btrfs_commit_inode_delayed_items(trans, path, delayed_node); 1360 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1303 /*
1304 * Maybe new delayed items have been inserted, so we need requeue
1305 * the work. Besides that, we must dequeue the empty delayed nodes
1306 * to avoid the race between delayed items balance and the worker.
1307 * The race like this:
1308 * Task1 Worker thread
1309 * count == 0, needn't requeue
1310 * also needn't insert the
1311 * delayed node into prepare
1312 * list again.
1313 * add lots of delayed items
1314 * queue the delayed node
1315 * already in the list,
1316 * and not in the prepare
1317 * list, it means the delayed
1318 * node is being dealt with
1319 * by the worker.
1320 * do delayed items balance
1321 * the delayed node is being
1322 * dealt with by the worker
1323 * now, just wait.
1324 * the worker goto idle.
 1326 * Task1 will sleep until the transaction is committed.
1326 */
1327 mutex_lock(&delayed_node->mutex);
1328 btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
1329 mutex_unlock(&delayed_node->mutex);
1330 1361
1331 trans->block_rsv = block_rsv; 1362 trans->block_rsv = block_rsv;
1332 btrfs_end_transaction_dmeta(trans, root); 1363 btrfs_end_transaction(trans, root);
1333 btrfs_btree_balance_dirty_nodelay(root); 1364 btrfs_btree_balance_dirty_nodelay(root);
1334 1365
1335release_path: 1366release_path:
@@ -1376,52 +1407,41 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1376 WARN_ON(btrfs_first_delayed_node(delayed_root)); 1407 WARN_ON(btrfs_first_delayed_node(delayed_root));
1377} 1408}
1378 1409
1379static int refs_newer(struct btrfs_delayed_root *delayed_root, 1410static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1380 int seq, int count)
1381{ 1411{
1382 int val = atomic_read(&delayed_root->items_seq); 1412 int val = atomic_read(&delayed_root->items_seq);
1383 1413
1384 if (val < seq || val >= seq + count) 1414 if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1415 return 1;
1416
1417 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1385 return 1; 1418 return 1;
1419
1386 return 0; 1420 return 0;
1387} 1421}
1388 1422
1389void btrfs_balance_delayed_items(struct btrfs_root *root) 1423void btrfs_balance_delayed_items(struct btrfs_root *root)
1390{ 1424{
1391 struct btrfs_delayed_root *delayed_root; 1425 struct btrfs_delayed_root *delayed_root;
1392 int seq;
1393 1426
1394 delayed_root = btrfs_get_delayed_root(root); 1427 delayed_root = btrfs_get_delayed_root(root);
1395 1428
1396 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) 1429 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1397 return; 1430 return;
1398 1431
1399 seq = atomic_read(&delayed_root->items_seq);
1400
1401 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { 1432 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1433 int seq;
1402 int ret; 1434 int ret;
1403 DEFINE_WAIT(__wait); 1435
1436 seq = atomic_read(&delayed_root->items_seq);
1404 1437
1405 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0); 1438 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
1406 if (ret) 1439 if (ret)
1407 return; 1440 return;
1408 1441
1409 while (1) { 1442 wait_event_interruptible(delayed_root->wait,
1410 prepare_to_wait(&delayed_root->wait, &__wait, 1443 could_end_wait(delayed_root, seq));
1411 TASK_INTERRUPTIBLE); 1444 return;
1412
1413 if (refs_newer(delayed_root, seq,
1414 BTRFS_DELAYED_BATCH) ||
1415 atomic_read(&delayed_root->items) <
1416 BTRFS_DELAYED_BACKGROUND) {
1417 break;
1418 }
1419 if (!signal_pending(current))
1420 schedule();
1421 else
1422 break;
1423 }
1424 finish_wait(&delayed_root->wait, &__wait);
1425 } 1445 }
1426 1446
1427 btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH); 1447 btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
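The open-coded prepare_to_wait()/schedule() loop is replaced by wait_event_interruptible() with could_end_wait() as its predicate, which is re-evaluated after every wakeup. A rough pthread analogue of the same pattern (all names and thresholds here are ours, and the predicate is simplified):

#include <pthread.h>
#include <stdio.h>

#define DELAYED_BATCH      16
#define DELAYED_BACKGROUND 32

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int items = 100;      /* pending delayed items */
static int items_seq;        /* bumped as items get flushed */

/* analogue of could_end_wait(): enough progress or backlog drained */
static int could_end_wait(int seq)
{
	return items_seq >= seq + DELAYED_BATCH ||
	       items < DELAYED_BACKGROUND;
}

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	items_seq += DELAYED_BATCH;   /* pretend we flushed a batch */
	items = 10;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;
	int seq;

	pthread_mutex_lock(&lock);
	seq = items_seq;
	pthread_create(&t, NULL, worker, NULL);
	while (!could_end_wait(seq))  /* wait_event re-checks here */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	printf("wait finished\n");
	return 0;
}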
@@ -1472,9 +1492,9 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1472 mutex_lock(&delayed_node->mutex); 1492 mutex_lock(&delayed_node->mutex);
1473 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); 1493 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1474 if (unlikely(ret)) { 1494 if (unlikely(ret)) {
1475 printk(KERN_ERR "err add delayed dir index item(name: %.*s) " 1495 btrfs_err(root->fs_info, "err add delayed dir index item(name: %.*s) "
1476 "into the insertion tree of the delayed node" 1496 "into the insertion tree of the delayed node"
1477 "(root id: %llu, inode id: %llu, errno: %d)\n", 1497 "(root id: %llu, inode id: %llu, errno: %d)",
1478 name_len, name, delayed_node->root->objectid, 1498 name_len, name, delayed_node->root->objectid,
1479 delayed_node->inode_id, ret); 1499 delayed_node->inode_id, ret);
1480 BUG(); 1500 BUG();
@@ -1544,9 +1564,9 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1544 mutex_lock(&node->mutex); 1564 mutex_lock(&node->mutex);
1545 ret = __btrfs_add_delayed_deletion_item(node, item); 1565 ret = __btrfs_add_delayed_deletion_item(node, item);
1546 if (unlikely(ret)) { 1566 if (unlikely(ret)) {
1547 printk(KERN_ERR "err add delayed dir index item(index: %llu) " 1567 btrfs_err(root->fs_info, "err add delayed dir index item(index: %llu) "
1548 "into the deletion tree of the delayed node" 1568 "into the deletion tree of the delayed node"
1549 "(root id: %llu, inode id: %llu, errno: %d)\n", 1569 "(root id: %llu, inode id: %llu, errno: %d)",
1550 index, node->root->objectid, node->inode_id, 1570 index, node->root->objectid, node->inode_id,
1551 ret); 1571 ret);
1552 BUG(); 1572 BUG();
@@ -1759,7 +1779,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1759 return -ENOENT; 1779 return -ENOENT;
1760 1780
1761 mutex_lock(&delayed_node->mutex); 1781 mutex_lock(&delayed_node->mutex);
1762 if (!delayed_node->inode_dirty) { 1782 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1763 mutex_unlock(&delayed_node->mutex); 1783 mutex_unlock(&delayed_node->mutex);
1764 btrfs_release_delayed_node(delayed_node); 1784 btrfs_release_delayed_node(delayed_node);
1765 return -ENOENT; 1785 return -ENOENT;
@@ -1810,7 +1830,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1810 return PTR_ERR(delayed_node); 1830 return PTR_ERR(delayed_node);
1811 1831
1812 mutex_lock(&delayed_node->mutex); 1832 mutex_lock(&delayed_node->mutex);
1813 if (delayed_node->inode_dirty) { 1833 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1814 fill_stack_inode_item(trans, &delayed_node->inode_item, inode); 1834 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1815 goto release_node; 1835 goto release_node;
1816 } 1836 }
@@ -1821,7 +1841,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1821 goto release_node; 1841 goto release_node;
1822 1842
1823 fill_stack_inode_item(trans, &delayed_node->inode_item, inode); 1843 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1824 delayed_node->inode_dirty = 1; 1844 set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1825 delayed_node->count++; 1845 delayed_node->count++;
1826 atomic_inc(&root->fs_info->delayed_root->items); 1846 atomic_inc(&root->fs_info->delayed_root->items);
1827release_node: 1847release_node:
@@ -1830,6 +1850,41 @@ release_node:
1830 return ret; 1850 return ret;
1831} 1851}
1832 1852
1853int btrfs_delayed_delete_inode_ref(struct inode *inode)
1854{
1855 struct btrfs_delayed_node *delayed_node;
1856
1857 delayed_node = btrfs_get_or_create_delayed_node(inode);
1858 if (IS_ERR(delayed_node))
1859 return PTR_ERR(delayed_node);
1860
1861 /*
 1862 * We don't reserve space for inode ref deletion because:
 1863 * - We ONLY do async inode ref deletion for an inode that has only
 1864 * one link (i_nlink == 1), which means there is only one inode ref.
 1865 * And in most cases, the inode ref and the inode item are in the
1866 * same leaf, and we will deal with them at the same time.
1867 * Since we are sure we will reserve the space for the inode item,
1868 * it is unnecessary to reserve space for inode ref deletion.
1869 * - If the inode ref and the inode item are not in the same leaf,
 1870 * we also needn't worry about the enospc problem, because we reserve
 1871 * much more space for the inode update than it needs.
 1872 * - At worst, we can steal some space from the global reservation.
1873 * It is very rare.
1874 */
1875 mutex_lock(&delayed_node->mutex);
1876 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1877 goto release_node;
1878
1879 set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1880 delayed_node->count++;
1881 atomic_inc(&BTRFS_I(inode)->root->fs_info->delayed_root->items);
1882release_node:
1883 mutex_unlock(&delayed_node->mutex);
1884 btrfs_release_delayed_node(delayed_node);
1885 return 0;
1886}
1887
1833static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node) 1888static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1834{ 1889{
1835 struct btrfs_root *root = delayed_node->root; 1890 struct btrfs_root *root = delayed_node->root;
@@ -1852,7 +1907,10 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1852 btrfs_release_delayed_item(prev_item); 1907 btrfs_release_delayed_item(prev_item);
1853 } 1908 }
1854 1909
1855 if (delayed_node->inode_dirty) { 1910 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1911 btrfs_release_delayed_iref(delayed_node);
1912
1913 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1856 btrfs_delayed_inode_release_metadata(root, delayed_node); 1914 btrfs_delayed_inode_release_metadata(root, delayed_node);
1857 btrfs_release_delayed_inode(delayed_node); 1915 btrfs_release_delayed_inode(delayed_node);
1858 } 1916 }
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index a4b38f934d14..f70119f25421 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -48,6 +48,10 @@ struct btrfs_delayed_root {
48 wait_queue_head_t wait; 48 wait_queue_head_t wait;
49}; 49};
50 50
51#define BTRFS_DELAYED_NODE_IN_LIST 0
52#define BTRFS_DELAYED_NODE_INODE_DIRTY 1
53#define BTRFS_DELAYED_NODE_DEL_IREF 2
54
51struct btrfs_delayed_node { 55struct btrfs_delayed_node {
52 u64 inode_id; 56 u64 inode_id;
53 u64 bytes_reserved; 57 u64 bytes_reserved;
@@ -65,8 +69,7 @@ struct btrfs_delayed_node {
65 struct btrfs_inode_item inode_item; 69 struct btrfs_inode_item inode_item;
66 atomic_t refs; 70 atomic_t refs;
67 u64 index_cnt; 71 u64 index_cnt;
68 bool in_list; 72 unsigned long flags;
69 bool inode_dirty;
70 int count; 73 int count;
71}; 74};
72 75
@@ -125,6 +128,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode);
125int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, 128int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
126 struct btrfs_root *root, struct inode *inode); 129 struct btrfs_root *root, struct inode *inode);
127int btrfs_fill_inode(struct inode *inode, u32 *rdev); 130int btrfs_fill_inode(struct inode *inode, u32 *rdev);
131int btrfs_delayed_delete_inode_ref(struct inode *inode);
128 132
129/* Used for drop dead root */ 133/* Used for drop dead root */
130void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); 134void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
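Collapsing the in_list and inode_dirty bools into one flags word means all three states (IN_LIST, INODE_DIRTY, DEL_IREF) are tested and updated with atomic bit operations. A userspace model of the same idiom using C11 atomics (the helper names are ours; the kernel uses test_bit()/set_bit()/clear_bit()):

#include <stdatomic.h>
#include <stdio.h>

#define DELAYED_NODE_IN_LIST     0
#define DELAYED_NODE_INODE_DIRTY 1
#define DELAYED_NODE_DEL_IREF    2

struct delayed_node {
	atomic_ulong flags;   /* replaces the in_list/inode_dirty bools */
};

static void set_flag(struct delayed_node *n, int nr)
{
	atomic_fetch_or(&n->flags, 1UL << nr);
}

static void clear_flag(struct delayed_node *n, int nr)
{
	atomic_fetch_and(&n->flags, ~(1UL << nr));
}

static int test_flag(struct delayed_node *n, int nr)
{
	return (atomic_load(&n->flags) >> nr) & 1;
}

int main(void)
{
	struct delayed_node node = { .flags = 0 };

	set_flag(&node, DELAYED_NODE_INODE_DIRTY);
	printf("dirty=%d in_list=%d\n",
	       test_flag(&node, DELAYED_NODE_INODE_DIRTY),
	       test_flag(&node, DELAYED_NODE_IN_LIST));
	clear_flag(&node, DELAYED_NODE_INODE_DIRTY);
	printf("dirty=%d\n", test_flag(&node, DELAYED_NODE_INODE_DIRTY));
	return 0;
}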
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e4d467be2dd4..f3bff89eecf0 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -161,35 +161,61 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
161 return NULL; 161 return NULL;
162} 162}
163 163
164/* insert a new ref to head ref rbtree */
165static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
166 struct rb_node *node)
167{
168 struct rb_node **p = &root->rb_node;
169 struct rb_node *parent_node = NULL;
170 struct btrfs_delayed_ref_head *entry;
171 struct btrfs_delayed_ref_head *ins;
172 u64 bytenr;
173
174 ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
175 bytenr = ins->node.bytenr;
176 while (*p) {
177 parent_node = *p;
178 entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
179 href_node);
180
181 if (bytenr < entry->node.bytenr)
182 p = &(*p)->rb_left;
183 else if (bytenr > entry->node.bytenr)
184 p = &(*p)->rb_right;
185 else
186 return entry;
187 }
188
189 rb_link_node(node, parent_node, p);
190 rb_insert_color(node, root);
191 return NULL;
192}
193
164/* 194/*
 165 * find a head entry based on bytenr. This returns the delayed ref 195 * find a head entry based on bytenr. This returns the delayed ref
166 * head if it was able to find one, or NULL if nothing was in that spot. 196 * head if it was able to find one, or NULL if nothing was in that spot.
167 * If return_bigger is given, the next bigger entry is returned if no exact 197 * If return_bigger is given, the next bigger entry is returned if no exact
168 * match is found. 198 * match is found.
169 */ 199 */
170static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, 200static struct btrfs_delayed_ref_head *
171 u64 bytenr, 201find_ref_head(struct rb_root *root, u64 bytenr,
172 struct btrfs_delayed_ref_node **last, 202 struct btrfs_delayed_ref_head **last, int return_bigger)
173 int return_bigger)
174{ 203{
175 struct rb_node *n; 204 struct rb_node *n;
176 struct btrfs_delayed_ref_node *entry; 205 struct btrfs_delayed_ref_head *entry;
177 int cmp = 0; 206 int cmp = 0;
178 207
179again: 208again:
180 n = root->rb_node; 209 n = root->rb_node;
181 entry = NULL; 210 entry = NULL;
182 while (n) { 211 while (n) {
183 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); 212 entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
184 WARN_ON(!entry->in_tree);
185 if (last) 213 if (last)
186 *last = entry; 214 *last = entry;
187 215
188 if (bytenr < entry->bytenr) 216 if (bytenr < entry->node.bytenr)
189 cmp = -1; 217 cmp = -1;
190 else if (bytenr > entry->bytenr) 218 else if (bytenr > entry->node.bytenr)
191 cmp = 1;
192 else if (!btrfs_delayed_ref_is_head(entry))
193 cmp = 1; 219 cmp = 1;
194 else 220 else
195 cmp = 0; 221 cmp = 0;
@@ -203,12 +229,12 @@ again:
203 } 229 }
204 if (entry && return_bigger) { 230 if (entry && return_bigger) {
205 if (cmp > 0) { 231 if (cmp > 0) {
206 n = rb_next(&entry->rb_node); 232 n = rb_next(&entry->href_node);
207 if (!n) 233 if (!n)
208 n = rb_first(root); 234 n = rb_first(root);
209 entry = rb_entry(n, struct btrfs_delayed_ref_node, 235 entry = rb_entry(n, struct btrfs_delayed_ref_head,
210 rb_node); 236 href_node);
211 bytenr = entry->bytenr; 237 bytenr = entry->node.bytenr;
212 return_bigger = 0; 238 return_bigger = 0;
213 goto again; 239 goto again;
214 } 240 }
@@ -243,33 +269,38 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
243 269
244static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, 270static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
245 struct btrfs_delayed_ref_root *delayed_refs, 271 struct btrfs_delayed_ref_root *delayed_refs,
272 struct btrfs_delayed_ref_head *head,
246 struct btrfs_delayed_ref_node *ref) 273 struct btrfs_delayed_ref_node *ref)
247{ 274{
248 rb_erase(&ref->rb_node, &delayed_refs->root); 275 if (btrfs_delayed_ref_is_head(ref)) {
276 head = btrfs_delayed_node_to_head(ref);
277 rb_erase(&head->href_node, &delayed_refs->href_root);
278 } else {
279 assert_spin_locked(&head->lock);
280 rb_erase(&ref->rb_node, &head->ref_root);
281 }
249 ref->in_tree = 0; 282 ref->in_tree = 0;
250 btrfs_put_delayed_ref(ref); 283 btrfs_put_delayed_ref(ref);
251 delayed_refs->num_entries--; 284 atomic_dec(&delayed_refs->num_entries);
252 if (trans->delayed_ref_updates) 285 if (trans->delayed_ref_updates)
253 trans->delayed_ref_updates--; 286 trans->delayed_ref_updates--;
254} 287}
255 288
256static int merge_ref(struct btrfs_trans_handle *trans, 289static int merge_ref(struct btrfs_trans_handle *trans,
257 struct btrfs_delayed_ref_root *delayed_refs, 290 struct btrfs_delayed_ref_root *delayed_refs,
291 struct btrfs_delayed_ref_head *head,
258 struct btrfs_delayed_ref_node *ref, u64 seq) 292 struct btrfs_delayed_ref_node *ref, u64 seq)
259{ 293{
260 struct rb_node *node; 294 struct rb_node *node;
261 int merged = 0;
262 int mod = 0; 295 int mod = 0;
263 int done = 0; 296 int done = 0;
264 297
265 node = rb_prev(&ref->rb_node); 298 node = rb_next(&ref->rb_node);
266 while (node) { 299 while (!done && node) {
267 struct btrfs_delayed_ref_node *next; 300 struct btrfs_delayed_ref_node *next;
268 301
269 next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 302 next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
270 node = rb_prev(node); 303 node = rb_next(node);
271 if (next->bytenr != ref->bytenr)
272 break;
273 if (seq && next->seq >= seq) 304 if (seq && next->seq >= seq)
274 break; 305 break;
275 if (comp_entry(ref, next, 0)) 306 if (comp_entry(ref, next, 0))
@@ -289,12 +320,11 @@ static int merge_ref(struct btrfs_trans_handle *trans,
289 mod = -next->ref_mod; 320 mod = -next->ref_mod;
290 } 321 }
291 322
292 merged++; 323 drop_delayed_ref(trans, delayed_refs, head, next);
293 drop_delayed_ref(trans, delayed_refs, next);
294 ref->ref_mod += mod; 324 ref->ref_mod += mod;
295 if (ref->ref_mod == 0) { 325 if (ref->ref_mod == 0) {
296 drop_delayed_ref(trans, delayed_refs, ref); 326 drop_delayed_ref(trans, delayed_refs, head, ref);
297 break; 327 done = 1;
298 } else { 328 } else {
299 /* 329 /*
300 * You can't have multiples of the same ref on a tree 330 * You can't have multiples of the same ref on a tree
@@ -303,13 +333,8 @@ static int merge_ref(struct btrfs_trans_handle *trans,
303 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || 333 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
304 ref->type == BTRFS_SHARED_BLOCK_REF_KEY); 334 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
305 } 335 }
306
307 if (done)
308 break;
309 node = rb_prev(&ref->rb_node);
310 } 336 }
311 337 return done;
312 return merged;
313} 338}
314 339
315void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, 340void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
@@ -320,6 +345,14 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
320 struct rb_node *node; 345 struct rb_node *node;
321 u64 seq = 0; 346 u64 seq = 0;
322 347
348 assert_spin_locked(&head->lock);
349 /*
 350 * We don't have too many refs to merge in the case of delayed data
351 * refs.
352 */
353 if (head->is_data)
354 return;
355
323 spin_lock(&fs_info->tree_mod_seq_lock); 356 spin_lock(&fs_info->tree_mod_seq_lock);
324 if (!list_empty(&fs_info->tree_mod_seq_list)) { 357 if (!list_empty(&fs_info->tree_mod_seq_list)) {
325 struct seq_list *elem; 358 struct seq_list *elem;
@@ -330,22 +363,19 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
330 } 363 }
331 spin_unlock(&fs_info->tree_mod_seq_lock); 364 spin_unlock(&fs_info->tree_mod_seq_lock);
332 365
333 node = rb_prev(&head->node.rb_node); 366 node = rb_first(&head->ref_root);
334 while (node) { 367 while (node) {
335 struct btrfs_delayed_ref_node *ref; 368 struct btrfs_delayed_ref_node *ref;
336 369
337 ref = rb_entry(node, struct btrfs_delayed_ref_node, 370 ref = rb_entry(node, struct btrfs_delayed_ref_node,
338 rb_node); 371 rb_node);
339 if (ref->bytenr != head->node.bytenr)
340 break;
341
342 /* We can't merge refs that are outside of our seq count */ 372 /* We can't merge refs that are outside of our seq count */
343 if (seq && ref->seq >= seq) 373 if (seq && ref->seq >= seq)
344 break; 374 break;
345 if (merge_ref(trans, delayed_refs, ref, seq)) 375 if (merge_ref(trans, delayed_refs, head, ref, seq))
346 node = rb_prev(&head->node.rb_node); 376 node = rb_first(&head->ref_root);
347 else 377 else
348 node = rb_prev(node); 378 node = rb_next(&ref->rb_node);
349 } 379 }
350} 380}
351 381
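
Condensing the two hunks above: merging now happens entirely inside one head's
ref_root, walking forward with rb_next() and folding matching-but-opposite
entries into ref->ref_mod until the node either survives with a net count or
hits zero and is dropped. A minimal user-space sketch of that fold — the
struct and function names are illustrative stand-ins, not the kernel's:

#include <stdio.h>

struct ref {
	int ref_mod;	/* positive for pending adds, negative for drops */
};

/* Fold @next into @ref; returns 1 when @ref cancels out entirely. */
static int merge_one(struct ref *ref, const struct ref *next)
{
	ref->ref_mod += next->ref_mod;
	return ref->ref_mod == 0;
}

int main(void)
{
	struct ref head_ref = { .ref_mod = 1 };	/* one pending ADD */
	struct ref drop = { .ref_mod = -1 };	/* a matching DROP */

	if (merge_one(&head_ref, &drop))
		printf("ref_mod hit 0: both nodes would be dropped\n");
	else
		printf("merged ref_mod = %d\n", head_ref.ref_mod);
	return 0;
}

In the real code the two entries must first compare equal apart from their
action (comp_entry), and tree block refs additionally warn if a net count
other than 1 survives, as the WARN_ON in the hunk shows.
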
@@ -373,71 +403,52 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
373 return ret; 403 return ret;
374} 404}
375 405
376int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, 406struct btrfs_delayed_ref_head *
377 struct list_head *cluster, u64 start) 407btrfs_select_ref_head(struct btrfs_trans_handle *trans)
378{ 408{
379 int count = 0;
380 struct btrfs_delayed_ref_root *delayed_refs; 409 struct btrfs_delayed_ref_root *delayed_refs;
381 struct rb_node *node;
382 struct btrfs_delayed_ref_node *ref;
383 struct btrfs_delayed_ref_head *head; 410 struct btrfs_delayed_ref_head *head;
411 u64 start;
412 bool loop = false;
384 413
385 delayed_refs = &trans->transaction->delayed_refs; 414 delayed_refs = &trans->transaction->delayed_refs;
386 if (start == 0) { 415
387 node = rb_first(&delayed_refs->root);
388 } else {
389 ref = NULL;
390 find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
391 if (ref) {
392 node = &ref->rb_node;
393 } else
394 node = rb_first(&delayed_refs->root);
395 }
396again: 416again:
397 while (node && count < 32) { 417 start = delayed_refs->run_delayed_start;
398 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 418 head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
399 if (btrfs_delayed_ref_is_head(ref)) { 419 if (!head && !loop) {
400 head = btrfs_delayed_node_to_head(ref); 420 delayed_refs->run_delayed_start = 0;
401 if (list_empty(&head->cluster)) {
402 list_add_tail(&head->cluster, cluster);
403 delayed_refs->run_delayed_start =
404 head->node.bytenr;
405 count++;
406
407 WARN_ON(delayed_refs->num_heads_ready == 0);
408 delayed_refs->num_heads_ready--;
409 } else if (count) {
410 /* the goal of the clustering is to find extents
411 * that are likely to end up in the same extent
412 * leaf on disk. So, we don't want them spread
413 * all over the tree. Stop now if we've hit
414 * a head that was already in use
415 */
416 break;
417 }
418 }
419 node = rb_next(node);
420 }
421 if (count) {
422 return 0;
423 } else if (start) {
424 /*
425 * we've gone to the end of the rbtree without finding any
426 * clusters. start from the beginning and try again
427 */
428 start = 0; 421 start = 0;
429 node = rb_first(&delayed_refs->root); 422 loop = true;
430 goto again; 423 head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
424 if (!head)
425 return NULL;
426 } else if (!head && loop) {
427 return NULL;
431 } 428 }
432 return 1;
433}
434 429
435void btrfs_release_ref_cluster(struct list_head *cluster) 430 while (head->processing) {
436{ 431 struct rb_node *node;
437 struct list_head *pos, *q; 432
433 node = rb_next(&head->href_node);
434 if (!node) {
435 if (loop)
436 return NULL;
437 delayed_refs->run_delayed_start = 0;
438 start = 0;
439 loop = true;
440 goto again;
441 }
442 head = rb_entry(node, struct btrfs_delayed_ref_head,
443 href_node);
444 }
438 445
439 list_for_each_safe(pos, q, cluster) 446 head->processing = 1;
440 list_del_init(pos); 447 WARN_ON(delayed_refs->num_heads_ready == 0);
448 delayed_refs->num_heads_ready--;
449 delayed_refs->run_delayed_start = head->node.bytenr +
450 head->node.num_bytes;
451 return head;
441} 452}
442 453
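
btrfs_select_ref_head() above replaces the old cluster building: it resumes at
run_delayed_start, skips heads another thread has already marked processing,
and wraps to the front of the tree at most once before giving up. A sketch of
the same scan, with a sorted array standing in for the rbtree and every
identifier illustrative rather than the kernel API:

#include <stdio.h>

struct head { unsigned long long bytenr; int processing; };

static struct head *select_head(struct head *heads, int n,
				unsigned long long *start)
{
	int looped = 0;

again:
	for (int i = 0; i < n; i++) {
		if (heads[i].bytenr < *start || heads[i].processing)
			continue;
		heads[i].processing = 1;
		/* resume past this head (the kernel uses bytenr + num_bytes) */
		*start = heads[i].bytenr + 1;
		return &heads[i];
	}
	if (!looped) {		/* wrap around once, then give up */
		looped = 1;
		*start = 0;
		goto again;
	}
	return NULL;
}

int main(void)
{
	struct head heads[] = { {4096, 1}, {8192, 0}, {12288, 0} };
	unsigned long long start = 5000;
	struct head *h = select_head(heads, 3, &start);

	if (h)
		printf("selected head at %llu\n", h->bytenr);
	return 0;
}
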
443/* 454/*
@@ -451,6 +462,7 @@ void btrfs_release_ref_cluster(struct list_head *cluster)
451static noinline void 462static noinline void
452update_existing_ref(struct btrfs_trans_handle *trans, 463update_existing_ref(struct btrfs_trans_handle *trans,
453 struct btrfs_delayed_ref_root *delayed_refs, 464 struct btrfs_delayed_ref_root *delayed_refs,
465 struct btrfs_delayed_ref_head *head,
454 struct btrfs_delayed_ref_node *existing, 466 struct btrfs_delayed_ref_node *existing,
455 struct btrfs_delayed_ref_node *update) 467 struct btrfs_delayed_ref_node *update)
456{ 468{
@@ -463,7 +475,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
463 */ 475 */
464 existing->ref_mod--; 476 existing->ref_mod--;
465 if (existing->ref_mod == 0) 477 if (existing->ref_mod == 0)
466 drop_delayed_ref(trans, delayed_refs, existing); 478 drop_delayed_ref(trans, delayed_refs, head, existing);
467 else 479 else
468 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || 480 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
469 existing->type == BTRFS_SHARED_BLOCK_REF_KEY); 481 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -533,9 +545,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
533 } 545 }
534 } 546 }
535 /* 547 /*
536 * update the reference mod on the head to reflect this new operation 548 * update the reference mod on the head to reflect this new operation,
 549	 * only need the lock for this case because we could be processing it
 550	 * currently; for refs we just added we know we're a-ok.
537 */ 551 */
552 spin_lock(&existing_ref->lock);
538 existing->ref_mod += update->ref_mod; 553 existing->ref_mod += update->ref_mod;
554 spin_unlock(&existing_ref->lock);
539} 555}
540 556
541/* 557/*
@@ -543,13 +559,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
543 * this does all the dirty work in terms of maintaining the correct 559 * this does all the dirty work in terms of maintaining the correct
544 * overall modification count. 560 * overall modification count.
545 */ 561 */
546static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, 562static noinline struct btrfs_delayed_ref_head *
547 struct btrfs_trans_handle *trans, 563add_delayed_ref_head(struct btrfs_fs_info *fs_info,
548 struct btrfs_delayed_ref_node *ref, 564 struct btrfs_trans_handle *trans,
549 u64 bytenr, u64 num_bytes, 565 struct btrfs_delayed_ref_node *ref, u64 bytenr,
550 int action, int is_data) 566 u64 num_bytes, int action, int is_data)
551{ 567{
552 struct btrfs_delayed_ref_node *existing; 568 struct btrfs_delayed_ref_head *existing;
553 struct btrfs_delayed_ref_head *head_ref = NULL; 569 struct btrfs_delayed_ref_head *head_ref = NULL;
554 struct btrfs_delayed_ref_root *delayed_refs; 570 struct btrfs_delayed_ref_root *delayed_refs;
555 int count_mod = 1; 571 int count_mod = 1;
@@ -596,38 +612,43 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
596 head_ref = btrfs_delayed_node_to_head(ref); 612 head_ref = btrfs_delayed_node_to_head(ref);
597 head_ref->must_insert_reserved = must_insert_reserved; 613 head_ref->must_insert_reserved = must_insert_reserved;
598 head_ref->is_data = is_data; 614 head_ref->is_data = is_data;
615 head_ref->ref_root = RB_ROOT;
616 head_ref->processing = 0;
599 617
600 INIT_LIST_HEAD(&head_ref->cluster); 618 spin_lock_init(&head_ref->lock);
601 mutex_init(&head_ref->mutex); 619 mutex_init(&head_ref->mutex);
602 620
603 trace_add_delayed_ref_head(ref, head_ref, action); 621 trace_add_delayed_ref_head(ref, head_ref, action);
604 622
605 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 623 existing = htree_insert(&delayed_refs->href_root,
606 624 &head_ref->href_node);
607 if (existing) { 625 if (existing) {
608 update_existing_head_ref(existing, ref); 626 update_existing_head_ref(&existing->node, ref);
609 /* 627 /*
610 * we've updated the existing ref, free the newly 628 * we've updated the existing ref, free the newly
611 * allocated ref 629 * allocated ref
612 */ 630 */
613 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 631 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
632 head_ref = existing;
614 } else { 633 } else {
615 delayed_refs->num_heads++; 634 delayed_refs->num_heads++;
616 delayed_refs->num_heads_ready++; 635 delayed_refs->num_heads_ready++;
617 delayed_refs->num_entries++; 636 atomic_inc(&delayed_refs->num_entries);
618 trans->delayed_ref_updates++; 637 trans->delayed_ref_updates++;
619 } 638 }
639 return head_ref;
620} 640}
621 641
622/* 642/*
623 * helper to insert a delayed tree ref into the rbtree. 643 * helper to insert a delayed tree ref into the rbtree.
624 */ 644 */
625static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 645static noinline void
626 struct btrfs_trans_handle *trans, 646add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
627 struct btrfs_delayed_ref_node *ref, 647 struct btrfs_trans_handle *trans,
628 u64 bytenr, u64 num_bytes, u64 parent, 648 struct btrfs_delayed_ref_head *head_ref,
629 u64 ref_root, int level, int action, 649 struct btrfs_delayed_ref_node *ref, u64 bytenr,
630 int for_cow) 650 u64 num_bytes, u64 parent, u64 ref_root, int level,
651 int action, int for_cow)
631{ 652{
632 struct btrfs_delayed_ref_node *existing; 653 struct btrfs_delayed_ref_node *existing;
633 struct btrfs_delayed_tree_ref *full_ref; 654 struct btrfs_delayed_tree_ref *full_ref;
@@ -663,30 +684,33 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
663 684
664 trace_add_delayed_tree_ref(ref, full_ref, action); 685 trace_add_delayed_tree_ref(ref, full_ref, action);
665 686
666 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 687 spin_lock(&head_ref->lock);
667 688 existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
668 if (existing) { 689 if (existing) {
669 update_existing_ref(trans, delayed_refs, existing, ref); 690 update_existing_ref(trans, delayed_refs, head_ref, existing,
691 ref);
670 /* 692 /*
671 * we've updated the existing ref, free the newly 693 * we've updated the existing ref, free the newly
672 * allocated ref 694 * allocated ref
673 */ 695 */
674 kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref); 696 kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
675 } else { 697 } else {
676 delayed_refs->num_entries++; 698 atomic_inc(&delayed_refs->num_entries);
677 trans->delayed_ref_updates++; 699 trans->delayed_ref_updates++;
678 } 700 }
701 spin_unlock(&head_ref->lock);
679} 702}
680 703
681/* 704/*
682 * helper to insert a delayed data ref into the rbtree. 705 * helper to insert a delayed data ref into the rbtree.
683 */ 706 */
684static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, 707static noinline void
685 struct btrfs_trans_handle *trans, 708add_delayed_data_ref(struct btrfs_fs_info *fs_info,
686 struct btrfs_delayed_ref_node *ref, 709 struct btrfs_trans_handle *trans,
687 u64 bytenr, u64 num_bytes, u64 parent, 710 struct btrfs_delayed_ref_head *head_ref,
688 u64 ref_root, u64 owner, u64 offset, 711 struct btrfs_delayed_ref_node *ref, u64 bytenr,
689 int action, int for_cow) 712 u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
713 u64 offset, int action, int for_cow)
690{ 714{
691 struct btrfs_delayed_ref_node *existing; 715 struct btrfs_delayed_ref_node *existing;
692 struct btrfs_delayed_data_ref *full_ref; 716 struct btrfs_delayed_data_ref *full_ref;
@@ -724,19 +748,21 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
724 748
725 trace_add_delayed_data_ref(ref, full_ref, action); 749 trace_add_delayed_data_ref(ref, full_ref, action);
726 750
727 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 751 spin_lock(&head_ref->lock);
728 752 existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
729 if (existing) { 753 if (existing) {
730 update_existing_ref(trans, delayed_refs, existing, ref); 754 update_existing_ref(trans, delayed_refs, head_ref, existing,
755 ref);
731 /* 756 /*
732 * we've updated the existing ref, free the newly 757 * we've updated the existing ref, free the newly
733 * allocated ref 758 * allocated ref
734 */ 759 */
735 kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); 760 kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
736 } else { 761 } else {
737 delayed_refs->num_entries++; 762 atomic_inc(&delayed_refs->num_entries);
738 trans->delayed_ref_updates++; 763 trans->delayed_ref_updates++;
739 } 764 }
765 spin_unlock(&head_ref->lock);
740} 766}
741 767
742/* 768/*
@@ -775,10 +801,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
775 * insert both the head node and the new ref without dropping 801 * insert both the head node and the new ref without dropping
776 * the spin lock 802 * the spin lock
777 */ 803 */
778 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 804 head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
779 num_bytes, action, 0); 805 bytenr, num_bytes, action, 0);
780 806
781 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, 807 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
782 num_bytes, parent, ref_root, level, action, 808 num_bytes, parent, ref_root, level, action,
783 for_cow); 809 for_cow);
784 spin_unlock(&delayed_refs->lock); 810 spin_unlock(&delayed_refs->lock);
@@ -823,10 +849,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
823 * insert both the head node and the new ref without dropping 849 * insert both the head node and the new ref without dropping
824 * the spin lock 850 * the spin lock
825 */ 851 */
826 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 852 head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
827 num_bytes, action, 1); 853 bytenr, num_bytes, action, 1);
828 854
829 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, 855 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
830 num_bytes, parent, ref_root, owner, offset, 856 num_bytes, parent, ref_root, owner, offset,
831 action, for_cow); 857 action, for_cow);
832 spin_unlock(&delayed_refs->lock); 858 spin_unlock(&delayed_refs->lock);
@@ -869,14 +895,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
869struct btrfs_delayed_ref_head * 895struct btrfs_delayed_ref_head *
870btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) 896btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
871{ 897{
872 struct btrfs_delayed_ref_node *ref;
873 struct btrfs_delayed_ref_root *delayed_refs; 898 struct btrfs_delayed_ref_root *delayed_refs;
874 899
875 delayed_refs = &trans->transaction->delayed_refs; 900 delayed_refs = &trans->transaction->delayed_refs;
876 ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0); 901 return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0);
877 if (ref)
878 return btrfs_delayed_node_to_head(ref);
879 return NULL;
880} 902}
881 903
882void btrfs_delayed_ref_exit(void) 904void btrfs_delayed_ref_exit(void)
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 70b962cc177d..4ba9b93022ff 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -81,7 +81,10 @@ struct btrfs_delayed_ref_head {
81 */ 81 */
82 struct mutex mutex; 82 struct mutex mutex;
83 83
84 struct list_head cluster; 84 spinlock_t lock;
85 struct rb_root ref_root;
86
87 struct rb_node href_node;
85 88
86 struct btrfs_delayed_extent_op *extent_op; 89 struct btrfs_delayed_extent_op *extent_op;
87 /* 90 /*
@@ -98,6 +101,7 @@ struct btrfs_delayed_ref_head {
98 */ 101 */
99 unsigned int must_insert_reserved:1; 102 unsigned int must_insert_reserved:1;
100 unsigned int is_data:1; 103 unsigned int is_data:1;
104 unsigned int processing:1;
101}; 105};
102 106
103struct btrfs_delayed_tree_ref { 107struct btrfs_delayed_tree_ref {
@@ -116,7 +120,8 @@ struct btrfs_delayed_data_ref {
116}; 120};
117 121
118struct btrfs_delayed_ref_root { 122struct btrfs_delayed_ref_root {
119 struct rb_root root; 123 /* head ref rbtree */
124 struct rb_root href_root;
120 125
121 /* this spin lock protects the rbtree and the entries inside */ 126 /* this spin lock protects the rbtree and the entries inside */
122 spinlock_t lock; 127 spinlock_t lock;
@@ -124,7 +129,7 @@ struct btrfs_delayed_ref_root {
124 /* how many delayed ref updates we've queued, used by the 129 /* how many delayed ref updates we've queued, used by the
125 * throttling code 130 * throttling code
126 */ 131 */
127 unsigned long num_entries; 132 atomic_t num_entries;
128 133
129 /* total number of head nodes in tree */ 134 /* total number of head nodes in tree */
130 unsigned long num_heads; 135 unsigned long num_heads;
@@ -133,15 +138,6 @@ struct btrfs_delayed_ref_root {
133 unsigned long num_heads_ready; 138 unsigned long num_heads_ready;
134 139
135 /* 140 /*
136 * bumped when someone is making progress on the delayed
137 * refs, so that other procs know they are just adding to
 138	 * contention instead of helping
139 */
140 atomic_t procs_running_refs;
141 atomic_t ref_seq;
142 wait_queue_head_t wait;
143
144 /*
145 * set when the tree is flushing before a transaction commit, 141 * set when the tree is flushing before a transaction commit,
146 * used by the throttling code to decide if new updates need 142 * used by the throttling code to decide if new updates need
147 * to be run right away 143 * to be run right away
@@ -226,9 +222,9 @@ static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
226 mutex_unlock(&head->mutex); 222 mutex_unlock(&head->mutex);
227} 223}
228 224
229int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, 225
230 struct list_head *cluster, u64 search_start); 226struct btrfs_delayed_ref_head *
231void btrfs_release_ref_cluster(struct list_head *cluster); 227btrfs_select_ref_head(struct btrfs_trans_handle *trans);
232 228
233int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, 229int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
234 struct btrfs_delayed_ref_root *delayed_refs, 230 struct btrfs_delayed_ref_root *delayed_refs,
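
The delayed-ref.h hunks above give the rework its shape: one rbtree of heads
(href_root) under the per-transaction lock, one rbtree of refs (ref_root)
under each head's own spinlock, and num_entries turned atomic so the
throttling code can read it without taking either lock. Roughly, in user-space
terms — pthread types and plain pointers here are stand-ins for kernel
spinlocks and rbtree nodes, not the actual definitions:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct ref_node {		/* one delayed ref under a head */
	long long ref_mod;
	struct ref_node *left, *right;
};

struct ref_head {		/* one extent's worth of delayed refs */
	pthread_mutex_t lock;	/* guards this head's ref_root only */
	struct ref_node *ref_root;
	int processing;		/* claimed by a running thread */
};

struct delayed_root {		/* the per-transaction container */
	pthread_mutex_t lock;	/* guards href_root only */
	struct ref_head *href_root;
	atomic_long num_entries; /* readable without either lock */
};

int main(void)
{
	struct delayed_root root = { .lock = PTHREAD_MUTEX_INITIALIZER };

	atomic_store(&root.num_entries, 0);
	printf("entries: %ld\n", atomic_load(&root.num_entries));
	return 0;
}

Splitting the tree this way lets threads insert refs under different heads
concurrently, holding the global lock only long enough to find or insert the
head itself.
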
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2cfc3dfff64f..564c92638b20 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -102,7 +102,8 @@ no_valid_dev_replace_entry_found:
102 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item); 102 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item);
103 103
104 if (item_size != sizeof(struct btrfs_dev_replace_item)) { 104 if (item_size != sizeof(struct btrfs_dev_replace_item)) {
105 pr_warn("btrfs: dev_replace entry found has unexpected size, ignore entry\n"); 105 btrfs_warn(fs_info,
106 "dev_replace entry found has unexpected size, ignore entry");
106 goto no_valid_dev_replace_entry_found; 107 goto no_valid_dev_replace_entry_found;
107 } 108 }
108 109
@@ -145,13 +146,19 @@ no_valid_dev_replace_entry_found:
145 if (!dev_replace->srcdev && 146 if (!dev_replace->srcdev &&
146 !btrfs_test_opt(dev_root, DEGRADED)) { 147 !btrfs_test_opt(dev_root, DEGRADED)) {
147 ret = -EIO; 148 ret = -EIO;
148 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", 149 btrfs_warn(fs_info,
149 src_devid); 150 "cannot mount because device replace operation is ongoing and");
151 btrfs_warn(fs_info,
152 "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
153 src_devid);
150 } 154 }
151 if (!dev_replace->tgtdev && 155 if (!dev_replace->tgtdev &&
152 !btrfs_test_opt(dev_root, DEGRADED)) { 156 !btrfs_test_opt(dev_root, DEGRADED)) {
153 ret = -EIO; 157 ret = -EIO;
154 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", 158 btrfs_warn(fs_info,
159 "cannot mount because device replace operation is ongoing and");
160 btrfs_warn(fs_info,
161 "tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
155 BTRFS_DEV_REPLACE_DEVID); 162 BTRFS_DEV_REPLACE_DEVID);
156 } 163 }
157 if (dev_replace->tgtdev) { 164 if (dev_replace->tgtdev) {
@@ -210,7 +217,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
210 } 217 }
211 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 218 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
212 if (ret < 0) { 219 if (ret < 0) {
213 pr_warn("btrfs: error %d while searching for dev_replace item!\n", 220 btrfs_warn(fs_info, "error %d while searching for dev_replace item!",
214 ret); 221 ret);
215 goto out; 222 goto out;
216 } 223 }
@@ -230,7 +237,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
230 */ 237 */
231 ret = btrfs_del_item(trans, dev_root, path); 238 ret = btrfs_del_item(trans, dev_root, path);
232 if (ret != 0) { 239 if (ret != 0) {
233 pr_warn("btrfs: delete too small dev_replace item failed %d!\n", 240 btrfs_warn(fs_info, "delete too small dev_replace item failed %d!",
234 ret); 241 ret);
235 goto out; 242 goto out;
236 } 243 }
@@ -243,7 +250,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
243 ret = btrfs_insert_empty_item(trans, dev_root, path, 250 ret = btrfs_insert_empty_item(trans, dev_root, path,
244 &key, sizeof(*ptr)); 251 &key, sizeof(*ptr));
245 if (ret < 0) { 252 if (ret < 0) {
246 pr_warn("btrfs: insert dev_replace item failed %d!\n", 253 btrfs_warn(fs_info, "insert dev_replace item failed %d!",
247 ret); 254 ret);
248 goto out; 255 goto out;
249 } 256 }
@@ -305,7 +312,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
305 struct btrfs_device *src_device = NULL; 312 struct btrfs_device *src_device = NULL;
306 313
307 if (btrfs_fs_incompat(fs_info, RAID56)) { 314 if (btrfs_fs_incompat(fs_info, RAID56)) {
308 pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n"); 315 btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
309 return -EINVAL; 316 return -EINVAL;
310 } 317 }
311 318
@@ -325,7 +332,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
325 ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, 332 ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name,
326 &tgt_device); 333 &tgt_device);
327 if (ret) { 334 if (ret) {
328 pr_err("btrfs: target device %s is invalid!\n", 335 btrfs_err(fs_info, "target device %s is invalid!",
329 args->start.tgtdev_name); 336 args->start.tgtdev_name);
330 mutex_unlock(&fs_info->volume_mutex); 337 mutex_unlock(&fs_info->volume_mutex);
331 return -EINVAL; 338 return -EINVAL;
@@ -341,7 +348,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
341 } 348 }
342 349
343 if (tgt_device->total_bytes < src_device->total_bytes) { 350 if (tgt_device->total_bytes < src_device->total_bytes) {
344 pr_err("btrfs: target device is smaller than source device!\n"); 351 btrfs_err(fs_info, "target device is smaller than source device!");
345 ret = -EINVAL; 352 ret = -EINVAL;
346 goto leave_no_lock; 353 goto leave_no_lock;
347 } 354 }
@@ -366,7 +373,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
366 dev_replace->tgtdev = tgt_device; 373 dev_replace->tgtdev = tgt_device;
367 374
368 printk_in_rcu(KERN_INFO 375 printk_in_rcu(KERN_INFO
369 "btrfs: dev_replace from %s (devid %llu) to %s started\n", 376 "BTRFS: dev_replace from %s (devid %llu) to %s started\n",
370 src_device->missing ? "<missing disk>" : 377 src_device->missing ? "<missing disk>" :
371 rcu_str_deref(src_device->name), 378 rcu_str_deref(src_device->name),
372 src_device->devid, 379 src_device->devid,
@@ -489,7 +496,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
489 496
490 if (scrub_ret) { 497 if (scrub_ret) {
491 printk_in_rcu(KERN_ERR 498 printk_in_rcu(KERN_ERR
492 "btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", 499 "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
493 src_device->missing ? "<missing disk>" : 500 src_device->missing ? "<missing disk>" :
494 rcu_str_deref(src_device->name), 501 rcu_str_deref(src_device->name),
495 src_device->devid, 502 src_device->devid,
@@ -504,7 +511,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
504 } 511 }
505 512
506 printk_in_rcu(KERN_INFO 513 printk_in_rcu(KERN_INFO
507 "btrfs: dev_replace from %s (devid %llu) to %s) finished\n", 514 "BTRFS: dev_replace from %s (devid %llu) to %s) finished\n",
508 src_device->missing ? "<missing disk>" : 515 src_device->missing ? "<missing disk>" :
509 rcu_str_deref(src_device->name), 516 rcu_str_deref(src_device->name),
510 src_device->devid, 517 src_device->devid,
@@ -699,7 +706,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
699 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; 706 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
700 dev_replace->time_stopped = get_seconds(); 707 dev_replace->time_stopped = get_seconds();
701 dev_replace->item_needs_writeback = 1; 708 dev_replace->item_needs_writeback = 1;
702 pr_info("btrfs: suspending dev_replace for unmount\n"); 709 btrfs_info(fs_info, "suspending dev_replace for unmount");
703 break; 710 break;
704 } 711 }
705 712
@@ -728,8 +735,9 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
728 break; 735 break;
729 } 736 }
730 if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) { 737 if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) {
731 pr_info("btrfs: cannot continue dev_replace, tgtdev is missing\n" 738 btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
732 "btrfs: you may cancel the operation after 'mount -o degraded'\n"); 739 btrfs_info(fs_info,
740 "you may cancel the operation after 'mount -o degraded'");
733 btrfs_dev_replace_unlock(dev_replace); 741 btrfs_dev_replace_unlock(dev_replace);
734 return 0; 742 return 0;
735 } 743 }
@@ -755,14 +763,14 @@ static int btrfs_dev_replace_kthread(void *data)
755 kfree(status_args); 763 kfree(status_args);
756 do_div(progress, 10); 764 do_div(progress, 10);
757 printk_in_rcu(KERN_INFO 765 printk_in_rcu(KERN_INFO
758 "btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", 766 "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
759 dev_replace->srcdev->missing ? "<missing disk>" : 767 dev_replace->srcdev->missing ? "<missing disk>" :
760 rcu_str_deref(dev_replace->srcdev->name), 768 rcu_str_deref(dev_replace->srcdev->name),
761 dev_replace->srcdev->devid, 769 dev_replace->srcdev->devid,
762 dev_replace->tgtdev ? 770 dev_replace->tgtdev ?
763 rcu_str_deref(dev_replace->tgtdev->name) : 771 rcu_str_deref(dev_replace->tgtdev->name) :
764 "<missing target disk>", 772 "<missing target disk>",
765 (unsigned int)progress); 773 (unsigned int)progress);
766 } 774 }
767 btrfs_dev_replace_continue_on_mount(fs_info); 775 btrfs_dev_replace_continue_on_mount(fs_info);
768 atomic_set(&fs_info->mutually_exclusive_operation_running, 0); 776 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
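
The dev-replace.c hunks above mechanically swap bare pr_warn()/pr_err()/
pr_info() calls for btrfs_warn()/btrfs_err()/btrfs_info(), which take the
fs_info so every message is tagged with the filesystem it concerns. The
pattern, approximated in user space — the real macros live in the btrfs
headers and end up in printk, so this is only a sketch of the idea:

#include <stdio.h>
#include <stdarg.h>

struct fs_info { const char *sb_id; };	/* stand-in for btrfs_fs_info */

static void fs_warn(struct fs_info *fs, const char *fmt, ...)
{
	va_list args;

	fprintf(stderr, "BTRFS warning (device %s): ", fs->sb_id);
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	fputc('\n', stderr);
}

int main(void)
{
	struct fs_info fs = { .sb_id = "sda1" };

	fs_warn(&fs, "dev_replace cannot yet handle RAID5/RAID6");
	return 0;
}
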
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c031ea3fd70f..a0691df5dcea 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -261,7 +261,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
261 * see if there is room in the item to insert this 261 * see if there is room in the item to insert this
262 * name 262 * name
263 */ 263 */
264 data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item); 264 data_size = sizeof(*di) + name_len;
265 leaf = path->nodes[0]; 265 leaf = path->nodes[0];
266 slot = path->slots[0]; 266 slot = path->slots[0];
267 if (data_size + btrfs_item_size_nr(leaf, slot) + 267 if (data_size + btrfs_item_size_nr(leaf, slot) +
@@ -459,7 +459,7 @@ int verify_dir_item(struct btrfs_root *root,
459 u8 type = btrfs_dir_type(leaf, dir_item); 459 u8 type = btrfs_dir_type(leaf, dir_item);
460 460
461 if (type >= BTRFS_FT_MAX) { 461 if (type >= BTRFS_FT_MAX) {
462 printk(KERN_CRIT "btrfs: invalid dir item type: %d\n", 462 btrfs_crit(root->fs_info, "invalid dir item type: %d",
463 (int)type); 463 (int)type);
464 return 1; 464 return 1;
465 } 465 }
@@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_root *root,
468 namelen = XATTR_NAME_MAX; 468 namelen = XATTR_NAME_MAX;
469 469
470 if (btrfs_dir_name_len(leaf, dir_item) > namelen) { 470 if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
471 printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n", 471 btrfs_crit(root->fs_info, "invalid dir item name len: %u",
472 (unsigned)btrfs_dir_data_len(leaf, dir_item)); 472 (unsigned)btrfs_dir_data_len(leaf, dir_item));
473 return 1; 473 return 1;
474 } 474 }
@@ -476,7 +476,7 @@ int verify_dir_item(struct btrfs_root *root,
476 /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ 476 /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
477 if ((btrfs_dir_data_len(leaf, dir_item) + 477 if ((btrfs_dir_data_len(leaf, dir_item) +
478 btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) { 478 btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
479 printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n", 479 btrfs_crit(root->fs_info, "invalid dir item name + data len: %u + %u",
480 (unsigned)btrfs_dir_name_len(leaf, dir_item), 480 (unsigned)btrfs_dir_name_len(leaf, dir_item),
481 (unsigned)btrfs_dir_data_len(leaf, dir_item)); 481 (unsigned)btrfs_dir_data_len(leaf, dir_item));
482 return 1; 482 return 1;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e71039ea66cf..0e69295d0031 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -48,6 +48,7 @@
48#include "rcu-string.h" 48#include "rcu-string.h"
49#include "dev-replace.h" 49#include "dev-replace.h"
50#include "raid56.h" 50#include "raid56.h"
51#include "sysfs.h"
51 52
52#ifdef CONFIG_X86 53#ifdef CONFIG_X86
53#include <asm/cpufeature.h> 54#include <asm/cpufeature.h>
@@ -299,11 +300,11 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
299 memcpy(&found, result, csum_size); 300 memcpy(&found, result, csum_size);
300 301
301 read_extent_buffer(buf, &val, 0, csum_size); 302 read_extent_buffer(buf, &val, 0, csum_size);
302 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " 303 printk_ratelimited(KERN_INFO
303 "failed on %llu wanted %X found %X " 304 "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
304 "level %d\n", 305 "level %d\n",
305 root->fs_info->sb->s_id, buf->start, 306 root->fs_info->sb->s_id, buf->start,
306 val, found, btrfs_header_level(buf)); 307 val, found, btrfs_header_level(buf));
307 if (result != (char *)&inline_result) 308 if (result != (char *)&inline_result)
308 kfree(result); 309 kfree(result);
309 return 1; 310 return 1;
@@ -382,13 +383,14 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
382 ret = 1; 383 ret = 1;
383 384
384 if (ret && btrfs_super_generation(disk_sb) < 10) { 385 if (ret && btrfs_super_generation(disk_sb) < 10) {
385 printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n"); 386 printk(KERN_WARNING
387 "BTRFS: super block crcs don't match, older mkfs detected\n");
386 ret = 0; 388 ret = 0;
387 } 389 }
388 } 390 }
389 391
390 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { 392 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
391 printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n", 393 printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n",
392 csum_type); 394 csum_type);
393 ret = 1; 395 ret = 1;
394 } 396 }
@@ -464,13 +466,10 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
464 466
465static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) 467static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
466{ 468{
467 struct extent_io_tree *tree;
468 u64 start = page_offset(page); 469 u64 start = page_offset(page);
469 u64 found_start; 470 u64 found_start;
470 struct extent_buffer *eb; 471 struct extent_buffer *eb;
471 472
472 tree = &BTRFS_I(page->mapping->host)->io_tree;
473
474 eb = (struct extent_buffer *)page->private; 473 eb = (struct extent_buffer *)page->private;
475 if (page != eb->pages[0]) 474 if (page != eb->pages[0])
476 return 0; 475 return 0;
@@ -500,8 +499,8 @@ static int check_tree_block_fsid(struct btrfs_root *root,
500} 499}
501 500
502#define CORRUPT(reason, eb, root, slot) \ 501#define CORRUPT(reason, eb, root, slot) \
503 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ 502 btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \
504 "root=%llu, slot=%d\n", reason, \ 503 "root=%llu, slot=%d", reason, \
505 btrfs_header_bytenr(eb), root->objectid, slot) 504 btrfs_header_bytenr(eb), root->objectid, slot)
506 505
507static noinline int check_leaf(struct btrfs_root *root, 506static noinline int check_leaf(struct btrfs_root *root,
@@ -569,7 +568,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
569 u64 phy_offset, struct page *page, 568 u64 phy_offset, struct page *page,
570 u64 start, u64 end, int mirror) 569 u64 start, u64 end, int mirror)
571{ 570{
572 struct extent_io_tree *tree;
573 u64 found_start; 571 u64 found_start;
574 int found_level; 572 int found_level;
575 struct extent_buffer *eb; 573 struct extent_buffer *eb;
@@ -580,7 +578,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
580 if (!page->private) 578 if (!page->private)
581 goto out; 579 goto out;
582 580
583 tree = &BTRFS_I(page->mapping->host)->io_tree;
584 eb = (struct extent_buffer *)page->private; 581 eb = (struct extent_buffer *)page->private;
585 582
586 /* the pending IO might have been the only thing that kept this buffer 583 /* the pending IO might have been the only thing that kept this buffer
@@ -600,21 +597,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
600 597
601 found_start = btrfs_header_bytenr(eb); 598 found_start = btrfs_header_bytenr(eb);
602 if (found_start != eb->start) { 599 if (found_start != eb->start) {
603 printk_ratelimited(KERN_INFO "btrfs bad tree block start " 600 printk_ratelimited(KERN_INFO "BTRFS: bad tree block start "
604 "%llu %llu\n", 601 "%llu %llu\n",
605 found_start, eb->start); 602 found_start, eb->start);
606 ret = -EIO; 603 ret = -EIO;
607 goto err; 604 goto err;
608 } 605 }
609 if (check_tree_block_fsid(root, eb)) { 606 if (check_tree_block_fsid(root, eb)) {
610 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 607 printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n",
611 eb->start); 608 eb->start);
612 ret = -EIO; 609 ret = -EIO;
613 goto err; 610 goto err;
614 } 611 }
615 found_level = btrfs_header_level(eb); 612 found_level = btrfs_header_level(eb);
616 if (found_level >= BTRFS_MAX_LEVEL) { 613 if (found_level >= BTRFS_MAX_LEVEL) {
617 btrfs_info(root->fs_info, "bad tree block level %d\n", 614 btrfs_info(root->fs_info, "bad tree block level %d",
618 (int)btrfs_header_level(eb)); 615 (int)btrfs_header_level(eb));
619 ret = -EIO; 616 ret = -EIO;
620 goto err; 617 goto err;
@@ -964,11 +961,9 @@ static int btree_migratepage(struct address_space *mapping,
964static int btree_writepages(struct address_space *mapping, 961static int btree_writepages(struct address_space *mapping,
965 struct writeback_control *wbc) 962 struct writeback_control *wbc)
966{ 963{
967 struct extent_io_tree *tree;
968 struct btrfs_fs_info *fs_info; 964 struct btrfs_fs_info *fs_info;
969 int ret; 965 int ret;
970 966
971 tree = &BTRFS_I(mapping->host)->io_tree;
972 if (wbc->sync_mode == WB_SYNC_NONE) { 967 if (wbc->sync_mode == WB_SYNC_NONE) {
973 968
974 if (wbc->for_kupdate) 969 if (wbc->for_kupdate)
@@ -1007,8 +1002,9 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
1007 extent_invalidatepage(tree, page, offset); 1002 extent_invalidatepage(tree, page, offset);
1008 btree_releasepage(page, GFP_NOFS); 1003 btree_releasepage(page, GFP_NOFS);
1009 if (PagePrivate(page)) { 1004 if (PagePrivate(page)) {
1010 printk(KERN_WARNING "btrfs warning page private not zero " 1005 btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info,
1011 "on page %llu\n", (unsigned long long)page_offset(page)); 1006 "page private not zero on page %llu",
1007 (unsigned long long)page_offset(page));
1012 ClearPagePrivate(page); 1008 ClearPagePrivate(page);
1013 set_page_private(page, 0); 1009 set_page_private(page, 0);
1014 page_cache_release(page); 1010 page_cache_release(page);
@@ -1092,21 +1088,13 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
1092struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 1088struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
1093 u64 bytenr, u32 blocksize) 1089 u64 bytenr, u32 blocksize)
1094{ 1090{
1095 struct inode *btree_inode = root->fs_info->btree_inode; 1091 return find_extent_buffer(root->fs_info, bytenr);
1096 struct extent_buffer *eb;
1097 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
1098 return eb;
1099} 1092}
1100 1093
1101struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 1094struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
1102 u64 bytenr, u32 blocksize) 1095 u64 bytenr, u32 blocksize)
1103{ 1096{
1104 struct inode *btree_inode = root->fs_info->btree_inode; 1097 return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
1105 struct extent_buffer *eb;
1106
1107 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
1108 bytenr, blocksize);
1109 return eb;
1110} 1098}
1111 1099
1112 1100
@@ -1270,7 +1258,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1270 struct btrfs_root *root; 1258 struct btrfs_root *root;
1271 struct btrfs_key key; 1259 struct btrfs_key key;
1272 int ret = 0; 1260 int ret = 0;
1273 u64 bytenr;
1274 uuid_le uuid; 1261 uuid_le uuid;
1275 1262
1276 root = btrfs_alloc_root(fs_info); 1263 root = btrfs_alloc_root(fs_info);
@@ -1292,7 +1279,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1292 goto fail; 1279 goto fail;
1293 } 1280 }
1294 1281
1295 bytenr = leaf->start;
1296 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); 1282 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
1297 btrfs_set_header_bytenr(leaf, leaf->start); 1283 btrfs_set_header_bytenr(leaf, leaf->start);
1298 btrfs_set_header_generation(leaf, trans->transid); 1284 btrfs_set_header_generation(leaf, trans->transid);
@@ -1613,7 +1599,8 @@ again:
1613 if (ret) 1599 if (ret)
1614 goto fail; 1600 goto fail;
1615 1601
1616 ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); 1602 ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
1603 location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
1617 if (ret < 0) 1604 if (ret < 0)
1618 goto fail; 1605 goto fail;
1619 if (ret == 0) 1606 if (ret == 0)
@@ -1681,12 +1668,10 @@ static void end_workqueue_fn(struct btrfs_work *work)
1681{ 1668{
1682 struct bio *bio; 1669 struct bio *bio;
1683 struct end_io_wq *end_io_wq; 1670 struct end_io_wq *end_io_wq;
1684 struct btrfs_fs_info *fs_info;
1685 int error; 1671 int error;
1686 1672
1687 end_io_wq = container_of(work, struct end_io_wq, work); 1673 end_io_wq = container_of(work, struct end_io_wq, work);
1688 bio = end_io_wq->bio; 1674 bio = end_io_wq->bio;
1689 fs_info = end_io_wq->info;
1690 1675
1691 error = end_io_wq->error; 1676 error = end_io_wq->error;
1692 bio->bi_private = end_io_wq->private; 1677 bio->bi_private = end_io_wq->private;
@@ -2077,6 +2062,12 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
2077 for (i = 0; i < ret; i++) 2062 for (i = 0; i < ret; i++)
2078 btrfs_drop_and_free_fs_root(fs_info, gang[i]); 2063 btrfs_drop_and_free_fs_root(fs_info, gang[i]);
2079 } 2064 }
2065
2066 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
2067 btrfs_free_log_root_tree(NULL, fs_info);
2068 btrfs_destroy_pinned_extent(fs_info->tree_root,
2069 fs_info->pinned_extents);
2070 }
2080} 2071}
2081 2072
2082int open_ctree(struct super_block *sb, 2073int open_ctree(struct super_block *sb,
@@ -2151,6 +2142,7 @@ int open_ctree(struct super_block *sb,
2151 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); 2142 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
2152 2143
2153 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 2144 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
2145 INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
2154 INIT_LIST_HEAD(&fs_info->trans_list); 2146 INIT_LIST_HEAD(&fs_info->trans_list);
2155 INIT_LIST_HEAD(&fs_info->dead_roots); 2147 INIT_LIST_HEAD(&fs_info->dead_roots);
2156 INIT_LIST_HEAD(&fs_info->delayed_iputs); 2148 INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2164,6 +2156,7 @@ int open_ctree(struct super_block *sb,
2164 spin_lock_init(&fs_info->free_chunk_lock); 2156 spin_lock_init(&fs_info->free_chunk_lock);
2165 spin_lock_init(&fs_info->tree_mod_seq_lock); 2157 spin_lock_init(&fs_info->tree_mod_seq_lock);
2166 spin_lock_init(&fs_info->super_lock); 2158 spin_lock_init(&fs_info->super_lock);
2159 spin_lock_init(&fs_info->buffer_lock);
2167 rwlock_init(&fs_info->tree_mod_log_lock); 2160 rwlock_init(&fs_info->tree_mod_log_lock);
2168 mutex_init(&fs_info->reloc_mutex); 2161 mutex_init(&fs_info->reloc_mutex);
2169 seqlock_init(&fs_info->profiles_lock); 2162 seqlock_init(&fs_info->profiles_lock);
@@ -2195,7 +2188,7 @@ int open_ctree(struct super_block *sb,
2195 fs_info->free_chunk_space = 0; 2188 fs_info->free_chunk_space = 0;
2196 fs_info->tree_mod_log = RB_ROOT; 2189 fs_info->tree_mod_log = RB_ROOT;
2197 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; 2190 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
2198 2191 fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
2199 /* readahead state */ 2192 /* readahead state */
2200 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2193 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
2201 spin_lock_init(&fs_info->reada_lock); 2194 spin_lock_init(&fs_info->reada_lock);
@@ -2334,7 +2327,7 @@ int open_ctree(struct super_block *sb,
2334 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). 2327 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
2335 */ 2328 */
2336 if (btrfs_check_super_csum(bh->b_data)) { 2329 if (btrfs_check_super_csum(bh->b_data)) {
2337 printk(KERN_ERR "btrfs: superblock checksum mismatch\n"); 2330 printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
2338 err = -EINVAL; 2331 err = -EINVAL;
2339 goto fail_alloc; 2332 goto fail_alloc;
2340 } 2333 }
@@ -2353,7 +2346,7 @@ int open_ctree(struct super_block *sb,
2353 2346
2354 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 2347 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2355 if (ret) { 2348 if (ret) {
2356 printk(KERN_ERR "btrfs: superblock contains fatal errors\n"); 2349 printk(KERN_ERR "BTRFS: superblock contains fatal errors\n");
2357 err = -EINVAL; 2350 err = -EINVAL;
2358 goto fail_alloc; 2351 goto fail_alloc;
2359 } 2352 }
@@ -2418,7 +2411,7 @@ int open_ctree(struct super_block *sb,
2418 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2411 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2419 2412
2420 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) 2413 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
2421 printk(KERN_ERR "btrfs: has skinny extents\n"); 2414 printk(KERN_ERR "BTRFS: has skinny extents\n");
2422 2415
2423 /* 2416 /*
2424 * flag our filesystem as having big metadata blocks if 2417 * flag our filesystem as having big metadata blocks if
@@ -2426,7 +2419,7 @@ int open_ctree(struct super_block *sb,
2426 */ 2419 */
2427 if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { 2420 if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
2428 if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) 2421 if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
2429 printk(KERN_INFO "btrfs flagging fs with big metadata feature\n"); 2422 printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
2430 features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; 2423 features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
2431 } 2424 }
2432 2425
@@ -2443,7 +2436,7 @@ int open_ctree(struct super_block *sb,
2443 */ 2436 */
2444 if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && 2437 if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
2445 (sectorsize != leafsize)) { 2438 (sectorsize != leafsize)) {
2446 printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes " 2439 printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
2447 "are not allowed for mixed block groups on %s\n", 2440 "are not allowed for mixed block groups on %s\n",
2448 sb->s_id); 2441 sb->s_id);
2449 goto fail_alloc; 2442 goto fail_alloc;
@@ -2580,12 +2573,12 @@ int open_ctree(struct super_block *sb,
2580 sb->s_blocksize_bits = blksize_bits(sectorsize); 2573 sb->s_blocksize_bits = blksize_bits(sectorsize);
2581 2574
2582 if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { 2575 if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
2583 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); 2576 printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
2584 goto fail_sb_buffer; 2577 goto fail_sb_buffer;
2585 } 2578 }
2586 2579
2587 if (sectorsize != PAGE_SIZE) { 2580 if (sectorsize != PAGE_SIZE) {
2588 printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) " 2581 printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
2589 "found on %s\n", (unsigned long)sectorsize, sb->s_id); 2582 "found on %s\n", (unsigned long)sectorsize, sb->s_id);
2590 goto fail_sb_buffer; 2583 goto fail_sb_buffer;
2591 } 2584 }
@@ -2594,7 +2587,7 @@ int open_ctree(struct super_block *sb,
2594 ret = btrfs_read_sys_array(tree_root); 2587 ret = btrfs_read_sys_array(tree_root);
2595 mutex_unlock(&fs_info->chunk_mutex); 2588 mutex_unlock(&fs_info->chunk_mutex);
2596 if (ret) { 2589 if (ret) {
2597 printk(KERN_WARNING "btrfs: failed to read the system " 2590 printk(KERN_WARNING "BTRFS: failed to read the system "
2598 "array on %s\n", sb->s_id); 2591 "array on %s\n", sb->s_id);
2599 goto fail_sb_buffer; 2592 goto fail_sb_buffer;
2600 } 2593 }
@@ -2611,7 +2604,7 @@ int open_ctree(struct super_block *sb,
2611 blocksize, generation); 2604 blocksize, generation);
2612 if (!chunk_root->node || 2605 if (!chunk_root->node ||
2613 !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2606 !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
2614 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 2607 printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
2615 sb->s_id); 2608 sb->s_id);
2616 goto fail_tree_roots; 2609 goto fail_tree_roots;
2617 } 2610 }
@@ -2623,7 +2616,7 @@ int open_ctree(struct super_block *sb,
2623 2616
2624 ret = btrfs_read_chunk_tree(chunk_root); 2617 ret = btrfs_read_chunk_tree(chunk_root);
2625 if (ret) { 2618 if (ret) {
2626 printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", 2619 printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
2627 sb->s_id); 2620 sb->s_id);
2628 goto fail_tree_roots; 2621 goto fail_tree_roots;
2629 } 2622 }
@@ -2635,7 +2628,7 @@ int open_ctree(struct super_block *sb,
2635 btrfs_close_extra_devices(fs_info, fs_devices, 0); 2628 btrfs_close_extra_devices(fs_info, fs_devices, 0);
2636 2629
2637 if (!fs_devices->latest_bdev) { 2630 if (!fs_devices->latest_bdev) {
2638 printk(KERN_CRIT "btrfs: failed to read devices on %s\n", 2631 printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
2639 sb->s_id); 2632 sb->s_id);
2640 goto fail_tree_roots; 2633 goto fail_tree_roots;
2641 } 2634 }
@@ -2650,7 +2643,7 @@ retry_root_backup:
2650 blocksize, generation); 2643 blocksize, generation);
2651 if (!tree_root->node || 2644 if (!tree_root->node ||
2652 !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { 2645 !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
2653 printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", 2646 printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
2654 sb->s_id); 2647 sb->s_id);
2655 2648
2656 goto recovery_tree_root; 2649 goto recovery_tree_root;
@@ -2721,50 +2714,56 @@ retry_root_backup:
2721 2714
2722 ret = btrfs_recover_balance(fs_info); 2715 ret = btrfs_recover_balance(fs_info);
2723 if (ret) { 2716 if (ret) {
2724 printk(KERN_WARNING "btrfs: failed to recover balance\n"); 2717 printk(KERN_WARNING "BTRFS: failed to recover balance\n");
2725 goto fail_block_groups; 2718 goto fail_block_groups;
2726 } 2719 }
2727 2720
2728 ret = btrfs_init_dev_stats(fs_info); 2721 ret = btrfs_init_dev_stats(fs_info);
2729 if (ret) { 2722 if (ret) {
2730 printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", 2723 printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n",
2731 ret); 2724 ret);
2732 goto fail_block_groups; 2725 goto fail_block_groups;
2733 } 2726 }
2734 2727
2735 ret = btrfs_init_dev_replace(fs_info); 2728 ret = btrfs_init_dev_replace(fs_info);
2736 if (ret) { 2729 if (ret) {
2737 pr_err("btrfs: failed to init dev_replace: %d\n", ret); 2730 pr_err("BTRFS: failed to init dev_replace: %d\n", ret);
2738 goto fail_block_groups; 2731 goto fail_block_groups;
2739 } 2732 }
2740 2733
2741 btrfs_close_extra_devices(fs_info, fs_devices, 1); 2734 btrfs_close_extra_devices(fs_info, fs_devices, 1);
2742 2735
2743 ret = btrfs_init_space_info(fs_info); 2736 ret = btrfs_sysfs_add_one(fs_info);
2744 if (ret) { 2737 if (ret) {
2745 printk(KERN_ERR "Failed to initial space info: %d\n", ret); 2738 pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
2746 goto fail_block_groups; 2739 goto fail_block_groups;
2747 } 2740 }
2748 2741
2742 ret = btrfs_init_space_info(fs_info);
2743 if (ret) {
2744 printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
2745 goto fail_sysfs;
2746 }
2747
2749 ret = btrfs_read_block_groups(extent_root); 2748 ret = btrfs_read_block_groups(extent_root);
2750 if (ret) { 2749 if (ret) {
2751 printk(KERN_ERR "Failed to read block groups: %d\n", ret); 2750 printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
2752 goto fail_block_groups; 2751 goto fail_sysfs;
2753 } 2752 }
2754 fs_info->num_tolerated_disk_barrier_failures = 2753 fs_info->num_tolerated_disk_barrier_failures =
2755 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); 2754 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
2756 if (fs_info->fs_devices->missing_devices > 2755 if (fs_info->fs_devices->missing_devices >
2757 fs_info->num_tolerated_disk_barrier_failures && 2756 fs_info->num_tolerated_disk_barrier_failures &&
2758 !(sb->s_flags & MS_RDONLY)) { 2757 !(sb->s_flags & MS_RDONLY)) {
2759 printk(KERN_WARNING 2758 printk(KERN_WARNING "BTRFS: "
2760 "Btrfs: too many missing devices, writeable mount is not allowed\n"); 2759 "too many missing devices, writeable mount is not allowed\n");
2761 goto fail_block_groups; 2760 goto fail_sysfs;
2762 } 2761 }
2763 2762
2764 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 2763 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
2765 "btrfs-cleaner"); 2764 "btrfs-cleaner");
2766 if (IS_ERR(fs_info->cleaner_kthread)) 2765 if (IS_ERR(fs_info->cleaner_kthread))
2767 goto fail_block_groups; 2766 goto fail_sysfs;
2768 2767
2769 fs_info->transaction_kthread = kthread_run(transaction_kthread, 2768 fs_info->transaction_kthread = kthread_run(transaction_kthread,
2770 tree_root, 2769 tree_root,
@@ -2775,11 +2774,15 @@ retry_root_backup:
2775 if (!btrfs_test_opt(tree_root, SSD) && 2774 if (!btrfs_test_opt(tree_root, SSD) &&
2776 !btrfs_test_opt(tree_root, NOSSD) && 2775 !btrfs_test_opt(tree_root, NOSSD) &&
2777 !fs_info->fs_devices->rotating) { 2776 !fs_info->fs_devices->rotating) {
2778 printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD " 2777 printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD "
2779 "mode\n"); 2778 "mode\n");
2780 btrfs_set_opt(fs_info->mount_opt, SSD); 2779 btrfs_set_opt(fs_info->mount_opt, SSD);
2781 } 2780 }
2782 2781
2782 /* Set the real inode map cache flag */
2783 if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE))
2784 btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE);
2785
2783#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 2786#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
2784 if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { 2787 if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
2785 ret = btrfsic_mount(tree_root, fs_devices, 2788 ret = btrfsic_mount(tree_root, fs_devices,
@@ -2788,7 +2791,7 @@ retry_root_backup:
2788 1 : 0, 2791 1 : 0,
2789 fs_info->check_integrity_print_mask); 2792 fs_info->check_integrity_print_mask);
2790 if (ret) 2793 if (ret)
2791 printk(KERN_WARNING "btrfs: failed to initialize" 2794 printk(KERN_WARNING "BTRFS: failed to initialize"
2792 " integrity check module %s\n", sb->s_id); 2795 " integrity check module %s\n", sb->s_id);
2793 } 2796 }
2794#endif 2797#endif
@@ -2801,7 +2804,7 @@ retry_root_backup:
2801 u64 bytenr = btrfs_super_log_root(disk_super); 2804 u64 bytenr = btrfs_super_log_root(disk_super);
2802 2805
2803 if (fs_devices->rw_devices == 0) { 2806 if (fs_devices->rw_devices == 0) {
2804 printk(KERN_WARNING "Btrfs log replay required " 2807 printk(KERN_WARNING "BTRFS: log replay required "
2805 "on RO media\n"); 2808 "on RO media\n");
2806 err = -EIO; 2809 err = -EIO;
2807 goto fail_qgroup; 2810 goto fail_qgroup;
@@ -2824,7 +2827,7 @@ retry_root_backup:
2824 generation + 1); 2827 generation + 1);
2825 if (!log_tree_root->node || 2828 if (!log_tree_root->node ||
2826 !extent_buffer_uptodate(log_tree_root->node)) { 2829 !extent_buffer_uptodate(log_tree_root->node)) {
2827 printk(KERN_ERR "btrfs: failed to read log tree\n"); 2830 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2828 free_extent_buffer(log_tree_root->node); 2831 free_extent_buffer(log_tree_root->node);
2829 kfree(log_tree_root); 2832 kfree(log_tree_root);
2830 goto fail_trans_kthread; 2833 goto fail_trans_kthread;
@@ -2858,7 +2861,7 @@ retry_root_backup:
2858 ret = btrfs_recover_relocation(tree_root); 2861 ret = btrfs_recover_relocation(tree_root);
2859 if (ret < 0) { 2862 if (ret < 0) {
2860 printk(KERN_WARNING 2863 printk(KERN_WARNING
2861 "btrfs: failed to recover relocation\n"); 2864 "BTRFS: failed to recover relocation\n");
2862 err = -EINVAL; 2865 err = -EINVAL;
2863 goto fail_qgroup; 2866 goto fail_qgroup;
2864 } 2867 }
@@ -2888,14 +2891,14 @@ retry_root_backup:
2888 2891
2889 ret = btrfs_resume_balance_async(fs_info); 2892 ret = btrfs_resume_balance_async(fs_info);
2890 if (ret) { 2893 if (ret) {
2891 printk(KERN_WARNING "btrfs: failed to resume balance\n"); 2894 printk(KERN_WARNING "BTRFS: failed to resume balance\n");
2892 close_ctree(tree_root); 2895 close_ctree(tree_root);
2893 return ret; 2896 return ret;
2894 } 2897 }
2895 2898
2896 ret = btrfs_resume_dev_replace_async(fs_info); 2899 ret = btrfs_resume_dev_replace_async(fs_info);
2897 if (ret) { 2900 if (ret) {
2898 pr_warn("btrfs: failed to resume dev_replace\n"); 2901 pr_warn("BTRFS: failed to resume dev_replace\n");
2899 close_ctree(tree_root); 2902 close_ctree(tree_root);
2900 return ret; 2903 return ret;
2901 } 2904 }
@@ -2903,20 +2906,20 @@ retry_root_backup:
2903 btrfs_qgroup_rescan_resume(fs_info); 2906 btrfs_qgroup_rescan_resume(fs_info);
2904 2907
2905 if (create_uuid_tree) { 2908 if (create_uuid_tree) {
2906 pr_info("btrfs: creating UUID tree\n"); 2909 pr_info("BTRFS: creating UUID tree\n");
2907 ret = btrfs_create_uuid_tree(fs_info); 2910 ret = btrfs_create_uuid_tree(fs_info);
2908 if (ret) { 2911 if (ret) {
2909 pr_warn("btrfs: failed to create the UUID tree %d\n", 2912 pr_warn("BTRFS: failed to create the UUID tree %d\n",
2910 ret); 2913 ret);
2911 close_ctree(tree_root); 2914 close_ctree(tree_root);
2912 return ret; 2915 return ret;
2913 } 2916 }
2914 } else if (check_uuid_tree || 2917 } else if (check_uuid_tree ||
2915 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { 2918 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
2916 pr_info("btrfs: checking UUID tree\n"); 2919 pr_info("BTRFS: checking UUID tree\n");
2917 ret = btrfs_check_uuid_tree(fs_info); 2920 ret = btrfs_check_uuid_tree(fs_info);
2918 if (ret) { 2921 if (ret) {
2919 pr_warn("btrfs: failed to check the UUID tree %d\n", 2922 pr_warn("BTRFS: failed to check the UUID tree %d\n",
2920 ret); 2923 ret);
2921 close_ctree(tree_root); 2924 close_ctree(tree_root);
2922 return ret; 2925 return ret;
@@ -2942,6 +2945,9 @@ fail_cleaner:
2942 */ 2945 */
2943 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 2946 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
2944 2947
2948fail_sysfs:
2949 btrfs_sysfs_remove_one(fs_info);
2950
2945fail_block_groups: 2951fail_block_groups:
2946 btrfs_put_block_group_cache(fs_info); 2952 btrfs_put_block_group_cache(fs_info);
2947 btrfs_free_block_groups(fs_info); 2953 btrfs_free_block_groups(fs_info);
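
The hunk above slots a fail_sysfs label into open_ctree()'s goto ladder:
sysfs is now registered before the space-info and block-group setup, so any
later failure must unregister it before falling through to the existing
teardown. A toy version of that fall-through ordering, with stub functions
standing in for the real setup and teardown calls:

#include <stdio.h>

static int setup_sysfs(void)		{ return 0; }
static void teardown_sysfs(void)	{ puts("sysfs removed"); }
static void free_block_groups(void)	{ puts("block groups freed"); }

static int open_thing(int fail_after_sysfs)
{
	if (setup_sysfs())
		goto fail_block_groups;	/* sysfs never registered */
	if (fail_after_sysfs)
		goto fail_sysfs;	/* later step failed: undo sysfs too */
	return 0;

fail_sysfs:
	teardown_sysfs();
fail_block_groups:			/* labels run in reverse setup order */
	free_block_groups();
	return -1;
}

int main(void)
{
	open_thing(1);			/* exercise the failure path */
	return 0;
}
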
@@ -2997,7 +3003,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
2997 struct btrfs_device *device = (struct btrfs_device *) 3003 struct btrfs_device *device = (struct btrfs_device *)
2998 bh->b_private; 3004 bh->b_private;
2999 3005
3000 printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " 3006 printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
3001 "I/O error on %s\n", 3007 "I/O error on %s\n",
3002 rcu_str_deref(device->name)); 3008 rcu_str_deref(device->name));
3003	 /* note, we don't set_buffer_write_io_error because we have 3009
@@ -3116,7 +3122,7 @@ static int write_dev_supers(struct btrfs_device *device,
3116 bh = __getblk(device->bdev, bytenr / 4096, 3122 bh = __getblk(device->bdev, bytenr / 4096,
3117 BTRFS_SUPER_INFO_SIZE); 3123 BTRFS_SUPER_INFO_SIZE);
3118 if (!bh) { 3124 if (!bh) {
3119 printk(KERN_ERR "btrfs: couldn't get super " 3125 printk(KERN_ERR "BTRFS: couldn't get super "
3120 "buffer head for bytenr %Lu\n", bytenr); 3126 "buffer head for bytenr %Lu\n", bytenr);
3121 errors++; 3127 errors++;
3122 continue; 3128 continue;
@@ -3137,7 +3143,10 @@ static int write_dev_supers(struct btrfs_device *device,
3137 * we fua the first super. The others we allow 3143 * we fua the first super. The others we allow
3138 * to go down lazy. 3144 * to go down lazy.
3139 */ 3145 */
3140 ret = btrfsic_submit_bh(WRITE_FUA, bh); 3146 if (i == 0)
3147 ret = btrfsic_submit_bh(WRITE_FUA, bh);
3148 else
3149 ret = btrfsic_submit_bh(WRITE_SYNC, bh);
3141 if (ret) 3150 if (ret)
3142 errors++; 3151 errors++;
3143 } 3152 }
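The hunk above splits the superblock submission policy: only copy 0, the one mount reads first, is submitted with WRITE_FUA so it is forced through the drive's write cache, while the backup copies go down with plain WRITE_SYNC and are allowed to complete lazily. A minimal sketch of the same pattern, reusing the kernel's own submit helper but with a hypothetical loop of buffer heads (bhs[] and copies are illustrative, not the actual write_dev_supers() body):

        /*
         * Sketch: make the primary super durable before reporting
         * success; let the mirrors be written back lazily.
         */
        static int submit_supers(struct buffer_head **bhs, int copies)
        {
                int i, ret, errors = 0;

                for (i = 0; i < copies; i++) {
                        if (i == 0)
                                ret = btrfsic_submit_bh(WRITE_FUA, bhs[i]);
                        else
                                ret = btrfsic_submit_bh(WRITE_SYNC, bhs[i]);
                        if (ret)
                                errors++;
                }
                return errors;
        }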
@@ -3183,7 +3192,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3183 wait_for_completion(&device->flush_wait); 3192 wait_for_completion(&device->flush_wait);
3184 3193
3185 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 3194 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
3186 printk_in_rcu("btrfs: disabling barriers on dev %s\n", 3195 printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
3187 rcu_str_deref(device->name)); 3196 rcu_str_deref(device->name));
3188 device->nobarriers = 1; 3197 device->nobarriers = 1;
3189 } else if (!bio_flagged(bio, BIO_UPTODATE)) { 3198 } else if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3404,7 +3413,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3404 total_errors++; 3413 total_errors++;
3405 } 3414 }
3406 if (total_errors > max_errors) { 3415 if (total_errors > max_errors) {
3407 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 3416 btrfs_err(root->fs_info, "%d errors while writing supers",
3408 total_errors); 3417 total_errors);
3409 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 3418 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3410 3419
@@ -3452,10 +3461,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
3452 if (btrfs_root_refs(&root->root_item) == 0) 3461 if (btrfs_root_refs(&root->root_item) == 0)
3453 synchronize_srcu(&fs_info->subvol_srcu); 3462 synchronize_srcu(&fs_info->subvol_srcu);
3454 3463
3455 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 3464 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3456 btrfs_free_log(NULL, root); 3465 btrfs_free_log(NULL, root);
3457 btrfs_free_log_root_tree(NULL, fs_info);
3458 }
3459 3466
3460 __btrfs_remove_free_space_cache(root->free_ino_pinned); 3467 __btrfs_remove_free_space_cache(root->free_ino_pinned);
3461 __btrfs_remove_free_space_cache(root->free_ino_ctl); 3468 __btrfs_remove_free_space_cache(root->free_ino_ctl);
@@ -3560,14 +3567,12 @@ int close_ctree(struct btrfs_root *root)
3560 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 3567 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3561 ret = btrfs_commit_super(root); 3568 ret = btrfs_commit_super(root);
3562 if (ret) 3569 if (ret)
3563 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 3570 btrfs_err(root->fs_info, "commit super ret %d", ret);
3564 } 3571 }
3565 3572
3566 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) 3573 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3567 btrfs_error_commit_super(root); 3574 btrfs_error_commit_super(root);
3568 3575
3569 btrfs_put_block_group_cache(fs_info);
3570
3571 kthread_stop(fs_info->transaction_kthread); 3576 kthread_stop(fs_info->transaction_kthread);
3572 kthread_stop(fs_info->cleaner_kthread); 3577 kthread_stop(fs_info->cleaner_kthread);
3573 3578
@@ -3577,12 +3582,16 @@ int close_ctree(struct btrfs_root *root)
3577 btrfs_free_qgroup_config(root->fs_info); 3582 btrfs_free_qgroup_config(root->fs_info);
3578 3583
3579 if (percpu_counter_sum(&fs_info->delalloc_bytes)) { 3584 if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
3580 printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n", 3585 btrfs_info(root->fs_info, "at unmount delalloc count %lld",
3581 percpu_counter_sum(&fs_info->delalloc_bytes)); 3586 percpu_counter_sum(&fs_info->delalloc_bytes));
3582 } 3587 }
3583 3588
3589 btrfs_sysfs_remove_one(fs_info);
3590
3584 del_fs_roots(fs_info); 3591 del_fs_roots(fs_info);
3585 3592
3593 btrfs_put_block_group_cache(fs_info);
3594
3586 btrfs_free_block_groups(fs_info); 3595 btrfs_free_block_groups(fs_info);
3587 3596
3588 btrfs_stop_all_workers(fs_info); 3597 btrfs_stop_all_workers(fs_info);
@@ -3800,55 +3809,55 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3800 delayed_refs = &trans->delayed_refs; 3809 delayed_refs = &trans->delayed_refs;
3801 3810
3802 spin_lock(&delayed_refs->lock); 3811 spin_lock(&delayed_refs->lock);
3803 if (delayed_refs->num_entries == 0) { 3812 if (atomic_read(&delayed_refs->num_entries) == 0) {
3804 spin_unlock(&delayed_refs->lock); 3813 spin_unlock(&delayed_refs->lock);
3805 printk(KERN_INFO "delayed_refs has NO entry\n"); 3814 btrfs_info(root->fs_info, "delayed_refs has NO entry");
3806 return ret; 3815 return ret;
3807 } 3816 }
3808 3817
3809 while ((node = rb_first(&delayed_refs->root)) != NULL) { 3818 while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
3810 struct btrfs_delayed_ref_head *head = NULL; 3819 struct btrfs_delayed_ref_head *head;
3811 bool pin_bytes = false; 3820 bool pin_bytes = false;
3812 3821
3813 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3822 head = rb_entry(node, struct btrfs_delayed_ref_head,
3814 atomic_set(&ref->refs, 1); 3823 href_node);
3815 if (btrfs_delayed_ref_is_head(ref)) { 3824 if (!mutex_trylock(&head->mutex)) {
3816 3825 atomic_inc(&head->node.refs);
3817 head = btrfs_delayed_node_to_head(ref); 3826 spin_unlock(&delayed_refs->lock);
3818 if (!mutex_trylock(&head->mutex)) {
3819 atomic_inc(&ref->refs);
3820 spin_unlock(&delayed_refs->lock);
3821
3822 /* Need to wait for the delayed ref to run */
3823 mutex_lock(&head->mutex);
3824 mutex_unlock(&head->mutex);
3825 btrfs_put_delayed_ref(ref);
3826
3827 spin_lock(&delayed_refs->lock);
3828 continue;
3829 }
3830
3831 if (head->must_insert_reserved)
3832 pin_bytes = true;
3833 btrfs_free_delayed_extent_op(head->extent_op);
3834 delayed_refs->num_heads--;
3835 if (list_empty(&head->cluster))
3836 delayed_refs->num_heads_ready--;
3837 list_del_init(&head->cluster);
3838 }
3839 3827
3840 ref->in_tree = 0; 3828 mutex_lock(&head->mutex);
3841 rb_erase(&ref->rb_node, &delayed_refs->root);
3842 delayed_refs->num_entries--;
3843 spin_unlock(&delayed_refs->lock);
3844 if (head) {
3845 if (pin_bytes)
3846 btrfs_pin_extent(root, ref->bytenr,
3847 ref->num_bytes, 1);
3848 mutex_unlock(&head->mutex); 3829 mutex_unlock(&head->mutex);
3830 btrfs_put_delayed_ref(&head->node);
3831 spin_lock(&delayed_refs->lock);
3832 continue;
3833 }
3834 spin_lock(&head->lock);
3835 while ((node = rb_first(&head->ref_root)) != NULL) {
3836 ref = rb_entry(node, struct btrfs_delayed_ref_node,
3837 rb_node);
3838 ref->in_tree = 0;
3839 rb_erase(&ref->rb_node, &head->ref_root);
3840 atomic_dec(&delayed_refs->num_entries);
3841 btrfs_put_delayed_ref(ref);
3842 cond_resched_lock(&head->lock);
3849 } 3843 }
3850 btrfs_put_delayed_ref(ref); 3844 if (head->must_insert_reserved)
3845 pin_bytes = true;
3846 btrfs_free_delayed_extent_op(head->extent_op);
3847 delayed_refs->num_heads--;
3848 if (head->processing == 0)
3849 delayed_refs->num_heads_ready--;
3850 atomic_dec(&delayed_refs->num_entries);
3851 head->node.in_tree = 0;
3852 rb_erase(&head->href_node, &delayed_refs->href_root);
3853 spin_unlock(&head->lock);
3854 spin_unlock(&delayed_refs->lock);
3855 mutex_unlock(&head->mutex);
3851 3856
3857 if (pin_bytes)
3858 btrfs_pin_extent(root, head->node.bytenr,
3859 head->node.num_bytes, 1);
3860 btrfs_put_delayed_ref(&head->node);
3852 cond_resched(); 3861 cond_resched();
3853 spin_lock(&delayed_refs->lock); 3862 spin_lock(&delayed_refs->lock);
3854 } 3863 }
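The btrfs_destroy_delayed_refs() rework above follows the new two-level layout of the delayed-ref machinery: the per-transaction rbtree (href_root) now holds only ref heads keyed by bytenr, each head carries its own rbtree of refs (ref_root) guarded by a per-head spinlock, and num_entries became an atomic_t so it can be adjusted under the head locks. A sketch of the fields the teardown loop touches, trimmed to what appears in these hunks (see delayed-ref.h for the full definitions):

        struct btrfs_delayed_ref_root {
                spinlock_t lock;          /* guards href_root */
                struct rb_root href_root; /* heads only, keyed by bytenr */
                atomic_t num_entries;     /* refs across all heads */
                unsigned long num_heads;
                unsigned long num_heads_ready;
        };

        struct btrfs_delayed_ref_head {
                struct btrfs_delayed_ref_node node;
                struct mutex mutex;
                spinlock_t lock;          /* guards ref_root */
                struct rb_root ref_root;  /* refs for this bytenr only */
                struct rb_node href_node; /* link into href_root */
                struct btrfs_delayed_extent_op *extent_op;
                int processing;           /* set while a worker owns the head */
                int must_insert_reserved;
        };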
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9c01509dd8ab..9c9ecc93ae2c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,6 +35,7 @@
35#include "locking.h" 35#include "locking.h"
36#include "free-space-cache.h" 36#include "free-space-cache.h"
37#include "math.h" 37#include "math.h"
38#include "sysfs.h"
38 39
39#undef SCRAMBLE_DELAYED_REFS 40#undef SCRAMBLE_DELAYED_REFS
40 41
@@ -441,7 +442,8 @@ next:
441 if (ret) 442 if (ret)
442 break; 443 break;
443 444
444 if (need_resched()) { 445 if (need_resched() ||
446 rwsem_is_contended(&fs_info->extent_commit_sem)) {
445 caching_ctl->progress = last; 447 caching_ctl->progress = last;
446 btrfs_release_path(path); 448 btrfs_release_path(path);
447 up_read(&fs_info->extent_commit_sem); 449 up_read(&fs_info->extent_commit_sem);
@@ -855,12 +857,14 @@ again:
855 btrfs_put_delayed_ref(&head->node); 857 btrfs_put_delayed_ref(&head->node);
856 goto search_again; 858 goto search_again;
857 } 859 }
860 spin_lock(&head->lock);
858 if (head->extent_op && head->extent_op->update_flags) 861 if (head->extent_op && head->extent_op->update_flags)
859 extent_flags |= head->extent_op->flags_to_set; 862 extent_flags |= head->extent_op->flags_to_set;
860 else 863 else
861 BUG_ON(num_refs == 0); 864 BUG_ON(num_refs == 0);
862 865
863 num_refs += head->node.ref_mod; 866 num_refs += head->node.ref_mod;
867 spin_unlock(&head->lock);
864 mutex_unlock(&head->mutex); 868 mutex_unlock(&head->mutex);
865 } 869 }
866 spin_unlock(&delayed_refs->lock); 870 spin_unlock(&delayed_refs->lock);
@@ -1070,11 +1074,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1070 __le64 lenum; 1074 __le64 lenum;
1071 1075
1072 lenum = cpu_to_le64(root_objectid); 1076 lenum = cpu_to_le64(root_objectid);
1073 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 1077 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
1074 lenum = cpu_to_le64(owner); 1078 lenum = cpu_to_le64(owner);
1075 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1079 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1076 lenum = cpu_to_le64(offset); 1080 lenum = cpu_to_le64(offset);
1077 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1081 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1078 1082
1079 return ((u64)high_crc << 31) ^ (u64)low_crc; 1083 return ((u64)high_crc << 31) ^ (u64)low_crc;
1080} 1084}
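The hash_extent_data_ref() hunk only swaps the bare crc32c() calls for the btrfs_crc32c() wrapper from the new hash.h; the hash itself is unchanged: two crc32c accumulators, one seeded from the root objectid and one folded over owner and offset, combined into a 64-bit key. A standalone sketch, assuming the usual ~0 seeds and a little-endian host, with crc32c() standing in for any crc32c implementation:

        #include <stddef.h>
        #include <stdint.h>

        /* assumed: some crc32c(seed, buf, len) implementation */
        extern uint32_t crc32c(uint32_t seed, const void *buf, size_t len);

        static uint64_t hash_extent_data_ref(uint64_t root_objectid,
                                             uint64_t owner, uint64_t offset)
        {
                uint32_t high_crc = ~(uint32_t)0;
                uint32_t low_crc = ~(uint32_t)0;
                uint64_t lenum;

                lenum = root_objectid; /* kernel code converts to __le64 first */
                high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
                lenum = owner;
                low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
                lenum = offset;
                low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

                /* fold both accumulators into one 64-bit tree key */
                return ((uint64_t)high_crc << 31) ^ (uint64_t)low_crc;
        }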
@@ -2285,64 +2289,62 @@ static noinline struct btrfs_delayed_ref_node *
2285select_delayed_ref(struct btrfs_delayed_ref_head *head) 2289select_delayed_ref(struct btrfs_delayed_ref_head *head)
2286{ 2290{
2287 struct rb_node *node; 2291 struct rb_node *node;
2288 struct btrfs_delayed_ref_node *ref; 2292 struct btrfs_delayed_ref_node *ref, *last = NULL;
2289 int action = BTRFS_ADD_DELAYED_REF; 2293
2290again:
2291 /* 2294 /*
2292 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 2295 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
2293 * this prevents ref count from going down to zero when 2296 * this prevents ref count from going down to zero when
2294 * there still are pending delayed refs. 2297 * there still are pending delayed refs.
2295 */ 2298 */
2296 node = rb_prev(&head->node.rb_node); 2299 node = rb_first(&head->ref_root);
2297 while (1) { 2300 while (node) {
2298 if (!node)
2299 break;
2300 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2301 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2301 rb_node); 2302 rb_node);
2302 if (ref->bytenr != head->node.bytenr) 2303 if (ref->action == BTRFS_ADD_DELAYED_REF)
2303 break;
2304 if (ref->action == action)
2305 return ref; 2304 return ref;
2306 node = rb_prev(node); 2305 else if (last == NULL)
2307 } 2306 last = ref;
2308 if (action == BTRFS_ADD_DELAYED_REF) { 2307 node = rb_next(node);
2309 action = BTRFS_DROP_DELAYED_REF;
2310 goto again;
2311 } 2308 }
2312 return NULL; 2309 return last;
2313} 2310}
2314 2311
2315/* 2312/*
2316 * Returns 0 on success or if called with an already aborted transaction. 2313 * Returns 0 on success or if called with an already aborted transaction.
2317 * Returns -ENOMEM or -EIO on failure and will abort the transaction. 2314 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2318 */ 2315 */
2319static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2316static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2320 struct btrfs_root *root, 2317 struct btrfs_root *root,
2321 struct list_head *cluster) 2318 unsigned long nr)
2322{ 2319{
2323 struct btrfs_delayed_ref_root *delayed_refs; 2320 struct btrfs_delayed_ref_root *delayed_refs;
2324 struct btrfs_delayed_ref_node *ref; 2321 struct btrfs_delayed_ref_node *ref;
2325 struct btrfs_delayed_ref_head *locked_ref = NULL; 2322 struct btrfs_delayed_ref_head *locked_ref = NULL;
2326 struct btrfs_delayed_extent_op *extent_op; 2323 struct btrfs_delayed_extent_op *extent_op;
2327 struct btrfs_fs_info *fs_info = root->fs_info; 2324 struct btrfs_fs_info *fs_info = root->fs_info;
2325 ktime_t start = ktime_get();
2328 int ret; 2326 int ret;
2329 int count = 0; 2327 unsigned long count = 0;
2328 unsigned long actual_count = 0;
2330 int must_insert_reserved = 0; 2329 int must_insert_reserved = 0;
2331 2330
2332 delayed_refs = &trans->transaction->delayed_refs; 2331 delayed_refs = &trans->transaction->delayed_refs;
2333 while (1) { 2332 while (1) {
2334 if (!locked_ref) { 2333 if (!locked_ref) {
2335 /* pick a new head ref from the cluster list */ 2334 if (count >= nr)
2336 if (list_empty(cluster))
2337 break; 2335 break;
2338 2336
2339 locked_ref = list_entry(cluster->next, 2337 spin_lock(&delayed_refs->lock);
2340 struct btrfs_delayed_ref_head, cluster); 2338 locked_ref = btrfs_select_ref_head(trans);
2339 if (!locked_ref) {
2340 spin_unlock(&delayed_refs->lock);
2341 break;
2342 }
2341 2343
2342 /* grab the lock that says we are going to process 2344 /* grab the lock that says we are going to process
2343 * all the refs for this head */ 2345 * all the refs for this head */
2344 ret = btrfs_delayed_ref_lock(trans, locked_ref); 2346 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2345 2347 spin_unlock(&delayed_refs->lock);
2346 /* 2348 /*
2347 * we may have dropped the spin lock to get the head 2349 * we may have dropped the spin lock to get the head
2348 * mutex lock, and that might have given someone else 2350 * mutex lock, and that might have given someone else
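The select_delayed_ref() rewrite above walks the head's private ref_root in rb order instead of scanning siblings of the head node, but the policy is the same: run BTRFS_ADD_DELAYED_REF entries before any drop, so the on-disk refcount can never transiently hit zero (and free a still-referenced extent) while an add for the same extent is queued. A toy model of the selection rule, with illustrative types:

        enum action { ADD_REF, DROP_REF };

        struct toy_ref { enum action action; };

        /* return the first ADD if any; otherwise the first ref seen */
        static struct toy_ref *select_ref(struct toy_ref *refs, int n)
        {
                struct toy_ref *last = NULL;
                int i;

                for (i = 0; i < n; i++) {
                        if (refs[i].action == ADD_REF)
                                return &refs[i];
                        if (!last)
                                last = &refs[i];
                }
                return last; /* NULL when the head has no refs */
        }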
@@ -2363,6 +2365,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2363 * finish. If we merged anything we need to re-loop so we can 2365 * finish. If we merged anything we need to re-loop so we can
2364 * get a good ref. 2366 * get a good ref.
2365 */ 2367 */
2368 spin_lock(&locked_ref->lock);
2366 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, 2369 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2367 locked_ref); 2370 locked_ref);
2368 2371
@@ -2374,17 +2377,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2374 2377
2375 if (ref && ref->seq && 2378 if (ref && ref->seq &&
2376 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { 2379 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2377 /* 2380 spin_unlock(&locked_ref->lock);
2378 * there are still refs with lower seq numbers in the
2379 * process of being added. Don't run this ref yet.
2380 */
2381 list_del_init(&locked_ref->cluster);
2382 btrfs_delayed_ref_unlock(locked_ref); 2381 btrfs_delayed_ref_unlock(locked_ref);
2383 locked_ref = NULL; 2382 spin_lock(&delayed_refs->lock);
2383 locked_ref->processing = 0;
2384 delayed_refs->num_heads_ready++; 2384 delayed_refs->num_heads_ready++;
2385 spin_unlock(&delayed_refs->lock); 2385 spin_unlock(&delayed_refs->lock);
2386 locked_ref = NULL;
2386 cond_resched(); 2387 cond_resched();
2387 spin_lock(&delayed_refs->lock);
2388 continue; 2388 continue;
2389 } 2389 }
2390 2390
@@ -2399,6 +2399,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2399 locked_ref->extent_op = NULL; 2399 locked_ref->extent_op = NULL;
2400 2400
2401 if (!ref) { 2401 if (!ref) {
2402
2403
2402 /* All delayed refs have been processed, go ahead 2404 /* All delayed refs have been processed, go ahead
2403 * and send the head node to run_one_delayed_ref, 2405 * and send the head node to run_one_delayed_ref,
2404 * so that any accounting fixes can happen 2406 * so that any accounting fixes can happen
@@ -2411,8 +2413,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2411 } 2413 }
2412 2414
2413 if (extent_op) { 2415 if (extent_op) {
2414 spin_unlock(&delayed_refs->lock); 2416 spin_unlock(&locked_ref->lock);
2415
2416 ret = run_delayed_extent_op(trans, root, 2417 ret = run_delayed_extent_op(trans, root,
2417 ref, extent_op); 2418 ref, extent_op);
2418 btrfs_free_delayed_extent_op(extent_op); 2419 btrfs_free_delayed_extent_op(extent_op);
@@ -2426,19 +2427,39 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2426 */ 2427 */
2427 if (must_insert_reserved) 2428 if (must_insert_reserved)
2428 locked_ref->must_insert_reserved = 1; 2429 locked_ref->must_insert_reserved = 1;
2430 locked_ref->processing = 0;
2429 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); 2431 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2430 spin_lock(&delayed_refs->lock);
2431 btrfs_delayed_ref_unlock(locked_ref); 2432 btrfs_delayed_ref_unlock(locked_ref);
2432 return ret; 2433 return ret;
2433 } 2434 }
2435 continue;
2436 }
2434 2437
2435 goto next; 2438 /*
2439 * Need to drop our head ref lock and re-acquire the
2440 * delayed ref lock and then re-check to make sure
2441 * nobody got added.
2442 */
2443 spin_unlock(&locked_ref->lock);
2444 spin_lock(&delayed_refs->lock);
2445 spin_lock(&locked_ref->lock);
2446 if (rb_first(&locked_ref->ref_root)) {
2447 spin_unlock(&locked_ref->lock);
2448 spin_unlock(&delayed_refs->lock);
2449 continue;
2436 } 2450 }
2451 ref->in_tree = 0;
2452 delayed_refs->num_heads--;
2453 rb_erase(&locked_ref->href_node,
2454 &delayed_refs->href_root);
2455 spin_unlock(&delayed_refs->lock);
2456 } else {
2457 actual_count++;
2458 ref->in_tree = 0;
2459 rb_erase(&ref->rb_node, &locked_ref->ref_root);
2437 } 2460 }
2461 atomic_dec(&delayed_refs->num_entries);
2438 2462
2439 ref->in_tree = 0;
2440 rb_erase(&ref->rb_node, &delayed_refs->root);
2441 delayed_refs->num_entries--;
2442 if (!btrfs_delayed_ref_is_head(ref)) { 2463 if (!btrfs_delayed_ref_is_head(ref)) {
2443 /* 2464 /*
2444 * when we play the delayed ref, also correct the 2465 * when we play the delayed ref, also correct the
@@ -2455,20 +2476,18 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2455 default: 2476 default:
2456 WARN_ON(1); 2477 WARN_ON(1);
2457 } 2478 }
2458 } else {
2459 list_del_init(&locked_ref->cluster);
2460 } 2479 }
2461 spin_unlock(&delayed_refs->lock); 2480 spin_unlock(&locked_ref->lock);
2462 2481
2463 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2482 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2464 must_insert_reserved); 2483 must_insert_reserved);
2465 2484
2466 btrfs_free_delayed_extent_op(extent_op); 2485 btrfs_free_delayed_extent_op(extent_op);
2467 if (ret) { 2486 if (ret) {
2487 locked_ref->processing = 0;
2468 btrfs_delayed_ref_unlock(locked_ref); 2488 btrfs_delayed_ref_unlock(locked_ref);
2469 btrfs_put_delayed_ref(ref); 2489 btrfs_put_delayed_ref(ref);
2470 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret); 2490 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2471 spin_lock(&delayed_refs->lock);
2472 return ret; 2491 return ret;
2473 } 2492 }
2474 2493
@@ -2484,11 +2503,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2484 } 2503 }
2485 btrfs_put_delayed_ref(ref); 2504 btrfs_put_delayed_ref(ref);
2486 count++; 2505 count++;
2487next:
2488 cond_resched(); 2506 cond_resched();
2507 }
2508
2509 /*
2510 * We don't want to include ref heads since we can have empty ref heads
2511 * and those will drastically skew our runtime down since we just do
2512 * accounting, no actual extent tree updates.
2513 */
2514 if (actual_count > 0) {
2515 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2516 u64 avg;
2517
2518 /*
2519 * We weigh the current average higher than our current runtime
2520 * to avoid large swings in the average.
2521 */
2489 spin_lock(&delayed_refs->lock); 2522 spin_lock(&delayed_refs->lock);
2523 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2524 avg = div64_u64(avg, 4);
2525 fs_info->avg_delayed_ref_runtime = avg;
2526 spin_unlock(&delayed_refs->lock);
2490 } 2527 }
2491 return count; 2528 return 0;
2492} 2529}
2493 2530
2494#ifdef SCRAMBLE_DELAYED_REFS 2531#ifdef SCRAMBLE_DELAYED_REFS
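The accounting added at the tail of __btrfs_run_delayed_refs() keeps fs_info->avg_delayed_ref_runtime as an exponential moving average with a 3:1 bias toward the old value, updated only for batches that did real extent-tree work (actual_count > 0). A quick model of the update, with worked numbers:

        #include <stdint.h>

        /* new = (3 * old + sample) / 4, i.e. the sample carries 1/4 weight */
        static uint64_t avg_update(uint64_t avg_ns, uint64_t runtime_ns)
        {
                return (avg_ns * 3 + runtime_ns) / 4;
        }

        /*
         * Example: avg 1,000,000ns and a slow 5,000,000ns batch arrives:
         * (3 * 1,000,000 + 5,000,000) / 4 = 2,000,000ns. The estimate
         * moves only a quarter of the way toward the outlier.
         */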
@@ -2570,16 +2607,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2570 return ret; 2607 return ret;
2571} 2608}
2572 2609
2573static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
2574 int count)
2575{
2576 int val = atomic_read(&delayed_refs->ref_seq);
2577
2578 if (val < seq || val >= seq + count)
2579 return 1;
2580 return 0;
2581}
2582
2583static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) 2610static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2584{ 2611{
2585 u64 num_bytes; 2612 u64 num_bytes;
@@ -2596,7 +2623,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2596 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); 2623 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2597} 2624}
2598 2625
2599int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, 2626int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2600 struct btrfs_root *root) 2627 struct btrfs_root *root)
2601{ 2628{
2602 struct btrfs_block_rsv *global_rsv; 2629 struct btrfs_block_rsv *global_rsv;
@@ -2625,6 +2652,22 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2625 return ret; 2652 return ret;
2626} 2653}
2627 2654
2655int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2656 struct btrfs_root *root)
2657{
2658 struct btrfs_fs_info *fs_info = root->fs_info;
2659 u64 num_entries =
2660 atomic_read(&trans->transaction->delayed_refs.num_entries);
2661 u64 avg_runtime;
2662
2663 smp_mb();
2664 avg_runtime = fs_info->avg_delayed_ref_runtime;
2665 if (num_entries * avg_runtime >= NSEC_PER_SEC)
2666 return 1;
2667
2668 return btrfs_check_space_for_delayed_refs(trans, root);
2669}
2670
2628/* 2671/*
2629 * this starts processing the delayed reference count updates and 2672 * this starts processing the delayed reference count updates and
2630 * extent insertions we have queued up so far. count can be 2673 * extent insertions we have queued up so far. count can be
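With that average in hand, the new btrfs_should_throttle_delayed_refs() above throttles once the backlog is projected to need a second or more of work (num_entries * avg_runtime >= NSEC_PER_SEC), and otherwise falls back to the renamed space check. A model of the predicate, treating the smoothed runtime as a per-entry cost estimate:

        #include <stdbool.h>
        #include <stdint.h>

        #define NSEC_PER_SEC 1000000000ULL

        static bool should_throttle(uint64_t num_entries, uint64_t avg_ns)
        {
                /* e.g. at 10,000ns per entry this trips at 100,000 entries */
                return num_entries * avg_ns >= NSEC_PER_SEC;
        }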
@@ -2640,13 +2683,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2640{ 2683{
2641 struct rb_node *node; 2684 struct rb_node *node;
2642 struct btrfs_delayed_ref_root *delayed_refs; 2685 struct btrfs_delayed_ref_root *delayed_refs;
2643 struct btrfs_delayed_ref_node *ref; 2686 struct btrfs_delayed_ref_head *head;
2644 struct list_head cluster;
2645 int ret; 2687 int ret;
2646 u64 delayed_start;
2647 int run_all = count == (unsigned long)-1; 2688 int run_all = count == (unsigned long)-1;
2648 int run_most = 0; 2689 int run_most = 0;
2649 int loops;
2650 2690
2651 /* We'll clean this up in btrfs_cleanup_transaction */ 2691 /* We'll clean this up in btrfs_cleanup_transaction */
2652 if (trans->aborted) 2692 if (trans->aborted)
@@ -2658,130 +2698,40 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2658 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); 2698 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
2659 2699
2660 delayed_refs = &trans->transaction->delayed_refs; 2700 delayed_refs = &trans->transaction->delayed_refs;
2661 INIT_LIST_HEAD(&cluster);
2662 if (count == 0) { 2701 if (count == 0) {
2663 count = delayed_refs->num_entries * 2; 2702 count = atomic_read(&delayed_refs->num_entries) * 2;
2664 run_most = 1; 2703 run_most = 1;
2665 } 2704 }
2666 2705
2667 if (!run_all && !run_most) {
2668 int old;
2669 int seq = atomic_read(&delayed_refs->ref_seq);
2670
2671progress:
2672 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2673 if (old) {
2674 DEFINE_WAIT(__wait);
2675 if (delayed_refs->flushing ||
2676 !btrfs_should_throttle_delayed_refs(trans, root))
2677 return 0;
2678
2679 prepare_to_wait(&delayed_refs->wait, &__wait,
2680 TASK_UNINTERRUPTIBLE);
2681
2682 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2683 if (old) {
2684 schedule();
2685 finish_wait(&delayed_refs->wait, &__wait);
2686
2687 if (!refs_newer(delayed_refs, seq, 256))
2688 goto progress;
2689 else
2690 return 0;
2691 } else {
2692 finish_wait(&delayed_refs->wait, &__wait);
2693 goto again;
2694 }
2695 }
2696
2697 } else {
2698 atomic_inc(&delayed_refs->procs_running_refs);
2699 }
2700
2701again: 2706again:
2702 loops = 0;
2703 spin_lock(&delayed_refs->lock);
2704
2705#ifdef SCRAMBLE_DELAYED_REFS 2707#ifdef SCRAMBLE_DELAYED_REFS
2706 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); 2708 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2707#endif 2709#endif
2708 2710 ret = __btrfs_run_delayed_refs(trans, root, count);
2709 while (1) { 2711 if (ret < 0) {
2710 if (!(run_all || run_most) && 2712 btrfs_abort_transaction(trans, root, ret);
2711 !btrfs_should_throttle_delayed_refs(trans, root)) 2713 return ret;
2712 break;
2713
2714 /*
2715 * go find something we can process in the rbtree. We start at
2716 * the beginning of the tree, and then build a cluster
2717 * of refs to process starting at the first one we are able to
2718 * lock
2719 */
2720 delayed_start = delayed_refs->run_delayed_start;
2721 ret = btrfs_find_ref_cluster(trans, &cluster,
2722 delayed_refs->run_delayed_start);
2723 if (ret)
2724 break;
2725
2726 ret = run_clustered_refs(trans, root, &cluster);
2727 if (ret < 0) {
2728 btrfs_release_ref_cluster(&cluster);
2729 spin_unlock(&delayed_refs->lock);
2730 btrfs_abort_transaction(trans, root, ret);
2731 atomic_dec(&delayed_refs->procs_running_refs);
2732 wake_up(&delayed_refs->wait);
2733 return ret;
2734 }
2735
2736 atomic_add(ret, &delayed_refs->ref_seq);
2737
2738 count -= min_t(unsigned long, ret, count);
2739
2740 if (count == 0)
2741 break;
2742
2743 if (delayed_start >= delayed_refs->run_delayed_start) {
2744 if (loops == 0) {
2745 /*
2746 * btrfs_find_ref_cluster looped. let's do one
2747 * more cycle. if we don't run any delayed ref
2748 * during that cycle (because we can't because
2749 * all of them are blocked), bail out.
2750 */
2751 loops = 1;
2752 } else {
2753 /*
2754 * no runnable refs left, stop trying
2755 */
2756 BUG_ON(run_all);
2757 break;
2758 }
2759 }
2760 if (ret) {
2761 /* refs were run, let's reset staleness detection */
2762 loops = 0;
2763 }
2764 } 2714 }
2765 2715
2766 if (run_all) { 2716 if (run_all) {
2767 if (!list_empty(&trans->new_bgs)) { 2717 if (!list_empty(&trans->new_bgs))
2768 spin_unlock(&delayed_refs->lock);
2769 btrfs_create_pending_block_groups(trans, root); 2718 btrfs_create_pending_block_groups(trans, root);
2770 spin_lock(&delayed_refs->lock);
2771 }
2772 2719
2773 node = rb_first(&delayed_refs->root); 2720 spin_lock(&delayed_refs->lock);
2774 if (!node) 2721 node = rb_first(&delayed_refs->href_root);
2722 if (!node) {
2723 spin_unlock(&delayed_refs->lock);
2775 goto out; 2724 goto out;
2725 }
2776 count = (unsigned long)-1; 2726 count = (unsigned long)-1;
2777 2727
2778 while (node) { 2728 while (node) {
2779 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2729 head = rb_entry(node, struct btrfs_delayed_ref_head,
2780 rb_node); 2730 href_node);
2781 if (btrfs_delayed_ref_is_head(ref)) { 2731 if (btrfs_delayed_ref_is_head(&head->node)) {
2782 struct btrfs_delayed_ref_head *head; 2732 struct btrfs_delayed_ref_node *ref;
2783 2733
2784 head = btrfs_delayed_node_to_head(ref); 2734 ref = &head->node;
2785 atomic_inc(&ref->refs); 2735 atomic_inc(&ref->refs);
2786 2736
2787 spin_unlock(&delayed_refs->lock); 2737 spin_unlock(&delayed_refs->lock);
@@ -2795,20 +2745,16 @@ again:
2795 btrfs_put_delayed_ref(ref); 2745 btrfs_put_delayed_ref(ref);
2796 cond_resched(); 2746 cond_resched();
2797 goto again; 2747 goto again;
2748 } else {
2749 WARN_ON(1);
2798 } 2750 }
2799 node = rb_next(node); 2751 node = rb_next(node);
2800 } 2752 }
2801 spin_unlock(&delayed_refs->lock); 2753 spin_unlock(&delayed_refs->lock);
2802 schedule_timeout(1); 2754 cond_resched();
2803 goto again; 2755 goto again;
2804 } 2756 }
2805out: 2757out:
2806 atomic_dec(&delayed_refs->procs_running_refs);
2807 smp_mb();
2808 if (waitqueue_active(&delayed_refs->wait))
2809 wake_up(&delayed_refs->wait);
2810
2811 spin_unlock(&delayed_refs->lock);
2812 assert_qgroups_uptodate(trans); 2758 assert_qgroups_uptodate(trans);
2813 return 0; 2759 return 0;
2814} 2760}
@@ -2850,12 +2796,13 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2850 struct rb_node *node; 2796 struct rb_node *node;
2851 int ret = 0; 2797 int ret = 0;
2852 2798
2853 ret = -ENOENT;
2854 delayed_refs = &trans->transaction->delayed_refs; 2799 delayed_refs = &trans->transaction->delayed_refs;
2855 spin_lock(&delayed_refs->lock); 2800 spin_lock(&delayed_refs->lock);
2856 head = btrfs_find_delayed_ref_head(trans, bytenr); 2801 head = btrfs_find_delayed_ref_head(trans, bytenr);
2857 if (!head) 2802 if (!head) {
2858 goto out; 2803 spin_unlock(&delayed_refs->lock);
2804 return 0;
2805 }
2859 2806
2860 if (!mutex_trylock(&head->mutex)) { 2807 if (!mutex_trylock(&head->mutex)) {
2861 atomic_inc(&head->node.refs); 2808 atomic_inc(&head->node.refs);
@@ -2872,40 +2819,35 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2872 btrfs_put_delayed_ref(&head->node); 2819 btrfs_put_delayed_ref(&head->node);
2873 return -EAGAIN; 2820 return -EAGAIN;
2874 } 2821 }
2822 spin_unlock(&delayed_refs->lock);
2875 2823
2876 node = rb_prev(&head->node.rb_node); 2824 spin_lock(&head->lock);
2877 if (!node) 2825 node = rb_first(&head->ref_root);
2878 goto out_unlock; 2826 while (node) {
2879 2827 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2880 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2828 node = rb_next(node);
2881
2882 if (ref->bytenr != bytenr)
2883 goto out_unlock;
2884
2885 ret = 1;
2886 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2887 goto out_unlock;
2888 2829
2889 data_ref = btrfs_delayed_node_to_data_ref(ref); 2830 /* If it's a shared ref we know a cross reference exists */
2831 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2832 ret = 1;
2833 break;
2834 }
2890 2835
2891 node = rb_prev(node); 2836 data_ref = btrfs_delayed_node_to_data_ref(ref);
2892 if (node) {
2893 int seq = ref->seq;
2894 2837
2895 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2838 /*
2896 if (ref->bytenr == bytenr && ref->seq == seq) 2839 * If our ref doesn't match the one we're currently looking at
2897 goto out_unlock; 2840 * then we have a cross reference.
2841 */
2842 if (data_ref->root != root->root_key.objectid ||
2843 data_ref->objectid != objectid ||
2844 data_ref->offset != offset) {
2845 ret = 1;
2846 break;
2847 }
2898 } 2848 }
2899 2849 spin_unlock(&head->lock);
2900 if (data_ref->root != root->root_key.objectid ||
2901 data_ref->objectid != objectid || data_ref->offset != offset)
2902 goto out_unlock;
2903
2904 ret = 0;
2905out_unlock:
2906 mutex_unlock(&head->mutex); 2850 mutex_unlock(&head->mutex);
2907out:
2908 spin_unlock(&delayed_refs->lock);
2909 return ret; 2851 return ret;
2910} 2852}
2911 2853
@@ -3402,6 +3344,23 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3402 return readonly; 3344 return readonly;
3403} 3345}
3404 3346
3347static const char *alloc_name(u64 flags)
3348{
3349 switch (flags) {
3350 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3351 return "mixed";
3352 case BTRFS_BLOCK_GROUP_METADATA:
3353 return "metadata";
3354 case BTRFS_BLOCK_GROUP_DATA:
3355 return "data";
3356 case BTRFS_BLOCK_GROUP_SYSTEM:
3357 return "system";
3358 default:
3359 WARN_ON(1);
3360 return "invalid-combination";
3361 }
3362}
3363
3405static int update_space_info(struct btrfs_fs_info *info, u64 flags, 3364static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3406 u64 total_bytes, u64 bytes_used, 3365 u64 total_bytes, u64 bytes_used,
3407 struct btrfs_space_info **space_info) 3366 struct btrfs_space_info **space_info)
@@ -3439,8 +3398,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3439 return ret; 3398 return ret;
3440 } 3399 }
3441 3400
3442 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 3401 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
3443 INIT_LIST_HEAD(&found->block_groups[i]); 3402 INIT_LIST_HEAD(&found->block_groups[i]);
3403 kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
3404 }
3444 init_rwsem(&found->groups_sem); 3405 init_rwsem(&found->groups_sem);
3445 spin_lock_init(&found->lock); 3406 spin_lock_init(&found->lock);
3446 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; 3407 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -3457,11 +3418,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3457 found->chunk_alloc = 0; 3418 found->chunk_alloc = 0;
3458 found->flush = 0; 3419 found->flush = 0;
3459 init_waitqueue_head(&found->wait); 3420 init_waitqueue_head(&found->wait);
3421
3422 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3423 info->space_info_kobj, "%s",
3424 alloc_name(found->flags));
3425 if (ret) {
3426 kfree(found);
3427 return ret;
3428 }
3429
3460 *space_info = found; 3430 *space_info = found;
3461 list_add_rcu(&found->list, &info->space_info); 3431 list_add_rcu(&found->list, &info->space_info);
3462 if (flags & BTRFS_BLOCK_GROUP_DATA) 3432 if (flags & BTRFS_BLOCK_GROUP_DATA)
3463 info->data_sinfo = found; 3433 info->data_sinfo = found;
3464 return 0; 3434
3435 return ret;
3465} 3436}
3466 3437
3467static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 3438static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
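update_space_info() now publishes each space_info under the filesystem's sysfs directory via kobject_init_and_add(), and pre-initializes the per-RAID block-group kobjects that __link_block_group() attaches lazily further down. The general registration pattern, sketched with a hypothetical object; note the usual kobject rule that a failed kobject_init_and_add() is cleaned up with kobject_put(), since the kobject has already been initialized:

        #include <linux/kobject.h>

        struct demo_info {
                struct kobject kobj;
        };

        static int demo_register(struct demo_info *info, struct kobj_type *ktype,
                                 struct kobject *parent, const char *name)
        {
                int ret;

                ret = kobject_init_and_add(&info->kobj, ktype, parent,
                                           "%s", name);
                if (ret)
                        kobject_put(&info->kobj); /* frees via ktype->release */
                return ret;
        }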
@@ -4637,7 +4608,7 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
4637 u64 num_bytes) 4608 u64 num_bytes)
4638{ 4609{
4639 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 4610 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4640 if (global_rsv->full || global_rsv == block_rsv || 4611 if (global_rsv == block_rsv ||
4641 block_rsv->space_info != global_rsv->space_info) 4612 block_rsv->space_info != global_rsv->space_info)
4642 global_rsv = NULL; 4613 global_rsv = NULL;
4643 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv, 4614 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
@@ -5916,24 +5887,16 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5916{ 5887{
5917 struct btrfs_delayed_ref_head *head; 5888 struct btrfs_delayed_ref_head *head;
5918 struct btrfs_delayed_ref_root *delayed_refs; 5889 struct btrfs_delayed_ref_root *delayed_refs;
5919 struct btrfs_delayed_ref_node *ref;
5920 struct rb_node *node;
5921 int ret = 0; 5890 int ret = 0;
5922 5891
5923 delayed_refs = &trans->transaction->delayed_refs; 5892 delayed_refs = &trans->transaction->delayed_refs;
5924 spin_lock(&delayed_refs->lock); 5893 spin_lock(&delayed_refs->lock);
5925 head = btrfs_find_delayed_ref_head(trans, bytenr); 5894 head = btrfs_find_delayed_ref_head(trans, bytenr);
5926 if (!head) 5895 if (!head)
5927 goto out; 5896 goto out_delayed_unlock;
5928 5897
5929 node = rb_prev(&head->node.rb_node); 5898 spin_lock(&head->lock);
5930 if (!node) 5899 if (rb_first(&head->ref_root))
5931 goto out;
5932
5933 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
5934
5935 /* there are still entries for this ref, we can't drop it */
5936 if (ref->bytenr == bytenr)
5937 goto out; 5900 goto out;
5938 5901
5939 if (head->extent_op) { 5902 if (head->extent_op) {
@@ -5955,19 +5918,19 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5955 * ahead and process it. 5918 * ahead and process it.
5956 */ 5919 */
5957 head->node.in_tree = 0; 5920 head->node.in_tree = 0;
5958 rb_erase(&head->node.rb_node, &delayed_refs->root); 5921 rb_erase(&head->href_node, &delayed_refs->href_root);
5959 5922
5960 delayed_refs->num_entries--; 5923 atomic_dec(&delayed_refs->num_entries);
5961 5924
5962 /* 5925 /*
5963 * we don't take a ref on the node because we're removing it from the 5926 * we don't take a ref on the node because we're removing it from the
5964 * tree, so we just steal the ref the tree was holding. 5927 * tree, so we just steal the ref the tree was holding.
5965 */ 5928 */
5966 delayed_refs->num_heads--; 5929 delayed_refs->num_heads--;
5967 if (list_empty(&head->cluster)) 5930 if (head->processing == 0)
5968 delayed_refs->num_heads_ready--; 5931 delayed_refs->num_heads_ready--;
5969 5932 head->processing = 0;
5970 list_del_init(&head->cluster); 5933 spin_unlock(&head->lock);
5971 spin_unlock(&delayed_refs->lock); 5934 spin_unlock(&delayed_refs->lock);
5972 5935
5973 BUG_ON(head->extent_op); 5936 BUG_ON(head->extent_op);
@@ -5978,6 +5941,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5978 btrfs_put_delayed_ref(&head->node); 5941 btrfs_put_delayed_ref(&head->node);
5979 return ret; 5942 return ret;
5980out: 5943out:
5944 spin_unlock(&head->lock);
5945
5946out_delayed_unlock:
5981 spin_unlock(&delayed_refs->lock); 5947 spin_unlock(&delayed_refs->lock);
5982 return 0; 5948 return 0;
5983} 5949}
@@ -6145,11 +6111,29 @@ int __get_raid_index(u64 flags)
6145 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ 6111 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
6146} 6112}
6147 6113
6148static int get_block_group_index(struct btrfs_block_group_cache *cache) 6114int get_block_group_index(struct btrfs_block_group_cache *cache)
6149{ 6115{
6150 return __get_raid_index(cache->flags); 6116 return __get_raid_index(cache->flags);
6151} 6117}
6152 6118
6119static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
6120 [BTRFS_RAID_RAID10] = "raid10",
6121 [BTRFS_RAID_RAID1] = "raid1",
6122 [BTRFS_RAID_DUP] = "dup",
6123 [BTRFS_RAID_RAID0] = "raid0",
6124 [BTRFS_RAID_SINGLE] = "single",
6125 [BTRFS_RAID_RAID5] = "raid5",
6126 [BTRFS_RAID_RAID6] = "raid6",
6127};
6128
6129static const char *get_raid_name(enum btrfs_raid_types type)
6130{
6131 if (type >= BTRFS_NR_RAID_TYPES)
6132 return NULL;
6133
6134 return btrfs_raid_type_names[type];
6135}
6136
6153enum btrfs_loop_type { 6137enum btrfs_loop_type {
6154 LOOP_CACHING_NOWAIT = 0, 6138 LOOP_CACHING_NOWAIT = 0,
6155 LOOP_CACHING_WAIT = 1, 6139 LOOP_CACHING_WAIT = 1,
@@ -6177,7 +6161,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6177 struct btrfs_root *root = orig_root->fs_info->extent_root; 6161 struct btrfs_root *root = orig_root->fs_info->extent_root;
6178 struct btrfs_free_cluster *last_ptr = NULL; 6162 struct btrfs_free_cluster *last_ptr = NULL;
6179 struct btrfs_block_group_cache *block_group = NULL; 6163 struct btrfs_block_group_cache *block_group = NULL;
6180 struct btrfs_block_group_cache *used_block_group;
6181 u64 search_start = 0; 6164 u64 search_start = 0;
6182 u64 max_extent_size = 0; 6165 u64 max_extent_size = 0;
6183 int empty_cluster = 2 * 1024 * 1024; 6166 int empty_cluster = 2 * 1024 * 1024;
@@ -6186,7 +6169,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6186 int index = __get_raid_index(flags); 6169 int index = __get_raid_index(flags);
6187 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? 6170 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
6188 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 6171 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
6189 bool found_uncached_bg = false;
6190 bool failed_cluster_refill = false; 6172 bool failed_cluster_refill = false;
6191 bool failed_alloc = false; 6173 bool failed_alloc = false;
6192 bool use_cluster = true; 6174 bool use_cluster = true;
@@ -6239,7 +6221,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6239 if (search_start == hint_byte) { 6221 if (search_start == hint_byte) {
6240 block_group = btrfs_lookup_block_group(root->fs_info, 6222 block_group = btrfs_lookup_block_group(root->fs_info,
6241 search_start); 6223 search_start);
6242 used_block_group = block_group;
6243 /* 6224 /*
6244 * we don't want to use the block group if it doesn't match our 6225 * we don't want to use the block group if it doesn't match our
6245 * allocation bits, or if its not cached. 6226 * allocation bits, or if its not cached.
@@ -6276,7 +6257,6 @@ search:
6276 u64 offset; 6257 u64 offset;
6277 int cached; 6258 int cached;
6278 6259
6279 used_block_group = block_group;
6280 btrfs_get_block_group(block_group); 6260 btrfs_get_block_group(block_group);
6281 search_start = block_group->key.objectid; 6261 search_start = block_group->key.objectid;
6282 6262
@@ -6304,7 +6284,6 @@ search:
6304have_block_group: 6284have_block_group:
6305 cached = block_group_cache_done(block_group); 6285 cached = block_group_cache_done(block_group);
6306 if (unlikely(!cached)) { 6286 if (unlikely(!cached)) {
6307 found_uncached_bg = true;
6308 ret = cache_block_group(block_group, 0); 6287 ret = cache_block_group(block_group, 0);
6309 BUG_ON(ret < 0); 6288 BUG_ON(ret < 0);
6310 ret = 0; 6289 ret = 0;
@@ -6320,6 +6299,7 @@ have_block_group:
6320 * let's look there 6299 * let's look there
6321 */ 6300 */
6322 if (last_ptr) { 6301 if (last_ptr) {
6302 struct btrfs_block_group_cache *used_block_group;
6323 unsigned long aligned_cluster; 6303 unsigned long aligned_cluster;
6324 /* 6304 /*
6325 * the refill lock keeps out other 6305 * the refill lock keeps out other
@@ -6330,10 +6310,8 @@ have_block_group:
6330 if (used_block_group != block_group && 6310 if (used_block_group != block_group &&
6331 (!used_block_group || 6311 (!used_block_group ||
6332 used_block_group->ro || 6312 used_block_group->ro ||
6333 !block_group_bits(used_block_group, flags))) { 6313 !block_group_bits(used_block_group, flags)))
6334 used_block_group = block_group;
6335 goto refill_cluster; 6314 goto refill_cluster;
6336 }
6337 6315
6338 if (used_block_group != block_group) 6316 if (used_block_group != block_group)
6339 btrfs_get_block_group(used_block_group); 6317 btrfs_get_block_group(used_block_group);
@@ -6347,17 +6325,19 @@ have_block_group:
6347 /* we have a block, we're done */ 6325 /* we have a block, we're done */
6348 spin_unlock(&last_ptr->refill_lock); 6326 spin_unlock(&last_ptr->refill_lock);
6349 trace_btrfs_reserve_extent_cluster(root, 6327 trace_btrfs_reserve_extent_cluster(root,
6350 block_group, search_start, num_bytes); 6328 used_block_group,
6329 search_start, num_bytes);
6330 if (used_block_group != block_group) {
6331 btrfs_put_block_group(block_group);
6332 block_group = used_block_group;
6333 }
6351 goto checks; 6334 goto checks;
6352 } 6335 }
6353 6336
6354 WARN_ON(last_ptr->block_group != used_block_group); 6337 WARN_ON(last_ptr->block_group != used_block_group);
6355 if (used_block_group != block_group) { 6338 if (used_block_group != block_group)
6356 btrfs_put_block_group(used_block_group); 6339 btrfs_put_block_group(used_block_group);
6357 used_block_group = block_group;
6358 }
6359refill_cluster: 6340refill_cluster:
6360 BUG_ON(used_block_group != block_group);
6361 /* If we are on LOOP_NO_EMPTY_SIZE, we can't 6341 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
6362 * set up a new cluster, so let's just skip it 6342 * set up a new cluster, so let's just skip it
6363 * and let the allocator find whatever block 6343 * and let the allocator find whatever block
@@ -6476,25 +6456,25 @@ unclustered_alloc:
6476 goto loop; 6456 goto loop;
6477 } 6457 }
6478checks: 6458checks:
6479 search_start = stripe_align(root, used_block_group, 6459 search_start = stripe_align(root, block_group,
6480 offset, num_bytes); 6460 offset, num_bytes);
6481 6461
6482 /* move on to the next group */ 6462 /* move on to the next group */
6483 if (search_start + num_bytes > 6463 if (search_start + num_bytes >
6484 used_block_group->key.objectid + used_block_group->key.offset) { 6464 block_group->key.objectid + block_group->key.offset) {
6485 btrfs_add_free_space(used_block_group, offset, num_bytes); 6465 btrfs_add_free_space(block_group, offset, num_bytes);
6486 goto loop; 6466 goto loop;
6487 } 6467 }
6488 6468
6489 if (offset < search_start) 6469 if (offset < search_start)
6490 btrfs_add_free_space(used_block_group, offset, 6470 btrfs_add_free_space(block_group, offset,
6491 search_start - offset); 6471 search_start - offset);
6492 BUG_ON(offset > search_start); 6472 BUG_ON(offset > search_start);
6493 6473
6494 ret = btrfs_update_reserved_bytes(used_block_group, num_bytes, 6474 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
6495 alloc_type); 6475 alloc_type);
6496 if (ret == -EAGAIN) { 6476 if (ret == -EAGAIN) {
6497 btrfs_add_free_space(used_block_group, offset, num_bytes); 6477 btrfs_add_free_space(block_group, offset, num_bytes);
6498 goto loop; 6478 goto loop;
6499 } 6479 }
6500 6480
@@ -6504,16 +6484,12 @@ checks:
6504 6484
6505 trace_btrfs_reserve_extent(orig_root, block_group, 6485 trace_btrfs_reserve_extent(orig_root, block_group,
6506 search_start, num_bytes); 6486 search_start, num_bytes);
6507 if (used_block_group != block_group)
6508 btrfs_put_block_group(used_block_group);
6509 btrfs_put_block_group(block_group); 6487 btrfs_put_block_group(block_group);
6510 break; 6488 break;
6511loop: 6489loop:
6512 failed_cluster_refill = false; 6490 failed_cluster_refill = false;
6513 failed_alloc = false; 6491 failed_alloc = false;
6514 BUG_ON(index != get_block_group_index(block_group)); 6492 BUG_ON(index != get_block_group_index(block_group));
6515 if (used_block_group != block_group)
6516 btrfs_put_block_group(used_block_group);
6517 btrfs_put_block_group(block_group); 6493 btrfs_put_block_group(block_group);
6518 } 6494 }
6519 up_read(&space_info->groups_sem); 6495 up_read(&space_info->groups_sem);
@@ -6584,12 +6560,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6584 int index = 0; 6560 int index = 0;
6585 6561
6586 spin_lock(&info->lock); 6562 spin_lock(&info->lock);
6587 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", 6563 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
6588 info->flags, 6564 info->flags,
6589 info->total_bytes - info->bytes_used - info->bytes_pinned - 6565 info->total_bytes - info->bytes_used - info->bytes_pinned -
6590 info->bytes_reserved - info->bytes_readonly, 6566 info->bytes_reserved - info->bytes_readonly,
6591 (info->full) ? "" : "not "); 6567 (info->full) ? "" : "not ");
6592 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 6568 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
6593 "reserved=%llu, may_use=%llu, readonly=%llu\n", 6569 "reserved=%llu, may_use=%llu, readonly=%llu\n",
6594 info->total_bytes, info->bytes_used, info->bytes_pinned, 6570 info->total_bytes, info->bytes_used, info->bytes_pinned,
6595 info->bytes_reserved, info->bytes_may_use, 6571 info->bytes_reserved, info->bytes_may_use,
@@ -6603,7 +6579,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6603again: 6579again:
6604 list_for_each_entry(cache, &info->block_groups[index], list) { 6580 list_for_each_entry(cache, &info->block_groups[index], list) {
6605 spin_lock(&cache->lock); 6581 spin_lock(&cache->lock);
6606 printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", 6582 printk(KERN_INFO "BTRFS: "
6583 "block group %llu has %llu bytes, "
6584 "%llu used %llu pinned %llu reserved %s\n",
6607 cache->key.objectid, cache->key.offset, 6585 cache->key.objectid, cache->key.offset,
6608 btrfs_block_group_used(&cache->item), cache->pinned, 6586 btrfs_block_group_used(&cache->item), cache->pinned,
6609 cache->reserved, cache->ro ? "[readonly]" : ""); 6587 cache->reserved, cache->ro ? "[readonly]" : "");
@@ -6966,7 +6944,7 @@ again:
6966 /*DEFAULT_RATELIMIT_BURST*/ 1); 6944 /*DEFAULT_RATELIMIT_BURST*/ 1);
6967 if (__ratelimit(&_rs)) 6945 if (__ratelimit(&_rs))
6968 WARN(1, KERN_DEBUG 6946 WARN(1, KERN_DEBUG
6969 "btrfs: block rsv returned %d\n", ret); 6947 "BTRFS: block rsv returned %d\n", ret);
6970 } 6948 }
6971try_reserve: 6949try_reserve:
6972 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6950 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
@@ -7714,7 +7692,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7714 7692
7715 btrfs_end_transaction_throttle(trans, tree_root); 7693 btrfs_end_transaction_throttle(trans, tree_root);
7716 if (!for_reloc && btrfs_need_cleaner_sleep(root)) { 7694 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
7717 pr_debug("btrfs: drop snapshot early exit\n"); 7695 pr_debug("BTRFS: drop snapshot early exit\n");
7718 err = -EAGAIN; 7696 err = -EAGAIN;
7719 goto out_free; 7697 goto out_free;
7720 } 7698 }
@@ -7779,7 +7757,7 @@ out:
7779 */ 7757 */
7780 if (!for_reloc && root_dropped == false) 7758 if (!for_reloc && root_dropped == false)
7781 btrfs_add_dead_root(root); 7759 btrfs_add_dead_root(root);
7782 if (err) 7760 if (err && err != -EAGAIN)
7783 btrfs_std_error(root->fs_info, err); 7761 btrfs_std_error(root->fs_info, err);
7784 return err; 7762 return err;
7785} 7763}
@@ -8333,6 +8311,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8333 release_global_block_rsv(info); 8311 release_global_block_rsv(info);
8334 8312
8335 while (!list_empty(&info->space_info)) { 8313 while (!list_empty(&info->space_info)) {
8314 int i;
8315
8336 space_info = list_entry(info->space_info.next, 8316 space_info = list_entry(info->space_info.next,
8337 struct btrfs_space_info, 8317 struct btrfs_space_info,
8338 list); 8318 list);
@@ -8343,9 +8323,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8343 dump_space_info(space_info, 0, 0); 8323 dump_space_info(space_info, 0, 0);
8344 } 8324 }
8345 } 8325 }
8346 percpu_counter_destroy(&space_info->total_bytes_pinned);
8347 list_del(&space_info->list); 8326 list_del(&space_info->list);
8348 kfree(space_info); 8327 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
8328 struct kobject *kobj;
8329 kobj = &space_info->block_group_kobjs[i];
8330 if (kobj->parent) {
8331 kobject_del(kobj);
8332 kobject_put(kobj);
8333 }
8334 }
8335 kobject_del(&space_info->kobj);
8336 kobject_put(&space_info->kobj);
8349 } 8337 }
8350 return 0; 8338 return 0;
8351} 8339}
@@ -8356,10 +8344,57 @@ static void __link_block_group(struct btrfs_space_info *space_info,
8356 int index = get_block_group_index(cache); 8344 int index = get_block_group_index(cache);
8357 8345
8358 down_write(&space_info->groups_sem); 8346 down_write(&space_info->groups_sem);
8347 if (list_empty(&space_info->block_groups[index])) {
8348 struct kobject *kobj = &space_info->block_group_kobjs[index];
8349 int ret;
8350
8351 kobject_get(&space_info->kobj); /* put in release */
8352 ret = kobject_add(kobj, &space_info->kobj, "%s",
8353 get_raid_name(index));
8354 if (ret) {
8355 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
8356 kobject_put(&space_info->kobj);
8357 }
8358 }
8359 list_add_tail(&cache->list, &space_info->block_groups[index]); 8359 list_add_tail(&cache->list, &space_info->block_groups[index]);
8360 up_write(&space_info->groups_sem); 8360 up_write(&space_info->groups_sem);
8361} 8361}
8362 8362
8363static struct btrfs_block_group_cache *
8364btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8365{
8366 struct btrfs_block_group_cache *cache;
8367
8368 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8369 if (!cache)
8370 return NULL;
8371
8372 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8373 GFP_NOFS);
8374 if (!cache->free_space_ctl) {
8375 kfree(cache);
8376 return NULL;
8377 }
8378
8379 cache->key.objectid = start;
8380 cache->key.offset = size;
8381 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8382
8383 cache->sectorsize = root->sectorsize;
8384 cache->fs_info = root->fs_info;
8385 cache->full_stripe_len = btrfs_full_stripe_len(root,
8386 &root->fs_info->mapping_tree,
8387 start);
8388 atomic_set(&cache->count, 1);
8389 spin_lock_init(&cache->lock);
8390 INIT_LIST_HEAD(&cache->list);
8391 INIT_LIST_HEAD(&cache->cluster_list);
8392 INIT_LIST_HEAD(&cache->new_bg_list);
8393 btrfs_init_free_space_ctl(cache);
8394
8395 return cache;
8396}
8397
8363int btrfs_read_block_groups(struct btrfs_root *root) 8398int btrfs_read_block_groups(struct btrfs_root *root)
8364{ 8399{
8365 struct btrfs_path *path; 8400 struct btrfs_path *path;
@@ -8395,26 +8430,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8395 break; 8430 break;
8396 if (ret != 0) 8431 if (ret != 0)
8397 goto error; 8432 goto error;
8433
8398 leaf = path->nodes[0]; 8434 leaf = path->nodes[0];
8399 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 8435 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8400 cache = kzalloc(sizeof(*cache), GFP_NOFS); 8436
8437 cache = btrfs_create_block_group_cache(root, found_key.objectid,
8438 found_key.offset);
8401 if (!cache) { 8439 if (!cache) {
8402 ret = -ENOMEM; 8440 ret = -ENOMEM;
8403 goto error; 8441 goto error;
8404 } 8442 }
8405 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8406 GFP_NOFS);
8407 if (!cache->free_space_ctl) {
8408 kfree(cache);
8409 ret = -ENOMEM;
8410 goto error;
8411 }
8412
8413 atomic_set(&cache->count, 1);
8414 spin_lock_init(&cache->lock);
8415 cache->fs_info = info;
8416 INIT_LIST_HEAD(&cache->list);
8417 INIT_LIST_HEAD(&cache->cluster_list);
8418 8443
8419 if (need_clear) { 8444 if (need_clear) {
8420 /* 8445 /*
@@ -8435,16 +8460,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8435 read_extent_buffer(leaf, &cache->item, 8460 read_extent_buffer(leaf, &cache->item,
8436 btrfs_item_ptr_offset(leaf, path->slots[0]), 8461 btrfs_item_ptr_offset(leaf, path->slots[0]),
8437 sizeof(cache->item)); 8462 sizeof(cache->item));
8438 memcpy(&cache->key, &found_key, sizeof(found_key)); 8463 cache->flags = btrfs_block_group_flags(&cache->item);
8439 8464
8440 key.objectid = found_key.objectid + found_key.offset; 8465 key.objectid = found_key.objectid + found_key.offset;
8441 btrfs_release_path(path); 8466 btrfs_release_path(path);
8442 cache->flags = btrfs_block_group_flags(&cache->item);
8443 cache->sectorsize = root->sectorsize;
8444 cache->full_stripe_len = btrfs_full_stripe_len(root,
8445 &root->fs_info->mapping_tree,
8446 found_key.objectid);
8447 btrfs_init_free_space_ctl(cache);
8448 8467
8449 /* 8468 /*
8450 * We need to exclude the super stripes now so that the space 8469 * We need to exclude the super stripes now so that the space
@@ -8458,8 +8477,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8458 * case. 8477 * case.
8459 */ 8478 */
8460 free_excluded_extents(root, cache); 8479 free_excluded_extents(root, cache);
8461 kfree(cache->free_space_ctl); 8480 btrfs_put_block_group(cache);
8462 kfree(cache);
8463 goto error; 8481 goto error;
8464 } 8482 }
8465 8483
@@ -8590,38 +8608,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
 	root->fs_info->last_trans_log_full_commit = trans->transid;
 
-	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	cache = btrfs_create_block_group_cache(root, chunk_offset, size);
 	if (!cache)
 		return -ENOMEM;
-	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-					GFP_NOFS);
-	if (!cache->free_space_ctl) {
-		kfree(cache);
-		return -ENOMEM;
-	}
-
-	cache->key.objectid = chunk_offset;
-	cache->key.offset = size;
-	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
-	cache->sectorsize = root->sectorsize;
-	cache->fs_info = root->fs_info;
-	cache->full_stripe_len = btrfs_full_stripe_len(root,
-					       &root->fs_info->mapping_tree,
-					       chunk_offset);
-
-	atomic_set(&cache->count, 1);
-	spin_lock_init(&cache->lock);
-	INIT_LIST_HEAD(&cache->list);
-	INIT_LIST_HEAD(&cache->cluster_list);
-	INIT_LIST_HEAD(&cache->new_bg_list);
-
-	btrfs_init_free_space_ctl(cache);
 
 	btrfs_set_block_group_used(&cache->item, bytes_used);
 	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
-	cache->flags = type;
 	btrfs_set_block_group_flags(&cache->item, type);
 
+	cache->flags = type;
 	cache->last_byte_to_unpin = (u64)-1;
 	cache->cached = BTRFS_CACHE_FINISHED;
 	ret = exclude_super_stripes(root, cache);
@@ -8631,8 +8626,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	 * case.
 	 */
 		free_excluded_extents(root, cache);
-		kfree(cache->free_space_ctl);
-		kfree(cache);
+		btrfs_put_block_group(cache);
 		return ret;
 	}
 
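Both hunks above replace a dozen lines of open-coded initialization with btrfs_create_block_group_cache(). The helper itself is introduced elsewhere in this series; what follows is a sketch of its likely shape, inferred purely from the initialization lines the two call sites stop repeating -- treat it as illustrative, not the committed code.

/* Inferred sketch: the fields set here are exactly the ones the old
 * call sites used to initialize by hand. */
static struct btrfs_block_group_cache *
btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
{
    struct btrfs_block_group_cache *cache;

    cache = kzalloc(sizeof(*cache), GFP_NOFS);
    if (!cache)
        return NULL;
    cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
                                    GFP_NOFS);
    if (!cache->free_space_ctl) {
        kfree(cache);
        return NULL;
    }

    cache->key.objectid = start;
    cache->key.offset = size;
    cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
    cache->sectorsize = root->sectorsize;
    cache->fs_info = root->fs_info;
    cache->full_stripe_len = btrfs_full_stripe_len(root,
                                &root->fs_info->mapping_tree, start);
    atomic_set(&cache->count, 1);   /* dropped via btrfs_put_block_group() */
    spin_lock_init(&cache->lock);
    INIT_LIST_HEAD(&cache->list);
    INIT_LIST_HEAD(&cache->cluster_list);
    INIT_LIST_HEAD(&cache->new_bg_list);
    btrfs_init_free_space_ctl(cache);

    return cache;
}

Note how the error paths in both hunks switch from freeing the two allocations by hand to btrfs_put_block_group(), consistent with the reference the helper takes at creation.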
@@ -8796,8 +8790,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	 * are still on the list after taking the semaphore
 	 */
 	list_del_init(&block_group->list);
-	if (list_empty(&block_group->space_info->block_groups[index]))
+	if (list_empty(&block_group->space_info->block_groups[index])) {
+		kobject_del(&block_group->space_info->block_group_kobjs[index]);
+		kobject_put(&block_group->space_info->block_group_kobjs[index]);
 		clear_avail_alloc_bits(root->fs_info, block_group->flags);
+	}
 	up_write(&block_group->space_info->groups_sem);
 
 	if (block_group->cached == BTRFS_CACHE_STARTED)
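The sysfs teardown above pairs kobject_del() with kobject_put(): del unlinks the entry from sysfs, put drops the reference taken when the kobject was registered, and both are needed to avoid leaking it once the last block group of a raid profile disappears. A minimal sketch of that pairing, with a hypothetical helper name:

/* Hypothetical helper illustrating the pairing used above. */
static void remove_raid_kobject(struct kobject *kobj)
{
    kobject_del(kobj);  /* removes the sysfs directory entry */
    kobject_put(kobj);  /* releases the registration reference */
}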
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index bcb6f1b780d6..85bbd01f1271 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -59,7 +59,7 @@ void btrfs_leak_debug_check(void)
 
 	while (!list_empty(&states)) {
 		state = list_entry(states.next, struct extent_state, leak_list);
-		printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+		printk(KERN_ERR "BTRFS: state leak: start %llu end %llu "
 		       "state %lu in tree %p refs %d\n",
 		       state->start, state->end, state->state, state->tree,
 		       atomic_read(&state->refs));
@@ -69,7 +69,7 @@ void btrfs_leak_debug_check(void)
 
 	while (!list_empty(&buffers)) {
 		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-		printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+		printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
 		       "refs %d\n",
 		       eb->start, eb->len, atomic_read(&eb->refs));
 		list_del(&eb->leak_list);
@@ -77,16 +77,22 @@ void btrfs_leak_debug_check(void)
 	}
 }
 
-#define btrfs_debug_check_extent_io_range(inode, start, end)		\
-	__btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end))
+#define btrfs_debug_check_extent_io_range(tree, start, end)		\
+	__btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
 static inline void __btrfs_debug_check_extent_io_range(const char *caller,
-		struct inode *inode, u64 start, u64 end)
+		struct extent_io_tree *tree, u64 start, u64 end)
 {
-	u64 isize = i_size_read(inode);
+	struct inode *inode;
+	u64 isize;
+
+	if (!tree->mapping)
+		return;
 
+	inode = tree->mapping->host;
+	isize = i_size_read(inode);
 	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
 		printk_ratelimited(KERN_DEBUG
-		    "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
+		    "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
 				caller, btrfs_ino(inode), isize, start, end);
 	}
 }
@@ -124,6 +130,8 @@ static noinline void flush_write_bio(void *data);
 static inline struct btrfs_fs_info *
 tree_fs_info(struct extent_io_tree *tree)
 {
+	if (!tree->mapping)
+		return NULL;
 	return btrfs_sb(tree->mapping->host->i_sb);
 }
 
@@ -186,11 +194,9 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 			 struct address_space *mapping)
 {
 	tree->state = RB_ROOT;
-	INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
 	tree->ops = NULL;
 	tree->dirty_bytes = 0;
 	spin_lock_init(&tree->lock);
-	spin_lock_init(&tree->buffer_lock);
 	tree->mapping = mapping;
 }
 
@@ -224,12 +230,20 @@ void free_extent_state(struct extent_state *state)
 }
 
 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
-				   struct rb_node *node)
+				   struct rb_node *node,
+				   struct rb_node ***p_in,
+				   struct rb_node **parent_in)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
 	struct tree_entry *entry;
 
+	if (p_in && parent_in) {
+		p = *p_in;
+		parent = *parent_in;
+		goto do_insert;
+	}
+
 	while (*p) {
 		parent = *p;
 		entry = rb_entry(parent, struct tree_entry, rb_node);
@@ -242,35 +256,43 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
 		return parent;
 	}
 
+do_insert:
 	rb_link_node(node, parent, p);
 	rb_insert_color(node, root);
 	return NULL;
 }
 
 static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
 				      struct rb_node **prev_ret,
-				      struct rb_node **next_ret)
+				      struct rb_node **next_ret,
+				      struct rb_node ***p_ret,
+				      struct rb_node **parent_ret)
 {
 	struct rb_root *root = &tree->state;
-	struct rb_node *n = root->rb_node;
+	struct rb_node **n = &root->rb_node;
 	struct rb_node *prev = NULL;
 	struct rb_node *orig_prev = NULL;
 	struct tree_entry *entry;
 	struct tree_entry *prev_entry = NULL;
 
-	while (n) {
-		entry = rb_entry(n, struct tree_entry, rb_node);
-		prev = n;
+	while (*n) {
+		prev = *n;
+		entry = rb_entry(prev, struct tree_entry, rb_node);
 		prev_entry = entry;
 
 		if (offset < entry->start)
-			n = n->rb_left;
+			n = &(*n)->rb_left;
 		else if (offset > entry->end)
-			n = n->rb_right;
+			n = &(*n)->rb_right;
 		else
-			return n;
+			return *n;
 	}
 
+	if (p_ret)
+		*p_ret = n;
+	if (parent_ret)
+		*parent_ret = prev;
+
 	if (prev_ret) {
 		orig_prev = prev;
 		while (prev && offset > prev_entry->end) {
@@ -292,18 +314,27 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
 	return NULL;
 }
 
-static inline struct rb_node *tree_search(struct extent_io_tree *tree,
-					  u64 offset)
+static inline struct rb_node *
+tree_search_for_insert(struct extent_io_tree *tree,
+		       u64 offset,
+		       struct rb_node ***p_ret,
+		       struct rb_node **parent_ret)
 {
 	struct rb_node *prev = NULL;
 	struct rb_node *ret;
 
-	ret = __etree_search(tree, offset, &prev, NULL);
+	ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
 	if (!ret)
 		return prev;
 	return ret;
 }
 
+static inline struct rb_node *tree_search(struct extent_io_tree *tree,
+					  u64 offset)
+{
+	return tree_search_for_insert(tree, offset, NULL, NULL);
+}
+
 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
 		     struct extent_state *other)
 {
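The point of tree_search_for_insert() is to avoid a second rb-tree descent: a failed search already knows the parent node and the child link where a new node belongs, and tree_insert() can jump straight to do_insert with them. A userspace sketch of the same idea on a plain binary search tree (names are illustrative; the kernel version additionally keeps the parent for rb_link_node() rebalancing):

#include <stddef.h>

struct node {
    unsigned long key;
    struct node *left, *right;
};

/* One descent: return a match, or on a miss record in *linkp the child
 * pointer where a new node should be hung. */
static struct node *search_for_insert(struct node **root, unsigned long key,
                                      struct node ***linkp)
{
    struct node **p = root;

    while (*p) {
        if (key < (*p)->key)
            p = &(*p)->left;
        else if (key > (*p)->key)
            p = &(*p)->right;
        else
            return *p;      /* found: no insertion needed */
    }
    *linkp = p;             /* cached insertion slot */
    return NULL;
}

/* Reuse the cached slot instead of walking the tree a second time. */
static void insert_at(struct node **slot, struct node *n)
{
    n->left = n->right = NULL;
    *slot = n;
}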
@@ -385,23 +416,25 @@ static void set_state_bits(struct extent_io_tree *tree,
  */
 static int insert_state(struct extent_io_tree *tree,
 			struct extent_state *state, u64 start, u64 end,
+			struct rb_node ***p,
+			struct rb_node **parent,
 			unsigned long *bits)
 {
 	struct rb_node *node;
 
 	if (end < start)
-		WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
+		WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
 		       end, start);
 	state->start = start;
 	state->end = end;
 
 	set_state_bits(tree, state, bits);
 
-	node = tree_insert(&tree->state, end, &state->rb_node);
+	node = tree_insert(&tree->state, end, &state->rb_node, p, parent);
 	if (node) {
 		struct extent_state *found;
 		found = rb_entry(node, struct extent_state, rb_node);
-		printk(KERN_ERR "btrfs found node %llu %llu on insert of "
+		printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
 		       "%llu %llu\n",
 		       found->start, found->end, start, end);
 		return -EEXIST;
@@ -444,7 +477,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
 	prealloc->state = orig->state;
 	orig->start = split;
 
-	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
+	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node,
+			   NULL, NULL);
 	if (node) {
 		free_extent_state(prealloc);
 		return -EEXIST;
@@ -542,7 +576,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	int err;
 	int clear = 0;
 
-	btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+	btrfs_debug_check_extent_io_range(tree, start, end);
 
 	if (bits & EXTENT_DELALLOC)
 		bits |= EXTENT_NORESERVE;
@@ -702,7 +736,7 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	struct extent_state *state;
 	struct rb_node *node;
 
-	btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+	btrfs_debug_check_extent_io_range(tree, start, end);
 
 	spin_lock(&tree->lock);
 again:
@@ -783,11 +817,13 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
 	struct rb_node *node;
+	struct rb_node **p;
+	struct rb_node *parent;
 	int err = 0;
 	u64 last_start;
 	u64 last_end;
 
-	btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+	btrfs_debug_check_extent_io_range(tree, start, end);
 
 	bits |= EXTENT_FIRST_DELALLOC;
 again:
@@ -809,14 +845,16 @@ again:
 	 * this search will find all the extents that end after
 	 * our range starts.
 	 */
-	node = tree_search(tree, start);
+	node = tree_search_for_insert(tree, start, &p, &parent);
 	if (!node) {
 		prealloc = alloc_extent_state_atomic(prealloc);
 		BUG_ON(!prealloc);
-		err = insert_state(tree, prealloc, start, end, &bits);
+		err = insert_state(tree, prealloc, start, end,
+				   &p, &parent, &bits);
 		if (err)
 			extent_io_tree_panic(tree, err);
 
+		cache_state(prealloc, cached_state);
 		prealloc = NULL;
 		goto out;
 	}
@@ -919,7 +957,7 @@ hit_next:
 		 * the later extent.
 		 */
 		err = insert_state(tree, prealloc, start, this_end,
-				   &bits);
+				   NULL, NULL, &bits);
 		if (err)
 			extent_io_tree_panic(tree, err);
 
@@ -1005,11 +1043,13 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
 	struct rb_node *node;
+	struct rb_node **p;
+	struct rb_node *parent;
 	int err = 0;
 	u64 last_start;
 	u64 last_end;
 
-	btrfs_debug_check_extent_io_range(tree->mapping->host, start, end);
+	btrfs_debug_check_extent_io_range(tree, start, end);
 
 again:
 	if (!prealloc && (mask & __GFP_WAIT)) {
@@ -1032,17 +1072,19 @@ again:
 	 * this search will find all the extents that end after
 	 * our range starts.
 	 */
-	node = tree_search(tree, start);
+	node = tree_search_for_insert(tree, start, &p, &parent);
 	if (!node) {
 		prealloc = alloc_extent_state_atomic(prealloc);
 		if (!prealloc) {
 			err = -ENOMEM;
 			goto out;
 		}
-		err = insert_state(tree, prealloc, start, end, &bits);
-		prealloc = NULL;
+		err = insert_state(tree, prealloc, start, end,
+				   &p, &parent, &bits);
 		if (err)
 			extent_io_tree_panic(tree, err);
+		cache_state(prealloc, cached_state);
+		prealloc = NULL;
 		goto out;
 	}
 	state = rb_entry(node, struct extent_state, rb_node);
@@ -1135,7 +1177,7 @@ hit_next:
 		 * the later extent.
 		 */
 		err = insert_state(tree, prealloc, start, this_end,
-				   &bits);
+				   NULL, NULL, &bits);
 		if (err)
 			extent_io_tree_panic(tree, err);
 		cache_state(prealloc, cached_state);
@@ -2012,9 +2054,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
 		return -EIO;
 	}
 
-	printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
-		      "(dev %s sector %llu)\n", page->mapping->host->i_ino,
-		      start, rcu_str_deref(dev->name), sector);
+	printk_ratelimited_in_rcu(KERN_INFO
+				  "BTRFS: read error corrected: ino %lu off %llu "
+				  "(dev %s sector %llu)\n", page->mapping->host->i_ino,
+				  start, rcu_str_deref(dev->name), sector);
 
 	bio_put(bio);
 	return 0;
@@ -2156,7 +2199,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 		return -EIO;
 	}
 
-	if (em->start > start || em->start + em->len < start) {
+	if (em->start > start || em->start + em->len <= start) {
 		free_extent_map(em);
 		em = NULL;
 	}
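The one-character change above fixes an off-by-one: an extent map covers the half-open range [em->start, em->start + em->len), so a map that ends exactly at `start` does not contain it and must be discarded. A self-contained check of that boundary convention:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t u64;

/* Half-open containment test behind the "<=" fix: the (exclusive) end
 * offset itself is outside the map. */
static bool em_contains(u64 em_start, u64 em_len, u64 start)
{
    return em_start <= start && start < em_start + em_len;
}

int main(void)
{
    assert(em_contains(0, 4096, 4095));
    assert(!em_contains(0, 4096, 4096)); /* the old "<" test kept this map */
    return 0;
}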
@@ -2333,25 +2376,29 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 static void end_bio_extent_writepage(struct bio *bio, int err)
 {
 	struct bio_vec *bvec;
-	struct extent_io_tree *tree;
 	u64 start;
 	u64 end;
 	int i;
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
-		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
 		/* We always issue full-page reads, but if some block
 		 * in a page fails to read, blk_update_request() will
 		 * advance bv_offset and adjust bv_len to compensate.
 		 * Print a warning for nonzero offsets, and an error
 		 * if they don't add up to a full page. */
-		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
-			printk("%s page write in btrfs with offset %u and length %u\n",
-			       bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
-			       ? KERN_ERR "partial" : KERN_INFO "incomplete",
-			       bvec->bv_offset, bvec->bv_len);
+		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
+			if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+				btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
+					  "partial page write in btrfs with offset %u and length %u",
+					  bvec->bv_offset, bvec->bv_len);
+			else
+				btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
+					   "incomplete page write in btrfs with offset %u and "
+					   "length %u",
+					   bvec->bv_offset, bvec->bv_len);
+		}
 
 		start = page_offset(page);
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
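The rewritten logging distinguishes two anomalies: a bvec that stops short of the page boundary ("partial", an error) and one that merely starts late but still reaches the boundary ("incomplete", informational only). A compilable restatement of that classification, with the 4K page size assumed for the example:

#include <stdio.h>

#define PAGE_CACHE_SIZE 4096u   /* assumed 4K pages */

/* Anything not spanning the whole page is an anomaly; ending short of
 * the page boundary is the serious "partial" case. */
static const char *classify_bvec(unsigned int bv_offset, unsigned int bv_len)
{
    if (bv_offset == 0 && bv_len == PAGE_CACHE_SIZE)
        return "full";
    if (bv_offset + bv_len != PAGE_CACHE_SIZE)
        return "partial";       /* logged via btrfs_err() */
    return "incomplete";        /* logged via btrfs_info() */
}

int main(void)
{
    /* prints: full partial incomplete */
    printf("%s %s %s\n", classify_bvec(0, 4096),
           classify_bvec(0, 2048), classify_bvec(2048, 2048));
    return 0;
}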
@@ -2421,11 +2468,17 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		 * advance bv_offset and adjust bv_len to compensate.
 		 * Print a warning for nonzero offsets, and an error
 		 * if they don't add up to a full page. */
-		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
-			printk("%s page read in btrfs with offset %u and length %u\n",
-			       bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
-			       ? KERN_ERR "partial" : KERN_INFO "incomplete",
-			       bvec->bv_offset, bvec->bv_len);
+		if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
+			if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+				btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
+					  "partial page read in btrfs with offset %u and length %u",
+					  bvec->bv_offset, bvec->bv_len);
+			else
+				btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
+					   "incomplete page read in btrfs with offset %u and "
+					   "length %u",
+					   bvec->bv_offset, bvec->bv_len);
+		}
 
 		start = page_offset(page);
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
@@ -3281,8 +3334,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
 		set_range_writeback(tree, cur, cur + iosize - 1);
 		if (!PageWriteback(page)) {
-			printk(KERN_ERR "btrfs warning page %lu not "
-			       "writeback, cur %llu end %llu\n",
+			btrfs_err(BTRFS_I(inode)->root->fs_info,
+				   "page %lu not writeback, cur %llu end %llu",
 			       page->index, cur, end);
 		}
 
@@ -3438,6 +3491,7 @@ static int write_one_eb(struct extent_buffer *eb,
 			struct extent_page_data *epd)
 {
 	struct block_device *bdev = fs_info->fs_devices->latest_bdev;
+	struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
 	u64 offset = eb->start;
 	unsigned long i, num_pages;
 	unsigned long bio_flags = 0;
@@ -3455,7 +3509,7 @@ static int write_one_eb(struct extent_buffer *eb,
 
 		clear_page_dirty_for_io(p);
 		set_page_writeback(p);
-		ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
+		ret = submit_extent_page(rw, tree, p, offset >> 9,
 					 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
 					 -1, end_bio_extent_buffer_writepage,
 					 0, epd->bio_flags, bio_flags);
@@ -4073,12 +4127,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	struct extent_map *em = NULL;
 	struct extent_state *cached_state = NULL;
 	struct btrfs_path *path;
-	struct btrfs_file_extent_item *item;
 	int end = 0;
 	u64 em_start = 0;
 	u64 em_len = 0;
 	u64 em_end = 0;
-	unsigned long emflags;
 
 	if (len == 0)
 		return -EINVAL;
@@ -4103,8 +4155,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	}
 	WARN_ON(!ret);
 	path->slots[0]--;
-	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
-			      struct btrfs_file_extent_item);
 	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
 	found_type = btrfs_key_type(&found_key);
 
@@ -4172,7 +4222,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		offset_in_extent = em_start - em->start;
 		em_end = extent_map_end(em);
 		em_len = em_end - em_start;
-		emflags = em->flags;
 		disko = 0;
 		flags = 0;
 
@@ -4324,10 +4373,9 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
 	__free_extent_buffer(eb);
 }
 
-static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
-						   u64 start,
-						   unsigned long len,
-						   gfp_t mask)
+static struct extent_buffer *
+__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
+		      unsigned long len, gfp_t mask)
 {
 	struct extent_buffer *eb = NULL;
 
@@ -4336,7 +4384,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
 		return NULL;
 	eb->start = start;
 	eb->len = len;
-	eb->tree = tree;
+	eb->fs_info = fs_info;
 	eb->bflags = 0;
 	rwlock_init(&eb->lock);
 	atomic_set(&eb->write_locks, 0);
@@ -4468,13 +4516,14 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
 	}
 }
 
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 					 u64 start)
 {
 	struct extent_buffer *eb;
 
 	rcu_read_lock();
-	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	eb = radix_tree_lookup(&fs_info->buffer_radix,
+			       start >> PAGE_CACHE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
 		mark_extent_buffer_accessed(eb);
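find_extent_buffer() can race with the final release of a buffer, which is why the RCU lookup only keeps the result if atomic_inc_not_zero() succeeds: a refcount that has already hit zero means the buffer is being torn down and must not be revived. A userspace model of that acquire-if-alive rule using C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>

/* Model of atomic_inc_not_zero(): take a reference only while the count
 * is still non-zero. */
static bool ref_get_unless_zero(atomic_int *refs)
{
    int old = atomic_load(refs);

    while (old != 0) {
        if (atomic_compare_exchange_weak(refs, &old, old + 1))
            return true;    /* safe to use the object */
    }
    return false;           /* dying object: the caller must redo the lookup */
}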
@@ -4485,7 +4534,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 	return NULL;
 }
 
-struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 					  u64 start, unsigned long len)
 {
 	unsigned long num_pages = num_extent_pages(start, len);
@@ -4494,16 +4543,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	struct extent_buffer *eb;
 	struct extent_buffer *exists = NULL;
 	struct page *p;
-	struct address_space *mapping = tree->mapping;
+	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	int uptodate = 1;
 	int ret;
 
-
-	eb = find_extent_buffer(tree, start);
+	eb = find_extent_buffer(fs_info, start);
 	if (eb)
 		return eb;
 
-	eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
+	eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS);
 	if (!eb)
 		return NULL;
 
@@ -4558,12 +4606,13 @@ again:
 	if (ret)
 		goto free_eb;
 
-	spin_lock(&tree->buffer_lock);
-	ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
-	spin_unlock(&tree->buffer_lock);
+	spin_lock(&fs_info->buffer_lock);
+	ret = radix_tree_insert(&fs_info->buffer_radix,
+				start >> PAGE_CACHE_SHIFT, eb);
+	spin_unlock(&fs_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
-		exists = find_extent_buffer(tree, start);
+		exists = find_extent_buffer(fs_info, start);
 		if (exists)
 			goto free_eb;
 		else
@@ -4571,6 +4620,7 @@ again:
 	}
 	/* add one reference for the tree */
 	check_buffer_tree_ref(eb);
+	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
 
 	/*
 	 * there is a race where release page may have
@@ -4614,17 +4664,17 @@ static int release_extent_buffer(struct extent_buffer *eb)
 {
 	WARN_ON(atomic_read(&eb->refs) == 0);
 	if (atomic_dec_and_test(&eb->refs)) {
-		if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) {
-			spin_unlock(&eb->refs_lock);
-		} else {
-			struct extent_io_tree *tree = eb->tree;
+		if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
+			struct btrfs_fs_info *fs_info = eb->fs_info;
 
 			spin_unlock(&eb->refs_lock);
 
-			spin_lock(&tree->buffer_lock);
-			radix_tree_delete(&tree->buffer,
+			spin_lock(&fs_info->buffer_lock);
+			radix_tree_delete(&fs_info->buffer_radix,
 					  eb->start >> PAGE_CACHE_SHIFT);
-			spin_unlock(&tree->buffer_lock);
+			spin_unlock(&fs_info->buffer_lock);
+		} else {
+			spin_unlock(&eb->refs_lock);
 		}
 
 		/* Should be safe to release our pages at this point */
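The test_and_clear_bit(EXTENT_BUFFER_IN_TREE, ...) gate replaces the old DUMMY-flag special case and makes exactly one releaser responsible for the radix-tree removal: whichever thread atomically flips the bit wins, so concurrent releasers can never delete the entry twice. The essence, modeled with a C11 atomic:

#include <stdatomic.h>
#include <stdbool.h>

/* Returns true for exactly one caller, however many race here. */
static bool claim_tree_removal(atomic_bool *in_tree)
{
    return atomic_exchange(in_tree, false);
}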
@@ -5103,12 +5153,12 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	unsigned long src_i;
 
 	if (src_offset + len > dst->len) {
-		printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+		printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
 		       "len %lu dst len %lu\n", src_offset, len, dst->len);
 		BUG_ON(1);
 	}
 	if (dst_offset + len > dst->len) {
-		printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+		printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
 		       "len %lu dst len %lu\n", dst_offset, len, dst->len);
 		BUG_ON(1);
 	}
@@ -5150,12 +5200,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	unsigned long src_i;
 
 	if (src_offset + len > dst->len) {
-		printk(KERN_ERR "btrfs memmove bogus src_offset %lu move "
+		printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
 		       "len %lu len %lu\n", src_offset, len, dst->len);
 		BUG_ON(1);
 	}
 	if (dst_offset + len > dst->len) {
-		printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move "
+		printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
 		       "len %lu len %lu\n", dst_offset, len, dst->len);
 		BUG_ON(1);
 	}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 19620c58f096..58b27e5ab521 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -43,6 +43,7 @@
 #define EXTENT_BUFFER_WRITEBACK 7
 #define EXTENT_BUFFER_IOERR 8
 #define EXTENT_BUFFER_DUMMY 9
+#define EXTENT_BUFFER_IN_TREE 10
 
 /* these are flags for extent_clear_unlock_delalloc */
 #define PAGE_UNLOCK		(1 << 0)
@@ -94,12 +95,10 @@ struct extent_io_ops {
 
 struct extent_io_tree {
 	struct rb_root state;
-	struct radix_tree_root buffer;
 	struct address_space *mapping;
 	u64 dirty_bytes;
 	int track_uptodate;
 	spinlock_t lock;
-	spinlock_t buffer_lock;
 	struct extent_io_ops *ops;
 };
 
@@ -130,7 +129,7 @@ struct extent_buffer {
 	unsigned long map_start;
 	unsigned long map_len;
 	unsigned long bflags;
-	struct extent_io_tree *tree;
+	struct btrfs_fs_info *fs_info;
 	spinlock_t refs_lock;
 	atomic_t refs;
 	atomic_t io_pages;
@@ -266,11 +265,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
 void set_page_extent_mapped(struct page *page);
 
-struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 					  u64 start, unsigned long len);
 struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 					 u64 start);
 void free_extent_buffer(struct extent_buffer *eb);
 void free_extent_buffer_stale(struct extent_buffer *eb);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a4a7a1a8da95..996ad56b57db 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -79,12 +79,21 @@ void free_extent_map(struct extent_map *em)
 	}
 }
 
-static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
-				   struct rb_node *node)
+/* simple helper to do math around the end of an extent, handling wrap */
+static u64 range_end(u64 start, u64 len)
+{
+	if (start + len < start)
+		return (u64)-1;
+	return start + len;
+}
+
+static int tree_insert(struct rb_root *root, struct extent_map *em)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
-	struct extent_map *entry;
+	struct extent_map *entry = NULL;
+	struct rb_node *orig_parent = NULL;
+	u64 end = range_end(em->start, em->len);
 
 	while (*p) {
 		parent = *p;
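range_end() clamps instead of wrapping when start + len overflows a u64, so the overlap comparisons that follow stay monotonic. The same helper, compiled and exercised standalone:

#include <assert.h>
#include <stdint.h>

typedef uint64_t u64;

/* Saturate to the largest u64 instead of wrapping on overflow. */
static u64 range_end(u64 start, u64 len)
{
    if (start + len < start)    /* unsigned wrap detected */
        return (u64)-1;
    return start + len;
}

int main(void)
{
    assert(range_end(100, 28) == 128);
    assert(range_end((u64)-10, 100) == (u64)-1); /* clamped, not wrapped */
    return 0;
}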
@@ -92,19 +101,37 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
 
 		WARN_ON(!entry->in_tree);
 
-		if (offset < entry->start)
+		if (em->start < entry->start)
 			p = &(*p)->rb_left;
-		else if (offset >= extent_map_end(entry))
+		else if (em->start >= extent_map_end(entry))
 			p = &(*p)->rb_right;
 		else
-			return parent;
+			return -EEXIST;
 	}
 
-	entry = rb_entry(node, struct extent_map, rb_node);
-	entry->in_tree = 1;
-	rb_link_node(node, parent, p);
-	rb_insert_color(node, root);
-	return NULL;
+	orig_parent = parent;
+	while (parent && em->start >= extent_map_end(entry)) {
+		parent = rb_next(parent);
+		entry = rb_entry(parent, struct extent_map, rb_node);
+	}
+	if (parent)
+		if (end > entry->start && em->start < extent_map_end(entry))
+			return -EEXIST;
+
+	parent = orig_parent;
+	entry = rb_entry(parent, struct extent_map, rb_node);
+	while (parent && em->start < entry->start) {
+		parent = rb_prev(parent);
+		entry = rb_entry(parent, struct extent_map, rb_node);
+	}
+	if (parent)
+		if (end > entry->start && em->start < extent_map_end(entry))
+			return -EEXIST;
+
+	em->in_tree = 1;
+	rb_link_node(&em->rb_node, orig_parent, p);
+	rb_insert_color(&em->rb_node, root);
+	return 0;
 }
 
 /*
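The rewritten tree_insert() refuses overlapping extent maps up front by probing the in-order neighbors of the insertion point with rb_next()/rb_prev(), which is what lets add_extent_mapping() below drop its separate lookup_extent_mapping() pass. The overlap rule it applies to half-open ranges, as a standalone predicate:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t u64;

/* Two half-open ranges [a_start, a_end) and [b_start, b_end) overlap
 * iff each one starts before the other ends -- the test applied above
 * to the insertion point's neighbors. */
static bool ranges_overlap(u64 a_start, u64 a_end, u64 b_start, u64 b_end)
{
    return a_end > b_start && a_start < b_end;
}

int main(void)
{
    assert(ranges_overlap(0, 10, 5, 15));   /* partial overlap */
    assert(!ranges_overlap(0, 10, 10, 20)); /* merely touching is fine */
    return 0;
}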
@@ -228,7 +255,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
 		merge = rb_entry(rb, struct extent_map, rb_node);
 		if (rb && mergable_maps(em, merge)) {
 			em->len += merge->len;
-			em->block_len += merge->len;
+			em->block_len += merge->block_len;
 			rb_erase(&merge->rb_node, &tree->map);
 			merge->in_tree = 0;
 			em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
@@ -310,20 +337,11 @@ int add_extent_mapping(struct extent_map_tree *tree,
 		       struct extent_map *em, int modified)
 {
 	int ret = 0;
-	struct rb_node *rb;
-	struct extent_map *exist;
 
-	exist = lookup_extent_mapping(tree, em->start, em->len);
-	if (exist) {
-		free_extent_map(exist);
-		ret = -EEXIST;
-		goto out;
-	}
-	rb = tree_insert(&tree->map, em->start, &em->rb_node);
-	if (rb) {
-		ret = -EEXIST;
+	ret = tree_insert(&tree->map, em);
+	if (ret)
 		goto out;
-	}
+
 	atomic_inc(&em->refs);
 
 	em->mod_start = em->start;
@@ -337,14 +355,6 @@ out:
 	return ret;
 }
 
-/* simple helper to do math around the end of an extent, handling wrap */
-static u64 range_end(u64 start, u64 len)
-{
-	if (start + len < start)
-		return (u64)-1;
-	return start + len;
-}
-
 static struct extent_map *
 __lookup_extent_mapping(struct extent_map_tree *tree,
 			u64 start, u64 len, int strict)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 84a46a42d262..127555b29f58 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -246,8 +246,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
 						 offset + bvec->bv_len - 1,
 						 EXTENT_NODATASUM, GFP_NOFS);
 		} else {
-			printk(KERN_INFO "btrfs no csum found "
-			       "for inode %llu start %llu\n",
+			btrfs_info(BTRFS_I(inode)->root->fs_info,
+				   "no csum found for inode %llu start %llu",
 			       btrfs_ino(inode), offset);
 		}
 		item = NULL;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 82d0342763c5..0165b8672f09 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -692,7 +692,10 @@ next:
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root, struct inode *inode,
 			 struct btrfs_path *path, u64 start, u64 end,
-			 u64 *drop_end, int drop_cache)
+			 u64 *drop_end, int drop_cache,
+			 int replace_extent,
+			 u32 extent_item_size,
+			 int *key_inserted)
 {
 	struct extent_buffer *leaf;
 	struct btrfs_file_extent_item *fi;
@@ -712,6 +715,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 	int modify_tree = -1;
 	int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
 	int found = 0;
+	int leafs_visited = 0;
 
 	if (drop_cache)
 		btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -733,6 +737,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 			path->slots[0]--;
 	}
 	ret = 0;
+	leafs_visited++;
 next_slot:
 	leaf = path->nodes[0];
 	if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -744,6 +749,7 @@ next_slot:
 			ret = 0;
 			break;
 		}
+		leafs_visited++;
 		leaf = path->nodes[0];
 		recow = 1;
 	}
@@ -766,7 +772,8 @@ next_slot:
 				btrfs_file_extent_num_bytes(leaf, fi);
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 			extent_end = key.offset +
-				btrfs_file_extent_inline_len(leaf, fi);
+				btrfs_file_extent_inline_len(leaf,
+						     path->slots[0], fi);
 		} else {
 			WARN_ON(1);
 			extent_end = search_start;
@@ -927,14 +934,44 @@ next_slot:
 	}
 
 	if (!ret && del_nr > 0) {
+		/*
+		 * Set path->slots[0] to first slot, so that after the delete
+		 * if items are move off from our leaf to its immediate left or
+		 * right neighbor leafs, we end up with a correct and adjusted
+		 * path->slots[0] for our insertion.
+		 */
+		path->slots[0] = del_slot;
 		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
 		if (ret)
 			btrfs_abort_transaction(trans, root, ret);
+
+		leaf = path->nodes[0];
+		/*
+		 * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that
+		 * is, its contents got pushed to its neighbors), in which case
+		 * it means path->locks[0] == 0
+		 */
+		if (!ret && replace_extent && leafs_visited == 1 &&
+		    path->locks[0] &&
+		    btrfs_leaf_free_space(root, leaf) >=
+		    sizeof(struct btrfs_item) + extent_item_size) {
+
+			key.objectid = ino;
+			key.type = BTRFS_EXTENT_DATA_KEY;
+			key.offset = start;
+			setup_items_for_insert(root, path, &key,
+					       &extent_item_size,
+					       extent_item_size,
+					       sizeof(struct btrfs_item) +
+					       extent_item_size, 1);
+			*key_inserted = 1;
+		}
 	}
 
+	if (!replace_extent || !(*key_inserted))
+		btrfs_release_path(path);
 	if (drop_end)
 		*drop_end = found ? min(end, extent_end) : end;
-	btrfs_release_path(path);
 	return ret;
 }
 
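With the new parameters, __btrfs_drop_extents() can finish by inserting the key for a replacement file extent item itself whenever it visited a single leaf that still has room, sparing the caller a second full btree search. A hedged fragment of what a fast-path caller might look like -- the locals and the fallback helper are hypothetical, and callers that don't want this pass 0, 0, NULL, as the hole-punching hunks below do:

    int key_inserted = 0;

    ret = __btrfs_drop_extents(trans, root, inode, path, start, end,
                               &drop_end, 1 /* drop_cache */,
                               1 /* replace_extent */,
                               sizeof(struct btrfs_file_extent_item),
                               &key_inserted);
    if (ret)
        goto out;
    if (!key_inserted) {
        /* hypothetical slow path: the leaf had no spare room, so the
         * replacement item needs a fresh search and insert */
        ret = insert_replacement_item(trans, root, path);
    }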
@@ -949,7 +986,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 	ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
-				   drop_cache);
+				   drop_cache, 0, 0, NULL);
 	btrfs_free_path(path);
 	return ret;
 }
@@ -1235,29 +1272,18 @@ static int prepare_uptodate_page(struct page *page, u64 pos,
 }
 
 /*
- * this gets pages into the page cache and locks them down, it also properly
- * waits for data=ordered extents to finish before allowing the pages to be
- * modified.
+ * this just gets pages into the page cache and locks them down.
  */
-static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
-				  struct page **pages, size_t num_pages,
-				  loff_t pos, unsigned long first_index,
-				  size_t write_bytes, bool force_uptodate)
+static noinline int prepare_pages(struct inode *inode, struct page **pages,
+				  size_t num_pages, loff_t pos,
+				  size_t write_bytes, bool force_uptodate)
 {
-	struct extent_state *cached_state = NULL;
 	int i;
 	unsigned long index = pos >> PAGE_CACHE_SHIFT;
-	struct inode *inode = file_inode(file);
 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 	int err = 0;
-	int faili = 0;
-	u64 start_pos;
-	u64 last_pos;
-
-	start_pos = pos & ~((u64)root->sectorsize - 1);
-	last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
+	int faili;
 
-again:
 	for (i = 0; i < num_pages; i++) {
 		pages[i] = find_or_create_page(inode->i_mapping, index + i,
 					       mask | __GFP_WRITE);
@@ -1280,57 +1306,85 @@ again:
 		}
 		wait_on_page_writeback(pages[i]);
 	}
-	faili = num_pages - 1;
-	err = 0;
+
+	return 0;
+fail:
+	while (faili >= 0) {
+		unlock_page(pages[faili]);
+		page_cache_release(pages[faili]);
+		faili--;
+	}
+	return err;
+
+}
+
+/*
+ * This function locks the extent and properly waits for data=ordered extents
+ * to finish before allowing the pages to be modified if need.
+ *
+ * The return value:
+ * 1 - the extent is locked
+ * 0 - the extent is not locked, and everything is OK
+ * -EAGAIN - need re-prepare the pages
+ * the other < 0 number - Something wrong happens
+ */
+static noinline int
+lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
+				size_t num_pages, loff_t pos,
+				u64 *lockstart, u64 *lockend,
+				struct extent_state **cached_state)
+{
+	u64 start_pos;
+	u64 last_pos;
+	int i;
+	int ret = 0;
+
+	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
+	last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+
 	if (start_pos < inode->i_size) {
 		struct btrfs_ordered_extent *ordered;
 		lock_extent_bits(&BTRFS_I(inode)->io_tree,
-				 start_pos, last_pos - 1, 0, &cached_state);
-		ordered = btrfs_lookup_first_ordered_extent(inode,
-							    last_pos - 1);
+				 start_pos, last_pos, 0, cached_state);
+		ordered = btrfs_lookup_first_ordered_extent(inode, last_pos);
 		if (ordered &&
 		    ordered->file_offset + ordered->len > start_pos &&
-		    ordered->file_offset < last_pos) {
+		    ordered->file_offset <= last_pos) {
 			btrfs_put_ordered_extent(ordered);
 			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-					     start_pos, last_pos - 1,
-					     &cached_state, GFP_NOFS);
+					     start_pos, last_pos,
+					     cached_state, GFP_NOFS);
 			for (i = 0; i < num_pages; i++) {
 				unlock_page(pages[i]);
 				page_cache_release(pages[i]);
 			}
-			err = btrfs_wait_ordered_range(inode, start_pos,
-						       last_pos - start_pos);
-			if (err)
-				goto fail;
-			goto again;
+			ret = btrfs_wait_ordered_range(inode, start_pos,
+						       last_pos - start_pos + 1);
+			if (ret)
+				return ret;
+			else
+				return -EAGAIN;
 		}
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
 
 		clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
-				 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
+				 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
 				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
-				 0, 0, &cached_state, GFP_NOFS);
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-				     start_pos, last_pos - 1, &cached_state,
-				     GFP_NOFS);
+				 0, 0, cached_state, GFP_NOFS);
+		*lockstart = start_pos;
+		*lockend = last_pos;
+		ret = 1;
 	}
+
 	for (i = 0; i < num_pages; i++) {
 		if (clear_page_dirty_for_io(pages[i]))
 			account_page_redirty(pages[i]);
 		set_page_extent_mapped(pages[i]);
 		WARN_ON(!PageLocked(pages[i]));
 	}
-	return 0;
-fail:
-	while (faili >= 0) {
-		unlock_page(pages[faili]);
-		page_cache_release(pages[faili]);
-		faili--;
-	}
-	return err;
 
+	return ret;
 }
 
 static noinline int check_can_nocow(struct inode *inode, loff_t pos,
@@ -1381,13 +1435,17 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct page **pages = NULL;
+	struct extent_state *cached_state = NULL;
 	u64 release_bytes = 0;
+	u64 lockstart;
+	u64 lockend;
 	unsigned long first_index;
 	size_t num_written = 0;
 	int nrptrs;
 	int ret = 0;
 	bool only_release_metadata = false;
 	bool force_page_uptodate = false;
+	bool need_unlock;
 
 	nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
 		     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -1456,18 +1514,31 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		}
 
 		release_bytes = reserve_bytes;
-
+		need_unlock = false;
+again:
 		/*
 		 * This is going to setup the pages array with the number of
 		 * pages we want, so we don't really need to worry about the
 		 * contents of pages from loop to loop
 		 */
-		ret = prepare_pages(root, file, pages, num_pages,
-				    pos, first_index, write_bytes,
+		ret = prepare_pages(inode, pages, num_pages,
+				    pos, write_bytes,
 				    force_page_uptodate);
 		if (ret)
 			break;
 
+		ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
+						      pos, &lockstart, &lockend,
+						      &cached_state);
+		if (ret < 0) {
+			if (ret == -EAGAIN)
+				goto again;
+			break;
+		} else if (ret > 0) {
+			need_unlock = true;
+			ret = 0;
+		}
+
 		copied = btrfs_copy_from_user(pos, num_pages,
 					      write_bytes, pages, i);
 
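This is the consumer of the prepare_pages()/lock_and_cleanup_extent_if_need() split introduced earlier in this file: -EAGAIN sends the writer back to re-prepare the pages after an ordered extent completed in the window, and a positive return records that the extent range is locked and must be unlocked once the copy finishes. Compressed to its control flow (a paraphrase of the loop above, not additional API):

again:
    ret = prepare_pages(...);       /* page cache work only */
    if (ret)
        break;
    ret = lock_and_cleanup_extent_if_need(...);
    if (ret == -EAGAIN)
        goto again;                 /* ordered extent raced in */
    if (ret < 0)
        break;
    need_unlock = (ret > 0);        /* 1: range locked, unlock after copy */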
@@ -1512,19 +1583,21 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		}
 
 		release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
-		if (copied > 0) {
+
+		if (copied > 0)
 			ret = btrfs_dirty_pages(root, inode, pages,
 						dirty_pages, pos, copied,
 						NULL);
-			if (ret) {
-				btrfs_drop_pages(pages, num_pages);
-				break;
-			}
+		if (need_unlock)
+			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+					     lockstart, lockend, &cached_state,
+					     GFP_NOFS);
+		if (ret) {
+			btrfs_drop_pages(pages, num_pages);
+			break;
 		}
 
 		release_bytes = 0;
-		btrfs_drop_pages(pages, num_pages);
-
 		if (only_release_metadata && copied > 0) {
 			u64 lockstart = round_down(pos, root->sectorsize);
 			u64 lockend = lockstart +
@@ -1536,6 +1609,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 			only_release_metadata = false;
 		}
 
+		btrfs_drop_pages(pages, num_pages);
+
 		cond_resched();
 
 		balance_dirty_pages_ratelimited(inode->i_mapping);
@@ -1857,12 +1932,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	if (file->private_data)
 		btrfs_ioctl_trans_end(file);
 
+	/*
+	 * We use start here because we will need to wait on the IO to complete
+	 * in btrfs_sync_log, which could require joining a transaction (for
+	 * example checking cross references in the nocow path). If we use join
+	 * here we could get into a situation where we're waiting on IO to
+	 * happen that is blocked on a transaction trying to commit. With start
+	 * we inc the extwriter counter, so we wait for all extwriters to exit
+	 * before we start blocking join'ers. This comment is to keep somebody
+	 * from thinking they are super smart and changing this to
+	 * btrfs_join_transaction *cough*Josef*cough*.
+	 */
 	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
 		ret = PTR_ERR(trans);
 		mutex_unlock(&inode->i_mutex);
 		goto out;
 	}
+	trans->sync = true;
 
 	ret = btrfs_log_dentry_safe(trans, root, dentry);
 	if (ret < 0) {
@@ -1963,11 +2050,13 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
 	struct btrfs_key key;
 	int ret;
 
+	if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
+		goto out;
+
 	key.objectid = btrfs_ino(inode);
 	key.type = BTRFS_EXTENT_DATA_KEY;
 	key.offset = offset;
 
-
 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 	if (ret < 0)
 		return ret;
@@ -2064,8 +2153,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	u64 drop_end;
 	int ret = 0;
 	int err = 0;
+	int rsv_count;
 	bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
 			  ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+	bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
 
 	ret = btrfs_wait_ordered_range(inode, offset, len);
 	if (ret)
@@ -2125,7 +2216,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	 * we need to try again.
 	 */
 	if ((!ordered ||
-	    (ordered->file_offset + ordered->len < lockstart ||
+	    (ordered->file_offset + ordered->len <= lockstart ||
 	     ordered->file_offset > lockend)) &&
 	     !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
 			     lockend, EXTENT_UPTODATE, 0,
@@ -2163,9 +2254,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	/*
 	 * 1 - update the inode
 	 * 1 - removing the extents in the range
-	 * 1 - adding the hole extent
+	 * 1 - adding the hole extent if no_holes isn't set
 	 */
-	trans = btrfs_start_transaction(root, 3);
+	rsv_count = no_holes ? 2 : 3;
+	trans = btrfs_start_transaction(root, rsv_count);
 	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
 		goto out_free;
@@ -2179,7 +2271,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2179 while (cur_offset < lockend) { 2271 while (cur_offset < lockend) {
2180 ret = __btrfs_drop_extents(trans, root, inode, path, 2272 ret = __btrfs_drop_extents(trans, root, inode, path,
2181 cur_offset, lockend + 1, 2273 cur_offset, lockend + 1,
2182 &drop_end, 1); 2274 &drop_end, 1, 0, 0, NULL);
2183 if (ret != -ENOSPC) 2275 if (ret != -ENOSPC)
2184 break; 2276 break;
2185 2277
@@ -2202,7 +2294,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2202 btrfs_end_transaction(trans, root); 2294 btrfs_end_transaction(trans, root);
2203 btrfs_btree_balance_dirty(root); 2295 btrfs_btree_balance_dirty(root);
2204 2296
2205 trans = btrfs_start_transaction(root, 3); 2297 trans = btrfs_start_transaction(root, rsv_count);
2206 if (IS_ERR(trans)) { 2298 if (IS_ERR(trans)) {
2207 ret = PTR_ERR(trans); 2299 ret = PTR_ERR(trans);
2208 trans = NULL; 2300 trans = NULL;
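btrfs_punch_hole() sizes its transaction by the number of tree operations it may perform, and with NO_HOLES the hole-extent insertion drops out, hence rsv_count. A sketch of that accounting (simplified; the unit naming is mine):

#include <stdbool.h>
#include <stdio.h>

/* One metadata reservation unit per tree operation the transaction
 * may perform, matching the comment in btrfs_punch_hole(). */
static int punch_hole_rsv_count(bool no_holes)
{
	int units = 2;		/* update the inode + remove the extents */

	if (!no_holes)
		units++;	/* add the replacement hole extent */
	return units;
}

int main(void)
{
	printf("legacy: %d units\n", punch_hole_rsv_count(false));
	printf("NO_HOLES: %d units\n", punch_hole_rsv_count(true));
	return 0;
}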
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 057be95b1e1e..73f3de7a083c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -347,8 +347,8 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
347 btrfs_readpage(NULL, page); 347 btrfs_readpage(NULL, page);
348 lock_page(page); 348 lock_page(page);
349 if (!PageUptodate(page)) { 349 if (!PageUptodate(page)) {
350 printk(KERN_ERR "btrfs: error reading free " 350 btrfs_err(BTRFS_I(inode)->root->fs_info,
351 "space cache\n"); 351 "error reading free space cache");
352 io_ctl_drop_pages(io_ctl); 352 io_ctl_drop_pages(io_ctl);
353 return -EIO; 353 return -EIO;
354 } 354 }
@@ -405,7 +405,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
405 405
406 gen = io_ctl->cur; 406 gen = io_ctl->cur;
407 if (le64_to_cpu(*gen) != generation) { 407 if (le64_to_cpu(*gen) != generation) {
408 printk_ratelimited(KERN_ERR "btrfs: space cache generation " 408 printk_ratelimited(KERN_ERR "BTRFS: space cache generation "
409 "(%Lu) does not match inode (%Lu)\n", *gen, 409 "(%Lu) does not match inode (%Lu)\n", *gen,
410 generation); 410 generation);
411 io_ctl_unmap_page(io_ctl); 411 io_ctl_unmap_page(io_ctl);
@@ -463,7 +463,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
463 PAGE_CACHE_SIZE - offset); 463 PAGE_CACHE_SIZE - offset);
464 btrfs_csum_final(crc, (char *)&crc); 464 btrfs_csum_final(crc, (char *)&crc);
465 if (val != crc) { 465 if (val != crc) {
466 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free " 466 printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free "
467 "space cache\n"); 467 "space cache\n");
468 io_ctl_unmap_page(io_ctl); 468 io_ctl_unmap_page(io_ctl);
469 return -EIO; 469 return -EIO;
@@ -1902,7 +1902,7 @@ out:
1902 spin_unlock(&ctl->tree_lock); 1902 spin_unlock(&ctl->tree_lock);
1903 1903
1904 if (ret) { 1904 if (ret) {
1905 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1905 printk(KERN_CRIT "BTRFS: unable to add free space :%d\n", ret);
1906 ASSERT(ret != -EEXIST); 1906 ASSERT(ret != -EEXIST);
1907 } 1907 }
1908 1908
@@ -2011,14 +2011,15 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
2011 info = rb_entry(n, struct btrfs_free_space, offset_index); 2011 info = rb_entry(n, struct btrfs_free_space, offset_index);
2012 if (info->bytes >= bytes && !block_group->ro) 2012 if (info->bytes >= bytes && !block_group->ro)
2013 count++; 2013 count++;
2014 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 2014 btrfs_crit(block_group->fs_info,
2015 info->offset, info->bytes, 2015 "entry offset %llu, bytes %llu, bitmap %s",
2016 info->offset, info->bytes,
2016 (info->bitmap) ? "yes" : "no"); 2017 (info->bitmap) ? "yes" : "no");
2017 } 2018 }
2018 printk(KERN_INFO "block group has cluster?: %s\n", 2019 btrfs_info(block_group->fs_info, "block group has cluster?: %s",
2019 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2020 list_empty(&block_group->cluster_list) ? "no" : "yes");
2020 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 2021 btrfs_info(block_group->fs_info,
2021 "\n", count); 2022 "%d blocks of free space at or bigger than bytes is", count);
2022} 2023}
2023 2024
2024void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2025void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
@@ -2421,7 +2422,6 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2421 struct btrfs_free_space *entry = NULL; 2422 struct btrfs_free_space *entry = NULL;
2422 struct btrfs_free_space *last; 2423 struct btrfs_free_space *last;
2423 struct rb_node *node; 2424 struct rb_node *node;
2424 u64 window_start;
2425 u64 window_free; 2425 u64 window_free;
2426 u64 max_extent; 2426 u64 max_extent;
2427 u64 total_size = 0; 2427 u64 total_size = 0;
@@ -2443,7 +2443,6 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2443 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2443 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2444 } 2444 }
2445 2445
2446 window_start = entry->offset;
2447 window_free = entry->bytes; 2446 window_free = entry->bytes;
2448 max_extent = entry->bytes; 2447 max_extent = entry->bytes;
2449 first = entry; 2448 first = entry;
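The free-space-cache hunks above swap bare printk() calls for the btrfs_err()/btrfs_crit()/btrfs_info() helpers, which tie each message to the filesystem it concerns. A userspace approximation of what such a helper buys (the struct and prefix format are assumptions, not the kernel's exact output):

#include <stdarg.h>
#include <stdio.h>

struct fs_info { const char *device; };	/* stand-in for btrfs_fs_info */

static void fs_err(const struct fs_info *fs, const char *fmt, ...)
{
	va_list ap;

	/* one line, one filesystem: easy to grep on a multi-disk box */
	fprintf(stderr, "BTRFS error (device %s): ", fs->device);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fputc('\n', stderr);
}

int main(void)
{
	struct fs_info fs = { "sdb1" };

	fs_err(&fs, "error reading free space cache");
	return 0;
}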
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
new file mode 100644
index 000000000000..85889aa82c62
--- /dev/null
+++ b/fs/btrfs/hash.c
@@ -0,0 +1,50 @@
1/*
2 * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13
14#include <crypto/hash.h>
15#include <linux/err.h>
16#include "hash.h"
17
18static struct crypto_shash *tfm;
19
20int __init btrfs_hash_init(void)
21{
22 tfm = crypto_alloc_shash("crc32c", 0, 0);
23 if (IS_ERR(tfm))
24 return PTR_ERR(tfm);
25
26 return 0;
27}
28
29void btrfs_hash_exit(void)
30{
31 crypto_free_shash(tfm);
32}
33
34u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
35{
36 struct {
37 struct shash_desc shash;
38 char ctx[crypto_shash_descsize(tfm)];
39 } desc;
40 int err;
41
42 desc.shash.tfm = tfm;
43 desc.shash.flags = 0;
44 *(u32 *)desc.ctx = crc;
45
46 err = crypto_shash_update(&desc.shash, address, length);
47 BUG_ON(err);
48
49 return *(u32 *)desc.ctx;
50}
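The new hash.c routes crc32c through the crypto API so the accelerated implementation is used when available. For reference, a self-contained userspace CRC32C with the same call shape (bitwise, reflected polynomial 0x82F63B78, assuming the no-final-XOR convention the kernel's crc32c() also uses):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bitwise CRC32C (Castagnoli); slow but dependency-free. */
static uint32_t crc32c(uint32_t crc, const void *address, size_t length)
{
	const uint8_t *p = address;

	while (length--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1u));
	}
	return crc;
}

int main(void)
{
	const char name[] = "foo";

	/* btrfs_name_hash() seeds with (u32)~1; the extref hash seeds
	 * with the parent objectid -- see hash.h just below */
	printf("name hash: 0x%08x\n",
	       crc32c((uint32_t)~1, name, strlen(name)));
	return 0;
}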
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 1d982812ab67..118a2316e5d3 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -19,10 +19,15 @@
19#ifndef __HASH__ 19#ifndef __HASH__
20#define __HASH__ 20#define __HASH__
21 21
22#include <linux/crc32c.h> 22int __init btrfs_hash_init(void);
23
24void btrfs_hash_exit(void);
25
26u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
27
23static inline u64 btrfs_name_hash(const char *name, int len) 28static inline u64 btrfs_name_hash(const char *name, int len)
24{ 29{
25 return crc32c((u32)~1, name, len); 30 return btrfs_crc32c((u32)~1, name, len);
26} 31}
27 32
28/* 33/*
@@ -31,7 +36,7 @@ static inline u64 btrfs_name_hash(const char *name, int len)
31static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, 36static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
32 int len) 37 int len)
33{ 38{
34 return (u64) crc32c(parent_objectid, name, len); 39 return (u64) btrfs_crc32c(parent_objectid, name, len);
35} 40}
36 41
37#endif 42#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index ec82fae07097..2be38df703c9 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -91,32 +91,6 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
91 return 0; 91 return 0;
92} 92}
93 93
94static struct btrfs_inode_ref *
95btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
96 struct btrfs_root *root,
97 struct btrfs_path *path,
98 const char *name, int name_len,
99 u64 inode_objectid, u64 ref_objectid, int ins_len,
100 int cow)
101{
102 int ret;
103 struct btrfs_key key;
104 struct btrfs_inode_ref *ref;
105
106 key.objectid = inode_objectid;
107 key.type = BTRFS_INODE_REF_KEY;
108 key.offset = ref_objectid;
109
110 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
111 if (ret < 0)
112 return ERR_PTR(ret);
113 if (ret > 0)
114 return NULL;
115 if (!find_name_in_backref(path, name, name_len, &ref))
116 return NULL;
117 return ref;
118}
119
120/* Returns NULL if no extref found */ 94/* Returns NULL if no extref found */
121struct btrfs_inode_extref * 95struct btrfs_inode_extref *
122btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, 96btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
@@ -144,45 +118,6 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
144 return extref; 118 return extref;
145} 119}
146 120
147int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
148 struct btrfs_root *root,
149 struct btrfs_path *path,
150 const char *name, int name_len,
151 u64 inode_objectid, u64 ref_objectid, int mod,
152 u64 *ret_index)
153{
154 struct btrfs_inode_ref *ref;
155 struct btrfs_inode_extref *extref;
156 int ins_len = mod < 0 ? -1 : 0;
157 int cow = mod != 0;
158
159 ref = btrfs_lookup_inode_ref(trans, root, path, name, name_len,
160 inode_objectid, ref_objectid, ins_len,
161 cow);
162 if (IS_ERR(ref))
163 return PTR_ERR(ref);
164
165 if (ref != NULL) {
166 *ret_index = btrfs_inode_ref_index(path->nodes[0], ref);
167 return 0;
168 }
169
170 btrfs_release_path(path);
171
172 extref = btrfs_lookup_inode_extref(trans, root, path, name,
173 name_len, inode_objectid,
174 ref_objectid, ins_len, cow);
175 if (IS_ERR(extref))
176 return PTR_ERR(extref);
177
178 if (extref) {
179 *ret_index = btrfs_inode_extref_index(path->nodes[0], extref);
180 return 0;
181 }
182
183 return -ENOENT;
184}
185
186static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, 121static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
187 struct btrfs_root *root, 122 struct btrfs_root *root,
188 const char *name, int name_len, 123 const char *name, int name_len,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d546d8c3038b..5c4ab9c18940 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -58,9 +58,10 @@
58#include "inode-map.h" 58#include "inode-map.h"
59#include "backref.h" 59#include "backref.h"
60#include "hash.h" 60#include "hash.h"
61#include "props.h"
61 62
62struct btrfs_iget_args { 63struct btrfs_iget_args {
63 u64 ino; 64 struct btrfs_key *location;
64 struct btrfs_root *root; 65 struct btrfs_root *root;
65}; 66};
66 67
@@ -125,13 +126,12 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
125 * no overlapping inline items exist in the btree 126 * no overlapping inline items exist in the btree
126 */ 127 */
127static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, 128static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
129 struct btrfs_path *path, int extent_inserted,
128 struct btrfs_root *root, struct inode *inode, 130 struct btrfs_root *root, struct inode *inode,
129 u64 start, size_t size, size_t compressed_size, 131 u64 start, size_t size, size_t compressed_size,
130 int compress_type, 132 int compress_type,
131 struct page **compressed_pages) 133 struct page **compressed_pages)
132{ 134{
133 struct btrfs_key key;
134 struct btrfs_path *path;
135 struct extent_buffer *leaf; 135 struct extent_buffer *leaf;
136 struct page *page = NULL; 136 struct page *page = NULL;
137 char *kaddr; 137 char *kaddr;
@@ -140,29 +140,29 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
140 int err = 0; 140 int err = 0;
141 int ret; 141 int ret;
142 size_t cur_size = size; 142 size_t cur_size = size;
143 size_t datasize;
144 unsigned long offset; 143 unsigned long offset;
145 144
146 if (compressed_size && compressed_pages) 145 if (compressed_size && compressed_pages)
147 cur_size = compressed_size; 146 cur_size = compressed_size;
148 147
149 path = btrfs_alloc_path(); 148 inode_add_bytes(inode, size);
150 if (!path)
151 return -ENOMEM;
152 149
153 path->leave_spinning = 1; 150 if (!extent_inserted) {
151 struct btrfs_key key;
152 size_t datasize;
154 153
155 key.objectid = btrfs_ino(inode); 154 key.objectid = btrfs_ino(inode);
156 key.offset = start; 155 key.offset = start;
157 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 156 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
158 datasize = btrfs_file_extent_calc_inline_size(cur_size);
159 157
160 inode_add_bytes(inode, size); 158 datasize = btrfs_file_extent_calc_inline_size(cur_size);
161 ret = btrfs_insert_empty_item(trans, root, path, &key, 159 path->leave_spinning = 1;
162 datasize); 160 ret = btrfs_insert_empty_item(trans, root, path, &key,
163 if (ret) { 161 datasize);
164 err = ret; 162 if (ret) {
165 goto fail; 163 err = ret;
164 goto fail;
165 }
166 } 166 }
167 leaf = path->nodes[0]; 167 leaf = path->nodes[0];
168 ei = btrfs_item_ptr(leaf, path->slots[0], 168 ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -203,7 +203,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
203 page_cache_release(page); 203 page_cache_release(page);
204 } 204 }
205 btrfs_mark_buffer_dirty(leaf); 205 btrfs_mark_buffer_dirty(leaf);
206 btrfs_free_path(path); 206 btrfs_release_path(path);
207 207
208 /* 208 /*
209 * we're an inline extent, so nobody can 209 * we're an inline extent, so nobody can
@@ -219,7 +219,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
219 219
220 return ret; 220 return ret;
221fail: 221fail:
222 btrfs_free_path(path);
223 return err; 222 return err;
224} 223}
225 224
@@ -242,6 +241,9 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
242 u64 aligned_end = ALIGN(end, root->sectorsize); 241 u64 aligned_end = ALIGN(end, root->sectorsize);
243 u64 data_len = inline_len; 242 u64 data_len = inline_len;
244 int ret; 243 int ret;
244 struct btrfs_path *path;
245 int extent_inserted = 0;
246 u32 extent_item_size;
245 247
246 if (compressed_size) 248 if (compressed_size)
247 data_len = compressed_size; 249 data_len = compressed_size;
@@ -256,12 +258,27 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
256 return 1; 258 return 1;
257 } 259 }
258 260
261 path = btrfs_alloc_path();
262 if (!path)
263 return -ENOMEM;
264
259 trans = btrfs_join_transaction(root); 265 trans = btrfs_join_transaction(root);
260 if (IS_ERR(trans)) 266 if (IS_ERR(trans)) {
267 btrfs_free_path(path);
261 return PTR_ERR(trans); 268 return PTR_ERR(trans);
269 }
262 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 270 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
263 271
264 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); 272 if (compressed_size && compressed_pages)
273 extent_item_size = btrfs_file_extent_calc_inline_size(
274 compressed_size);
275 else
276 extent_item_size = btrfs_file_extent_calc_inline_size(
277 inline_len);
278
279 ret = __btrfs_drop_extents(trans, root, inode, path,
280 start, aligned_end, NULL,
281 1, 1, extent_item_size, &extent_inserted);
265 if (ret) { 282 if (ret) {
266 btrfs_abort_transaction(trans, root, ret); 283 btrfs_abort_transaction(trans, root, ret);
267 goto out; 284 goto out;
@@ -269,7 +286,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
269 286
270 if (isize > actual_end) 287 if (isize > actual_end)
271 inline_len = min_t(u64, isize, actual_end); 288 inline_len = min_t(u64, isize, actual_end);
272 ret = insert_inline_extent(trans, root, inode, start, 289 ret = insert_inline_extent(trans, path, extent_inserted,
290 root, inode, start,
273 inline_len, compressed_size, 291 inline_len, compressed_size,
274 compress_type, compressed_pages); 292 compress_type, compressed_pages);
275 if (ret && ret != -ENOSPC) { 293 if (ret && ret != -ENOSPC) {
@@ -284,6 +302,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
284 btrfs_delalloc_release_metadata(inode, end + 1 - start); 302 btrfs_delalloc_release_metadata(inode, end + 1 - start);
285 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 303 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
286out: 304out:
305 btrfs_free_path(path);
287 btrfs_end_transaction(trans, root); 306 btrfs_end_transaction(trans, root);
288 return ret; 307 return ret;
289} 308}
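cow_file_range_inline() now hands a preallocated path to __btrfs_drop_extents(), which can insert the empty replacement item while it still holds the leaf and report that through extent_inserted, saving a second tree search. A heavily reduced model of the contract (names and logic simplified):

#include <stdbool.h>
#include <stdio.h>

/* If the caller wants a replacement item and the drop freed room for
 * it, insert it in the same pass and say so via *extent_inserted. */
static int drop_extents(bool replace, unsigned int item_size,
			bool *extent_inserted)
{
	*extent_inserted = false;
	/* ... drop the overlapping items ... */
	if (replace && item_size > 0)
		*extent_inserted = true;	/* path already positioned */
	return 0;
}

int main(void)
{
	bool inserted;

	drop_extents(true, 53, &inserted);
	if (inserted)
		puts("reuse the path, fill the pre-inserted item");
	else
		puts("fall back to a second search + insert_empty_item");
	return 0;
}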
@@ -1262,7 +1281,8 @@ next_slot:
1262 nocow = 1; 1281 nocow = 1;
1263 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 1282 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1264 extent_end = found_key.offset + 1283 extent_end = found_key.offset +
1265 btrfs_file_extent_inline_len(leaf, fi); 1284 btrfs_file_extent_inline_len(leaf,
1285 path->slots[0], fi);
1266 extent_end = ALIGN(extent_end, root->sectorsize); 1286 extent_end = ALIGN(extent_end, root->sectorsize);
1267 } else { 1287 } else {
1268 BUG_ON(1); 1288 BUG_ON(1);
@@ -1841,14 +1861,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1841 struct btrfs_path *path; 1861 struct btrfs_path *path;
1842 struct extent_buffer *leaf; 1862 struct extent_buffer *leaf;
1843 struct btrfs_key ins; 1863 struct btrfs_key ins;
1864 int extent_inserted = 0;
1844 int ret; 1865 int ret;
1845 1866
1846 path = btrfs_alloc_path(); 1867 path = btrfs_alloc_path();
1847 if (!path) 1868 if (!path)
1848 return -ENOMEM; 1869 return -ENOMEM;
1849 1870
1850 path->leave_spinning = 1;
1851
1852 /* 1871 /*
1853 * we may be replacing one extent in the tree with another. 1872 * we may be replacing one extent in the tree with another.
1854 * The new extent is pinned in the extent map, and we don't want 1873 * The new extent is pinned in the extent map, and we don't want
@@ -1858,17 +1877,23 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1858 * the caller is expected to unpin it and allow it to be merged 1877 * the caller is expected to unpin it and allow it to be merged
1859 * with the others. 1878 * with the others.
1860 */ 1879 */
1861 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1880 ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
1862 file_pos + num_bytes, 0); 1881 file_pos + num_bytes, NULL, 0,
1882 1, sizeof(*fi), &extent_inserted);
1863 if (ret) 1883 if (ret)
1864 goto out; 1884 goto out;
1865 1885
1866 ins.objectid = btrfs_ino(inode); 1886 if (!extent_inserted) {
1867 ins.offset = file_pos; 1887 ins.objectid = btrfs_ino(inode);
1868 ins.type = BTRFS_EXTENT_DATA_KEY; 1888 ins.offset = file_pos;
1869 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1889 ins.type = BTRFS_EXTENT_DATA_KEY;
1870 if (ret) 1890
1871 goto out; 1891 path->leave_spinning = 1;
1892 ret = btrfs_insert_empty_item(trans, root, path, &ins,
1893 sizeof(*fi));
1894 if (ret)
1895 goto out;
1896 }
1872 leaf = path->nodes[0]; 1897 leaf = path->nodes[0];
1873 fi = btrfs_item_ptr(leaf, path->slots[0], 1898 fi = btrfs_item_ptr(leaf, path->slots[0],
1874 struct btrfs_file_extent_item); 1899 struct btrfs_file_extent_item);
@@ -2290,7 +2315,7 @@ again:
2290 u64 extent_len; 2315 u64 extent_len;
2291 struct btrfs_key found_key; 2316 struct btrfs_key found_key;
2292 2317
2293 ret = btrfs_search_slot(trans, root, &key, path, 1, 1); 2318 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2294 if (ret < 0) 2319 if (ret < 0)
2295 goto out_free_path; 2320 goto out_free_path;
2296 2321
@@ -2543,12 +2568,6 @@ out_kfree:
2543 return NULL; 2568 return NULL;
2544} 2569}
2545 2570
2546/*
2547 * helper function for btrfs_finish_ordered_io, this
2548 * just reads in some of the csum leaves to prime them into ram
2549 * before we start the transaction. It limits the amount of btree
2550 * reads required while inside the transaction.
2551 */
2552/* as ordered data IO finishes, this gets called so we can finish 2571/* as ordered data IO finishes, this gets called so we can finish
2553 * an ordered extent if the range of bytes in the file it covers are 2572 * an ordered extent if the range of bytes in the file it covers are
2554 * fully written. 2573 * fully written.
@@ -3248,7 +3267,8 @@ out:
3248 * slot is the slot the inode is in, objectid is the objectid of the inode 3267 * slot is the slot the inode is in, objectid is the objectid of the inode
3249 */ 3268 */
3250static noinline int acls_after_inode_item(struct extent_buffer *leaf, 3269static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3251 int slot, u64 objectid) 3270 int slot, u64 objectid,
3271 int *first_xattr_slot)
3252{ 3272{
3253 u32 nritems = btrfs_header_nritems(leaf); 3273 u32 nritems = btrfs_header_nritems(leaf);
3254 struct btrfs_key found_key; 3274 struct btrfs_key found_key;
@@ -3264,6 +3284,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3264 } 3284 }
3265 3285
3266 slot++; 3286 slot++;
3287 *first_xattr_slot = -1;
3267 while (slot < nritems) { 3288 while (slot < nritems) {
3268 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3289 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3269 3290
@@ -3273,6 +3294,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3273 3294
3274 /* we found an xattr, assume we've got an acl */ 3295 /* we found an xattr, assume we've got an acl */
3275 if (found_key.type == BTRFS_XATTR_ITEM_KEY) { 3296 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3297 if (*first_xattr_slot == -1)
3298 *first_xattr_slot = slot;
3276 if (found_key.offset == xattr_access || 3299 if (found_key.offset == xattr_access ||
3277 found_key.offset == xattr_default) 3300 found_key.offset == xattr_default)
3278 return 1; 3301 return 1;
@@ -3301,6 +3324,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3301 * something larger than an xattr. We have to assume the inode 3324 * something larger than an xattr. We have to assume the inode
3302 * has acls 3325 * has acls
3303 */ 3326 */
3327 if (*first_xattr_slot == -1)
3328 *first_xattr_slot = slot;
3304 return 1; 3329 return 1;
3305} 3330}
3306 3331
@@ -3315,10 +3340,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
3315 struct btrfs_timespec *tspec; 3340 struct btrfs_timespec *tspec;
3316 struct btrfs_root *root = BTRFS_I(inode)->root; 3341 struct btrfs_root *root = BTRFS_I(inode)->root;
3317 struct btrfs_key location; 3342 struct btrfs_key location;
3343 unsigned long ptr;
3318 int maybe_acls; 3344 int maybe_acls;
3319 u32 rdev; 3345 u32 rdev;
3320 int ret; 3346 int ret;
3321 bool filled = false; 3347 bool filled = false;
3348 int first_xattr_slot;
3322 3349
3323 ret = btrfs_fill_inode(inode, &rdev); 3350 ret = btrfs_fill_inode(inode, &rdev);
3324 if (!ret) 3351 if (!ret)
@@ -3328,7 +3355,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
3328 if (!path) 3355 if (!path)
3329 goto make_bad; 3356 goto make_bad;
3330 3357
3331 path->leave_spinning = 1;
3332 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 3358 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3333 3359
3334 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3360 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -3338,7 +3364,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
3338 leaf = path->nodes[0]; 3364 leaf = path->nodes[0];
3339 3365
3340 if (filled) 3366 if (filled)
3341 goto cache_acl; 3367 goto cache_index;
3342 3368
3343 inode_item = btrfs_item_ptr(leaf, path->slots[0], 3369 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3344 struct btrfs_inode_item); 3370 struct btrfs_inode_item);
@@ -3381,18 +3407,51 @@ static void btrfs_read_locked_inode(struct inode *inode)
3381 3407
3382 BTRFS_I(inode)->index_cnt = (u64)-1; 3408 BTRFS_I(inode)->index_cnt = (u64)-1;
3383 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 3409 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
3410
3411cache_index:
3412 path->slots[0]++;
3413 if (inode->i_nlink != 1 ||
3414 path->slots[0] >= btrfs_header_nritems(leaf))
3415 goto cache_acl;
3416
3417 btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
3418 if (location.objectid != btrfs_ino(inode))
3419 goto cache_acl;
3420
3421 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3422 if (location.type == BTRFS_INODE_REF_KEY) {
3423 struct btrfs_inode_ref *ref;
3424
3425 ref = (struct btrfs_inode_ref *)ptr;
3426 BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
3427 } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
3428 struct btrfs_inode_extref *extref;
3429
3430 extref = (struct btrfs_inode_extref *)ptr;
3431 BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
3432 extref);
3433 }
3384cache_acl: 3434cache_acl:
3385 /* 3435 /*
3386 * try to precache a NULL acl entry for files that don't have 3436 * try to precache a NULL acl entry for files that don't have
3387 * any xattrs or acls 3437 * any xattrs or acls
3388 */ 3438 */
3389 maybe_acls = acls_after_inode_item(leaf, path->slots[0], 3439 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
3390 btrfs_ino(inode)); 3440 btrfs_ino(inode), &first_xattr_slot);
3441 if (first_xattr_slot != -1) {
3442 path->slots[0] = first_xattr_slot;
3443 ret = btrfs_load_inode_props(inode, path);
3444 if (ret)
3445 btrfs_err(root->fs_info,
3446 "error loading props for ino %llu (root %llu): %d\n",
3447 btrfs_ino(inode),
3448 root->root_key.objectid, ret);
3449 }
3450 btrfs_free_path(path);
3451
3391 if (!maybe_acls) 3452 if (!maybe_acls)
3392 cache_no_acl(inode); 3453 cache_no_acl(inode);
3393 3454
3394 btrfs_free_path(path);
3395
3396 switch (inode->i_mode & S_IFMT) { 3455 switch (inode->i_mode & S_IFMT) {
3397 case S_IFREG: 3456 case S_IFREG:
3398 inode->i_mapping->a_ops = &btrfs_aops; 3457 inode->i_mapping->a_ops = &btrfs_aops;
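The new cache_index block relies on btrfs key ordering: keys sort by (objectid, type, offset), and BTRFS_INODE_REF_KEY (12) / BTRFS_INODE_EXTREF_KEY (13) sort just above BTRFS_INODE_ITEM_KEY (1) for the same objectid, so for a single-link inode the ref item usually sits in the very next leaf slot. A sketch of that comparison (the offset values are made up):

#include <stdint.h>
#include <stdio.h>

struct key { uint64_t objectid; uint8_t type; uint64_t offset; };

static int key_cmp(struct key a, struct key b)
{
	if (a.objectid != b.objectid)
		return a.objectid < b.objectid ? -1 : 1;
	if (a.type != b.type)
		return a.type < b.type ? -1 : 1;
	if (a.offset != b.offset)
		return a.offset < b.offset ? -1 : 1;
	return 0;
}

int main(void)
{
	struct key item = { 257,  1, 0 };	/* BTRFS_INODE_ITEM_KEY */
	struct key ref  = { 257, 12, 256 };	/* BTRFS_INODE_REF_KEY  */

	/* the ref sorts immediately after the inode item, which is why
	 * path->slots[0]++ is usually enough to reach it */
	printf("ref after item: %s\n",
	       key_cmp(item, ref) < 0 ? "yes" : "no");
	return 0;
}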
@@ -3496,7 +3555,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3496 goto failed; 3555 goto failed;
3497 } 3556 }
3498 3557
3499 btrfs_unlock_up_safe(path, 1);
3500 leaf = path->nodes[0]; 3558 leaf = path->nodes[0];
3501 inode_item = btrfs_item_ptr(leaf, path->slots[0], 3559 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3502 struct btrfs_inode_item); 3560 struct btrfs_inode_item);
@@ -3593,6 +3651,24 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3593 goto err; 3651 goto err;
3594 btrfs_release_path(path); 3652 btrfs_release_path(path);
3595 3653
3654 /*
3655 * If we don't have dir index, we have to get it by looking up
3656 * the inode ref, since we get the inode ref, remove it directly,
3657 * it is unnecessary to do delayed deletion.
3658 *
3659 * But if we have dir index, needn't search inode ref to get it.
3660 * Since the inode ref is close to the inode item, it is better
3661 * that we delay to delete it, and just do this deletion when
3662 * we update the inode item.
3663 */
3664 if (BTRFS_I(inode)->dir_index) {
3665 ret = btrfs_delayed_delete_inode_ref(inode);
3666 if (!ret) {
3667 index = BTRFS_I(inode)->dir_index;
3668 goto skip_backref;
3669 }
3670 }
3671
3596 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, 3672 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
3597 dir_ino, &index); 3673 dir_ino, &index);
3598 if (ret) { 3674 if (ret) {
@@ -3602,7 +3678,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3602 btrfs_abort_transaction(trans, root, ret); 3678 btrfs_abort_transaction(trans, root, ret);
3603 goto err; 3679 goto err;
3604 } 3680 }
3605 3681skip_backref:
3606 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); 3682 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3607 if (ret) { 3683 if (ret) {
3608 btrfs_abort_transaction(trans, root, ret); 3684 btrfs_abort_transaction(trans, root, ret);
@@ -3948,7 +4024,7 @@ search_again:
3948 btrfs_file_extent_num_bytes(leaf, fi); 4024 btrfs_file_extent_num_bytes(leaf, fi);
3949 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 4025 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
3950 item_end += btrfs_file_extent_inline_len(leaf, 4026 item_end += btrfs_file_extent_inline_len(leaf,
3951 fi); 4027 path->slots[0], fi);
3952 } 4028 }
3953 item_end--; 4029 item_end--;
3954 } 4030 }
@@ -4018,6 +4094,12 @@ search_again:
4018 inode_sub_bytes(inode, item_end + 1 - 4094 inode_sub_bytes(inode, item_end + 1 -
4019 new_size); 4095 new_size);
4020 } 4096 }
4097
4098 /*
4099 * update the ram bytes to properly reflect
4100 * the new size of our item
4101 */
4102 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
4021 size = 4103 size =
4022 btrfs_file_extent_calc_inline_size(size); 4104 btrfs_file_extent_calc_inline_size(size);
4023 btrfs_truncate_item(root, path, size, 1); 4105 btrfs_truncate_item(root, path, size, 1);
@@ -4203,6 +4285,49 @@ out:
4203 return ret; 4285 return ret;
4204} 4286}
4205 4287
4288static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
4289 u64 offset, u64 len)
4290{
4291 struct btrfs_trans_handle *trans;
4292 int ret;
4293
4294 /*
4295 * Still need to make sure the inode looks like it's been updated so
4296 * that any holes get logged if we fsync.
4297 */
4298 if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
4299 BTRFS_I(inode)->last_trans = root->fs_info->generation;
4300 BTRFS_I(inode)->last_sub_trans = root->log_transid;
4301 BTRFS_I(inode)->last_log_commit = root->last_log_commit;
4302 return 0;
4303 }
4304
4305 /*
4306 * 1 - for the one we're dropping
4307 * 1 - for the one we're adding
4308 * 1 - for updating the inode.
4309 */
4310 trans = btrfs_start_transaction(root, 3);
4311 if (IS_ERR(trans))
4312 return PTR_ERR(trans);
4313
4314 ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
4315 if (ret) {
4316 btrfs_abort_transaction(trans, root, ret);
4317 btrfs_end_transaction(trans, root);
4318 return ret;
4319 }
4320
4321 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
4322 0, 0, len, 0, len, 0, 0, 0);
4323 if (ret)
4324 btrfs_abort_transaction(trans, root, ret);
4325 else
4326 btrfs_update_inode(trans, root, inode);
4327 btrfs_end_transaction(trans, root);
4328 return ret;
4329}
4330
4206/* 4331/*
4207 * This function puts in dummy file extents for the area we're creating a hole 4332 * This function puts in dummy file extents for the area we're creating a hole
4208 * for. So if we are truncating this file to a larger size we need to insert 4333 * for. So if we are truncating this file to a larger size we need to insert
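maybe_insert_hole() concentrates the NO_HOLES decision: with the feature enabled no transaction is started at all, only the in-memory bookkeeping that makes a later fsync log the hole. A reduced control-flow sketch (the bookkeeping fields are elided):

#include <stdbool.h>
#include <stdio.h>

static int maybe_insert_hole(bool no_holes)
{
	if (no_holes) {
		/* just mark the inode changed for logging purposes:
		 * last_trans / last_sub_trans / last_log_commit */
		return 0;
	}
	/* otherwise: start a 3-unit transaction, drop the old extents,
	 * insert the hole file extent, update the inode */
	return 0;
}

int main(void)
{
	maybe_insert_hole(true);	/* NO_HOLES: no transaction needed */
	maybe_insert_hole(false);	/* legacy: full drop + insert + update */
	puts("done");
	return 0;
}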
@@ -4211,7 +4336,6 @@ out:
4211 */ 4336 */
4212int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) 4337int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4213{ 4338{
4214 struct btrfs_trans_handle *trans;
4215 struct btrfs_root *root = BTRFS_I(inode)->root; 4339 struct btrfs_root *root = BTRFS_I(inode)->root;
4216 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 4340 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4217 struct extent_map *em = NULL; 4341 struct extent_map *em = NULL;
@@ -4266,31 +4390,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4266 struct extent_map *hole_em; 4390 struct extent_map *hole_em;
4267 hole_size = last_byte - cur_offset; 4391 hole_size = last_byte - cur_offset;
4268 4392
4269 trans = btrfs_start_transaction(root, 3); 4393 err = maybe_insert_hole(root, inode, cur_offset,
4270 if (IS_ERR(trans)) { 4394 hole_size);
4271 err = PTR_ERR(trans); 4395 if (err)
4272 break;
4273 }
4274
4275 err = btrfs_drop_extents(trans, root, inode,
4276 cur_offset,
4277 cur_offset + hole_size, 1);
4278 if (err) {
4279 btrfs_abort_transaction(trans, root, err);
4280 btrfs_end_transaction(trans, root);
4281 break;
4282 }
4283
4284 err = btrfs_insert_file_extent(trans, root,
4285 btrfs_ino(inode), cur_offset, 0,
4286 0, hole_size, 0, hole_size,
4287 0, 0, 0);
4288 if (err) {
4289 btrfs_abort_transaction(trans, root, err);
4290 btrfs_end_transaction(trans, root);
4291 break; 4396 break;
4292 }
4293
4294 btrfs_drop_extent_cache(inode, cur_offset, 4397 btrfs_drop_extent_cache(inode, cur_offset,
4295 cur_offset + hole_size - 1, 0); 4398 cur_offset + hole_size - 1, 0);
4296 hole_em = alloc_extent_map(); 4399 hole_em = alloc_extent_map();
@@ -4309,7 +4412,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4309 hole_em->ram_bytes = hole_size; 4412 hole_em->ram_bytes = hole_size;
4310 hole_em->bdev = root->fs_info->fs_devices->latest_bdev; 4413 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
4311 hole_em->compress_type = BTRFS_COMPRESS_NONE; 4414 hole_em->compress_type = BTRFS_COMPRESS_NONE;
4312 hole_em->generation = trans->transid; 4415 hole_em->generation = root->fs_info->generation;
4313 4416
4314 while (1) { 4417 while (1) {
4315 write_lock(&em_tree->lock); 4418 write_lock(&em_tree->lock);
@@ -4322,17 +4425,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4322 hole_size - 1, 0); 4425 hole_size - 1, 0);
4323 } 4426 }
4324 free_extent_map(hole_em); 4427 free_extent_map(hole_em);
4325next:
4326 btrfs_update_inode(trans, root, inode);
4327 btrfs_end_transaction(trans, root);
4328 } 4428 }
4429next:
4329 free_extent_map(em); 4430 free_extent_map(em);
4330 em = NULL; 4431 em = NULL;
4331 cur_offset = last_byte; 4432 cur_offset = last_byte;
4332 if (cur_offset >= block_end) 4433 if (cur_offset >= block_end)
4333 break; 4434 break;
4334 } 4435 }
4335
4336 free_extent_map(em); 4436 free_extent_map(em);
4337 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, 4437 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
4338 GFP_NOFS); 4438 GFP_NOFS);
@@ -4474,6 +4574,64 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
4474 return err; 4574 return err;
4475} 4575}
4476 4576
4577/*
4578 * While truncating the inode pages during eviction, we get the VFS calling
4579 * btrfs_invalidatepage() against each page of the inode. This is slow because
4580 * the calls to btrfs_invalidatepage() result in a huge amount of calls to
4581 * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
4582 * extent_state structures over and over, wasting lots of time.
4583 *
4584 * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
4585 * those expensive operations on a per page basis and do only the ordered io
4586 * finishing, while we release here the extent_map and extent_state structures,
4587 * without the excessive merging and splitting.
4588 */
4589static void evict_inode_truncate_pages(struct inode *inode)
4590{
4591 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4592 struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
4593 struct rb_node *node;
4594
4595 ASSERT(inode->i_state & I_FREEING);
4596 truncate_inode_pages(&inode->i_data, 0);
4597
4598 write_lock(&map_tree->lock);
4599 while (!RB_EMPTY_ROOT(&map_tree->map)) {
4600 struct extent_map *em;
4601
4602 node = rb_first(&map_tree->map);
4603 em = rb_entry(node, struct extent_map, rb_node);
4604 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
4605 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
4606 remove_extent_mapping(map_tree, em);
4607 free_extent_map(em);
4608 }
4609 write_unlock(&map_tree->lock);
4610
4611 spin_lock(&io_tree->lock);
4612 while (!RB_EMPTY_ROOT(&io_tree->state)) {
4613 struct extent_state *state;
4614 struct extent_state *cached_state = NULL;
4615
4616 node = rb_first(&io_tree->state);
4617 state = rb_entry(node, struct extent_state, rb_node);
4618 atomic_inc(&state->refs);
4619 spin_unlock(&io_tree->lock);
4620
4621 lock_extent_bits(io_tree, state->start, state->end,
4622 0, &cached_state);
4623 clear_extent_bit(io_tree, state->start, state->end,
4624 EXTENT_LOCKED | EXTENT_DIRTY |
4625 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
4626 EXTENT_DEFRAG, 1, 1,
4627 &cached_state, GFP_NOFS);
4628 free_extent_state(state);
4629
4630 spin_lock(&io_tree->lock);
4631 }
4632 spin_unlock(&io_tree->lock);
4633}
4634
4477void btrfs_evict_inode(struct inode *inode) 4635void btrfs_evict_inode(struct inode *inode)
4478{ 4636{
4479 struct btrfs_trans_handle *trans; 4637 struct btrfs_trans_handle *trans;
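evict_inode_truncate_pages() tears the extent_map and extent_state trees down wholesale instead of letting per-page invalidation split and merge them thousands of times. The drain loops share a common shape, sketched here on a plain list (the rbtree and locking are elided):

#include <stdio.h>
#include <stdlib.h>

struct state { struct state *next; long start, end; };

/* Keep detaching the first entry until the structure is empty --
 * the same pattern as the rb_first()/remove/free loops above. */
static void drain(struct state **head)
{
	while (*head) {
		struct state *s = *head;

		*head = s->next;
		free(s);
	}
}

int main(void)
{
	struct state *head = NULL;

	for (long i = 0; i < 3; i++) {
		struct state *s = malloc(sizeof(*s));

		if (!s)
			return 1;
		s->start = i * 4096;
		s->end = s->start + 4095;
		s->next = head;
		head = s;
	}
	drain(&head);
	printf("drained: %s\n", head ? "no" : "yes");
	return 0;
}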
@@ -4484,7 +4642,8 @@ void btrfs_evict_inode(struct inode *inode)
4484 4642
4485 trace_btrfs_inode_evict(inode); 4643 trace_btrfs_inode_evict(inode);
4486 4644
4487 truncate_inode_pages(&inode->i_data, 0); 4645 evict_inode_truncate_pages(inode);
4646
4488 if (inode->i_nlink && 4647 if (inode->i_nlink &&
4489 ((btrfs_root_refs(&root->root_item) != 0 && 4648 ((btrfs_root_refs(&root->root_item) != 0 &&
4490 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) || 4649 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
@@ -4659,9 +4818,9 @@ static int fixup_tree_root_location(struct btrfs_root *root,
4659 } 4818 }
4660 4819
4661 err = -ENOENT; 4820 err = -ENOENT;
4662 ret = btrfs_find_root_ref(root->fs_info->tree_root, path, 4821 ret = btrfs_find_item(root->fs_info->tree_root, path,
4663 BTRFS_I(dir)->root->root_key.objectid, 4822 BTRFS_I(dir)->root->root_key.objectid,
4664 location->objectid); 4823 location->objectid, BTRFS_ROOT_REF_KEY, NULL);
4665 if (ret) { 4824 if (ret) {
4666 if (ret < 0) 4825 if (ret < 0)
4667 err = ret; 4826 err = ret;
@@ -4822,7 +4981,9 @@ again:
4822static int btrfs_init_locked_inode(struct inode *inode, void *p) 4981static int btrfs_init_locked_inode(struct inode *inode, void *p)
4823{ 4982{
4824 struct btrfs_iget_args *args = p; 4983 struct btrfs_iget_args *args = p;
4825 inode->i_ino = args->ino; 4984 inode->i_ino = args->location->objectid;
4985 memcpy(&BTRFS_I(inode)->location, args->location,
4986 sizeof(*args->location));
4826 BTRFS_I(inode)->root = args->root; 4987 BTRFS_I(inode)->root = args->root;
4827 return 0; 4988 return 0;
4828} 4989}
@@ -4830,19 +4991,19 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
4830static int btrfs_find_actor(struct inode *inode, void *opaque) 4991static int btrfs_find_actor(struct inode *inode, void *opaque)
4831{ 4992{
4832 struct btrfs_iget_args *args = opaque; 4993 struct btrfs_iget_args *args = opaque;
4833 return args->ino == btrfs_ino(inode) && 4994 return args->location->objectid == BTRFS_I(inode)->location.objectid &&
4834 args->root == BTRFS_I(inode)->root; 4995 args->root == BTRFS_I(inode)->root;
4835} 4996}
4836 4997
4837static struct inode *btrfs_iget_locked(struct super_block *s, 4998static struct inode *btrfs_iget_locked(struct super_block *s,
4838 u64 objectid, 4999 struct btrfs_key *location,
4839 struct btrfs_root *root) 5000 struct btrfs_root *root)
4840{ 5001{
4841 struct inode *inode; 5002 struct inode *inode;
4842 struct btrfs_iget_args args; 5003 struct btrfs_iget_args args;
4843 unsigned long hashval = btrfs_inode_hash(objectid, root); 5004 unsigned long hashval = btrfs_inode_hash(location->objectid, root);
4844 5005
4845 args.ino = objectid; 5006 args.location = location;
4846 args.root = root; 5007 args.root = root;
4847 5008
4848 inode = iget5_locked(s, hashval, btrfs_find_actor, 5009 inode = iget5_locked(s, hashval, btrfs_find_actor,
@@ -4859,13 +5020,11 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
4859{ 5020{
4860 struct inode *inode; 5021 struct inode *inode;
4861 5022
4862 inode = btrfs_iget_locked(s, location->objectid, root); 5023 inode = btrfs_iget_locked(s, location, root);
4863 if (!inode) 5024 if (!inode)
4864 return ERR_PTR(-ENOMEM); 5025 return ERR_PTR(-ENOMEM);
4865 5026
4866 if (inode->i_state & I_NEW) { 5027 if (inode->i_state & I_NEW) {
4867 BTRFS_I(inode)->root = root;
4868 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
4869 btrfs_read_locked_inode(inode); 5028 btrfs_read_locked_inode(inode);
4870 if (!is_bad_inode(inode)) { 5029 if (!is_bad_inode(inode)) {
4871 inode_tree_add(inode); 5030 inode_tree_add(inode);
@@ -4921,7 +5080,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4921 return ERR_PTR(ret); 5080 return ERR_PTR(ret);
4922 5081
4923 if (location.objectid == 0) 5082 if (location.objectid == 0)
4924 return NULL; 5083 return ERR_PTR(-ENOENT);
4925 5084
4926 if (location.type == BTRFS_INODE_ITEM_KEY) { 5085 if (location.type == BTRFS_INODE_ITEM_KEY) {
4927 inode = btrfs_iget(dir->i_sb, &location, root, NULL); 5086 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
@@ -4985,10 +5144,17 @@ static void btrfs_dentry_release(struct dentry *dentry)
4985static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 5144static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4986 unsigned int flags) 5145 unsigned int flags)
4987{ 5146{
4988 struct dentry *ret; 5147 struct inode *inode;
4989 5148
4990 ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); 5149 inode = btrfs_lookup_dentry(dir, dentry);
4991 return ret; 5150 if (IS_ERR(inode)) {
5151 if (PTR_ERR(inode) == -ENOENT)
5152 inode = NULL;
5153 else
5154 return ERR_CAST(inode);
5155 }
5156
5157 return d_splice_alias(inode, dentry);
4992} 5158}
4993 5159
4994unsigned char btrfs_filetype_table[] = { 5160unsigned char btrfs_filetype_table[] = {
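btrfs_lookup_dentry() now returns ERR_PTR(-ENOENT) instead of NULL, and btrfs_lookup() translates that back to a NULL inode for d_splice_alias() while letting real errors through. The kernel's tagged-pointer error convention, re-implemented in userspace to show the flow (the top-4095-addresses window is the usual convention, assumed here):

#include <errno.h>
#include <stdio.h>

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-4095;
}

static void *lookup_dentry(int exists)
{
	static int dummy_inode;

	return exists ? (void *)&dummy_inode : ERR_PTR(-ENOENT);
}

int main(void)
{
	void *inode = lookup_dentry(0);

	if (IS_ERR(inode)) {
		if (PTR_ERR(inode) == -ENOENT)
			inode = NULL;	/* negative dentry, not an error */
		else
			return 1;	/* propagate real failures */
	}
	printf("splice inode: %p\n", inode);
	return 0;
}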
@@ -5358,7 +5524,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5358 u32 sizes[2]; 5524 u32 sizes[2];
5359 unsigned long ptr; 5525 unsigned long ptr;
5360 int ret; 5526 int ret;
5361 int owner;
5362 5527
5363 path = btrfs_alloc_path(); 5528 path = btrfs_alloc_path();
5364 if (!path) 5529 if (!path)
@@ -5392,6 +5557,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5392 * number 5557 * number
5393 */ 5558 */
5394 BTRFS_I(inode)->index_cnt = 2; 5559 BTRFS_I(inode)->index_cnt = 2;
5560 BTRFS_I(inode)->dir_index = *index;
5395 BTRFS_I(inode)->root = root; 5561 BTRFS_I(inode)->root = root;
5396 BTRFS_I(inode)->generation = trans->transid; 5562 BTRFS_I(inode)->generation = trans->transid;
5397 inode->i_generation = BTRFS_I(inode)->generation; 5563 inode->i_generation = BTRFS_I(inode)->generation;
@@ -5404,11 +5570,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5404 */ 5570 */
5405 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); 5571 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
5406 5572
5407 if (S_ISDIR(mode))
5408 owner = 0;
5409 else
5410 owner = 1;
5411
5412 key[0].objectid = objectid; 5573 key[0].objectid = objectid;
5413 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 5574 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
5414 key[0].offset = 0; 5575 key[0].offset = 0;
@@ -5473,6 +5634,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5473 5634
5474 btrfs_update_root_times(trans, root); 5635 btrfs_update_root_times(trans, root);
5475 5636
5637 ret = btrfs_inode_inherit_props(trans, inode, dir);
5638 if (ret)
5639 btrfs_err(root->fs_info,
5640 "error inheriting props for ino %llu (root %llu): %d",
5641 btrfs_ino(inode), root->root_key.objectid, ret);
5642
5476 return inode; 5643 return inode;
5477fail: 5644fail:
5478 if (dir) 5645 if (dir)
@@ -5741,6 +5908,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
5741 goto fail; 5908 goto fail;
5742 } 5909 }
5743 5910
5911 /* There are several dir indexes for this inode, clear the cache. */
5912 BTRFS_I(inode)->dir_index = 0ULL;
5744 inc_nlink(inode); 5913 inc_nlink(inode);
5745 inode_inc_iversion(inode); 5914 inode_inc_iversion(inode);
5746 inode->i_ctime = CURRENT_TIME; 5915 inode->i_ctime = CURRENT_TIME;
@@ -6004,7 +6173,7 @@ again:
6004 btrfs_file_extent_num_bytes(leaf, item); 6173 btrfs_file_extent_num_bytes(leaf, item);
6005 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 6174 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
6006 size_t size; 6175 size_t size;
6007 size = btrfs_file_extent_inline_len(leaf, item); 6176 size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
6008 extent_end = ALIGN(extent_start + size, root->sectorsize); 6177 extent_end = ALIGN(extent_start + size, root->sectorsize);
6009 } 6178 }
6010next: 6179next:
@@ -6073,7 +6242,7 @@ next:
6073 goto out; 6242 goto out;
6074 } 6243 }
6075 6244
6076 size = btrfs_file_extent_inline_len(leaf, item); 6245 size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
6077 extent_offset = page_offset(page) + pg_offset - extent_start; 6246 extent_offset = page_offset(page) + pg_offset - extent_start;
6078 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, 6247 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
6079 size - extent_offset); 6248 size - extent_offset);
@@ -6390,6 +6559,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6390 int slot; 6559 int slot;
6391 int found_type; 6560 int found_type;
6392 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW); 6561 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
6562
6393 path = btrfs_alloc_path(); 6563 path = btrfs_alloc_path();
6394 if (!path) 6564 if (!path)
6395 return -ENOMEM; 6565 return -ENOMEM;
@@ -6433,6 +6603,10 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6433 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG) 6603 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
6434 goto out; 6604 goto out;
6435 6605
6606 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6607 if (extent_end <= offset)
6608 goto out;
6609
6436 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6610 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6437 if (disk_bytenr == 0) 6611 if (disk_bytenr == 0)
6438 goto out; 6612 goto out;
@@ -6450,8 +6624,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6450 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 6624 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6451 } 6625 }
6452 6626
6453 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6454
6455 if (btrfs_extent_readonly(root, disk_bytenr)) 6627 if (btrfs_extent_readonly(root, disk_bytenr))
6456 goto out; 6628 goto out;
6457 btrfs_release_path(path); 6629 btrfs_release_path(path);
@@ -6895,8 +7067,8 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
6895 struct btrfs_dio_private *dip = bio->bi_private; 7067 struct btrfs_dio_private *dip = bio->bi_private;
6896 7068
6897 if (err) { 7069 if (err) {
6898 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " 7070 btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
6899 "sector %#Lx len %u err no %d\n", 7071 "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
6900 btrfs_ino(dip->inode), bio->bi_rw, 7072 btrfs_ino(dip->inode), bio->bi_rw,
6901 (unsigned long long)bio->bi_iter.bi_sector, 7073 (unsigned long long)bio->bi_iter.bi_sector,
6902 bio->bi_iter.bi_size, err); 7074 bio->bi_iter.bi_size, err);
@@ -7370,6 +7542,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7370 struct extent_state *cached_state = NULL; 7542 struct extent_state *cached_state = NULL;
7371 u64 page_start = page_offset(page); 7543 u64 page_start = page_offset(page);
7372 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 7544 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
7545 int inode_evicting = inode->i_state & I_FREEING;
7373 7546
7374 /* 7547 /*
7375 * we have the page locked, so new writeback can't start, 7548 * we have the page locked, so new writeback can't start,
@@ -7385,17 +7558,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7385 btrfs_releasepage(page, GFP_NOFS); 7558 btrfs_releasepage(page, GFP_NOFS);
7386 return; 7559 return;
7387 } 7560 }
7388 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 7561
7389 ordered = btrfs_lookup_ordered_extent(inode, page_offset(page)); 7562 if (!inode_evicting)
7563 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
7564 ordered = btrfs_lookup_ordered_extent(inode, page_start);
7390 if (ordered) { 7565 if (ordered) {
7391 /* 7566 /*
7392 * IO on this page will never be started, so we need 7567 * IO on this page will never be started, so we need
7393 * to account for any ordered extents now 7568 * to account for any ordered extents now
7394 */ 7569 */
7395 clear_extent_bit(tree, page_start, page_end, 7570 if (!inode_evicting)
7396 EXTENT_DIRTY | EXTENT_DELALLOC | 7571 clear_extent_bit(tree, page_start, page_end,
7397 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 7572 EXTENT_DIRTY | EXTENT_DELALLOC |
7398 EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS); 7573 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
7574 EXTENT_DEFRAG, 1, 0, &cached_state,
7575 GFP_NOFS);
7399 /* 7576 /*
7400 * whoever cleared the private bit is responsible 7577 * whoever cleared the private bit is responsible
7401 * for the finish_ordered_io 7578 * for the finish_ordered_io
@@ -7419,14 +7596,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7419 btrfs_finish_ordered_io(ordered); 7596 btrfs_finish_ordered_io(ordered);
7420 } 7597 }
7421 btrfs_put_ordered_extent(ordered); 7598 btrfs_put_ordered_extent(ordered);
7422 cached_state = NULL; 7599 if (!inode_evicting) {
7423 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 7600 cached_state = NULL;
7601 lock_extent_bits(tree, page_start, page_end, 0,
7602 &cached_state);
7603 }
7604 }
7605
7606 if (!inode_evicting) {
7607 clear_extent_bit(tree, page_start, page_end,
7608 EXTENT_LOCKED | EXTENT_DIRTY |
7609 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
7610 EXTENT_DEFRAG, 1, 1,
7611 &cached_state, GFP_NOFS);
7612
7613 __btrfs_releasepage(page, GFP_NOFS);
7424 } 7614 }
7425 clear_extent_bit(tree, page_start, page_end,
7426 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
7427 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
7428 &cached_state, GFP_NOFS);
7429 __btrfs_releasepage(page, GFP_NOFS);
7430 7615
7431 ClearPageChecked(page); 7616 ClearPageChecked(page);
7432 if (PagePrivate(page)) { 7617 if (PagePrivate(page)) {
@@ -7736,7 +7921,9 @@ out:
7736 * create a new subvolume directory/inode (helper for the ioctl). 7921 * create a new subvolume directory/inode (helper for the ioctl).
7737 */ 7922 */
7738int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 7923int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
7739 struct btrfs_root *new_root, u64 new_dirid) 7924 struct btrfs_root *new_root,
7925 struct btrfs_root *parent_root,
7926 u64 new_dirid)
7740{ 7927{
7741 struct inode *inode; 7928 struct inode *inode;
7742 int err; 7929 int err;
@@ -7754,6 +7941,12 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
7754 set_nlink(inode, 1); 7941 set_nlink(inode, 1);
7755 btrfs_i_size_write(inode, 0); 7942 btrfs_i_size_write(inode, 0);
7756 7943
7944 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
7945 if (err)
7946 btrfs_err(new_root->fs_info,
7947 "error inheriting subvolume %llu properties: %d\n",
7948 new_root->root_key.objectid, err);
7949
7757 err = btrfs_update_inode(trans, new_root, inode); 7950 err = btrfs_update_inode(trans, new_root, inode);
7758 7951
7759 iput(inode); 7952 iput(inode);
@@ -7779,6 +7972,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
7779 ei->flags = 0; 7972 ei->flags = 0;
7780 ei->csum_bytes = 0; 7973 ei->csum_bytes = 0;
7781 ei->index_cnt = (u64)-1; 7974 ei->index_cnt = (u64)-1;
7975 ei->dir_index = 0;
7782 ei->last_unlink_trans = 0; 7976 ei->last_unlink_trans = 0;
7783 ei->last_log_commit = 0; 7977 ei->last_log_commit = 0;
7784 7978
@@ -8066,6 +8260,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8066 if (ret) 8260 if (ret)
8067 goto out_fail; 8261 goto out_fail;
8068 8262
8263 BTRFS_I(old_inode)->dir_index = 0ULL;
8069 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { 8264 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
8070 /* force full log commit if subvolume involved. */ 8265 /* force full log commit if subvolume involved. */
8071 root->fs_info->last_trans_log_full_commit = trans->transid; 8266 root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -8154,6 +8349,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8154 goto out_fail; 8349 goto out_fail;
8155 } 8350 }
8156 8351
8352 if (old_inode->i_nlink == 1)
8353 BTRFS_I(old_inode)->dir_index = index;
8354
8157 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { 8355 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
8158 struct dentry *parent = new_dentry->d_parent; 8356 struct dentry *parent = new_dentry->d_parent;
8159 btrfs_log_new_name(trans, old_inode, old_dir, parent); 8357 btrfs_log_new_name(trans, old_inode, old_dir, parent);
@@ -8289,7 +8487,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8289{ 8487{
8290 int ret; 8488 int ret;
8291 8489
8292 if (root->fs_info->sb->s_flags & MS_RDONLY) 8490 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
8293 return -EROFS; 8491 return -EROFS;
8294 8492
8295 ret = __start_delalloc_inodes(root, delay_iput); 8493 ret = __start_delalloc_inodes(root, delay_iput);
@@ -8315,7 +8513,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
8315 struct list_head splice; 8513 struct list_head splice;
8316 int ret; 8514 int ret;
8317 8515
8318 if (fs_info->sb->s_flags & MS_RDONLY) 8516 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
8319 return -EROFS; 8517 return -EROFS;
8320 8518
8321 INIT_LIST_HEAD(&splice); 8519 INIT_LIST_HEAD(&splice);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ad27dcea319c..b0134892dc70 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -56,6 +56,8 @@
56#include "rcu-string.h" 56#include "rcu-string.h"
57#include "send.h" 57#include "send.h"
58#include "dev-replace.h" 58#include "dev-replace.h"
59#include "props.h"
60#include "sysfs.h"
59 61
60static int btrfs_clone(struct inode *src, struct inode *inode, 62static int btrfs_clone(struct inode *src, struct inode *inode,
61 u64 off, u64 olen, u64 olen_aligned, u64 destoff); 63 u64 off, u64 olen, u64 olen_aligned, u64 destoff);
@@ -190,6 +192,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
190 unsigned int i_oldflags; 192 unsigned int i_oldflags;
191 umode_t mode; 193 umode_t mode;
192 194
195 if (!inode_owner_or_capable(inode))
196 return -EPERM;
197
193 if (btrfs_root_readonly(root)) 198 if (btrfs_root_readonly(root))
194 return -EROFS; 199 return -EROFS;
195 200
@@ -200,9 +205,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
200 if (ret) 205 if (ret)
201 return ret; 206 return ret;
202 207
203 if (!inode_owner_or_capable(inode))
204 return -EACCES;
205
206 ret = mnt_want_write_file(file); 208 ret = mnt_want_write_file(file);
207 if (ret) 209 if (ret)
208 return ret; 210 return ret;
@@ -280,9 +282,25 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
280 if (flags & FS_NOCOMP_FL) { 282 if (flags & FS_NOCOMP_FL) {
281 ip->flags &= ~BTRFS_INODE_COMPRESS; 283 ip->flags &= ~BTRFS_INODE_COMPRESS;
282 ip->flags |= BTRFS_INODE_NOCOMPRESS; 284 ip->flags |= BTRFS_INODE_NOCOMPRESS;
285
286 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
287 if (ret && ret != -ENODATA)
288 goto out_drop;
283 } else if (flags & FS_COMPR_FL) { 289 } else if (flags & FS_COMPR_FL) {
290 const char *comp;
291
284 ip->flags |= BTRFS_INODE_COMPRESS; 292 ip->flags |= BTRFS_INODE_COMPRESS;
285 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 293 ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
294
295 if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
296 comp = "lzo";
297 else
298 comp = "zlib";
299 ret = btrfs_set_prop(inode, "btrfs.compression",
300 comp, strlen(comp), 0);
301 if (ret)
302 goto out_drop;
303
286 } else { 304 } else {
287 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 305 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
288 } 306 }
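Toggling FS_COMPR_FL / FS_NOCOMP_FL now also persists a btrfs.compression property, so the per-inode choice survives and can be inherited. From userspace the property surfaces as an xattr; a hypothetical example (the path is made up, and this assumes a kernel carrying the props work):

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/btrfs/somefile";	/* hypothetical */
	const char *comp = "lzo";			/* or "zlib" */

	/* roughly what setting FS_COMPR_FL now does behind the scenes */
	if (setxattr(path, "btrfs.compression", comp, strlen(comp), 0))
		perror("setxattr");
	return 0;
}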
@@ -392,6 +410,7 @@ static noinline int create_subvol(struct inode *dir,
392 struct btrfs_root *new_root; 410 struct btrfs_root *new_root;
393 struct btrfs_block_rsv block_rsv; 411 struct btrfs_block_rsv block_rsv;
394 struct timespec cur_time = CURRENT_TIME; 412 struct timespec cur_time = CURRENT_TIME;
413 struct inode *inode;
395 int ret; 414 int ret;
396 int err; 415 int err;
397 u64 objectid; 416 u64 objectid;
@@ -417,7 +436,9 @@ static noinline int create_subvol(struct inode *dir,
417 trans = btrfs_start_transaction(root, 0); 436 trans = btrfs_start_transaction(root, 0);
418 if (IS_ERR(trans)) { 437 if (IS_ERR(trans)) {
419 ret = PTR_ERR(trans); 438 ret = PTR_ERR(trans);
420 goto out; 439 btrfs_subvolume_release_metadata(root, &block_rsv,
440 qgroup_reserved);
441 return ret;
421 } 442 }
422 trans->block_rsv = &block_rsv; 443 trans->block_rsv = &block_rsv;
423 trans->bytes_reserved = block_rsv.size; 444 trans->bytes_reserved = block_rsv.size;
@@ -500,7 +521,7 @@ static noinline int create_subvol(struct inode *dir,
500 521
501 btrfs_record_root_in_trans(trans, new_root); 522 btrfs_record_root_in_trans(trans, new_root);
502 523
503 ret = btrfs_create_subvol_root(trans, new_root, new_dirid); 524 ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
504 if (ret) { 525 if (ret) {
505 /* We potentially lose an unused inode item here */ 526 /* We potentially lose an unused inode item here */
506 btrfs_abort_transaction(trans, root, ret); 527 btrfs_abort_transaction(trans, root, ret);
@@ -542,6 +563,8 @@ static noinline int create_subvol(struct inode *dir,
542fail: 563fail:
543 trans->block_rsv = NULL; 564 trans->block_rsv = NULL;
544 trans->bytes_reserved = 0; 565 trans->bytes_reserved = 0;
566 btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
567
545 if (async_transid) { 568 if (async_transid) {
546 *async_transid = trans->transid; 569 *async_transid = trans->transid;
547 err = btrfs_commit_transaction_async(trans, root, 1); 570 err = btrfs_commit_transaction_async(trans, root, 1);
@@ -553,10 +576,12 @@ fail:
553 if (err && !ret) 576 if (err && !ret)
554 ret = err; 577 ret = err;
555 578
556 if (!ret) 579 if (!ret) {
557 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 580 inode = btrfs_lookup_dentry(dir, dentry);
558out: 581 if (IS_ERR(inode))
559 btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); 582 return PTR_ERR(inode);
583 d_instantiate(dentry, inode);
584 }
560 return ret; 585 return ret;
561} 586}
562 587
@@ -642,7 +667,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
642 ret = PTR_ERR(inode); 667 ret = PTR_ERR(inode);
643 goto fail; 668 goto fail;
644 } 669 }
645 BUG_ON(!inode); 670
646 d_instantiate(dentry, inode); 671 d_instantiate(dentry, inode);
647 ret = 0; 672 ret = 0;
648fail: 673fail:
@@ -1011,7 +1036,7 @@ out:
1011static int cluster_pages_for_defrag(struct inode *inode, 1036static int cluster_pages_for_defrag(struct inode *inode,
1012 struct page **pages, 1037 struct page **pages,
1013 unsigned long start_index, 1038 unsigned long start_index,
1014 int num_pages) 1039 unsigned long num_pages)
1015{ 1040{
1016 unsigned long file_end; 1041 unsigned long file_end;
1017 u64 isize = i_size_read(inode); 1042 u64 isize = i_size_read(inode);
@@ -1169,8 +1194,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1169 int defrag_count = 0; 1194 int defrag_count = 0;
1170 int compress_type = BTRFS_COMPRESS_ZLIB; 1195 int compress_type = BTRFS_COMPRESS_ZLIB;
1171 int extent_thresh = range->extent_thresh; 1196 int extent_thresh = range->extent_thresh;
1172 int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; 1197 unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
1173 int cluster = max_cluster; 1198 unsigned long cluster = max_cluster;
1174 u64 new_align = ~((u64)128 * 1024 - 1); 1199 u64 new_align = ~((u64)128 * 1024 - 1);
1175 struct page **pages = NULL; 1200 struct page **pages = NULL;
1176 1201
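
The cluster counters above widen from int to unsigned long because page-index arithmetic done in a 32-bit type can wrap once shifted into byte offsets. A toy illustration with made-up numbers; unsigned types are used so the wrap is well defined:

#include <stdio.h>

int main(void)
{
	unsigned int i = 1100000, cluster = 64;	/* page indices, 4KiB pages */
	unsigned int narrow;		/* stand-in for the old int counters */
	unsigned long long wide;	/* the widened arithmetic */

	narrow = (i + cluster) << 12;	/* 32-bit math: wraps past 4GiB */
	wide = ((unsigned long long)i + cluster) << 12;
	printf("narrow: %u\nwide:   %llu\n", narrow, wide);
	return 0;
}
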
@@ -1254,7 +1279,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1254 break; 1279 break;
1255 1280
1256 if (btrfs_defrag_cancelled(root->fs_info)) { 1281 if (btrfs_defrag_cancelled(root->fs_info)) {
1257 printk(KERN_DEBUG "btrfs: defrag_file cancelled\n"); 1282 printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n");
1258 ret = -EAGAIN; 1283 ret = -EAGAIN;
1259 break; 1284 break;
1260 } 1285 }
@@ -1416,20 +1441,20 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1416 ret = -EINVAL; 1441 ret = -EINVAL;
1417 goto out_free; 1442 goto out_free;
1418 } 1443 }
1419 printk(KERN_INFO "btrfs: resizing devid %llu\n", devid); 1444 btrfs_info(root->fs_info, "resizing devid %llu", devid);
1420 } 1445 }
1421 1446
1422 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1447 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1423 if (!device) { 1448 if (!device) {
1424 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1449 btrfs_info(root->fs_info, "resizer unable to find device %llu",
1425 devid); 1450 devid);
1426 ret = -ENODEV; 1451 ret = -ENODEV;
1427 goto out_free; 1452 goto out_free;
1428 } 1453 }
1429 1454
1430 if (!device->writeable) { 1455 if (!device->writeable) {
1431 printk(KERN_INFO "btrfs: resizer unable to apply on " 1456 btrfs_info(root->fs_info,
1432 "readonly device %llu\n", 1457 "resizer unable to apply on readonly device %llu",
1433 devid); 1458 devid);
1434 ret = -EPERM; 1459 ret = -EPERM;
1435 goto out_free; 1460 goto out_free;
@@ -1466,6 +1491,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1466 } 1491 }
1467 new_size = old_size - new_size; 1492 new_size = old_size - new_size;
1468 } else if (mod > 0) { 1493 } else if (mod > 0) {
1494 if (new_size > ULLONG_MAX - old_size) {
1495 ret = -EINVAL;
1496 goto out_free;
1497 }
1469 new_size = old_size + new_size; 1498 new_size = old_size + new_size;
1470 } 1499 }
1471 1500
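
The added bounds check is the standard unsigned-addition overflow guard: for u64 values, old + delta wraps exactly when delta > ULLONG_MAX - old. A standalone restatement:

#include <stdint.h>
#include <stdio.h>

static int u64_add_would_overflow(uint64_t old_size, uint64_t grow_by)
{
	/* same test as the hunk above: reject sizes that would wrap u64 */
	return grow_by > UINT64_MAX - old_size;
}

int main(void)
{
	printf("%d\n", u64_add_would_overflow(UINT64_MAX - 4096, 8192)); /* 1 */
	printf("%d\n", u64_add_would_overflow(1ULL << 40, 1ULL << 30));  /* 0 */
	return 0;
}
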
@@ -1481,7 +1510,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1481 do_div(new_size, root->sectorsize); 1510 do_div(new_size, root->sectorsize);
1482 new_size *= root->sectorsize; 1511 new_size *= root->sectorsize;
1483 1512
1484 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", 1513 printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
1485 rcu_str_deref(device->name), new_size); 1514 rcu_str_deref(device->name), new_size);
1486 1515
1487 if (new_size > old_size) { 1516 if (new_size > old_size) {
@@ -1542,9 +1571,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1542 1571
1543 src_inode = file_inode(src.file); 1572 src_inode = file_inode(src.file);
1544 if (src_inode->i_sb != file_inode(file)->i_sb) { 1573 if (src_inode->i_sb != file_inode(file)->i_sb) {
1545 printk(KERN_INFO "btrfs: Snapshot src from " 1574 btrfs_info(BTRFS_I(src_inode)->root->fs_info,
1546 "another FS\n"); 1575 "Snapshot src from another FS");
1547 ret = -EINVAL; 1576 ret = -EINVAL;
1577 } else if (!inode_owner_or_capable(src_inode)) {
1578 /*
1579 * Subvolume creation is not restricted, but snapshots
1580 * are limited to the user's own subvolumes only
1581 */
1582 ret = -EPERM;
1548 } else { 1583 } else {
1549 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1584 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1550 BTRFS_I(src_inode)->root, 1585 BTRFS_I(src_inode)->root,
@@ -1662,6 +1697,9 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1662 u64 flags; 1697 u64 flags;
1663 int ret = 0; 1698 int ret = 0;
1664 1699
1700 if (!inode_owner_or_capable(inode))
1701 return -EPERM;
1702
1665 ret = mnt_want_write_file(file); 1703 ret = mnt_want_write_file(file);
1666 if (ret) 1704 if (ret)
1667 goto out; 1705 goto out;
@@ -1686,11 +1724,6 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1686 goto out_drop_write; 1724 goto out_drop_write;
1687 } 1725 }
1688 1726
1689 if (!inode_owner_or_capable(inode)) {
1690 ret = -EACCES;
1691 goto out_drop_write;
1692 }
1693
1694 down_write(&root->fs_info->subvol_sem); 1727 down_write(&root->fs_info->subvol_sem);
1695 1728
1696 /* nothing to do */ 1729 /* nothing to do */
@@ -1698,12 +1731,28 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1698 goto out_drop_sem; 1731 goto out_drop_sem;
1699 1732
1700 root_flags = btrfs_root_flags(&root->root_item); 1733 root_flags = btrfs_root_flags(&root->root_item);
1701 if (flags & BTRFS_SUBVOL_RDONLY) 1734 if (flags & BTRFS_SUBVOL_RDONLY) {
1702 btrfs_set_root_flags(&root->root_item, 1735 btrfs_set_root_flags(&root->root_item,
1703 root_flags | BTRFS_ROOT_SUBVOL_RDONLY); 1736 root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
1704 else 1737 } else {
1705 btrfs_set_root_flags(&root->root_item, 1738 /*
1739 * Block RO -> RW transition if this subvolume is involved in
1740 * send
1741 */
1742 spin_lock(&root->root_item_lock);
1743 if (root->send_in_progress == 0) {
1744 btrfs_set_root_flags(&root->root_item,
1706 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); 1745 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
1746 spin_unlock(&root->root_item_lock);
1747 } else {
1748 spin_unlock(&root->root_item_lock);
1749 btrfs_warn(root->fs_info,
1750 "Attempt to set subvolume %llu read-write during send",
1751 root->root_key.objectid);
1752 ret = -EPERM;
1753 goto out_drop_sem;
1754 }
1755 }
1707 1756
1708 trans = btrfs_start_transaction(root, 1); 1757 trans = btrfs_start_transaction(root, 1);
1709 if (IS_ERR(trans)) { 1758 if (IS_ERR(trans)) {
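
The RO->RW hunk above only flips the flag while send_in_progress is zero, and does so under root_item_lock, so a concurrent send can never observe the subvolume turning writable. A userspace sketch of the same check-under-lock pattern, with pthreads standing in for the kernel spinlock:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t root_item_lock = PTHREAD_MUTEX_INITIALIZER;
static int send_in_progress;	/* number of active send operations */
static int subvol_rdonly = 1;

static int set_subvol_rw(void)
{
	int ret = 0;

	pthread_mutex_lock(&root_item_lock);
	if (send_in_progress == 0)
		subvol_rdonly = 0;	/* no sender can race with us here */
	else
		ret = -EPERM;		/* send relies on the subvol staying RO */
	pthread_mutex_unlock(&root_item_lock);
	return ret;
}

int main(void)
{
	send_in_progress = 1;
	printf("while sending: %d\n", set_subvol_rw());	/* -EPERM */
	send_in_progress = 0;
	printf("after send:    %d\n", set_subvol_rw());	/* 0 */
	return 0;
}
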
@@ -1910,7 +1959,7 @@ static noinline int search_ioctl(struct inode *inode,
1910 key.offset = (u64)-1; 1959 key.offset = (u64)-1;
1911 root = btrfs_read_fs_root_no_name(info, &key); 1960 root = btrfs_read_fs_root_no_name(info, &key);
1912 if (IS_ERR(root)) { 1961 if (IS_ERR(root)) {
1913 printk(KERN_ERR "could not find root %llu\n", 1962 printk(KERN_ERR "BTRFS: could not find root %llu\n",
1914 sk->tree_id); 1963 sk->tree_id);
1915 btrfs_free_path(path); 1964 btrfs_free_path(path);
1916 return -ENOENT; 1965 return -ENOENT;
@@ -2000,7 +2049,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
2000 key.offset = (u64)-1; 2049 key.offset = (u64)-1;
2001 root = btrfs_read_fs_root_no_name(info, &key); 2050 root = btrfs_read_fs_root_no_name(info, &key);
2002 if (IS_ERR(root)) { 2051 if (IS_ERR(root)) {
2003 printk(KERN_ERR "could not find root %llu\n", tree_id); 2052 printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id);
2004 ret = -ENOENT; 2053 ret = -ENOENT;
2005 goto out; 2054 goto out;
2006 } 2055 }
@@ -2838,12 +2887,14 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2838 * note the key will change type as we walk through the 2887 * note the key will change type as we walk through the
2839 * tree. 2888 * tree.
2840 */ 2889 */
2890 path->leave_spinning = 1;
2841 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 2891 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
2842 0, 0); 2892 0, 0);
2843 if (ret < 0) 2893 if (ret < 0)
2844 goto out; 2894 goto out;
2845 2895
2846 nritems = btrfs_header_nritems(path->nodes[0]); 2896 nritems = btrfs_header_nritems(path->nodes[0]);
2897process_slot:
2847 if (path->slots[0] >= nritems) { 2898 if (path->slots[0] >= nritems) {
2848 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 2899 ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
2849 if (ret < 0) 2900 if (ret < 0)
@@ -2870,11 +2921,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2870 u8 comp; 2921 u8 comp;
2871 u64 endoff; 2922 u64 endoff;
2872 2923
2873 size = btrfs_item_size_nr(leaf, slot);
2874 read_extent_buffer(leaf, buf,
2875 btrfs_item_ptr_offset(leaf, slot),
2876 size);
2877
2878 extent = btrfs_item_ptr(leaf, slot, 2924 extent = btrfs_item_ptr(leaf, slot,
2879 struct btrfs_file_extent_item); 2925 struct btrfs_file_extent_item);
2880 comp = btrfs_file_extent_compression(leaf, extent); 2926 comp = btrfs_file_extent_compression(leaf, extent);
@@ -2893,11 +2939,20 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2893 datal = btrfs_file_extent_ram_bytes(leaf, 2939 datal = btrfs_file_extent_ram_bytes(leaf,
2894 extent); 2940 extent);
2895 } 2941 }
2896 btrfs_release_path(path);
2897 2942
2898 if (key.offset + datal <= off || 2943 if (key.offset + datal <= off ||
2899 key.offset >= off + len - 1) 2944 key.offset >= off + len - 1) {
2900 goto next; 2945 path->slots[0]++;
2946 goto process_slot;
2947 }
2948
2949 size = btrfs_item_size_nr(leaf, slot);
2950 read_extent_buffer(leaf, buf,
2951 btrfs_item_ptr_offset(leaf, slot),
2952 size);
2953
2954 btrfs_release_path(path);
2955 path->leave_spinning = 0;
2901 2956
2902 memcpy(&new_key, &key, sizeof(new_key)); 2957 memcpy(&new_key, &key, sizeof(new_key));
2903 new_key.objectid = btrfs_ino(inode); 2958 new_key.objectid = btrfs_ino(inode);
@@ -3068,7 +3123,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
3068 } 3123 }
3069 ret = btrfs_end_transaction(trans, root); 3124 ret = btrfs_end_transaction(trans, root);
3070 } 3125 }
3071next:
3072 btrfs_release_path(path); 3126 btrfs_release_path(path);
3073 key.offset++; 3127 key.offset++;
3074 } 3128 }
@@ -3196,9 +3250,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3196 3250
3197 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 3251 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
3198out_unlock: 3252out_unlock:
3199 mutex_unlock(&src->i_mutex); 3253 if (!same_inode) {
3200 if (!same_inode) 3254 if (inode < src) {
3201 mutex_unlock(&inode->i_mutex); 3255 mutex_unlock(&src->i_mutex);
3256 mutex_unlock(&inode->i_mutex);
3257 } else {
3258 mutex_unlock(&inode->i_mutex);
3259 mutex_unlock(&src->i_mutex);
3260 }
3261 } else {
3262 mutex_unlock(&src->i_mutex);
3263 }
3202out_fput: 3264out_fput:
3203 fdput(src_file); 3265 fdput(src_file);
3204out_drop_write: 3266out_drop_write:
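
The reworked unlock path releases the two i_mutexes in the reverse of a fixed, address-based acquisition order. A pthread sketch of that convention (illustrative only, not the kernel's locking API):

#include <pthread.h>
#include <stdio.h>

struct toy_inode {
	pthread_mutex_t i_mutex;
};

static void lock_two(struct toy_inode *a, struct toy_inode *b)
{
	/* always take the lower-addressed lock first to avoid ABBA deadlock */
	if (a < b) {
		pthread_mutex_lock(&a->i_mutex);
		pthread_mutex_lock(&b->i_mutex);
	} else {
		pthread_mutex_lock(&b->i_mutex);
		pthread_mutex_lock(&a->i_mutex);
	}
}

static void unlock_two(struct toy_inode *a, struct toy_inode *b)
{
	/* release in the reverse of the acquisition order, as the hunk does */
	if (a < b) {
		pthread_mutex_unlock(&b->i_mutex);
		pthread_mutex_unlock(&a->i_mutex);
	} else {
		pthread_mutex_unlock(&a->i_mutex);
		pthread_mutex_unlock(&b->i_mutex);
	}
}

int main(void)
{
	struct toy_inode x = { PTHREAD_MUTEX_INITIALIZER };
	struct toy_inode y = { PTHREAD_MUTEX_INITIALIZER };

	lock_two(&x, &y);
	unlock_two(&x, &y);
	puts("locked and unlocked in a consistent order");
	return 0;
}
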
@@ -3321,8 +3383,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
3321 if (IS_ERR_OR_NULL(di)) { 3383 if (IS_ERR_OR_NULL(di)) {
3322 btrfs_free_path(path); 3384 btrfs_free_path(path);
3323 btrfs_end_transaction(trans, root); 3385 btrfs_end_transaction(trans, root);
3324 printk(KERN_ERR "Umm, you don't have the default dir item, " 3386 btrfs_err(new_root->fs_info, "Umm, you don't have the default dir "
3325 "this isn't going to work\n"); 3387 "item, this isn't going to work");
3326 ret = -ENOENT; 3388 ret = -ENOENT;
3327 goto out; 3389 goto out;
3328 } 3390 }
@@ -3475,6 +3537,20 @@ out:
3475 return ret; 3537 return ret;
3476} 3538}
3477 3539
3540static long btrfs_ioctl_global_rsv(struct btrfs_root *root, void __user *arg)
3541{
3542 struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv;
3543 u64 reserved;
3544
3545 spin_lock(&block_rsv->lock);
3546 reserved = block_rsv->reserved;
3547 spin_unlock(&block_rsv->lock);
3548
3549 if (arg && copy_to_user(arg, &reserved, sizeof(reserved)))
3550 return -EFAULT;
3551 return 0;
3552}
3553
3478/* 3554/*
3479 * there are many ways the trans_start and trans_end ioctls can lead 3555 * there are many ways the trans_start and trans_end ioctls can lead
3480 * to deadlocks. They should only be used by applications that 3556 * to deadlocks. They should only be used by applications that
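
For reference, the new BTRFS_IOC_GLOBAL_RSV ioctl added above can be driven from userspace roughly as follows. The request definition mirrors what this series appears to add (_IOR(0x94, 20, __u64)) and should be treated as an assumption if your headers differ; the mount path is an example:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define BTRFS_IOCTL_MAGIC	0x94
#define BTRFS_IOC_GLOBAL_RSV	_IOR(BTRFS_IOCTL_MAGIC, 20, uint64_t)	/* assumed */

int main(int argc, char **argv)
{
	uint64_t reserved = 0;
	int fd = open(argc > 1 ? argv[1] : "/mnt/btrfs", O_RDONLY);

	if (fd < 0)
		return perror("open"), 1;
	if (ioctl(fd, BTRFS_IOC_GLOBAL_RSV, &reserved) < 0)
		return perror("BTRFS_IOC_GLOBAL_RSV"), 1;
	printf("global block reserve: %llu bytes\n",
	       (unsigned long long)reserved);
	close(fd);
	return 0;
}
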
@@ -4303,6 +4379,9 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
4303 int ret = 0; 4379 int ret = 0;
4304 int received_uuid_changed; 4380 int received_uuid_changed;
4305 4381
4382 if (!inode_owner_or_capable(inode))
4383 return -EPERM;
4384
4306 ret = mnt_want_write_file(file); 4385 ret = mnt_want_write_file(file);
4307 if (ret < 0) 4386 if (ret < 0)
4308 return ret; 4387 return ret;
@@ -4319,11 +4398,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
4319 goto out; 4398 goto out;
4320 } 4399 }
4321 4400
4322 if (!inode_owner_or_capable(inode)) {
4323 ret = -EACCES;
4324 goto out;
4325 }
4326
4327 sa = memdup_user(arg, sizeof(*sa)); 4401 sa = memdup_user(arg, sizeof(*sa));
4328 if (IS_ERR(sa)) { 4402 if (IS_ERR(sa)) {
4329 ret = PTR_ERR(sa); 4403 ret = PTR_ERR(sa);
@@ -4409,8 +4483,8 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
4409 len = strnlen(label, BTRFS_LABEL_SIZE); 4483 len = strnlen(label, BTRFS_LABEL_SIZE);
4410 4484
4411 if (len == BTRFS_LABEL_SIZE) { 4485 if (len == BTRFS_LABEL_SIZE) {
4412 pr_warn("btrfs: label is too long, return the first %zu bytes\n", 4486 btrfs_warn(root->fs_info,
4413 --len); 4487 "label is too long, return the first %zu bytes", --len);
4414 } 4488 }
4415 4489
4416 ret = copy_to_user(arg, label, len); 4490 ret = copy_to_user(arg, label, len);
@@ -4433,7 +4507,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
4433 return -EFAULT; 4507 return -EFAULT;
4434 4508
4435 if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { 4509 if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
4436 pr_err("btrfs: unable to set label with more than %d bytes\n", 4510 btrfs_err(root->fs_info, "unable to set label with more than %d bytes",
4437 BTRFS_LABEL_SIZE - 1); 4511 BTRFS_LABEL_SIZE - 1);
4438 return -EINVAL; 4512 return -EINVAL;
4439 } 4513 }
@@ -4458,6 +4532,166 @@ out_unlock:
4458 return ret; 4532 return ret;
4459} 4533}
4460 4534
4535#define INIT_FEATURE_FLAGS(suffix) \
4536 { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
4537 .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
4538 .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
4539
4540static int btrfs_ioctl_get_supported_features(struct file *file,
4541 void __user *arg)
4542{
4543 static struct btrfs_ioctl_feature_flags features[3] = {
4544 INIT_FEATURE_FLAGS(SUPP),
4545 INIT_FEATURE_FLAGS(SAFE_SET),
4546 INIT_FEATURE_FLAGS(SAFE_CLEAR)
4547 };
4548
4549 if (copy_to_user(arg, &features, sizeof(features)))
4550 return -EFAULT;
4551
4552 return 0;
4553}
4554
4555static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
4556{
4557 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
4558 struct btrfs_super_block *super_block = root->fs_info->super_copy;
4559 struct btrfs_ioctl_feature_flags features;
4560
4561 features.compat_flags = btrfs_super_compat_flags(super_block);
4562 features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block);
4563 features.incompat_flags = btrfs_super_incompat_flags(super_block);
4564
4565 if (copy_to_user(arg, &features, sizeof(features)))
4566 return -EFAULT;
4567
4568 return 0;
4569}
4570
4571static int check_feature_bits(struct btrfs_root *root,
4572 enum btrfs_feature_set set,
4573 u64 change_mask, u64 flags, u64 supported_flags,
4574 u64 safe_set, u64 safe_clear)
4575{
4576 const char *type = btrfs_feature_set_names[set];
4577 char *names;
4578 u64 disallowed, unsupported;
4579 u64 set_mask = flags & change_mask;
4580 u64 clear_mask = ~flags & change_mask;
4581
4582 unsupported = set_mask & ~supported_flags;
4583 if (unsupported) {
4584 names = btrfs_printable_features(set, unsupported);
4585 if (names) {
4586 btrfs_warn(root->fs_info,
4587 "this kernel does not support the %s feature bit%s",
4588 names, strchr(names, ',') ? "s" : "");
4589 kfree(names);
4590 } else
4591 btrfs_warn(root->fs_info,
4592 "this kernel does not support %s bits 0x%llx",
4593 type, unsupported);
4594 return -EOPNOTSUPP;
4595 }
4596
4597 disallowed = set_mask & ~safe_set;
4598 if (disallowed) {
4599 names = btrfs_printable_features(set, disallowed);
4600 if (names) {
4601 btrfs_warn(root->fs_info,
4602 "can't set the %s feature bit%s while mounted",
4603 names, strchr(names, ',') ? "s" : "");
4604 kfree(names);
4605 } else
4606 btrfs_warn(root->fs_info,
4607 "can't set %s bits 0x%llx while mounted",
4608 type, disallowed);
4609 return -EPERM;
4610 }
4611
4612 disallowed = clear_mask & ~safe_clear;
4613 if (disallowed) {
4614 names = btrfs_printable_features(set, disallowed);
4615 if (names) {
4616 btrfs_warn(root->fs_info,
4617 "can't clear the %s feature bit%s while mounted",
4618 names, strchr(names, ',') ? "s" : "");
4619 kfree(names);
4620 } else
4621 btrfs_warn(root->fs_info,
4622 "can't clear %s bits 0x%llx while mounted",
4623 type, disallowed);
4624 return -EPERM;
4625 }
4626
4627 return 0;
4628}
4629
4630#define check_feature(root, change_mask, flags, mask_base) \
4631check_feature_bits(root, FEAT_##mask_base, change_mask, flags, \
4632 BTRFS_FEATURE_ ## mask_base ## _SUPP, \
4633 BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \
4634 BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
4635
4636static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
4637{
4638 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
4639 struct btrfs_super_block *super_block = root->fs_info->super_copy;
4640 struct btrfs_ioctl_feature_flags flags[2];
4641 struct btrfs_trans_handle *trans;
4642 u64 newflags;
4643 int ret;
4644
4645 if (!capable(CAP_SYS_ADMIN))
4646 return -EPERM;
4647
4648 if (copy_from_user(flags, arg, sizeof(flags)))
4649 return -EFAULT;
4650
4651 /* Nothing to do */
4652 if (!flags[0].compat_flags && !flags[0].compat_ro_flags &&
4653 !flags[0].incompat_flags)
4654 return 0;
4655
4656 ret = check_feature(root, flags[0].compat_flags,
4657 flags[1].compat_flags, COMPAT);
4658 if (ret)
4659 return ret;
4660
4661 ret = check_feature(root, flags[0].compat_ro_flags,
4662 flags[1].compat_ro_flags, COMPAT_RO);
4663 if (ret)
4664 return ret;
4665
4666 ret = check_feature(root, flags[0].incompat_flags,
4667 flags[1].incompat_flags, INCOMPAT);
4668 if (ret)
4669 return ret;
4670
4671 trans = btrfs_start_transaction(root, 1);
4672 if (IS_ERR(trans))
4673 return PTR_ERR(trans);
4674
4675 spin_lock(&root->fs_info->super_lock);
4676 newflags = btrfs_super_compat_flags(super_block);
4677 newflags |= flags[0].compat_flags & flags[1].compat_flags;
4678 newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags);
4679 btrfs_set_super_compat_flags(super_block, newflags);
4680
4681 newflags = btrfs_super_compat_ro_flags(super_block);
4682 newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags;
4683 newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags);
4684 btrfs_set_super_compat_ro_flags(super_block, newflags);
4685
4686 newflags = btrfs_super_incompat_flags(super_block);
4687 newflags |= flags[0].incompat_flags & flags[1].incompat_flags;
4688 newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags);
4689 btrfs_set_super_incompat_flags(super_block, newflags);
4690 spin_unlock(&root->fs_info->super_lock);
4691
4692 return btrfs_end_transaction(trans, root);
4693}
4694
4461long btrfs_ioctl(struct file *file, unsigned int 4695long btrfs_ioctl(struct file *file, unsigned int
4462 cmd, unsigned long arg) 4696 cmd, unsigned long arg)
4463{ 4697{
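
In btrfs_ioctl_set_features() above, flags[0] acts as a change mask and flags[1] as the desired values; the three newflags blocks all apply the same mask/value rule. Restated standalone:

#include <stdint.h>
#include <stdio.h>

/* flags[0] = which bits to touch, flags[1] = their requested values */
static uint64_t apply_feature_change(uint64_t cur, uint64_t mask, uint64_t val)
{
	cur |= mask & val;	/* bits asked to become 1 */
	cur &= ~(mask & ~val);	/* bits asked to become 0 */
	return cur;
}

int main(void)
{
	/* current 0b1010, touch bits 0-1, want bit0=1 and bit1=0 -> 0b1001 */
	printf("0x%llx\n",
	       (unsigned long long)apply_feature_change(0xA, 0x3, 0x1));
	return 0;
}
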
@@ -4523,6 +4757,8 @@ long btrfs_ioctl(struct file *file, unsigned int
4523 return btrfs_ioctl_logical_to_ino(root, argp); 4757 return btrfs_ioctl_logical_to_ino(root, argp);
4524 case BTRFS_IOC_SPACE_INFO: 4758 case BTRFS_IOC_SPACE_INFO:
4525 return btrfs_ioctl_space_info(root, argp); 4759 return btrfs_ioctl_space_info(root, argp);
4760 case BTRFS_IOC_GLOBAL_RSV:
4761 return btrfs_ioctl_global_rsv(root, argp);
4526 case BTRFS_IOC_SYNC: { 4762 case BTRFS_IOC_SYNC: {
4527 int ret; 4763 int ret;
4528 4764
@@ -4576,6 +4812,12 @@ long btrfs_ioctl(struct file *file, unsigned int
4576 return btrfs_ioctl_set_fslabel(file, argp); 4812 return btrfs_ioctl_set_fslabel(file, argp);
4577 case BTRFS_IOC_FILE_EXTENT_SAME: 4813 case BTRFS_IOC_FILE_EXTENT_SAME:
4578 return btrfs_ioctl_file_extent_same(file, argp); 4814 return btrfs_ioctl_file_extent_same(file, argp);
4815 case BTRFS_IOC_GET_SUPPORTED_FEATURES:
4816 return btrfs_ioctl_get_supported_features(file, argp);
4817 case BTRFS_IOC_GET_FEATURES:
4818 return btrfs_ioctl_get_features(file, argp);
4819 case BTRFS_IOC_SET_FEATURES:
4820 return btrfs_ioctl_set_features(file, argp);
4579 } 4821 }
4580 4822
4581 return -ENOTTY; 4823 return -ENOTTY;
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index b6a6f07c5ce2..b47f669aca75 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -141,7 +141,7 @@ static int lzo_compress_pages(struct list_head *ws,
141 ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, 141 ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
142 &out_len, workspace->mem); 142 &out_len, workspace->mem);
143 if (ret != LZO_E_OK) { 143 if (ret != LZO_E_OK) {
144 printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", 144 printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
145 ret); 145 ret);
146 ret = -1; 146 ret = -1;
147 goto out; 147 goto out;
@@ -357,7 +357,7 @@ cont:
357 if (need_unmap) 357 if (need_unmap)
358 kunmap(pages_in[page_in_index - 1]); 358 kunmap(pages_in[page_in_index - 1]);
359 if (ret != LZO_E_OK) { 359 if (ret != LZO_E_OK) {
360 printk(KERN_WARNING "btrfs decompress failed\n"); 360 printk(KERN_WARNING "BTRFS: decompress failed\n");
361 ret = -1; 361 ret = -1;
362 break; 362 break;
363 } 363 }
@@ -401,7 +401,7 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
401 out_len = PAGE_CACHE_SIZE; 401 out_len = PAGE_CACHE_SIZE;
402 ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); 402 ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
403 if (ret != LZO_E_OK) { 403 if (ret != LZO_E_OK) {
404 printk(KERN_WARNING "btrfs decompress failed!\n"); 404 printk(KERN_WARNING "BTRFS: decompress failed!\n");
405 ret = -1; 405 ret = -1;
406 goto out; 406 goto out;
407 } 407 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 69582d5b69d1..b16450b840e7 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -336,13 +336,14 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
336 entry->len); 336 entry->len);
337 *file_offset = dec_end; 337 *file_offset = dec_end;
338 if (dec_start > dec_end) { 338 if (dec_start > dec_end) {
339 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", 339 btrfs_crit(BTRFS_I(inode)->root->fs_info,
340 dec_start, dec_end); 340 "bad ordering dec_start %llu end %llu", dec_start, dec_end);
341 } 341 }
342 to_dec = dec_end - dec_start; 342 to_dec = dec_end - dec_start;
343 if (to_dec > entry->bytes_left) { 343 if (to_dec > entry->bytes_left) {
344 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", 344 btrfs_crit(BTRFS_I(inode)->root->fs_info,
345 entry->bytes_left, to_dec); 345 "bad ordered accounting left %llu size %llu",
346 entry->bytes_left, to_dec);
346 } 347 }
347 entry->bytes_left -= to_dec; 348 entry->bytes_left -= to_dec;
348 if (!uptodate) 349 if (!uptodate)
@@ -401,7 +402,8 @@ have_entry:
401 } 402 }
402 403
403 if (io_size > entry->bytes_left) { 404 if (io_size > entry->bytes_left) {
404 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", 405 btrfs_crit(BTRFS_I(inode)->root->fs_info,
406 "bad ordered accounting left %llu size %llu",
405 entry->bytes_left, io_size); 407 entry->bytes_left, io_size);
406 } 408 }
407 entry->bytes_left -= io_size; 409 entry->bytes_left -= io_size;
@@ -520,7 +522,8 @@ void btrfs_remove_ordered_extent(struct inode *inode,
520 spin_lock_irq(&tree->lock); 522 spin_lock_irq(&tree->lock);
521 node = &entry->rb_node; 523 node = &entry->rb_node;
522 rb_erase(node, &tree->tree); 524 rb_erase(node, &tree->tree);
523 tree->last = NULL; 525 if (tree->last == node)
526 tree->last = NULL;
524 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 527 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
525 spin_unlock_irq(&tree->lock); 528 spin_unlock_irq(&tree->lock);
526 529
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 24cad1695af7..65793edb38ca 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -69,23 +69,3 @@ out:
69 btrfs_free_path(path); 69 btrfs_free_path(path);
70 return ret; 70 return ret;
71} 71}
72
73int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
74{
75 struct btrfs_path *path;
76 struct btrfs_key key;
77 int ret;
78
79 key.objectid = BTRFS_ORPHAN_OBJECTID;
80 key.type = BTRFS_ORPHAN_ITEM_KEY;
81 key.offset = offset;
82
83 path = btrfs_alloc_path();
84 if (!path)
85 return -ENOMEM;
86
87 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
88
89 btrfs_free_path(path);
90 return ret;
91}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 417053b17181..6efd70d3b64f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -154,7 +154,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
154 u32 item_size) 154 u32 item_size)
155{ 155{
156 if (!IS_ALIGNED(item_size, sizeof(u64))) { 156 if (!IS_ALIGNED(item_size, sizeof(u64))) {
157 pr_warn("btrfs: uuid item with illegal size %lu!\n", 157 pr_warn("BTRFS: uuid item with illegal size %lu!\n",
158 (unsigned long)item_size); 158 (unsigned long)item_size);
159 return; 159 return;
160 } 160 }
@@ -249,7 +249,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
249 BTRFS_FILE_EXTENT_INLINE) { 249 BTRFS_FILE_EXTENT_INLINE) {
250 printk(KERN_INFO "\t\tinline extent data " 250 printk(KERN_INFO "\t\tinline extent data "
251 "size %u\n", 251 "size %u\n",
252 btrfs_file_extent_inline_len(l, fi)); 252 btrfs_file_extent_inline_len(l, i, fi));
253 break; 253 break;
254 } 254 }
255 printk(KERN_INFO "\t\textent data disk bytenr %llu " 255 printk(KERN_INFO "\t\textent data disk bytenr %llu "
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
new file mode 100644
index 000000000000..129b1dd28527
--- /dev/null
+++ b/fs/btrfs/props.c
@@ -0,0 +1,427 @@
1/*
2 * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/hashtable.h>
20#include "props.h"
21#include "btrfs_inode.h"
22#include "hash.h"
23#include "transaction.h"
24#include "xattr.h"
25
26#define BTRFS_PROP_HANDLERS_HT_BITS 8
27static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
28
29struct prop_handler {
30 struct hlist_node node;
31 const char *xattr_name;
32 int (*validate)(const char *value, size_t len);
33 int (*apply)(struct inode *inode, const char *value, size_t len);
34 const char *(*extract)(struct inode *inode);
35 int inheritable;
36};
37
38static int prop_compression_validate(const char *value, size_t len);
39static int prop_compression_apply(struct inode *inode,
40 const char *value,
41 size_t len);
42static const char *prop_compression_extract(struct inode *inode);
43
44static struct prop_handler prop_handlers[] = {
45 {
46 .xattr_name = XATTR_BTRFS_PREFIX "compression",
47 .validate = prop_compression_validate,
48 .apply = prop_compression_apply,
49 .extract = prop_compression_extract,
50 .inheritable = 1
51 },
52 {
53 .xattr_name = NULL
54 }
55};
56
57void __init btrfs_props_init(void)
58{
59 struct prop_handler *p;
60
61 hash_init(prop_handlers_ht);
62
63 for (p = &prop_handlers[0]; p->xattr_name; p++) {
64 u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
65
66 hash_add(prop_handlers_ht, &p->node, h);
67 }
68}
69
70static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
71{
72 struct hlist_head *h;
73
74 h = &prop_handlers_ht[hash_min(hash, BTRFS_PROP_HANDLERS_HT_BITS)];
75 if (hlist_empty(h))
76 return NULL;
77
78 return h;
79}
80
81static const struct prop_handler *
82find_prop_handler(const char *name,
83 const struct hlist_head *handlers)
84{
85 struct prop_handler *h;
86
87 if (!handlers) {
88 u64 hash = btrfs_name_hash(name, strlen(name));
89
90 handlers = find_prop_handlers_by_hash(hash);
91 if (!handlers)
92 return NULL;
93 }
94
95 hlist_for_each_entry(h, handlers, node)
96 if (!strcmp(h->xattr_name, name))
97 return h;
98
99 return NULL;
100}
101
102static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
103 struct inode *inode,
104 const char *name,
105 const char *value,
106 size_t value_len,
107 int flags)
108{
109 const struct prop_handler *handler;
110 int ret;
111
112 if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
113 return -EINVAL;
114
115 handler = find_prop_handler(name, NULL);
116 if (!handler)
117 return -EINVAL;
118
119 if (value_len == 0) {
120 ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
121 NULL, 0, flags);
122 if (ret)
123 return ret;
124
125 ret = handler->apply(inode, NULL, 0);
126 ASSERT(ret == 0);
127
128 return ret;
129 }
130
131 ret = handler->validate(value, value_len);
132 if (ret)
133 return ret;
134 ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
135 value, value_len, flags);
136 if (ret)
137 return ret;
138 ret = handler->apply(inode, value, value_len);
139 if (ret) {
140 __btrfs_setxattr(trans, inode, handler->xattr_name,
141 NULL, 0, flags);
142 return ret;
143 }
144
145 set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
146
147 return 0;
148}
149
150int btrfs_set_prop(struct inode *inode,
151 const char *name,
152 const char *value,
153 size_t value_len,
154 int flags)
155{
156 return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
157}
158
159static int iterate_object_props(struct btrfs_root *root,
160 struct btrfs_path *path,
161 u64 objectid,
162 void (*iterator)(void *,
163 const struct prop_handler *,
164 const char *,
165 size_t),
166 void *ctx)
167{
168 int ret;
169 char *name_buf = NULL;
170 char *value_buf = NULL;
171 int name_buf_len = 0;
172 int value_buf_len = 0;
173
174 while (1) {
175 struct btrfs_key key;
176 struct btrfs_dir_item *di;
177 struct extent_buffer *leaf;
178 u32 total_len, cur, this_len;
179 int slot;
180 const struct hlist_head *handlers;
181
182 slot = path->slots[0];
183 leaf = path->nodes[0];
184
185 if (slot >= btrfs_header_nritems(leaf)) {
186 ret = btrfs_next_leaf(root, path);
187 if (ret < 0)
188 goto out;
189 else if (ret > 0)
190 break;
191 continue;
192 }
193
194 btrfs_item_key_to_cpu(leaf, &key, slot);
195 if (key.objectid != objectid)
196 break;
197 if (key.type != BTRFS_XATTR_ITEM_KEY)
198 break;
199
200 handlers = find_prop_handlers_by_hash(key.offset);
201 if (!handlers)
202 goto next_slot;
203
204 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
205 cur = 0;
206 total_len = btrfs_item_size_nr(leaf, slot);
207
208 while (cur < total_len) {
209 u32 name_len = btrfs_dir_name_len(leaf, di);
210 u32 data_len = btrfs_dir_data_len(leaf, di);
211 unsigned long name_ptr, data_ptr;
212 const struct prop_handler *handler;
213
214 this_len = sizeof(*di) + name_len + data_len;
215 name_ptr = (unsigned long)(di + 1);
216 data_ptr = name_ptr + name_len;
217
218 if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
219 memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
220 name_ptr,
221 XATTR_BTRFS_PREFIX_LEN))
222 goto next_dir_item;
223
224 if (name_len >= name_buf_len) {
225 kfree(name_buf);
226 name_buf_len = name_len + 1;
227 name_buf = kmalloc(name_buf_len, GFP_NOFS);
228 if (!name_buf) {
229 ret = -ENOMEM;
230 goto out;
231 }
232 }
233 read_extent_buffer(leaf, name_buf, name_ptr, name_len);
234 name_buf[name_len] = '\0';
235
236 handler = find_prop_handler(name_buf, handlers);
237 if (!handler)
238 goto next_dir_item;
239
240 if (data_len > value_buf_len) {
241 kfree(value_buf);
242 value_buf_len = data_len;
243 value_buf = kmalloc(data_len, GFP_NOFS);
244 if (!value_buf) {
245 ret = -ENOMEM;
246 goto out;
247 }
248 }
249 read_extent_buffer(leaf, value_buf, data_ptr, data_len);
250
251 iterator(ctx, handler, value_buf, data_len);
252next_dir_item:
253 cur += this_len;
254 di = (struct btrfs_dir_item *)((char *) di + this_len);
255 }
256
257next_slot:
258 path->slots[0]++;
259 }
260
261 ret = 0;
262out:
263 btrfs_release_path(path);
264 kfree(name_buf);
265 kfree(value_buf);
266
267 return ret;
268}
269
270static void inode_prop_iterator(void *ctx,
271 const struct prop_handler *handler,
272 const char *value,
273 size_t len)
274{
275 struct inode *inode = ctx;
276 struct btrfs_root *root = BTRFS_I(inode)->root;
277 int ret;
278
279 ret = handler->apply(inode, value, len);
280 if (unlikely(ret))
281 btrfs_warn(root->fs_info,
282 "error applying prop %s to ino %llu (root %llu): %d",
283 handler->xattr_name, btrfs_ino(inode),
284 root->root_key.objectid, ret);
285 else
286 set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
287}
288
289int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
290{
291 struct btrfs_root *root = BTRFS_I(inode)->root;
292 u64 ino = btrfs_ino(inode);
293 int ret;
294
295 ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
296
297 return ret;
298}
299
300static int inherit_props(struct btrfs_trans_handle *trans,
301 struct inode *inode,
302 struct inode *parent)
303{
304 const struct prop_handler *h;
305 struct btrfs_root *root = BTRFS_I(inode)->root;
306 int ret;
307
308 if (!test_bit(BTRFS_INODE_HAS_PROPS,
309 &BTRFS_I(parent)->runtime_flags))
310 return 0;
311
312 for (h = &prop_handlers[0]; h->xattr_name; h++) {
313 const char *value;
314 u64 num_bytes;
315
316 if (!h->inheritable)
317 continue;
318
319 value = h->extract(parent);
320 if (!value)
321 continue;
322
323 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
324 ret = btrfs_block_rsv_add(root, trans->block_rsv,
325 num_bytes, BTRFS_RESERVE_NO_FLUSH);
326 if (ret)
327 goto out;
328 ret = __btrfs_set_prop(trans, inode, h->xattr_name,
329 value, strlen(value), 0);
330 btrfs_block_rsv_release(root, trans->block_rsv, num_bytes);
331 if (ret)
332 goto out;
333 }
334 ret = 0;
335out:
336 return ret;
337}
338
339int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
340 struct inode *inode,
341 struct inode *dir)
342{
343 if (!dir)
344 return 0;
345
346 return inherit_props(trans, inode, dir);
347}
348
349int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
350 struct btrfs_root *root,
351 struct btrfs_root *parent_root)
352{
353 struct btrfs_key key;
354 struct inode *parent_inode, *child_inode;
355 int ret;
356
357 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
358 key.type = BTRFS_INODE_ITEM_KEY;
359 key.offset = 0;
360
361 parent_inode = btrfs_iget(parent_root->fs_info->sb, &key,
362 parent_root, NULL);
363 if (IS_ERR(parent_inode))
364 return PTR_ERR(parent_inode);
365
366 child_inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
367 if (IS_ERR(child_inode)) {
368 iput(parent_inode);
369 return PTR_ERR(child_inode);
370 }
371
372 ret = inherit_props(trans, child_inode, parent_inode);
373 iput(child_inode);
374 iput(parent_inode);
375
376 return ret;
377}
378
379static int prop_compression_validate(const char *value, size_t len)
380{
381 if (!strncmp("lzo", value, len))
382 return 0;
383 else if (!strncmp("zlib", value, len))
384 return 0;
385
386 return -EINVAL;
387}
388
389static int prop_compression_apply(struct inode *inode,
390 const char *value,
391 size_t len)
392{
393 int type;
394
395 if (len == 0) {
396 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
397 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
398 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
399
400 return 0;
401 }
402
403 if (!strncmp("lzo", value, len))
404 type = BTRFS_COMPRESS_LZO;
405 else if (!strncmp("zlib", value, len))
406 type = BTRFS_COMPRESS_ZLIB;
407 else
408 return -EINVAL;
409
410 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
411 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
412 BTRFS_I(inode)->force_compress = type;
413
414 return 0;
415}
416
417static const char *prop_compression_extract(struct inode *inode)
418{
419 switch (BTRFS_I(inode)->force_compress) {
420 case BTRFS_COMPRESS_ZLIB:
421 return "zlib";
422 case BTRFS_COMPRESS_LZO:
423 return "lzo";
424 }
425
426 return NULL;
427}
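
__btrfs_set_prop() above persists the xattr first, applies the value in memory second, and deletes the xattr again if applying fails. A toy restatement of that set/apply/rollback shape, with stand-in store() and apply() functions rather than the kernel helpers:

#include <stdio.h>
#include <string.h>

static char stored[16];			/* stand-in for the persisted xattr */

static int store(const char *v)		/* stand-in for __btrfs_setxattr */
{
	snprintf(stored, sizeof(stored), "%s", v ? v : "");
	return 0;
}

static int apply(const char *v)		/* stand-in for handler->apply */
{
	return (v && strcmp(v, "bogus") == 0) ? -1 : 0;
}

static int set_prop(const char *v)
{
	int ret = store(v);

	if (ret)
		return ret;
	ret = apply(v);
	if (ret)
		store(NULL);	/* roll back the persisted value on failure */
	return ret;
}

int main(void)
{
	printf("set lzo:   %d (stored=\"%s\")\n", set_prop("lzo"), stored);
	printf("set bogus: %d (stored=\"%s\")\n", set_prop("bogus"), stored);
	return 0;
}
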
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
new file mode 100644
index 000000000000..100f18829d50
--- /dev/null
+++ b/fs/btrfs/props.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_PROPS_H
20#define __BTRFS_PROPS_H
21
22#include "ctree.h"
23
24void __init btrfs_props_init(void);
25
26int btrfs_set_prop(struct inode *inode,
27 const char *name,
28 const char *value,
29 size_t value_len,
30 int flags);
31
32int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
33
34int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
35 struct inode *inode,
36 struct inode *dir);
37
38int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
39 struct btrfs_root *root,
40 struct btrfs_root *parent_root);
41
42#endif
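
Since properties are stored as xattrs in the btrfs. namespace, they can also be set from userspace with plain setxattr(2). A small sketch; the path is an example:

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/btrfs/some-file";	/* example path */

	/* sets the btrfs.compression property directly through its xattr */
	if (setxattr(path, "btrfs.compression", "lzo", strlen("lzo"), 0))
		return perror("setxattr"), 1;
	return 0;
}
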
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4e6ef490619e..472302a2d745 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -301,16 +301,16 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
301 301
302 if (btrfs_qgroup_status_version(l, ptr) != 302 if (btrfs_qgroup_status_version(l, ptr) !=
303 BTRFS_QGROUP_STATUS_VERSION) { 303 BTRFS_QGROUP_STATUS_VERSION) {
304 printk(KERN_ERR 304 btrfs_err(fs_info,
305 "btrfs: old qgroup version, quota disabled\n"); 305 "old qgroup version, quota disabled");
306 goto out; 306 goto out;
307 } 307 }
308 if (btrfs_qgroup_status_generation(l, ptr) != 308 if (btrfs_qgroup_status_generation(l, ptr) !=
309 fs_info->generation) { 309 fs_info->generation) {
310 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 310 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
311 printk(KERN_ERR 311 btrfs_err(fs_info,
312 "btrfs: qgroup generation mismatch, " 312 "qgroup generation mismatch, "
313 "marked as inconsistent\n"); 313 "marked as inconsistent");
314 } 314 }
315 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 315 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
316 ptr); 316 ptr);
@@ -325,7 +325,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
325 qgroup = find_qgroup_rb(fs_info, found_key.offset); 325 qgroup = find_qgroup_rb(fs_info, found_key.offset);
326 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 326 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
327 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 327 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
328 printk(KERN_ERR "btrfs: inconsistent qgroup config\n"); 328 btrfs_err(fs_info, "inconsistent qgroup config");
329 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 329 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330 } 330 }
331 if (!qgroup) { 331 if (!qgroup) {
@@ -396,8 +396,8 @@ next1:
396 ret = add_relation_rb(fs_info, found_key.objectid, 396 ret = add_relation_rb(fs_info, found_key.objectid,
397 found_key.offset); 397 found_key.offset);
398 if (ret == -ENOENT) { 398 if (ret == -ENOENT) {
399 printk(KERN_WARNING 399 btrfs_warn(fs_info,
400 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", 400 "orphan qgroup relation 0x%llx->0x%llx",
401 found_key.objectid, found_key.offset); 401 found_key.objectid, found_key.offset);
402 ret = 0; /* ignore the error */ 402 ret = 0; /* ignore the error */
403 } 403 }
@@ -644,8 +644,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
644 644
645 l = path->nodes[0]; 645 l = path->nodes[0];
646 slot = path->slots[0]; 646 slot = path->slots[0];
647 qgroup_limit = btrfs_item_ptr(l, path->slots[0], 647 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
648 struct btrfs_qgroup_limit_item);
649 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); 648 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
650 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); 649 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
651 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); 650 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
@@ -687,8 +686,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
687 686
688 l = path->nodes[0]; 687 l = path->nodes[0];
689 slot = path->slots[0]; 688 slot = path->slots[0];
690 qgroup_info = btrfs_item_ptr(l, path->slots[0], 689 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
691 struct btrfs_qgroup_info_item);
692 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 690 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
693 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 691 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
694 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 692 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
@@ -1161,7 +1159,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1161 limit->rsv_excl); 1159 limit->rsv_excl);
1162 if (ret) { 1160 if (ret) {
1163 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1161 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1164 printk(KERN_INFO "unable to update quota limit for %llu\n", 1162 btrfs_info(fs_info, "unable to update quota limit for %llu",
1165 qgroupid); 1163 qgroupid);
1166 } 1164 }
1167 1165
@@ -1349,7 +1347,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1349 struct btrfs_delayed_ref_node *node, 1347 struct btrfs_delayed_ref_node *node,
1350 struct btrfs_delayed_extent_op *extent_op) 1348 struct btrfs_delayed_extent_op *extent_op)
1351{ 1349{
1352 struct btrfs_key ins;
1353 struct btrfs_root *quota_root; 1350 struct btrfs_root *quota_root;
1354 u64 ref_root; 1351 u64 ref_root;
1355 struct btrfs_qgroup *qgroup; 1352 struct btrfs_qgroup *qgroup;
@@ -1363,10 +1360,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1363 1360
1364 BUG_ON(!fs_info->quota_root); 1361 BUG_ON(!fs_info->quota_root);
1365 1362
1366 ins.objectid = node->bytenr;
1367 ins.offset = node->num_bytes;
1368 ins.type = BTRFS_EXTENT_ITEM_KEY;
1369
1370 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1363 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1371 node->type == BTRFS_SHARED_BLOCK_REF_KEY) { 1364 node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1372 struct btrfs_delayed_tree_ref *ref; 1365 struct btrfs_delayed_tree_ref *ref;
@@ -1840,7 +1833,9 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1840{ 1833{
1841 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 1834 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1842 return; 1835 return;
1843 pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n", 1836 btrfs_err(trans->root->fs_info,
1837 "qgroups not uptodate in trans handle %p: list is%s empty, "
1838 "seq is %#x.%x",
1844 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 1839 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1845 (u32)(trans->delayed_ref_elem.seq >> 32), 1840 (u32)(trans->delayed_ref_elem.seq >> 32),
1846 (u32)trans->delayed_ref_elem.seq); 1841 (u32)trans->delayed_ref_elem.seq);
@@ -1902,9 +1897,17 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1902 mutex_unlock(&fs_info->qgroup_rescan_lock); 1897 mutex_unlock(&fs_info->qgroup_rescan_lock);
1903 1898
1904 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 1899 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1900 u64 num_bytes;
1901
1905 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 1902 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1906 if (found.type != BTRFS_EXTENT_ITEM_KEY) 1903 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
1904 found.type != BTRFS_METADATA_ITEM_KEY)
1907 continue; 1905 continue;
1906 if (found.type == BTRFS_METADATA_ITEM_KEY)
1907 num_bytes = fs_info->extent_root->leafsize;
1908 else
1909 num_bytes = found.offset;
1910
1908 ret = btrfs_find_all_roots(trans, fs_info, found.objectid, 1911 ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1909 tree_mod_seq_elem.seq, &roots); 1912 tree_mod_seq_elem.seq, &roots);
1910 if (ret < 0) 1913 if (ret < 0)
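
In the rescan hunk above, METADATA_ITEM keys encode the tree level in key.offset rather than a byte count, so the accounted size falls back to the fixed tree-block size. Restated standalone, with the key-type values as defined in ctree.h:

#include <stdint.h>
#include <stdio.h>

#define BTRFS_EXTENT_ITEM_KEY	168
#define BTRFS_METADATA_ITEM_KEY	169

static uint64_t accounted_bytes(uint8_t type, uint64_t key_offset,
				uint64_t leafsize)
{
	/* metadata items carry the level in the offset, not a length */
	if (type == BTRFS_METADATA_ITEM_KEY)
		return leafsize;
	return key_offset;	/* extent items: offset is the byte count */
}

int main(void)
{
	printf("%llu\n", (unsigned long long)
	       accounted_bytes(BTRFS_METADATA_ITEM_KEY, 1, 16384)); /* 16384 */
	printf("%llu\n", (unsigned long long)
	       accounted_bytes(BTRFS_EXTENT_ITEM_KEY, 1048576, 16384));
	return 0;
}
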
@@ -1949,12 +1952,12 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1949 struct btrfs_qgroup_list *glist; 1952 struct btrfs_qgroup_list *glist;
1950 1953
1951 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux; 1954 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1952 qg->rfer += found.offset; 1955 qg->rfer += num_bytes;
1953 qg->rfer_cmpr += found.offset; 1956 qg->rfer_cmpr += num_bytes;
1954 WARN_ON(qg->tag >= seq); 1957 WARN_ON(qg->tag >= seq);
1955 if (qg->refcnt - seq == roots->nnodes) { 1958 if (qg->refcnt - seq == roots->nnodes) {
1956 qg->excl += found.offset; 1959 qg->excl += num_bytes;
1957 qg->excl_cmpr += found.offset; 1960 qg->excl_cmpr += num_bytes;
1958 } 1961 }
1959 qgroup_dirty(fs_info, qg); 1962 qgroup_dirty(fs_info, qg);
1960 1963
@@ -2037,10 +2040,10 @@ out:
2037 mutex_unlock(&fs_info->qgroup_rescan_lock); 2040 mutex_unlock(&fs_info->qgroup_rescan_lock);
2038 2041
2039 if (err >= 0) { 2042 if (err >= 0) {
2040 pr_info("btrfs: qgroup scan completed%s\n", 2043 btrfs_info(fs_info, "qgroup scan completed%s",
2041 err == 2 ? " (inconsistency flag cleared)" : ""); 2044 err == 2 ? " (inconsistency flag cleared)" : "");
2042 } else { 2045 } else {
2043 pr_err("btrfs: qgroup scan failed with %d\n", err); 2046 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2044 } 2047 }
2045 2048
2046 complete_all(&fs_info->qgroup_rescan_completion); 2049 complete_all(&fs_info->qgroup_rescan_completion);
@@ -2096,7 +2099,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2096 2099
2097 if (ret) { 2100 if (ret) {
2098err: 2101err:
2099 pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret); 2102 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2100 return ret; 2103 return ret;
2101 } 2104 }
2102 2105
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 1031b69252c5..31c797c48c3e 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -189,8 +189,8 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
189 */ 189 */
190#ifdef DEBUG 190#ifdef DEBUG
191 if (rec->generation != generation) { 191 if (rec->generation != generation) {
192 printk(KERN_DEBUG "generation mismatch for " 192 btrfs_debug(root->fs_info,
193 "(%llu,%d,%llu) %llu != %llu\n", 193 "generation mismatch for (%llu,%d,%llu) %llu != %llu",
194 key.objectid, key.type, key.offset, 194 key.objectid, key.type, key.offset,
195 rec->generation, generation); 195 rec->generation, generation);
196 } 196 }
@@ -365,8 +365,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
365 goto error; 365 goto error;
366 366
367 if (bbio->num_stripes > BTRFS_MAX_MIRRORS) { 367 if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
368 printk(KERN_ERR "btrfs readahead: more than %d copies not " 368 btrfs_err(root->fs_info,
369 "supported", BTRFS_MAX_MIRRORS); 369 "readahead: more than %d copies not supported",
370 BTRFS_MAX_MIRRORS);
370 goto error; 371 goto error;
371 } 372 }
372 373
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 429c73c374b8..07b3b36f40ee 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -94,6 +94,7 @@ struct backref_edge {
94 94
95#define LOWER 0 95#define LOWER 0
96#define UPPER 1 96#define UPPER 1
97#define RELOCATION_RESERVED_NODES 256
97 98
98struct backref_cache { 99struct backref_cache {
99 /* red black tree of all backref nodes in the cache */ 100 /* red black tree of all backref nodes in the cache */
@@ -176,6 +177,8 @@ struct reloc_control {
176 u64 merging_rsv_size; 177 u64 merging_rsv_size;
177 /* size of relocated tree nodes */ 178 /* size of relocated tree nodes */
178 u64 nodes_relocated; 179 u64 nodes_relocated;
180 /* reserved size for block group relocation*/
181 u64 reserved_bytes;
179 182
180 u64 search_start; 183 u64 search_start;
181 u64 extents_found; 184 u64 extents_found;
@@ -184,7 +187,6 @@ struct reloc_control {
184 unsigned int create_reloc_tree:1; 187 unsigned int create_reloc_tree:1;
185 unsigned int merge_reloc_tree:1; 188 unsigned int merge_reloc_tree:1;
186 unsigned int found_file_extent:1; 189 unsigned int found_file_extent:1;
187 unsigned int commit_transaction:1;
188}; 190};
189 191
190/* stages of data relocation */ 192/* stages of data relocation */
@@ -2309,9 +2311,6 @@ void free_reloc_roots(struct list_head *list)
2309 reloc_root = list_entry(list->next, struct btrfs_root, 2311 reloc_root = list_entry(list->next, struct btrfs_root,
2310 root_list); 2312 root_list);
2311 __del_reloc_root(reloc_root); 2313 __del_reloc_root(reloc_root);
2312 free_extent_buffer(reloc_root->node);
2313 free_extent_buffer(reloc_root->commit_root);
2314 kfree(reloc_root);
2315 } 2314 }
2316} 2315}
2317 2316
@@ -2353,10 +2352,9 @@ again:
2353 2352
2354 ret = merge_reloc_root(rc, root); 2353 ret = merge_reloc_root(rc, root);
2355 if (ret) { 2354 if (ret) {
2356 __del_reloc_root(reloc_root); 2355 if (list_empty(&reloc_root->root_list))
2357 free_extent_buffer(reloc_root->node); 2356 list_add_tail(&reloc_root->root_list,
2358 free_extent_buffer(reloc_root->commit_root); 2357 &reloc_roots);
2359 kfree(reloc_root);
2360 goto out; 2358 goto out;
2361 } 2359 }
2362 } else { 2360 } else {
@@ -2452,7 +2450,7 @@ static noinline_for_stack
2452struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, 2450struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2453 struct reloc_control *rc, 2451 struct reloc_control *rc,
2454 struct backref_node *node, 2452 struct backref_node *node,
2455 struct backref_edge *edges[], int *nr) 2453 struct backref_edge *edges[])
2456{ 2454{
2457 struct backref_node *next; 2455 struct backref_node *next;
2458 struct btrfs_root *root; 2456 struct btrfs_root *root;
@@ -2494,7 +2492,6 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2494 if (!root) 2492 if (!root)
2495 return NULL; 2493 return NULL;
2496 2494
2497 *nr = index;
2498 next = node; 2495 next = node;
2499 /* setup backref node path for btrfs_reloc_cow_block */ 2496 /* setup backref node path for btrfs_reloc_cow_block */
2500 while (1) { 2497 while (1) {
@@ -2590,28 +2587,36 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2590 struct btrfs_root *root = rc->extent_root; 2587 struct btrfs_root *root = rc->extent_root;
2591 u64 num_bytes; 2588 u64 num_bytes;
2592 int ret; 2589 int ret;
2590 u64 tmp;
2593 2591
2594 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2592 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2595 2593
2596 trans->block_rsv = rc->block_rsv; 2594 trans->block_rsv = rc->block_rsv;
2597 ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, 2595 rc->reserved_bytes += num_bytes;
2598 BTRFS_RESERVE_FLUSH_ALL); 2596 ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
2597 BTRFS_RESERVE_FLUSH_ALL);
2599 if (ret) { 2598 if (ret) {
2600 if (ret == -EAGAIN) 2599 if (ret == -EAGAIN) {
2601 rc->commit_transaction = 1; 2600 tmp = rc->extent_root->nodesize *
2601 RELOCATION_RESERVED_NODES;
2602 while (tmp <= rc->reserved_bytes)
2603 tmp <<= 1;
2604 /*
2605 * only one thread can access block_rsv at this point,
2606 * so we don't need to hold a lock to protect block_rsv.
2607 * we expand the reservation size here to allow enough
2608 * space for relocation, and we will return earlier in
2609 * the enospc case.
2610 */
2611 rc->block_rsv->size = tmp + rc->extent_root->nodesize *
2612 RELOCATION_RESERVED_NODES;
2613 }
2602 return ret; 2614 return ret;
2603 } 2615 }
2604 2616
2605 return 0; 2617 return 0;
2606} 2618}
2607 2619
2608static void release_metadata_space(struct reloc_control *rc,
2609 struct backref_node *node)
2610{
2611 u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
2612 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
2613}
2614
2615/* 2620/*
2616 * relocate a block tree, and then update pointers in upper level 2621 * relocate a block tree, and then update pointers in upper level
2617 * blocks that reference the block to point to the new location. 2622 * blocks that reference the block to point to the new location.
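
The -EAGAIN branch in reserve_metadata_space() above grows the reservation geometrically: double a base of RELOCATION_RESERVED_NODES tree blocks until it covers what has been consumed, then add one more base on top. Worked through with example numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t nodesize = 16384;		/* example tree-block size */
	uint64_t base = nodesize * 256;		/* RELOCATION_RESERVED_NODES */
	uint64_t reserved_bytes = 3 * base;	/* pretend consumption so far */
	uint64_t tmp = base;

	while (tmp <= reserved_bytes)		/* same loop as the hunk */
		tmp <<= 1;
	printf("new block_rsv size: %llu\n",	/* 4*base + base */
	       (unsigned long long)(tmp + base));
	return 0;
}
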
@@ -2633,7 +2638,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 	u32 blocksize;
 	u64 bytenr;
 	u64 generation;
-	int nr;
 	int slot;
 	int ret;
 	int err = 0;
@@ -2646,7 +2650,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 		cond_resched();
 
 		upper = edge->node[UPPER];
-		root = select_reloc_root(trans, rc, upper, edges, &nr);
+		root = select_reloc_root(trans, rc, upper, edges);
 		BUG_ON(!root);
 
 		if (upper->eb && !upper->locked) {
@@ -2898,7 +2902,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
 			       struct btrfs_path *path)
 {
 	struct btrfs_root *root;
-	int release = 0;
 	int ret = 0;
 
 	if (!node)
@@ -2915,7 +2918,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
 		ret = reserve_metadata_space(trans, rc, node);
 		if (ret)
 			goto out;
-		release = 1;
 	}
 
 	if (root) {
@@ -2940,11 +2942,8 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
 		ret = do_relocation(trans, rc, node, key, path, 1);
 	}
 out:
-	if (ret || node->level == 0 || node->cowonly) {
-		if (release)
-			release_metadata_space(rc, node);
+	if (ret || node->level == 0 || node->cowonly)
 		remove_backref_node(&rc->backref_cache, node);
-	}
 	return ret;
 }
 
@@ -3867,29 +3866,20 @@ static noinline_for_stack
 int prepare_to_relocate(struct reloc_control *rc)
 {
 	struct btrfs_trans_handle *trans;
-	int ret;
 
 	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
 					      BTRFS_BLOCK_RSV_TEMP);
 	if (!rc->block_rsv)
 		return -ENOMEM;
 
-	/*
-	 * reserve some space for creating reloc trees.
-	 * btrfs_init_reloc_root will use them when there
-	 * is no reservation in transaction handle.
-	 */
-	ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
-				  rc->extent_root->nodesize * 256,
-				  BTRFS_RESERVE_FLUSH_ALL);
-	if (ret)
-		return ret;
-
 	memset(&rc->cluster, 0, sizeof(rc->cluster));
 	rc->search_start = rc->block_group->key.objectid;
 	rc->extents_found = 0;
 	rc->nodes_relocated = 0;
 	rc->merging_rsv_size = 0;
+	rc->reserved_bytes = 0;
+	rc->block_rsv->size = rc->extent_root->nodesize *
+			      RELOCATION_RESERVED_NODES;
 
 	rc->create_reloc_tree = 1;
 	set_reloc_control(rc);
@@ -3933,6 +3923,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 	}
 
 	while (1) {
+		rc->reserved_bytes = 0;
+		ret = btrfs_block_rsv_refill(rc->extent_root,
+					rc->block_rsv, rc->block_rsv->size,
+					BTRFS_RESERVE_FLUSH_ALL);
+		if (ret) {
+			err = ret;
+			break;
+		}
 		progress++;
 		trans = btrfs_start_transaction(rc->extent_root, 0);
 		if (IS_ERR(trans)) {
@@ -4011,6 +4009,12 @@ restart:
 		if (!RB_EMPTY_ROOT(&blocks)) {
 			ret = relocate_tree_blocks(trans, rc, &blocks);
 			if (ret < 0) {
+				/*
+				 * if we fail to relocate tree blocks, force to update
+				 * backref cache when committing transaction.
+				 */
+				rc->backref_cache.last_trans = trans->transid - 1;
+
 				if (ret != -EAGAIN) {
 					err = ret;
 					break;
@@ -4020,14 +4024,8 @@ restart:
 			}
 		}
 
-		if (rc->commit_transaction) {
-			rc->commit_transaction = 0;
-			ret = btrfs_commit_transaction(trans, rc->extent_root);
-			BUG_ON(ret);
-		} else {
-			btrfs_end_transaction_throttle(trans, rc->extent_root);
-			btrfs_btree_balance_dirty(rc->extent_root);
-		}
+		btrfs_end_transaction_throttle(trans, rc->extent_root);
+		btrfs_btree_balance_dirty(rc->extent_root);
 		trans = NULL;
 
 		if (rc->stage == MOVE_DATA_EXTENTS &&
@@ -4247,7 +4245,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 		goto out;
 	}
 
-	printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
+	btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu",
 	       rc->block_group->key.objectid, rc->block_group->flags);
 
 	ret = btrfs_start_delalloc_roots(fs_info, 0);
@@ -4269,7 +4267,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 		if (rc->extents_found == 0)
 			break;
 
-		printk(KERN_INFO "btrfs: found %llu extents\n",
+		btrfs_info(extent_root->fs_info, "found %llu extents",
 		       rc->extents_found);
 
 		if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
@@ -4285,11 +4283,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 		}
 	}
 
-	filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
-				     rc->block_group->key.objectid,
-				     rc->block_group->key.objectid +
-				     rc->block_group->key.offset - 1);
-
 	WARN_ON(rc->block_group->pinned > 0);
 	WARN_ON(rc->block_group->reserved > 0);
 	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ec71ea44d2b4..1389b69059de 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -44,7 +44,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
 	if (!need_reset && btrfs_root_generation(item)
 		!= btrfs_root_generation_v2(item)) {
 		if (btrfs_root_generation_v2(item) != 0) {
-			printk(KERN_WARNING "btrfs: mismatching "
+			printk(KERN_WARNING "BTRFS: mismatching "
 					"generation and generation_v2 "
 					"found in root item. This root "
 					"was probably mounted with an "
@@ -154,7 +154,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
 
 	if (ret != 0) {
 		btrfs_print_leaf(root, path->nodes[0]);
-		printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
+		btrfs_crit(root->fs_info, "unable to update root key %llu %u %llu",
 		       key->objectid, key->type, key->offset);
 		BUG_ON(1);
 	}
@@ -400,21 +400,6 @@ out:
 	return err;
 }
 
-int btrfs_find_root_ref(struct btrfs_root *tree_root,
-			struct btrfs_path *path,
-			u64 root_id, u64 ref_id)
-{
-	struct btrfs_key key;
-	int ret;
-
-	key.objectid = root_id;
-	key.type = BTRFS_ROOT_REF_KEY;
-	key.offset = ref_id;
-
-	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
-	return ret;
-}
-
 /*
  * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
 * or BTRFS_ROOT_BACKREF_KEY.
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bb9a928fa3a8..efba5d1282ee 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -256,6 +256,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
 			    int mirror_num, u64 physical_for_dev_replace);
 static void copy_nocow_pages_worker(struct btrfs_work *work);
+static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
 
 
 static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@ -269,6 +271,29 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
 	wake_up(&sctx->list_wait);
 }
 
+static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+	while (atomic_read(&fs_info->scrub_pause_req)) {
+		mutex_unlock(&fs_info->scrub_lock);
+		wait_event(fs_info->scrub_pause_wait,
+		   atomic_read(&fs_info->scrub_pause_req) == 0);
+		mutex_lock(&fs_info->scrub_lock);
+	}
+}
+
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+	atomic_inc(&fs_info->scrubs_paused);
+	wake_up(&fs_info->scrub_pause_wait);
+
+	mutex_lock(&fs_info->scrub_lock);
+	__scrub_blocked_if_needed(fs_info);
+	atomic_dec(&fs_info->scrubs_paused);
+	mutex_unlock(&fs_info->scrub_lock);
+
+	wake_up(&fs_info->scrub_pause_wait);
+}
+
 /*
  * used for workers that require transaction commits (i.e., for the
 * NOCOW case)
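The split above is deliberate: __scrub_blocked_if_needed() assumes scrub_lock is already held and only waits out scrub_pause_req, dropping and re-taking the lock around each sleep, while the plain wrapper also flips the scrubs_paused counter and wakes waiters on both sides. A rough userspace model of the same shape, assuming pthreads (the names and condvar are stand-ins, and the signalling is simplified relative to the kernel's waitqueues):

    #include <pthread.h>
    #include <stdatomic.h>

    static pthread_mutex_t scrub_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t scrub_pause_wait = PTHREAD_COND_INITIALIZER;
    static atomic_int scrub_pause_req;
    static atomic_int scrubs_paused;

    /* Caller holds scrub_lock, like __scrub_blocked_if_needed(). */
    static void blocked_if_needed_locked(void)
    {
        while (atomic_load(&scrub_pause_req))
            pthread_cond_wait(&scrub_pause_wait, &scrub_lock);
    }

    /* Like scrub_blocked_if_needed(): advertise the pause, wait for the
     * request to clear, then un-advertise and wake the pausing thread. */
    static void blocked_if_needed(void)
    {
        atomic_fetch_add(&scrubs_paused, 1);
        pthread_cond_broadcast(&scrub_pause_wait);

        pthread_mutex_lock(&scrub_lock);
        blocked_if_needed_locked();
        atomic_fetch_sub(&scrubs_paused, 1);
        pthread_mutex_unlock(&scrub_lock);

        pthread_cond_broadcast(&scrub_pause_wait);
    }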
@@ -480,7 +505,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 	 * hold all of the paths here
 	 */
 	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
-		printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
+		printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
 			"%s, sector %llu, root %llu, inode %llu, offset %llu, "
 			"length %llu, links %u (path: %s)\n", swarn->errstr,
 			swarn->logical, rcu_str_deref(swarn->dev->name),
@@ -492,7 +517,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 	return 0;
 
 err:
-	printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
+	printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
 		"%s, sector %llu, root %llu, inode %llu, offset %llu: path "
 		"resolving failed with ret=%d\n", swarn->errstr,
 		swarn->logical, rcu_str_deref(swarn->dev->name),
@@ -555,7 +580,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 		ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
 					      &ref_root, &ref_level);
 		printk_in_rcu(KERN_WARNING
-			"btrfs: %s at logical %llu on dev %s, "
+			"BTRFS: %s at logical %llu on dev %s, "
 			"sector %llu: metadata %s (level %d) in tree "
 			"%llu\n", errstr, swarn.logical,
 			rcu_str_deref(dev->name),
@@ -704,13 +729,11 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
 	struct scrub_fixup_nodatasum *fixup;
 	struct scrub_ctx *sctx;
 	struct btrfs_trans_handle *trans = NULL;
-	struct btrfs_fs_info *fs_info;
 	struct btrfs_path *path;
 	int uncorrectable = 0;
 
 	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
 	sctx = fixup->sctx;
-	fs_info = fixup->root->fs_info;
 
 	path = btrfs_alloc_path();
 	if (!path) {
@@ -759,8 +782,8 @@ out:
 		btrfs_dev_replace_stats_inc(
 			&sctx->dev_root->fs_info->dev_replace.
 			num_uncorrectable_read_errors);
-		printk_ratelimited_in_rcu(KERN_ERR
-			"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
+		printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
+			"unable to fixup (nodatasum) error at logical %llu on dev %s\n",
 			fixup->logical, rcu_str_deref(fixup->dev->name));
 	}
 
@@ -1161,7 +1184,7 @@ corrected_error:
 			sctx->stat.corrected_errors++;
 			spin_unlock(&sctx->stat_lock);
 			printk_ratelimited_in_rcu(KERN_ERR
-				"btrfs: fixed up error at logical %llu on dev %s\n",
+				"BTRFS: fixed up error at logical %llu on dev %s\n",
 				logical, rcu_str_deref(dev->name));
 		}
 	} else {
@@ -1170,7 +1193,7 @@ did_not_correct_error:
 		sctx->stat.uncorrectable_errors++;
 		spin_unlock(&sctx->stat_lock);
 		printk_ratelimited_in_rcu(KERN_ERR
-			"btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
+			"BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
 			logical, rcu_str_deref(dev->name));
 	}
 
@@ -1418,8 +1441,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 	int ret;
 
 	if (!page_bad->dev->bdev) {
-		printk_ratelimited(KERN_WARNING
-			"btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
+		printk_ratelimited(KERN_WARNING "BTRFS: "
+			"scrub_repair_page_from_good_copy(bdev == NULL) "
+			"is unexpected!\n");
 		return -EIO;
 	}
 
@@ -1877,7 +1901,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
 		 * This case is handled correctly (but _very_ slowly).
 		 */
 		printk_ratelimited(KERN_WARNING
-			"btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
+			"BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
 		bio_endio(sbio->bio, -EIO);
 	} else {
 		btrfsic_submit_bio(READ, sbio->bio);
@@ -2286,8 +2310,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
 	wait_event(sctx->list_wait,
 		   atomic_read(&sctx->bios_in_flight) == 0);
-	atomic_inc(&fs_info->scrubs_paused);
-	wake_up(&fs_info->scrub_pause_wait);
+	scrub_blocked_if_needed(fs_info);
 
 	/* FIXME it might be better to start readahead at commit root */
 	key_start.objectid = logical;
@@ -2311,16 +2334,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	if (!IS_ERR(reada2))
 		btrfs_reada_wait(reada2);
 
-	mutex_lock(&fs_info->scrub_lock);
-	while (atomic_read(&fs_info->scrub_pause_req)) {
-		mutex_unlock(&fs_info->scrub_lock);
-		wait_event(fs_info->scrub_pause_wait,
-			   atomic_read(&fs_info->scrub_pause_req) == 0);
-		mutex_lock(&fs_info->scrub_lock);
-	}
-	atomic_dec(&fs_info->scrubs_paused);
-	mutex_unlock(&fs_info->scrub_lock);
-	wake_up(&fs_info->scrub_pause_wait);
 
 	/*
 	 * collect all data csums for the stripe to avoid seeking during
@@ -2357,22 +2370,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			wait_event(sctx->list_wait,
 				   atomic_read(&sctx->bios_in_flight) == 0);
 			atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
-			atomic_inc(&fs_info->scrubs_paused);
-			wake_up(&fs_info->scrub_pause_wait);
-			mutex_lock(&fs_info->scrub_lock);
-			while (atomic_read(&fs_info->scrub_pause_req)) {
-				mutex_unlock(&fs_info->scrub_lock);
-				wait_event(fs_info->scrub_pause_wait,
-				   atomic_read(&fs_info->scrub_pause_req) == 0);
-				mutex_lock(&fs_info->scrub_lock);
-			}
-			atomic_dec(&fs_info->scrubs_paused);
-			mutex_unlock(&fs_info->scrub_lock);
-			wake_up(&fs_info->scrub_pause_wait);
+			scrub_blocked_if_needed(fs_info);
 		}
 
+		if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+			key.type = BTRFS_METADATA_ITEM_KEY;
+		else
+			key.type = BTRFS_EXTENT_ITEM_KEY;
 		key.objectid = logical;
-		key.type = BTRFS_EXTENT_ITEM_KEY;
 		key.offset = (u64)-1;
 
 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -2380,8 +2385,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 			goto out;
 
 		if (ret > 0) {
-			ret = btrfs_previous_item(root, path, 0,
-						  BTRFS_EXTENT_ITEM_KEY);
+			ret = btrfs_previous_extent_item(root, path, 0);
 			if (ret < 0)
 				goto out;
 			if (ret > 0) {
@@ -2439,9 +2443,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 
 			if (key.objectid < logical &&
 			    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
-				printk(KERN_ERR
-				       "btrfs scrub: tree block %llu spanning "
-				       "stripes, ignored. logical=%llu\n",
+				btrfs_err(fs_info,
+				       "scrub: tree block %llu spanning "
+				       "stripes, ignored. logical=%llu",
 				       key.objectid, logical);
 				goto next;
 			}
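The SKINNY_METADATA branch above matters because skinny filesystems index tree blocks with a different item type whose key.offset holds the tree level instead of the extent length, so a scrub that only searched for BTRFS_EXTENT_ITEM_KEY would miss them. A sketch of the two key shapes (the type values match ctree.h; the sample numbers are made up):

    #include <stdint.h>

    #define BTRFS_EXTENT_ITEM_KEY   168
    #define BTRFS_METADATA_ITEM_KEY 169

    struct key { uint64_t objectid; uint8_t type; uint64_t offset; };

    /* Classic metadata extent: offset = extent size in bytes. */
    static const struct key classic = { 12582912, BTRFS_EXTENT_ITEM_KEY, 16384 };

    /* Skinny metadata: offset = tree level, making the item smaller. */
    static const struct key skinny = { 12582912, BTRFS_METADATA_ITEM_KEY, 1 };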
@@ -2683,21 +2687,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 		wait_event(sctx->list_wait,
 			   atomic_read(&sctx->bios_in_flight) == 0);
 		atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
-		atomic_inc(&fs_info->scrubs_paused);
-		wake_up(&fs_info->scrub_pause_wait);
 		wait_event(sctx->list_wait,
 			   atomic_read(&sctx->workers_pending) == 0);
-
-		mutex_lock(&fs_info->scrub_lock);
-		while (atomic_read(&fs_info->scrub_pause_req)) {
-			mutex_unlock(&fs_info->scrub_lock);
-			wait_event(fs_info->scrub_pause_wait,
-				   atomic_read(&fs_info->scrub_pause_req) == 0);
-			mutex_lock(&fs_info->scrub_lock);
-		}
-		atomic_dec(&fs_info->scrubs_paused);
-		mutex_unlock(&fs_info->scrub_lock);
-		wake_up(&fs_info->scrub_pause_wait);
+		scrub_blocked_if_needed(fs_info);
 
 		btrfs_put_block_group(cache);
 		if (ret)
@@ -2823,8 +2815,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	 * check some assumptions
 	 */
 	if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
-		printk(KERN_ERR
-		       "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
+		btrfs_err(fs_info,
+		       "scrub: size assumption nodesize == leafsize (%d == %d) fails",
 		       fs_info->chunk_root->nodesize,
 		       fs_info->chunk_root->leafsize);
 		return -EINVAL;
@@ -2836,16 +2828,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 		 * the way scrub is implemented. Do not handle this
 		 * situation at all because it won't ever happen.
 		 */
-		printk(KERN_ERR
-		       "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
+		btrfs_err(fs_info,
+		       "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
 		       fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
 		return -EINVAL;
 	}
 
 	if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
 		/* not supported for data w/o checksums */
-		printk(KERN_ERR
-		       "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n",
+		btrfs_err(fs_info,
+		       "scrub: size assumption sectorsize != PAGE_SIZE "
+		       "(%d != %lu) fails",
 		       fs_info->chunk_root->sectorsize, PAGE_SIZE);
 		return -EINVAL;
 	}
@@ -2858,7 +2851,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	 * would exhaust the array bounds of pagev member in
 	 * struct scrub_block
 	 */
-	pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
+	btrfs_err(fs_info, "scrub: size assumption nodesize and sectorsize "
+	       "<= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
 	       fs_info->chunk_root->nodesize,
 	       SCRUB_MAX_PAGES_PER_BLOCK,
 	       fs_info->chunk_root->sectorsize,
@@ -2908,7 +2902,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 	}
 	sctx->readonly = readonly;
 	dev->scrub_device = sctx;
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
+	/*
+	 * checking @scrub_pause_req here, we can avoid
+	 * race between committing transaction and scrubbing.
+	 */
+	__scrub_blocked_if_needed(fs_info);
 	atomic_inc(&fs_info->scrubs_running);
 	mutex_unlock(&fs_info->scrub_lock);
 
@@ -2917,9 +2917,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
 		 * by holding device list mutex, we can
 		 * kick off writing super in log tree sync.
 		 */
+		mutex_lock(&fs_info->fs_devices->device_list_mutex);
 		ret = scrub_supers(sctx, dev);
+		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 	}
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 
 	if (!ret)
 		ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@ -3167,7 +3168,8 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
 	ret = iterate_inodes_from_logical(logical, fs_info, path,
 					  record_inode_for_nocow, nocow_ctx);
 	if (ret != 0 && ret != -ENOENT) {
-		pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
+		btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, "
+			"phys %llu, len %llu, mir %u, ret %d",
 			logical, physical_for_dev_replace, len, mirror_num,
 			ret);
 		not_written = 1;
@@ -3289,7 +3291,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
again:
 	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
 	if (!page) {
-		pr_err("find_or_create_page() failed\n");
+		btrfs_err(fs_info, "find_or_create_page() failed");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -3361,7 +3363,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
 		return -EIO;
 	if (!dev->bdev) {
 		printk_ratelimited(KERN_WARNING
-			"btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
+			"BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
 		return -EIO;
 	}
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 945d1db98f26..730dce395858 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -88,8 +88,6 @@ struct send_ctx {
 	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
 	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */
 
-	struct vfsmount *mnt;
-
 	struct btrfs_root *send_root;
 	struct btrfs_root *parent_root;
 	struct clone_root *clone_roots;
@@ -111,6 +109,7 @@ struct send_ctx {
 	int cur_inode_deleted;
 	u64 cur_inode_size;
 	u64 cur_inode_mode;
+	u64 cur_inode_last_extent;
 
 	u64 send_progress;
 
@@ -122,6 +121,74 @@ struct send_ctx {
 	int name_cache_size;
 
 	char *read_buf;
+
+	/*
+	 * We process inodes by their increasing order, so if before an
+	 * incremental send we reverse the parent/child relationship of
+	 * directories such that a directory with a lower inode number was
+	 * the parent of a directory with a higher inode number, and the one
+	 * becoming the new parent got renamed too, we can't rename/move the
+	 * directory with lower inode number when we finish processing it - we
+	 * must process the directory with higher inode number first, then
+	 * rename/move it and then rename/move the directory with lower inode
+	 * number. Example follows.
+	 *
+	 * Tree state when the first send was performed:
+	 *
+	 * .
+	 * |-- a                   (ino 257)
+	 *     |-- b               (ino 258)
+	 *         |
+	 *         |
+	 *         |-- c           (ino 259)
+	 *         |   |-- d       (ino 260)
+	 *         |
+	 *         |-- c2          (ino 261)
+	 *
+	 * Tree state when the second (incremental) send is performed:
+	 *
+	 * .
+	 * |-- a                   (ino 257)
+	 *     |-- b               (ino 258)
+	 *         |-- c2          (ino 261)
+	 *             |-- d2      (ino 260)
+	 *                 |-- cc  (ino 259)
+	 *
+	 * The sequence of steps that led to the second state was:
+	 *
+	 * mv /a/b/c/d /a/b/c2/d2
+	 * mv /a/b/c /a/b/c2/d2/cc
+	 *
+	 * "c" has the lower inode number, but we can't move it (2nd mv
+	 * operation) before we move "d", which has the higher inode number.
+	 *
+	 * So we just memorize which move/rename operations must be performed
+	 * later when their respective parent is processed and moved/renamed.
+	 */
+
+	/* Indexed by parent directory inode number. */
+	struct rb_root pending_dir_moves;
+
+	/*
+	 * Reverse index, indexed by the inode number of a directory that
+	 * is waiting for the move/rename of its immediate parent before its
+	 * own move/rename can be performed.
+	 */
+	struct rb_root waiting_dir_moves;
+};
+
+struct pending_dir_move {
+	struct rb_node node;
+	struct list_head list;
+	u64 parent_ino;
+	u64 ino;
+	u64 gen;
+	struct list_head update_refs;
+};
+
+struct waiting_dir_move {
+	struct rb_node node;
+	u64 ino;
 };
 
 struct name_cache_entry {
@@ -145,6 +212,15 @@ struct name_cache_entry {
 	char name[];
 };
 
+static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
+
+static int need_send_hole(struct send_ctx *sctx)
+{
+	return (sctx->parent_root && !sctx->cur_inode_new &&
+		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
+		S_ISREG(sctx->cur_inode_mode));
+}
+
 static void fs_path_reset(struct fs_path *p)
 {
 	if (p->reversed) {
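need_send_hole() above is the gate for all of the hole-punching logic added further down: only an incremental send (parent_root set) of a regular file that exists with the same identity in both snapshots can require explicit holes. A self-contained model of the predicate:

    #include <stdbool.h>
    #include <stdio.h>

    struct ctx {                       /* minimal stand-in for send_ctx */
        bool has_parent_root;          /* incremental send? */
        bool cur_inode_new, cur_inode_new_gen, cur_inode_deleted;
        bool is_regular_file;
    };

    static bool need_send_hole(const struct ctx *s)
    {
        return s->has_parent_root && !s->cur_inode_new &&
               !s->cur_inode_new_gen && !s->cur_inode_deleted &&
               s->is_regular_file;
    }

    int main(void)
    {
        struct ctx full = { false, true, false, false, true };
        struct ctx incr = { true, false, false, false, true };
        printf("%d %d\n", need_send_hole(&full), need_send_hole(&incr));
        return 0;                      /* prints "0 1" */
    }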
@@ -336,16 +412,6 @@ out:
 	return ret;
 }
 
-#if 0
-static void fs_path_remove(struct fs_path *p)
-{
-	BUG_ON(p->reversed);
-	while (p->start != p->end && *p->end != '/')
-		p->end--;
-	*p->end = 0;
-}
-#endif
-
 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 {
 	int ret;
@@ -436,30 +502,15 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
 	return 0;
 }
 
-#if 0
-static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
-{
-	return tlv_put(sctx, attr, &value, sizeof(value));
-}
-
-static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
-{
-	__le16 tmp = cpu_to_le16(value);
-	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
-
-static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
-{
-	__le32 tmp = cpu_to_le32(value);
-	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
-#endif
+#define TLV_PUT_DEFINE_INT(bits) \
+	static int tlv_put_u##bits(struct send_ctx *sctx,	 \
+				   u##bits attr, u##bits value)	 \
+	{							 \
+		__le##bits __tmp = cpu_to_le##bits(value);	 \
+		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \
+	}
 
-static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
-{
-	__le64 tmp = cpu_to_le64(value);
-	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
-}
+TLV_PUT_DEFINE_INT(64)
 
 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
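The macro replaces three hand-written helpers, two of which were already dead #if 0 code. Note the quirk that the attr parameter's type widens along with the value type; callers pass small attribute constants, so this is harmless. The single instantiation kept, TLV_PUT_DEFINE_INT(64), expands to roughly:

    static int tlv_put_u64(struct send_ctx *sctx, u64 attr, u64 value)
    {
        __le64 __tmp = cpu_to_le64(value);
        return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));
    }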
@@ -475,17 +526,6 @@ static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
 	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
 }
 
-#if 0
-static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
-			    struct timespec *ts)
-{
-	struct btrfs_timespec bts;
-	bts.sec = cpu_to_le64(ts->tv_sec);
-	bts.nsec = cpu_to_le32(ts->tv_nsec);
-	return tlv_put(sctx, attr, &bts, sizeof(bts));
-}
-#endif
-
 static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
@@ -533,12 +573,6 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
 		if (ret < 0) \
 			goto tlv_put_failure; \
 	} while (0)
-#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
-	do { \
-		ret = tlv_put_timespec(sctx, attrtype, ts); \
-		if (ret < 0) \
-			goto tlv_put_failure; \
-	} while (0)
 #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
 	do { \
 		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
@@ -1270,7 +1304,7 @@ static int find_extent_clone(struct send_ctx *sctx,
 	if (!backref_ctx->found_itself) {
 		/* found a bug in backref code? */
 		ret = -EIO;
-		printk(KERN_ERR "btrfs: ERROR did not find backref in "
+		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
				"send_root. inode=%llu, offset=%llu, "
				"disk_byte=%llu found extent=%llu\n",
				ino, data_offset, disk_byte, found_key.objectid);
@@ -1343,7 +1377,7 @@ static int read_symlink(struct btrfs_root *root,
 	BUG_ON(compression);
 
 	off = btrfs_file_extent_inline_start(ei);
-	len = btrfs_file_extent_inline_len(path->nodes[0], ei);
+	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
 
 	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
 
@@ -1372,7 +1406,7 @@ static int gen_unique_name(struct send_ctx *sctx,
 		return -ENOMEM;
 
 	while (1) {
-		len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
+		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
				ino, gen, idx);
 		if (len >= sizeof(tmp)) {
 			/* should really not happen */
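The gen_unique_name() change is a subtle off-by-one: snprintf()'s size argument already includes room for the terminating NUL, so passing sizeof(tmp) - 1 wasted one byte and, worse, let a name of exactly sizeof(tmp) - 1 characters be silently truncated while still passing the len >= sizeof(tmp) check. A standalone demonstration with a tiny buffer:

    #include <stdio.h>

    int main(void)
    {
        char tmp[8];
        int len;

        /* old: size sizeof(tmp) - 1; "o1-2-34" (7 chars) is truncated
         * to "o1-2-3", yet len (7) < sizeof(tmp) (8) hides it */
        len = snprintf(tmp, sizeof(tmp) - 1, "%s", "o1-2-34");
        printf("old: len=%d str=%s\n", len, tmp);

        /* new: size sizeof(tmp); the same name fits, and truncation
         * is now exactly the len >= sizeof(tmp) case */
        len = snprintf(tmp, sizeof(tmp), "%s", "o1-2-34");
        printf("new: len=%d str=%s\n", len, tmp);
        return 0;
    }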
@@ -1933,6 +1967,7 @@ static void name_cache_free(struct send_ctx *sctx)
 */
 static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
+				     int skip_name_cache,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
@@ -1942,6 +1977,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
 	struct btrfs_path *path = NULL;
 	struct name_cache_entry *nce = NULL;
 
+	if (skip_name_cache)
+		goto get_ref;
 	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
@@ -1986,11 +2023,12 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
 		goto out_cache;
 	}
 
+get_ref:
 	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
-	if (ino < sctx->send_progress)
+	if (ino < sctx->send_progress && !skip_name_cache)
 		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
 	else
@@ -2014,6 +2052,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
 			goto out;
 		ret = 1;
 	}
+	if (skip_name_cache)
+		goto out;
 
out_cache:
 	/*
@@ -2081,6 +2121,9 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
 	u64 parent_inode = 0;
 	u64 parent_gen = 0;
 	int stop = 0;
+	u64 start_ino = ino;
+	u64 start_gen = gen;
+	int skip_name_cache = 0;
 
 	name = fs_path_alloc();
 	if (!name) {
@@ -2088,19 +2131,32 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
 		goto out;
 	}
 
+	if (is_waiting_for_move(sctx, ino))
+		skip_name_cache = 1;
+
+again:
 	dest->reversed = 1;
 	fs_path_reset(dest);
 
 	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
 		fs_path_reset(name);
 
-		ret = __get_cur_name_and_parent(sctx, ino, gen,
+		ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache,
				&parent_inode, &parent_gen, name);
 		if (ret < 0)
 			goto out;
 		if (ret)
 			stop = 1;
 
+		if (!skip_name_cache &&
+		    is_waiting_for_move(sctx, parent_inode)) {
+			ino = start_ino;
+			gen = start_gen;
+			stop = 0;
+			skip_name_cache = 1;
+			goto again;
+		}
+
 		ret = fs_path_add_path(dest, name);
 		if (ret < 0)
 			goto out;
@@ -2131,7 +2187,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
 	char *name = NULL;
 	int namelen;
 
-	path = alloc_path_for_send();
+	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
@@ -2180,12 +2236,12 @@ static int send_subvol_begin(struct send_ctx *sctx)
 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
			sctx->send_root->root_item.uuid);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
-		    sctx->send_root->root_item.ctransid);
+		    le64_to_cpu(sctx->send_root->root_item.ctransid));
 	if (parent_root) {
 		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
				sctx->parent_root->root_item.uuid);
 		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
-			    sctx->parent_root->root_item.ctransid);
+			    le64_to_cpu(sctx->parent_root->root_item.ctransid));
 	}
 
 	ret = send_cmd(sctx);
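The le64_to_cpu() additions here (and in send_clone further down) fix an endianness bug: root_item.ctransid is stored little-endian on disk (__le64), so sending the raw field produced byte-swapped ctransid values in the stream on big-endian hosts. A minimal userspace sketch of what the conversion does (the helper name and swap intrinsic are stand-ins for the kernel's le64_to_cpu):

    #include <stdint.h>
    #include <stdio.h>

    /* Identity on little-endian hosts, byte swap on big-endian ones. */
    static uint64_t le64_to_cpu_sketch(uint64_t disk_le64)
    {
    #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return __builtin_bswap64(disk_le64);
    #else
        return disk_le64;
    #endif
    }

    int main(void)
    {
        /* on a big-endian host the raw on-disk field needs this swap */
        printf("%llu\n", (unsigned long long)le64_to_cpu_sketch(1));
        return 0;
    }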
@@ -2672,10 +2728,349 @@ out:
 	return ret;
 }
 
+static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
+{
+	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
+	struct waiting_dir_move *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct waiting_dir_move, node);
+		if (ino < entry->ino)
+			n = n->rb_left;
+		else if (ino > entry->ino)
+			n = n->rb_right;
+		else
+			return 1;
+	}
+	return 0;
+}
+
+static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+{
+	struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
+	struct rb_node *parent = NULL;
+	struct waiting_dir_move *entry, *dm;
+
+	dm = kmalloc(sizeof(*dm), GFP_NOFS);
+	if (!dm)
+		return -ENOMEM;
+	dm->ino = ino;
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct waiting_dir_move, node);
+		if (ino < entry->ino) {
+			p = &(*p)->rb_left;
+		} else if (ino > entry->ino) {
+			p = &(*p)->rb_right;
+		} else {
+			kfree(dm);
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&dm->node, parent, p);
+	rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
+	return 0;
+}
+
+#ifdef CONFIG_BTRFS_ASSERT
+
+static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+{
+	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
+	struct waiting_dir_move *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct waiting_dir_move, node);
+		if (ino < entry->ino) {
+			n = n->rb_left;
+		} else if (ino > entry->ino) {
+			n = n->rb_right;
+		} else {
+			rb_erase(&entry->node, &sctx->waiting_dir_moves);
+			kfree(entry);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+#endif
+
+static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
+{
+	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
+	struct rb_node *parent = NULL;
+	struct pending_dir_move *entry, *pm;
+	struct recorded_ref *cur;
+	int exists = 0;
+	int ret;
+
+	pm = kmalloc(sizeof(*pm), GFP_NOFS);
+	if (!pm)
+		return -ENOMEM;
+	pm->parent_ino = parent_ino;
+	pm->ino = sctx->cur_ino;
+	pm->gen = sctx->cur_inode_gen;
+	INIT_LIST_HEAD(&pm->list);
+	INIT_LIST_HEAD(&pm->update_refs);
+	RB_CLEAR_NODE(&pm->node);
+
+	while (*p) {
+		parent = *p;
+		entry = rb_entry(parent, struct pending_dir_move, node);
+		if (parent_ino < entry->parent_ino) {
+			p = &(*p)->rb_left;
+		} else if (parent_ino > entry->parent_ino) {
+			p = &(*p)->rb_right;
+		} else {
+			exists = 1;
+			break;
+		}
+	}
+
+	list_for_each_entry(cur, &sctx->deleted_refs, list) {
+		ret = dup_ref(cur, &pm->update_refs);
+		if (ret < 0)
+			goto out;
+	}
+	list_for_each_entry(cur, &sctx->new_refs, list) {
+		ret = dup_ref(cur, &pm->update_refs);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = add_waiting_dir_move(sctx, pm->ino);
+	if (ret)
+		goto out;
+
+	if (exists) {
+		list_add_tail(&pm->list, &entry->list);
+	} else {
+		rb_link_node(&pm->node, parent, p);
+		rb_insert_color(&pm->node, &sctx->pending_dir_moves);
+	}
+	ret = 0;
+out:
+	if (ret) {
+		__free_recorded_refs(&pm->update_refs);
+		kfree(pm);
+	}
+	return ret;
+}
+
+static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
+						      u64 parent_ino)
+{
+	struct rb_node *n = sctx->pending_dir_moves.rb_node;
+	struct pending_dir_move *entry;
+
+	while (n) {
+		entry = rb_entry(n, struct pending_dir_move, node);
+		if (parent_ino < entry->parent_ino)
+			n = n->rb_left;
+		else if (parent_ino > entry->parent_ino)
+			n = n->rb_right;
+		else
+			return entry;
+	}
+	return NULL;
+}
+
+static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
+{
+	struct fs_path *from_path = NULL;
+	struct fs_path *to_path = NULL;
+	u64 orig_progress = sctx->send_progress;
+	struct recorded_ref *cur;
+	int ret;
+
+	from_path = fs_path_alloc();
+	if (!from_path)
+		return -ENOMEM;
+
+	sctx->send_progress = pm->ino;
+	ret = get_cur_path(sctx, pm->ino, pm->gen, from_path);
+	if (ret < 0)
+		goto out;
+
+	to_path = fs_path_alloc();
+	if (!to_path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	sctx->send_progress = sctx->cur_ino + 1;
+	ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0);
+	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
+	if (ret < 0)
+		goto out;
+
+	ret = send_rename(sctx, from_path, to_path);
+	if (ret < 0)
+		goto out;
+
+	ret = send_utimes(sctx, pm->ino, pm->gen);
+	if (ret < 0)
+		goto out;
+
+	/*
+	 * After rename/move, need to update the utimes of both new parent(s)
+	 * and old parent(s).
+	 */
+	list_for_each_entry(cur, &pm->update_refs, list) {
+		ret = send_utimes(sctx, cur->dir, cur->dir_gen);
+		if (ret < 0)
+			goto out;
+	}
+
+out:
+	fs_path_free(from_path);
+	fs_path_free(to_path);
+	sctx->send_progress = orig_progress;
+
+	return ret;
+}
+
+static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
+{
+	if (!list_empty(&m->list))
+		list_del(&m->list);
+	if (!RB_EMPTY_NODE(&m->node))
+		rb_erase(&m->node, &sctx->pending_dir_moves);
+	__free_recorded_refs(&m->update_refs);
+	kfree(m);
+}
+
+static void tail_append_pending_moves(struct pending_dir_move *moves,
+				      struct list_head *stack)
+{
+	if (list_empty(&moves->list)) {
+		list_add_tail(&moves->list, stack);
+	} else {
+		LIST_HEAD(list);
+		list_splice_init(&moves->list, &list);
+		list_add_tail(&moves->list, stack);
+		list_splice_tail(&list, stack);
+	}
+}
+
+static int apply_children_dir_moves(struct send_ctx *sctx)
+{
+	struct pending_dir_move *pm;
+	struct list_head stack;
+	u64 parent_ino = sctx->cur_ino;
+	int ret = 0;
+
+	pm = get_pending_dir_moves(sctx, parent_ino);
+	if (!pm)
+		return 0;
+
+	INIT_LIST_HEAD(&stack);
+	tail_append_pending_moves(pm, &stack);
+
+	while (!list_empty(&stack)) {
+		pm = list_first_entry(&stack, struct pending_dir_move, list);
+		parent_ino = pm->ino;
+		ret = apply_dir_move(sctx, pm);
+		free_pending_move(sctx, pm);
+		if (ret)
+			goto out;
+		pm = get_pending_dir_moves(sctx, parent_ino);
+		if (pm)
+			tail_append_pending_moves(pm, &stack);
+	}
+	return 0;
+
+out:
+	while (!list_empty(&stack)) {
+		pm = list_first_entry(&stack, struct pending_dir_move, list);
+		free_pending_move(sctx, pm);
+	}
+	return ret;
+}
+
+static int wait_for_parent_move(struct send_ctx *sctx,
+				struct recorded_ref *parent_ref)
+{
+	int ret;
+	u64 ino = parent_ref->dir;
+	u64 parent_ino_before, parent_ino_after;
+	u64 new_gen, old_gen;
+	struct fs_path *path_before = NULL;
+	struct fs_path *path_after = NULL;
+	int len1, len2;
+
+	if (parent_ref->dir <= sctx->cur_ino)
+		return 0;
+
+	if (is_waiting_for_move(sctx, ino))
+		return 1;
+
+	ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
+			     NULL, NULL, NULL, NULL);
+	if (ret == -ENOENT)
+		return 0;
+	else if (ret < 0)
+		return ret;
+
+	ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen,
+			     NULL, NULL, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	if (new_gen != old_gen)
+		return 0;
+
+	path_before = fs_path_alloc();
+	if (!path_before)
+		return -ENOMEM;
+
+	ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
+			    NULL, path_before);
+	if (ret == -ENOENT) {
+		ret = 0;
+		goto out;
+	} else if (ret < 0) {
+		goto out;
+	}
+
+	path_after = fs_path_alloc();
+	if (!path_after) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
+			    NULL, path_after);
+	if (ret == -ENOENT) {
+		ret = 0;
+		goto out;
+	} else if (ret < 0) {
+		goto out;
+	}
+
+	len1 = fs_path_len(path_before);
+	len2 = fs_path_len(path_after);
+	if ((parent_ino_before != parent_ino_after) && (len1 != len2 ||
+	     memcmp(path_before->start, path_after->start, len1))) {
+		ret = 1;
+		goto out;
+	}
+	ret = 0;
+
+out:
+	fs_path_free(path_before);
+	fs_path_free(path_after);
+
+	return ret;
+}
+
 /*
 * This does all the move/link/unlink/rmdir magic.
 */
-static int process_recorded_refs(struct send_ctx *sctx)
+static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 {
 	int ret = 0;
 	struct recorded_ref *cur;
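Deferred moves are replayed once their blocking parent has itself been processed: apply_children_dir_moves() pops moves off a work list, and each applied move can unblock further children, which tail_append_pending_moves() splices behind it. A self-contained toy of that cascade, with numbers loosely following the example in the send_ctx comment (the kernel version is iterative with a stack and rb-tree lookups; recursion here is just for brevity):

    #include <stdio.h>

    /* "inode X cannot be renamed until inode Y has been moved first";
     * waits_on == 0 means the move is unblocked from the start. */
    struct pending { int ino; int waits_on; int done; };

    static struct pending moves[] = {
        { 260, 0,   0 },    /* d -> d2: ready once its new parent exists */
        { 259, 260, 0 },    /* c -> cc: must wait for 260 to move first */
    };

    static void apply_ready(int unblocked_ino)
    {
        for (unsigned i = 0; i < sizeof(moves) / sizeof(moves[0]); i++) {
            if (!moves[i].done && moves[i].waits_on == unblocked_ino) {
                moves[i].done = 1;
                printf("rename ino %d\n", moves[i].ino);
                apply_ready(moves[i].ino);  /* newly unblocked children */
            }
        }
    }

    int main(void)
    {
        apply_ready(0);     /* prints: rename ino 260, rename ino 259 */
        return 0;
    }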
@@ -2824,11 +3219,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
			 * dirs, we always have one new and one deleted
			 * ref. The deleted ref is ignored later.
			 */
-			ret = send_rename(sctx, valid_path,
-					cur->full_path);
-			if (ret < 0)
-				goto out;
-			ret = fs_path_copy(valid_path, cur->full_path);
+			if (wait_for_parent_move(sctx, cur)) {
+				ret = add_pending_dir_move(sctx,
+							   cur->dir);
+				*pending_move = 1;
+			} else {
+				ret = send_rename(sctx, valid_path,
+						  cur->full_path);
+				if (!ret)
+					ret = fs_path_copy(valid_path,
+							   cur->full_path);
+			}
			if (ret < 0)
				goto out;
		} else {
@@ -3197,6 +3598,7 @@ static int process_all_refs(struct send_ctx *sctx,
 	struct extent_buffer *eb;
 	int slot;
 	iterate_inode_ref_t cb;
+	int pending_move = 0;
 
 	path = alloc_path_for_send();
 	if (!path)
@@ -3240,7 +3642,9 @@ static int process_all_refs(struct send_ctx *sctx,
 	}
 	btrfs_release_path(path);
 
-	ret = process_recorded_refs(sctx);
+	ret = process_recorded_refs(sctx, &pending_move);
+	/* Only applicable to an incremental send. */
+	ASSERT(pending_move == 0);
 
out:
 	btrfs_free_path(path);
@@ -3706,7 +4110,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
		     clone_root->root->root_item.uuid);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
-		    clone_root->root->root_item.ctransid);
+		    le64_to_cpu(clone_root->root->root_item.ctransid));
 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
		    clone_root->offset);
@@ -3752,6 +4156,39 @@ out:
 	return ret;
 }
 
+static int send_hole(struct send_ctx *sctx, u64 end)
+{
+	struct fs_path *p = NULL;
+	u64 offset = sctx->cur_inode_last_extent;
+	u64 len;
+	int ret = 0;
+
+	p = fs_path_alloc();
+	if (!p)
+		return -ENOMEM;
+	memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
+	while (offset < end) {
+		len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
+
+		ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
+		if (ret < 0)
+			break;
+		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+		if (ret < 0)
+			break;
+		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
+		TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
+		TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
+		ret = send_cmd(sctx);
+		if (ret < 0)
+			break;
+		offset += len;
+	}
+tlv_put_failure:
+	fs_path_free(p);
+	return ret;
+}
+
 static int send_write_or_clone(struct send_ctx *sctx,
			       struct btrfs_path *path,
			       struct btrfs_key *key,
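send_hole() above shows that the send stream has no dedicated hole command at this point: a detected hole is materialized as ordinary BTRFS_SEND_C_WRITE commands carrying zeroed data, chunked at BTRFS_SEND_READ_SIZE. Assuming the 48K value send.h used at the time, the command count for a hole is easy to estimate:

    #include <stdint.h>
    #include <stdio.h>

    #define BTRFS_SEND_READ_SIZE (1024 * 48)   /* assumed, per send.h */

    static uint64_t hole_write_cmds(uint64_t start, uint64_t end)
    {
        uint64_t len = end - start;
        return (len + BTRFS_SEND_READ_SIZE - 1) / BTRFS_SEND_READ_SIZE;
    }

    int main(void)
    {
        /* a 1 MiB hole becomes 22 zero-filled write commands */
        printf("%llu\n", (unsigned long long)hole_write_cmds(0, 1 << 20));
        return 0;
    }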
@@ -3764,12 +4201,14 @@ static int send_write_or_clone(struct send_ctx *sctx,
 	u64 len;
 	u32 l;
 	u8 type;
+	u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
 
 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_file_extent_item);
 	type = btrfs_file_extent_type(path->nodes[0], ei);
 	if (type == BTRFS_FILE_EXTENT_INLINE) {
-		len = btrfs_file_extent_inline_len(path->nodes[0], ei);
+		len = btrfs_file_extent_inline_len(path->nodes[0],
+						   path->slots[0], ei);
 		/*
		 * it is possible the inline item won't cover the whole page,
		 * but there may be items after this page. Make
@@ -3787,7 +4226,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
 		goto out;
 	}
 
-	if (clone_root) {
+	if (clone_root && IS_ALIGNED(offset + len, bs)) {
 		ret = send_clone(sctx, offset, len, clone_root);
 	} else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
 		ret = send_update_extent(sctx, offset, len);
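The IS_ALIGNED() guard above means a clone is attempted only when the extent ends on a block boundary; an unaligned tail (typically the last extent of a file whose size is not block-aligned) now falls back to a plain write, since clone ranges must be block-aligned. The decision, modeled with an assumed 4K block size:

    #include <stdbool.h>
    #include <stdint.h>

    #define IS_ALIGNED(x, a) (((x) & ((uint64_t)(a) - 1)) == 0)

    static bool can_clone(uint64_t offset, uint64_t len, bool have_clone_root)
    {
        const uint64_t bs = 4096;      /* assumed sb->s_blocksize */
        return have_clone_root && IS_ALIGNED(offset + len, bs);
    }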
@@ -3979,6 +4418,101 @@ out:
 	return ret;
 }
 
+static int get_last_extent(struct send_ctx *sctx, u64 offset)
+{
+	struct btrfs_path *path;
+	struct btrfs_root *root = sctx->send_root;
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+	u64 extent_end;
+	u8 type;
+	int ret;
+
+	path = alloc_path_for_send();
+	if (!path)
+		return -ENOMEM;
+
+	sctx->cur_inode_last_extent = 0;
+
+	key.objectid = sctx->cur_ino;
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = offset;
+	ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
+	if (ret < 0)
+		goto out;
+	ret = 0;
+	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+	if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
+		goto out;
+
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	type = btrfs_file_extent_type(path->nodes[0], fi);
+	if (type == BTRFS_FILE_EXTENT_INLINE) {
+		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
+							path->slots[0], fi);
+		extent_end = ALIGN(key.offset + size,
+				   sctx->send_root->sectorsize);
+	} else {
+		extent_end = key.offset +
+			btrfs_file_extent_num_bytes(path->nodes[0], fi);
+	}
+	sctx->cur_inode_last_extent = extent_end;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
+			   struct btrfs_key *key)
+{
+	struct btrfs_file_extent_item *fi;
+	u64 extent_end;
+	u8 type;
+	int ret = 0;
+
+	if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
+		return 0;
+
+	if (sctx->cur_inode_last_extent == (u64)-1) {
+		ret = get_last_extent(sctx, key->offset - 1);
+		if (ret)
+			return ret;
+	}
+
+	fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_file_extent_item);
+	type = btrfs_file_extent_type(path->nodes[0], fi);
+	if (type == BTRFS_FILE_EXTENT_INLINE) {
+		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
+							path->slots[0], fi);
+		extent_end = ALIGN(key->offset + size,
+				   sctx->send_root->sectorsize);
+	} else {
+		extent_end = key->offset +
+			btrfs_file_extent_num_bytes(path->nodes[0], fi);
+	}
+
+	if (path->slots[0] == 0 &&
+	    sctx->cur_inode_last_extent < key->offset) {
+		/*
+		 * We might have skipped entire leafs that contained only
+		 * file extent items for our current inode. These leafs have
+		 * a generation number smaller (older) than the one in the
+		 * current leaf and the leaf our last extent came from, and
+		 * are located between these 2 leafs.
+		 */
+		ret = get_last_extent(sctx, key->offset - 1);
+		if (ret)
+			return ret;
+	}
+
+	if (sctx->cur_inode_last_extent < key->offset)
+		ret = send_hole(sctx, key->offset);
+	sctx->cur_inode_last_extent = extent_end;
+	return ret;
+}
+
 static int process_extent(struct send_ctx *sctx,
			  struct btrfs_path *path,
			  struct btrfs_key *key)
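Note how both helpers round an inline extent's end up to a full sector: an inline extent implicitly covers the remainder of its first block, so a following extent starting at the next sector boundary must not be misread as preceded by a hole. Worked numbers, assuming a 4096-byte sectorsize:

    #include <stdint.h>
    #include <stdio.h>

    #define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

    int main(void)
    {
        uint64_t key_offset = 0;     /* inline extents start at offset 0 */
        uint64_t inline_len = 300;   /* bytes stored inline in the leaf */
        uint64_t sectorsize = 4096;  /* assumed */

        /* prints 4096: no hole is reported before an extent at 4096 */
        printf("%llu\n",
               (unsigned long long)ALIGN(key_offset + inline_len, sectorsize));
        return 0;
    }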
@@ -3995,7 +4529,7 @@ static int process_extent(struct send_ctx *sctx,
3995 goto out; 4529 goto out;
3996 if (ret) { 4530 if (ret) {
3997 ret = 0; 4531 ret = 0;
3998 goto out; 4532 goto out_hole;
3999 } 4533 }
4000 } else { 4534 } else {
4001 struct btrfs_file_extent_item *ei; 4535 struct btrfs_file_extent_item *ei;
@@ -4031,7 +4565,10 @@ static int process_extent(struct send_ctx *sctx,
4031 goto out; 4565 goto out;
4032 4566
4033 ret = send_write_or_clone(sctx, path, key, found_clone); 4567 ret = send_write_or_clone(sctx, path, key, found_clone);
4034 4568 if (ret)
4569 goto out;
4570out_hole:
4571 ret = maybe_send_hole(sctx, path, key);
4035out: 4572out:
4036 return ret; 4573 return ret;
4037} 4574}
@@ -4054,17 +4591,25 @@ static int process_all_extents(struct send_ctx *sctx)
4054 key.objectid = sctx->cmp_key->objectid; 4591 key.objectid = sctx->cmp_key->objectid;
4055 key.type = BTRFS_EXTENT_DATA_KEY; 4592 key.type = BTRFS_EXTENT_DATA_KEY;
4056 key.offset = 0; 4593 key.offset = 0;
4057 while (1) { 4594 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4058 ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); 4595 if (ret < 0)
4059 if (ret < 0) 4596 goto out;
4060 goto out;
4061 if (ret) {
4062 ret = 0;
4063 goto out;
4064 }
4065 4597
4598 while (1) {
4066 eb = path->nodes[0]; 4599 eb = path->nodes[0];
4067 slot = path->slots[0]; 4600 slot = path->slots[0];
4601
4602 if (slot >= btrfs_header_nritems(eb)) {
4603 ret = btrfs_next_leaf(root, path);
4604 if (ret < 0) {
4605 goto out;
4606 } else if (ret > 0) {
4607 ret = 0;
4608 break;
4609 }
4610 continue;
4611 }
4612
4068 btrfs_item_key_to_cpu(eb, &found_key, slot); 4613 btrfs_item_key_to_cpu(eb, &found_key, slot);
4069 4614
4070 if (found_key.objectid != key.objectid || 4615 if (found_key.objectid != key.objectid ||
@@ -4077,8 +4622,7 @@ static int process_all_extents(struct send_ctx *sctx)
4077 if (ret < 0) 4622 if (ret < 0)
4078 goto out; 4623 goto out;
4079 4624
4080 btrfs_release_path(path); 4625 path->slots[0]++;
4081 key.offset = found_key.offset + 1;
4082 } 4626 }
4083 4627
4084out: 4628out:
@@ -4086,7 +4630,9 @@ out:
4086 return ret; 4630 return ret;
4087} 4631}
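The loop rework above is the classic btrfs iteration idiom: search once, then advance path->slots[0] item by item and call btrfs_next_leaf() only when the current leaf is exhausted, instead of releasing the path and issuing a full tree search for every key. A user-space model of the control flow:

#include <stdio.h>

#define LEAVES	 3
#define PER_LEAF 4

static int leaves[LEAVES][PER_LEAF] = {
	{ 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 },
};

int main(void)
{
	int leaf = 0, slot = 0;	/* path->nodes[0], path->slots[0] */

	for (;;) {
		if (slot >= PER_LEAF) {	/* slot >= btrfs_header_nritems() */
			if (++leaf >= LEAVES)	/* btrfs_next_leaf() > 0 */
				break;
			slot = 0;
			continue;
		}
		printf("%d ", leaves[leaf][slot]);	/* process item */
		slot++;
	}
	printf("\n");
	return 0;
}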
4088 4632
4089static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) 4633static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
4634 int *pending_move,
4635 int *refs_processed)
4090{ 4636{
4091 int ret = 0; 4637 int ret = 0;
4092 4638
@@ -4098,17 +4644,11 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
4098 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 4644 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
4099 goto out; 4645 goto out;
4100 4646
4101 ret = process_recorded_refs(sctx); 4647 ret = process_recorded_refs(sctx, pending_move);
4102 if (ret < 0) 4648 if (ret < 0)
4103 goto out; 4649 goto out;
4104 4650
4105 /* 4651 *refs_processed = 1;
4106 * We have processed the refs and thus need to advance send_progress.
4107 * Now, calls to get_cur_xxx will take the updated refs of the current
4108 * inode into account.
4109 */
4110 sctx->send_progress = sctx->cur_ino + 1;
4111
4112out: 4652out:
4113 return ret; 4653 return ret;
4114} 4654}
@@ -4124,11 +4664,29 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4124 u64 right_gid; 4664 u64 right_gid;
4125 int need_chmod = 0; 4665 int need_chmod = 0;
4126 int need_chown = 0; 4666 int need_chown = 0;
4667 int pending_move = 0;
4668 int refs_processed = 0;
4127 4669
4128 ret = process_recorded_refs_if_needed(sctx, at_end); 4670 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
4671 &refs_processed);
4129 if (ret < 0) 4672 if (ret < 0)
4130 goto out; 4673 goto out;
4131 4674
4675 /*
4676 * We have processed the refs and thus need to advance send_progress.
4677 * Now, calls to get_cur_xxx will take the updated refs of the current
4678 * inode into account.
4679 *
4680 * On the other hand, if our current inode is a directory and couldn't
4681 * be moved/renamed because its parent was renamed/moved too and it has
4682 * a higher inode number, we can only move/rename our current inode
 4683 * after we moved/renamed its parent. Therefore, in this case, we operate on
 4684 * the old path (pre-move/rename) of our current inode, and the
4685 * move/rename will be performed later.
4686 */
4687 if (refs_processed && !pending_move)
4688 sctx->send_progress = sctx->cur_ino + 1;
4689
4132 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 4690 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4133 goto out; 4691 goto out;
4134 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 4692 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
@@ -4157,6 +4715,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4157 } 4715 }
4158 4716
4159 if (S_ISREG(sctx->cur_inode_mode)) { 4717 if (S_ISREG(sctx->cur_inode_mode)) {
4718 if (need_send_hole(sctx)) {
4719 if (sctx->cur_inode_last_extent == (u64)-1) {
4720 ret = get_last_extent(sctx, (u64)-1);
4721 if (ret)
4722 goto out;
4723 }
4724 if (sctx->cur_inode_last_extent <
4725 sctx->cur_inode_size) {
4726 ret = send_hole(sctx, sctx->cur_inode_size);
4727 if (ret)
4728 goto out;
4729 }
4730 }
4160 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, 4731 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4161 sctx->cur_inode_size); 4732 sctx->cur_inode_size);
4162 if (ret < 0) 4733 if (ret < 0)
@@ -4177,9 +4748,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4177 } 4748 }
4178 4749
4179 /* 4750 /*
4180 * Need to send that every time, no matter if it actually changed 4751 * If other directory inodes depended on our current directory
4181 * between the two trees as we have done changes to the inode before. 4752 * inode's move/rename, now do their move/rename operations.
4753 */
4754 if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
4755 ret = apply_children_dir_moves(sctx);
4756 if (ret)
4757 goto out;
4758 }
4759
4760 /*
4761 * Need to send that every time, no matter if it actually
4762 * changed between the two trees as we have done changes to
4763 * the inode before.
4182 */ 4764 */
4765 sctx->send_progress = sctx->cur_ino + 1;
4183 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); 4766 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4184 if (ret < 0) 4767 if (ret < 0)
4185 goto out; 4768 goto out;
@@ -4200,6 +4783,7 @@ static int changed_inode(struct send_ctx *sctx,
4200 4783
4201 sctx->cur_ino = key->objectid; 4784 sctx->cur_ino = key->objectid;
4202 sctx->cur_inode_new_gen = 0; 4785 sctx->cur_inode_new_gen = 0;
4786 sctx->cur_inode_last_extent = (u64)-1;
4203 4787
4204 /* 4788 /*
4205 * Set send_progress to current inode. This will tell all get_cur_xxx 4789 * Set send_progress to current inode. This will tell all get_cur_xxx
@@ -4480,14 +5064,18 @@ static int changed_cb(struct btrfs_root *left_root,
4480 struct send_ctx *sctx = ctx; 5064 struct send_ctx *sctx = ctx;
4481 5065
4482 if (result == BTRFS_COMPARE_TREE_SAME) { 5066 if (result == BTRFS_COMPARE_TREE_SAME) {
4483 if (key->type != BTRFS_INODE_REF_KEY && 5067 if (key->type == BTRFS_INODE_REF_KEY ||
4484 key->type != BTRFS_INODE_EXTREF_KEY) 5068 key->type == BTRFS_INODE_EXTREF_KEY) {
4485 return 0; 5069 ret = compare_refs(sctx, left_path, key);
4486 ret = compare_refs(sctx, left_path, key); 5070 if (!ret)
4487 if (!ret) 5071 return 0;
5072 if (ret < 0)
5073 return ret;
5074 } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
5075 return maybe_send_hole(sctx, left_path, key);
5076 } else {
4488 return 0; 5077 return 0;
4489 if (ret < 0) 5078 }
4490 return ret;
4491 result = BTRFS_COMPARE_TREE_CHANGED; 5079 result = BTRFS_COMPARE_TREE_CHANGED;
4492 ret = 0; 5080 ret = 0;
4493 } 5081 }
@@ -4522,7 +5110,6 @@ out:
4522static int full_send_tree(struct send_ctx *sctx) 5110static int full_send_tree(struct send_ctx *sctx)
4523{ 5111{
4524 int ret; 5112 int ret;
4525 struct btrfs_trans_handle *trans = NULL;
4526 struct btrfs_root *send_root = sctx->send_root; 5113 struct btrfs_root *send_root = sctx->send_root;
4527 struct btrfs_key key; 5114 struct btrfs_key key;
4528 struct btrfs_key found_key; 5115 struct btrfs_key found_key;
@@ -4544,19 +5131,6 @@ static int full_send_tree(struct send_ctx *sctx)
4544 key.type = BTRFS_INODE_ITEM_KEY; 5131 key.type = BTRFS_INODE_ITEM_KEY;
4545 key.offset = 0; 5132 key.offset = 0;
4546 5133
4547join_trans:
4548 /*
4549 * We need to make sure the transaction does not get committed
4550 * while we do anything on commit roots. Join a transaction to prevent
4551 * this.
4552 */
4553 trans = btrfs_join_transaction(send_root);
4554 if (IS_ERR(trans)) {
4555 ret = PTR_ERR(trans);
4556 trans = NULL;
4557 goto out;
4558 }
4559
4560 /* 5134 /*
4561 * Make sure the tree has not changed after re-joining. We detect this 5135 * Make sure the tree has not changed after re-joining. We detect this
4562 * by comparing start_ctransid and ctransid. They should always match. 5136 * by comparing start_ctransid and ctransid. They should always match.
@@ -4566,7 +5140,7 @@ join_trans:
4566 spin_unlock(&send_root->root_item_lock); 5140 spin_unlock(&send_root->root_item_lock);
4567 5141
4568 if (ctransid != start_ctransid) { 5142 if (ctransid != start_ctransid) {
4569 WARN(1, KERN_WARNING "btrfs: the root that you're trying to " 5143 WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
4570 "send was modified in between. This is " 5144 "send was modified in between. This is "
4571 "probably a bug.\n"); 5145 "probably a bug.\n");
4572 ret = -EIO; 5146 ret = -EIO;
@@ -4580,19 +5154,6 @@ join_trans:
4580 goto out_finish; 5154 goto out_finish;
4581 5155
4582 while (1) { 5156 while (1) {
4583 /*
4584 * When someone want to commit while we iterate, end the
4585 * joined transaction and rejoin.
4586 */
4587 if (btrfs_should_end_transaction(trans, send_root)) {
4588 ret = btrfs_end_transaction(trans, send_root);
4589 trans = NULL;
4590 if (ret < 0)
4591 goto out;
4592 btrfs_release_path(path);
4593 goto join_trans;
4594 }
4595
4596 eb = path->nodes[0]; 5157 eb = path->nodes[0];
4597 slot = path->slots[0]; 5158 slot = path->slots[0];
4598 btrfs_item_key_to_cpu(eb, &found_key, slot); 5159 btrfs_item_key_to_cpu(eb, &found_key, slot);
@@ -4620,12 +5181,6 @@ out_finish:
4620 5181
4621out: 5182out:
4622 btrfs_free_path(path); 5183 btrfs_free_path(path);
4623 if (trans) {
4624 if (!ret)
4625 ret = btrfs_end_transaction(trans, send_root);
4626 else
4627 btrfs_end_transaction(trans, send_root);
4628 }
4629 return ret; 5184 return ret;
4630} 5185}
4631 5186
@@ -4662,6 +5217,21 @@ out:
4662 return ret; 5217 return ret;
4663} 5218}
4664 5219
5220static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
5221{
5222 spin_lock(&root->root_item_lock);
5223 root->send_in_progress--;
5224 /*
 5225 * Not much left to do; we don't know why it's unbalanced and
5226 * can't blindly reset it to 0.
5227 */
5228 if (root->send_in_progress < 0)
5229 btrfs_err(root->fs_info,
5230 "send_in_progres unbalanced %d root %llu\n",
5231 root->send_in_progress, root->root_key.objectid);
5232 spin_unlock(&root->root_item_lock);
5233}
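send_in_progress is a plain integer pinned by root_item_lock rather than an atomic: the same lock guards the root's read-only state, so taking a reference and checking RO happen atomically with respect to an ioctl that flips the subvolume writable. A simplified pthread model of the pairing (names and the inline rollback are illustrative; the kernel drops the reference on its common error path instead, as in btrfs_ioctl_send() below):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* root_item_lock */
static int send_in_progress;
static int readonly = 1;

static int start_send(void)
{
	pthread_mutex_lock(&lock);
	send_in_progress++;
	if (!readonly) {		/* lost the race: roll back */
		send_in_progress--;
		pthread_mutex_unlock(&lock);
		return -1;		/* -EPERM */
	}
	pthread_mutex_unlock(&lock);
	return 0;
}

static int make_writable(void)
{
	pthread_mutex_lock(&lock);
	if (send_in_progress) {		/* refuse while a send runs */
		pthread_mutex_unlock(&lock);
		return -1;
	}
	readonly = 0;
	pthread_mutex_unlock(&lock);
	return 0;
}

int main(void)
{
	printf("%d %d\n", start_send(), make_writable());
	return 0;
}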
5234
4665long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) 5235long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4666{ 5236{
4667 int ret = 0; 5237 int ret = 0;
@@ -4673,6 +5243,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4673 struct send_ctx *sctx = NULL; 5243 struct send_ctx *sctx = NULL;
4674 u32 i; 5244 u32 i;
4675 u64 *clone_sources_tmp = NULL; 5245 u64 *clone_sources_tmp = NULL;
5246 int clone_sources_to_rollback = 0;
5247 int sort_clone_roots = 0;
5248 int index;
4676 5249
4677 if (!capable(CAP_SYS_ADMIN)) 5250 if (!capable(CAP_SYS_ADMIN))
4678 return -EPERM; 5251 return -EPERM;
@@ -4681,38 +5254,26 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4681 fs_info = send_root->fs_info; 5254 fs_info = send_root->fs_info;
4682 5255
4683 /* 5256 /*
5257 * The subvolume must remain read-only during send, protect against
5258 * making it RW.
5259 */
5260 spin_lock(&send_root->root_item_lock);
5261 send_root->send_in_progress++;
5262 spin_unlock(&send_root->root_item_lock);
5263
5264 /*
4684 * This is done when we lookup the root, it should already be complete 5265 * This is done when we lookup the root, it should already be complete
4685 * by the time we get here. 5266 * by the time we get here.
4686 */ 5267 */
4687 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); 5268 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
4688 5269
4689 /* 5270 /*
4690 * If we just created this root we need to make sure that the orphan 5271 * Userspace tools do the checks and warn the user if it's
4691 * cleanup has been done and committed since we search the commit root, 5272 * not RO.
4692 * so check its commit root transid with our otransid and if they match
4693 * commit the transaction to make sure everything is updated.
4694 */ 5273 */
4695 down_read(&send_root->fs_info->extent_commit_sem); 5274 if (!btrfs_root_readonly(send_root)) {
4696 if (btrfs_header_generation(send_root->commit_root) == 5275 ret = -EPERM;
4697 btrfs_root_otransid(&send_root->root_item)) { 5276 goto out;
4698 struct btrfs_trans_handle *trans;
4699
4700 up_read(&send_root->fs_info->extent_commit_sem);
4701
4702 trans = btrfs_attach_transaction_barrier(send_root);
4703 if (IS_ERR(trans)) {
4704 if (PTR_ERR(trans) != -ENOENT) {
4705 ret = PTR_ERR(trans);
4706 goto out;
4707 }
 4708 /* ENOENT means there's no transaction */
4709 } else {
4710 ret = btrfs_commit_transaction(trans, send_root);
4711 if (ret)
4712 goto out;
4713 }
4714 } else {
4715 up_read(&send_root->fs_info->extent_commit_sem);
4716 } 5277 }
4717 5278
4718 arg = memdup_user(arg_, sizeof(*arg)); 5279 arg = memdup_user(arg_, sizeof(*arg));
@@ -4753,8 +5314,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4753 goto out; 5314 goto out;
4754 } 5315 }
4755 5316
4756 sctx->mnt = mnt_file->f_path.mnt;
4757
4758 sctx->send_root = send_root; 5317 sctx->send_root = send_root;
4759 sctx->clone_roots_cnt = arg->clone_sources_count; 5318 sctx->clone_roots_cnt = arg->clone_sources_count;
4760 5319
@@ -4771,6 +5330,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4771 goto out; 5330 goto out;
4772 } 5331 }
4773 5332
5333 sctx->pending_dir_moves = RB_ROOT;
5334 sctx->waiting_dir_moves = RB_ROOT;
5335
4774 sctx->clone_roots = vzalloc(sizeof(struct clone_root) * 5336 sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
4775 (arg->clone_sources_count + 1)); 5337 (arg->clone_sources_count + 1));
4776 if (!sctx->clone_roots) { 5338 if (!sctx->clone_roots) {
@@ -4798,11 +5360,27 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4798 key.objectid = clone_sources_tmp[i]; 5360 key.objectid = clone_sources_tmp[i];
4799 key.type = BTRFS_ROOT_ITEM_KEY; 5361 key.type = BTRFS_ROOT_ITEM_KEY;
4800 key.offset = (u64)-1; 5362 key.offset = (u64)-1;
5363
5364 index = srcu_read_lock(&fs_info->subvol_srcu);
5365
4801 clone_root = btrfs_read_fs_root_no_name(fs_info, &key); 5366 clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
4802 if (IS_ERR(clone_root)) { 5367 if (IS_ERR(clone_root)) {
5368 srcu_read_unlock(&fs_info->subvol_srcu, index);
4803 ret = PTR_ERR(clone_root); 5369 ret = PTR_ERR(clone_root);
4804 goto out; 5370 goto out;
4805 } 5371 }
5372 clone_sources_to_rollback = i + 1;
5373 spin_lock(&clone_root->root_item_lock);
5374 clone_root->send_in_progress++;
5375 if (!btrfs_root_readonly(clone_root)) {
5376 spin_unlock(&clone_root->root_item_lock);
5377 srcu_read_unlock(&fs_info->subvol_srcu, index);
5378 ret = -EPERM;
5379 goto out;
5380 }
5381 spin_unlock(&clone_root->root_item_lock);
5382 srcu_read_unlock(&fs_info->subvol_srcu, index);
5383
4806 sctx->clone_roots[i].root = clone_root; 5384 sctx->clone_roots[i].root = clone_root;
4807 } 5385 }
4808 vfree(clone_sources_tmp); 5386 vfree(clone_sources_tmp);
@@ -4813,11 +5391,27 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4813 key.objectid = arg->parent_root; 5391 key.objectid = arg->parent_root;
4814 key.type = BTRFS_ROOT_ITEM_KEY; 5392 key.type = BTRFS_ROOT_ITEM_KEY;
4815 key.offset = (u64)-1; 5393 key.offset = (u64)-1;
5394
5395 index = srcu_read_lock(&fs_info->subvol_srcu);
5396
4816 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); 5397 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
4817 if (IS_ERR(sctx->parent_root)) { 5398 if (IS_ERR(sctx->parent_root)) {
5399 srcu_read_unlock(&fs_info->subvol_srcu, index);
4818 ret = PTR_ERR(sctx->parent_root); 5400 ret = PTR_ERR(sctx->parent_root);
4819 goto out; 5401 goto out;
4820 } 5402 }
5403
5404 spin_lock(&sctx->parent_root->root_item_lock);
5405 sctx->parent_root->send_in_progress++;
5406 if (!btrfs_root_readonly(sctx->parent_root)) {
5407 spin_unlock(&sctx->parent_root->root_item_lock);
5408 srcu_read_unlock(&fs_info->subvol_srcu, index);
5409 ret = -EPERM;
5410 goto out;
5411 }
5412 spin_unlock(&sctx->parent_root->root_item_lock);
5413
5414 srcu_read_unlock(&fs_info->subvol_srcu, index);
4821 } 5415 }
4822 5416
4823 /* 5417 /*
@@ -4831,6 +5425,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4831 sort(sctx->clone_roots, sctx->clone_roots_cnt, 5425 sort(sctx->clone_roots, sctx->clone_roots_cnt,
4832 sizeof(*sctx->clone_roots), __clone_root_cmp_sort, 5426 sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
4833 NULL); 5427 NULL);
5428 sort_clone_roots = 1;
4834 5429
4835 ret = send_subvol(sctx); 5430 ret = send_subvol(sctx);
4836 if (ret < 0) 5431 if (ret < 0)
@@ -4846,6 +5441,48 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4846 } 5441 }
4847 5442
4848out: 5443out:
5444 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
5445 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
5446 struct rb_node *n;
5447 struct pending_dir_move *pm;
5448
5449 n = rb_first(&sctx->pending_dir_moves);
5450 pm = rb_entry(n, struct pending_dir_move, node);
5451 while (!list_empty(&pm->list)) {
5452 struct pending_dir_move *pm2;
5453
5454 pm2 = list_first_entry(&pm->list,
5455 struct pending_dir_move, list);
5456 free_pending_move(sctx, pm2);
5457 }
5458 free_pending_move(sctx, pm);
5459 }
5460
5461 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
5462 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
5463 struct rb_node *n;
5464 struct waiting_dir_move *dm;
5465
5466 n = rb_first(&sctx->waiting_dir_moves);
5467 dm = rb_entry(n, struct waiting_dir_move, node);
5468 rb_erase(&dm->node, &sctx->waiting_dir_moves);
5469 kfree(dm);
5470 }
5471
5472 if (sort_clone_roots) {
5473 for (i = 0; i < sctx->clone_roots_cnt; i++)
5474 btrfs_root_dec_send_in_progress(
5475 sctx->clone_roots[i].root);
5476 } else {
5477 for (i = 0; sctx && i < clone_sources_to_rollback; i++)
5478 btrfs_root_dec_send_in_progress(
5479 sctx->clone_roots[i].root);
5480
5481 btrfs_root_dec_send_in_progress(send_root);
5482 }
5483 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
5484 btrfs_root_dec_send_in_progress(sctx->parent_root);
5485
4849 kfree(arg); 5486 kfree(arg);
4850 vfree(clone_sources_tmp); 5487 vfree(clone_sources_tmp);
4851 5488
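The error path above is careful about reference ownership: clone_sources_to_rollback counts how many clone roots actually took a send_in_progress reference, and only the branch that owns the references (sorted or not yet sorted) drops them. The general shape of that pattern, as a runnable sketch with hypothetical names:

#include <stdio.h>

#define N 5

static int  get_ref(int i) { return i == 3 ? -1 : 0; }	/* fail at i == 3 */
static void put_ref(int i) { printf("put ref %d\n", i); }

int main(void)
{
	int taken = 0;		/* clone_sources_to_rollback */
	int i;

	for (i = 0; i < N; i++) {
		if (get_ref(i) < 0)
			break;		/* goto out */
		taken = i + 1;
	}
	/* out: drop exactly the references acquired so far. */
	for (i = 0; i < taken; i++)
		put_ref(i);
	return 0;
}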
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d71a11d13dfa..c02f63356895 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -48,6 +48,8 @@
48#include "transaction.h" 48#include "transaction.h"
49#include "btrfs_inode.h" 49#include "btrfs_inode.h"
50#include "print-tree.h" 50#include "print-tree.h"
51#include "hash.h"
52#include "props.h"
51#include "xattr.h" 53#include "xattr.h"
52#include "volumes.h" 54#include "volumes.h"
53#include "export.h" 55#include "export.h"
@@ -152,11 +154,12 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
152 vaf.fmt = fmt; 154 vaf.fmt = fmt;
153 vaf.va = &args; 155 vaf.va = &args;
154 156
155 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s (%pV)\n", 157 printk(KERN_CRIT
158 "BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
156 sb->s_id, function, line, errno, errstr, &vaf); 159 sb->s_id, function, line, errno, errstr, &vaf);
157 va_end(args); 160 va_end(args);
158 } else { 161 } else {
159 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s\n", 162 printk(KERN_CRIT "BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
160 sb->s_id, function, line, errno, errstr); 163 sb->s_id, function, line, errno, errstr);
161 } 164 }
162 165
@@ -250,7 +253,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
250 */ 253 */
251 if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, 254 if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
252 &root->fs_info->fs_state)) { 255 &root->fs_info->fs_state)) {
253 WARN(1, KERN_DEBUG "btrfs: Transaction aborted (error %d)\n", 256 WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n",
254 errno); 257 errno);
255 } 258 }
256 trans->aborted = errno; 259 trans->aborted = errno;
@@ -294,8 +297,8 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
294 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n", 297 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
295 s_id, function, line, &vaf, errno, errstr); 298 s_id, function, line, &vaf, errno, errstr);
296 299
297 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n", 300 btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
298 s_id, function, line, &vaf, errno, errstr); 301 function, line, &vaf, errno, errstr);
299 va_end(args); 302 va_end(args);
300 /* Caller calls BUG() */ 303 /* Caller calls BUG() */
301} 304}
@@ -322,7 +325,9 @@ enum {
322 Opt_no_space_cache, Opt_recovery, Opt_skip_balance, 325 Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
323 Opt_check_integrity, Opt_check_integrity_including_extent_data, 326 Opt_check_integrity, Opt_check_integrity_including_extent_data,
324 Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, 327 Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
325 Opt_commit_interval, 328 Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
329 Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
330 Opt_datasum, Opt_treelog, Opt_noinode_cache,
326 Opt_err, 331 Opt_err,
327}; 332};
328 333
@@ -332,8 +337,11 @@ static match_table_t tokens = {
332 {Opt_subvolid, "subvolid=%s"}, 337 {Opt_subvolid, "subvolid=%s"},
333 {Opt_device, "device=%s"}, 338 {Opt_device, "device=%s"},
334 {Opt_nodatasum, "nodatasum"}, 339 {Opt_nodatasum, "nodatasum"},
340 {Opt_datasum, "datasum"},
335 {Opt_nodatacow, "nodatacow"}, 341 {Opt_nodatacow, "nodatacow"},
342 {Opt_datacow, "datacow"},
336 {Opt_nobarrier, "nobarrier"}, 343 {Opt_nobarrier, "nobarrier"},
344 {Opt_barrier, "barrier"},
337 {Opt_max_inline, "max_inline=%s"}, 345 {Opt_max_inline, "max_inline=%s"},
338 {Opt_alloc_start, "alloc_start=%s"}, 346 {Opt_alloc_start, "alloc_start=%s"},
339 {Opt_thread_pool, "thread_pool=%d"}, 347 {Opt_thread_pool, "thread_pool=%d"},
@@ -344,18 +352,25 @@ static match_table_t tokens = {
344 {Opt_ssd, "ssd"}, 352 {Opt_ssd, "ssd"},
345 {Opt_ssd_spread, "ssd_spread"}, 353 {Opt_ssd_spread, "ssd_spread"},
346 {Opt_nossd, "nossd"}, 354 {Opt_nossd, "nossd"},
355 {Opt_acl, "acl"},
347 {Opt_noacl, "noacl"}, 356 {Opt_noacl, "noacl"},
348 {Opt_notreelog, "notreelog"}, 357 {Opt_notreelog, "notreelog"},
358 {Opt_treelog, "treelog"},
349 {Opt_flushoncommit, "flushoncommit"}, 359 {Opt_flushoncommit, "flushoncommit"},
360 {Opt_noflushoncommit, "noflushoncommit"},
350 {Opt_ratio, "metadata_ratio=%d"}, 361 {Opt_ratio, "metadata_ratio=%d"},
351 {Opt_discard, "discard"}, 362 {Opt_discard, "discard"},
363 {Opt_nodiscard, "nodiscard"},
352 {Opt_space_cache, "space_cache"}, 364 {Opt_space_cache, "space_cache"},
353 {Opt_clear_cache, "clear_cache"}, 365 {Opt_clear_cache, "clear_cache"},
354 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 366 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
355 {Opt_enospc_debug, "enospc_debug"}, 367 {Opt_enospc_debug, "enospc_debug"},
368 {Opt_noenospc_debug, "noenospc_debug"},
356 {Opt_subvolrootid, "subvolrootid=%d"}, 369 {Opt_subvolrootid, "subvolrootid=%d"},
357 {Opt_defrag, "autodefrag"}, 370 {Opt_defrag, "autodefrag"},
371 {Opt_nodefrag, "noautodefrag"},
358 {Opt_inode_cache, "inode_cache"}, 372 {Opt_inode_cache, "inode_cache"},
373 {Opt_noinode_cache, "noinode_cache"},
359 {Opt_no_space_cache, "nospace_cache"}, 374 {Opt_no_space_cache, "nospace_cache"},
360 {Opt_recovery, "recovery"}, 375 {Opt_recovery, "recovery"},
361 {Opt_skip_balance, "skip_balance"}, 376 {Opt_skip_balance, "skip_balance"},
@@ -368,6 +383,20 @@ static match_table_t tokens = {
368 {Opt_err, NULL}, 383 {Opt_err, NULL},
369}; 384};
370 385
386#define btrfs_set_and_info(root, opt, fmt, args...) \
387{ \
388 if (!btrfs_test_opt(root, opt)) \
389 btrfs_info(root->fs_info, fmt, ##args); \
390 btrfs_set_opt(root->fs_info->mount_opt, opt); \
391}
392
393#define btrfs_clear_and_info(root, opt, fmt, args...) \
394{ \
395 if (btrfs_test_opt(root, opt)) \
396 btrfs_info(root->fs_info, fmt, ##args); \
397 btrfs_clear_opt(root->fs_info->mount_opt, opt); \
398}
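These helpers log only when the option's state actually changes, which quiets remounts that repeat existing options. One caveat worth noting: as written they expand to a bare { } block, so a trailing semicolon after an unbraced if/else would not compile; the usual kernel idiom is a do { } while (0) wrapper. A small runnable demonstration of why (LOG_BAD/LOG_GOOD are hypothetical):

#include <stdio.h>

#define LOG_BAD(msg)  { puts(msg); }			/* bare block */
#define LOG_GOOD(msg) do { puts(msg); } while (0)	/* safe form */

int main(void)
{
	int on = 0;

	/* With LOG_BAD here, the ';' after the expanded block would
	 * leave the else dangling and the program would not compile. */
	if (on)
		LOG_GOOD("enabled");
	else
		LOG_GOOD("disabled");
	return 0;
}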
399
371/* 400/*
372 * Regular mount options parser. Everything that is needed only when 401 * Regular mount options parser. Everything that is needed only when
373 * reading in a new superblock is parsed here. 402 * reading in a new superblock is parsed here.
@@ -383,6 +412,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
383 int ret = 0; 412 int ret = 0;
384 char *compress_type; 413 char *compress_type;
385 bool compress_force = false; 414 bool compress_force = false;
415 bool compress = false;
386 416
387 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); 417 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
388 if (cache_gen) 418 if (cache_gen)
@@ -409,7 +439,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
409 token = match_token(p, tokens, args); 439 token = match_token(p, tokens, args);
410 switch (token) { 440 switch (token) {
411 case Opt_degraded: 441 case Opt_degraded:
412 printk(KERN_INFO "btrfs: allowing degraded mounts\n"); 442 btrfs_info(root->fs_info, "allowing degraded mounts");
413 btrfs_set_opt(info->mount_opt, DEGRADED); 443 btrfs_set_opt(info->mount_opt, DEGRADED);
414 break; 444 break;
415 case Opt_subvol: 445 case Opt_subvol:
@@ -422,27 +452,45 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
422 */ 452 */
423 break; 453 break;
424 case Opt_nodatasum: 454 case Opt_nodatasum:
425 printk(KERN_INFO "btrfs: setting nodatasum\n"); 455 btrfs_set_and_info(root, NODATASUM,
426 btrfs_set_opt(info->mount_opt, NODATASUM); 456 "setting nodatasum");
457 break;
458 case Opt_datasum:
459 if (btrfs_test_opt(root, NODATASUM)) {
460 if (btrfs_test_opt(root, NODATACOW))
461 btrfs_info(root->fs_info, "setting datasum, datacow enabled");
462 else
463 btrfs_info(root->fs_info, "setting datasum");
464 }
465 btrfs_clear_opt(info->mount_opt, NODATACOW);
466 btrfs_clear_opt(info->mount_opt, NODATASUM);
427 break; 467 break;
428 case Opt_nodatacow: 468 case Opt_nodatacow:
429 if (!btrfs_test_opt(root, COMPRESS) || 469 if (!btrfs_test_opt(root, NODATACOW)) {
430 !btrfs_test_opt(root, FORCE_COMPRESS)) { 470 if (!btrfs_test_opt(root, COMPRESS) ||
431 printk(KERN_INFO "btrfs: setting nodatacow, compression disabled\n"); 471 !btrfs_test_opt(root, FORCE_COMPRESS)) {
432 } else { 472 btrfs_info(root->fs_info,
433 printk(KERN_INFO "btrfs: setting nodatacow\n"); 473 "setting nodatacow, compression disabled");
474 } else {
475 btrfs_info(root->fs_info, "setting nodatacow");
476 }
434 } 477 }
435 btrfs_clear_opt(info->mount_opt, COMPRESS); 478 btrfs_clear_opt(info->mount_opt, COMPRESS);
436 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); 479 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
437 btrfs_set_opt(info->mount_opt, NODATACOW); 480 btrfs_set_opt(info->mount_opt, NODATACOW);
438 btrfs_set_opt(info->mount_opt, NODATASUM); 481 btrfs_set_opt(info->mount_opt, NODATASUM);
439 break; 482 break;
483 case Opt_datacow:
484 btrfs_clear_and_info(root, NODATACOW,
485 "setting datacow");
486 break;
440 case Opt_compress_force: 487 case Opt_compress_force:
441 case Opt_compress_force_type: 488 case Opt_compress_force_type:
442 compress_force = true; 489 compress_force = true;
443 /* Fallthrough */ 490 /* Fallthrough */
444 case Opt_compress: 491 case Opt_compress:
445 case Opt_compress_type: 492 case Opt_compress_type:
493 compress = true;
446 if (token == Opt_compress || 494 if (token == Opt_compress ||
447 token == Opt_compress_force || 495 token == Opt_compress_force ||
448 strcmp(args[0].from, "zlib") == 0) { 496 strcmp(args[0].from, "zlib") == 0) {
@@ -469,34 +517,36 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
469 } 517 }
470 518
471 if (compress_force) { 519 if (compress_force) {
472 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 520 btrfs_set_and_info(root, FORCE_COMPRESS,
473 pr_info("btrfs: force %s compression\n", 521 "force %s compression",
474 compress_type); 522 compress_type);
475 } else if (btrfs_test_opt(root, COMPRESS)) { 523 } else if (compress) {
476 pr_info("btrfs: use %s compression\n", 524 if (!btrfs_test_opt(root, COMPRESS))
477 compress_type); 525 btrfs_info(root->fs_info,
526 "btrfs: use %s compression\n",
527 compress_type);
478 } 528 }
479 break; 529 break;
480 case Opt_ssd: 530 case Opt_ssd:
481 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 531 btrfs_set_and_info(root, SSD,
482 btrfs_set_opt(info->mount_opt, SSD); 532 "use ssd allocation scheme");
483 break; 533 break;
484 case Opt_ssd_spread: 534 case Opt_ssd_spread:
485 printk(KERN_INFO "btrfs: use spread ssd " 535 btrfs_set_and_info(root, SSD_SPREAD,
486 "allocation scheme\n"); 536 "use spread ssd allocation scheme");
487 btrfs_set_opt(info->mount_opt, SSD);
488 btrfs_set_opt(info->mount_opt, SSD_SPREAD);
489 break; 537 break;
490 case Opt_nossd: 538 case Opt_nossd:
491 printk(KERN_INFO "btrfs: not using ssd allocation " 539 btrfs_clear_and_info(root, NOSSD,
492 "scheme\n"); 540 "not using ssd allocation scheme");
493 btrfs_set_opt(info->mount_opt, NOSSD);
494 btrfs_clear_opt(info->mount_opt, SSD); 541 btrfs_clear_opt(info->mount_opt, SSD);
495 btrfs_clear_opt(info->mount_opt, SSD_SPREAD); 542 break;
543 case Opt_barrier:
544 btrfs_clear_and_info(root, NOBARRIER,
545 "turning on barriers");
496 break; 546 break;
497 case Opt_nobarrier: 547 case Opt_nobarrier:
498 printk(KERN_INFO "btrfs: turning off barriers\n"); 548 btrfs_set_and_info(root, NOBARRIER,
499 btrfs_set_opt(info->mount_opt, NOBARRIER); 549 "turning off barriers");
500 break; 550 break;
501 case Opt_thread_pool: 551 case Opt_thread_pool:
502 ret = match_int(&args[0], &intarg); 552 ret = match_int(&args[0], &intarg);
@@ -520,7 +570,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
520 info->max_inline, 570 info->max_inline,
521 root->sectorsize); 571 root->sectorsize);
522 } 572 }
523 printk(KERN_INFO "btrfs: max_inline at %llu\n", 573 btrfs_info(root->fs_info, "max_inline at %llu",
524 info->max_inline); 574 info->max_inline);
525 } else { 575 } else {
526 ret = -ENOMEM; 576 ret = -ENOMEM;
@@ -534,24 +584,34 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
534 info->alloc_start = memparse(num, NULL); 584 info->alloc_start = memparse(num, NULL);
535 mutex_unlock(&info->chunk_mutex); 585 mutex_unlock(&info->chunk_mutex);
536 kfree(num); 586 kfree(num);
537 printk(KERN_INFO 587 btrfs_info(root->fs_info, "allocations start at %llu",
538 "btrfs: allocations start at %llu\n",
539 info->alloc_start); 588 info->alloc_start);
540 } else { 589 } else {
541 ret = -ENOMEM; 590 ret = -ENOMEM;
542 goto out; 591 goto out;
543 } 592 }
544 break; 593 break;
594 case Opt_acl:
595 root->fs_info->sb->s_flags |= MS_POSIXACL;
596 break;
545 case Opt_noacl: 597 case Opt_noacl:
546 root->fs_info->sb->s_flags &= ~MS_POSIXACL; 598 root->fs_info->sb->s_flags &= ~MS_POSIXACL;
547 break; 599 break;
548 case Opt_notreelog: 600 case Opt_notreelog:
549 printk(KERN_INFO "btrfs: disabling tree log\n"); 601 btrfs_set_and_info(root, NOTREELOG,
550 btrfs_set_opt(info->mount_opt, NOTREELOG); 602 "disabling tree log");
603 break;
604 case Opt_treelog:
605 btrfs_clear_and_info(root, NOTREELOG,
606 "enabling tree log");
551 break; 607 break;
552 case Opt_flushoncommit: 608 case Opt_flushoncommit:
553 printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); 609 btrfs_set_and_info(root, FLUSHONCOMMIT,
554 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); 610 "turning on flush-on-commit");
611 break;
612 case Opt_noflushoncommit:
613 btrfs_clear_and_info(root, FLUSHONCOMMIT,
614 "turning off flush-on-commit");
555 break; 615 break;
556 case Opt_ratio: 616 case Opt_ratio:
557 ret = match_int(&args[0], &intarg); 617 ret = match_int(&args[0], &intarg);
@@ -559,7 +619,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
559 goto out; 619 goto out;
560 } else if (intarg >= 0) { 620 } else if (intarg >= 0) {
561 info->metadata_ratio = intarg; 621 info->metadata_ratio = intarg;
562 printk(KERN_INFO "btrfs: metadata ratio %d\n", 622 btrfs_info(root->fs_info, "metadata ratio %d",
563 info->metadata_ratio); 623 info->metadata_ratio);
564 } else { 624 } else {
565 ret = -EINVAL; 625 ret = -EINVAL;
@@ -567,25 +627,35 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
567 } 627 }
568 break; 628 break;
569 case Opt_discard: 629 case Opt_discard:
570 btrfs_set_opt(info->mount_opt, DISCARD); 630 btrfs_set_and_info(root, DISCARD,
631 "turning on discard");
632 break;
633 case Opt_nodiscard:
634 btrfs_clear_and_info(root, DISCARD,
635 "turning off discard");
571 break; 636 break;
572 case Opt_space_cache: 637 case Opt_space_cache:
573 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 638 btrfs_set_and_info(root, SPACE_CACHE,
639 "enabling disk space caching");
574 break; 640 break;
575 case Opt_rescan_uuid_tree: 641 case Opt_rescan_uuid_tree:
576 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); 642 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
577 break; 643 break;
578 case Opt_no_space_cache: 644 case Opt_no_space_cache:
579 printk(KERN_INFO "btrfs: disabling disk space caching\n"); 645 btrfs_clear_and_info(root, SPACE_CACHE,
580 btrfs_clear_opt(info->mount_opt, SPACE_CACHE); 646 "disabling disk space caching");
581 break; 647 break;
582 case Opt_inode_cache: 648 case Opt_inode_cache:
583 printk(KERN_INFO "btrfs: enabling inode map caching\n"); 649 btrfs_set_and_info(root, CHANGE_INODE_CACHE,
584 btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); 650 "enabling inode map caching");
651 break;
652 case Opt_noinode_cache:
653 btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
654 "disabling inode map caching");
585 break; 655 break;
586 case Opt_clear_cache: 656 case Opt_clear_cache:
587 printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 657 btrfs_set_and_info(root, CLEAR_CACHE,
588 btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 658 "force clearing of disk cache");
589 break; 659 break;
590 case Opt_user_subvol_rm_allowed: 660 case Opt_user_subvol_rm_allowed:
591 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 661 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
@@ -593,12 +663,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
593 case Opt_enospc_debug: 663 case Opt_enospc_debug:
594 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); 664 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
595 break; 665 break;
666 case Opt_noenospc_debug:
667 btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
668 break;
596 case Opt_defrag: 669 case Opt_defrag:
597 printk(KERN_INFO "btrfs: enabling auto defrag\n"); 670 btrfs_set_and_info(root, AUTO_DEFRAG,
598 btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); 671 "enabling auto defrag");
672 break;
673 case Opt_nodefrag:
674 btrfs_clear_and_info(root, AUTO_DEFRAG,
675 "disabling auto defrag");
599 break; 676 break;
600 case Opt_recovery: 677 case Opt_recovery:
601 printk(KERN_INFO "btrfs: enabling auto recovery\n"); 678 btrfs_info(root->fs_info, "enabling auto recovery");
602 btrfs_set_opt(info->mount_opt, RECOVERY); 679 btrfs_set_opt(info->mount_opt, RECOVERY);
603 break; 680 break;
604 case Opt_skip_balance: 681 case Opt_skip_balance:
@@ -606,14 +683,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
606 break; 683 break;
607#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 684#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
608 case Opt_check_integrity_including_extent_data: 685 case Opt_check_integrity_including_extent_data:
609 printk(KERN_INFO "btrfs: enabling check integrity" 686 btrfs_info(root->fs_info,
610 " including extent data\n"); 687 "enabling check integrity including extent data");
611 btrfs_set_opt(info->mount_opt, 688 btrfs_set_opt(info->mount_opt,
612 CHECK_INTEGRITY_INCLUDING_EXTENT_DATA); 689 CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
613 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 690 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
614 break; 691 break;
615 case Opt_check_integrity: 692 case Opt_check_integrity:
616 printk(KERN_INFO "btrfs: enabling check integrity\n"); 693 btrfs_info(root->fs_info, "enabling check integrity");
617 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 694 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
618 break; 695 break;
619 case Opt_check_integrity_print_mask: 696 case Opt_check_integrity_print_mask:
@@ -622,8 +699,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
622 goto out; 699 goto out;
623 } else if (intarg >= 0) { 700 } else if (intarg >= 0) {
624 info->check_integrity_print_mask = intarg; 701 info->check_integrity_print_mask = intarg;
625 printk(KERN_INFO "btrfs:" 702 btrfs_info(root->fs_info, "check_integrity_print_mask 0x%x",
626 " check_integrity_print_mask 0x%x\n",
627 info->check_integrity_print_mask); 703 info->check_integrity_print_mask);
628 } else { 704 } else {
629 ret = -EINVAL; 705 ret = -EINVAL;
@@ -634,8 +710,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
634 case Opt_check_integrity_including_extent_data: 710 case Opt_check_integrity_including_extent_data:
635 case Opt_check_integrity: 711 case Opt_check_integrity:
636 case Opt_check_integrity_print_mask: 712 case Opt_check_integrity_print_mask:
637 printk(KERN_ERR "btrfs: support for check_integrity*" 713 btrfs_err(root->fs_info,
638 " not compiled in!\n"); 714 "support for check_integrity* not compiled in!");
639 ret = -EINVAL; 715 ret = -EINVAL;
640 goto out; 716 goto out;
641#endif 717#endif
@@ -655,28 +731,24 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
655 intarg = 0; 731 intarg = 0;
656 ret = match_int(&args[0], &intarg); 732 ret = match_int(&args[0], &intarg);
657 if (ret < 0) { 733 if (ret < 0) {
658 printk(KERN_ERR 734 btrfs_err(root->fs_info, "invalid commit interval");
659 "btrfs: invalid commit interval\n");
660 ret = -EINVAL; 735 ret = -EINVAL;
661 goto out; 736 goto out;
662 } 737 }
663 if (intarg > 0) { 738 if (intarg > 0) {
664 if (intarg > 300) { 739 if (intarg > 300) {
665 printk(KERN_WARNING 740 btrfs_warn(root->fs_info, "excessive commit interval %d",
666 "btrfs: excessive commit interval %d\n",
667 intarg); 741 intarg);
668 } 742 }
669 info->commit_interval = intarg; 743 info->commit_interval = intarg;
670 } else { 744 } else {
671 printk(KERN_INFO 745 btrfs_info(root->fs_info, "using default commit interval %ds",
672 "btrfs: using default commit interval %ds\n",
673 BTRFS_DEFAULT_COMMIT_INTERVAL); 746 BTRFS_DEFAULT_COMMIT_INTERVAL);
674 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; 747 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
675 } 748 }
676 break; 749 break;
677 case Opt_err: 750 case Opt_err:
678 printk(KERN_INFO "btrfs: unrecognized mount option " 751 btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
679 "'%s'\n", p);
680 ret = -EINVAL; 752 ret = -EINVAL;
681 goto out; 753 goto out;
682 default: 754 default:
@@ -685,7 +757,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
685 } 757 }
686out: 758out:
687 if (!ret && btrfs_test_opt(root, SPACE_CACHE)) 759 if (!ret && btrfs_test_opt(root, SPACE_CACHE))
688 printk(KERN_INFO "btrfs: disk space caching is enabled\n"); 760 btrfs_info(root->fs_info, "disk space caching is enabled");
689 kfree(orig); 761 kfree(orig);
690 return ret; 762 return ret;
691} 763}
@@ -748,7 +820,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
748 break; 820 break;
749 case Opt_subvolrootid: 821 case Opt_subvolrootid:
750 printk(KERN_WARNING 822 printk(KERN_WARNING
751 "btrfs: 'subvolrootid' mount option is deprecated and has no effect\n"); 823 "BTRFS: 'subvolrootid' mount option is deprecated and has "
824 "no effect\n");
752 break; 825 break;
753 case Opt_device: 826 case Opt_device:
754 device_name = match_strdup(&args[0]); 827 device_name = match_strdup(&args[0]);
@@ -877,7 +950,7 @@ static int btrfs_fill_super(struct super_block *sb,
877 sb->s_flags |= MS_I_VERSION; 950 sb->s_flags |= MS_I_VERSION;
878 err = open_ctree(sb, fs_devices, (char *)data); 951 err = open_ctree(sb, fs_devices, (char *)data);
879 if (err) { 952 if (err) {
880 printk("btrfs: open_ctree failed\n"); 953 printk(KERN_ERR "BTRFS: open_ctree failed\n");
881 return err; 954 return err;
882 } 955 }
883 956
@@ -1115,7 +1188,7 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
1115 dput(root); 1188 dput(root);
1116 root = ERR_PTR(-EINVAL); 1189 root = ERR_PTR(-EINVAL);
1117 deactivate_locked_super(s); 1190 deactivate_locked_super(s);
1118 printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", 1191 printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n",
1119 subvol_name); 1192 subvol_name);
1120 } 1193 }
1121 1194
@@ -1240,7 +1313,7 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1240 1313
1241 fs_info->thread_pool_size = new_pool_size; 1314 fs_info->thread_pool_size = new_pool_size;
1242 1315
1243 printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n", 1316 btrfs_info(fs_info, "resize thread pool %d -> %d",
1244 old_pool_size, new_pool_size); 1317 old_pool_size, new_pool_size);
1245 1318
1246 btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); 1319 btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size);
@@ -1346,7 +1419,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1346 } else { 1419 } else {
1347 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) { 1420 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
1348 btrfs_err(fs_info, 1421 btrfs_err(fs_info,
1349 "Remounting read-write after error is not allowed\n"); 1422 "Remounting read-write after error is not allowed");
1350 ret = -EINVAL; 1423 ret = -EINVAL;
1351 goto restore; 1424 goto restore;
1352 } 1425 }
@@ -1358,8 +1431,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1358 if (fs_info->fs_devices->missing_devices > 1431 if (fs_info->fs_devices->missing_devices >
1359 fs_info->num_tolerated_disk_barrier_failures && 1432 fs_info->num_tolerated_disk_barrier_failures &&
1360 !(*flags & MS_RDONLY)) { 1433 !(*flags & MS_RDONLY)) {
1361 printk(KERN_WARNING 1434 btrfs_warn(fs_info,
1362 "Btrfs: too many missing devices, writeable remount is not allowed\n"); 1435 "too many missing devices, writeable remount is not allowed");
1363 ret = -EACCES; 1436 ret = -EACCES;
1364 goto restore; 1437 goto restore;
1365 } 1438 }
@@ -1384,16 +1457,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1384 1457
1385 ret = btrfs_resume_dev_replace_async(fs_info); 1458 ret = btrfs_resume_dev_replace_async(fs_info);
1386 if (ret) { 1459 if (ret) {
1387 pr_warn("btrfs: failed to resume dev_replace\n"); 1460 btrfs_warn(fs_info, "failed to resume dev_replace");
1388 goto restore; 1461 goto restore;
1389 } 1462 }
1390 1463
1391 if (!fs_info->uuid_root) { 1464 if (!fs_info->uuid_root) {
1392 pr_info("btrfs: creating UUID tree\n"); 1465 btrfs_info(fs_info, "creating UUID tree");
1393 ret = btrfs_create_uuid_tree(fs_info); 1466 ret = btrfs_create_uuid_tree(fs_info);
1394 if (ret) { 1467 if (ret) {
1395 pr_warn("btrfs: failed to create the uuid tree" 1468 btrfs_warn(fs_info, "failed to create the UUID tree %d", ret);
1396 "%d\n", ret);
1397 goto restore; 1469 goto restore;
1398 } 1470 }
1399 } 1471 }
@@ -1773,7 +1845,7 @@ static int btrfs_interface_init(void)
1773static void btrfs_interface_exit(void) 1845static void btrfs_interface_exit(void)
1774{ 1846{
1775 if (misc_deregister(&btrfs_misc) < 0) 1847 if (misc_deregister(&btrfs_misc) < 0)
1776 printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); 1848 printk(KERN_INFO "BTRFS: misc_deregister failed for control device\n");
1777} 1849}
1778 1850
1779static void btrfs_print_info(void) 1851static void btrfs_print_info(void)
@@ -1818,10 +1890,16 @@ static int __init init_btrfs_fs(void)
1818{ 1890{
1819 int err; 1891 int err;
1820 1892
1821 err = btrfs_init_sysfs(); 1893 err = btrfs_hash_init();
1822 if (err) 1894 if (err)
1823 return err; 1895 return err;
1824 1896
1897 btrfs_props_init();
1898
1899 err = btrfs_init_sysfs();
1900 if (err)
1901 goto free_hash;
1902
1825 btrfs_init_compress(); 1903 btrfs_init_compress();
1826 1904
1827 err = btrfs_init_cachep(); 1905 err = btrfs_init_cachep();
@@ -1895,6 +1973,8 @@ free_cachep:
1895free_compress: 1973free_compress:
1896 btrfs_exit_compress(); 1974 btrfs_exit_compress();
1897 btrfs_exit_sysfs(); 1975 btrfs_exit_sysfs();
1976free_hash:
1977 btrfs_hash_exit();
1898 return err; 1978 return err;
1899} 1979}
1900 1980
@@ -1913,6 +1993,7 @@ static void __exit exit_btrfs_fs(void)
1913 btrfs_exit_sysfs(); 1993 btrfs_exit_sysfs();
1914 btrfs_cleanup_fs_uuids(); 1994 btrfs_cleanup_fs_uuids();
1915 btrfs_exit_compress(); 1995 btrfs_exit_compress();
1996 btrfs_hash_exit();
1916} 1997}
1917 1998
1918module_init(init_btrfs_fs) 1999module_init(init_btrfs_fs)
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 5b326cd60a4a..782374d8fd19 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -22,24 +22,641 @@
22#include <linux/completion.h> 22#include <linux/completion.h>
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/kobject.h> 24#include <linux/kobject.h>
25#include <linux/bug.h>
26#include <linux/genhd.h>
25 27
26#include "ctree.h" 28#include "ctree.h"
27#include "disk-io.h" 29#include "disk-io.h"
28#include "transaction.h" 30#include "transaction.h"
31#include "sysfs.h"
32#include "volumes.h"
33
34static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
35
36static u64 get_features(struct btrfs_fs_info *fs_info,
37 enum btrfs_feature_set set)
38{
39 struct btrfs_super_block *disk_super = fs_info->super_copy;
40 if (set == FEAT_COMPAT)
41 return btrfs_super_compat_flags(disk_super);
42 else if (set == FEAT_COMPAT_RO)
43 return btrfs_super_compat_ro_flags(disk_super);
44 else
45 return btrfs_super_incompat_flags(disk_super);
46}
47
48static void set_features(struct btrfs_fs_info *fs_info,
49 enum btrfs_feature_set set, u64 features)
50{
51 struct btrfs_super_block *disk_super = fs_info->super_copy;
52 if (set == FEAT_COMPAT)
53 btrfs_set_super_compat_flags(disk_super, features);
54 else if (set == FEAT_COMPAT_RO)
55 btrfs_set_super_compat_ro_flags(disk_super, features);
56 else
57 btrfs_set_super_incompat_flags(disk_super, features);
58}
59
60static int can_modify_feature(struct btrfs_feature_attr *fa)
61{
62 int val = 0;
63 u64 set, clear;
64 switch (fa->feature_set) {
65 case FEAT_COMPAT:
66 set = BTRFS_FEATURE_COMPAT_SAFE_SET;
67 clear = BTRFS_FEATURE_COMPAT_SAFE_CLEAR;
68 break;
69 case FEAT_COMPAT_RO:
70 set = BTRFS_FEATURE_COMPAT_RO_SAFE_SET;
71 clear = BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR;
72 break;
73 case FEAT_INCOMPAT:
74 set = BTRFS_FEATURE_INCOMPAT_SAFE_SET;
75 clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
76 break;
77 default:
78 printk(KERN_WARNING "btrfs: sysfs: unknown feature set %d\n",
79 fa->feature_set);
80 return 0;
81 }
82
83 if (set & fa->feature_bit)
84 val |= 1;
85 if (clear & fa->feature_bit)
86 val |= 2;
87
88 return val;
89}
90
91static ssize_t btrfs_feature_attr_show(struct kobject *kobj,
92 struct kobj_attribute *a, char *buf)
93{
94 int val = 0;
95 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
96 struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a);
97 if (fs_info) {
98 u64 features = get_features(fs_info, fa->feature_set);
99 if (features & fa->feature_bit)
100 val = 1;
101 } else
102 val = can_modify_feature(fa);
103
104 return snprintf(buf, PAGE_SIZE, "%d\n", val);
105}
106
107static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
108 struct kobj_attribute *a,
109 const char *buf, size_t count)
110{
111 struct btrfs_fs_info *fs_info;
112 struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a);
113 struct btrfs_trans_handle *trans;
114 u64 features, set, clear;
115 unsigned long val;
116 int ret;
117
118 fs_info = to_fs_info(kobj);
119 if (!fs_info)
120 return -EPERM;
121
122 ret = kstrtoul(skip_spaces(buf), 0, &val);
123 if (ret)
124 return ret;
125
126 if (fa->feature_set == FEAT_COMPAT) {
127 set = BTRFS_FEATURE_COMPAT_SAFE_SET;
128 clear = BTRFS_FEATURE_COMPAT_SAFE_CLEAR;
129 } else if (fa->feature_set == FEAT_COMPAT_RO) {
130 set = BTRFS_FEATURE_COMPAT_RO_SAFE_SET;
131 clear = BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR;
132 } else {
133 set = BTRFS_FEATURE_INCOMPAT_SAFE_SET;
134 clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
135 }
136
137 features = get_features(fs_info, fa->feature_set);
138
139 /* Nothing to do */
140 if ((val && (features & fa->feature_bit)) ||
141 (!val && !(features & fa->feature_bit)))
142 return count;
143
144 if ((val && !(set & fa->feature_bit)) ||
145 (!val && !(clear & fa->feature_bit))) {
146 btrfs_info(fs_info,
147 "%sabling feature %s on mounted fs is not supported.",
148 val ? "En" : "Dis", fa->kobj_attr.attr.name);
149 return -EPERM;
150 }
151
152 btrfs_info(fs_info, "%s %s feature flag",
153 val ? "Setting" : "Clearing", fa->kobj_attr.attr.name);
154
155 trans = btrfs_start_transaction(fs_info->fs_root, 0);
156 if (IS_ERR(trans))
157 return PTR_ERR(trans);
158
159 spin_lock(&fs_info->super_lock);
160 features = get_features(fs_info, fa->feature_set);
161 if (val)
162 features |= fa->feature_bit;
163 else
164 features &= ~fa->feature_bit;
165 set_features(fs_info, fa->feature_set, features);
166 spin_unlock(&fs_info->super_lock);
167
168 ret = btrfs_commit_transaction(trans, fs_info->fs_root);
169 if (ret)
170 return ret;
171
172 return count;
173}
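The store path gates writes through the per-set *_SAFE_SET/*_SAFE_CLEAR masks, flips the bit in the in-memory super block under super_lock, and commits a transaction so the change reaches disk. From user space the toggle is just a write to the per-filesystem attribute; a hedged C example (the directory layout is assumed from the sysfs registration in this series, and the UUID is a placeholder):

#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/fs/btrfs/00000000-0000-0000-0000-000000000000"
		"/features/no_holes";	/* placeholder UUID */
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", f);
	/* Fails with EPERM if the bit is not in the matching
	 * *_SAFE_SET mask for a mounted filesystem. */
	return fclose(f) ? 1 : 0;
}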
174
175static umode_t btrfs_feature_visible(struct kobject *kobj,
176 struct attribute *attr, int unused)
177{
178 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
179 umode_t mode = attr->mode;
180
181 if (fs_info) {
182 struct btrfs_feature_attr *fa;
183 u64 features;
184
185 fa = attr_to_btrfs_feature_attr(attr);
186 features = get_features(fs_info, fa->feature_set);
187
188 if (can_modify_feature(fa))
189 mode |= S_IWUSR;
190 else if (!(features & fa->feature_bit))
191 mode = 0;
192 }
193
194 return mode;
195}
196
197BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF);
198BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL);
199BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS);
200BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO);
201BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA);
202BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
203BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
204BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
205BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
206
207static struct attribute *btrfs_supported_feature_attrs[] = {
208 BTRFS_FEAT_ATTR_PTR(mixed_backref),
209 BTRFS_FEAT_ATTR_PTR(default_subvol),
210 BTRFS_FEAT_ATTR_PTR(mixed_groups),
211 BTRFS_FEAT_ATTR_PTR(compress_lzo),
212 BTRFS_FEAT_ATTR_PTR(big_metadata),
213 BTRFS_FEAT_ATTR_PTR(extended_iref),
214 BTRFS_FEAT_ATTR_PTR(raid56),
215 BTRFS_FEAT_ATTR_PTR(skinny_metadata),
216 BTRFS_FEAT_ATTR_PTR(no_holes),
217 NULL
218};
219
220static const struct attribute_group btrfs_feature_attr_group = {
221 .name = "features",
222 .is_visible = btrfs_feature_visible,
223 .attrs = btrfs_supported_feature_attrs,
224};
225
226static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
227{
228 u64 val;
229 if (lock)
230 spin_lock(lock);
231 val = *value_ptr;
232 if (lock)
233 spin_unlock(lock);
234 return snprintf(buf, PAGE_SIZE, "%llu\n", val);
235}
236
237static ssize_t global_rsv_size_show(struct kobject *kobj,
238 struct kobj_attribute *ka, char *buf)
239{
240 struct btrfs_fs_info *fs_info = to_fs_info(kobj->parent);
241 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
242 return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf);
243}
244BTRFS_ATTR(global_rsv_size, 0444, global_rsv_size_show);
245
246static ssize_t global_rsv_reserved_show(struct kobject *kobj,
247 struct kobj_attribute *a, char *buf)
248{
249 struct btrfs_fs_info *fs_info = to_fs_info(kobj->parent);
250 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
251 return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf);
252}
253BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show);
254
255#define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
256
257static ssize_t raid_bytes_show(struct kobject *kobj,
258 struct kobj_attribute *attr, char *buf);
259BTRFS_RAID_ATTR(total_bytes, raid_bytes_show);
260BTRFS_RAID_ATTR(used_bytes, raid_bytes_show);
261
262static ssize_t raid_bytes_show(struct kobject *kobj,
263 struct kobj_attribute *attr, char *buf)
264
265{
266 struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
267 struct btrfs_block_group_cache *block_group;
268 int index = kobj - sinfo->block_group_kobjs;
269 u64 val = 0;
270
271 down_read(&sinfo->groups_sem);
272 list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
273 if (&attr->attr == BTRFS_RAID_ATTR_PTR(total_bytes))
274 val += block_group->key.offset;
275 else
276 val += btrfs_block_group_used(&block_group->item);
277 }
278 up_read(&sinfo->groups_sem);
279 return snprintf(buf, PAGE_SIZE, "%llu\n", val);
280}
281
282static struct attribute *raid_attributes[] = {
283 BTRFS_RAID_ATTR_PTR(total_bytes),
284 BTRFS_RAID_ATTR_PTR(used_bytes),
285 NULL
286};
287
288static void release_raid_kobj(struct kobject *kobj)
289{
290 kobject_put(kobj->parent);
291}
292
293struct kobj_type btrfs_raid_ktype = {
294 .sysfs_ops = &kobj_sysfs_ops,
295 .release = release_raid_kobj,
296 .default_attrs = raid_attributes,
297};
298
299#define SPACE_INFO_ATTR(field) \
300static ssize_t btrfs_space_info_show_##field(struct kobject *kobj, \
301 struct kobj_attribute *a, \
302 char *buf) \
303{ \
304 struct btrfs_space_info *sinfo = to_space_info(kobj); \
305 return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \
306} \
307BTRFS_ATTR(field, 0444, btrfs_space_info_show_##field)
308
309static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
310 struct kobj_attribute *a,
311 char *buf)
312{
313 struct btrfs_space_info *sinfo = to_space_info(kobj);
314 s64 val = percpu_counter_sum(&sinfo->total_bytes_pinned);
315 return snprintf(buf, PAGE_SIZE, "%lld\n", val);
316}
317
318SPACE_INFO_ATTR(flags);
319SPACE_INFO_ATTR(total_bytes);
320SPACE_INFO_ATTR(bytes_used);
321SPACE_INFO_ATTR(bytes_pinned);
322SPACE_INFO_ATTR(bytes_reserved);
323SPACE_INFO_ATTR(bytes_may_use);
324SPACE_INFO_ATTR(disk_used);
325SPACE_INFO_ATTR(disk_total);
326BTRFS_ATTR(total_bytes_pinned, 0444, btrfs_space_info_show_total_bytes_pinned);
327
328static struct attribute *space_info_attrs[] = {
329 BTRFS_ATTR_PTR(flags),
330 BTRFS_ATTR_PTR(total_bytes),
331 BTRFS_ATTR_PTR(bytes_used),
332 BTRFS_ATTR_PTR(bytes_pinned),
333 BTRFS_ATTR_PTR(bytes_reserved),
334 BTRFS_ATTR_PTR(bytes_may_use),
335 BTRFS_ATTR_PTR(disk_used),
336 BTRFS_ATTR_PTR(disk_total),
337 BTRFS_ATTR_PTR(total_bytes_pinned),
338 NULL,
339};
340
341static void space_info_release(struct kobject *kobj)
342{
343 struct btrfs_space_info *sinfo = to_space_info(kobj);
344 percpu_counter_destroy(&sinfo->total_bytes_pinned);
345 kfree(sinfo);
346}
347
348struct kobj_type space_info_ktype = {
349 .sysfs_ops = &kobj_sysfs_ops,
350 .release = space_info_release,
351 .default_attrs = space_info_attrs,
352};
353
354static const struct attribute *allocation_attrs[] = {
355 BTRFS_ATTR_PTR(global_rsv_reserved),
356 BTRFS_ATTR_PTR(global_rsv_size),
357 NULL,
358};
359
360static ssize_t btrfs_label_show(struct kobject *kobj,
361 struct kobj_attribute *a, char *buf)
362{
363 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
364 return snprintf(buf, PAGE_SIZE, "%s\n", fs_info->super_copy->label);
365}
366
367static ssize_t btrfs_label_store(struct kobject *kobj,
368 struct kobj_attribute *a,
369 const char *buf, size_t len)
370{
371 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
372 struct btrfs_trans_handle *trans;
373 struct btrfs_root *root = fs_info->fs_root;
374 int ret;
375
376 if (len >= BTRFS_LABEL_SIZE) {
377 pr_err("BTRFS: unable to set label with more than %d bytes\n",
378 BTRFS_LABEL_SIZE - 1);
379 return -EINVAL;
380 }
381
382 trans = btrfs_start_transaction(root, 0);
383 if (IS_ERR(trans))
384 return PTR_ERR(trans);
385
386 spin_lock(&root->fs_info->super_lock);
387 strcpy(fs_info->super_copy->label, buf);
388 spin_unlock(&root->fs_info->super_lock);
389 ret = btrfs_commit_transaction(trans, root);
390
391 if (!ret)
392 return len;
393
394 return ret;
395}
396BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store);
397
398static struct attribute *btrfs_attrs[] = {
399 BTRFS_ATTR_PTR(label),
400 NULL,
401};
402
403static void btrfs_release_super_kobj(struct kobject *kobj)
404{
405 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
406 complete(&fs_info->kobj_unregister);
407}
408
409static struct kobj_type btrfs_ktype = {
410 .sysfs_ops = &kobj_sysfs_ops,
411 .release = btrfs_release_super_kobj,
412 .default_attrs = btrfs_attrs,
413};
414
415static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
416{
417 if (kobj->ktype != &btrfs_ktype)
418 return NULL;
419 return container_of(kobj, struct btrfs_fs_info, super_kobj);
420}
421
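The ktype check above exists because btrfs_feature_attr_group is registered
twice: on the module-wide /sys/fs/btrfs kset kobject in btrfs_init_sysfs()
and on each per-filesystem super_kobj in btrfs_sysfs_add_one(), both further
down. A shared callback can therefore use to_fs_info() to tell the contexts
apart, with NULL meaning "module level". A hedged sketch (the function name
is illustrative, not the real btrfs_feature_attr_show):

	static ssize_t example_feature_show(struct kobject *kobj,
					    struct kobj_attribute *a, char *buf)
	{
		struct btrfs_fs_info *fs_info = to_fs_info(kobj);

		/* NULL: kobj is /sys/fs/btrfs itself, not a mounted fs */
		return snprintf(buf, PAGE_SIZE, "%d\n", fs_info ? 1 : 0);
	}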
422#define NUM_FEATURE_BITS 64
423static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
424static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
425
426static u64 supported_feature_masks[3] = {
427 [FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
428 [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
429 [FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP,
430};
431
432static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
433{
434 int set;
435
436 for (set = 0; set < FEAT_MAX; set++) {
437 int i;
438 struct attribute *attrs[2];
439 struct attribute_group agroup = {
440 .name = "features",
441 .attrs = attrs,
442 };
443 u64 features = get_features(fs_info, set);
444 features &= ~supported_feature_masks[set];
445
446 if (!features)
447 continue;
448
449 attrs[1] = NULL;
450 for (i = 0; i < NUM_FEATURE_BITS; i++) {
451 struct btrfs_feature_attr *fa;
452
453 if (!(features & (1ULL << i)))
454 continue;
455
456 fa = &btrfs_feature_attrs[set][i];
457 attrs[0] = &fa->kobj_attr.attr;
458 if (add) {
459 int ret;
460 ret = sysfs_merge_group(&fs_info->super_kobj,
461 &agroup);
462 if (ret)
463 return ret;
464 } else
465 sysfs_unmerge_group(&fs_info->super_kobj,
466 &agroup);
467 }
468
469 }
470 return 0;
471}
472
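Note the use of sysfs_merge_group() rather than sysfs_create_group() here:
the features/ directory itself comes from btrfs_feature_attr_group in
btrfs_sysfs_add_one() below (presumably also named "features"), so each
on-disk bit the kernel does not recognize is merged into that existing
directory one attribute at a time; attrs[] holds a single entry plus its
NULL terminator on every iteration.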
473static void __btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
474{
475 kobject_del(&fs_info->super_kobj);
476 kobject_put(&fs_info->super_kobj);
477 wait_for_completion(&fs_info->kobj_unregister);
478}
479
480void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
481{
482 if (fs_info->space_info_kobj) {
483 sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
484 kobject_del(fs_info->space_info_kobj);
485 kobject_put(fs_info->space_info_kobj);
486 }
487 kobject_del(fs_info->device_dir_kobj);
488 kobject_put(fs_info->device_dir_kobj);
489 addrm_unknown_feature_attrs(fs_info, false);
490 sysfs_remove_group(&fs_info->super_kobj, &btrfs_feature_attr_group);
491 __btrfs_sysfs_remove_one(fs_info);
492}
493
494const char * const btrfs_feature_set_names[3] = {
495 [FEAT_COMPAT] = "compat",
496 [FEAT_COMPAT_RO] = "compat_ro",
497 [FEAT_INCOMPAT] = "incompat",
498};
499
500char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags)
501{
502 size_t bufsize = 4096; /* safe max, 64 names * 64 bytes */
503 int len = 0;
504 int i;
505 char *str;
506
507 str = kmalloc(bufsize, GFP_KERNEL);
508 if (!str)
509 return str;
510
511 for (i = 0; i < ARRAY_SIZE(btrfs_feature_attrs[set]); i++) {
512 const char *name;
513
514 if (!(flags & (1ULL << i)))
515 continue;
516
517 name = btrfs_feature_attrs[set][i].kobj_attr.attr.name;
518 len += snprintf(str + len, bufsize - len, "%s%s",
519 len ? "," : "", name);
520 }
521
522 return str;
523}
524
525static void init_feature_attrs(void)
526{
527 struct btrfs_feature_attr *fa;
528 int set, i;
529
530 BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
531 ARRAY_SIZE(btrfs_feature_attrs));
532 BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
533 ARRAY_SIZE(btrfs_feature_attrs[0]));
534
535 memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
536 memset(btrfs_unknown_feature_names, 0,
537 sizeof(btrfs_unknown_feature_names));
538
539 for (i = 0; btrfs_supported_feature_attrs[i]; i++) {
540 struct btrfs_feature_attr *sfa;
541 struct attribute *a = btrfs_supported_feature_attrs[i];
542 int bit;
543 sfa = attr_to_btrfs_feature_attr(a);
544 bit = ilog2(sfa->feature_bit);
545 fa = &btrfs_feature_attrs[sfa->feature_set][bit];
546
547 fa->kobj_attr.attr.name = sfa->kobj_attr.attr.name;
548 }
549
550 for (set = 0; set < FEAT_MAX; set++) {
551 for (i = 0; i < ARRAY_SIZE(btrfs_feature_attrs[set]); i++) {
552 char *name = btrfs_unknown_feature_names[set][i];
553 fa = &btrfs_feature_attrs[set][i];
554
555 if (fa->kobj_attr.attr.name)
556 continue;
557
558 snprintf(name, 13, "%s:%u",
559 btrfs_feature_set_names[set], i);
560
561 fa->kobj_attr.attr.name = name;
562 fa->kobj_attr.attr.mode = S_IRUGO;
563 fa->feature_set = set;
564 fa->feature_bit = 1ULL << i;
565 }
566 }
567}
568
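The net effect is that a feature bit set on disk but unknown to the running
kernel still gets a sysfs entry, named by set and bit position: an
unrecognized incompat bit 25, say, would show up as a read-only file called
"incompat:25", while recognized bits keep the human-readable names copied
from btrfs_supported_feature_attrs[]. The 13-byte name buffers are sized for
the worst case, "compat_ro:63" plus the NUL terminator.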
569static int add_device_membership(struct btrfs_fs_info *fs_info)
570{
571 int error = 0;
572 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
573 struct btrfs_device *dev;
574
575 fs_info->device_dir_kobj = kobject_create_and_add("devices",
576 &fs_info->super_kobj);
577 if (!fs_info->device_dir_kobj)
578 return -ENOMEM;
579
580 list_for_each_entry(dev, &fs_devices->devices, dev_list) {
581 struct hd_struct *disk = dev->bdev->bd_part;
582 struct kobject *disk_kobj = &part_to_dev(disk)->kobj;
583
584 error = sysfs_create_link(fs_info->device_dir_kobj,
585 disk_kobj, disk_kobj->name);
586 if (error)
587 break;
588 }
589
590 return error;
591}
29 592
30/* /sys/fs/btrfs/ entry */ 593/* /sys/fs/btrfs/ entry */
31static struct kset *btrfs_kset; 594static struct kset *btrfs_kset;
32 595
596int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
597{
598 int error;
599
600 init_completion(&fs_info->kobj_unregister);
601 fs_info->super_kobj.kset = btrfs_kset;
602 error = kobject_init_and_add(&fs_info->super_kobj, &btrfs_ktype, NULL,
603 "%pU", fs_info->fsid);
604 if (error)
605 return error;
606
607 error = sysfs_create_group(&fs_info->super_kobj,
608 &btrfs_feature_attr_group);
609 if (error) {
610 __btrfs_sysfs_remove_one(fs_info);
611 return error;
612 }
613
614 error = addrm_unknown_feature_attrs(fs_info, true);
615 if (error)
616 goto failure;
617
618 error = add_device_membership(fs_info);
619 if (error)
620 goto failure;
621
622 fs_info->space_info_kobj = kobject_create_and_add("allocation",
623 &fs_info->super_kobj);
624 if (!fs_info->space_info_kobj) {
625 error = -ENOMEM;
626 goto failure;
627 }
628
629 error = sysfs_create_files(fs_info->space_info_kobj, allocation_attrs);
630 if (error)
631 goto failure;
632
633 return 0;
634failure:
635 btrfs_sysfs_remove_one(fs_info);
636 return error;
637}
638
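Assuming every step above succeeds, the per-filesystem directory ends up
looking roughly like this (a sketch; the per-RAID and per-space-info entries
under allocation/ are added elsewhere as block groups appear):

	/sys/fs/btrfs/<fsid>/
		label
		features/			(plus any unknown "set:bit" files)
		devices/<disk>			(symlinks to the block devices)
		allocation/
			global_rsv_reserved
			global_rsv_size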
33int btrfs_init_sysfs(void) 639int btrfs_init_sysfs(void)
34{ 640{
641 int ret;
35 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); 642 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
36 if (!btrfs_kset) 643 if (!btrfs_kset)
37 return -ENOMEM; 644 return -ENOMEM;
645
646 init_feature_attrs();
647
648 ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
649 if (ret) {
650 kset_unregister(btrfs_kset);
651 return ret;
652 }
653
38 return 0; 654 return 0;
39} 655}
40 656
41void btrfs_exit_sysfs(void) 657void btrfs_exit_sysfs(void)
42{ 658{
659 sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
43 kset_unregister(btrfs_kset); 660 kset_unregister(btrfs_kset);
44} 661}
45 662
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
new file mode 100644
index 000000000000..f3cea3710d44
--- /dev/null
+++ b/fs/btrfs/sysfs.h
@@ -0,0 +1,64 @@
1#ifndef _BTRFS_SYSFS_H_
2#define _BTRFS_SYSFS_H_
3
4enum btrfs_feature_set {
5 FEAT_COMPAT,
6 FEAT_COMPAT_RO,
7 FEAT_INCOMPAT,
8 FEAT_MAX
9};
10
11#define __INIT_KOBJ_ATTR(_name, _mode, _show, _store) \
12{ \
13 .attr = { .name = __stringify(_name), .mode = _mode }, \
14 .show = _show, \
15 .store = _store, \
16}
17
18#define BTRFS_ATTR_RW(_name, _mode, _show, _store) \
19static struct kobj_attribute btrfs_attr_##_name = \
20 __INIT_KOBJ_ATTR(_name, _mode, _show, _store)
21#define BTRFS_ATTR(_name, _mode, _show) \
22 BTRFS_ATTR_RW(_name, _mode, _show, NULL)
23#define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr)
24
25#define BTRFS_RAID_ATTR(_name, _show) \
26static struct kobj_attribute btrfs_raid_attr_##_name = \
27 __INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
28#define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr)
29
30
31struct btrfs_feature_attr {
32 struct kobj_attribute kobj_attr;
33 enum btrfs_feature_set feature_set;
34 u64 feature_bit;
35};
36
37#define BTRFS_FEAT_ATTR(_name, _feature_set, _prefix, _feature_bit) \
38static struct btrfs_feature_attr btrfs_attr_##_name = { \
39 .kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO, \
40 btrfs_feature_attr_show, \
41 btrfs_feature_attr_store), \
42 .feature_set = _feature_set, \
43 .feature_bit = _prefix ##_## _feature_bit, \
44}
45#define BTRFS_FEAT_ATTR_PTR(_name) (&btrfs_attr_##_name.kobj_attr.attr)
46
47#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
48 BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
49#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
50	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
51#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
52 BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
53
54/* convert from attribute */
55#define to_btrfs_feature_attr(a) \
56 container_of(a, struct btrfs_feature_attr, kobj_attr)
57#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr)
58#define attr_to_btrfs_feature_attr(a) \
59 to_btrfs_feature_attr(attr_to_btrfs_attr(a))
60char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
61extern const char * const btrfs_feature_set_names[3];
62extern struct kobj_type space_info_ktype;
63extern struct kobj_type btrfs_raid_ktype;
64#endif /* _BTRFS_SYSFS_H_ */
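As a usage sketch, a declaration such as BTRFS_FEAT_ATTR_INCOMPAT(raid56,
RAID56) (an illustrative example; the real declarations live elsewhere in
the tree) would expand to:

	static struct btrfs_feature_attr btrfs_attr_raid56 = {
		.kobj_attr = __INIT_KOBJ_ATTR(raid56, S_IRUGO,
					      btrfs_feature_attr_show,
					      btrfs_feature_attr_store),
		.feature_set = FEAT_INCOMPAT,
		.feature_bit = BTRFS_FEATURE_INCOMPAT_RAID56,
	};

that is, one kobj_attribute plus enough metadata for init_feature_attrs()
and btrfs_printable_features() to map the attribute back to its bit.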
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index b353bc806ca0..312560a9123d 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -21,7 +21,7 @@
21 21
22#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 22#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
23 23
24#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__) 24#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
25 25
26int btrfs_test_free_space_cache(void); 26int btrfs_test_free_space_cache(void);
27int btrfs_test_extent_buffer_operations(void); 27int btrfs_test_extent_buffer_operations(void);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c6a872a8a468..34cd83184c4a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -62,7 +62,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
62 WARN_ON(atomic_read(&transaction->use_count) == 0); 62 WARN_ON(atomic_read(&transaction->use_count) == 0);
63 if (atomic_dec_and_test(&transaction->use_count)) { 63 if (atomic_dec_and_test(&transaction->use_count)) {
64 BUG_ON(!list_empty(&transaction->list)); 64 BUG_ON(!list_empty(&transaction->list));
65 WARN_ON(transaction->delayed_refs.root.rb_node); 65 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
66 while (!list_empty(&transaction->pending_chunks)) { 66 while (!list_empty(&transaction->pending_chunks)) {
67 struct extent_map *em; 67 struct extent_map *em;
68 68
@@ -183,8 +183,8 @@ loop:
183 atomic_set(&cur_trans->use_count, 2); 183 atomic_set(&cur_trans->use_count, 2);
184 cur_trans->start_time = get_seconds(); 184 cur_trans->start_time = get_seconds();
185 185
186 cur_trans->delayed_refs.root = RB_ROOT; 186 cur_trans->delayed_refs.href_root = RB_ROOT;
187 cur_trans->delayed_refs.num_entries = 0; 187 atomic_set(&cur_trans->delayed_refs.num_entries, 0);
188 cur_trans->delayed_refs.num_heads_ready = 0; 188 cur_trans->delayed_refs.num_heads_ready = 0;
189 cur_trans->delayed_refs.num_heads = 0; 189 cur_trans->delayed_refs.num_heads = 0;
190 cur_trans->delayed_refs.flushing = 0; 190 cur_trans->delayed_refs.flushing = 0;
@@ -196,17 +196,14 @@ loop:
196 */ 196 */
197 smp_mb(); 197 smp_mb();
198 if (!list_empty(&fs_info->tree_mod_seq_list)) 198 if (!list_empty(&fs_info->tree_mod_seq_list))
199 WARN(1, KERN_ERR "btrfs: tree_mod_seq_list not empty when " 199 WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
200 "creating a fresh transaction\n"); 200 "creating a fresh transaction\n");
201 if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) 201 if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
202 WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " 202 WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
203 "creating a fresh transaction\n"); 203 "creating a fresh transaction\n");
204 atomic64_set(&fs_info->tree_mod_seq, 0); 204 atomic64_set(&fs_info->tree_mod_seq, 0);
205 205
206 spin_lock_init(&cur_trans->delayed_refs.lock); 206 spin_lock_init(&cur_trans->delayed_refs.lock);
207 atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
208 atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
209 init_waitqueue_head(&cur_trans->delayed_refs.wait);
210 207
211 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 208 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
212 INIT_LIST_HEAD(&cur_trans->ordered_operations); 209 INIT_LIST_HEAD(&cur_trans->ordered_operations);
@@ -472,6 +469,7 @@ again:
472 h->type = type; 469 h->type = type;
473 h->allocating_chunk = false; 470 h->allocating_chunk = false;
474 h->reloc_reserved = false; 471 h->reloc_reserved = false;
472 h->sync = false;
475 INIT_LIST_HEAD(&h->qgroup_ref_list); 473 INIT_LIST_HEAD(&h->qgroup_ref_list);
476 INIT_LIST_HEAD(&h->new_bgs); 474 INIT_LIST_HEAD(&h->new_bgs);
477 475
@@ -647,7 +645,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans,
647 struct btrfs_root *root) 645 struct btrfs_root *root)
648{ 646{
649 if (root->fs_info->global_block_rsv.space_info->full && 647 if (root->fs_info->global_block_rsv.space_info->full &&
650 btrfs_should_throttle_delayed_refs(trans, root)) 648 btrfs_check_space_for_delayed_refs(trans, root))
651 return 1; 649 return 1;
652 650
653 return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); 651 return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
@@ -711,8 +709,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
711 btrfs_create_pending_block_groups(trans, root); 709 btrfs_create_pending_block_groups(trans, root);
712 710
713 trans->delayed_ref_updates = 0; 711 trans->delayed_ref_updates = 0;
714 if (btrfs_should_throttle_delayed_refs(trans, root)) { 712 if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
715 cur = max_t(unsigned long, cur, 1); 713 cur = max_t(unsigned long, cur, 32);
716 trans->delayed_ref_updates = 0; 714 trans->delayed_ref_updates = 0;
717 btrfs_run_delayed_refs(trans, root, cur); 715 btrfs_run_delayed_refs(trans, root, cur);
718 } 716 }
@@ -788,12 +786,6 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
788 return __btrfs_end_transaction(trans, root, 1); 786 return __btrfs_end_transaction(trans, root, 1);
789} 787}
790 788
791int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
792 struct btrfs_root *root)
793{
794 return __btrfs_end_transaction(trans, root, 1);
795}
796
797/* 789/*
798 * when btree blocks are allocated, they have some corresponding bits set for 790 * when btree blocks are allocated, they have some corresponding bits set for
799 * them in one of two extent_io trees. This is used to make sure all of 791 * them in one of two extent_io trees. This is used to make sure all of
@@ -1105,7 +1097,7 @@ int btrfs_defrag_root(struct btrfs_root *root)
1105 break; 1097 break;
1106 1098
1107 if (btrfs_defrag_cancelled(root->fs_info)) { 1099 if (btrfs_defrag_cancelled(root->fs_info)) {
1108 printk(KERN_DEBUG "btrfs: defrag_root cancelled\n"); 1100 pr_debug("BTRFS: defrag_root cancelled\n");
1109 ret = -EAGAIN; 1101 ret = -EAGAIN;
1110 break; 1102 break;
1111 } 1103 }
@@ -1746,6 +1738,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1746 goto cleanup_transaction; 1738 goto cleanup_transaction;
1747 1739
1748 btrfs_wait_delalloc_flush(root->fs_info); 1740 btrfs_wait_delalloc_flush(root->fs_info);
1741
1742 btrfs_scrub_pause(root);
1749 /* 1743 /*
1750 * Ok now we need to make sure to block out any other joins while we 1744 * Ok now we need to make sure to block out any other joins while we
1751 * commit the transaction. We could have started a join before setting 1745 * commit the transaction. We could have started a join before setting
@@ -1810,7 +1804,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1810 1804
1811 WARN_ON(cur_trans != trans->transaction); 1805 WARN_ON(cur_trans != trans->transaction);
1812 1806
1813 btrfs_scrub_pause(root);
1814 /* btrfs_commit_tree_roots is responsible for getting the 1807 /* btrfs_commit_tree_roots is responsible for getting the
1815 * various roots consistent with each other. Every pointer 1808 * various roots consistent with each other. Every pointer
1816 * in the tree of tree roots has to point to the most up to date 1809 * in the tree of tree roots has to point to the most up to date
@@ -1833,6 +1826,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1833 goto cleanup_transaction; 1826 goto cleanup_transaction;
1834 } 1827 }
1835 1828
1829 /*
1830 * Since the transaction is done, we should set the inode map cache flag
 1831	 * before any other incoming transaction.
1832 */
1833 if (btrfs_test_opt(root, CHANGE_INODE_CACHE))
1834 btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
1835 else
1836 btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
1837
1836 /* commit_fs_roots gets rid of all the tree log roots, it is now 1838 /* commit_fs_roots gets rid of all the tree log roots, it is now
1837 * safe to free the root of tree log roots 1839 * safe to free the root of tree log roots
1838 */ 1840 */
@@ -1975,10 +1977,23 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1975 } 1977 }
1976 root = list_first_entry(&fs_info->dead_roots, 1978 root = list_first_entry(&fs_info->dead_roots,
1977 struct btrfs_root, root_list); 1979 struct btrfs_root, root_list);
1980 /*
 1981	 * Make sure the root is not involved in a send;
 1982	 * if the first root is busy we return directly
 1983	 * rather than continuing.
1984 */
1985 spin_lock(&root->root_item_lock);
1986 if (root->send_in_progress) {
1987 spin_unlock(&fs_info->trans_lock);
1988 spin_unlock(&root->root_item_lock);
1989 return 0;
1990 }
1991 spin_unlock(&root->root_item_lock);
1992
1978 list_del_init(&root->root_list); 1993 list_del_init(&root->root_list);
1979 spin_unlock(&fs_info->trans_lock); 1994 spin_unlock(&fs_info->trans_lock);
1980 1995
1981 pr_debug("btrfs: cleaner removing %llu\n", root->objectid); 1996 pr_debug("BTRFS: cleaner removing %llu\n", root->objectid);
1982 1997
1983 btrfs_kill_all_delayed_nodes(root); 1998 btrfs_kill_all_delayed_nodes(root);
1984 1999
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7657d115067d..6ac037e9f9f0 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -93,6 +93,7 @@ struct btrfs_trans_handle {
93 short adding_csums; 93 short adding_csums;
94 bool allocating_chunk; 94 bool allocating_chunk;
95 bool reloc_reserved; 95 bool reloc_reserved;
96 bool sync;
96 unsigned int type; 97 unsigned int type;
97 /* 98 /*
98 * this root is only needed to validate that the root passed to 99 * this root is only needed to validate that the root passed to
@@ -154,8 +155,6 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
154 int wait_for_unblock); 155 int wait_for_unblock);
155int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 156int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
156 struct btrfs_root *root); 157 struct btrfs_root *root);
157int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
158 struct btrfs_root *root);
159int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 158int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
160 struct btrfs_root *root); 159 struct btrfs_root *root);
161void btrfs_throttle(struct btrfs_root *root); 160void btrfs_throttle(struct btrfs_root *root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9f7fc51ca334..39d83da03e03 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -570,7 +570,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
570 if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 570 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
571 nbytes = 0; 571 nbytes = 0;
572 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 572 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
573 size = btrfs_file_extent_inline_len(eb, item); 573 size = btrfs_file_extent_inline_len(eb, slot, item);
574 nbytes = btrfs_file_extent_ram_bytes(eb, item); 574 nbytes = btrfs_file_extent_ram_bytes(eb, item);
575 extent_end = ALIGN(start + size, root->sectorsize); 575 extent_end = ALIGN(start + size, root->sectorsize);
576 } else { 576 } else {
@@ -1238,7 +1238,8 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
1238 struct btrfs_root *root, u64 offset) 1238 struct btrfs_root *root, u64 offset)
1239{ 1239{
1240 int ret; 1240 int ret;
1241 ret = btrfs_find_orphan_item(root, offset); 1241 ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
1242 offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
1242 if (ret > 0) 1243 if (ret > 0)
1243 ret = btrfs_insert_orphan_item(trans, root, offset); 1244 ret = btrfs_insert_orphan_item(trans, root, offset);
1244 return ret; 1245 return ret;
@@ -3194,7 +3195,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
3194static noinline int copy_items(struct btrfs_trans_handle *trans, 3195static noinline int copy_items(struct btrfs_trans_handle *trans,
3195 struct inode *inode, 3196 struct inode *inode,
3196 struct btrfs_path *dst_path, 3197 struct btrfs_path *dst_path,
3197 struct extent_buffer *src, 3198 struct btrfs_path *src_path, u64 *last_extent,
3198 int start_slot, int nr, int inode_only) 3199 int start_slot, int nr, int inode_only)
3199{ 3200{
3200 unsigned long src_offset; 3201 unsigned long src_offset;
@@ -3202,6 +3203,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3202 struct btrfs_root *log = BTRFS_I(inode)->root->log_root; 3203 struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
3203 struct btrfs_file_extent_item *extent; 3204 struct btrfs_file_extent_item *extent;
3204 struct btrfs_inode_item *inode_item; 3205 struct btrfs_inode_item *inode_item;
3206 struct extent_buffer *src = src_path->nodes[0];
3207 struct btrfs_key first_key, last_key, key;
3205 int ret; 3208 int ret;
3206 struct btrfs_key *ins_keys; 3209 struct btrfs_key *ins_keys;
3207 u32 *ins_sizes; 3210 u32 *ins_sizes;
@@ -3209,6 +3212,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3209 int i; 3212 int i;
3210 struct list_head ordered_sums; 3213 struct list_head ordered_sums;
3211 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3214 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3215 bool has_extents = false;
3216 bool need_find_last_extent = (*last_extent == 0);
3217 bool done = false;
3212 3218
3213 INIT_LIST_HEAD(&ordered_sums); 3219 INIT_LIST_HEAD(&ordered_sums);
3214 3220
@@ -3217,6 +3223,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3217 if (!ins_data) 3223 if (!ins_data)
3218 return -ENOMEM; 3224 return -ENOMEM;
3219 3225
3226 first_key.objectid = (u64)-1;
3227
3220 ins_sizes = (u32 *)ins_data; 3228 ins_sizes = (u32 *)ins_data;
3221 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 3229 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
3222 3230
@@ -3237,6 +3245,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3237 3245
3238 src_offset = btrfs_item_ptr_offset(src, start_slot + i); 3246 src_offset = btrfs_item_ptr_offset(src, start_slot + i);
3239 3247
 3248		if (i == nr - 1)
3249 last_key = ins_keys[i];
3250
3240 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { 3251 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
3241 inode_item = btrfs_item_ptr(dst_path->nodes[0], 3252 inode_item = btrfs_item_ptr(dst_path->nodes[0],
3242 dst_path->slots[0], 3253 dst_path->slots[0],
@@ -3248,6 +3259,21 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3248 src_offset, ins_sizes[i]); 3259 src_offset, ins_sizes[i]);
3249 } 3260 }
3250 3261
3262 /*
 3263	 * We set need_find_last_extent here in case we were
 3264	 * processing other items and then walked into the first extent in
 3265	 * the inode. If we don't hit an extent then nothing changes;
3266 * we'll do the last search the next time around.
3267 */
3268 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
3269 has_extents = true;
3270 if (need_find_last_extent &&
3271 first_key.objectid == (u64)-1)
3272 first_key = ins_keys[i];
3273 } else {
3274 need_find_last_extent = false;
3275 }
3276
3251 /* take a reference on file data extents so that truncates 3277 /* take a reference on file data extents so that truncates
3252 * or deletes of this inode don't have to relog the inode 3278 * or deletes of this inode don't have to relog the inode
3253 * again 3279 * again
@@ -3312,6 +3338,128 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3312 list_del(&sums->list); 3338 list_del(&sums->list);
3313 kfree(sums); 3339 kfree(sums);
3314 } 3340 }
3341
3342 if (!has_extents)
3343 return ret;
3344
3345 /*
3346 * Because we use btrfs_search_forward we could skip leaves that were
3347 * not modified and then assume *last_extent is valid when it really
3348 * isn't. So back up to the previous leaf and read the end of the last
3349 * extent before we go and fill in holes.
3350 */
3351 if (need_find_last_extent) {
3352 u64 len;
3353
3354 ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
3355 if (ret < 0)
3356 return ret;
3357 if (ret)
3358 goto fill_holes;
3359 if (src_path->slots[0])
3360 src_path->slots[0]--;
3361 src = src_path->nodes[0];
3362 btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
3363 if (key.objectid != btrfs_ino(inode) ||
3364 key.type != BTRFS_EXTENT_DATA_KEY)
3365 goto fill_holes;
3366 extent = btrfs_item_ptr(src, src_path->slots[0],
3367 struct btrfs_file_extent_item);
3368 if (btrfs_file_extent_type(src, extent) ==
3369 BTRFS_FILE_EXTENT_INLINE) {
3370 len = btrfs_file_extent_inline_len(src,
3371 src_path->slots[0],
3372 extent);
3373 *last_extent = ALIGN(key.offset + len,
3374 log->sectorsize);
3375 } else {
3376 len = btrfs_file_extent_num_bytes(src, extent);
3377 *last_extent = key.offset + len;
3378 }
3379 }
3380fill_holes:
3381 /* So we did prev_leaf, now we need to move to the next leaf, but a few
3382 * things could have happened
3383 *
3384 * 1) A merge could have happened, so we could currently be on a leaf
3385 * that holds what we were copying in the first place.
3386 * 2) A split could have happened, and now not all of the items we want
3387 * are on the same leaf.
3388 *
3389 * So we need to adjust how we search for holes, we need to drop the
3390 * path and re-search for the first extent key we found, and then walk
3391 * forward until we hit the last one we copied.
3392 */
3393 if (need_find_last_extent) {
3394 /* btrfs_prev_leaf could return 1 without releasing the path */
3395 btrfs_release_path(src_path);
3396 ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
3397 src_path, 0, 0);
3398 if (ret < 0)
3399 return ret;
3400 ASSERT(ret == 0);
3401 src = src_path->nodes[0];
3402 i = src_path->slots[0];
3403 } else {
3404 i = start_slot;
3405 }
3406
3407 /*
 3408	 * Now go through the copied range and fill in any holes, so that
 3409	 * holes are punched in the log for areas that previously had
 3410	 * extents.
3411 */
3412 while (!done) {
3413 u64 offset, len;
3414 u64 extent_end;
3415
3416 if (i >= btrfs_header_nritems(src_path->nodes[0])) {
3417 ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
3418 if (ret < 0)
3419 return ret;
3420 ASSERT(ret == 0);
3421 src = src_path->nodes[0];
3422 i = 0;
3423 }
3424
3425 btrfs_item_key_to_cpu(src, &key, i);
3426 if (!btrfs_comp_cpu_keys(&key, &last_key))
3427 done = true;
3428 if (key.objectid != btrfs_ino(inode) ||
3429 key.type != BTRFS_EXTENT_DATA_KEY) {
3430 i++;
3431 continue;
3432 }
3433 extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
3434 if (btrfs_file_extent_type(src, extent) ==
3435 BTRFS_FILE_EXTENT_INLINE) {
3436 len = btrfs_file_extent_inline_len(src, i, extent);
3437 extent_end = ALIGN(key.offset + len, log->sectorsize);
3438 } else {
3439 len = btrfs_file_extent_num_bytes(src, extent);
3440 extent_end = key.offset + len;
3441 }
3442 i++;
3443
3444 if (*last_extent == key.offset) {
3445 *last_extent = extent_end;
3446 continue;
3447 }
3448 offset = *last_extent;
3449 len = key.offset - *last_extent;
3450 ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
3451 offset, 0, 0, len, 0, len, 0,
3452 0, 0);
3453 if (ret)
3454 break;
3455 *last_extent = offset + len;
3456 }
3457 /*
3458 * Need to let the callers know we dropped the path so they should
3459 * re-search.
3460 */
3461 if (!ret && need_find_last_extent)
3462 ret = 1;
3315 return ret; 3463 return ret;
3316} 3464}
3317 3465
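A worked example of the hole-filling loop above (hypothetical numbers, 4K
sectorsize): suppose the copied items contain file extents at offset 0
(length 4K) and offset 8K (length 4K), with *last_extent starting at 0. The
first item satisfies *last_extent == key.offset, so *last_extent advances to
4K. For the second item key.offset (8K) != *last_extent (4K), so a file
extent with disk_bytenr 0, i.e. an explicit hole, of length 4K is inserted
into the log at offset 4K, and *last_extent then advances to 12K.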
@@ -3349,21 +3497,27 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3349 int ret; 3497 int ret;
3350 int index = log->log_transid % 2; 3498 int index = log->log_transid % 2;
3351 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3499 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3352 3500 int extent_inserted = 0;
3353 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3354 em->start + em->len, NULL, 0);
3355 if (ret)
3356 return ret;
3357 3501
3358 INIT_LIST_HEAD(&ordered_sums); 3502 INIT_LIST_HEAD(&ordered_sums);
3359 btrfs_init_map_token(&token); 3503 btrfs_init_map_token(&token);
3360 key.objectid = btrfs_ino(inode);
3361 key.type = BTRFS_EXTENT_DATA_KEY;
3362 key.offset = em->start;
3363 3504
3364 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); 3505 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3506 em->start + em->len, NULL, 0, 1,
3507 sizeof(*fi), &extent_inserted);
3365 if (ret) 3508 if (ret)
3366 return ret; 3509 return ret;
3510
3511 if (!extent_inserted) {
3512 key.objectid = btrfs_ino(inode);
3513 key.type = BTRFS_EXTENT_DATA_KEY;
3514 key.offset = em->start;
3515
3516 ret = btrfs_insert_empty_item(trans, log, path, &key,
3517 sizeof(*fi));
3518 if (ret)
3519 return ret;
3520 }
3367 leaf = path->nodes[0]; 3521 leaf = path->nodes[0];
3368 fi = btrfs_item_ptr(leaf, path->slots[0], 3522 fi = btrfs_item_ptr(leaf, path->slots[0],
3369 struct btrfs_file_extent_item); 3523 struct btrfs_file_extent_item);
@@ -3485,7 +3639,11 @@ again:
3485 * start over after this. 3639 * start over after this.
3486 */ 3640 */
3487 3641
3488 wait_event(ordered->wait, ordered->csum_bytes_left == 0); 3642 if (ordered->csum_bytes_left) {
3643 btrfs_start_ordered_extent(inode, ordered, 0);
3644 wait_event(ordered->wait,
3645 ordered->csum_bytes_left == 0);
3646 }
3489 3647
3490 list_for_each_entry(sum, &ordered->list, list) { 3648 list_for_each_entry(sum, &ordered->list, list) {
3491 ret = btrfs_csum_file_blocks(trans, log, sum); 3649 ret = btrfs_csum_file_blocks(trans, log, sum);
@@ -3630,6 +3788,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3630 struct btrfs_key max_key; 3788 struct btrfs_key max_key;
3631 struct btrfs_root *log = root->log_root; 3789 struct btrfs_root *log = root->log_root;
3632 struct extent_buffer *src = NULL; 3790 struct extent_buffer *src = NULL;
3791 u64 last_extent = 0;
3633 int err = 0; 3792 int err = 0;
3634 int ret; 3793 int ret;
3635 int nritems; 3794 int nritems;
@@ -3745,11 +3904,15 @@ again:
3745 goto next_slot; 3904 goto next_slot;
3746 } 3905 }
3747 3906
3748 ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 3907 ret = copy_items(trans, inode, dst_path, path, &last_extent,
3749 ins_nr, inode_only); 3908 ins_start_slot, ins_nr, inode_only);
3750 if (ret) { 3909 if (ret < 0) {
3751 err = ret; 3910 err = ret;
3752 goto out_unlock; 3911 goto out_unlock;
 3912	} else if (ret) {
3913 ins_nr = 0;
3914 btrfs_release_path(path);
3915 continue;
3753 } 3916 }
3754 ins_nr = 1; 3917 ins_nr = 1;
3755 ins_start_slot = path->slots[0]; 3918 ins_start_slot = path->slots[0];
@@ -3763,13 +3926,14 @@ next_slot:
3763 goto again; 3926 goto again;
3764 } 3927 }
3765 if (ins_nr) { 3928 if (ins_nr) {
3766 ret = copy_items(trans, inode, dst_path, src, 3929 ret = copy_items(trans, inode, dst_path, path,
3767 ins_start_slot, 3930 &last_extent, ins_start_slot,
3768 ins_nr, inode_only); 3931 ins_nr, inode_only);
3769 if (ret) { 3932 if (ret < 0) {
3770 err = ret; 3933 err = ret;
3771 goto out_unlock; 3934 goto out_unlock;
3772 } 3935 }
3936 ret = 0;
3773 ins_nr = 0; 3937 ins_nr = 0;
3774 } 3938 }
3775 btrfs_release_path(path); 3939 btrfs_release_path(path);
@@ -3784,12 +3948,13 @@ next_slot:
3784 } 3948 }
3785 } 3949 }
3786 if (ins_nr) { 3950 if (ins_nr) {
3787 ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 3951 ret = copy_items(trans, inode, dst_path, path, &last_extent,
3788 ins_nr, inode_only); 3952 ins_start_slot, ins_nr, inode_only);
3789 if (ret) { 3953 if (ret < 0) {
3790 err = ret; 3954 err = ret;
3791 goto out_unlock; 3955 goto out_unlock;
3792 } 3956 }
3957 ret = 0;
3793 ins_nr = 0; 3958 ins_nr = 0;
3794 } 3959 }
3795 3960
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index b0a523b2c60e..840a38b2778a 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -5,8 +5,8 @@
5 */ 5 */
6 6
7#include <linux/slab.h> 7#include <linux/slab.h>
8#include <linux/export.h>
9#include "ulist.h" 8#include "ulist.h"
9#include "ctree.h"
10 10
11/* 11/*
12 * ulist is a generic data structure to hold a collection of unique u64 12 * ulist is a generic data structure to hold a collection of unique u64
@@ -14,10 +14,6 @@
14 * enumerating it. 14 * enumerating it.
15 * It is possible to store an auxiliary value along with the key. 15 * It is possible to store an auxiliary value along with the key.
16 * 16 *
17 * The implementation is preliminary and can probably be sped up
18 * significantly. A first step would be to store the values in an rbtree
19 * as soon as ULIST_SIZE is exceeded.
20 *
21 * A sample usage for ulists is the enumeration of directed graphs without 17 * A sample usage for ulists is the enumeration of directed graphs without
22 * visiting a node twice. The pseudo-code could look like this: 18 * visiting a node twice. The pseudo-code could look like this:
23 * 19 *
@@ -50,12 +46,10 @@
50 */ 46 */
51void ulist_init(struct ulist *ulist) 47void ulist_init(struct ulist *ulist)
52{ 48{
53 ulist->nnodes = 0; 49 INIT_LIST_HEAD(&ulist->nodes);
54 ulist->nodes = ulist->int_nodes;
55 ulist->nodes_alloced = ULIST_SIZE;
56 ulist->root = RB_ROOT; 50 ulist->root = RB_ROOT;
51 ulist->nnodes = 0;
57} 52}
58EXPORT_SYMBOL(ulist_init);
59 53
60/** 54/**
61 * ulist_fini - free up additionally allocated memory for the ulist 55 * ulist_fini - free up additionally allocated memory for the ulist
@@ -64,18 +58,17 @@ EXPORT_SYMBOL(ulist_init);
64 * This is useful in cases where the base 'struct ulist' has been statically 58 * This is useful in cases where the base 'struct ulist' has been statically
65 * allocated. 59 * allocated.
66 */ 60 */
67void ulist_fini(struct ulist *ulist) 61static void ulist_fini(struct ulist *ulist)
68{ 62{
69 /* 63 struct ulist_node *node;
70 * The first ULIST_SIZE elements are stored inline in struct ulist. 64 struct ulist_node *next;
71	 * Only if more elements are allocated do they need to be freed.		65
72 */ 66 list_for_each_entry_safe(node, next, &ulist->nodes, list) {
73 if (ulist->nodes_alloced > ULIST_SIZE) 67 kfree(node);
74 kfree(ulist->nodes); 68 }
75 ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
76 ulist->root = RB_ROOT; 69 ulist->root = RB_ROOT;
70 INIT_LIST_HEAD(&ulist->nodes);
77} 71}
78EXPORT_SYMBOL(ulist_fini);
79 72
80/** 73/**
81 * ulist_reinit - prepare a ulist for reuse 74 * ulist_reinit - prepare a ulist for reuse
@@ -89,7 +82,6 @@ void ulist_reinit(struct ulist *ulist)
89 ulist_fini(ulist); 82 ulist_fini(ulist);
90 ulist_init(ulist); 83 ulist_init(ulist);
91} 84}
92EXPORT_SYMBOL(ulist_reinit);
93 85
94/** 86/**
95 * ulist_alloc - dynamically allocate a ulist 87 * ulist_alloc - dynamically allocate a ulist
@@ -108,7 +100,6 @@ struct ulist *ulist_alloc(gfp_t gfp_mask)
108 100
109 return ulist; 101 return ulist;
110} 102}
111EXPORT_SYMBOL(ulist_alloc);
112 103
113/** 104/**
114 * ulist_free - free dynamically allocated ulist 105 * ulist_free - free dynamically allocated ulist
@@ -123,7 +114,6 @@ void ulist_free(struct ulist *ulist)
123 ulist_fini(ulist); 114 ulist_fini(ulist);
124 kfree(ulist); 115 kfree(ulist);
125} 116}
126EXPORT_SYMBOL(ulist_free);
127 117
128static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val) 118static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
129{ 119{
@@ -192,63 +182,32 @@ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
192int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, 182int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
193 u64 *old_aux, gfp_t gfp_mask) 183 u64 *old_aux, gfp_t gfp_mask)
194{ 184{
195 int ret = 0; 185 int ret;
196 struct ulist_node *node = NULL; 186 struct ulist_node *node;
187
197 node = ulist_rbtree_search(ulist, val); 188 node = ulist_rbtree_search(ulist, val);
198 if (node) { 189 if (node) {
199 if (old_aux) 190 if (old_aux)
200 *old_aux = node->aux; 191 *old_aux = node->aux;
201 return 0; 192 return 0;
202 } 193 }
194 node = kmalloc(sizeof(*node), gfp_mask);
195 if (!node)
196 return -ENOMEM;
203 197
204 if (ulist->nnodes >= ulist->nodes_alloced) { 198 node->val = val;
205 u64 new_alloced = ulist->nodes_alloced + 128; 199 node->aux = aux;
206 struct ulist_node *new_nodes; 200#ifdef CONFIG_BTRFS_DEBUG
207 void *old = NULL; 201 node->seqnum = ulist->nnodes;
208 int i; 202#endif
209
210 for (i = 0; i < ulist->nnodes; i++)
211 rb_erase(&ulist->nodes[i].rb_node, &ulist->root);
212
213 /*
214 * if nodes_alloced == ULIST_SIZE no memory has been allocated
215 * yet, so pass NULL to krealloc
216 */
217 if (ulist->nodes_alloced > ULIST_SIZE)
218 old = ulist->nodes;
219 203
220 new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced, 204 ret = ulist_rbtree_insert(ulist, node);
221 gfp_mask); 205 ASSERT(!ret);
222 if (!new_nodes) 206 list_add_tail(&node->list, &ulist->nodes);
223 return -ENOMEM; 207 ulist->nnodes++;
224
225 if (!old)
226 memcpy(new_nodes, ulist->int_nodes,
227 sizeof(ulist->int_nodes));
228
229 ulist->nodes = new_nodes;
230 ulist->nodes_alloced = new_alloced;
231
232 /*
233 * krealloc actually uses memcpy, which does not copy rb_node
234 * pointers, so we have to do it ourselves. Otherwise we may
235 * be bitten by crashes.
236 */
237 for (i = 0; i < ulist->nnodes; i++) {
238 ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]);
239 if (ret < 0)
240 return ret;
241 }
242 }
243 ulist->nodes[ulist->nnodes].val = val;
244 ulist->nodes[ulist->nnodes].aux = aux;
245 ret = ulist_rbtree_insert(ulist, &ulist->nodes[ulist->nnodes]);
246 BUG_ON(ret);
247 ++ulist->nnodes;
248 208
249 return 1; 209 return 1;
250} 210}
251EXPORT_SYMBOL(ulist_add);
252 211
253/** 212/**
254 * ulist_next - iterate ulist 213 * ulist_next - iterate ulist
@@ -268,11 +227,25 @@ EXPORT_SYMBOL(ulist_add);
268 */ 227 */
269struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter) 228struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
270{ 229{
271 if (ulist->nnodes == 0) 230 struct ulist_node *node;
231
232 if (list_empty(&ulist->nodes))
272 return NULL; 233 return NULL;
273 if (uiter->i < 0 || uiter->i >= ulist->nnodes) 234 if (uiter->cur_list && uiter->cur_list->next == &ulist->nodes)
274 return NULL; 235 return NULL;
275 236 if (uiter->cur_list) {
276 return &ulist->nodes[uiter->i++]; 237 uiter->cur_list = uiter->cur_list->next;
238 } else {
239 uiter->cur_list = ulist->nodes.next;
240#ifdef CONFIG_BTRFS_DEBUG
241 uiter->i = 0;
242#endif
243 }
244 node = list_entry(uiter->cur_list, struct ulist_node, list);
245#ifdef CONFIG_BTRFS_DEBUG
246 ASSERT(node->seqnum == uiter->i);
247 ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
248 uiter->i++;
249#endif
250 return node;
277} 251}
278EXPORT_SYMBOL(ulist_next);
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index fb36731074b5..7f78cbf5cf41 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -17,18 +17,12 @@
17 * enumerating it. 17 * enumerating it.
18 * It is possible to store an auxiliary value along with the key. 18 * It is possible to store an auxiliary value along with the key.
19 * 19 *
20 * The implementation is preliminary and can probably be sped up
21 * significantly. A first step would be to store the values in an rbtree
22 * as soon as ULIST_SIZE is exceeded.
23 */ 20 */
24
25/*
26 * number of elements statically allocated inside struct ulist
27 */
28#define ULIST_SIZE 16
29
30struct ulist_iterator { 21struct ulist_iterator {
22#ifdef CONFIG_BTRFS_DEBUG
31 int i; 23 int i;
24#endif
25 struct list_head *cur_list; /* hint to start search */
32}; 26};
33 27
34/* 28/*
@@ -37,6 +31,12 @@ struct ulist_iterator {
37struct ulist_node { 31struct ulist_node {
38 u64 val; /* value to store */ 32 u64 val; /* value to store */
39 u64 aux; /* auxiliary value saved along with the val */ 33 u64 aux; /* auxiliary value saved along with the val */
34
35#ifdef CONFIG_BTRFS_DEBUG
36 int seqnum; /* sequence number this node is added */
37#endif
38
39 struct list_head list; /* used to link node */
40 struct rb_node rb_node; /* used to speed up search */ 40 struct rb_node rb_node; /* used to speed up search */
41}; 41};
42 42
@@ -46,28 +46,11 @@ struct ulist {
46 */ 46 */
47 unsigned long nnodes; 47 unsigned long nnodes;
48 48
49 /* 49 struct list_head nodes;
50 * number of nodes we already have room for
51 */
52 unsigned long nodes_alloced;
53
54 /*
55 * pointer to the array storing the elements. The first ULIST_SIZE
56	 * elements are stored inline. In this case it points to int_nodes.
57 * After exceeding ULIST_SIZE, dynamic memory is allocated.
58 */
59 struct ulist_node *nodes;
60
61 struct rb_root root; 50 struct rb_root root;
62
63 /*
64 * inline storage space for the first ULIST_SIZE entries
65 */
66 struct ulist_node int_nodes[ULIST_SIZE];
67}; 51};
68 52
69void ulist_init(struct ulist *ulist); 53void ulist_init(struct ulist *ulist);
70void ulist_fini(struct ulist *ulist);
71void ulist_reinit(struct ulist *ulist); 54void ulist_reinit(struct ulist *ulist);
72struct ulist *ulist_alloc(gfp_t gfp_mask); 55struct ulist *ulist_alloc(gfp_t gfp_mask);
73void ulist_free(struct ulist *ulist); 56void ulist_free(struct ulist *ulist);
@@ -77,6 +60,6 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
77struct ulist_node *ulist_next(struct ulist *ulist, 60struct ulist_node *ulist_next(struct ulist *ulist,
78 struct ulist_iterator *uiter); 61 struct ulist_iterator *uiter);
79 62
80#define ULIST_ITER_INIT(uiter) ((uiter)->i = 0) 63#define ULIST_ITER_INIT(uiter) ((uiter)->cur_list = NULL)
81 64
82#endif 65#endif
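For orientation, the reworked ulist keeps the rbtree for O(log n) uniqueness
checks and uses the new list_head purely for cheap in-order iteration. A
hedged usage sketch built only from the API above (use_val() is a
placeholder for the caller's per-node work):

	struct ulist *u = ulist_alloc(GFP_NOFS);
	struct ulist_iterator uiter;
	struct ulist_node *node;

	ulist_add(u, 42, 0, GFP_NOFS);
	ulist_add(u, 42, 0, GFP_NOFS);	/* duplicate: ignored, returns 0 */
	ulist_add(u, 99, 7, GFP_NOFS);

	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(u, &uiter)))
		use_val(node->val, node->aux);	/* visits 42, then 99 */

	ulist_free(u);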
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index fbda90004fe9..f6a4c03ee7d8 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -69,7 +69,7 @@ static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
69 ret = -ENOENT; 69 ret = -ENOENT;
70 70
71 if (!IS_ALIGNED(item_size, sizeof(u64))) { 71 if (!IS_ALIGNED(item_size, sizeof(u64))) {
72 pr_warn("btrfs: uuid item with illegal size %lu!\n", 72 btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!",
73 (unsigned long)item_size); 73 (unsigned long)item_size);
74 goto out; 74 goto out;
75 } 75 }
@@ -137,7 +137,8 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
137 offset = btrfs_item_ptr_offset(eb, slot); 137 offset = btrfs_item_ptr_offset(eb, slot);
138 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le); 138 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
139 } else if (ret < 0) { 139 } else if (ret < 0) {
140 pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n", 140 btrfs_warn(uuid_root->fs_info, "insert uuid item failed %d "
141 "(0x%016llx, 0x%016llx) type %u!",
141 ret, (unsigned long long)key.objectid, 142 ret, (unsigned long long)key.objectid,
142 (unsigned long long)key.offset, type); 143 (unsigned long long)key.offset, type);
143 goto out; 144 goto out;
@@ -183,7 +184,7 @@ int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
183 184
184 ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1); 185 ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
185 if (ret < 0) { 186 if (ret < 0) {
186 pr_warn("btrfs: error %d while searching for uuid item!\n", 187 btrfs_warn(uuid_root->fs_info, "error %d while searching for uuid item!",
187 ret); 188 ret);
188 goto out; 189 goto out;
189 } 190 }
@@ -197,7 +198,7 @@ int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
197 offset = btrfs_item_ptr_offset(eb, slot); 198 offset = btrfs_item_ptr_offset(eb, slot);
198 item_size = btrfs_item_size_nr(eb, slot); 199 item_size = btrfs_item_size_nr(eb, slot);
199 if (!IS_ALIGNED(item_size, sizeof(u64))) { 200 if (!IS_ALIGNED(item_size, sizeof(u64))) {
200 pr_warn("btrfs: uuid item with illegal size %lu!\n", 201 btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!",
201 (unsigned long)item_size); 202 (unsigned long)item_size);
202 ret = -ENOENT; 203 ret = -ENOENT;
203 goto out; 204 goto out;
@@ -299,7 +300,7 @@ again_search_slot:
299 offset = btrfs_item_ptr_offset(leaf, slot); 300 offset = btrfs_item_ptr_offset(leaf, slot);
300 item_size = btrfs_item_size_nr(leaf, slot); 301 item_size = btrfs_item_size_nr(leaf, slot);
301 if (!IS_ALIGNED(item_size, sizeof(u64))) { 302 if (!IS_ALIGNED(item_size, sizeof(u64))) {
302 pr_warn("btrfs: uuid item with illegal size %lu!\n", 303 btrfs_warn(fs_info, "uuid item with illegal size %lu!",
303 (unsigned long)item_size); 304 (unsigned long)item_size);
304 goto skip; 305 goto skip;
305 } 306 }
@@ -349,6 +350,6 @@ skip:
349out: 350out:
350 btrfs_free_path(path); 351 btrfs_free_path(path);
351 if (ret) 352 if (ret)
352 pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret); 353 btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
353 return 0; 354 return 0;
354} 355}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 54d2685a3071..bab0b84d8f80 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,7 +125,7 @@ static void btrfs_kobject_uevent(struct block_device *bdev,
125 125
126 ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action); 126 ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
127 if (ret) 127 if (ret)
128 pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n", 128 pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
129 action, 129 action,
130 kobject_name(&disk_to_dev(bdev->bd_disk)->kobj), 130 kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
131 &disk_to_dev(bdev->bd_disk)->kobj); 131 &disk_to_dev(bdev->bd_disk)->kobj);
@@ -200,7 +200,7 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
200 200
201 if (IS_ERR(*bdev)) { 201 if (IS_ERR(*bdev)) {
202 ret = PTR_ERR(*bdev); 202 ret = PTR_ERR(*bdev);
203 printk(KERN_INFO "btrfs: open %s failed\n", device_path); 203 printk(KERN_INFO "BTRFS: open %s failed\n", device_path);
204 goto error; 204 goto error;
205 } 205 }
206 206
@@ -912,9 +912,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
912 if (disk_super->label[0]) { 912 if (disk_super->label[0]) {
913 if (disk_super->label[BTRFS_LABEL_SIZE - 1]) 913 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
914 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; 914 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
915 printk(KERN_INFO "btrfs: device label %s ", disk_super->label); 915 printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
916 } else { 916 } else {
917 printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid); 917 printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
918 } 918 }
919 919
920 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); 920 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
@@ -1813,7 +1813,7 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
1813 } 1813 }
1814 1814
1815 if (!*device) { 1815 if (!*device) {
1816 pr_err("btrfs: no missing device found\n"); 1816 btrfs_err(root->fs_info, "no missing device found");
1817 return -ENOENT; 1817 return -ENOENT;
1818 } 1818 }
1819 1819
@@ -3052,7 +3052,7 @@ loop:
3052error: 3052error:
3053 btrfs_free_path(path); 3053 btrfs_free_path(path);
3054 if (enospc_errors) { 3054 if (enospc_errors) {
3055 printk(KERN_INFO "btrfs: %d enospc errors during balance\n", 3055 btrfs_info(fs_info, "%d enospc errors during balance",
3056 enospc_errors); 3056 enospc_errors);
3057 if (!ret) 3057 if (!ret)
3058 ret = -ENOSPC; 3058 ret = -ENOSPC;
@@ -3138,8 +3138,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3138 if (!(bctl->flags & BTRFS_BALANCE_DATA) || 3138 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
3139 !(bctl->flags & BTRFS_BALANCE_METADATA) || 3139 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
3140 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { 3140 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
3141 printk(KERN_ERR "btrfs: with mixed groups data and " 3141 btrfs_err(fs_info, "with mixed groups data and "
3142 "metadata balance options must be the same\n"); 3142 "metadata balance options must be the same");
3143 ret = -EINVAL; 3143 ret = -EINVAL;
3144 goto out; 3144 goto out;
3145 } 3145 }
@@ -3165,8 +3165,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3165 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3165 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3166 (!alloc_profile_is_valid(bctl->data.target, 1) || 3166 (!alloc_profile_is_valid(bctl->data.target, 1) ||
3167 (bctl->data.target & ~allowed))) { 3167 (bctl->data.target & ~allowed))) {
3168 printk(KERN_ERR "btrfs: unable to start balance with target " 3168 btrfs_err(fs_info, "unable to start balance with target "
3169 "data profile %llu\n", 3169 "data profile %llu",
3170 bctl->data.target); 3170 bctl->data.target);
3171 ret = -EINVAL; 3171 ret = -EINVAL;
3172 goto out; 3172 goto out;
@@ -3174,8 +3174,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3174 if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3174 if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3175 (!alloc_profile_is_valid(bctl->meta.target, 1) || 3175 (!alloc_profile_is_valid(bctl->meta.target, 1) ||
3176 (bctl->meta.target & ~allowed))) { 3176 (bctl->meta.target & ~allowed))) {
3177 printk(KERN_ERR "btrfs: unable to start balance with target " 3177 btrfs_err(fs_info,
3178 "metadata profile %llu\n", 3178 "unable to start balance with target metadata profile %llu",
3179 bctl->meta.target); 3179 bctl->meta.target);
3180 ret = -EINVAL; 3180 ret = -EINVAL;
3181 goto out; 3181 goto out;
@@ -3183,8 +3183,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3183 if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3183 if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3184 (!alloc_profile_is_valid(bctl->sys.target, 1) || 3184 (!alloc_profile_is_valid(bctl->sys.target, 1) ||
3185 (bctl->sys.target & ~allowed))) { 3185 (bctl->sys.target & ~allowed))) {
3186 printk(KERN_ERR "btrfs: unable to start balance with target " 3186 btrfs_err(fs_info,
3187 "system profile %llu\n", 3187 "unable to start balance with target system profile %llu",
3188 bctl->sys.target); 3188 bctl->sys.target);
3189 ret = -EINVAL; 3189 ret = -EINVAL;
3190 goto out; 3190 goto out;
@@ -3193,7 +3193,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3193 /* allow dup'ed data chunks only in mixed mode */ 3193 /* allow dup'ed data chunks only in mixed mode */
3194 if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3194 if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3195 (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) { 3195 (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
3196 printk(KERN_ERR "btrfs: dup for data is not allowed\n"); 3196 btrfs_err(fs_info, "dup for data is not allowed");
3197 ret = -EINVAL; 3197 ret = -EINVAL;
3198 goto out; 3198 goto out;
3199 } 3199 }
@@ -3213,11 +3213,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3213 (fs_info->avail_metadata_alloc_bits & allowed) && 3213 (fs_info->avail_metadata_alloc_bits & allowed) &&
3214 !(bctl->meta.target & allowed))) { 3214 !(bctl->meta.target & allowed))) {
3215 if (bctl->flags & BTRFS_BALANCE_FORCE) { 3215 if (bctl->flags & BTRFS_BALANCE_FORCE) {
3216 printk(KERN_INFO "btrfs: force reducing metadata " 3216 btrfs_info(fs_info, "force reducing metadata integrity");
3217 "integrity\n");
3218 } else { 3217 } else {
3219 printk(KERN_ERR "btrfs: balance will reduce metadata " 3218 btrfs_err(fs_info, "balance will reduce metadata "
3220 "integrity, use force if you want this\n"); 3219 "integrity, use force if you want this");
3221 ret = -EINVAL; 3220 ret = -EINVAL;
3222 goto out; 3221 goto out;
3223 } 3222 }
@@ -3303,7 +3302,7 @@ static int balance_kthread(void *data)
3303 mutex_lock(&fs_info->balance_mutex); 3302 mutex_lock(&fs_info->balance_mutex);
3304 3303
3305 if (fs_info->balance_ctl) { 3304 if (fs_info->balance_ctl) {
3306 printk(KERN_INFO "btrfs: continuing balance\n"); 3305 btrfs_info(fs_info, "continuing balance");
3307 ret = btrfs_balance(fs_info->balance_ctl, NULL); 3306 ret = btrfs_balance(fs_info->balance_ctl, NULL);
3308 } 3307 }
3309 3308
@@ -3325,7 +3324,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
3325 spin_unlock(&fs_info->balance_lock); 3324 spin_unlock(&fs_info->balance_lock);
3326 3325
3327 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { 3326 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
3328 printk(KERN_INFO "btrfs: force skipping balance\n"); 3327 btrfs_info(fs_info, "force skipping balance");
3329 return 0; 3328 return 0;
3330 } 3329 }
3331 3330
@@ -3543,7 +3542,7 @@ update_tree:
3543 BTRFS_UUID_KEY_SUBVOL, 3542 BTRFS_UUID_KEY_SUBVOL,
3544 key.objectid); 3543 key.objectid);
3545 if (ret < 0) { 3544 if (ret < 0) {
3546 pr_warn("btrfs: uuid_tree_add failed %d\n", 3545 btrfs_warn(fs_info, "uuid_tree_add failed %d",
3547 ret); 3546 ret);
3548 break; 3547 break;
3549 } 3548 }
@@ -3555,7 +3554,7 @@ update_tree:
3555 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 3554 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
3556 key.objectid); 3555 key.objectid);
3557 if (ret < 0) { 3556 if (ret < 0) {
3558 pr_warn("btrfs: uuid_tree_add failed %d\n", 3557 btrfs_warn(fs_info, "uuid_tree_add failed %d",
3559 ret); 3558 ret);
3560 break; 3559 break;
3561 } 3560 }
@@ -3590,7 +3589,7 @@ out:
3590 if (trans && !IS_ERR(trans)) 3589 if (trans && !IS_ERR(trans))
3591 btrfs_end_transaction(trans, fs_info->uuid_root); 3590 btrfs_end_transaction(trans, fs_info->uuid_root);
3592 if (ret) 3591 if (ret)
3593 pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); 3592 btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
3594 else 3593 else
3595 fs_info->update_uuid_tree_gen = 1; 3594 fs_info->update_uuid_tree_gen = 1;
3596 up(&fs_info->uuid_tree_rescan_sem); 3595 up(&fs_info->uuid_tree_rescan_sem);
@@ -3654,7 +3653,7 @@ static int btrfs_uuid_rescan_kthread(void *data)
 	 */
 	ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
 	if (ret < 0) {
-		pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
+		btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
 		up(&fs_info->uuid_tree_rescan_sem);
 		return ret;
 	}
@@ -3695,7 +3694,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
 	task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
 	if (IS_ERR(task)) {
 		/* fs_info->update_uuid_tree_gen remains 0 in all error cases */
-		pr_warn("btrfs: failed to start uuid_scan task\n");
+		btrfs_warn(fs_info, "failed to start uuid_scan task");
 		up(&fs_info->uuid_tree_rescan_sem);
 		return PTR_ERR(task);
 	}
@@ -3711,7 +3710,7 @@ int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
 	task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
 	if (IS_ERR(task)) {
 		/* fs_info->update_uuid_tree_gen remains 0 in all error cases */
-		pr_warn("btrfs: failed to start uuid_rescan task\n");
+		btrfs_warn(fs_info, "failed to start uuid_rescan task");
 		up(&fs_info->uuid_tree_rescan_sem);
 		return PTR_ERR(task);
 	}
@@ -4033,7 +4032,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 		max_stripe_size = 32 * 1024 * 1024;
 		max_chunk_size = 2 * max_stripe_size;
 	} else {
-		printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n",
+		btrfs_err(info, "invalid chunk type 0x%llx requested\n",
 		       type);
 		BUG_ON(1);
 	}
@@ -4065,7 +4064,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
 		if (!device->writeable) {
 			WARN(1, KERN_ERR
-			     "btrfs: read-only device in alloc_list\n");
+			     "BTRFS: read-only device in alloc_list\n");
 			continue;
 		}
 
@@ -5193,13 +5192,13 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 	read_unlock(&em_tree->lock);
 
 	if (!em) {
-		printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
+		printk(KERN_ERR "BTRFS: couldn't find em for chunk %Lu\n",
 		       chunk_start);
 		return -EIO;
 	}
 
 	if (em->start != chunk_start) {
-		printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
+		printk(KERN_ERR "BTRFS: bad chunk start, em=%Lu, wanted=%Lu\n",
 		       em->start, chunk_start);
 		free_extent_map(em);
 		return -EIO;
@@ -6130,7 +6129,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
 	BUG_ON(!path);
 	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
 	if (ret < 0) {
-		printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
+		printk_in_rcu(KERN_WARNING "BTRFS: "
+			"error %d while searching for dev_stats item for device %s!\n",
 			      ret, rcu_str_deref(device->name));
 		goto out;
 	}
@@ -6140,7 +6140,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
 		/* need to delete old one and insert a new one */
 		ret = btrfs_del_item(trans, dev_root, path);
 		if (ret != 0) {
-			printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
+			printk_in_rcu(KERN_WARNING "BTRFS: "
+				"delete too small dev_stats item for device %s failed %d!\n",
 				      rcu_str_deref(device->name), ret);
 			goto out;
 		}
@@ -6153,7 +6154,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
 		ret = btrfs_insert_empty_item(trans, dev_root, path,
 					      &key, sizeof(*ptr));
 		if (ret < 0) {
-			printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
+			printk_in_rcu(KERN_WARNING "BTRFS: "
+				"insert dev_stats item for device %s failed %d!\n",
 				      rcu_str_deref(device->name), ret);
 			goto out;
 		}
@@ -6206,16 +6208,14 @@ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
 {
 	if (!dev->dev_stats_valid)
 		return;
-	printk_ratelimited_in_rcu(KERN_ERR
-		"btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+	printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
+		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
 		rcu_str_deref(dev->name),
 		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
 		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
 		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
-		btrfs_dev_stat_read(dev,
-				    BTRFS_DEV_STAT_CORRUPTION_ERRS),
-		btrfs_dev_stat_read(dev,
-				    BTRFS_DEV_STAT_GENERATION_ERRS));
+		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
+		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
 }
 
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
@@ -6228,7 +6228,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
 	if (i == BTRFS_DEV_STAT_VALUES_MAX)
 		return; /* all values == 0, suppress message */
 
-	printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
+	printk_in_rcu(KERN_INFO "BTRFS: "
+		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
 		rcu_str_deref(dev->name),
 		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
 		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
@@ -6249,12 +6250,10 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
 	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (!dev) {
-		printk(KERN_WARNING
-		       "btrfs: get dev_stats failed, device not found\n");
+		btrfs_warn(root->fs_info, "get dev_stats failed, device not found");
 		return -ENODEV;
 	} else if (!dev->dev_stats_valid) {
-		printk(KERN_WARNING
-		       "btrfs: get dev_stats failed, not yet valid\n");
+		btrfs_warn(root->fs_info, "get dev_stats failed, not yet valid");
 		return -ENODEV;
 	} else if (stats->flags & BTRFS_DEV_STATS_RESET) {
 		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
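
Editor's note: most of the hunks above swap bare printk() calls for the btrfs_err()/btrfs_warn()/btrfs_info() helpers. The real definitions live in fs/btrfs/ctree.h and funnel through btrfs_printk(); the sketch below is a hypothetical stand-in, assuming only that the helper can reach the superblock through fs_info, to show why the conversion is worthwhile:

	/*
	 * Hypothetical stand-in, not the kernel's definition: the point of
	 * the conversion is that the wrapper tags every message with the
	 * filesystem it concerns, which a bare printk() cannot do.
	 */
	#define btrfs_warn(fs_info, fmt, ...) \
		printk(KERN_WARNING "BTRFS (device %s): " fmt "\n", \
		       (fs_info)->sb->s_id, ##__VA_ARGS__)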
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 3d1c301c9260..ad8328d797ea 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -28,6 +28,7 @@
28#include "transaction.h" 28#include "transaction.h"
29#include "xattr.h" 29#include "xattr.h"
30#include "disk-io.h" 30#include "disk-io.h"
31#include "props.h"
31 32
32 33
33ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 34ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -332,7 +333,8 @@ static bool btrfs_is_valid_xattr(const char *name)
 			XATTR_SECURITY_PREFIX_LEN) ||
 	       !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
 	       !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
-	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
+	       !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
 }
 
 ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
@@ -374,6 +376,10 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	if (!btrfs_is_valid_xattr(name))
 		return -EOPNOTSUPP;
 
+	if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+		return btrfs_set_prop(dentry->d_inode, name,
+				      value, size, flags);
+
 	if (size == 0)
 		value = ""; /* empty EA, do not remove */
 
@@ -403,6 +409,10 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
 	if (!btrfs_is_valid_xattr(name))
 		return -EOPNOTSUPP;
 
+	if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+		return btrfs_set_prop(dentry->d_inode, name,
+				      NULL, 0, XATTR_REPLACE);
+
 	return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
 				XATTR_REPLACE);
 }
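
Editor's note: the effect of the xattr.c hunks is that the new "btrfs." namespace bypasses ordinary xattr storage and is interpreted as filesystem properties. A minimal user-space sketch of what this enables; the property name "btrfs.compression" and value "lzo" are assumptions based on the properties series, and the path is a placeholder for any file on a mounted btrfs:

	#include <stdio.h>
	#include <sys/xattr.h>

	int main(void)
	{
		/* Routed to btrfs_set_prop() by the setxattr hook above. */
		if (setxattr("/mnt/btrfs/file", "btrfs.compression", "lzo", 3, 0) != 0)
			perror("set property");

		/* Routed to btrfs_set_prop(..., NULL, 0, XATTR_REPLACE): clears it. */
		if (removexattr("/mnt/btrfs/file", "btrfs.compression") != 0)
			perror("clear property");

		return 0;
	}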
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 9acb846c3e7f..8e57191950cb 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -97,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws,
 	*total_in = 0;
 
 	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
-		printk(KERN_WARNING "btrfs: deflateInit failed\n");
+		printk(KERN_WARNING "BTRFS: deflateInit failed\n");
 		ret = -1;
 		goto out;
 	}
@@ -125,7 +125,7 @@ static int zlib_compress_pages(struct list_head *ws,
 	while (workspace->def_strm.total_in < len) {
 		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
 		if (ret != Z_OK) {
-			printk(KERN_DEBUG "btrfs: deflate in loop returned %d\n",
+			printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
 			       ret);
 			zlib_deflateEnd(&workspace->def_strm);
 			ret = -1;
@@ -252,7 +252,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
 	}
 
 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
-		printk(KERN_WARNING "btrfs: inflateInit failed\n");
+		printk(KERN_WARNING "BTRFS: inflateInit failed\n");
 		return -1;
 	}
 	while (workspace->inf_strm.total_in < srclen) {
@@ -336,7 +336,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
 	}
 
 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
-		printk(KERN_WARNING "btrfs: inflateInit failed\n");
+		printk(KERN_WARNING "BTRFS: inflateInit failed\n");
 		return -1;
 	}
 
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 0616ffe45702..03f3b05e8ec1 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -75,6 +75,17 @@ do { \
 } while (0)
 
 /*
+ * This is the same regardless of which rwsem implementation is being used.
+ * It is just a heuristic meant to be called by somebody already holding the
+ * rwsem to see if somebody of an incompatible type is waiting for access
+ * to the lock.
+ */
+static inline int rwsem_is_contended(struct rw_semaphore *sem)
+{
+	return !list_empty(&sem->wait_list);
+}
+
+/*
  * lock for reading
  */
 extern void down_read(struct rw_semaphore *sem);
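
Editor's note: rwsem_is_contended() only peeks at the wait list, takes no lock, and can race, which is why the comment calls it a heuristic. The intended pattern looks roughly like the sketch below, with hypothetical placeholders have_more_work() and do_chunk(); a long read-side scan periodically drops the semaphore so a queued waiter of the other type is not starved:

	#include <linux/rwsem.h>
	#include <linux/sched.h>

	/* have_more_work() and do_chunk() are hypothetical placeholders. */
	static bool have_more_work(void);
	static void do_chunk(void);

	static void scan_under_rwsem(struct rw_semaphore *sem)
	{
		down_read(sem);
		while (have_more_work()) {
			do_chunk();
			if (rwsem_is_contended(sem)) {
				up_read(sem);	/* let the queued waiter in */
				cond_resched();
				down_read(sem);
			}
		}
		up_read(sem);
	}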
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 4832d75dcbae..3176cdc32937 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -208,17 +208,18 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
 		  __entry->refs, __entry->compress_type)
 );
 
 #define show_ordered_flags(flags)					\
-	__print_symbolic(flags,						\
-		{ BTRFS_ORDERED_IO_DONE, "IO_DONE" },			\
-		{ BTRFS_ORDERED_COMPLETE, "COMPLETE" },			\
-		{ BTRFS_ORDERED_NOCOW, "NOCOW" },			\
-		{ BTRFS_ORDERED_COMPRESSED, "COMPRESSED" },		\
-		{ BTRFS_ORDERED_PREALLOC, "PREALLOC" },			\
-		{ BTRFS_ORDERED_DIRECT, "DIRECT" },			\
-		{ BTRFS_ORDERED_IOERR, "IOERR" },			\
-		{ BTRFS_ORDERED_UPDATED_ISIZE, "UPDATED_ISIZE" },	\
-		{ BTRFS_ORDERED_LOGGED_CSUM, "LOGGED_CSUM" })
+	__print_flags(flags, "|",					\
+		{ (1 << BTRFS_ORDERED_IO_DONE), "IO_DONE" },		\
+		{ (1 << BTRFS_ORDERED_COMPLETE), "COMPLETE" },		\
+		{ (1 << BTRFS_ORDERED_NOCOW), "NOCOW" },		\
+		{ (1 << BTRFS_ORDERED_COMPRESSED), "COMPRESSED" },	\
+		{ (1 << BTRFS_ORDERED_PREALLOC), "PREALLOC" },		\
+		{ (1 << BTRFS_ORDERED_DIRECT), "DIRECT" },		\
+		{ (1 << BTRFS_ORDERED_IOERR), "IOERR" },		\
+		{ (1 << BTRFS_ORDERED_UPDATED_ISIZE), "UPDATED_ISIZE" }, \
+		{ (1 << BTRFS_ORDERED_LOGGED_CSUM), "LOGGED_CSUM" },	\
+		{ (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" })
 
 
 DECLARE_EVENT_CLASS(btrfs__ordered_extent,
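
Editor's note: the switch from __print_symbolic() to __print_flags() matters because the BTRFS_ORDERED_* constants are bit numbers used with set_bit(), so the recorded flags field holds a mask. __print_symbolic() matches the whole value and could only ever label a mask with exactly one bit set; __print_flags() decodes each bit and joins the names, so a value of (1 << BTRFS_ORDERED_IO_DONE) | (1 << BTRFS_ORDERED_COMPLETE) renders as "IO_DONE|COMPLETE" in the trace output.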
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 45e618921c61..1b8a0f4c9590 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -184,6 +184,12 @@ struct btrfs_ioctl_fs_info_args {
 	__u64 reserved[124];			/* pad to 1k */
 };
 
+struct btrfs_ioctl_feature_flags {
+	__u64 compat_flags;
+	__u64 compat_ro_flags;
+	__u64 incompat_flags;
+};
+
 /* balance control ioctl modes */
 #define BTRFS_BALANCE_CTL_PAUSE		1
 #define BTRFS_BALANCE_CTL_CANCEL	2
@@ -552,6 +558,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
 				    struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_GLOBAL_RSV _IOR(BTRFS_IOCTL_MAGIC, 20, __u64)
 #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
 #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
@@ -606,5 +613,11 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
 				      struct btrfs_ioctl_dev_replace_args)
 #define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
 					 struct btrfs_ioctl_same_args)
+#define BTRFS_IOC_GET_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
+				   struct btrfs_ioctl_feature_flags)
+#define BTRFS_IOC_SET_FEATURES _IOW(BTRFS_IOCTL_MAGIC, 57, \
+				   struct btrfs_ioctl_feature_flags[2])
+#define BTRFS_IOC_GET_SUPPORTED_FEATURES _IOR(BTRFS_IOCTL_MAGIC, 57, \
+				   struct btrfs_ioctl_feature_flags[3])
 
 #endif /* _UAPI_LINUX_BTRFS_H */
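
Editor's note: a minimal user-space sketch of the new feature ioctl follows (the mount point is a placeholder, error handling trimmed to the essentials). The two-element array taken by BTRFS_IOC_SET_FEATURES appears to act as a mask/value pair, and the three sets returned by GET_SUPPORTED_FEATURES as supported/settable/clearable; treat those details as assumptions drawn from the feature-flag patches rather than from this header alone:

	#include <stdio.h>
	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	int main(void)
	{
		struct btrfs_ioctl_feature_flags f;
		int fd = open("/mnt/btrfs", O_RDONLY);	/* any file on the fs */

		if (fd < 0 || ioctl(fd, BTRFS_IOC_GET_FEATURES, &f) < 0) {
			perror("BTRFS_IOC_GET_FEATURES");
			return 1;
		}
		printf("compat %#llx compat_ro %#llx incompat %#llx\n",
		       (unsigned long long)f.compat_flags,
		       (unsigned long long)f.compat_ro_flags,
		       (unsigned long long)f.incompat_flags);
		return 0;
	}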
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index e4629b93bdd6..40bbc04b6f81 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -20,6 +20,9 @@
 #define XATTR_MAC_OSX_PREFIX "osx."
 #define XATTR_MAC_OSX_PREFIX_LEN (sizeof(XATTR_MAC_OSX_PREFIX) - 1)
 
+#define XATTR_BTRFS_PREFIX "btrfs."
+#define XATTR_BTRFS_PREFIX_LEN (sizeof(XATTR_BTRFS_PREFIX) - 1)
+
 #define XATTR_SECURITY_PREFIX	"security."
 #define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
 
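
Editor's note: the sizeof(...) - 1 idiom computes the prefix length at compile time (sizeof of a string literal counts the terminating NUL, so subtracting one yields strlen), matching the other prefix macros in this header.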
diff --git a/lib/kobject.c b/lib/kobject.c
index b0b26665c611..cb14aeac4cca 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -779,6 +779,7 @@ const struct sysfs_ops kobj_sysfs_ops = {
 	.show	= kobj_attr_show,
 	.store	= kobj_attr_store,
 };
+EXPORT_SYMBOL_GPL(kobj_sysfs_ops);
 
 /**
  * kset_register - initialize and add a kset.
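
Editor's note: this export is presumably what lets the btrfs sysfs work in this pull reuse kobj_sysfs_ops when btrfs is built as a module; without EXPORT_SYMBOL_GPL the symbol is visible only to built-in code.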