Diffstat (limited to 'fs/btrfs')
-rw-r--r--   fs/btrfs/Kconfig                    |    3
-rw-r--r--   fs/btrfs/Makefile                   |    2
-rw-r--r--   fs/btrfs/acl.c                      |  142
-rw-r--r--   fs/btrfs/backref.c                  |  195
-rw-r--r--   fs/btrfs/btrfs_inode.h              |    4
-rw-r--r--   fs/btrfs/check-integrity.c          |   24
-rw-r--r--   fs/btrfs/compression.c              |   41
-rw-r--r--   fs/btrfs/ctree.c                    |  552
-rw-r--r--   fs/btrfs/ctree.h                    |  141
-rw-r--r--   fs/btrfs/delayed-inode.c            |  208
-rw-r--r--   fs/btrfs/delayed-inode.h            |    8
-rw-r--r--   fs/btrfs/delayed-ref.c              |  300
-rw-r--r--   fs/btrfs/delayed-ref.h              |   26
-rw-r--r--   fs/btrfs/dev-replace.c              |   56
-rw-r--r--   fs/btrfs/dir-item.c                 |    8
-rw-r--r--   fs/btrfs/disk-io.c                  |  269
-rw-r--r--   fs/btrfs/extent-tree.c              |  618
-rw-r--r--   fs/btrfs/extent_io.c                |  277
-rw-r--r--   fs/btrfs/extent_io.h                |    9
-rw-r--r--   fs/btrfs/extent_map.c               |   74
-rw-r--r--   fs/btrfs/file-item.c                |   23
-rw-r--r--   fs/btrfs/file.c                     |  216
-rw-r--r--   fs/btrfs/free-space-cache.c         |   23
-rw-r--r--   fs/btrfs/hash.c                     |   50
-rw-r--r--   fs/btrfs/hash.h                     |   11
-rw-r--r--   fs/btrfs/inode-item.c               |   65
-rw-r--r--   fs/btrfs/inode.c                    |  500
-rw-r--r--   fs/btrfs/ioctl.c                    |  404
-rw-r--r--   fs/btrfs/lzo.c                      |    6
-rw-r--r--   fs/btrfs/ordered-data.c             |   15
-rw-r--r--   fs/btrfs/orphan.c                   |   20
-rw-r--r--   fs/btrfs/print-tree.c               |    4
-rw-r--r--   fs/btrfs/props.c                    |  427
-rw-r--r--   fs/btrfs/props.h                    |   42
-rw-r--r--   fs/btrfs/qgroup.c                   |   57
-rw-r--r--   fs/btrfs/raid56.c                   |   22
-rw-r--r--   fs/btrfs/reada.c                    |    9
-rw-r--r--   fs/btrfs/relocation.c               |  105
-rw-r--r--   fs/btrfs/root-tree.c                |   19
-rw-r--r--   fs/btrfs/scrub.c                    |  146
-rw-r--r--   fs/btrfs/send.c                     |  973
-rw-r--r--   fs/btrfs/super.c                    |  254
-rw-r--r--   fs/btrfs/sysfs.c                    |  623
-rw-r--r--   fs/btrfs/sysfs.h                    |   64
-rw-r--r--   fs/btrfs/tests/btrfs-tests.h        |    2
-rw-r--r--   fs/btrfs/tests/free-space-tests.c   |    4
-rw-r--r--   fs/btrfs/transaction.c              |   55
-rw-r--r--   fs/btrfs/transaction.h              |    3
-rw-r--r--   fs/btrfs/tree-log.c                 |  209
-rw-r--r--   fs/btrfs/ulist.c                    |  117
-rw-r--r--   fs/btrfs/ulist.h                    |   39
-rw-r--r--   fs/btrfs/uuid-tree.c                |   13
-rw-r--r--   fs/btrfs/volumes.c                  |  108
-rw-r--r--   fs/btrfs/xattr.c                    |   17
-rw-r--r--   fs/btrfs/xattr.h                    |    2
-rw-r--r--   fs/btrfs/zlib.c                     |    8
56 files changed, 5281 insertions(+), 2331 deletions(-)
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index aa976eced2d2..a66768ebc8d1 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,6 +1,7 @@
 config BTRFS_FS
 	tristate "Btrfs filesystem support"
-	select LIBCRC32C
+	select CRYPTO
+	select CRYPTO_CRC32C
 	select ZLIB_INFLATE
 	select ZLIB_DEFLATE
 	select LZO_COMPRESS
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 1a44e42d602a..f341a98031d2 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
-	   uuid-tree.o
+	   uuid-tree.o props.o hash.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 0890c83643e9..ff9b3995d453 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -35,13 +35,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	char *value = NULL;
 	struct posix_acl *acl;
 
-	if (!IS_POSIXACL(inode))
-		return NULL;
-
-	acl = get_cached_acl(inode, type);
-	if (acl != ACL_NOT_CACHED)
-		return acl;
-
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
@@ -76,31 +69,10 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
-static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
-		void *value, size_t size, int type)
-{
-	struct posix_acl *acl;
-	int ret = 0;
-
-	if (!IS_POSIXACL(dentry->d_inode))
-		return -EOPNOTSUPP;
-
-	acl = btrfs_get_acl(dentry->d_inode, type);
-
-	if (IS_ERR(acl))
-		return PTR_ERR(acl);
-	if (acl == NULL)
-		return -ENODATA;
-	ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
-	posix_acl_release(acl);
-
-	return ret;
-}
-
 /*
  * Needs to be called with fs_mutex held
  */
-static int btrfs_set_acl(struct btrfs_trans_handle *trans,
+static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
 			 struct inode *inode, struct posix_acl *acl, int type)
 {
 	int ret, size = 0;
@@ -158,35 +130,9 @@ out:
 	return ret;
 }
 
-static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
-		const void *value, size_t size, int flags, int type)
+int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
-	int ret;
-	struct posix_acl *acl = NULL;
-
-	if (!inode_owner_or_capable(dentry->d_inode))
-		return -EPERM;
-
-	if (!IS_POSIXACL(dentry->d_inode))
-		return -EOPNOTSUPP;
-
-	if (value) {
-		acl = posix_acl_from_xattr(&init_user_ns, value, size);
-		if (IS_ERR(acl))
-			return PTR_ERR(acl);
-
-		if (acl) {
-			ret = posix_acl_valid(acl);
-			if (ret)
-				goto out;
-		}
-	}
-
-	ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-out:
-	posix_acl_release(acl);
-
-	return ret;
+	return __btrfs_set_acl(NULL, inode, acl, type);
 }
 
 /*
@@ -197,83 +143,31 @@ out:
 int btrfs_init_acl(struct btrfs_trans_handle *trans,
 		   struct inode *inode, struct inode *dir)
 {
-	struct posix_acl *acl = NULL;
+	struct posix_acl *default_acl, *acl;
 	int ret = 0;
 
 	/* this happens with subvols */
 	if (!dir)
 		return 0;
 
-	if (!S_ISLNK(inode->i_mode)) {
-		if (IS_POSIXACL(dir)) {
-			acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
-			if (IS_ERR(acl))
-				return PTR_ERR(acl);
-		}
+	ret = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+	if (ret)
+		return ret;
 
-		if (!acl)
-			inode->i_mode &= ~current_umask();
+	if (default_acl) {
+		ret = __btrfs_set_acl(trans, inode, default_acl,
+				      ACL_TYPE_DEFAULT);
+		posix_acl_release(default_acl);
 	}
 
-	if (IS_POSIXACL(dir) && acl) {
-		if (S_ISDIR(inode->i_mode)) {
-			ret = btrfs_set_acl(trans, inode, acl,
-					    ACL_TYPE_DEFAULT);
-			if (ret)
-				goto failed;
-		}
-		ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
-		if (ret < 0)
-			return ret;
-
-		if (ret > 0) {
-			/* we need an acl */
-			ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
-		} else if (ret < 0) {
-			cache_no_acl(inode);
-		}
-	} else {
-		cache_no_acl(inode);
+	if (acl) {
+		if (!ret)
+			ret = __btrfs_set_acl(trans, inode, acl,
+					      ACL_TYPE_ACCESS);
+		posix_acl_release(acl);
 	}
-failed:
-	posix_acl_release(acl);
-
-	return ret;
-}
 
-int btrfs_acl_chmod(struct inode *inode)
-{
-	struct posix_acl *acl;
-	int ret = 0;
-
-	if (S_ISLNK(inode->i_mode))
-		return -EOPNOTSUPP;
-
-	if (!IS_POSIXACL(inode))
-		return 0;
-
-	acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
-	if (IS_ERR_OR_NULL(acl))
-		return PTR_ERR(acl);
-
-	ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
-	if (ret)
-		return ret;
-	ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS);
-	posix_acl_release(acl);
+	if (!default_acl && !acl)
+		cache_no_acl(inode);
 	return ret;
 }
-
-const struct xattr_handler btrfs_xattr_acl_default_handler = {
-	.prefix = POSIX_ACL_XATTR_DEFAULT,
-	.flags	= ACL_TYPE_DEFAULT,
-	.get	= btrfs_xattr_acl_get,
-	.set	= btrfs_xattr_acl_set,
-};
-
-const struct xattr_handler btrfs_xattr_acl_access_handler = {
-	.prefix = POSIX_ACL_XATTR_ACCESS,
-	.flags	= ACL_TYPE_ACCESS,
-	.get	= btrfs_xattr_acl_get,
-	.set	= btrfs_xattr_acl_set,
-};
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 3775947429b2..aded3ef3d3d4 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -66,6 +66,16 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
 	return 0;
 }
 
+static void free_inode_elem_list(struct extent_inode_elem *eie)
+{
+	struct extent_inode_elem *eie_next;
+
+	for (; eie; eie = eie_next) {
+		eie_next = eie->next;
+		kfree(eie);
+	}
+}
+
 static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
 				u64 extent_item_pos,
 				struct extent_inode_elem **eie)
@@ -209,18 +219,19 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 }
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
-			   struct ulist *parents, int level,
-			   struct btrfs_key *key_for_search, u64 time_seq,
-			   u64 wanted_disk_byte,
-			   const u64 *extent_item_pos)
+			   struct ulist *parents, struct __prelim_ref *ref,
+			   int level, u64 time_seq, const u64 *extent_item_pos)
 {
 	int ret = 0;
 	int slot;
 	struct extent_buffer *eb;
 	struct btrfs_key key;
+	struct btrfs_key *key_for_search = &ref->key_for_search;
 	struct btrfs_file_extent_item *fi;
 	struct extent_inode_elem *eie = NULL, *old = NULL;
 	u64 disk_byte;
+	u64 wanted_disk_byte = ref->wanted_disk_byte;
+	u64 count = 0;
 
 	if (level != 0) {
 		eb = path->nodes[level];
@@ -238,7 +249,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
 		ret = btrfs_next_old_leaf(root, path, time_seq);
 
-	while (!ret) {
+	while (!ret && count < ref->count) {
 		eb = path->nodes[0];
 		slot = path->slots[0];
 
@@ -254,6 +265,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 		if (disk_byte == wanted_disk_byte) {
 			eie = NULL;
 			old = NULL;
+			count++;
 			if (extent_item_pos) {
 				ret = check_extent_in_eb(&key, eb, fi,
 						*extent_item_pos,
@@ -273,6 +285,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 					old = old->next;
 				old->next = eie;
 			}
+			eie = NULL;
 		}
 next:
 		ret = btrfs_next_old_item(root, path, time_seq);
@@ -280,6 +293,8 @@ next:
 
 	if (ret > 0)
 		ret = 0;
+	else if (ret < 0)
+		free_inode_elem_list(eie);
 	return ret;
 }
 
@@ -299,23 +314,34 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	int ret = 0;
 	int root_level;
 	int level = ref->level;
+	int index;
 
 	root_key.objectid = ref->root_id;
 	root_key.type = BTRFS_ROOT_ITEM_KEY;
 	root_key.offset = (u64)-1;
+
+	index = srcu_read_lock(&fs_info->subvol_srcu);
+
 	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
 	if (IS_ERR(root)) {
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
 		ret = PTR_ERR(root);
 		goto out;
 	}
 
 	root_level = btrfs_old_root_level(root, time_seq);
 
-	if (root_level + 1 == level)
+	if (root_level + 1 == level) {
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
 		goto out;
+	}
 
 	path->lowest_level = level;
 	ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+
+	/* root node has been locked, we can release @subvol_srcu safely here */
+	srcu_read_unlock(&fs_info->subvol_srcu, index);
+
 	pr_debug("search slot in root %llu (level %d, ref count %d) returned "
 		 "%d for key (%llu %u %llu)\n",
 		 ref->root_id, level, ref->count, ret,
@@ -334,9 +360,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		eb = path->nodes[level];
 	}
 
-	ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
-				time_seq, ref->wanted_disk_byte,
-				extent_item_pos);
+	ret = add_all_parents(root, path, parents, ref, level, time_seq,
+			      extent_item_pos);
 out:
 	path->lowest_level = 0;
 	btrfs_release_path(path);
@@ -376,10 +401,16 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 			continue;
 		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
 					     parents, extent_item_pos);
-		if (err == -ENOMEM)
-			goto out;
-		if (err)
+		/*
+		 * we can only tolerate ENOENT,otherwise,we should catch error
+		 * and return directly.
+		 */
+		if (err == -ENOENT) {
 			continue;
+		} else if (err) {
+			ret = err;
+			goto out;
+		}
 
 		/* we put the first parent into the ref at hand */
 		ULIST_ITER_INIT(&uiter);
@@ -538,14 +569,13 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 	if (extent_op && extent_op->update_key)
 		btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
-	while ((n = rb_prev(n))) {
+	spin_lock(&head->lock);
+	n = rb_first(&head->ref_root);
+	while (n) {
 		struct btrfs_delayed_ref_node *node;
 		node = rb_entry(n, struct btrfs_delayed_ref_node,
 				rb_node);
-		if (node->bytenr != head->node.bytenr)
-			break;
-		WARN_ON(node->is_head);
-
+		n = rb_next(n);
 		if (node->seq > seq)
 			continue;
 
@@ -612,10 +642,10 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 			WARN_ON(1);
 		}
 		if (ret)
-			return ret;
+			break;
 	}
-
-	return 0;
+	spin_unlock(&head->lock);
+	return ret;
 }
 
 /*
@@ -828,6 +858,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct list_head prefs_delayed;
 	struct list_head prefs;
 	struct __prelim_ref *ref;
+	struct extent_inode_elem *eie = NULL;
 
 	INIT_LIST_HEAD(&prefs);
 	INIT_LIST_HEAD(&prefs_delayed);
@@ -882,15 +913,15 @@ again:
 				btrfs_put_delayed_ref(&head->node);
 				goto again;
 			}
+			spin_unlock(&delayed_refs->lock);
 			ret = __add_delayed_refs(head, time_seq,
 						 &prefs_delayed);
 			mutex_unlock(&head->mutex);
-			if (ret) {
-				spin_unlock(&delayed_refs->lock);
+			if (ret)
 				goto out;
-			}
+		} else {
+			spin_unlock(&delayed_refs->lock);
 		}
-		spin_unlock(&delayed_refs->lock);
 	}
 
 	if (path->slots[0]) {
@@ -941,7 +972,6 @@ again:
 			goto out;
 		}
 		if (ref->count && ref->parent) {
-			struct extent_inode_elem *eie = NULL;
 			if (extent_item_pos && !ref->inode_list) {
 				u32 bsz;
 				struct extent_buffer *eb;
@@ -976,6 +1006,7 @@ again:
 					eie = eie->next;
 				eie->next = ref->inode_list;
 			}
+			eie = NULL;
 		}
 		list_del(&ref->list);
 		kmem_cache_free(btrfs_prelim_ref_cache, ref);
@@ -994,7 +1025,8 @@ out:
 		list_del(&ref->list);
 		kmem_cache_free(btrfs_prelim_ref_cache, ref);
 	}
-
+	if (ret < 0)
+		free_inode_elem_list(eie);
 	return ret;
 }
 
@@ -1002,7 +1034,6 @@ static void free_leaf_list(struct ulist *blocks)
 {
 	struct ulist_node *node = NULL;
 	struct extent_inode_elem *eie;
-	struct extent_inode_elem *eie_next;
 	struct ulist_iterator uiter;
 
 	ULIST_ITER_INIT(&uiter);
@@ -1010,10 +1041,7 @@ static void free_leaf_list(struct ulist *blocks)
 		if (!node->aux)
 			continue;
 		eie = (struct extent_inode_elem *)(uintptr_t)node->aux;
-		for (; eie; eie = eie_next) {
-			eie_next = eie->next;
-			kfree(eie);
-		}
+		free_inode_elem_list(eie);
 		node->aux = 0;
 	}
 
@@ -1101,44 +1129,13 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 		if (!node)
 			break;
 		bytenr = node->val;
+		cond_resched();
 	}
 
 	ulist_free(tmp);
 	return 0;
 }
 
-
-static int __inode_info(u64 inum, u64 ioff, u8 key_type,
-			struct btrfs_root *fs_root, struct btrfs_path *path,
-			struct btrfs_key *found_key)
-{
-	int ret;
-	struct btrfs_key key;
-	struct extent_buffer *eb;
-
-	key.type = key_type;
-	key.objectid = inum;
-	key.offset = ioff;
-
-	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	eb = path->nodes[0];
-	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
-		ret = btrfs_next_leaf(fs_root, path);
-		if (ret)
-			return ret;
-		eb = path->nodes[0];
-	}
-
-	btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
-	if (found_key->type != key.type || found_key->objectid != key.objectid)
-		return 1;
-
-	return 0;
-}
-
 /*
  * this makes the path point to (inum INODE_ITEM ioff)
  */
@@ -1146,16 +1143,16 @@ int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
 		    struct btrfs_path *path)
 {
 	struct btrfs_key key;
-	return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path,
-			    &key);
+	return btrfs_find_item(fs_root, path, inum, ioff,
+			BTRFS_INODE_ITEM_KEY, &key);
 }
 
 static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
 			  struct btrfs_path *path,
 			  struct btrfs_key *found_key)
 {
-	return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path,
-			    found_key);
+	return btrfs_find_item(fs_root, path, inum, ioff,
+			BTRFS_INODE_REF_KEY, found_key);
 }
 
 int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
@@ -1335,20 +1332,45 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		return ret;
-	ret = btrfs_previous_item(fs_info->extent_root, path,
-					0, BTRFS_EXTENT_ITEM_KEY);
-	if (ret < 0)
-		return ret;
 
-	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
+	while (1) {
+		u32 nritems;
+		if (path->slots[0] == 0) {
+			btrfs_set_path_blocking(path);
+			ret = btrfs_prev_leaf(fs_info->extent_root, path);
+			if (ret != 0) {
+				if (ret > 0) {
+					pr_debug("logical %llu is not within "
+						 "any extent\n", logical);
+					ret = -ENOENT;
+				}
+				return ret;
+			}
+		} else {
+			path->slots[0]--;
+		}
+		nritems = btrfs_header_nritems(path->nodes[0]);
+		if (nritems == 0) {
+			pr_debug("logical %llu is not within any extent\n",
+				 logical);
+			return -ENOENT;
+		}
+		if (path->slots[0] == nritems)
+			path->slots[0]--;
+
+		btrfs_item_key_to_cpu(path->nodes[0], found_key,
+				      path->slots[0]);
+		if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
+		    found_key->type == BTRFS_METADATA_ITEM_KEY)
+			break;
+	}
+
 	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
 		size = fs_info->extent_root->leafsize;
 	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
 		size = found_key->offset;
 
-	if ((found_key->type != BTRFS_EXTENT_ITEM_KEY &&
-	     found_key->type != BTRFS_METADATA_ITEM_KEY) ||
-	    found_key->objectid > logical ||
+	if (found_key->objectid > logical ||
 	    found_key->objectid + size <= logical) {
 		pr_debug("logical %llu is not within any extent\n", logical);
 		return -ENOENT;
@@ -1601,7 +1623,6 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 	struct btrfs_key found_key;
 
 	while (!ret) {
-		path->leave_spinning = 1;
 		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
 				     &found_key);
 		if (ret < 0)
@@ -1614,9 +1635,12 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
 
 		parent = found_key.offset;
 		slot = path->slots[0];
-		eb = path->nodes[0];
-		/* make sure we can use eb after releasing the path */
-		atomic_inc(&eb->refs);
+		eb = btrfs_clone_extent_buffer(path->nodes[0]);
+		if (!eb) {
+			ret = -ENOMEM;
+			break;
+		}
+		extent_buffer_get(eb);
 		btrfs_tree_read_lock(eb);
 		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 		btrfs_release_path(path);
@@ -1674,17 +1698,20 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
 		++found;
 
 		slot = path->slots[0];
-		eb = path->nodes[0];
-		/* make sure we can use eb after releasing the path */
-		atomic_inc(&eb->refs);
+		eb = btrfs_clone_extent_buffer(path->nodes[0]);
+		if (!eb) {
+			ret = -ENOMEM;
+			break;
+		}
+		extent_buffer_get(eb);
 
 		btrfs_tree_read_lock(eb);
 		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 		btrfs_release_path(path);
 
 		leaf = path->nodes[0];
-		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+		item_size = btrfs_item_size_nr(leaf, slot);
+		ptr = btrfs_item_ptr_offset(leaf, slot);
 		cur_offset = 0;
 
 		while (cur_offset < item_size) {
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ac0b39db27d1..8fed2125689e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -43,6 +43,7 @@
 #define BTRFS_INODE_COPY_EVERYTHING		8
 #define BTRFS_INODE_IN_DELALLOC_LIST		9
 #define BTRFS_INODE_READDIO_NEED_LOCK		10
+#define BTRFS_INODE_HAS_PROPS			11
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -135,6 +136,9 @@ struct btrfs_inode {
 	 */
 	u64 index_cnt;
 
+	/* Cache the directory index number to speed the dir/file remove */
+	u64 dir_index;
+
 	/* the fsync log has some corner cases that mean we have to check
 	 * directories to see if any unlinks have been done before
 	 * the directory was logged.  See tree-log.c for all the
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 131d82800b3a..0e8388e72d8d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -92,11 +92,11 @@
 #include <linux/slab.h>
 #include <linux/buffer_head.h>
 #include <linux/mutex.h>
-#include <linux/crc32c.h>
 #include <linux/genhd.h>
 #include <linux/blkdev.h>
 #include "ctree.h"
 #include "disk-io.h"
+#include "hash.h"
 #include "transaction.h"
 #include "extent_io.h"
 #include "volumes.h"
@@ -1456,10 +1456,14 @@ static int btrfsic_handle_extent_data(
 	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
 				     file_extent_item_offset,
 				     sizeof(struct btrfs_file_extent_item));
-	next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
-		      btrfs_stack_file_extent_offset(&file_extent_item);
-	generation = btrfs_stack_file_extent_generation(&file_extent_item);
-	num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+	next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
+	if (btrfs_stack_file_extent_compression(&file_extent_item) ==
+	    BTRFS_COMPRESS_NONE) {
+		next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
+		num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+	} else {
+		num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
+	}
 	generation = btrfs_stack_file_extent_generation(&file_extent_item);
 
 	if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
@@ -1695,7 +1699,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 			return -1;
 		}
 		bio->bi_bdev = block_ctx->dev->bdev;
-		bio->bi_sector = dev_bytenr >> 9;
+		bio->bi_iter.bi_sector = dev_bytenr >> 9;
 
 		for (j = i; j < num_pages; j++) {
 			ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -1819,7 +1823,7 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
 		size_t sublen = i ? PAGE_CACHE_SIZE :
 				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
 
-		crc = crc32c(crc, data, sublen);
+		crc = btrfs_crc32c(crc, data, sublen);
 	}
 	btrfs_csum_final(crc, csum);
 	if (memcmp(csum, h->csum, state->csum_size))
@@ -3013,7 +3017,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
 		int bio_is_patched;
 		char **mapped_datav;
 
-		dev_bytenr = 512 * bio->bi_sector;
+		dev_bytenr = 512 * bio->bi_iter.bi_sector;
 		bio_is_patched = 0;
 		if (dev_state->state->print_mask &
 		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
@@ -3021,8 +3025,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
3021 "submit_bio(rw=0x%x, bi_vcnt=%u," 3025 "submit_bio(rw=0x%x, bi_vcnt=%u,"
3022 " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", 3026 " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
3023 rw, bio->bi_vcnt, 3027 rw, bio->bi_vcnt,
3024 (unsigned long long)bio->bi_sector, dev_bytenr, 3028 (unsigned long long)bio->bi_iter.bi_sector,
3025 bio->bi_bdev); 3029 dev_bytenr, bio->bi_bdev);
3026 3030
3027 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 3031 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3028 GFP_NOFS); 3032 GFP_NOFS);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1499b27b4186..b01fb6c527e3 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -128,11 +128,10 @@ static int check_compressed_csum(struct inode *inode,
 		kunmap_atomic(kaddr);
 
 		if (csum != *cb_sum) {
-			printk(KERN_INFO "btrfs csum failed ino %llu "
-			       "extent %llu csum %u "
-			       "wanted %u mirror %d\n",
-			       btrfs_ino(inode), disk_start, csum, *cb_sum,
-			       cb->mirror_num);
+			btrfs_info(BTRFS_I(inode)->root->fs_info,
+			   "csum failed ino %llu extent %llu csum %u wanted %u mirror %d",
+			   btrfs_ino(inode), disk_start, csum, *cb_sum,
+			   cb->mirror_num);
 			ret = -EIO;
 			goto fail;
 		}
@@ -172,7 +171,8 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 		goto out;
 
 	inode = cb->inode;
-	ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
+	ret = check_compressed_csum(inode, cb,
+				    (u64)bio->bi_iter.bi_sector << 9);
 	if (ret)
 		goto csum_failed;
 
@@ -201,18 +201,16 @@ csum_failed:
 	if (cb->errors) {
 		bio_io_error(cb->orig_bio);
 	} else {
-		int bio_index = 0;
-		struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
+		int i;
+		struct bio_vec *bvec;
 
 		/*
 		 * we have verified the checksum already, set page
 		 * checked so the end_io handlers know about it
 		 */
-		while (bio_index < cb->orig_bio->bi_vcnt) {
+		bio_for_each_segment_all(bvec, cb->orig_bio, i)
 			SetPageChecked(bvec->bv_page);
-			bvec++;
-			bio_index++;
-		}
+
 		bio_endio(cb->orig_bio, 0);
 	}
 
@@ -372,7 +370,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
 		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
-		if (bio->bi_size)
+		if (bio->bi_iter.bi_size)
 			ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
 							   PAGE_CACHE_SIZE,
 							   bio, 0);
@@ -412,7 +410,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 		bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
 	}
 	if (bytes_left < PAGE_CACHE_SIZE) {
-		printk("bytes left %lu compress len %lu nr %lu\n",
+		btrfs_info(BTRFS_I(inode)->root->fs_info,
+				"bytes left %lu compress len %lu nr %lu",
 		       bytes_left, cb->compressed_len, cb->nr_pages);
 	}
 	bytes_left -= PAGE_CACHE_SIZE;
@@ -506,7 +505,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 
 		if (!em || last_offset < em->start ||
 		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
-		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
+		    (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
 			free_extent_map(em);
 			unlock_extent(tree, last_offset, end);
 			unlock_page(page);
@@ -552,7 +551,7 @@ next:
  * in it.  We don't actually do IO on those pages but allocate new ones
  * to hold the compressed pages on disk.
  *
- * bio->bi_sector points to the compressed extent on disk
+ * bio->bi_iter.bi_sector points to the compressed extent on disk
  * bio->bi_io_vec points to all of the inode pages
  * bio->bi_vcnt is a count of pages
  *
@@ -573,7 +572,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	struct page *page;
 	struct block_device *bdev;
 	struct bio *comp_bio;
-	u64 cur_disk_byte = (u64)bio->bi_sector << 9;
+	u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
@@ -659,7 +658,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_CACHE_SHIFT;
 
-		if (comp_bio->bi_size)
+		if (comp_bio->bi_iter.bi_size)
 			ret = tree->ops->merge_bio_hook(READ, page, 0,
 							PAGE_CACHE_SIZE,
 							comp_bio, 0);
@@ -687,8 +686,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 							comp_bio, sums);
 			BUG_ON(ret); /* -ENOMEM */
 		}
-		sums += (comp_bio->bi_size + root->sectorsize - 1) /
-			root->sectorsize;
+		sums += (comp_bio->bi_iter.bi_size +
+			 root->sectorsize - 1) / root->sectorsize;
 
 		ret = btrfs_map_bio(root, READ, comp_bio,
 				    mirror_num, 0);
@@ -1011,6 +1010,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 		bytes = min(bytes, working_bytes);
 		kaddr = kmap_atomic(page_out);
 		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+		if (*pg_index == (vcnt - 1) && *pg_offset == 0)
+			memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
 		kunmap_atomic(kaddr);
 		flush_dcache_page(page_out);
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 316136bd6dd7..cbd3a7d6fa68 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -39,9 +39,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 			      struct extent_buffer *src_buf);
 static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
 		    int level, int slot);
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
 				 struct extent_buffer *eb);
-static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
 
 struct btrfs_path *btrfs_alloc_path(void)
 {
@@ -475,6 +474,8 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
  * the index is the shifted logical of the *new* root node for root replace
  * operations, or the shifted logical of the affected block for all other
  * operations.
+ *
+ * Note: must be called with write lock (tree_mod_log_write_lock).
  */
 static noinline int
 __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -483,24 +484,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 	struct rb_node **new;
 	struct rb_node *parent = NULL;
 	struct tree_mod_elem *cur;
-	int ret = 0;
 
 	BUG_ON(!tm);
 
-	tree_mod_log_write_lock(fs_info);
-	if (list_empty(&fs_info->tree_mod_seq_list)) {
-		tree_mod_log_write_unlock(fs_info);
-		/*
-		 * Ok we no longer care about logging modifications, free up tm
-		 * and return 0.  Any callers shouldn't be using tm after
-		 * calling tree_mod_log_insert, but if they do we can just
-		 * change this to return a special error code to let the callers
-		 * do their own thing.
-		 */
-		kfree(tm);
-		return 0;
-	}
-
 	spin_lock(&fs_info->tree_mod_seq_lock);
 	tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
 	spin_unlock(&fs_info->tree_mod_seq_lock);
@@ -518,18 +504,13 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
 			new = &((*new)->rb_left);
 		else if (cur->seq > tm->seq)
 			new = &((*new)->rb_right);
-		else {
-			ret = -EEXIST;
-			kfree(tm);
-			goto out;
-		}
+		else
+			return -EEXIST;
 	}
 
 	rb_link_node(&tm->node, parent, new);
 	rb_insert_color(&tm->node, tm_root);
-out:
-	tree_mod_log_write_unlock(fs_info);
-	return ret;
+	return 0;
 }
 
 /*
@@ -545,19 +526,38 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
 		return 1;
 	if (eb && btrfs_header_level(eb) == 0)
 		return 1;
+
+	tree_mod_log_write_lock(fs_info);
+	if (list_empty(&(fs_info)->tree_mod_seq_list)) {
+		tree_mod_log_write_unlock(fs_info);
+		return 1;
+	}
+
 	return 0;
 }
 
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
-			  struct extent_buffer *eb, int slot,
-			  enum mod_log_op op, gfp_t flags)
+/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+				    struct extent_buffer *eb)
+{
+	smp_mb();
+	if (list_empty(&(fs_info)->tree_mod_seq_list))
+		return 0;
+	if (eb && btrfs_header_level(eb) == 0)
+		return 0;
+
+	return 1;
+}
+
+static struct tree_mod_elem *
+alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
+		    enum mod_log_op op, gfp_t flags)
 {
 	struct tree_mod_elem *tm;
 
 	tm = kzalloc(sizeof(*tm), flags);
 	if (!tm)
-		return -ENOMEM;
+		return NULL;
 
 	tm->index = eb->start >> PAGE_CACHE_SHIFT;
 	if (op != MOD_LOG_KEY_ADD) {
@@ -567,8 +567,9 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
 	tm->op = op;
 	tm->slot = slot;
 	tm->generation = btrfs_node_ptr_generation(eb, slot);
+	RB_CLEAR_NODE(&tm->node);
 
-	return __tree_mod_log_insert(fs_info, tm);
+	return tm;
 }
 
 static noinline int
@@ -576,10 +577,27 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
 			struct extent_buffer *eb, int slot,
 			enum mod_log_op op, gfp_t flags)
 {
-	if (tree_mod_dont_log(fs_info, eb))
+	struct tree_mod_elem *tm;
+	int ret;
+
+	if (!tree_mod_need_log(fs_info, eb))
 		return 0;
 
-	return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+	tm = alloc_tree_mod_elem(eb, slot, op, flags);
+	if (!tm)
+		return -ENOMEM;
+
+	if (tree_mod_dont_log(fs_info, eb)) {
+		kfree(tm);
+		return 0;
+	}
+
+	ret = __tree_mod_log_insert(fs_info, tm);
+	tree_mod_log_write_unlock(fs_info);
+	if (ret)
+		kfree(tm);
+
+	return ret;
 }
 
 static noinline int
@@ -587,53 +605,95 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *eb, int dst_slot, int src_slot,
 			 int nr_items, gfp_t flags)
 {
-	struct tree_mod_elem *tm;
-	int ret;
+	struct tree_mod_elem *tm = NULL;
+	struct tree_mod_elem **tm_list = NULL;
+	int ret = 0;
 	int i;
+	int locked = 0;
 
-	if (tree_mod_dont_log(fs_info, eb))
+	if (!tree_mod_need_log(fs_info, eb))
 		return 0;
 
+	tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+	if (!tm_list)
+		return -ENOMEM;
+
+	tm = kzalloc(sizeof(*tm), flags);
+	if (!tm) {
+		ret = -ENOMEM;
+		goto free_tms;
+	}
+
+	tm->index = eb->start >> PAGE_CACHE_SHIFT;
+	tm->slot = src_slot;
+	tm->move.dst_slot = dst_slot;
+	tm->move.nr_items = nr_items;
+	tm->op = MOD_LOG_MOVE_KEYS;
+
+	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+		tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
+		    MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+		if (!tm_list[i]) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
+	}
+
+	if (tree_mod_dont_log(fs_info, eb))
+		goto free_tms;
+	locked = 1;
+
 	/*
 	 * When we override something during the move, we log these removals.
 	 * This can only happen when we move towards the beginning of the
 	 * buffer, i.e. dst_slot < src_slot.
 	 */
 	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
-		ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
-				MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
-		BUG_ON(ret < 0);
+		ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+		if (ret)
+			goto free_tms;
 	}
 
-	tm = kzalloc(sizeof(*tm), flags);
-	if (!tm)
-		return -ENOMEM;
+	ret = __tree_mod_log_insert(fs_info, tm);
+	if (ret)
+		goto free_tms;
+	tree_mod_log_write_unlock(fs_info);
+	kfree(tm_list);
 
-	tm->index = eb->start >> PAGE_CACHE_SHIFT;
-	tm->slot = src_slot;
-	tm->move.dst_slot = dst_slot;
-	tm->move.nr_items = nr_items;
-	tm->op = MOD_LOG_MOVE_KEYS;
+	return 0;
+free_tms:
+	for (i = 0; i < nr_items; i++) {
+		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+			rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+		kfree(tm_list[i]);
+	}
+	if (locked)
+		tree_mod_log_write_unlock(fs_info);
+	kfree(tm_list);
+	kfree(tm);
 
-	return __tree_mod_log_insert(fs_info, tm);
+	return ret;
 }
 
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static inline int
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+		       struct tree_mod_elem **tm_list,
+		       int nritems)
 {
-	int i;
-	u32 nritems;
+	int i, j;
 	int ret;
 
-	if (btrfs_header_level(eb) == 0)
-		return;
-
-	nritems = btrfs_header_nritems(eb);
 	for (i = nritems - 1; i >= 0; i--) {
-		ret = __tree_mod_log_insert_key(fs_info, eb, i,
-				MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
-		BUG_ON(ret < 0);
+		ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+		if (ret) {
+			for (j = nritems - 1; j > i; j--)
+				rb_erase(&tm_list[j]->node,
+					 &fs_info->tree_mod_log);
+			return ret;
+		}
 	}
+
+	return 0;
 }
 
 static noinline int
@@ -642,17 +702,38 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 			 struct extent_buffer *new_root, gfp_t flags,
 			 int log_removal)
 {
-	struct tree_mod_elem *tm;
+	struct tree_mod_elem *tm = NULL;
+	struct tree_mod_elem **tm_list = NULL;
+	int nritems = 0;
+	int ret = 0;
+	int i;
 
-	if (tree_mod_dont_log(fs_info, NULL))
+	if (!tree_mod_need_log(fs_info, NULL))
 		return 0;
 
-	if (log_removal)
-		__tree_mod_log_free_eb(fs_info, old_root);
+	if (log_removal && btrfs_header_level(old_root) > 0) {
+		nritems = btrfs_header_nritems(old_root);
+		tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+				  flags);
+		if (!tm_list) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
+		for (i = 0; i < nritems; i++) {
+			tm_list[i] = alloc_tree_mod_elem(old_root, i,
+			    MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+			if (!tm_list[i]) {
+				ret = -ENOMEM;
+				goto free_tms;
+			}
+		}
+	}
 
 	tm = kzalloc(sizeof(*tm), flags);
-	if (!tm)
-		return -ENOMEM;
+	if (!tm) {
+		ret = -ENOMEM;
+		goto free_tms;
+	}
 
 	tm->index = new_root->start >> PAGE_CACHE_SHIFT;
 	tm->old_root.logical = old_root->start;
@@ -660,7 +741,30 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 	tm->generation = btrfs_header_generation(old_root);
 	tm->op = MOD_LOG_ROOT_REPLACE;
 
-	return __tree_mod_log_insert(fs_info, tm);
+	if (tree_mod_dont_log(fs_info, NULL))
+		goto free_tms;
+
+	if (tm_list)
+		ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+	if (!ret)
+		ret = __tree_mod_log_insert(fs_info, tm);
+
+	tree_mod_log_write_unlock(fs_info);
+	if (ret)
+		goto free_tms;
+	kfree(tm_list);
+
+	return ret;
+
+free_tms:
+	if (tm_list) {
+		for (i = 0; i < nritems; i++)
+			kfree(tm_list[i]);
+		kfree(tm_list);
+	}
+	kfree(tm);
+
+	return ret;
 }
 
 static struct tree_mod_elem *
@@ -729,31 +833,75 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
 	return __tree_mod_log_search(fs_info, start, min_seq, 0);
 }
 
-static noinline void
+static noinline int
 tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
 		     struct extent_buffer *src, unsigned long dst_offset,
 		     unsigned long src_offset, int nr_items)
 {
-	int ret;
+	int ret = 0;
+	struct tree_mod_elem **tm_list = NULL;
+	struct tree_mod_elem **tm_list_add, **tm_list_rem;
 	int i;
+	int locked = 0;
 
-	if (tree_mod_dont_log(fs_info, NULL))
-		return;
+	if (!tree_mod_need_log(fs_info, NULL))
+		return 0;
 
 	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
-		return;
+		return 0;
+
+	tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+			  GFP_NOFS);
+	if (!tm_list)
+		return -ENOMEM;
 
+	tm_list_add = tm_list;
+	tm_list_rem = tm_list + nr_items;
 	for (i = 0; i < nr_items; i++) {
-		ret = __tree_mod_log_insert_key(fs_info, src,
-						i + src_offset,
-						MOD_LOG_KEY_REMOVE, GFP_NOFS);
-		BUG_ON(ret < 0);
-		ret = __tree_mod_log_insert_key(fs_info, dst,
-						     i + dst_offset,
-						     MOD_LOG_KEY_ADD,
-						     GFP_NOFS);
-		BUG_ON(ret < 0);
+		tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
+		    MOD_LOG_KEY_REMOVE, GFP_NOFS);
+		if (!tm_list_rem[i]) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
+
+		tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
+		    MOD_LOG_KEY_ADD, GFP_NOFS);
+		if (!tm_list_add[i]) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
 	}
+
+	if (tree_mod_dont_log(fs_info, NULL))
+		goto free_tms;
+	locked = 1;
+
+	for (i = 0; i < nr_items; i++) {
+		ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
+		if (ret)
+			goto free_tms;
+		ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
+		if (ret)
+			goto free_tms;
+	}
+
+	tree_mod_log_write_unlock(fs_info);
+	kfree(tm_list);
+
+	return 0;
+
+free_tms:
+	for (i = 0; i < nr_items * 2; i++) {
+		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+			rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+		kfree(tm_list[i]);
+	}
+	if (locked)
+		tree_mod_log_write_unlock(fs_info);
+	kfree(tm_list);
+
+	return ret;
 }
 
 static inline void
@@ -772,18 +920,58 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 {
 	int ret;
 
-	ret = __tree_mod_log_insert_key(fs_info, eb, slot,
-					MOD_LOG_KEY_REPLACE,
-					atomic ? GFP_ATOMIC : GFP_NOFS);
+	ret = tree_mod_log_insert_key(fs_info, eb, slot,
+					MOD_LOG_KEY_REPLACE,
+					atomic ? GFP_ATOMIC : GFP_NOFS);
 	BUG_ON(ret < 0);
 }
 
-static noinline void
+static noinline int
 tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 {
+	struct tree_mod_elem **tm_list = NULL;
+	int nritems = 0;
+	int i;
+	int ret = 0;
+
+	if (btrfs_header_level(eb) == 0)
+		return 0;
+
+	if (!tree_mod_need_log(fs_info, NULL))
+		return 0;
+
+	nritems = btrfs_header_nritems(eb);
+	tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+			  GFP_NOFS);
+	if (!tm_list)
+		return -ENOMEM;
+
+	for (i = 0; i < nritems; i++) {
+		tm_list[i] = alloc_tree_mod_elem(eb, i,
+		    MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+		if (!tm_list[i]) {
+			ret = -ENOMEM;
+			goto free_tms;
+		}
+	}
+
 	if (tree_mod_dont_log(fs_info, eb))
-		return;
-	__tree_mod_log_free_eb(fs_info, eb);
+		goto free_tms;
+
+	ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+	tree_mod_log_write_unlock(fs_info);
+	if (ret)
+		goto free_tms;
+	kfree(tm_list);
+
+	return 0;
+
+free_tms:
+	for (i = 0; i < nritems; i++)
+		kfree(tm_list[i]);
+	kfree(tm_list);
+
+	return ret;
 }
 
 static noinline void
@@ -1041,8 +1229,13 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 		btrfs_set_node_ptr_generation(parent, parent_slot,
 					      trans->transid);
 		btrfs_mark_buffer_dirty(parent);
-		if (last_ref)
-			tree_mod_log_free_eb(root->fs_info, buf);
+		if (last_ref) {
+			ret = tree_mod_log_free_eb(root->fs_info, buf);
+			if (ret) {
+				btrfs_abort_transaction(trans, root, ret);
+				return ret;
+			}
+		}
 		btrfs_free_tree_block(trans, root, buf, parent_start,
 				      last_ref);
 	}
@@ -1287,8 +1480,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 		old = read_tree_block(root, logical, blocksize, 0);
 		if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
 			free_extent_buffer(old);
-			pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
-				logical);
+			btrfs_warn(root->fs_info,
+				"failed to read tree block %llu from get_old_root", logical);
 		} else {
 			eb = btrfs_clone_extent_buffer(old);
 			free_extent_buffer(old);
@@ -2462,6 +2655,49 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
2462 return 0; 2655 return 0;
2463} 2656}
2464 2657
2658int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
2659 u64 iobjectid, u64 ioff, u8 key_type,
2660 struct btrfs_key *found_key)
2661{
2662 int ret;
2663 struct btrfs_key key;
2664 struct extent_buffer *eb;
2665 struct btrfs_path *path;
2666
2667 key.type = key_type;
2668 key.objectid = iobjectid;
2669 key.offset = ioff;
2670
2671 if (found_path == NULL) {
2672 path = btrfs_alloc_path();
2673 if (!path)
2674 return -ENOMEM;
2675 } else
2676 path = found_path;
2677
2678 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
2679 if ((ret < 0) || (found_key == NULL)) {
2680 if (path != found_path)
2681 btrfs_free_path(path);
2682 return ret;
2683 }
2684
2685 eb = path->nodes[0];
2686 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
2687 ret = btrfs_next_leaf(fs_root, path);
2688 if (ret)
2689 return ret;
2690 eb = path->nodes[0];
2691 }
2692
2693 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
2694 if (found_key->type != key.type ||
2695 found_key->objectid != key.objectid)
2696 return 1;
2697
2698 return 0;
2699}
2700
2465/* 2701/*
2466 * look for key in the tree. path is filled in with nodes along the way 2702 * look for key in the tree. path is filled in with nodes along the way
2467 * if key is found, we return zero and you can find the item in the leaf 2703 * if key is found, we return zero and you can find the item in the leaf
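A minimal usage sketch for the btrfs_find_item() helper added in the hunk above; a valid fs_root is assumed, and inode number 257 with the INODE_ITEM key type is purely illustrative. Passing a caller-owned path keeps the lifetime explicit (with a NULL path the helper allocates and frees one internally on the early-return paths):

        struct btrfs_path *path;
        struct btrfs_key found_key;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        ret = btrfs_find_item(fs_root, path, 257, 0,
                              BTRFS_INODE_ITEM_KEY, &found_key);
        btrfs_free_path(path);  /* found_key is a CPU copy, still valid */
        if (ret < 0)
                return ret;     /* search error */
        if (ret)
                return -ENOENT; /* no item with that objectid/type */
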
@@ -2495,6 +2731,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
2495 lowest_level = p->lowest_level; 2731 lowest_level = p->lowest_level;
2496 WARN_ON(lowest_level && ins_len > 0); 2732 WARN_ON(lowest_level && ins_len > 0);
2497 WARN_ON(p->nodes[0] != NULL); 2733 WARN_ON(p->nodes[0] != NULL);
2734 BUG_ON(!cow && ins_len);
2498 2735
2499 if (ins_len < 0) { 2736 if (ins_len < 0) {
2500 lowest_unlock = 2; 2737 lowest_unlock = 2;
@@ -2603,8 +2840,6 @@ again:
2603 } 2840 }
2604 } 2841 }
2605cow_done: 2842cow_done:
2606 BUG_ON(!cow && ins_len);
2607
2608 p->nodes[level] = b; 2843 p->nodes[level] = b;
2609 btrfs_clear_path_blocking(p, NULL, 0); 2844 btrfs_clear_path_blocking(p, NULL, 0);
2610 2845
@@ -2614,13 +2849,19 @@ cow_done:
2614 * It is safe to drop the lock on our parent before we 2849 * It is safe to drop the lock on our parent before we
2615 * go through the expensive btree search on b. 2850 * go through the expensive btree search on b.
2616 * 2851 *
2617 * If cow is true, then we might be changing slot zero, 2852 * If we're inserting or deleting (ins_len != 0), then we might
2618 * which may require changing the parent. So, we can't 2853 * be changing slot zero, which may require changing the parent.
2619 * drop the lock until after we know which slot we're 2854 * So, we can't drop the lock until after we know which slot
2620 * operating on. 2855 * we're operating on.
2621 */ 2856 */
2622 if (!cow) 2857 if (!ins_len && !p->keep_locks) {
2623 btrfs_unlock_up_safe(p, level + 1); 2858 int u = level + 1;
2859
2860 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
2861 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
2862 p->locks[u] = 0;
2863 }
2864 }
2624 2865
2625 ret = key_search(b, key, level, &prev_cmp, &slot); 2866 ret = key_search(b, key, level, &prev_cmp, &slot);
2626 2867
@@ -2648,7 +2889,7 @@ cow_done:
2648 * which means we must have a write lock 2889 * which means we must have a write lock
2649 * on the parent 2890 * on the parent
2650 */ 2891 */
2651 if (slot == 0 && cow && 2892 if (slot == 0 && ins_len &&
2652 write_lock_level < level + 1) { 2893 write_lock_level < level + 1) {
2653 write_lock_level = level + 1; 2894 write_lock_level = level + 1;
2654 btrfs_release_path(p); 2895 btrfs_release_path(p);
@@ -2901,7 +3142,9 @@ again:
2901 if (ret < 0) 3142 if (ret < 0)
2902 return ret; 3143 return ret;
2903 if (!ret) { 3144 if (!ret) {
2904 p->slots[0] = btrfs_header_nritems(leaf) - 1; 3145 leaf = p->nodes[0];
3146 if (p->slots[0] == btrfs_header_nritems(leaf))
3147 p->slots[0]--;
2905 return 0; 3148 return 0;
2906 } 3149 }
2907 if (!return_any) 3150 if (!return_any)
@@ -3022,8 +3265,12 @@ static int push_node_left(struct btrfs_trans_handle *trans,
3022 } else 3265 } else
3023 push_items = min(src_nritems - 8, push_items); 3266 push_items = min(src_nritems - 8, push_items);
3024 3267
3025 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, 3268 ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
3026 push_items); 3269 push_items);
3270 if (ret) {
3271 btrfs_abort_transaction(trans, root, ret);
3272 return ret;
3273 }
3027 copy_extent_buffer(dst, src, 3274 copy_extent_buffer(dst, src,
3028 btrfs_node_key_ptr_offset(dst_nritems), 3275 btrfs_node_key_ptr_offset(dst_nritems),
3029 btrfs_node_key_ptr_offset(0), 3276 btrfs_node_key_ptr_offset(0),
@@ -3093,8 +3340,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
3093 (dst_nritems) * 3340 (dst_nritems) *
3094 sizeof(struct btrfs_key_ptr)); 3341 sizeof(struct btrfs_key_ptr));
3095 3342
3096 tree_mod_log_eb_copy(root->fs_info, dst, src, 0, 3343 ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
3097 src_nritems - push_items, push_items); 3344 src_nritems - push_items, push_items);
3345 if (ret) {
3346 btrfs_abort_transaction(trans, root, ret);
3347 return ret;
3348 }
3098 copy_extent_buffer(dst, src, 3349 copy_extent_buffer(dst, src,
3099 btrfs_node_key_ptr_offset(0), 3350 btrfs_node_key_ptr_offset(0),
3100 btrfs_node_key_ptr_offset(src_nritems - push_items), 3351 btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3295,7 +3546,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
3295 btrfs_header_chunk_tree_uuid(split), 3546 btrfs_header_chunk_tree_uuid(split),
3296 BTRFS_UUID_SIZE); 3547 BTRFS_UUID_SIZE);
3297 3548
3298 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); 3549 ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
3550 mid, c_nritems - mid);
3551 if (ret) {
3552 btrfs_abort_transaction(trans, root, ret);
3553 return ret;
3554 }
3299 copy_extent_buffer(split, c, 3555 copy_extent_buffer(split, c,
3300 btrfs_node_key_ptr_offset(0), 3556 btrfs_node_key_ptr_offset(0),
3301 btrfs_node_key_ptr_offset(mid), 3557 btrfs_node_key_ptr_offset(mid),
@@ -3362,8 +3618,8 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
3362 int ret; 3618 int ret;
3363 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 3619 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
3364 if (ret < 0) { 3620 if (ret < 0) {
3365 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " 3621 btrfs_crit(root->fs_info,
3366 "used %d nritems %d\n", 3622 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
3367 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), 3623 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
3368 leaf_space_used(leaf, 0, nritems), nritems); 3624 leaf_space_used(leaf, 0, nritems), nritems);
3369 } 3625 }
@@ -3571,6 +3827,19 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
3571 if (left_nritems == 0) 3827 if (left_nritems == 0)
3572 goto out_unlock; 3828 goto out_unlock;
3573 3829
3830 if (path->slots[0] == left_nritems && !empty) {
3831 /* Key greater than all keys in the leaf, right neighbor has
3832 * enough room for it and we're not emptying our leaf to delete
 3833 * it, therefore use the right neighbor to insert the new item;
 3834 * there is no need to touch/dirty our left leaf. */
3835 btrfs_tree_unlock(left);
3836 free_extent_buffer(left);
3837 path->nodes[0] = right;
3838 path->slots[0] = 0;
3839 path->slots[1]++;
3840 return 0;
3841 }
3842
3574 return __push_leaf_right(trans, root, path, min_data_size, empty, 3843 return __push_leaf_right(trans, root, path, min_data_size, empty,
3575 right, free_space, left_nritems, min_slot); 3844 right, free_space, left_nritems, min_slot);
3576out_unlock: 3845out_unlock:
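To make the new fast path above concrete, a worked scenario with illustrative numbers: the left leaf holds items in slots 0..24, so left_nritems == 25, and the key being inserted sorts after all of them, giving path->slots[0] == 25. With room in the right sibling and empty == 0, the item belongs at slot 0 of the right leaf, so the path is handed over without ever dirtying the left leaf:

        path->nodes[0] = right;   /* search continues in the right leaf */
        path->slots[0] = 0;       /* the new item will land at slot 0 */
        path->slots[1]++;         /* parent slot now points at right */
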
@@ -3887,14 +4156,17 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3887 int progress = 0; 4156 int progress = 0;
3888 int slot; 4157 int slot;
3889 u32 nritems; 4158 u32 nritems;
4159 int space_needed = data_size;
3890 4160
3891 slot = path->slots[0]; 4161 slot = path->slots[0];
4162 if (slot < btrfs_header_nritems(path->nodes[0]))
4163 space_needed -= btrfs_leaf_free_space(root, path->nodes[0]);
3892 4164
3893 /* 4165 /*
3894 * try to push all the items after our slot into the 4166 * try to push all the items after our slot into the
3895 * right leaf 4167 * right leaf
3896 */ 4168 */
3897 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); 4169 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
3898 if (ret < 0) 4170 if (ret < 0)
3899 return ret; 4171 return ret;
3900 4172
@@ -3914,7 +4186,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3914 4186
3915 /* try to push all the items before our slot into the next leaf */ 4187 /* try to push all the items before our slot into the next leaf */
3916 slot = path->slots[0]; 4188 slot = path->slots[0];
3917 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); 4189 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
3918 if (ret < 0) 4190 if (ret < 0)
3919 return ret; 4191 return ret;
3920 4192
@@ -3958,13 +4230,18 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
3958 4230
3959 /* first try to make some room by pushing left and right */ 4231 /* first try to make some room by pushing left and right */
3960 if (data_size && path->nodes[1]) { 4232 if (data_size && path->nodes[1]) {
3961 wret = push_leaf_right(trans, root, path, data_size, 4233 int space_needed = data_size;
3962 data_size, 0, 0); 4234
4235 if (slot < btrfs_header_nritems(l))
4236 space_needed -= btrfs_leaf_free_space(root, l);
4237
4238 wret = push_leaf_right(trans, root, path, space_needed,
4239 space_needed, 0, 0);
3963 if (wret < 0) 4240 if (wret < 0)
3964 return wret; 4241 return wret;
3965 if (wret) { 4242 if (wret) {
3966 wret = push_leaf_left(trans, root, path, data_size, 4243 wret = push_leaf_left(trans, root, path, space_needed,
3967 data_size, 0, (u32)-1); 4244 space_needed, 0, (u32)-1);
3968 if (wret < 0) 4245 if (wret < 0)
3969 return wret; 4246 return wret;
3970 } 4247 }
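The space_needed arithmetic above deserves a quick worked example (numbers are made up). Inserting a 100-byte item into a leaf that still has 60 bytes free only requires the shortfall to move to a neighbor, since our own free bytes remain usable after the push:

        space_needed = data_size - btrfs_leaf_free_space(root, l)
                     = 100 - 60
                     = 40;   /* bytes the neighbor must absorb */

When slot == btrfs_header_nritems(l) the insertion is an append past the last item, so the new item can live entirely in the neighbor and the full data_size is demanded instead.
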
@@ -4432,7 +4709,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
4432 BUG_ON(slot < 0); 4709 BUG_ON(slot < 0);
4433 if (slot >= nritems) { 4710 if (slot >= nritems) {
4434 btrfs_print_leaf(root, leaf); 4711 btrfs_print_leaf(root, leaf);
4435 printk(KERN_CRIT "slot %d too large, nritems %d\n", 4712 btrfs_crit(root->fs_info, "slot %d too large, nritems %d",
4436 slot, nritems); 4713 slot, nritems);
4437 BUG_ON(1); 4714 BUG_ON(1);
4438 } 4715 }
@@ -4495,7 +4772,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4495 4772
4496 if (btrfs_leaf_free_space(root, leaf) < total_size) { 4773 if (btrfs_leaf_free_space(root, leaf) < total_size) {
4497 btrfs_print_leaf(root, leaf); 4774 btrfs_print_leaf(root, leaf);
4498 printk(KERN_CRIT "not enough freespace need %u have %d\n", 4775 btrfs_crit(root->fs_info, "not enough freespace need %u have %d",
4499 total_size, btrfs_leaf_free_space(root, leaf)); 4776 total_size, btrfs_leaf_free_space(root, leaf));
4500 BUG(); 4777 BUG();
4501 } 4778 }
@@ -4505,7 +4782,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
4505 4782
4506 if (old_data < data_end) { 4783 if (old_data < data_end) {
4507 btrfs_print_leaf(root, leaf); 4784 btrfs_print_leaf(root, leaf);
4508 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 4785 btrfs_crit(root->fs_info, "slot %d old_data %d data_end %d",
4509 slot, old_data, data_end); 4786 slot, old_data, data_end);
4510 BUG_ON(1); 4787 BUG_ON(1);
4511 } 4788 }
@@ -4817,7 +5094,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4817 * This may release the path, and so you may lose any locks held at the 5094 * This may release the path, and so you may lose any locks held at the
4818 * time you call it. 5095 * time you call it.
4819 */ 5096 */
4820static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 5097int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
4821{ 5098{
4822 struct btrfs_key key; 5099 struct btrfs_key key;
4823 struct btrfs_disk_key found_key; 5100 struct btrfs_disk_key found_key;
@@ -5240,7 +5517,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
5240 5517
5241 if (!left_start_ctransid || !right_start_ctransid) { 5518 if (!left_start_ctransid || !right_start_ctransid) {
5242 WARN(1, KERN_WARNING 5519 WARN(1, KERN_WARNING
5243 "btrfs: btrfs_compare_tree detected " 5520 "BTRFS: btrfs_compare_tree detected "
5244 "a change in one of the trees while " 5521 "a change in one of the trees while "
5245 "iterating. This is probably a " 5522 "iterating. This is probably a "
5246 "bug.\n"); 5523 "bug.\n");
@@ -5680,3 +5957,46 @@ int btrfs_previous_item(struct btrfs_root *root,
5680 } 5957 }
5681 return 1; 5958 return 1;
5682} 5959}
5960
5961/*
5962 * search in extent tree to find a previous Metadata/Data extent item with
 5963 * min objectid.
5964 *
5965 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5966 */
5967int btrfs_previous_extent_item(struct btrfs_root *root,
5968 struct btrfs_path *path, u64 min_objectid)
5969{
5970 struct btrfs_key found_key;
5971 struct extent_buffer *leaf;
5972 u32 nritems;
5973 int ret;
5974
5975 while (1) {
5976 if (path->slots[0] == 0) {
5977 btrfs_set_path_blocking(path);
5978 ret = btrfs_prev_leaf(root, path);
5979 if (ret != 0)
5980 return ret;
5981 } else {
5982 path->slots[0]--;
5983 }
5984 leaf = path->nodes[0];
5985 nritems = btrfs_header_nritems(leaf);
5986 if (nritems == 0)
5987 return 1;
5988 if (path->slots[0] == nritems)
5989 path->slots[0]--;
5990
5991 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5992 if (found_key.objectid < min_objectid)
5993 break;
5994 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
5995 found_key.type == BTRFS_METADATA_ITEM_KEY)
5996 return 0;
5997 if (found_key.objectid == min_objectid &&
5998 found_key.type < BTRFS_EXTENT_ITEM_KEY)
5999 break;
6000 }
6001 return 1;
6002}
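A hedged sketch of a backwards extent-tree walk using the new helper; the extent_root, the starting path position, and the process_extent() consumer are assumptions for illustration only:

        struct btrfs_key key;
        int ret;

        /* path must already be positioned somewhere in the extent tree */
        while (1) {
                ret = btrfs_previous_extent_item(extent_root, path,
                                                 min_objectid);
                if (ret)        /* 1: nothing further back, < 0: error */
                        break;
                btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
                /* key.type is EXTENT_ITEM_KEY or METADATA_ITEM_KEY here */
                process_extent(&key);   /* hypothetical consumer */
        }
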
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 54ab86127f7a..2c1a42ca519f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -521,9 +521,15 @@ struct btrfs_super_block {
521#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6) 521#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
522#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7) 522#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
523#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8) 523#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
524#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
524 525
525#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 526#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
527#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL
528#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
526#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 529#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
530#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
531#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
532
527#define BTRFS_FEATURE_INCOMPAT_SUPP \ 533#define BTRFS_FEATURE_INCOMPAT_SUPP \
528 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 534 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
529 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \ 535 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
@@ -532,7 +538,12 @@ struct btrfs_super_block {
532 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ 538 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
533 BTRFS_FEATURE_INCOMPAT_RAID56 | \ 539 BTRFS_FEATURE_INCOMPAT_RAID56 | \
534 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ 540 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
535 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) 541 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
542 BTRFS_FEATURE_INCOMPAT_NO_HOLES)
543
544#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
545 (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
546#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL
536 547
537/* 548/*
538 * A leaf is full of items. offset and size tell us where to find 549 * A leaf is full of items. offset and size tell us where to find
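The new *_SAFE_SET/*_SAFE_CLEAR masks pair with the sysfs feature files added elsewhere in this series; a hedged sketch of the kind of check they enable (the helper below is hypothetical, not part of this diff):

        /* allow an online feature toggle only if every requested bit is safe */
        static bool incompat_set_is_safe(u64 requested)
        {
                return (requested & ~BTRFS_FEATURE_INCOMPAT_SAFE_SET) == 0;
        }

With only EXTENDED_IREF in the set mask and both clear masks at 0ULL, the sole permitted live transition would be turning extended inode refs on.
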
@@ -1094,7 +1105,7 @@ struct btrfs_qgroup_limit_item {
1094} __attribute__ ((__packed__)); 1105} __attribute__ ((__packed__));
1095 1106
1096struct btrfs_space_info { 1107struct btrfs_space_info {
1097 u64 flags; 1108 spinlock_t lock;
1098 1109
1099 u64 total_bytes; /* total bytes in the space, 1110 u64 total_bytes; /* total bytes in the space,
1100 this doesn't take mirrors into account */ 1111 this doesn't take mirrors into account */
@@ -1104,14 +1115,25 @@ struct btrfs_space_info {
1104 transaction finishes */ 1115 transaction finishes */
1105 u64 bytes_reserved; /* total bytes the allocator has reserved for 1116 u64 bytes_reserved; /* total bytes the allocator has reserved for
1106 current allocations */ 1117 current allocations */
1107 u64 bytes_readonly; /* total bytes that are read only */
1108
1109 u64 bytes_may_use; /* number of bytes that may be used for 1118 u64 bytes_may_use; /* number of bytes that may be used for
1110 delalloc/allocations */ 1119 delalloc/allocations */
1120 u64 bytes_readonly; /* total bytes that are read only */
1121
1122 unsigned int full:1; /* indicates that we cannot allocate any more
1123 chunks for this space */
1124 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
1125
1126 unsigned int flush:1; /* set if we are trying to make space */
1127
1128 unsigned int force_alloc; /* set if we need to force a chunk
1129 alloc for this space */
1130
1111 u64 disk_used; /* total bytes used on disk */ 1131 u64 disk_used; /* total bytes used on disk */
1112 u64 disk_total; /* total bytes on disk, takes mirrors into 1132 u64 disk_total; /* total bytes on disk, takes mirrors into
1113 account */ 1133 account */
1114 1134
1135 u64 flags;
1136
1115 /* 1137 /*
1116 * bytes_pinned is kept in line with what is actually pinned, as in 1138 * bytes_pinned is kept in line with what is actually pinned, as in
1117 * we've called update_block_group and dropped the bytes_used counter 1139 * we've called update_block_group and dropped the bytes_used counter
@@ -1124,22 +1146,15 @@ struct btrfs_space_info {
1124 */ 1146 */
1125 struct percpu_counter total_bytes_pinned; 1147 struct percpu_counter total_bytes_pinned;
1126 1148
1127 unsigned int full:1; /* indicates that we cannot allocate any more
1128 chunks for this space */
1129 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
1130
1131 unsigned int flush:1; /* set if we are trying to make space */
1132
1133 unsigned int force_alloc; /* set if we need to force a chunk
1134 alloc for this space */
1135
1136 struct list_head list; 1149 struct list_head list;
1137 1150
1151 struct rw_semaphore groups_sem;
1138 /* for block groups in our same type */ 1152 /* for block groups in our same type */
1139 struct list_head block_groups[BTRFS_NR_RAID_TYPES]; 1153 struct list_head block_groups[BTRFS_NR_RAID_TYPES];
1140 spinlock_t lock;
1141 struct rw_semaphore groups_sem;
1142 wait_queue_head_t wait; 1154 wait_queue_head_t wait;
1155
1156 struct kobject kobj;
1157 struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
1143}; 1158};
1144 1159
1145#define BTRFS_BLOCK_RSV_GLOBAL 1 1160#define BTRFS_BLOCK_RSV_GLOBAL 1
@@ -1346,6 +1361,7 @@ struct btrfs_fs_info {
1346 1361
1347 u64 generation; 1362 u64 generation;
1348 u64 last_trans_committed; 1363 u64 last_trans_committed;
1364 u64 avg_delayed_ref_runtime;
1349 1365
1350 /* 1366 /*
1351 * this is updated to the current trans every time a full commit 1367 * this is updated to the current trans every time a full commit
@@ -1448,7 +1464,6 @@ struct btrfs_fs_info {
1448 spinlock_t tree_mod_seq_lock; 1464 spinlock_t tree_mod_seq_lock;
1449 atomic64_t tree_mod_seq; 1465 atomic64_t tree_mod_seq;
1450 struct list_head tree_mod_seq_list; 1466 struct list_head tree_mod_seq_list;
1451 struct seq_list tree_mod_seq_elem;
1452 1467
1453 /* this protects tree_mod_log */ 1468 /* this protects tree_mod_log */
1454 rwlock_t tree_mod_log_lock; 1469 rwlock_t tree_mod_log_lock;
@@ -1515,6 +1530,8 @@ struct btrfs_fs_info {
1515 int thread_pool_size; 1530 int thread_pool_size;
1516 1531
1517 struct kobject super_kobj; 1532 struct kobject super_kobj;
1533 struct kobject *space_info_kobj;
1534 struct kobject *device_dir_kobj;
1518 struct completion kobj_unregister; 1535 struct completion kobj_unregister;
1519 int do_barriers; 1536 int do_barriers;
1520 int closing; 1537 int closing;
@@ -1643,6 +1660,10 @@ struct btrfs_fs_info {
1643 spinlock_t reada_lock; 1660 spinlock_t reada_lock;
1644 struct radix_tree_root reada_tree; 1661 struct radix_tree_root reada_tree;
1645 1662
1663 /* Extent buffer radix tree */
1664 spinlock_t buffer_lock;
1665 struct radix_tree_root buffer_radix;
1666
1646 /* next backup root to be overwritten */ 1667 /* next backup root to be overwritten */
1647 int backup_root_index; 1668 int backup_root_index;
1648 1669
@@ -1795,6 +1816,12 @@ struct btrfs_root {
1795 struct list_head ordered_extents; 1816 struct list_head ordered_extents;
1796 struct list_head ordered_root; 1817 struct list_head ordered_root;
1797 u64 nr_ordered_extents; 1818 u64 nr_ordered_extents;
1819
1820 /*
1821 * Number of currently running SEND ioctls to prevent
1822 * manipulation with the read-only status via SUBVOL_SETFLAGS
1823 */
1824 int send_in_progress;
1798}; 1825};
1799 1826
1800struct btrfs_ioctl_defrag_range_args { 1827struct btrfs_ioctl_defrag_range_args {
@@ -1997,6 +2024,7 @@ struct btrfs_ioctl_defrag_range_args {
1997#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) 2024#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
1998#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) 2025#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
1999#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) 2026#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
2027#define BTRFS_MOUNT_CHANGE_INODE_CACHE (1 << 24)
2000 2028
2001#define BTRFS_DEFAULT_COMMIT_INTERVAL (30) 2029#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
2002 2030
@@ -2925,6 +2953,10 @@ BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
2925 struct btrfs_file_extent_item, generation, 64); 2953 struct btrfs_file_extent_item, generation, 64);
2926BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, 2954BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
2927 struct btrfs_file_extent_item, num_bytes, 64); 2955 struct btrfs_file_extent_item, num_bytes, 64);
2956BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes,
2957 struct btrfs_file_extent_item, disk_num_bytes, 64);
2958BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression,
2959 struct btrfs_file_extent_item, compression, 8);
2928 2960
2929static inline unsigned long 2961static inline unsigned long
2930btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) 2962btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@ -2958,15 +2990,6 @@ BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
2958BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, 2990BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
2959 other_encoding, 16); 2991 other_encoding, 16);
2960 2992
2961/* this returns the number of file bytes represented by the inline item.
2962 * If an item is compressed, this is the uncompressed size
2963 */
2964static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
2965 struct btrfs_file_extent_item *e)
2966{
2967 return btrfs_file_extent_ram_bytes(eb, e);
2968}
2969
2970/* 2993/*
2971 * this returns the number of bytes used by the item on disk, minus the 2994 * this returns the number of bytes used by the item on disk, minus the
2972 * size of any extent headers. If a file is compressed on disk, this is 2995 * size of any extent headers. If a file is compressed on disk, this is
@@ -2980,6 +3003,32 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
2980 return btrfs_item_size(eb, e) - offset; 3003 return btrfs_item_size(eb, e) - offset;
2981} 3004}
2982 3005
3006/* this returns the number of file bytes represented by the inline item.
3007 * If an item is compressed, this is the uncompressed size
3008 */
3009static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb,
3010 int slot,
3011 struct btrfs_file_extent_item *fi)
3012{
3013 struct btrfs_map_token token;
3014
3015 btrfs_init_map_token(&token);
3016 /*
3017 * return the space used on disk if this item isn't
3018 * compressed or encoded
3019 */
3020 if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
3021 btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
3022 btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
3023 return btrfs_file_extent_inline_item_len(eb,
3024 btrfs_item_nr(slot));
3025 }
3026
3027 /* otherwise use the ram bytes field */
3028 return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
3029}
3030
3031
2983/* btrfs_dev_stats_item */ 3032/* btrfs_dev_stats_item */
2984static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb, 3033static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
2985 struct btrfs_dev_stats_item *ptr, 3034 struct btrfs_dev_stats_item *ptr,
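A sketch of a typical caller of the relocated, now slot-aware btrfs_file_extent_inline_len() from the hunk above; leaf, path, and key are assumed to describe an inline file extent item:

        struct btrfs_file_extent_item *fi;
        u64 extent_end;

        fi = btrfs_item_ptr(leaf, path->slots[0],
                            struct btrfs_file_extent_item);
        if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE)
                /* uncompressed file bytes covered by the inline extent */
                extent_end = key.offset +
                        btrfs_file_extent_inline_len(leaf, path->slots[0], fi);
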
@@ -3143,6 +3192,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
3143 3192
3144int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, 3193int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
3145 struct btrfs_root *root); 3194 struct btrfs_root *root);
3195int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
3196 struct btrfs_root *root);
3146void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 3197void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
3147int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 3198int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
3148 struct btrfs_root *root, unsigned long count); 3199 struct btrfs_root *root, unsigned long count);
@@ -3163,6 +3214,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
3163 struct btrfs_fs_info *info, 3214 struct btrfs_fs_info *info,
3164 u64 bytenr); 3215 u64 bytenr);
3165void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 3216void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
3217int get_block_group_index(struct btrfs_block_group_cache *cache);
3166struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 3218struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3167 struct btrfs_root *root, u32 blocksize, 3219 struct btrfs_root *root, u32 blocksize,
3168 u64 parent, u64 root_objectid, 3220 u64 parent, u64 root_objectid,
@@ -3301,6 +3353,8 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
3301int btrfs_previous_item(struct btrfs_root *root, 3353int btrfs_previous_item(struct btrfs_root *root,
3302 struct btrfs_path *path, u64 min_objectid, 3354 struct btrfs_path *path, u64 min_objectid,
3303 int type); 3355 int type);
3356int btrfs_previous_extent_item(struct btrfs_root *root,
3357 struct btrfs_path *path, u64 min_objectid);
3304void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path, 3358void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
3305 struct btrfs_key *new_key); 3359 struct btrfs_key *new_key);
3306struct extent_buffer *btrfs_root_node(struct btrfs_root *root); 3360struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
@@ -3350,6 +3404,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3350 struct btrfs_root *root, 3404 struct btrfs_root *root,
3351 struct btrfs_path *path, 3405 struct btrfs_path *path,
3352 struct btrfs_key *new_key); 3406 struct btrfs_key *new_key);
3407int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
3408 u64 inum, u64 ioff, u8 key_type, struct btrfs_key *found_key);
3353int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 3409int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
3354 *root, struct btrfs_key *key, struct btrfs_path *p, int 3410 *root, struct btrfs_key *key, struct btrfs_path *p, int
3355 ins_len, int cow); 3411 ins_len, int cow);
@@ -3399,6 +3455,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
3399} 3455}
3400 3456
3401int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 3457int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
3458int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
3402int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 3459int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
3403 u64 time_seq); 3460 u64 time_seq);
3404static inline int btrfs_next_old_item(struct btrfs_root *root, 3461static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3563,12 +3620,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
3563 struct btrfs_root *root, 3620 struct btrfs_root *root,
3564 const char *name, int name_len, 3621 const char *name, int name_len,
3565 u64 inode_objectid, u64 ref_objectid, u64 *index); 3622 u64 inode_objectid, u64 ref_objectid, u64 *index);
3566int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
3567 struct btrfs_root *root,
3568 struct btrfs_path *path,
3569 const char *name, int name_len,
3570 u64 inode_objectid, u64 ref_objectid, int mod,
3571 u64 *ret_index);
3572int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, 3623int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
3573 struct btrfs_root *root, 3624 struct btrfs_root *root,
3574 struct btrfs_path *path, u64 objectid); 3625 struct btrfs_path *path, u64 objectid);
@@ -3676,7 +3727,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput);
3676int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 3727int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
3677 struct extent_state **cached_state); 3728 struct extent_state **cached_state);
3678int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 3729int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
3679 struct btrfs_root *new_root, u64 new_dirid); 3730 struct btrfs_root *new_root,
3731 struct btrfs_root *parent_root,
3732 u64 new_dirid);
3680int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset, 3733int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
3681 size_t size, struct bio *bio, 3734 size_t size, struct bio *bio,
3682 unsigned long bio_flags); 3735 unsigned long bio_flags);
@@ -3745,7 +3798,10 @@ extern const struct file_operations btrfs_file_operations;
3745int __btrfs_drop_extents(struct btrfs_trans_handle *trans, 3798int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
3746 struct btrfs_root *root, struct inode *inode, 3799 struct btrfs_root *root, struct inode *inode,
3747 struct btrfs_path *path, u64 start, u64 end, 3800 struct btrfs_path *path, u64 start, u64 end,
3748 u64 *drop_end, int drop_cache); 3801 u64 *drop_end, int drop_cache,
3802 int replace_extent,
3803 u32 extent_item_size,
3804 int *key_inserted);
3749int btrfs_drop_extents(struct btrfs_trans_handle *trans, 3805int btrfs_drop_extents(struct btrfs_trans_handle *trans,
3750 struct btrfs_root *root, struct inode *inode, u64 start, 3806 struct btrfs_root *root, struct inode *inode, u64 start,
3751 u64 end, int drop_cache); 3807 u64 end, int drop_cache);
@@ -3764,6 +3820,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
3764/* sysfs.c */ 3820/* sysfs.c */
3765int btrfs_init_sysfs(void); 3821int btrfs_init_sysfs(void);
3766void btrfs_exit_sysfs(void); 3822void btrfs_exit_sysfs(void);
3823int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info);
3824void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info);
3767 3825
3768/* xattr.c */ 3826/* xattr.c */
3769ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); 3827ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
@@ -3796,14 +3854,20 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
3796 btrfs_printk(fs_info, KERN_NOTICE fmt, ##args) 3854 btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
3797#define btrfs_info(fs_info, fmt, args...) \ 3855#define btrfs_info(fs_info, fmt, args...) \
3798 btrfs_printk(fs_info, KERN_INFO fmt, ##args) 3856 btrfs_printk(fs_info, KERN_INFO fmt, ##args)
3857
3858#ifdef DEBUG
3799#define btrfs_debug(fs_info, fmt, args...) \ 3859#define btrfs_debug(fs_info, fmt, args...) \
3800 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args) 3860 btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
3861#else
3862#define btrfs_debug(fs_info, fmt, args...) \
3863 no_printk(KERN_DEBUG fmt, ##args)
3864#endif
3801 3865
3802#ifdef CONFIG_BTRFS_ASSERT 3866#ifdef CONFIG_BTRFS_ASSERT
3803 3867
3804static inline void assfail(char *expr, char *file, int line) 3868static inline void assfail(char *expr, char *file, int line)
3805{ 3869{
3806 printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d", 3870 pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
3807 expr, file, line); 3871 expr, file, line);
3808 BUG(); 3872 BUG();
3809} 3873}
@@ -3841,7 +3905,7 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
3841 if (!(features & flag)) { 3905 if (!(features & flag)) {
3842 features |= flag; 3906 features |= flag;
3843 btrfs_set_super_incompat_flags(disk_super, features); 3907 btrfs_set_super_incompat_flags(disk_super, features);
3844 printk(KERN_INFO "btrfs: setting %llu feature flag\n", 3908 btrfs_info(fs_info, "setting %llu feature flag",
3845 flag); 3909 flag);
3846 } 3910 }
3847 spin_unlock(&fs_info->super_lock); 3911 spin_unlock(&fs_info->super_lock);
@@ -3899,20 +3963,17 @@ do { \
3899/* acl.c */ 3963/* acl.c */
3900#ifdef CONFIG_BTRFS_FS_POSIX_ACL 3964#ifdef CONFIG_BTRFS_FS_POSIX_ACL
3901struct posix_acl *btrfs_get_acl(struct inode *inode, int type); 3965struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
3966int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
3902int btrfs_init_acl(struct btrfs_trans_handle *trans, 3967int btrfs_init_acl(struct btrfs_trans_handle *trans,
3903 struct inode *inode, struct inode *dir); 3968 struct inode *inode, struct inode *dir);
3904int btrfs_acl_chmod(struct inode *inode);
3905#else 3969#else
3906#define btrfs_get_acl NULL 3970#define btrfs_get_acl NULL
3971#define btrfs_set_acl NULL
3907static inline int btrfs_init_acl(struct btrfs_trans_handle *trans, 3972static inline int btrfs_init_acl(struct btrfs_trans_handle *trans,
3908 struct inode *inode, struct inode *dir) 3973 struct inode *inode, struct inode *dir)
3909{ 3974{
3910 return 0; 3975 return 0;
3911} 3976}
3912static inline int btrfs_acl_chmod(struct inode *inode)
3913{
3914 return 0;
3915}
3916#endif 3977#endif
3917 3978
3918/* relocation.c */ 3979/* relocation.c */
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 8d292fbae659..451b00c86f6c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -55,8 +55,7 @@ static inline void btrfs_init_delayed_node(
55 delayed_node->inode_id = inode_id; 55 delayed_node->inode_id = inode_id;
56 atomic_set(&delayed_node->refs, 0); 56 atomic_set(&delayed_node->refs, 0);
57 delayed_node->count = 0; 57 delayed_node->count = 0;
58 delayed_node->in_list = 0; 58 delayed_node->flags = 0;
59 delayed_node->inode_dirty = 0;
60 delayed_node->ins_root = RB_ROOT; 59 delayed_node->ins_root = RB_ROOT;
61 delayed_node->del_root = RB_ROOT; 60 delayed_node->del_root = RB_ROOT;
62 mutex_init(&delayed_node->mutex); 61 mutex_init(&delayed_node->mutex);
@@ -172,7 +171,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
172 int mod) 171 int mod)
173{ 172{
174 spin_lock(&root->lock); 173 spin_lock(&root->lock);
175 if (node->in_list) { 174 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
176 if (!list_empty(&node->p_list)) 175 if (!list_empty(&node->p_list))
177 list_move_tail(&node->p_list, &root->prepare_list); 176 list_move_tail(&node->p_list, &root->prepare_list);
178 else if (mod) 177 else if (mod)
@@ -182,7 +181,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
182 list_add_tail(&node->p_list, &root->prepare_list); 181 list_add_tail(&node->p_list, &root->prepare_list);
183 atomic_inc(&node->refs); /* inserted into list */ 182 atomic_inc(&node->refs); /* inserted into list */
184 root->nodes++; 183 root->nodes++;
185 node->in_list = 1; 184 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
186 } 185 }
187 spin_unlock(&root->lock); 186 spin_unlock(&root->lock);
188} 187}
@@ -192,13 +191,13 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
192 struct btrfs_delayed_node *node) 191 struct btrfs_delayed_node *node)
193{ 192{
194 spin_lock(&root->lock); 193 spin_lock(&root->lock);
195 if (node->in_list) { 194 if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
196 root->nodes--; 195 root->nodes--;
197 atomic_dec(&node->refs); /* not in the list */ 196 atomic_dec(&node->refs); /* not in the list */
198 list_del_init(&node->n_list); 197 list_del_init(&node->n_list);
199 if (!list_empty(&node->p_list)) 198 if (!list_empty(&node->p_list))
200 list_del_init(&node->p_list); 199 list_del_init(&node->p_list);
201 node->in_list = 0; 200 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
202 } 201 }
203 spin_unlock(&root->lock); 202 spin_unlock(&root->lock);
204} 203}
@@ -231,7 +230,8 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
231 230
232 delayed_root = node->root->fs_info->delayed_root; 231 delayed_root = node->root->fs_info->delayed_root;
233 spin_lock(&delayed_root->lock); 232 spin_lock(&delayed_root->lock);
234 if (!node->in_list) { /* not in the list */ 233 if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
234 /* not in the list */
235 if (list_empty(&delayed_root->node_list)) 235 if (list_empty(&delayed_root->node_list))
236 goto out; 236 goto out;
237 p = delayed_root->node_list.next; 237 p = delayed_root->node_list.next;
@@ -1004,9 +1004,10 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1004{ 1004{
1005 struct btrfs_delayed_root *delayed_root; 1005 struct btrfs_delayed_root *delayed_root;
1006 1006
1007 if (delayed_node && delayed_node->inode_dirty) { 1007 if (delayed_node &&
1008 test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1008 BUG_ON(!delayed_node->root); 1009 BUG_ON(!delayed_node->root);
1009 delayed_node->inode_dirty = 0; 1010 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1010 delayed_node->count--; 1011 delayed_node->count--;
1011 1012
1012 delayed_root = delayed_node->root->fs_info->delayed_root; 1013 delayed_root = delayed_node->root->fs_info->delayed_root;
@@ -1014,6 +1015,18 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1014 } 1015 }
1015} 1016}
1016 1017
1018static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
1019{
1020 struct btrfs_delayed_root *delayed_root;
1021
1022 ASSERT(delayed_node->root);
1023 clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1024 delayed_node->count--;
1025
1026 delayed_root = delayed_node->root->fs_info->delayed_root;
1027 finish_one_item(delayed_root);
1028}
1029
1017static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, 1030static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1018 struct btrfs_root *root, 1031 struct btrfs_root *root,
1019 struct btrfs_path *path, 1032 struct btrfs_path *path,
@@ -1022,13 +1035,19 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1022 struct btrfs_key key; 1035 struct btrfs_key key;
1023 struct btrfs_inode_item *inode_item; 1036 struct btrfs_inode_item *inode_item;
1024 struct extent_buffer *leaf; 1037 struct extent_buffer *leaf;
1038 int mod;
1025 int ret; 1039 int ret;
1026 1040
1027 key.objectid = node->inode_id; 1041 key.objectid = node->inode_id;
1028 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 1042 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1029 key.offset = 0; 1043 key.offset = 0;
1030 1044
1031 ret = btrfs_lookup_inode(trans, root, path, &key, 1); 1045 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1046 mod = -1;
1047 else
1048 mod = 1;
1049
1050 ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1032 if (ret > 0) { 1051 if (ret > 0) {
1033 btrfs_release_path(path); 1052 btrfs_release_path(path);
1034 return -ENOENT; 1053 return -ENOENT;
@@ -1036,19 +1055,58 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1036 return ret; 1055 return ret;
1037 } 1056 }
1038 1057
1039 btrfs_unlock_up_safe(path, 1);
1040 leaf = path->nodes[0]; 1058 leaf = path->nodes[0];
1041 inode_item = btrfs_item_ptr(leaf, path->slots[0], 1059 inode_item = btrfs_item_ptr(leaf, path->slots[0],
1042 struct btrfs_inode_item); 1060 struct btrfs_inode_item);
1043 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item, 1061 write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1044 sizeof(struct btrfs_inode_item)); 1062 sizeof(struct btrfs_inode_item));
1045 btrfs_mark_buffer_dirty(leaf); 1063 btrfs_mark_buffer_dirty(leaf);
1046 btrfs_release_path(path);
1047 1064
1065 if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1066 goto no_iref;
1067
1068 path->slots[0]++;
1069 if (path->slots[0] >= btrfs_header_nritems(leaf))
1070 goto search;
1071again:
1072 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1073 if (key.objectid != node->inode_id)
1074 goto out;
1075
1076 if (key.type != BTRFS_INODE_REF_KEY &&
1077 key.type != BTRFS_INODE_EXTREF_KEY)
1078 goto out;
1079
1080 /*
 1081 * Delayed iref deletion is only used for inodes that have a single
 1082 * link, so there is exactly one iref. The case where several irefs
 1083 * share the same item cannot occur.
1084 */
1085 btrfs_del_item(trans, root, path);
1086out:
1087 btrfs_release_delayed_iref(node);
1088no_iref:
1089 btrfs_release_path(path);
1090err_out:
1048 btrfs_delayed_inode_release_metadata(root, node); 1091 btrfs_delayed_inode_release_metadata(root, node);
1049 btrfs_release_delayed_inode(node); 1092 btrfs_release_delayed_inode(node);
1050 1093
1051 return 0; 1094 return ret;
1095
1096search:
1097 btrfs_release_path(path);
1098
1099 btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
1100 key.offset = -1;
1101 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1102 if (ret < 0)
1103 goto err_out;
1104 ASSERT(ret);
1105
1106 ret = 0;
1107 leaf = path->nodes[0];
1108 path->slots[0]--;
1109 goto again;
1052} 1110}
1053 1111
1054static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, 1112static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
@@ -1059,7 +1117,7 @@ static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1059 int ret; 1117 int ret;
1060 1118
1061 mutex_lock(&node->mutex); 1119 mutex_lock(&node->mutex);
1062 if (!node->inode_dirty) { 1120 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1063 mutex_unlock(&node->mutex); 1121 mutex_unlock(&node->mutex);
1064 return 0; 1122 return 0;
1065 } 1123 }
@@ -1203,7 +1261,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
1203 return 0; 1261 return 0;
1204 1262
1205 mutex_lock(&delayed_node->mutex); 1263 mutex_lock(&delayed_node->mutex);
1206 if (!delayed_node->inode_dirty) { 1264 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1207 mutex_unlock(&delayed_node->mutex); 1265 mutex_unlock(&delayed_node->mutex);
1208 btrfs_release_delayed_node(delayed_node); 1266 btrfs_release_delayed_node(delayed_node);
1209 return 0; 1267 return 0;
@@ -1227,7 +1285,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
1227 trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv; 1285 trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1228 1286
1229 mutex_lock(&delayed_node->mutex); 1287 mutex_lock(&delayed_node->mutex);
1230 if (delayed_node->inode_dirty) 1288 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1231 ret = __btrfs_update_delayed_inode(trans, delayed_node->root, 1289 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1232 path, delayed_node); 1290 path, delayed_node);
1233 else 1291 else
@@ -1300,36 +1358,9 @@ again:
1300 trans->block_rsv = &root->fs_info->delayed_block_rsv; 1358 trans->block_rsv = &root->fs_info->delayed_block_rsv;
1301 1359
1302 __btrfs_commit_inode_delayed_items(trans, path, delayed_node); 1360 __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1303 /*
1304 * Maybe new delayed items have been inserted, so we need requeue
1305 * the work. Besides that, we must dequeue the empty delayed nodes
1306 * to avoid the race between delayed items balance and the worker.
1307 * The race like this:
1308 * Task1 Worker thread
1309 * count == 0, needn't requeue
1310 * also needn't insert the
1311 * delayed node into prepare
1312 * list again.
1313 * add lots of delayed items
1314 * queue the delayed node
1315 * already in the list,
1316 * and not in the prepare
1317 * list, it means the delayed
1318 * node is being dealt with
1319 * by the worker.
1320 * do delayed items balance
1321 * the delayed node is being
1322 * dealt with by the worker
1323 * now, just wait.
1324 * the worker goto idle.
1325 * Task1 will sleep until the transaction is commited.
1326 */
1327 mutex_lock(&delayed_node->mutex);
1328 btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
1329 mutex_unlock(&delayed_node->mutex);
1330 1361
1331 trans->block_rsv = block_rsv; 1362 trans->block_rsv = block_rsv;
1332 btrfs_end_transaction_dmeta(trans, root); 1363 btrfs_end_transaction(trans, root);
1333 btrfs_btree_balance_dirty_nodelay(root); 1364 btrfs_btree_balance_dirty_nodelay(root);
1334 1365
1335release_path: 1366release_path:
@@ -1376,52 +1407,41 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1376 WARN_ON(btrfs_first_delayed_node(delayed_root)); 1407 WARN_ON(btrfs_first_delayed_node(delayed_root));
1377} 1408}
1378 1409
1379static int refs_newer(struct btrfs_delayed_root *delayed_root, 1410static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1380 int seq, int count)
1381{ 1411{
1382 int val = atomic_read(&delayed_root->items_seq); 1412 int val = atomic_read(&delayed_root->items_seq);
1383 1413
1384 if (val < seq || val >= seq + count) 1414 if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1415 return 1;
1416
1417 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1385 return 1; 1418 return 1;
1419
1386 return 0; 1420 return 0;
1387} 1421}
1388 1422
1389void btrfs_balance_delayed_items(struct btrfs_root *root) 1423void btrfs_balance_delayed_items(struct btrfs_root *root)
1390{ 1424{
1391 struct btrfs_delayed_root *delayed_root; 1425 struct btrfs_delayed_root *delayed_root;
1392 int seq;
1393 1426
1394 delayed_root = btrfs_get_delayed_root(root); 1427 delayed_root = btrfs_get_delayed_root(root);
1395 1428
1396 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) 1429 if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1397 return; 1430 return;
1398 1431
1399 seq = atomic_read(&delayed_root->items_seq);
1400
1401 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) { 1432 if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1433 int seq;
1402 int ret; 1434 int ret;
1403 DEFINE_WAIT(__wait); 1435
1436 seq = atomic_read(&delayed_root->items_seq);
1404 1437
1405 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0); 1438 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
1406 if (ret) 1439 if (ret)
1407 return; 1440 return;
1408 1441
1409 while (1) { 1442 wait_event_interruptible(delayed_root->wait,
1410 prepare_to_wait(&delayed_root->wait, &__wait, 1443 could_end_wait(delayed_root, seq));
1411 TASK_INTERRUPTIBLE); 1444 return;
1412
1413 if (refs_newer(delayed_root, seq,
1414 BTRFS_DELAYED_BATCH) ||
1415 atomic_read(&delayed_root->items) <
1416 BTRFS_DELAYED_BACKGROUND) {
1417 break;
1418 }
1419 if (!signal_pending(current))
1420 schedule();
1421 else
1422 break;
1423 }
1424 finish_wait(&delayed_root->wait, &__wait);
1425 } 1445 }
1426 1446
1427 btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH); 1447 btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
@@ -1472,9 +1492,9 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1472 mutex_lock(&delayed_node->mutex); 1492 mutex_lock(&delayed_node->mutex);
1473 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item); 1493 ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1474 if (unlikely(ret)) { 1494 if (unlikely(ret)) {
1475 printk(KERN_ERR "err add delayed dir index item(name: %.*s) " 1495 btrfs_err(root->fs_info, "err add delayed dir index item(name: %.*s) "
1476 "into the insertion tree of the delayed node" 1496 "into the insertion tree of the delayed node"
1477 "(root id: %llu, inode id: %llu, errno: %d)\n", 1497 "(root id: %llu, inode id: %llu, errno: %d)",
1478 name_len, name, delayed_node->root->objectid, 1498 name_len, name, delayed_node->root->objectid,
1479 delayed_node->inode_id, ret); 1499 delayed_node->inode_id, ret);
1480 BUG(); 1500 BUG();
@@ -1544,9 +1564,9 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1544 mutex_lock(&node->mutex); 1564 mutex_lock(&node->mutex);
1545 ret = __btrfs_add_delayed_deletion_item(node, item); 1565 ret = __btrfs_add_delayed_deletion_item(node, item);
1546 if (unlikely(ret)) { 1566 if (unlikely(ret)) {
1547 printk(KERN_ERR "err add delayed dir index item(index: %llu) " 1567 btrfs_err(root->fs_info, "err add delayed dir index item(index: %llu) "
1548 "into the deletion tree of the delayed node" 1568 "into the deletion tree of the delayed node"
1549 "(root id: %llu, inode id: %llu, errno: %d)\n", 1569 "(root id: %llu, inode id: %llu, errno: %d)",
1550 index, node->root->objectid, node->inode_id, 1570 index, node->root->objectid, node->inode_id,
1551 ret); 1571 ret);
1552 BUG(); 1572 BUG();
@@ -1759,7 +1779,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1759 return -ENOENT; 1779 return -ENOENT;
1760 1780
1761 mutex_lock(&delayed_node->mutex); 1781 mutex_lock(&delayed_node->mutex);
1762 if (!delayed_node->inode_dirty) { 1782 if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1763 mutex_unlock(&delayed_node->mutex); 1783 mutex_unlock(&delayed_node->mutex);
1764 btrfs_release_delayed_node(delayed_node); 1784 btrfs_release_delayed_node(delayed_node);
1765 return -ENOENT; 1785 return -ENOENT;
@@ -1810,7 +1830,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1810 return PTR_ERR(delayed_node); 1830 return PTR_ERR(delayed_node);
1811 1831
1812 mutex_lock(&delayed_node->mutex); 1832 mutex_lock(&delayed_node->mutex);
1813 if (delayed_node->inode_dirty) { 1833 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1814 fill_stack_inode_item(trans, &delayed_node->inode_item, inode); 1834 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1815 goto release_node; 1835 goto release_node;
1816 } 1836 }
@@ -1821,7 +1841,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1821 goto release_node; 1841 goto release_node;
1822 1842
1823 fill_stack_inode_item(trans, &delayed_node->inode_item, inode); 1843 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1824 delayed_node->inode_dirty = 1; 1844 set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1825 delayed_node->count++; 1845 delayed_node->count++;
1826 atomic_inc(&root->fs_info->delayed_root->items); 1846 atomic_inc(&root->fs_info->delayed_root->items);
1827release_node: 1847release_node:
@@ -1830,6 +1850,41 @@ release_node:
1830 return ret; 1850 return ret;
1831} 1851}
1832 1852
1853int btrfs_delayed_delete_inode_ref(struct inode *inode)
1854{
1855 struct btrfs_delayed_node *delayed_node;
1856
1857 delayed_node = btrfs_get_or_create_delayed_node(inode);
1858 if (IS_ERR(delayed_node))
1859 return PTR_ERR(delayed_node);
1860
1861 /*
 1862 * We don't reserve space for inode ref deletion because:
 1863 * - We ONLY do async inode ref deletion for inodes that have only
 1864 * one link (i_nlink == 1), which means there is only one inode ref.
 1865 * In most cases the inode ref and the inode item are in the
 1866 * same leaf, and we will deal with them at the same time.
 1867 * Since we are sure we will reserve space for the inode item,
 1868 * it is unnecessary to reserve space for inode ref deletion.
 1869 * - If the inode ref and the inode item are not in the same leaf,
 1870 * we still needn't worry about ENOSPC, because we reserve
 1871 * much more space for the inode update than it needs.
 1872 * - At worst, we can steal some space from the global reservation.
 1873 * That is very rare.
1874 */
1875 mutex_lock(&delayed_node->mutex);
1876 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1877 goto release_node;
1878
1879 set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1880 delayed_node->count++;
1881 atomic_inc(&BTRFS_I(inode)->root->fs_info->delayed_root->items);
1882release_node:
1883 mutex_unlock(&delayed_node->mutex);
1884 btrfs_release_delayed_node(delayed_node);
1885 return 0;
1886}
1887
1833static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node) 1888static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1834{ 1889{
1835 struct btrfs_root *root = delayed_node->root; 1890 struct btrfs_root *root = delayed_node->root;
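A hedged sketch of the expected call site for btrfs_delayed_delete_inode_ref() in the unlink path; the exact placement and the fallback label are assumptions, not part of this hunk:

        /* only single-link inodes qualify for async iref deletion */
        if (inode->i_nlink == 1) {
                ret = btrfs_delayed_delete_inode_ref(inode);
                if (ret)
                        goto del_iref_sync;     /* hypothetical fallback */
        }
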
@@ -1852,7 +1907,10 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1852 btrfs_release_delayed_item(prev_item); 1907 btrfs_release_delayed_item(prev_item);
1853 } 1908 }
1854 1909
1855 if (delayed_node->inode_dirty) { 1910 if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1911 btrfs_release_delayed_iref(delayed_node);
1912
1913 if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1856 btrfs_delayed_inode_release_metadata(root, delayed_node); 1914 btrfs_delayed_inode_release_metadata(root, delayed_node);
1857 btrfs_release_delayed_inode(delayed_node); 1915 btrfs_release_delayed_inode(delayed_node);
1858 } 1916 }
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index a4b38f934d14..f70119f25421 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -48,6 +48,10 @@ struct btrfs_delayed_root {
48 wait_queue_head_t wait; 48 wait_queue_head_t wait;
49}; 49};
50 50
51#define BTRFS_DELAYED_NODE_IN_LIST 0
52#define BTRFS_DELAYED_NODE_INODE_DIRTY 1
53#define BTRFS_DELAYED_NODE_DEL_IREF 2
54
51struct btrfs_delayed_node { 55struct btrfs_delayed_node {
52 u64 inode_id; 56 u64 inode_id;
53 u64 bytes_reserved; 57 u64 bytes_reserved;
@@ -65,8 +69,7 @@ struct btrfs_delayed_node {
65 struct btrfs_inode_item inode_item; 69 struct btrfs_inode_item inode_item;
66 atomic_t refs; 70 atomic_t refs;
67 u64 index_cnt; 71 u64 index_cnt;
68 bool in_list; 72 unsigned long flags;
69 bool inode_dirty;
70 int count; 73 int count;
71}; 74};
72 75
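With the two bools collapsed into one flags word, state tests and updates go through the standard bitops; a minimal sketch mirroring the pattern used throughout the .c changes above, always under the node's mutex:

        mutex_lock(&node->mutex);
        if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
                set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags);
                node->count++;
        }
        mutex_unlock(&node->mutex);
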
@@ -125,6 +128,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode);
125int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, 128int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
126 struct btrfs_root *root, struct inode *inode); 129 struct btrfs_root *root, struct inode *inode);
127int btrfs_fill_inode(struct inode *inode, u32 *rdev); 130int btrfs_fill_inode(struct inode *inode, u32 *rdev);
131int btrfs_delayed_delete_inode_ref(struct inode *inode);
128 132
129/* Used for drop dead root */ 133/* Used for drop dead root */
130void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); 134void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e4d467be2dd4..f3bff89eecf0 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -161,35 +161,61 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
161 return NULL; 161 return NULL;
162} 162}
163 163
164/* insert a new ref to head ref rbtree */
165static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
166 struct rb_node *node)
167{
168 struct rb_node **p = &root->rb_node;
169 struct rb_node *parent_node = NULL;
170 struct btrfs_delayed_ref_head *entry;
171 struct btrfs_delayed_ref_head *ins;
172 u64 bytenr;
173
174 ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
175 bytenr = ins->node.bytenr;
176 while (*p) {
177 parent_node = *p;
178 entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
179 href_node);
180
181 if (bytenr < entry->node.bytenr)
182 p = &(*p)->rb_left;
183 else if (bytenr > entry->node.bytenr)
184 p = &(*p)->rb_right;
185 else
186 return entry;
187 }
188
189 rb_link_node(node, parent_node, p);
190 rb_insert_color(node, root);
191 return NULL;
192}
193
164/* 194/*
 165 * find a head entry based on bytenr. This returns the delayed ref 195 * find a head entry based on bytenr. This returns the delayed ref
166 * head if it was able to find one, or NULL if nothing was in that spot. 196 * head if it was able to find one, or NULL if nothing was in that spot.
167 * If return_bigger is given, the next bigger entry is returned if no exact 197 * If return_bigger is given, the next bigger entry is returned if no exact
168 * match is found. 198 * match is found.
169 */ 199 */
170static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, 200static struct btrfs_delayed_ref_head *
171 u64 bytenr, 201find_ref_head(struct rb_root *root, u64 bytenr,
172 struct btrfs_delayed_ref_node **last, 202 struct btrfs_delayed_ref_head **last, int return_bigger)
173 int return_bigger)
174{ 203{
175 struct rb_node *n; 204 struct rb_node *n;
176 struct btrfs_delayed_ref_node *entry; 205 struct btrfs_delayed_ref_head *entry;
177 int cmp = 0; 206 int cmp = 0;
178 207
179again: 208again:
180 n = root->rb_node; 209 n = root->rb_node;
181 entry = NULL; 210 entry = NULL;
182 while (n) { 211 while (n) {
183 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); 212 entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
184 WARN_ON(!entry->in_tree);
185 if (last) 213 if (last)
186 *last = entry; 214 *last = entry;
187 215
188 if (bytenr < entry->bytenr) 216 if (bytenr < entry->node.bytenr)
189 cmp = -1; 217 cmp = -1;
190 else if (bytenr > entry->bytenr) 218 else if (bytenr > entry->node.bytenr)
191 cmp = 1;
192 else if (!btrfs_delayed_ref_is_head(entry))
193 cmp = 1; 219 cmp = 1;
194 else 220 else
195 cmp = 0; 221 cmp = 0;
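[editor's note] The new htree_insert() above is the stock rbtree keyed-insert walk: descend by bytenr, return the existing head on a duplicate key so the caller can merge into it, otherwise link the node and rebalance. A compilable sketch of the same contract on a plain, non-balancing binary tree (the kernel code uses rb_link_node()/rb_insert_color() instead):

#include <stdio.h>

struct head { unsigned long long bytenr; struct head *left, *right; };

static struct head *head_insert(struct head **root, struct head *ins)
{
    struct head **p = root;

    while (*p) {
        if (ins->bytenr < (*p)->bytenr)
            p = &(*p)->left;
        else if (ins->bytenr > (*p)->bytenr)
            p = &(*p)->right;
        else
            return *p;          /* duplicate: caller merges into this head */
    }
    *p = ins;                   /* the kernel rebalances via rb_insert_color() */
    return NULL;
}

int main(void)
{
    struct head a = { 4096 }, b = { 8192 }, dup = { 4096 };
    struct head *root = NULL;

    head_insert(&root, &a);
    head_insert(&root, &b);
    printf("existing? %s\n", head_insert(&root, &dup) ? "yes" : "no");
    return 0;
}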
@@ -203,12 +229,12 @@ again:
203 } 229 }
204 if (entry && return_bigger) { 230 if (entry && return_bigger) {
205 if (cmp > 0) { 231 if (cmp > 0) {
206 n = rb_next(&entry->rb_node); 232 n = rb_next(&entry->href_node);
207 if (!n) 233 if (!n)
208 n = rb_first(root); 234 n = rb_first(root);
209 entry = rb_entry(n, struct btrfs_delayed_ref_node, 235 entry = rb_entry(n, struct btrfs_delayed_ref_head,
210 rb_node); 236 href_node);
211 bytenr = entry->bytenr; 237 bytenr = entry->node.bytenr;
212 return_bigger = 0; 238 return_bigger = 0;
213 goto again; 239 goto again;
214 } 240 }
@@ -243,33 +269,38 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
243 269
244static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, 270static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
245 struct btrfs_delayed_ref_root *delayed_refs, 271 struct btrfs_delayed_ref_root *delayed_refs,
272 struct btrfs_delayed_ref_head *head,
246 struct btrfs_delayed_ref_node *ref) 273 struct btrfs_delayed_ref_node *ref)
247{ 274{
248 rb_erase(&ref->rb_node, &delayed_refs->root); 275 if (btrfs_delayed_ref_is_head(ref)) {
276 head = btrfs_delayed_node_to_head(ref);
277 rb_erase(&head->href_node, &delayed_refs->href_root);
278 } else {
279 assert_spin_locked(&head->lock);
280 rb_erase(&ref->rb_node, &head->ref_root);
281 }
249 ref->in_tree = 0; 282 ref->in_tree = 0;
250 btrfs_put_delayed_ref(ref); 283 btrfs_put_delayed_ref(ref);
251 delayed_refs->num_entries--; 284 atomic_dec(&delayed_refs->num_entries);
252 if (trans->delayed_ref_updates) 285 if (trans->delayed_ref_updates)
253 trans->delayed_ref_updates--; 286 trans->delayed_ref_updates--;
254} 287}
255 288
256static int merge_ref(struct btrfs_trans_handle *trans, 289static int merge_ref(struct btrfs_trans_handle *trans,
257 struct btrfs_delayed_ref_root *delayed_refs, 290 struct btrfs_delayed_ref_root *delayed_refs,
291 struct btrfs_delayed_ref_head *head,
258 struct btrfs_delayed_ref_node *ref, u64 seq) 292 struct btrfs_delayed_ref_node *ref, u64 seq)
259{ 293{
260 struct rb_node *node; 294 struct rb_node *node;
261 int merged = 0;
262 int mod = 0; 295 int mod = 0;
263 int done = 0; 296 int done = 0;
264 297
265 node = rb_prev(&ref->rb_node); 298 node = rb_next(&ref->rb_node);
266 while (node) { 299 while (!done && node) {
267 struct btrfs_delayed_ref_node *next; 300 struct btrfs_delayed_ref_node *next;
268 301
269 next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 302 next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
270 node = rb_prev(node); 303 node = rb_next(node);
271 if (next->bytenr != ref->bytenr)
272 break;
273 if (seq && next->seq >= seq) 304 if (seq && next->seq >= seq)
274 break; 305 break;
275 if (comp_entry(ref, next, 0)) 306 if (comp_entry(ref, next, 0))
@@ -289,12 +320,11 @@ static int merge_ref(struct btrfs_trans_handle *trans,
289 mod = -next->ref_mod; 320 mod = -next->ref_mod;
290 } 321 }
291 322
292 merged++; 323 drop_delayed_ref(trans, delayed_refs, head, next);
293 drop_delayed_ref(trans, delayed_refs, next);
294 ref->ref_mod += mod; 324 ref->ref_mod += mod;
295 if (ref->ref_mod == 0) { 325 if (ref->ref_mod == 0) {
296 drop_delayed_ref(trans, delayed_refs, ref); 326 drop_delayed_ref(trans, delayed_refs, head, ref);
297 break; 327 done = 1;
298 } else { 328 } else {
299 /* 329 /*
300 * You can't have multiples of the same ref on a tree 330 * You can't have multiples of the same ref on a tree
@@ -303,13 +333,8 @@ static int merge_ref(struct btrfs_trans_handle *trans,
303 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || 333 WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
304 ref->type == BTRFS_SHARED_BLOCK_REF_KEY); 334 ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
305 } 335 }
306
307 if (done)
308 break;
309 node = rb_prev(&ref->rb_node);
310 } 336 }
311 337 return done;
312 return merged;
313} 338}
314 339
315void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, 340void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
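[editor's note] merge_ref() nets later refs on the same extent against the current one by summing signed ref_mod contributions and dropping both sides when the total reaches zero. A toy illustration, under the simplifying assumption that an add contributes a positive mod and a drop a negative one:

#include <stdio.h>

int main(void)
{
    int ref_mod = 1;                 /* the ref being merged into (one add) */
    int next_mods[] = { 1, -2 };     /* later mergeable refs on the same extent */

    for (int i = 0; i < 2; i++) {
        ref_mod += next_mods[i];     /* merge_ref(): ref->ref_mod += mod */
        printf("after merge %d: ref_mod=%d%s\n", i, ref_mod,
               ref_mod == 0 ? " (both refs dropped)" : "");
        if (ref_mod == 0)
            break;                   /* drop_delayed_ref() on both sides */
    }
    return 0;
}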
@@ -320,6 +345,14 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
320 struct rb_node *node; 345 struct rb_node *node;
321 u64 seq = 0; 346 u64 seq = 0;
322 347
348 assert_spin_locked(&head->lock);
349 /*
 350 * We don't have too many refs to merge in the case of delayed data
351 * refs.
352 */
353 if (head->is_data)
354 return;
355
323 spin_lock(&fs_info->tree_mod_seq_lock); 356 spin_lock(&fs_info->tree_mod_seq_lock);
324 if (!list_empty(&fs_info->tree_mod_seq_list)) { 357 if (!list_empty(&fs_info->tree_mod_seq_list)) {
325 struct seq_list *elem; 358 struct seq_list *elem;
@@ -330,22 +363,19 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
330 } 363 }
331 spin_unlock(&fs_info->tree_mod_seq_lock); 364 spin_unlock(&fs_info->tree_mod_seq_lock);
332 365
333 node = rb_prev(&head->node.rb_node); 366 node = rb_first(&head->ref_root);
334 while (node) { 367 while (node) {
335 struct btrfs_delayed_ref_node *ref; 368 struct btrfs_delayed_ref_node *ref;
336 369
337 ref = rb_entry(node, struct btrfs_delayed_ref_node, 370 ref = rb_entry(node, struct btrfs_delayed_ref_node,
338 rb_node); 371 rb_node);
339 if (ref->bytenr != head->node.bytenr)
340 break;
341
342 /* We can't merge refs that are outside of our seq count */ 372 /* We can't merge refs that are outside of our seq count */
343 if (seq && ref->seq >= seq) 373 if (seq && ref->seq >= seq)
344 break; 374 break;
345 if (merge_ref(trans, delayed_refs, ref, seq)) 375 if (merge_ref(trans, delayed_refs, head, ref, seq))
346 node = rb_prev(&head->node.rb_node); 376 node = rb_first(&head->ref_root);
347 else 377 else
348 node = rb_prev(node); 378 node = rb_next(&ref->rb_node);
349 } 379 }
350} 380}
351 381
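[editor's note] Note the iteration pattern in btrfs_merge_delayed_refs() above: a successful merge mutates the per-head tree, so the walk restarts from rb_first() rather than trusting a saved iterator that may point at a freed node. The same restart-after-mutation idiom on a plain array, for illustration:

#include <stdio.h>

int main(void)
{
    int vals[] = { 3, -3, 5 };
    int n = 3;

restart:
    for (int i = 0; i + 1 < n; i++) {
        if (vals[i] + vals[i + 1] == 0) {     /* a "mergeable" pair */
            for (int j = i; j + 2 < n; j++)
                vals[j] = vals[j + 2];        /* drop both entries */
            n -= 2;
            goto restart;                     /* iterator invalidated */
        }
    }
    for (int i = 0; i < n; i++)
        printf("%d ", vals[i]);
    printf("\n");
    return 0;
}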
@@ -373,71 +403,52 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
373 return ret; 403 return ret;
374} 404}
375 405
376int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, 406struct btrfs_delayed_ref_head *
377 struct list_head *cluster, u64 start) 407btrfs_select_ref_head(struct btrfs_trans_handle *trans)
378{ 408{
379 int count = 0;
380 struct btrfs_delayed_ref_root *delayed_refs; 409 struct btrfs_delayed_ref_root *delayed_refs;
381 struct rb_node *node;
382 struct btrfs_delayed_ref_node *ref;
383 struct btrfs_delayed_ref_head *head; 410 struct btrfs_delayed_ref_head *head;
411 u64 start;
412 bool loop = false;
384 413
385 delayed_refs = &trans->transaction->delayed_refs; 414 delayed_refs = &trans->transaction->delayed_refs;
386 if (start == 0) { 415
387 node = rb_first(&delayed_refs->root);
388 } else {
389 ref = NULL;
390 find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
391 if (ref) {
392 node = &ref->rb_node;
393 } else
394 node = rb_first(&delayed_refs->root);
395 }
396again: 416again:
397 while (node && count < 32) { 417 start = delayed_refs->run_delayed_start;
398 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 418 head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
399 if (btrfs_delayed_ref_is_head(ref)) { 419 if (!head && !loop) {
400 head = btrfs_delayed_node_to_head(ref); 420 delayed_refs->run_delayed_start = 0;
401 if (list_empty(&head->cluster)) {
402 list_add_tail(&head->cluster, cluster);
403 delayed_refs->run_delayed_start =
404 head->node.bytenr;
405 count++;
406
407 WARN_ON(delayed_refs->num_heads_ready == 0);
408 delayed_refs->num_heads_ready--;
409 } else if (count) {
410 /* the goal of the clustering is to find extents
411 * that are likely to end up in the same extent
412 * leaf on disk. So, we don't want them spread
413 * all over the tree. Stop now if we've hit
414 * a head that was already in use
415 */
416 break;
417 }
418 }
419 node = rb_next(node);
420 }
421 if (count) {
422 return 0;
423 } else if (start) {
424 /*
425 * we've gone to the end of the rbtree without finding any
426 * clusters. start from the beginning and try again
427 */
428 start = 0; 421 start = 0;
429 node = rb_first(&delayed_refs->root); 422 loop = true;
430 goto again; 423 head = find_ref_head(&delayed_refs->href_root, start, NULL, 1);
424 if (!head)
425 return NULL;
426 } else if (!head && loop) {
427 return NULL;
431 } 428 }
432 return 1;
433}
434 429
435void btrfs_release_ref_cluster(struct list_head *cluster) 430 while (head->processing) {
436{ 431 struct rb_node *node;
437 struct list_head *pos, *q; 432
433 node = rb_next(&head->href_node);
434 if (!node) {
435 if (loop)
436 return NULL;
437 delayed_refs->run_delayed_start = 0;
438 start = 0;
439 loop = true;
440 goto again;
441 }
442 head = rb_entry(node, struct btrfs_delayed_ref_head,
443 href_node);
444 }
438 445
439 list_for_each_safe(pos, q, cluster) 446 head->processing = 1;
440 list_del_init(pos); 447 WARN_ON(delayed_refs->num_heads_ready == 0);
448 delayed_refs->num_heads_ready--;
449 delayed_refs->run_delayed_start = head->node.bytenr +
450 head->node.num_bytes;
451 return head;
441} 452}
442 453
443/* 454/*
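[editor's note] btrfs_select_ref_head() replaces the old cluster logic with a wrap-around search: find the first head at or after run_delayed_start, skip heads already marked processing, and wrap to the beginning exactly once (the loop flag) before giving up. A compilable sketch of that selection, with an array standing in for the rbtree and a busy flag standing in for head->processing:

#include <stdio.h>

static int select_head(const int *busy, int n, int start)
{
    int loop = 0;

again:
    for (int i = start; i < n; i++)
        if (!busy[i])
            return i;           /* head->processing = 1 in the real code */
    if (!loop) {
        loop = 1;               /* wrap to the start exactly once */
        start = 0;
        goto again;
    }
    return -1;                  /* nothing available */
}

int main(void)
{
    int busy[] = { 1, 1, 0, 1 };

    printf("picked %d\n", select_head(busy, 4, 3));  /* wraps, picks 2 */
    return 0;
}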
@@ -451,6 +462,7 @@ void btrfs_release_ref_cluster(struct list_head *cluster)
451static noinline void 462static noinline void
452update_existing_ref(struct btrfs_trans_handle *trans, 463update_existing_ref(struct btrfs_trans_handle *trans,
453 struct btrfs_delayed_ref_root *delayed_refs, 464 struct btrfs_delayed_ref_root *delayed_refs,
465 struct btrfs_delayed_ref_head *head,
454 struct btrfs_delayed_ref_node *existing, 466 struct btrfs_delayed_ref_node *existing,
455 struct btrfs_delayed_ref_node *update) 467 struct btrfs_delayed_ref_node *update)
456{ 468{
@@ -463,7 +475,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
463 */ 475 */
464 existing->ref_mod--; 476 existing->ref_mod--;
465 if (existing->ref_mod == 0) 477 if (existing->ref_mod == 0)
466 drop_delayed_ref(trans, delayed_refs, existing); 478 drop_delayed_ref(trans, delayed_refs, head, existing);
467 else 479 else
468 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY || 480 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
469 existing->type == BTRFS_SHARED_BLOCK_REF_KEY); 481 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
@@ -533,9 +545,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
533 } 545 }
534 } 546 }
535 /* 547 /*
536 * update the reference mod on the head to reflect this new operation 548 * update the reference mod on the head to reflect this new operation,
 549 * we only need the lock in this case because we could be processing it
 550 * currently; for refs we just added we know we're a-ok.
537 */ 551 */
552 spin_lock(&existing_ref->lock);
538 existing->ref_mod += update->ref_mod; 553 existing->ref_mod += update->ref_mod;
554 spin_unlock(&existing_ref->lock);
539} 555}
540 556
541/* 557/*
@@ -543,13 +559,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
543 * this does all the dirty work in terms of maintaining the correct 559 * this does all the dirty work in terms of maintaining the correct
544 * overall modification count. 560 * overall modification count.
545 */ 561 */
546static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, 562static noinline struct btrfs_delayed_ref_head *
547 struct btrfs_trans_handle *trans, 563add_delayed_ref_head(struct btrfs_fs_info *fs_info,
548 struct btrfs_delayed_ref_node *ref, 564 struct btrfs_trans_handle *trans,
549 u64 bytenr, u64 num_bytes, 565 struct btrfs_delayed_ref_node *ref, u64 bytenr,
550 int action, int is_data) 566 u64 num_bytes, int action, int is_data)
551{ 567{
552 struct btrfs_delayed_ref_node *existing; 568 struct btrfs_delayed_ref_head *existing;
553 struct btrfs_delayed_ref_head *head_ref = NULL; 569 struct btrfs_delayed_ref_head *head_ref = NULL;
554 struct btrfs_delayed_ref_root *delayed_refs; 570 struct btrfs_delayed_ref_root *delayed_refs;
555 int count_mod = 1; 571 int count_mod = 1;
@@ -596,38 +612,43 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
596 head_ref = btrfs_delayed_node_to_head(ref); 612 head_ref = btrfs_delayed_node_to_head(ref);
597 head_ref->must_insert_reserved = must_insert_reserved; 613 head_ref->must_insert_reserved = must_insert_reserved;
598 head_ref->is_data = is_data; 614 head_ref->is_data = is_data;
615 head_ref->ref_root = RB_ROOT;
616 head_ref->processing = 0;
599 617
600 INIT_LIST_HEAD(&head_ref->cluster); 618 spin_lock_init(&head_ref->lock);
601 mutex_init(&head_ref->mutex); 619 mutex_init(&head_ref->mutex);
602 620
603 trace_add_delayed_ref_head(ref, head_ref, action); 621 trace_add_delayed_ref_head(ref, head_ref, action);
604 622
605 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 623 existing = htree_insert(&delayed_refs->href_root,
606 624 &head_ref->href_node);
607 if (existing) { 625 if (existing) {
608 update_existing_head_ref(existing, ref); 626 update_existing_head_ref(&existing->node, ref);
609 /* 627 /*
610 * we've updated the existing ref, free the newly 628 * we've updated the existing ref, free the newly
611 * allocated ref 629 * allocated ref
612 */ 630 */
613 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); 631 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
632 head_ref = existing;
614 } else { 633 } else {
615 delayed_refs->num_heads++; 634 delayed_refs->num_heads++;
616 delayed_refs->num_heads_ready++; 635 delayed_refs->num_heads_ready++;
617 delayed_refs->num_entries++; 636 atomic_inc(&delayed_refs->num_entries);
618 trans->delayed_ref_updates++; 637 trans->delayed_ref_updates++;
619 } 638 }
639 return head_ref;
620} 640}
621 641
622/* 642/*
623 * helper to insert a delayed tree ref into the rbtree. 643 * helper to insert a delayed tree ref into the rbtree.
624 */ 644 */
625static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, 645static noinline void
626 struct btrfs_trans_handle *trans, 646add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
627 struct btrfs_delayed_ref_node *ref, 647 struct btrfs_trans_handle *trans,
628 u64 bytenr, u64 num_bytes, u64 parent, 648 struct btrfs_delayed_ref_head *head_ref,
629 u64 ref_root, int level, int action, 649 struct btrfs_delayed_ref_node *ref, u64 bytenr,
630 int for_cow) 650 u64 num_bytes, u64 parent, u64 ref_root, int level,
651 int action, int for_cow)
631{ 652{
632 struct btrfs_delayed_ref_node *existing; 653 struct btrfs_delayed_ref_node *existing;
633 struct btrfs_delayed_tree_ref *full_ref; 654 struct btrfs_delayed_tree_ref *full_ref;
@@ -663,30 +684,33 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
663 684
664 trace_add_delayed_tree_ref(ref, full_ref, action); 685 trace_add_delayed_tree_ref(ref, full_ref, action);
665 686
666 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 687 spin_lock(&head_ref->lock);
667 688 existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
668 if (existing) { 689 if (existing) {
669 update_existing_ref(trans, delayed_refs, existing, ref); 690 update_existing_ref(trans, delayed_refs, head_ref, existing,
691 ref);
670 /* 692 /*
671 * we've updated the existing ref, free the newly 693 * we've updated the existing ref, free the newly
672 * allocated ref 694 * allocated ref
673 */ 695 */
674 kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref); 696 kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
675 } else { 697 } else {
676 delayed_refs->num_entries++; 698 atomic_inc(&delayed_refs->num_entries);
677 trans->delayed_ref_updates++; 699 trans->delayed_ref_updates++;
678 } 700 }
701 spin_unlock(&head_ref->lock);
679} 702}
680 703
681/* 704/*
682 * helper to insert a delayed data ref into the rbtree. 705 * helper to insert a delayed data ref into the rbtree.
683 */ 706 */
684static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, 707static noinline void
685 struct btrfs_trans_handle *trans, 708add_delayed_data_ref(struct btrfs_fs_info *fs_info,
686 struct btrfs_delayed_ref_node *ref, 709 struct btrfs_trans_handle *trans,
687 u64 bytenr, u64 num_bytes, u64 parent, 710 struct btrfs_delayed_ref_head *head_ref,
688 u64 ref_root, u64 owner, u64 offset, 711 struct btrfs_delayed_ref_node *ref, u64 bytenr,
689 int action, int for_cow) 712 u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
713 u64 offset, int action, int for_cow)
690{ 714{
691 struct btrfs_delayed_ref_node *existing; 715 struct btrfs_delayed_ref_node *existing;
692 struct btrfs_delayed_data_ref *full_ref; 716 struct btrfs_delayed_data_ref *full_ref;
@@ -724,19 +748,21 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
724 748
725 trace_add_delayed_data_ref(ref, full_ref, action); 749 trace_add_delayed_data_ref(ref, full_ref, action);
726 750
727 existing = tree_insert(&delayed_refs->root, &ref->rb_node); 751 spin_lock(&head_ref->lock);
728 752 existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
729 if (existing) { 753 if (existing) {
730 update_existing_ref(trans, delayed_refs, existing, ref); 754 update_existing_ref(trans, delayed_refs, head_ref, existing,
755 ref);
731 /* 756 /*
732 * we've updated the existing ref, free the newly 757 * we've updated the existing ref, free the newly
733 * allocated ref 758 * allocated ref
734 */ 759 */
735 kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); 760 kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
736 } else { 761 } else {
737 delayed_refs->num_entries++; 762 atomic_inc(&delayed_refs->num_entries);
738 trans->delayed_ref_updates++; 763 trans->delayed_ref_updates++;
739 } 764 }
765 spin_unlock(&head_ref->lock);
740} 766}
741 767
742/* 768/*
@@ -775,10 +801,10 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
775 * insert both the head node and the new ref without dropping 801 * insert both the head node and the new ref without dropping
776 * the spin lock 802 * the spin lock
777 */ 803 */
778 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 804 head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
779 num_bytes, action, 0); 805 bytenr, num_bytes, action, 0);
780 806
781 add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr, 807 add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
782 num_bytes, parent, ref_root, level, action, 808 num_bytes, parent, ref_root, level, action,
783 for_cow); 809 for_cow);
784 spin_unlock(&delayed_refs->lock); 810 spin_unlock(&delayed_refs->lock);
@@ -823,10 +849,10 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
823 * insert both the head node and the new ref without dropping 849 * insert both the head node and the new ref without dropping
824 * the spin lock 850 * the spin lock
825 */ 851 */
826 add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr, 852 head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
827 num_bytes, action, 1); 853 bytenr, num_bytes, action, 1);
828 854
829 add_delayed_data_ref(fs_info, trans, &ref->node, bytenr, 855 add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
830 num_bytes, parent, ref_root, owner, offset, 856 num_bytes, parent, ref_root, owner, offset,
831 action, for_cow); 857 action, for_cow);
832 spin_unlock(&delayed_refs->lock); 858 spin_unlock(&delayed_refs->lock);
@@ -869,14 +895,10 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
869struct btrfs_delayed_ref_head * 895struct btrfs_delayed_ref_head *
870btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) 896btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
871{ 897{
872 struct btrfs_delayed_ref_node *ref;
873 struct btrfs_delayed_ref_root *delayed_refs; 898 struct btrfs_delayed_ref_root *delayed_refs;
874 899
875 delayed_refs = &trans->transaction->delayed_refs; 900 delayed_refs = &trans->transaction->delayed_refs;
876 ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0); 901 return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0);
877 if (ref)
878 return btrfs_delayed_node_to_head(ref);
879 return NULL;
880} 902}
881 903
882void btrfs_delayed_ref_exit(void) 904void btrfs_delayed_ref_exit(void)
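[editor's note] Taken together, the delayed-ref.c changes split what used to be one flat rbtree of heads and individual refs into two levels: a global href_root of heads and a per-head ref_root guarded by a per-head spinlock, so one head can be processed while others are still being filled in. A compilable sketch of the resulting shape, with arrays standing in for the rbtrees and locking elided (field names abridged from delayed-ref.h):

#include <stdio.h>

struct delayed_ref { unsigned long long seq; int ref_mod; };

struct ref_head {
    unsigned long long bytenr;       /* key in the global href_root */
    int processing;                  /* claimed by btrfs_select_ref_head() */
    struct delayed_ref *refs;        /* stands in for the per-head ref_root */
    int nr_refs;
};

struct ref_root {
    struct ref_head *heads;          /* stands in for href_root */
    int nr_heads;
};

int main(void)
{
    struct delayed_ref r = { .seq = 1, .ref_mod = 1 };
    struct ref_head h = { .bytenr = 4096, .refs = &r, .nr_refs = 1 };
    struct ref_root root = { .heads = &h, .nr_heads = 1 };

    printf("head %llu carries %d ref(s)\n",
           root.heads[0].bytenr, root.heads[0].nr_refs);
    return 0;
}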
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 70b962cc177d..4ba9b93022ff 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -81,7 +81,10 @@ struct btrfs_delayed_ref_head {
81 */ 81 */
82 struct mutex mutex; 82 struct mutex mutex;
83 83
84 struct list_head cluster; 84 spinlock_t lock;
85 struct rb_root ref_root;
86
87 struct rb_node href_node;
85 88
86 struct btrfs_delayed_extent_op *extent_op; 89 struct btrfs_delayed_extent_op *extent_op;
87 /* 90 /*
@@ -98,6 +101,7 @@ struct btrfs_delayed_ref_head {
98 */ 101 */
99 unsigned int must_insert_reserved:1; 102 unsigned int must_insert_reserved:1;
100 unsigned int is_data:1; 103 unsigned int is_data:1;
104 unsigned int processing:1;
101}; 105};
102 106
103struct btrfs_delayed_tree_ref { 107struct btrfs_delayed_tree_ref {
@@ -116,7 +120,8 @@ struct btrfs_delayed_data_ref {
116}; 120};
117 121
118struct btrfs_delayed_ref_root { 122struct btrfs_delayed_ref_root {
119 struct rb_root root; 123 /* head ref rbtree */
124 struct rb_root href_root;
120 125
121 /* this spin lock protects the rbtree and the entries inside */ 126 /* this spin lock protects the rbtree and the entries inside */
122 spinlock_t lock; 127 spinlock_t lock;
@@ -124,7 +129,7 @@ struct btrfs_delayed_ref_root {
124 /* how many delayed ref updates we've queued, used by the 129 /* how many delayed ref updates we've queued, used by the
125 * throttling code 130 * throttling code
126 */ 131 */
127 unsigned long num_entries; 132 atomic_t num_entries;
128 133
129 /* total number of head nodes in tree */ 134 /* total number of head nodes in tree */
130 unsigned long num_heads; 135 unsigned long num_heads;
@@ -133,15 +138,6 @@ struct btrfs_delayed_ref_root {
133 unsigned long num_heads_ready; 138 unsigned long num_heads_ready;
134 139
135 /* 140 /*
136 * bumped when someone is making progress on the delayed
137 * refs, so that other procs know they are just adding to
 138 * contention instead of helping
139 */
140 atomic_t procs_running_refs;
141 atomic_t ref_seq;
142 wait_queue_head_t wait;
143
144 /*
145 * set when the tree is flushing before a transaction commit, 141 * set when the tree is flushing before a transaction commit,
146 * used by the throttling code to decide if new updates need 142 * used by the throttling code to decide if new updates need
147 * to be run right away 143 * to be run right away
@@ -226,9 +222,9 @@ static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
226 mutex_unlock(&head->mutex); 222 mutex_unlock(&head->mutex);
227} 223}
228 224
229int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, 225
230 struct list_head *cluster, u64 search_start); 226struct btrfs_delayed_ref_head *
231void btrfs_release_ref_cluster(struct list_head *cluster); 227btrfs_select_ref_head(struct btrfs_trans_handle *trans);
232 228
233int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, 229int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
234 struct btrfs_delayed_ref_root *delayed_refs, 230 struct btrfs_delayed_ref_root *delayed_refs,
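[editor's note] num_entries moves from an unsigned long guarded by the delayed_refs spinlock to an atomic_t, since per-head paths now bump it while holding only the head's own lock. A runnable C11 stand-in for the kernel's atomic_inc()/atomic_dec() usage:

#include <stdatomic.h>
#include <stdio.h>

int main(void)
{
    atomic_int num_entries = 0;          /* was "unsigned long" under the big lock */

    atomic_fetch_add(&num_entries, 1);   /* kernel: atomic_inc() on insert */
    atomic_fetch_sub(&num_entries, 1);   /* kernel: atomic_dec() in drop_delayed_ref() */
    printf("entries=%d\n", atomic_load(&num_entries));
    return 0;
}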
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 2cfc3dfff64f..564c92638b20 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -102,7 +102,8 @@ no_valid_dev_replace_entry_found:
102 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item); 102 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item);
103 103
104 if (item_size != sizeof(struct btrfs_dev_replace_item)) { 104 if (item_size != sizeof(struct btrfs_dev_replace_item)) {
105 pr_warn("btrfs: dev_replace entry found has unexpected size, ignore entry\n"); 105 btrfs_warn(fs_info,
106 "dev_replace entry found has unexpected size, ignore entry");
106 goto no_valid_dev_replace_entry_found; 107 goto no_valid_dev_replace_entry_found;
107 } 108 }
108 109
@@ -145,13 +146,19 @@ no_valid_dev_replace_entry_found:
145 if (!dev_replace->srcdev && 146 if (!dev_replace->srcdev &&
146 !btrfs_test_opt(dev_root, DEGRADED)) { 147 !btrfs_test_opt(dev_root, DEGRADED)) {
147 ret = -EIO; 148 ret = -EIO;
148 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", 149 btrfs_warn(fs_info,
149 src_devid); 150 "cannot mount because device replace operation is ongoing and");
151 btrfs_warn(fs_info,
152 "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
153 src_devid);
150 } 154 }
151 if (!dev_replace->tgtdev && 155 if (!dev_replace->tgtdev &&
152 !btrfs_test_opt(dev_root, DEGRADED)) { 156 !btrfs_test_opt(dev_root, DEGRADED)) {
153 ret = -EIO; 157 ret = -EIO;
154 pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", 158 btrfs_warn(fs_info,
159 "cannot mount because device replace operation is ongoing and");
160 btrfs_warn(fs_info,
161 "tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
155 BTRFS_DEV_REPLACE_DEVID); 162 BTRFS_DEV_REPLACE_DEVID);
156 } 163 }
157 if (dev_replace->tgtdev) { 164 if (dev_replace->tgtdev) {
@@ -210,7 +217,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
210 } 217 }
211 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 218 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
212 if (ret < 0) { 219 if (ret < 0) {
213 pr_warn("btrfs: error %d while searching for dev_replace item!\n", 220 btrfs_warn(fs_info, "error %d while searching for dev_replace item!",
214 ret); 221 ret);
215 goto out; 222 goto out;
216 } 223 }
@@ -230,7 +237,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
230 */ 237 */
231 ret = btrfs_del_item(trans, dev_root, path); 238 ret = btrfs_del_item(trans, dev_root, path);
232 if (ret != 0) { 239 if (ret != 0) {
233 pr_warn("btrfs: delete too small dev_replace item failed %d!\n", 240 btrfs_warn(fs_info, "delete too small dev_replace item failed %d!",
234 ret); 241 ret);
235 goto out; 242 goto out;
236 } 243 }
@@ -243,7 +250,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
243 ret = btrfs_insert_empty_item(trans, dev_root, path, 250 ret = btrfs_insert_empty_item(trans, dev_root, path,
244 &key, sizeof(*ptr)); 251 &key, sizeof(*ptr));
245 if (ret < 0) { 252 if (ret < 0) {
246 pr_warn("btrfs: insert dev_replace item failed %d!\n", 253 btrfs_warn(fs_info, "insert dev_replace item failed %d!",
247 ret); 254 ret);
248 goto out; 255 goto out;
249 } 256 }
@@ -305,7 +312,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
305 struct btrfs_device *src_device = NULL; 312 struct btrfs_device *src_device = NULL;
306 313
307 if (btrfs_fs_incompat(fs_info, RAID56)) { 314 if (btrfs_fs_incompat(fs_info, RAID56)) {
308 pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n"); 315 btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
309 return -EINVAL; 316 return -EINVAL;
310 } 317 }
311 318
@@ -325,7 +332,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
325 ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, 332 ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name,
326 &tgt_device); 333 &tgt_device);
327 if (ret) { 334 if (ret) {
328 pr_err("btrfs: target device %s is invalid!\n", 335 btrfs_err(fs_info, "target device %s is invalid!",
329 args->start.tgtdev_name); 336 args->start.tgtdev_name);
330 mutex_unlock(&fs_info->volume_mutex); 337 mutex_unlock(&fs_info->volume_mutex);
331 return -EINVAL; 338 return -EINVAL;
@@ -341,7 +348,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
341 } 348 }
342 349
343 if (tgt_device->total_bytes < src_device->total_bytes) { 350 if (tgt_device->total_bytes < src_device->total_bytes) {
344 pr_err("btrfs: target device is smaller than source device!\n"); 351 btrfs_err(fs_info, "target device is smaller than source device!");
345 ret = -EINVAL; 352 ret = -EINVAL;
346 goto leave_no_lock; 353 goto leave_no_lock;
347 } 354 }
@@ -366,7 +373,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
366 dev_replace->tgtdev = tgt_device; 373 dev_replace->tgtdev = tgt_device;
367 374
368 printk_in_rcu(KERN_INFO 375 printk_in_rcu(KERN_INFO
369 "btrfs: dev_replace from %s (devid %llu) to %s started\n", 376 "BTRFS: dev_replace from %s (devid %llu) to %s started\n",
370 src_device->missing ? "<missing disk>" : 377 src_device->missing ? "<missing disk>" :
371 rcu_str_deref(src_device->name), 378 rcu_str_deref(src_device->name),
372 src_device->devid, 379 src_device->devid,
@@ -489,7 +496,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
489 496
490 if (scrub_ret) { 497 if (scrub_ret) {
491 printk_in_rcu(KERN_ERR 498 printk_in_rcu(KERN_ERR
492 "btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", 499 "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
493 src_device->missing ? "<missing disk>" : 500 src_device->missing ? "<missing disk>" :
494 rcu_str_deref(src_device->name), 501 rcu_str_deref(src_device->name),
495 src_device->devid, 502 src_device->devid,
@@ -504,7 +511,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
504 } 511 }
505 512
506 printk_in_rcu(KERN_INFO 513 printk_in_rcu(KERN_INFO
507 "btrfs: dev_replace from %s (devid %llu) to %s) finished\n", 514 "BTRFS: dev_replace from %s (devid %llu) to %s) finished\n",
508 src_device->missing ? "<missing disk>" : 515 src_device->missing ? "<missing disk>" :
509 rcu_str_deref(src_device->name), 516 rcu_str_deref(src_device->name),
510 src_device->devid, 517 src_device->devid,
@@ -699,7 +706,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
699 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; 706 BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
700 dev_replace->time_stopped = get_seconds(); 707 dev_replace->time_stopped = get_seconds();
701 dev_replace->item_needs_writeback = 1; 708 dev_replace->item_needs_writeback = 1;
702 pr_info("btrfs: suspending dev_replace for unmount\n"); 709 btrfs_info(fs_info, "suspending dev_replace for unmount");
703 break; 710 break;
704 } 711 }
705 712
@@ -728,8 +735,9 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
728 break; 735 break;
729 } 736 }
730 if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) { 737 if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) {
731 pr_info("btrfs: cannot continue dev_replace, tgtdev is missing\n" 738 btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
732 "btrfs: you may cancel the operation after 'mount -o degraded'\n"); 739 btrfs_info(fs_info,
740 "you may cancel the operation after 'mount -o degraded'");
733 btrfs_dev_replace_unlock(dev_replace); 741 btrfs_dev_replace_unlock(dev_replace);
734 return 0; 742 return 0;
735 } 743 }
@@ -755,14 +763,14 @@ static int btrfs_dev_replace_kthread(void *data)
755 kfree(status_args); 763 kfree(status_args);
756 do_div(progress, 10); 764 do_div(progress, 10);
757 printk_in_rcu(KERN_INFO 765 printk_in_rcu(KERN_INFO
758 "btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", 766 "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
759 dev_replace->srcdev->missing ? "<missing disk>" : 767 dev_replace->srcdev->missing ? "<missing disk>" :
760 rcu_str_deref(dev_replace->srcdev->name), 768 rcu_str_deref(dev_replace->srcdev->name),
761 dev_replace->srcdev->devid, 769 dev_replace->srcdev->devid,
762 dev_replace->tgtdev ? 770 dev_replace->tgtdev ?
763 rcu_str_deref(dev_replace->tgtdev->name) : 771 rcu_str_deref(dev_replace->tgtdev->name) :
764 "<missing target disk>", 772 "<missing target disk>",
765 (unsigned int)progress); 773 (unsigned int)progress);
766 } 774 }
767 btrfs_dev_replace_continue_on_mount(fs_info); 775 btrfs_dev_replace_continue_on_mount(fs_info);
768 atomic_set(&fs_info->mutually_exclusive_operation_running, 0); 776 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
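[editor's note] The pr_warn()/pr_info()/pr_err() calls throughout this file become btrfs_warn()/btrfs_info()/btrfs_err(), which tie each message to the filesystem instance rather than a bare "btrfs:" prefix. A hypothetical userspace sketch of such a wrapper; my_warn and struct fs_info here are illustrative, not the kernel macros (the real btrfs_warn() routes through btrfs_printk()):

#include <stdio.h>

struct fs_info { const char *sb_id; };

/* GNU-style variadic macro, as commonly used in kernel code. */
#define my_warn(fs, fmt, ...) \
    fprintf(stderr, "BTRFS warning (device %s): " fmt "\n", \
            (fs)->sb_id, ##__VA_ARGS__)

int main(void)
{
    struct fs_info fs = { .sb_id = "sda1" };

    my_warn(&fs, "error %d while searching for dev_replace item!", -2);
    return 0;
}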
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index c031ea3fd70f..a0691df5dcea 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -261,7 +261,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
261 * see if there is room in the item to insert this 261 * see if there is room in the item to insert this
262 * name 262 * name
263 */ 263 */
264 data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item); 264 data_size = sizeof(*di) + name_len;
265 leaf = path->nodes[0]; 265 leaf = path->nodes[0];
266 slot = path->slots[0]; 266 slot = path->slots[0];
267 if (data_size + btrfs_item_size_nr(leaf, slot) + 267 if (data_size + btrfs_item_size_nr(leaf, slot) +
@@ -459,7 +459,7 @@ int verify_dir_item(struct btrfs_root *root,
459 u8 type = btrfs_dir_type(leaf, dir_item); 459 u8 type = btrfs_dir_type(leaf, dir_item);
460 460
461 if (type >= BTRFS_FT_MAX) { 461 if (type >= BTRFS_FT_MAX) {
462 printk(KERN_CRIT "btrfs: invalid dir item type: %d\n", 462 btrfs_crit(root->fs_info, "invalid dir item type: %d",
463 (int)type); 463 (int)type);
464 return 1; 464 return 1;
465 } 465 }
@@ -468,7 +468,7 @@ int verify_dir_item(struct btrfs_root *root,
468 namelen = XATTR_NAME_MAX; 468 namelen = XATTR_NAME_MAX;
469 469
470 if (btrfs_dir_name_len(leaf, dir_item) > namelen) { 470 if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
471 printk(KERN_CRIT "btrfs: invalid dir item name len: %u\n", 471 btrfs_crit(root->fs_info, "invalid dir item name len: %u",
472 (unsigned)btrfs_dir_data_len(leaf, dir_item)); 472 (unsigned)btrfs_dir_data_len(leaf, dir_item));
473 return 1; 473 return 1;
474 } 474 }
@@ -476,7 +476,7 @@ int verify_dir_item(struct btrfs_root *root,
476 /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ 476 /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
477 if ((btrfs_dir_data_len(leaf, dir_item) + 477 if ((btrfs_dir_data_len(leaf, dir_item) +
478 btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) { 478 btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
479 printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n", 479 btrfs_crit(root->fs_info, "invalid dir item name + data len: %u + %u",
480 (unsigned)btrfs_dir_name_len(leaf, dir_item), 480 (unsigned)btrfs_dir_name_len(leaf, dir_item),
481 (unsigned)btrfs_dir_data_len(leaf, dir_item)); 481 (unsigned)btrfs_dir_data_len(leaf, dir_item));
482 return 1; 482 return 1;
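[editor's note] The collision check in btrfs_check_dir_item_collision() stops adding sizeof(struct btrfs_item) to data_size; presumably the surrounding free-space comparison already accounts for the item header, so counting it again could reject names that in fact fit. A toy version of the fits-in-leaf arithmetic, with made-up sizes:

#include <stdio.h>

int main(void)
{
    int leaf_free_space = 100;   /* what the leaf-free-space helper might report */
    int existing_item   = 60;    /* btrfs_item_size_nr(leaf, slot) */
    int dir_item_header = 30;    /* stands in for sizeof(*di) */
    int name_len        = 8;

    int data_size = dir_item_header + name_len;            /* new computation */
    int old_size  = data_size + 25;  /* old code also added the item header */

    printf("new check fits: %d, old check fits: %d\n",
           data_size + existing_item <= leaf_free_space,
           old_size  + existing_item <= leaf_free_space);
    return 0;
}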
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8072cfa8a3b1..81ea55314b1f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -26,7 +26,6 @@
26#include <linux/workqueue.h> 26#include <linux/workqueue.h>
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/crc32c.h>
30#include <linux/slab.h> 29#include <linux/slab.h>
31#include <linux/migrate.h> 30#include <linux/migrate.h>
32#include <linux/ratelimit.h> 31#include <linux/ratelimit.h>
@@ -35,6 +34,7 @@
35#include <asm/unaligned.h> 34#include <asm/unaligned.h>
36#include "ctree.h" 35#include "ctree.h"
37#include "disk-io.h" 36#include "disk-io.h"
37#include "hash.h"
38#include "transaction.h" 38#include "transaction.h"
39#include "btrfs_inode.h" 39#include "btrfs_inode.h"
40#include "volumes.h" 40#include "volumes.h"
@@ -48,6 +48,7 @@
48#include "rcu-string.h" 48#include "rcu-string.h"
49#include "dev-replace.h" 49#include "dev-replace.h"
50#include "raid56.h" 50#include "raid56.h"
51#include "sysfs.h"
51 52
52#ifdef CONFIG_X86 53#ifdef CONFIG_X86
53#include <asm/cpufeature.h> 54#include <asm/cpufeature.h>
@@ -243,7 +244,7 @@ out:
243 244
244u32 btrfs_csum_data(char *data, u32 seed, size_t len) 245u32 btrfs_csum_data(char *data, u32 seed, size_t len)
245{ 246{
246 return crc32c(seed, data, len); 247 return btrfs_crc32c(seed, data, len);
247} 248}
248 249
249void btrfs_csum_final(u32 crc, char *result) 250void btrfs_csum_final(u32 crc, char *result)
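[editor's note] btrfs_csum_data() now routes through btrfs_crc32c() from the new hash.c (backed by the crypto API, per the Kconfig change) instead of the library crc32c(). For illustration only, a standalone bitwise CRC32C (Castagnoli polynomial, reflected form); note it folds the conventional pre/post inversion in, so it matches the standard check value for seed 0 rather than mirroring the kernel helper's raw seed handling:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c(uint32_t seed, const void *data, size_t len)
{
    const uint8_t *p = data;
    uint32_t crc = seed ^ 0xFFFFFFFFu;

    while (len--) {
        crc ^= *p++;
        for (int k = 0; k < 8; k++)               /* bit-at-a-time, slow but clear */
            crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
    }
    return crc ^ 0xFFFFFFFFu;
}

int main(void)
{
    const char *s = "123456789";

    printf("%08x\n", crc32c(0, s, strlen(s)));    /* expect e3069283 */
    return 0;
}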
@@ -299,11 +300,11 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
299 memcpy(&found, result, csum_size); 300 memcpy(&found, result, csum_size);
300 301
301 read_extent_buffer(buf, &val, 0, csum_size); 302 read_extent_buffer(buf, &val, 0, csum_size);
302 printk_ratelimited(KERN_INFO "btrfs: %s checksum verify " 303 printk_ratelimited(KERN_INFO
303 "failed on %llu wanted %X found %X " 304 "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
304 "level %d\n", 305 "level %d\n",
305 root->fs_info->sb->s_id, buf->start, 306 root->fs_info->sb->s_id, buf->start,
306 val, found, btrfs_header_level(buf)); 307 val, found, btrfs_header_level(buf));
307 if (result != (char *)&inline_result) 308 if (result != (char *)&inline_result)
308 kfree(result); 309 kfree(result);
309 return 1; 310 return 1;
@@ -382,13 +383,14 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
382 ret = 1; 383 ret = 1;
383 384
384 if (ret && btrfs_super_generation(disk_sb) < 10) { 385 if (ret && btrfs_super_generation(disk_sb) < 10) {
385 printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n"); 386 printk(KERN_WARNING
387 "BTRFS: super block crcs don't match, older mkfs detected\n");
386 ret = 0; 388 ret = 0;
387 } 389 }
388 } 390 }
389 391
390 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { 392 if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
391 printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n", 393 printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n",
392 csum_type); 394 csum_type);
393 ret = 1; 395 ret = 1;
394 } 396 }
@@ -464,13 +466,10 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
464 466
465static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) 467static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
466{ 468{
467 struct extent_io_tree *tree;
468 u64 start = page_offset(page); 469 u64 start = page_offset(page);
469 u64 found_start; 470 u64 found_start;
470 struct extent_buffer *eb; 471 struct extent_buffer *eb;
471 472
472 tree = &BTRFS_I(page->mapping->host)->io_tree;
473
474 eb = (struct extent_buffer *)page->private; 473 eb = (struct extent_buffer *)page->private;
475 if (page != eb->pages[0]) 474 if (page != eb->pages[0])
476 return 0; 475 return 0;
@@ -500,8 +499,8 @@ static int check_tree_block_fsid(struct btrfs_root *root,
500} 499}
501 500
502#define CORRUPT(reason, eb, root, slot) \ 501#define CORRUPT(reason, eb, root, slot) \
503 printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \ 502 btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu," \
504 "root=%llu, slot=%d\n", reason, \ 503 "root=%llu, slot=%d", reason, \
505 btrfs_header_bytenr(eb), root->objectid, slot) 504 btrfs_header_bytenr(eb), root->objectid, slot)
506 505
507static noinline int check_leaf(struct btrfs_root *root, 506static noinline int check_leaf(struct btrfs_root *root,
@@ -569,7 +568,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
569 u64 phy_offset, struct page *page, 568 u64 phy_offset, struct page *page,
570 u64 start, u64 end, int mirror) 569 u64 start, u64 end, int mirror)
571{ 570{
572 struct extent_io_tree *tree;
573 u64 found_start; 571 u64 found_start;
574 int found_level; 572 int found_level;
575 struct extent_buffer *eb; 573 struct extent_buffer *eb;
@@ -580,7 +578,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
580 if (!page->private) 578 if (!page->private)
581 goto out; 579 goto out;
582 580
583 tree = &BTRFS_I(page->mapping->host)->io_tree;
584 eb = (struct extent_buffer *)page->private; 581 eb = (struct extent_buffer *)page->private;
585 582
586 /* the pending IO might have been the only thing that kept this buffer 583 /* the pending IO might have been the only thing that kept this buffer
@@ -600,21 +597,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
600 597
601 found_start = btrfs_header_bytenr(eb); 598 found_start = btrfs_header_bytenr(eb);
602 if (found_start != eb->start) { 599 if (found_start != eb->start) {
603 printk_ratelimited(KERN_INFO "btrfs bad tree block start " 600 printk_ratelimited(KERN_INFO "BTRFS: bad tree block start "
604 "%llu %llu\n", 601 "%llu %llu\n",
605 found_start, eb->start); 602 found_start, eb->start);
606 ret = -EIO; 603 ret = -EIO;
607 goto err; 604 goto err;
608 } 605 }
609 if (check_tree_block_fsid(root, eb)) { 606 if (check_tree_block_fsid(root, eb)) {
610 printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", 607 printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n",
611 eb->start); 608 eb->start);
612 ret = -EIO; 609 ret = -EIO;
613 goto err; 610 goto err;
614 } 611 }
615 found_level = btrfs_header_level(eb); 612 found_level = btrfs_header_level(eb);
616 if (found_level >= BTRFS_MAX_LEVEL) { 613 if (found_level >= BTRFS_MAX_LEVEL) {
617 btrfs_info(root->fs_info, "bad tree block level %d\n", 614 btrfs_info(root->fs_info, "bad tree block level %d",
618 (int)btrfs_header_level(eb)); 615 (int)btrfs_header_level(eb));
619 ret = -EIO; 616 ret = -EIO;
620 goto err; 617 goto err;
@@ -842,20 +839,17 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
842 839
843static int btree_csum_one_bio(struct bio *bio) 840static int btree_csum_one_bio(struct bio *bio)
844{ 841{
845 struct bio_vec *bvec = bio->bi_io_vec; 842 struct bio_vec *bvec;
846 int bio_index = 0;
847 struct btrfs_root *root; 843 struct btrfs_root *root;
848 int ret = 0; 844 int i, ret = 0;
849 845
850 WARN_ON(bio->bi_vcnt <= 0); 846 bio_for_each_segment_all(bvec, bio, i) {
851 while (bio_index < bio->bi_vcnt) {
852 root = BTRFS_I(bvec->bv_page->mapping->host)->root; 847 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
853 ret = csum_dirty_buffer(root, bvec->bv_page); 848 ret = csum_dirty_buffer(root, bvec->bv_page);
854 if (ret) 849 if (ret)
855 break; 850 break;
856 bio_index++;
857 bvec++;
858 } 851 }
852
859 return ret; 853 return ret;
860} 854}
861 855
@@ -967,11 +961,9 @@ static int btree_migratepage(struct address_space *mapping,
967static int btree_writepages(struct address_space *mapping, 961static int btree_writepages(struct address_space *mapping,
968 struct writeback_control *wbc) 962 struct writeback_control *wbc)
969{ 963{
970 struct extent_io_tree *tree;
971 struct btrfs_fs_info *fs_info; 964 struct btrfs_fs_info *fs_info;
972 int ret; 965 int ret;
973 966
974 tree = &BTRFS_I(mapping->host)->io_tree;
975 if (wbc->sync_mode == WB_SYNC_NONE) { 967 if (wbc->sync_mode == WB_SYNC_NONE) {
976 968
977 if (wbc->for_kupdate) 969 if (wbc->for_kupdate)
@@ -1010,8 +1002,9 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
1010 extent_invalidatepage(tree, page, offset); 1002 extent_invalidatepage(tree, page, offset);
1011 btree_releasepage(page, GFP_NOFS); 1003 btree_releasepage(page, GFP_NOFS);
1012 if (PagePrivate(page)) { 1004 if (PagePrivate(page)) {
1013 printk(KERN_WARNING "btrfs warning page private not zero " 1005 btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info,
1014 "on page %llu\n", (unsigned long long)page_offset(page)); 1006 "page private not zero on page %llu",
1007 (unsigned long long)page_offset(page));
1015 ClearPagePrivate(page); 1008 ClearPagePrivate(page);
1016 set_page_private(page, 0); 1009 set_page_private(page, 0);
1017 page_cache_release(page); 1010 page_cache_release(page);
@@ -1095,21 +1088,13 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
1095struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 1088struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
1096 u64 bytenr, u32 blocksize) 1089 u64 bytenr, u32 blocksize)
1097{ 1090{
1098 struct inode *btree_inode = root->fs_info->btree_inode; 1091 return find_extent_buffer(root->fs_info, bytenr);
1099 struct extent_buffer *eb;
1100 eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
1101 return eb;
1102} 1092}
1103 1093
1104struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, 1094struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
1105 u64 bytenr, u32 blocksize) 1095 u64 bytenr, u32 blocksize)
1106{ 1096{
1107 struct inode *btree_inode = root->fs_info->btree_inode; 1097 return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
1108 struct extent_buffer *eb;
1109
1110 eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
1111 bytenr, blocksize);
1112 return eb;
1113} 1098}
1114 1099
1115 1100
@@ -1273,7 +1258,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1273 struct btrfs_root *root; 1258 struct btrfs_root *root;
1274 struct btrfs_key key; 1259 struct btrfs_key key;
1275 int ret = 0; 1260 int ret = 0;
1276 u64 bytenr;
1277 uuid_le uuid; 1261 uuid_le uuid;
1278 1262
1279 root = btrfs_alloc_root(fs_info); 1263 root = btrfs_alloc_root(fs_info);
@@ -1295,7 +1279,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
1295 goto fail; 1279 goto fail;
1296 } 1280 }
1297 1281
1298 bytenr = leaf->start;
1299 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); 1282 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
1300 btrfs_set_header_bytenr(leaf, leaf->start); 1283 btrfs_set_header_bytenr(leaf, leaf->start);
1301 btrfs_set_header_generation(leaf, trans->transid); 1284 btrfs_set_header_generation(leaf, trans->transid);
@@ -1616,7 +1599,8 @@ again:
1616 if (ret) 1599 if (ret)
1617 goto fail; 1600 goto fail;
1618 1601
1619 ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); 1602 ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
1603 location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
1620 if (ret < 0) 1604 if (ret < 0)
1621 goto fail; 1605 goto fail;
1622 if (ret == 0) 1606 if (ret == 0)
@@ -1684,18 +1668,16 @@ static void end_workqueue_fn(struct btrfs_work *work)
1684{ 1668{
1685 struct bio *bio; 1669 struct bio *bio;
1686 struct end_io_wq *end_io_wq; 1670 struct end_io_wq *end_io_wq;
1687 struct btrfs_fs_info *fs_info;
1688 int error; 1671 int error;
1689 1672
1690 end_io_wq = container_of(work, struct end_io_wq, work); 1673 end_io_wq = container_of(work, struct end_io_wq, work);
1691 bio = end_io_wq->bio; 1674 bio = end_io_wq->bio;
1692 fs_info = end_io_wq->info;
1693 1675
1694 error = end_io_wq->error; 1676 error = end_io_wq->error;
1695 bio->bi_private = end_io_wq->private; 1677 bio->bi_private = end_io_wq->private;
1696 bio->bi_end_io = end_io_wq->end_io; 1678 bio->bi_end_io = end_io_wq->end_io;
1697 kfree(end_io_wq); 1679 kfree(end_io_wq);
1698 bio_endio(bio, error); 1680 bio_endio_nodec(bio, error);
1699} 1681}
1700 1682
1701static int cleaner_kthread(void *arg) 1683static int cleaner_kthread(void *arg)
@@ -2080,6 +2062,12 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
2080 for (i = 0; i < ret; i++) 2062 for (i = 0; i < ret; i++)
2081 btrfs_drop_and_free_fs_root(fs_info, gang[i]); 2063 btrfs_drop_and_free_fs_root(fs_info, gang[i]);
2082 } 2064 }
2065
2066 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
2067 btrfs_free_log_root_tree(NULL, fs_info);
2068 btrfs_destroy_pinned_extent(fs_info->tree_root,
2069 fs_info->pinned_extents);
2070 }
2083} 2071}
2084 2072
2085int open_ctree(struct super_block *sb, 2073int open_ctree(struct super_block *sb,
@@ -2154,6 +2142,7 @@ int open_ctree(struct super_block *sb,
2154 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); 2142 mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
2155 2143
2156 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); 2144 INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
2145 INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
2157 INIT_LIST_HEAD(&fs_info->trans_list); 2146 INIT_LIST_HEAD(&fs_info->trans_list);
2158 INIT_LIST_HEAD(&fs_info->dead_roots); 2147 INIT_LIST_HEAD(&fs_info->dead_roots);
2159 INIT_LIST_HEAD(&fs_info->delayed_iputs); 2148 INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2167,6 +2156,7 @@ int open_ctree(struct super_block *sb,
2167 spin_lock_init(&fs_info->free_chunk_lock); 2156 spin_lock_init(&fs_info->free_chunk_lock);
2168 spin_lock_init(&fs_info->tree_mod_seq_lock); 2157 spin_lock_init(&fs_info->tree_mod_seq_lock);
2169 spin_lock_init(&fs_info->super_lock); 2158 spin_lock_init(&fs_info->super_lock);
2159 spin_lock_init(&fs_info->buffer_lock);
2170 rwlock_init(&fs_info->tree_mod_log_lock); 2160 rwlock_init(&fs_info->tree_mod_log_lock);
2171 mutex_init(&fs_info->reloc_mutex); 2161 mutex_init(&fs_info->reloc_mutex);
2172 seqlock_init(&fs_info->profiles_lock); 2162 seqlock_init(&fs_info->profiles_lock);
@@ -2198,7 +2188,7 @@ int open_ctree(struct super_block *sb,
2198 fs_info->free_chunk_space = 0; 2188 fs_info->free_chunk_space = 0;
2199 fs_info->tree_mod_log = RB_ROOT; 2189 fs_info->tree_mod_log = RB_ROOT;
2200 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; 2190 fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
2201 2191 fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
2202 /* readahead state */ 2192 /* readahead state */
2203 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); 2193 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
2204 spin_lock_init(&fs_info->reada_lock); 2194 spin_lock_init(&fs_info->reada_lock);
@@ -2337,7 +2327,7 @@ int open_ctree(struct super_block *sb,
2337 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). 2327 * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
2338 */ 2328 */
2339 if (btrfs_check_super_csum(bh->b_data)) { 2329 if (btrfs_check_super_csum(bh->b_data)) {
2340 printk(KERN_ERR "btrfs: superblock checksum mismatch\n"); 2330 printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
2341 err = -EINVAL; 2331 err = -EINVAL;
2342 goto fail_alloc; 2332 goto fail_alloc;
2343 } 2333 }
@@ -2356,7 +2346,7 @@ int open_ctree(struct super_block *sb,
2356 2346
2357 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 2347 ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
2358 if (ret) { 2348 if (ret) {
2359 printk(KERN_ERR "btrfs: superblock contains fatal errors\n"); 2349 printk(KERN_ERR "BTRFS: superblock contains fatal errors\n");
2360 err = -EINVAL; 2350 err = -EINVAL;
2361 goto fail_alloc; 2351 goto fail_alloc;
2362 } 2352 }
@@ -2421,7 +2411,7 @@ int open_ctree(struct super_block *sb,
2421 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; 2411 features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
2422 2412
2423 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) 2413 if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
2424 printk(KERN_ERR "btrfs: has skinny extents\n"); 2414 printk(KERN_ERR "BTRFS: has skinny extents\n");
2425 2415
2426 /* 2416 /*
2427 * flag our filesystem as having big metadata blocks if 2417 * flag our filesystem as having big metadata blocks if
@@ -2429,7 +2419,7 @@ int open_ctree(struct super_block *sb,
2429 */ 2419 */
2430 if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { 2420 if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
2431 if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) 2421 if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
2432 printk(KERN_INFO "btrfs flagging fs with big metadata feature\n"); 2422 printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
2433 features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; 2423 features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
2434 } 2424 }
2435 2425
@@ -2446,7 +2436,7 @@ int open_ctree(struct super_block *sb,
2446 */ 2436 */
2447 if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && 2437 if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
2448 (sectorsize != leafsize)) { 2438 (sectorsize != leafsize)) {
2449 printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes " 2439 printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
2450 "are not allowed for mixed block groups on %s\n", 2440 "are not allowed for mixed block groups on %s\n",
2451 sb->s_id); 2441 sb->s_id);
2452 goto fail_alloc; 2442 goto fail_alloc;
@@ -2583,12 +2573,12 @@ int open_ctree(struct super_block *sb,
2583 sb->s_blocksize_bits = blksize_bits(sectorsize); 2573 sb->s_blocksize_bits = blksize_bits(sectorsize);
2584 2574
2585 if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) { 2575 if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
2586 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); 2576 printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
2587 goto fail_sb_buffer; 2577 goto fail_sb_buffer;
2588 } 2578 }
2589 2579
2590 if (sectorsize != PAGE_SIZE) { 2580 if (sectorsize != PAGE_SIZE) {
2591 printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) " 2581 printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
2592 "found on %s\n", (unsigned long)sectorsize, sb->s_id); 2582 "found on %s\n", (unsigned long)sectorsize, sb->s_id);
2593 goto fail_sb_buffer; 2583 goto fail_sb_buffer;
2594 } 2584 }
@@ -2597,7 +2587,7 @@ int open_ctree(struct super_block *sb,
2597 ret = btrfs_read_sys_array(tree_root); 2587 ret = btrfs_read_sys_array(tree_root);
2598 mutex_unlock(&fs_info->chunk_mutex); 2588 mutex_unlock(&fs_info->chunk_mutex);
2599 if (ret) { 2589 if (ret) {
2600 printk(KERN_WARNING "btrfs: failed to read the system " 2590 printk(KERN_WARNING "BTRFS: failed to read the system "
2601 "array on %s\n", sb->s_id); 2591 "array on %s\n", sb->s_id);
2602 goto fail_sb_buffer; 2592 goto fail_sb_buffer;
2603 } 2593 }
@@ -2614,7 +2604,7 @@ int open_ctree(struct super_block *sb,
2614 blocksize, generation); 2604 blocksize, generation);
2615 if (!chunk_root->node || 2605 if (!chunk_root->node ||
2616 !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2606 !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
2617 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 2607 printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
2618 sb->s_id); 2608 sb->s_id);
2619 goto fail_tree_roots; 2609 goto fail_tree_roots;
2620 } 2610 }
@@ -2626,7 +2616,7 @@ int open_ctree(struct super_block *sb,
2626 2616
2627 ret = btrfs_read_chunk_tree(chunk_root); 2617 ret = btrfs_read_chunk_tree(chunk_root);
2628 if (ret) { 2618 if (ret) {
2629 printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", 2619 printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
2630 sb->s_id); 2620 sb->s_id);
2631 goto fail_tree_roots; 2621 goto fail_tree_roots;
2632 } 2622 }
@@ -2638,7 +2628,7 @@ int open_ctree(struct super_block *sb,
2638 btrfs_close_extra_devices(fs_info, fs_devices, 0); 2628 btrfs_close_extra_devices(fs_info, fs_devices, 0);
2639 2629
2640 if (!fs_devices->latest_bdev) { 2630 if (!fs_devices->latest_bdev) {
2641 printk(KERN_CRIT "btrfs: failed to read devices on %s\n", 2631 printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
2642 sb->s_id); 2632 sb->s_id);
2643 goto fail_tree_roots; 2633 goto fail_tree_roots;
2644 } 2634 }
@@ -2653,7 +2643,7 @@ retry_root_backup:
2653 blocksize, generation); 2643 blocksize, generation);
2654 if (!tree_root->node || 2644 if (!tree_root->node ||
2655 !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { 2645 !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
2656 printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", 2646 printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
2657 sb->s_id); 2647 sb->s_id);
2658 2648
2659 goto recovery_tree_root; 2649 goto recovery_tree_root;
@@ -2724,50 +2714,56 @@ retry_root_backup:
2724 2714
2725 ret = btrfs_recover_balance(fs_info); 2715 ret = btrfs_recover_balance(fs_info);
2726 if (ret) { 2716 if (ret) {
2727 printk(KERN_WARNING "btrfs: failed to recover balance\n"); 2717 printk(KERN_WARNING "BTRFS: failed to recover balance\n");
2728 goto fail_block_groups; 2718 goto fail_block_groups;
2729 } 2719 }
2730 2720
2731 ret = btrfs_init_dev_stats(fs_info); 2721 ret = btrfs_init_dev_stats(fs_info);
2732 if (ret) { 2722 if (ret) {
2733 printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", 2723 printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n",
2734 ret); 2724 ret);
2735 goto fail_block_groups; 2725 goto fail_block_groups;
2736 } 2726 }
2737 2727
2738 ret = btrfs_init_dev_replace(fs_info); 2728 ret = btrfs_init_dev_replace(fs_info);
2739 if (ret) { 2729 if (ret) {
2740 pr_err("btrfs: failed to init dev_replace: %d\n", ret); 2730 pr_err("BTRFS: failed to init dev_replace: %d\n", ret);
2741 goto fail_block_groups; 2731 goto fail_block_groups;
2742 } 2732 }
2743 2733
2744 btrfs_close_extra_devices(fs_info, fs_devices, 1); 2734 btrfs_close_extra_devices(fs_info, fs_devices, 1);
2745 2735
2746 ret = btrfs_init_space_info(fs_info); 2736 ret = btrfs_sysfs_add_one(fs_info);
2747 if (ret) { 2737 if (ret) {
2748 printk(KERN_ERR "Failed to initial space info: %d\n", ret); 2738 pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
2749 goto fail_block_groups; 2739 goto fail_block_groups;
2750 } 2740 }
2751 2741
2742 ret = btrfs_init_space_info(fs_info);
2743 if (ret) {
2744 printk(KERN_ERR "BTRFS: Failed to initialize space info: %d\n", ret);
2745 goto fail_sysfs;
2746 }
2747
2752 ret = btrfs_read_block_groups(extent_root); 2748 ret = btrfs_read_block_groups(extent_root);
2753 if (ret) { 2749 if (ret) {
2754 printk(KERN_ERR "Failed to read block groups: %d\n", ret); 2750 printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
2755 goto fail_block_groups; 2751 goto fail_sysfs;
2756 } 2752 }
2757 fs_info->num_tolerated_disk_barrier_failures = 2753 fs_info->num_tolerated_disk_barrier_failures =
2758 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); 2754 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
2759 if (fs_info->fs_devices->missing_devices > 2755 if (fs_info->fs_devices->missing_devices >
2760 fs_info->num_tolerated_disk_barrier_failures && 2756 fs_info->num_tolerated_disk_barrier_failures &&
2761 !(sb->s_flags & MS_RDONLY)) { 2757 !(sb->s_flags & MS_RDONLY)) {
2762 printk(KERN_WARNING 2758 printk(KERN_WARNING "BTRFS: "
2763 "Btrfs: too many missing devices, writeable mount is not allowed\n"); 2759 "too many missing devices, writeable mount is not allowed\n");
2764 goto fail_block_groups; 2760 goto fail_sysfs;
2765 } 2761 }
2766 2762
2767 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 2763 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
2768 "btrfs-cleaner"); 2764 "btrfs-cleaner");
2769 if (IS_ERR(fs_info->cleaner_kthread)) 2765 if (IS_ERR(fs_info->cleaner_kthread))
2770 goto fail_block_groups; 2766 goto fail_sysfs;
2771 2767
2772 fs_info->transaction_kthread = kthread_run(transaction_kthread, 2768 fs_info->transaction_kthread = kthread_run(transaction_kthread,
2773 tree_root, 2769 tree_root,
@@ -2778,11 +2774,15 @@ retry_root_backup:
2778 if (!btrfs_test_opt(tree_root, SSD) && 2774 if (!btrfs_test_opt(tree_root, SSD) &&
2779 !btrfs_test_opt(tree_root, NOSSD) && 2775 !btrfs_test_opt(tree_root, NOSSD) &&
2780 !fs_info->fs_devices->rotating) { 2776 !fs_info->fs_devices->rotating) {
2781 printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD " 2777 printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD "
2782 "mode\n"); 2778 "mode\n");
2783 btrfs_set_opt(fs_info->mount_opt, SSD); 2779 btrfs_set_opt(fs_info->mount_opt, SSD);
2784 } 2780 }
2785 2781
2782 /* Set the real inode map cache flag */
2783 if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE))
2784 btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE);
2785
2786#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 2786#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
2787 if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) { 2787 if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
2788 ret = btrfsic_mount(tree_root, fs_devices, 2788 ret = btrfsic_mount(tree_root, fs_devices,
@@ -2791,7 +2791,7 @@ retry_root_backup:
2791 1 : 0, 2791 1 : 0,
2792 fs_info->check_integrity_print_mask); 2792 fs_info->check_integrity_print_mask);
2793 if (ret) 2793 if (ret)
2794 printk(KERN_WARNING "btrfs: failed to initialize" 2794 printk(KERN_WARNING "BTRFS: failed to initialize"
2795 " integrity check module %s\n", sb->s_id); 2795 " integrity check module %s\n", sb->s_id);
2796 } 2796 }
2797#endif 2797#endif
@@ -2804,7 +2804,7 @@ retry_root_backup:
2804 u64 bytenr = btrfs_super_log_root(disk_super); 2804 u64 bytenr = btrfs_super_log_root(disk_super);
2805 2805
2806 if (fs_devices->rw_devices == 0) { 2806 if (fs_devices->rw_devices == 0) {
2807 printk(KERN_WARNING "Btrfs log replay required " 2807 printk(KERN_WARNING "BTRFS: log replay required "
2808 "on RO media\n"); 2808 "on RO media\n");
2809 err = -EIO; 2809 err = -EIO;
2810 goto fail_qgroup; 2810 goto fail_qgroup;
@@ -2827,7 +2827,7 @@ retry_root_backup:
2827 generation + 1); 2827 generation + 1);
2828 if (!log_tree_root->node || 2828 if (!log_tree_root->node ||
2829 !extent_buffer_uptodate(log_tree_root->node)) { 2829 !extent_buffer_uptodate(log_tree_root->node)) {
2830 printk(KERN_ERR "btrfs: failed to read log tree\n"); 2830 printk(KERN_ERR "BTRFS: failed to read log tree\n");
2831 free_extent_buffer(log_tree_root->node); 2831 free_extent_buffer(log_tree_root->node);
2832 kfree(log_tree_root); 2832 kfree(log_tree_root);
2833 goto fail_trans_kthread; 2833 goto fail_trans_kthread;
@@ -2861,7 +2861,7 @@ retry_root_backup:
2861 ret = btrfs_recover_relocation(tree_root); 2861 ret = btrfs_recover_relocation(tree_root);
2862 if (ret < 0) { 2862 if (ret < 0) {
2863 printk(KERN_WARNING 2863 printk(KERN_WARNING
2864 "btrfs: failed to recover relocation\n"); 2864 "BTRFS: failed to recover relocation\n");
2865 err = -EINVAL; 2865 err = -EINVAL;
2866 goto fail_qgroup; 2866 goto fail_qgroup;
2867 } 2867 }
@@ -2891,14 +2891,14 @@ retry_root_backup:
2891 2891
2892 ret = btrfs_resume_balance_async(fs_info); 2892 ret = btrfs_resume_balance_async(fs_info);
2893 if (ret) { 2893 if (ret) {
2894 printk(KERN_WARNING "btrfs: failed to resume balance\n"); 2894 printk(KERN_WARNING "BTRFS: failed to resume balance\n");
2895 close_ctree(tree_root); 2895 close_ctree(tree_root);
2896 return ret; 2896 return ret;
2897 } 2897 }
2898 2898
2899 ret = btrfs_resume_dev_replace_async(fs_info); 2899 ret = btrfs_resume_dev_replace_async(fs_info);
2900 if (ret) { 2900 if (ret) {
2901 pr_warn("btrfs: failed to resume dev_replace\n"); 2901 pr_warn("BTRFS: failed to resume dev_replace\n");
2902 close_ctree(tree_root); 2902 close_ctree(tree_root);
2903 return ret; 2903 return ret;
2904 } 2904 }
@@ -2906,20 +2906,20 @@ retry_root_backup:
2906 btrfs_qgroup_rescan_resume(fs_info); 2906 btrfs_qgroup_rescan_resume(fs_info);
2907 2907
2908 if (create_uuid_tree) { 2908 if (create_uuid_tree) {
2909 pr_info("btrfs: creating UUID tree\n"); 2909 pr_info("BTRFS: creating UUID tree\n");
2910 ret = btrfs_create_uuid_tree(fs_info); 2910 ret = btrfs_create_uuid_tree(fs_info);
2911 if (ret) { 2911 if (ret) {
2912 pr_warn("btrfs: failed to create the UUID tree %d\n", 2912 pr_warn("BTRFS: failed to create the UUID tree %d\n",
2913 ret); 2913 ret);
2914 close_ctree(tree_root); 2914 close_ctree(tree_root);
2915 return ret; 2915 return ret;
2916 } 2916 }
2917 } else if (check_uuid_tree || 2917 } else if (check_uuid_tree ||
2918 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) { 2918 btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
2919 pr_info("btrfs: checking UUID tree\n"); 2919 pr_info("BTRFS: checking UUID tree\n");
2920 ret = btrfs_check_uuid_tree(fs_info); 2920 ret = btrfs_check_uuid_tree(fs_info);
2921 if (ret) { 2921 if (ret) {
2922 pr_warn("btrfs: failed to check the UUID tree %d\n", 2922 pr_warn("BTRFS: failed to check the UUID tree %d\n",
2923 ret); 2923 ret);
2924 close_ctree(tree_root); 2924 close_ctree(tree_root);
2925 return ret; 2925 return ret;
@@ -2945,6 +2945,9 @@ fail_cleaner:
2945 */ 2945 */
2946 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 2946 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
2947 2947
2948fail_sysfs:
2949 btrfs_sysfs_remove_one(fs_info);
2950
2948fail_block_groups: 2951fail_block_groups:
2949 btrfs_put_block_group_cache(fs_info); 2952 btrfs_put_block_group_cache(fs_info);
2950 btrfs_free_block_groups(fs_info); 2953 btrfs_free_block_groups(fs_info);
@@ -3000,7 +3003,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
3000 struct btrfs_device *device = (struct btrfs_device *) 3003 struct btrfs_device *device = (struct btrfs_device *)
3001 bh->b_private; 3004 bh->b_private;
3002 3005
3003 printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " 3006 printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
3004 "I/O error on %s\n", 3007 "I/O error on %s\n",
3005 rcu_str_deref(device->name)); 3008 rcu_str_deref(device->name));
3006 /* note, we don't set_buffer_write_io_error because we have 3009 /* note, we don't set_buffer_write_io_error because we have
@@ -3119,7 +3122,7 @@ static int write_dev_supers(struct btrfs_device *device,
3119 bh = __getblk(device->bdev, bytenr / 4096, 3122 bh = __getblk(device->bdev, bytenr / 4096,
3120 BTRFS_SUPER_INFO_SIZE); 3123 BTRFS_SUPER_INFO_SIZE);
3121 if (!bh) { 3124 if (!bh) {
3122 printk(KERN_ERR "btrfs: couldn't get super " 3125 printk(KERN_ERR "BTRFS: couldn't get super "
3123 "buffer head for bytenr %Lu\n", bytenr); 3126 "buffer head for bytenr %Lu\n", bytenr);
3124 errors++; 3127 errors++;
3125 continue; 3128 continue;
@@ -3140,7 +3143,10 @@ static int write_dev_supers(struct btrfs_device *device,
3140 * we fua the first super. The others we allow 3143 * we fua the first super. The others we allow
3141 * to go down lazy. 3144 * to go down lazy.
3142 */ 3145 */
3143 ret = btrfsic_submit_bh(WRITE_FUA, bh); 3146 if (i == 0)
3147 ret = btrfsic_submit_bh(WRITE_FUA, bh);
3148 else
3149 ret = btrfsic_submit_bh(WRITE_SYNC, bh);
3144 if (ret) 3150 if (ret)
3145 errors++; 3151 errors++;
3146 } 3152 }
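
The hunk above changes the superblock write policy: only the first copy (i == 0) is submitted with FUA so it reaches stable media before anything depends on it, while the remaining copies go down as plain synchronous writes. A minimal sketch of the same policy, assuming a hypothetical submit() callback that returns nonzero on failure (not the kernel's btrfsic_submit_bh() API):

    struct super_copy;                      /* opaque stand-in for a buffer_head */
    enum write_mode { MODE_FUA, MODE_SYNC };

    /* Submit n superblock copies; force only copy 0 to stable media. */
    static int write_super_copies(struct super_copy **copies, int n,
                                  int (*submit)(enum write_mode, struct super_copy *))
    {
            int i, errors = 0;

            for (i = 0; i < n; i++) {
                    enum write_mode mode = (i == 0) ? MODE_FUA : MODE_SYNC;

                    if (submit(mode, copies[i]))
                            errors++;       /* keep going; caller counts failures */
            }
            return errors;
    }
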
@@ -3186,7 +3192,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
3186 wait_for_completion(&device->flush_wait); 3192 wait_for_completion(&device->flush_wait);
3187 3193
3188 if (bio_flagged(bio, BIO_EOPNOTSUPP)) { 3194 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
3189 printk_in_rcu("btrfs: disabling barriers on dev %s\n", 3195 printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
3190 rcu_str_deref(device->name)); 3196 rcu_str_deref(device->name));
3191 device->nobarriers = 1; 3197 device->nobarriers = 1;
3192 } else if (!bio_flagged(bio, BIO_UPTODATE)) { 3198 } else if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3407,7 +3413,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3407 total_errors++; 3413 total_errors++;
3408 } 3414 }
3409 if (total_errors > max_errors) { 3415 if (total_errors > max_errors) {
3410 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 3416 btrfs_err(root->fs_info, "%d errors while writing supers",
3411 total_errors); 3417 total_errors);
3412 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 3418 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3413 3419
@@ -3455,10 +3461,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
3455 if (btrfs_root_refs(&root->root_item) == 0) 3461 if (btrfs_root_refs(&root->root_item) == 0)
3456 synchronize_srcu(&fs_info->subvol_srcu); 3462 synchronize_srcu(&fs_info->subvol_srcu);
3457 3463
3458 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 3464 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3459 btrfs_free_log(NULL, root); 3465 btrfs_free_log(NULL, root);
3460 btrfs_free_log_root_tree(NULL, fs_info);
3461 }
3462 3466
3463 __btrfs_remove_free_space_cache(root->free_ino_pinned); 3467 __btrfs_remove_free_space_cache(root->free_ino_pinned);
3464 __btrfs_remove_free_space_cache(root->free_ino_ctl); 3468 __btrfs_remove_free_space_cache(root->free_ino_ctl);
@@ -3563,14 +3567,12 @@ int close_ctree(struct btrfs_root *root)
3563 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 3567 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
3564 ret = btrfs_commit_super(root); 3568 ret = btrfs_commit_super(root);
3565 if (ret) 3569 if (ret)
3566 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 3570 btrfs_err(root->fs_info, "commit super ret %d", ret);
3567 } 3571 }
3568 3572
3569 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) 3573 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3570 btrfs_error_commit_super(root); 3574 btrfs_error_commit_super(root);
3571 3575
3572 btrfs_put_block_group_cache(fs_info);
3573
3574 kthread_stop(fs_info->transaction_kthread); 3576 kthread_stop(fs_info->transaction_kthread);
3575 kthread_stop(fs_info->cleaner_kthread); 3577 kthread_stop(fs_info->cleaner_kthread);
3576 3578
@@ -3580,12 +3582,16 @@ int close_ctree(struct btrfs_root *root)
3580 btrfs_free_qgroup_config(root->fs_info); 3582 btrfs_free_qgroup_config(root->fs_info);
3581 3583
3582 if (percpu_counter_sum(&fs_info->delalloc_bytes)) { 3584 if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
3583 printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n", 3585 btrfs_info(root->fs_info, "at unmount delalloc count %lld",
3584 percpu_counter_sum(&fs_info->delalloc_bytes)); 3586 percpu_counter_sum(&fs_info->delalloc_bytes));
3585 } 3587 }
3586 3588
3589 btrfs_sysfs_remove_one(fs_info);
3590
3587 del_fs_roots(fs_info); 3591 del_fs_roots(fs_info);
3588 3592
3593 btrfs_put_block_group_cache(fs_info);
3594
3589 btrfs_free_block_groups(fs_info); 3595 btrfs_free_block_groups(fs_info);
3590 3596
3591 btrfs_stop_all_workers(fs_info); 3597 btrfs_stop_all_workers(fs_info);
@@ -3803,55 +3809,54 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3803 delayed_refs = &trans->delayed_refs; 3809 delayed_refs = &trans->delayed_refs;
3804 3810
3805 spin_lock(&delayed_refs->lock); 3811 spin_lock(&delayed_refs->lock);
3806 if (delayed_refs->num_entries == 0) { 3812 if (atomic_read(&delayed_refs->num_entries) == 0) {
3807 spin_unlock(&delayed_refs->lock); 3813 spin_unlock(&delayed_refs->lock);
3808 printk(KERN_INFO "delayed_refs has NO entry\n"); 3814 btrfs_info(root->fs_info, "delayed_refs has NO entry");
3809 return ret; 3815 return ret;
3810 } 3816 }
3811 3817
3812 while ((node = rb_first(&delayed_refs->root)) != NULL) { 3818 while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
3813 struct btrfs_delayed_ref_head *head = NULL; 3819 struct btrfs_delayed_ref_head *head;
3814 bool pin_bytes = false; 3820 bool pin_bytes = false;
3815 3821
3816 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3822 head = rb_entry(node, struct btrfs_delayed_ref_head,
3817 atomic_set(&ref->refs, 1); 3823 href_node);
3818 if (btrfs_delayed_ref_is_head(ref)) { 3824 if (!mutex_trylock(&head->mutex)) {
3819 3825 atomic_inc(&head->node.refs);
3820 head = btrfs_delayed_node_to_head(ref); 3826 spin_unlock(&delayed_refs->lock);
3821 if (!mutex_trylock(&head->mutex)) {
3822 atomic_inc(&ref->refs);
3823 spin_unlock(&delayed_refs->lock);
3824
3825 /* Need to wait for the delayed ref to run */
3826 mutex_lock(&head->mutex);
3827 mutex_unlock(&head->mutex);
3828 btrfs_put_delayed_ref(ref);
3829
3830 spin_lock(&delayed_refs->lock);
3831 continue;
3832 }
3833 3827
3834 if (head->must_insert_reserved) 3828 mutex_lock(&head->mutex);
3835 pin_bytes = true;
3836 btrfs_free_delayed_extent_op(head->extent_op);
3837 delayed_refs->num_heads--;
3838 if (list_empty(&head->cluster))
3839 delayed_refs->num_heads_ready--;
3840 list_del_init(&head->cluster);
3841 }
3842
3843 ref->in_tree = 0;
3844 rb_erase(&ref->rb_node, &delayed_refs->root);
3845 delayed_refs->num_entries--;
3846 spin_unlock(&delayed_refs->lock);
3847 if (head) {
3848 if (pin_bytes)
3849 btrfs_pin_extent(root, ref->bytenr,
3850 ref->num_bytes, 1);
3851 mutex_unlock(&head->mutex); 3829 mutex_unlock(&head->mutex);
3830 btrfs_put_delayed_ref(&head->node);
3831 spin_lock(&delayed_refs->lock);
3832 continue;
3833 }
3834 spin_lock(&head->lock);
3835 while ((node = rb_first(&head->ref_root)) != NULL) {
3836 ref = rb_entry(node, struct btrfs_delayed_ref_node,
3837 rb_node);
3838 ref->in_tree = 0;
3839 rb_erase(&ref->rb_node, &head->ref_root);
3840 atomic_dec(&delayed_refs->num_entries);
3841 btrfs_put_delayed_ref(ref);
3852 } 3842 }
3853 btrfs_put_delayed_ref(ref); 3843 if (head->must_insert_reserved)
3844 pin_bytes = true;
3845 btrfs_free_delayed_extent_op(head->extent_op);
3846 delayed_refs->num_heads--;
3847 if (head->processing == 0)
3848 delayed_refs->num_heads_ready--;
3849 atomic_dec(&delayed_refs->num_entries);
3850 head->node.in_tree = 0;
3851 rb_erase(&head->href_node, &delayed_refs->href_root);
3852 spin_unlock(&head->lock);
3853 spin_unlock(&delayed_refs->lock);
3854 mutex_unlock(&head->mutex);
3854 3855
3856 if (pin_bytes)
3857 btrfs_pin_extent(root, head->node.bytenr,
3858 head->node.num_bytes, 1);
3859 btrfs_put_delayed_ref(&head->node);
3855 cond_resched(); 3860 cond_resched();
3856 spin_lock(&delayed_refs->lock); 3861 spin_lock(&delayed_refs->lock);
3857 } 3862 }
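
After this hunk, delayed refs no longer live in one flat rbtree: each head in href_root owns a private ref_root tree, so the cleanup path above drains a head's own refs before erasing and releasing the head itself. A sketch of that two-level teardown with singly linked lists standing in for the rbtrees (hypothetical types; the kernel drops refcounts rather than calling free()):

    #include <stdlib.h>

    struct dref { struct dref *next; };
    struct dref_head { struct dref_head *next; struct dref *refs; };

    static void destroy_all_heads(struct dref_head *heads)
    {
            while (heads) {
                    struct dref_head *head = heads;

                    /* drain the refs this head owns first */
                    while (head->refs) {
                            struct dref *r = head->refs;

                            head->refs = r->next;
                            free(r);
                    }
                    heads = head->next;
                    free(head);
            }
    }
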
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9c01509dd8ab..32312e09f0f5 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -35,6 +35,7 @@
35#include "locking.h" 35#include "locking.h"
36#include "free-space-cache.h" 36#include "free-space-cache.h"
37#include "math.h" 37#include "math.h"
38#include "sysfs.h"
38 39
39#undef SCRAMBLE_DELAYED_REFS 40#undef SCRAMBLE_DELAYED_REFS
40 41
@@ -441,7 +442,8 @@ next:
441 if (ret) 442 if (ret)
442 break; 443 break;
443 444
444 if (need_resched()) { 445 if (need_resched() ||
446 rwsem_is_contended(&fs_info->extent_commit_sem)) {
445 caching_ctl->progress = last; 447 caching_ctl->progress = last;
446 btrfs_release_path(path); 448 btrfs_release_path(path);
447 up_read(&fs_info->extent_commit_sem); 449 up_read(&fs_info->extent_commit_sem);
@@ -855,12 +857,14 @@ again:
855 btrfs_put_delayed_ref(&head->node); 857 btrfs_put_delayed_ref(&head->node);
856 goto search_again; 858 goto search_again;
857 } 859 }
860 spin_lock(&head->lock);
858 if (head->extent_op && head->extent_op->update_flags) 861 if (head->extent_op && head->extent_op->update_flags)
859 extent_flags |= head->extent_op->flags_to_set; 862 extent_flags |= head->extent_op->flags_to_set;
860 else 863 else
861 BUG_ON(num_refs == 0); 864 BUG_ON(num_refs == 0);
862 865
863 num_refs += head->node.ref_mod; 866 num_refs += head->node.ref_mod;
867 spin_unlock(&head->lock);
864 mutex_unlock(&head->mutex); 868 mutex_unlock(&head->mutex);
865 } 869 }
866 spin_unlock(&delayed_refs->lock); 870 spin_unlock(&delayed_refs->lock);
@@ -1070,11 +1074,11 @@ static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
1070 __le64 lenum; 1074 __le64 lenum;
1071 1075
1072 lenum = cpu_to_le64(root_objectid); 1076 lenum = cpu_to_le64(root_objectid);
1073 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 1077 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
1074 lenum = cpu_to_le64(owner); 1078 lenum = cpu_to_le64(owner);
1075 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1079 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1076 lenum = cpu_to_le64(offset); 1080 lenum = cpu_to_le64(offset);
1077 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 1081 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
1078 1082
1079 return ((u64)high_crc << 31) ^ (u64)low_crc; 1083 return ((u64)high_crc << 31) ^ (u64)low_crc;
1080} 1084}
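
Only the CRC helper changes in this hunk, but the hash is worth spelling out: each key field is folded into a CRC32C in its little-endian on-disk byte order, and the two 32-bit CRCs are combined with a 31-bit shift so the halves overlap by one bit. A standalone sketch, assuming a crc32c(seed, buf, len) routine (btrfs_crc32c() is the in-tree wrapper):

    #include <endian.h>
    #include <stddef.h>
    #include <stdint.h>

    extern uint32_t crc32c(uint32_t seed, const void *buf, size_t len); /* assumed */

    static uint64_t hash_data_ref(uint64_t root, uint64_t owner, uint64_t offset)
    {
            uint32_t high_crc = ~0u, low_crc = ~0u;
            uint64_t lenum;

            lenum = htole64(root);
            high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
            lenum = htole64(owner);
            low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
            lenum = htole64(offset);
            low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

            /* a 31-bit shift leaves the halves overlapping in one bit */
            return ((uint64_t)high_crc << 31) ^ (uint64_t)low_crc;
    }
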
@@ -2285,64 +2289,62 @@ static noinline struct btrfs_delayed_ref_node *
2285select_delayed_ref(struct btrfs_delayed_ref_head *head) 2289select_delayed_ref(struct btrfs_delayed_ref_head *head)
2286{ 2290{
2287 struct rb_node *node; 2291 struct rb_node *node;
2288 struct btrfs_delayed_ref_node *ref; 2292 struct btrfs_delayed_ref_node *ref, *last = NULL;
2289 int action = BTRFS_ADD_DELAYED_REF; 2293
2290again:
2291 /* 2294 /*
2292 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 2295 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
2293 * this prevents ref count from going down to zero when 2296 * this prevents ref count from going down to zero when
2294 * there still are pending delayed refs. 2297 * there still are pending delayed refs.
2295 */ 2298 */
2296 node = rb_prev(&head->node.rb_node); 2299 node = rb_first(&head->ref_root);
2297 while (1) { 2300 while (node) {
2298 if (!node)
2299 break;
2300 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2301 ref = rb_entry(node, struct btrfs_delayed_ref_node,
2301 rb_node); 2302 rb_node);
2302 if (ref->bytenr != head->node.bytenr) 2303 if (ref->action == BTRFS_ADD_DELAYED_REF)
2303 break;
2304 if (ref->action == action)
2305 return ref; 2304 return ref;
2306 node = rb_prev(node); 2305 else if (last == NULL)
2307 } 2306 last = ref;
2308 if (action == BTRFS_ADD_DELAYED_REF) { 2307 node = rb_next(node);
2309 action = BTRFS_DROP_DELAYED_REF;
2310 goto again;
2311 } 2308 }
2312 return NULL; 2309 return last;
2313} 2310}
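
The rewritten select_delayed_ref() walks the head's own ref tree in order and returns the first pending addition if there is one, otherwise the first ref it saw; running adds before drops keeps the reference count from transiently hitting zero while other updates are still queued. A list-based sketch of the same policy (hypothetical standalone types):

    #define ADD_DELAYED_REF 1               /* mirrors BTRFS_ADD_DELAYED_REF */

    struct ref { int action; struct ref *next; };

    static struct ref *select_ref(struct ref *head)
    {
            struct ref *r, *last = NULL;

            for (r = head; r; r = r->next) {
                    if (r->action == ADD_DELAYED_REF)
                            return r;       /* an addition always wins */
                    if (!last)
                            last = r;       /* remember the first fallback */
            }
            return last;                    /* no ADD pending: run the first ref */
    }
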
2314 2311
2315/* 2312/*
2316 * Returns 0 on success or if called with an already aborted transaction. 2313 * Returns 0 on success or if called with an already aborted transaction.
2317 * Returns -ENOMEM or -EIO on failure and will abort the transaction. 2314 * Returns -ENOMEM or -EIO on failure and will abort the transaction.
2318 */ 2315 */
2319static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2316static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2320 struct btrfs_root *root, 2317 struct btrfs_root *root,
2321 struct list_head *cluster) 2318 unsigned long nr)
2322{ 2319{
2323 struct btrfs_delayed_ref_root *delayed_refs; 2320 struct btrfs_delayed_ref_root *delayed_refs;
2324 struct btrfs_delayed_ref_node *ref; 2321 struct btrfs_delayed_ref_node *ref;
2325 struct btrfs_delayed_ref_head *locked_ref = NULL; 2322 struct btrfs_delayed_ref_head *locked_ref = NULL;
2326 struct btrfs_delayed_extent_op *extent_op; 2323 struct btrfs_delayed_extent_op *extent_op;
2327 struct btrfs_fs_info *fs_info = root->fs_info; 2324 struct btrfs_fs_info *fs_info = root->fs_info;
2325 ktime_t start = ktime_get();
2328 int ret; 2326 int ret;
2329 int count = 0; 2327 unsigned long count = 0;
2328 unsigned long actual_count = 0;
2330 int must_insert_reserved = 0; 2329 int must_insert_reserved = 0;
2331 2330
2332 delayed_refs = &trans->transaction->delayed_refs; 2331 delayed_refs = &trans->transaction->delayed_refs;
2333 while (1) { 2332 while (1) {
2334 if (!locked_ref) { 2333 if (!locked_ref) {
2335 /* pick a new head ref from the cluster list */ 2334 if (count >= nr)
2336 if (list_empty(cluster))
2337 break; 2335 break;
2338 2336
2339 locked_ref = list_entry(cluster->next, 2337 spin_lock(&delayed_refs->lock);
2340 struct btrfs_delayed_ref_head, cluster); 2338 locked_ref = btrfs_select_ref_head(trans);
2339 if (!locked_ref) {
2340 spin_unlock(&delayed_refs->lock);
2341 break;
2342 }
2341 2343
2342 /* grab the lock that says we are going to process 2344 /* grab the lock that says we are going to process
2343 * all the refs for this head */ 2345 * all the refs for this head */
2344 ret = btrfs_delayed_ref_lock(trans, locked_ref); 2346 ret = btrfs_delayed_ref_lock(trans, locked_ref);
2345 2347 spin_unlock(&delayed_refs->lock);
2346 /* 2348 /*
2347 * we may have dropped the spin lock to get the head 2349 * we may have dropped the spin lock to get the head
2348 * mutex lock, and that might have given someone else 2350 * mutex lock, and that might have given someone else
@@ -2363,6 +2365,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2363 * finish. If we merged anything we need to re-loop so we can 2365 * finish. If we merged anything we need to re-loop so we can
2364 * get a good ref. 2366 * get a good ref.
2365 */ 2367 */
2368 spin_lock(&locked_ref->lock);
2366 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, 2369 btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
2367 locked_ref); 2370 locked_ref);
2368 2371
@@ -2374,17 +2377,15 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2374 2377
2375 if (ref && ref->seq && 2378 if (ref && ref->seq &&
2376 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { 2379 btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
2377 /* 2380 spin_unlock(&locked_ref->lock);
2378 * there are still refs with lower seq numbers in the
2379 * process of being added. Don't run this ref yet.
2380 */
2381 list_del_init(&locked_ref->cluster);
2382 btrfs_delayed_ref_unlock(locked_ref); 2381 btrfs_delayed_ref_unlock(locked_ref);
2383 locked_ref = NULL; 2382 spin_lock(&delayed_refs->lock);
2383 locked_ref->processing = 0;
2384 delayed_refs->num_heads_ready++; 2384 delayed_refs->num_heads_ready++;
2385 spin_unlock(&delayed_refs->lock); 2385 spin_unlock(&delayed_refs->lock);
2386 locked_ref = NULL;
2386 cond_resched(); 2387 cond_resched();
2387 spin_lock(&delayed_refs->lock); 2388 count++;
2388 continue; 2389 continue;
2389 } 2390 }
2390 2391
@@ -2399,6 +2400,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2399 locked_ref->extent_op = NULL; 2400 locked_ref->extent_op = NULL;
2400 2401
2401 if (!ref) { 2402 if (!ref) {
2403
2404
2402 /* All delayed refs have been processed, go ahead 2405 /* All delayed refs have been processed, go ahead
2403 * and send the head node to run_one_delayed_ref, 2406 * and send the head node to run_one_delayed_ref,
2404 * so that any accounting fixes can happen 2407 * so that any accounting fixes can happen
@@ -2411,8 +2414,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2411 } 2414 }
2412 2415
2413 if (extent_op) { 2416 if (extent_op) {
2414 spin_unlock(&delayed_refs->lock); 2417 spin_unlock(&locked_ref->lock);
2415
2416 ret = run_delayed_extent_op(trans, root, 2418 ret = run_delayed_extent_op(trans, root,
2417 ref, extent_op); 2419 ref, extent_op);
2418 btrfs_free_delayed_extent_op(extent_op); 2420 btrfs_free_delayed_extent_op(extent_op);
@@ -2426,19 +2428,39 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2426 */ 2428 */
2427 if (must_insert_reserved) 2429 if (must_insert_reserved)
2428 locked_ref->must_insert_reserved = 1; 2430 locked_ref->must_insert_reserved = 1;
2431 locked_ref->processing = 0;
2429 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); 2432 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
2430 spin_lock(&delayed_refs->lock);
2431 btrfs_delayed_ref_unlock(locked_ref); 2433 btrfs_delayed_ref_unlock(locked_ref);
2432 return ret; 2434 return ret;
2433 } 2435 }
2436 continue;
2437 }
2434 2438
2435 goto next; 2439 /*
2440 * Need to drop our head ref lock and re-acquire the
2441 * delayed ref lock and then re-check to make sure
2442 * nothing got added.
2443 */
2444 spin_unlock(&locked_ref->lock);
2445 spin_lock(&delayed_refs->lock);
2446 spin_lock(&locked_ref->lock);
2447 if (rb_first(&locked_ref->ref_root)) {
2448 spin_unlock(&locked_ref->lock);
2449 spin_unlock(&delayed_refs->lock);
2450 continue;
2436 } 2451 }
2452 ref->in_tree = 0;
2453 delayed_refs->num_heads--;
2454 rb_erase(&locked_ref->href_node,
2455 &delayed_refs->href_root);
2456 spin_unlock(&delayed_refs->lock);
2457 } else {
2458 actual_count++;
2459 ref->in_tree = 0;
2460 rb_erase(&ref->rb_node, &locked_ref->ref_root);
2437 } 2461 }
2462 atomic_dec(&delayed_refs->num_entries);
2438 2463
2439 ref->in_tree = 0;
2440 rb_erase(&ref->rb_node, &delayed_refs->root);
2441 delayed_refs->num_entries--;
2442 if (!btrfs_delayed_ref_is_head(ref)) { 2464 if (!btrfs_delayed_ref_is_head(ref)) {
2443 /* 2465 /*
2444 * when we play the delayed ref, also correct the 2466 * when we play the delayed ref, also correct the
@@ -2455,20 +2477,18 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2455 default: 2477 default:
2456 WARN_ON(1); 2478 WARN_ON(1);
2457 } 2479 }
2458 } else {
2459 list_del_init(&locked_ref->cluster);
2460 } 2480 }
2461 spin_unlock(&delayed_refs->lock); 2481 spin_unlock(&locked_ref->lock);
2462 2482
2463 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2483 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2464 must_insert_reserved); 2484 must_insert_reserved);
2465 2485
2466 btrfs_free_delayed_extent_op(extent_op); 2486 btrfs_free_delayed_extent_op(extent_op);
2467 if (ret) { 2487 if (ret) {
2488 locked_ref->processing = 0;
2468 btrfs_delayed_ref_unlock(locked_ref); 2489 btrfs_delayed_ref_unlock(locked_ref);
2469 btrfs_put_delayed_ref(ref); 2490 btrfs_put_delayed_ref(ref);
2470 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret); 2491 btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
2471 spin_lock(&delayed_refs->lock);
2472 return ret; 2492 return ret;
2473 } 2493 }
2474 2494
@@ -2484,11 +2504,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2484 } 2504 }
2485 btrfs_put_delayed_ref(ref); 2505 btrfs_put_delayed_ref(ref);
2486 count++; 2506 count++;
2487next:
2488 cond_resched(); 2507 cond_resched();
2508 }
2509
2510 /*
2511 * We don't want to include ref heads here: empty ref heads do only
2512 * accounting, no actual extent tree updates, and would drastically
2513 * skew the average runtime down.
2514 */
2515 if (actual_count > 0) {
2516 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start));
2517 u64 avg;
2518
2519 /*
2520 * We weigh the current average higher than our current runtime
2521 * to avoid large swings in the average.
2522 */
2489 spin_lock(&delayed_refs->lock); 2523 spin_lock(&delayed_refs->lock);
2524 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
2525 avg = div64_u64(avg, 4);
2526 fs_info->avg_delayed_ref_runtime = avg;
2527 spin_unlock(&delayed_refs->lock);
2490 } 2528 }
2491 return count; 2529 return 0;
2492} 2530}
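
The accounting added at the tail of __btrfs_run_delayed_refs() keeps a running average of how long a batch of real ref updates takes, weighted 3:1 in favor of the old value so a single slow batch cannot swing it: with a stored average of 80us and a new sample of 40us, the update gives (3*80 + 40)/4 = 70us. The update in isolation:

    #include <stdint.h>

    /* Weight the old average 3:1 over the new sample to damp swings. */
    static inline uint64_t update_avg_runtime(uint64_t avg, uint64_t sample_ns)
    {
            return (avg * 3 + sample_ns) / 4;
    }
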
2493 2531
2494#ifdef SCRAMBLE_DELAYED_REFS 2532#ifdef SCRAMBLE_DELAYED_REFS
@@ -2570,16 +2608,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
2570 return ret; 2608 return ret;
2571} 2609}
2572 2610
2573static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
2574 int count)
2575{
2576 int val = atomic_read(&delayed_refs->ref_seq);
2577
2578 if (val < seq || val >= seq + count)
2579 return 1;
2580 return 0;
2581}
2582
2583static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) 2611static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2584{ 2612{
2585 u64 num_bytes; 2613 u64 num_bytes;
@@ -2596,7 +2624,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
2596 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); 2624 return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
2597} 2625}
2598 2626
2599int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans, 2627int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
2600 struct btrfs_root *root) 2628 struct btrfs_root *root)
2601{ 2629{
2602 struct btrfs_block_rsv *global_rsv; 2630 struct btrfs_block_rsv *global_rsv;
@@ -2625,6 +2653,22 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2625 return ret; 2653 return ret;
2626} 2654}
2627 2655
2656int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
2657 struct btrfs_root *root)
2658{
2659 struct btrfs_fs_info *fs_info = root->fs_info;
2660 u64 num_entries =
2661 atomic_read(&trans->transaction->delayed_refs.num_entries);
2662 u64 avg_runtime;
2663
2664 smp_mb();
2665 avg_runtime = fs_info->avg_delayed_ref_runtime;
2666 if (num_entries * avg_runtime >= NSEC_PER_SEC)
2667 return 1;
2668
2669 return btrfs_check_space_for_delayed_refs(trans, root);
2670}
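
btrfs_should_throttle_delayed_refs() turns that average into the new throttle: once the queued entries are projected to need a full second of processing (num_entries * avg >= NSEC_PER_SEC), the caller should stop and run refs; with a 10us average, for example, the threshold is 100,000 entries. Only when that projection is fine does it fall back to the metadata-space check. The test in isolation:

    #include <stdint.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* Throttle once the backlog is projected to take a second or more. */
    static int should_throttle(uint64_t num_entries, uint64_t avg_runtime_ns)
    {
            return num_entries * avg_runtime_ns >= NSEC_PER_SEC;
    }
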
2671
2628/* 2672/*
2629 * this starts processing the delayed reference count updates and 2673 * this starts processing the delayed reference count updates and
2630 * extent insertions we have queued up so far. count can be 2674 * extent insertions we have queued up so far. count can be
@@ -2640,13 +2684,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2640{ 2684{
2641 struct rb_node *node; 2685 struct rb_node *node;
2642 struct btrfs_delayed_ref_root *delayed_refs; 2686 struct btrfs_delayed_ref_root *delayed_refs;
2643 struct btrfs_delayed_ref_node *ref; 2687 struct btrfs_delayed_ref_head *head;
2644 struct list_head cluster;
2645 int ret; 2688 int ret;
2646 u64 delayed_start;
2647 int run_all = count == (unsigned long)-1; 2689 int run_all = count == (unsigned long)-1;
2648 int run_most = 0; 2690 int run_most = 0;
2649 int loops;
2650 2691
2651 /* We'll clean this up in btrfs_cleanup_transaction */ 2692 /* We'll clean this up in btrfs_cleanup_transaction */
2652 if (trans->aborted) 2693 if (trans->aborted)
@@ -2658,130 +2699,40 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2658 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); 2699 btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
2659 2700
2660 delayed_refs = &trans->transaction->delayed_refs; 2701 delayed_refs = &trans->transaction->delayed_refs;
2661 INIT_LIST_HEAD(&cluster);
2662 if (count == 0) { 2702 if (count == 0) {
2663 count = delayed_refs->num_entries * 2; 2703 count = atomic_read(&delayed_refs->num_entries) * 2;
2664 run_most = 1; 2704 run_most = 1;
2665 } 2705 }
2666 2706
2667 if (!run_all && !run_most) {
2668 int old;
2669 int seq = atomic_read(&delayed_refs->ref_seq);
2670
2671progress:
2672 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2673 if (old) {
2674 DEFINE_WAIT(__wait);
2675 if (delayed_refs->flushing ||
2676 !btrfs_should_throttle_delayed_refs(trans, root))
2677 return 0;
2678
2679 prepare_to_wait(&delayed_refs->wait, &__wait,
2680 TASK_UNINTERRUPTIBLE);
2681
2682 old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
2683 if (old) {
2684 schedule();
2685 finish_wait(&delayed_refs->wait, &__wait);
2686
2687 if (!refs_newer(delayed_refs, seq, 256))
2688 goto progress;
2689 else
2690 return 0;
2691 } else {
2692 finish_wait(&delayed_refs->wait, &__wait);
2693 goto again;
2694 }
2695 }
2696
2697 } else {
2698 atomic_inc(&delayed_refs->procs_running_refs);
2699 }
2700
2701again: 2707again:
2702 loops = 0;
2703 spin_lock(&delayed_refs->lock);
2704
2705#ifdef SCRAMBLE_DELAYED_REFS 2708#ifdef SCRAMBLE_DELAYED_REFS
2706 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); 2709 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2707#endif 2710#endif
2708 2711 ret = __btrfs_run_delayed_refs(trans, root, count);
2709 while (1) { 2712 if (ret < 0) {
2710 if (!(run_all || run_most) && 2713 btrfs_abort_transaction(trans, root, ret);
2711 !btrfs_should_throttle_delayed_refs(trans, root)) 2714 return ret;
2712 break;
2713
2714 /*
2715 * go find something we can process in the rbtree. We start at
2716 * the beginning of the tree, and then build a cluster
2717 * of refs to process starting at the first one we are able to
2718 * lock
2719 */
2720 delayed_start = delayed_refs->run_delayed_start;
2721 ret = btrfs_find_ref_cluster(trans, &cluster,
2722 delayed_refs->run_delayed_start);
2723 if (ret)
2724 break;
2725
2726 ret = run_clustered_refs(trans, root, &cluster);
2727 if (ret < 0) {
2728 btrfs_release_ref_cluster(&cluster);
2729 spin_unlock(&delayed_refs->lock);
2730 btrfs_abort_transaction(trans, root, ret);
2731 atomic_dec(&delayed_refs->procs_running_refs);
2732 wake_up(&delayed_refs->wait);
2733 return ret;
2734 }
2735
2736 atomic_add(ret, &delayed_refs->ref_seq);
2737
2738 count -= min_t(unsigned long, ret, count);
2739
2740 if (count == 0)
2741 break;
2742
2743 if (delayed_start >= delayed_refs->run_delayed_start) {
2744 if (loops == 0) {
2745 /*
2746 * btrfs_find_ref_cluster looped. let's do one
2747 * more cycle. if we don't run any delayed ref
2748 * during that cycle (because we can't because
2749 * all of them are blocked), bail out.
2750 */
2751 loops = 1;
2752 } else {
2753 /*
2754 * no runnable refs left, stop trying
2755 */
2756 BUG_ON(run_all);
2757 break;
2758 }
2759 }
2760 if (ret) {
2761 /* refs were run, let's reset staleness detection */
2762 loops = 0;
2763 }
2764 } 2715 }
2765 2716
2766 if (run_all) { 2717 if (run_all) {
2767 if (!list_empty(&trans->new_bgs)) { 2718 if (!list_empty(&trans->new_bgs))
2768 spin_unlock(&delayed_refs->lock);
2769 btrfs_create_pending_block_groups(trans, root); 2719 btrfs_create_pending_block_groups(trans, root);
2770 spin_lock(&delayed_refs->lock);
2771 }
2772 2720
2773 node = rb_first(&delayed_refs->root); 2721 spin_lock(&delayed_refs->lock);
2774 if (!node) 2722 node = rb_first(&delayed_refs->href_root);
2723 if (!node) {
2724 spin_unlock(&delayed_refs->lock);
2775 goto out; 2725 goto out;
2726 }
2776 count = (unsigned long)-1; 2727 count = (unsigned long)-1;
2777 2728
2778 while (node) { 2729 while (node) {
2779 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2730 head = rb_entry(node, struct btrfs_delayed_ref_head,
2780 rb_node); 2731 href_node);
2781 if (btrfs_delayed_ref_is_head(ref)) { 2732 if (btrfs_delayed_ref_is_head(&head->node)) {
2782 struct btrfs_delayed_ref_head *head; 2733 struct btrfs_delayed_ref_node *ref;
2783 2734
2784 head = btrfs_delayed_node_to_head(ref); 2735 ref = &head->node;
2785 atomic_inc(&ref->refs); 2736 atomic_inc(&ref->refs);
2786 2737
2787 spin_unlock(&delayed_refs->lock); 2738 spin_unlock(&delayed_refs->lock);
@@ -2795,20 +2746,16 @@ again:
2795 btrfs_put_delayed_ref(ref); 2746 btrfs_put_delayed_ref(ref);
2796 cond_resched(); 2747 cond_resched();
2797 goto again; 2748 goto again;
2749 } else {
2750 WARN_ON(1);
2798 } 2751 }
2799 node = rb_next(node); 2752 node = rb_next(node);
2800 } 2753 }
2801 spin_unlock(&delayed_refs->lock); 2754 spin_unlock(&delayed_refs->lock);
2802 schedule_timeout(1); 2755 cond_resched();
2803 goto again; 2756 goto again;
2804 } 2757 }
2805out: 2758out:
2806 atomic_dec(&delayed_refs->procs_running_refs);
2807 smp_mb();
2808 if (waitqueue_active(&delayed_refs->wait))
2809 wake_up(&delayed_refs->wait);
2810
2811 spin_unlock(&delayed_refs->lock);
2812 assert_qgroups_uptodate(trans); 2759 assert_qgroups_uptodate(trans);
2813 return 0; 2760 return 0;
2814} 2761}
@@ -2850,12 +2797,13 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2850 struct rb_node *node; 2797 struct rb_node *node;
2851 int ret = 0; 2798 int ret = 0;
2852 2799
2853 ret = -ENOENT;
2854 delayed_refs = &trans->transaction->delayed_refs; 2800 delayed_refs = &trans->transaction->delayed_refs;
2855 spin_lock(&delayed_refs->lock); 2801 spin_lock(&delayed_refs->lock);
2856 head = btrfs_find_delayed_ref_head(trans, bytenr); 2802 head = btrfs_find_delayed_ref_head(trans, bytenr);
2857 if (!head) 2803 if (!head) {
2858 goto out; 2804 spin_unlock(&delayed_refs->lock);
2805 return 0;
2806 }
2859 2807
2860 if (!mutex_trylock(&head->mutex)) { 2808 if (!mutex_trylock(&head->mutex)) {
2861 atomic_inc(&head->node.refs); 2809 atomic_inc(&head->node.refs);
@@ -2872,40 +2820,35 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2872 btrfs_put_delayed_ref(&head->node); 2820 btrfs_put_delayed_ref(&head->node);
2873 return -EAGAIN; 2821 return -EAGAIN;
2874 } 2822 }
2823 spin_unlock(&delayed_refs->lock);
2875 2824
2876 node = rb_prev(&head->node.rb_node); 2825 spin_lock(&head->lock);
2877 if (!node) 2826 node = rb_first(&head->ref_root);
2878 goto out_unlock; 2827 while (node) {
2879 2828 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2880 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2829 node = rb_next(node);
2881
2882 if (ref->bytenr != bytenr)
2883 goto out_unlock;
2884
2885 ret = 1;
2886 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2887 goto out_unlock;
2888 2830
2889 data_ref = btrfs_delayed_node_to_data_ref(ref); 2831 /* If it's a shared ref we know a cross reference exists */
2832 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
2833 ret = 1;
2834 break;
2835 }
2890 2836
2891 node = rb_prev(node); 2837 data_ref = btrfs_delayed_node_to_data_ref(ref);
2892 if (node) {
2893 int seq = ref->seq;
2894 2838
2895 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2839 /*
2896 if (ref->bytenr == bytenr && ref->seq == seq) 2840 * If our ref doesn't match the one we're currently looking at
2897 goto out_unlock; 2841 * then we have a cross reference.
2842 */
2843 if (data_ref->root != root->root_key.objectid ||
2844 data_ref->objectid != objectid ||
2845 data_ref->offset != offset) {
2846 ret = 1;
2847 break;
2848 }
2898 } 2849 }
2899 2850 spin_unlock(&head->lock);
2900 if (data_ref->root != root->root_key.objectid ||
2901 data_ref->objectid != objectid || data_ref->offset != offset)
2902 goto out_unlock;
2903
2904 ret = 0;
2905out_unlock:
2906 mutex_unlock(&head->mutex); 2851 mutex_unlock(&head->mutex);
2907out:
2908 spin_unlock(&delayed_refs->lock);
2909 return ret; 2852 return ret;
2910} 2853}
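
Instead of peeking at rbtree neighbors, the rewritten check walks every ref still queued on the head under head->lock: any shared backref, or any data ref whose (root, objectid, offset) triple differs from the extent being tested, proves a cross reference and ends the scan early. An array-based sketch of that test (hypothetical standalone types):

    #include <stdint.h>

    struct qref {
            int shared;                     /* set for non-data (shared) backrefs */
            uint64_t root, objectid, offset;
    };

    static int has_cross_ref(const struct qref *refs, int n, uint64_t root,
                             uint64_t objectid, uint64_t offset)
    {
            int i;

            for (i = 0; i < n; i++) {
                    if (refs[i].shared)
                            return 1;       /* shared ref: cross reference by definition */
                    if (refs[i].root != root ||
                        refs[i].objectid != objectid ||
                        refs[i].offset != offset)
                            return 1;       /* queued for a different owner */
            }
            return 0;
    }
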
2911 2854
@@ -3402,6 +3345,23 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
3402 return readonly; 3345 return readonly;
3403} 3346}
3404 3347
3348static const char *alloc_name(u64 flags)
3349{
3350 switch (flags) {
3351 case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
3352 return "mixed";
3353 case BTRFS_BLOCK_GROUP_METADATA:
3354 return "metadata";
3355 case BTRFS_BLOCK_GROUP_DATA:
3356 return "data";
3357 case BTRFS_BLOCK_GROUP_SYSTEM:
3358 return "system";
3359 default:
3360 WARN_ON(1);
3361 return "invalid-combination";
3362 };
3363}
3364
3405static int update_space_info(struct btrfs_fs_info *info, u64 flags, 3365static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3406 u64 total_bytes, u64 bytes_used, 3366 u64 total_bytes, u64 bytes_used,
3407 struct btrfs_space_info **space_info) 3367 struct btrfs_space_info **space_info)
@@ -3439,8 +3399,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3439 return ret; 3399 return ret;
3440 } 3400 }
3441 3401
3442 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 3402 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
3443 INIT_LIST_HEAD(&found->block_groups[i]); 3403 INIT_LIST_HEAD(&found->block_groups[i]);
3404 kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
3405 }
3444 init_rwsem(&found->groups_sem); 3406 init_rwsem(&found->groups_sem);
3445 spin_lock_init(&found->lock); 3407 spin_lock_init(&found->lock);
3446 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; 3408 found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -3457,11 +3419,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
3457 found->chunk_alloc = 0; 3419 found->chunk_alloc = 0;
3458 found->flush = 0; 3420 found->flush = 0;
3459 init_waitqueue_head(&found->wait); 3421 init_waitqueue_head(&found->wait);
3422
3423 ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
3424 info->space_info_kobj, "%s",
3425 alloc_name(found->flags));
3426 if (ret) {
3427 kfree(found);
3428 return ret;
3429 }
3430
3460 *space_info = found; 3431 *space_info = found;
3461 list_add_rcu(&found->list, &info->space_info); 3432 list_add_rcu(&found->list, &info->space_info);
3462 if (flags & BTRFS_BLOCK_GROUP_DATA) 3433 if (flags & BTRFS_BLOCK_GROUP_DATA)
3463 info->data_sinfo = found; 3434 info->data_sinfo = found;
3464 return 0; 3435
3436 return ret;
3465} 3437}
3466 3438
3467static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 3439static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
@@ -4637,7 +4609,7 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
4637 u64 num_bytes) 4609 u64 num_bytes)
4638{ 4610{
4639 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 4611 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
4640 if (global_rsv->full || global_rsv == block_rsv || 4612 if (global_rsv == block_rsv ||
4641 block_rsv->space_info != global_rsv->space_info) 4613 block_rsv->space_info != global_rsv->space_info)
4642 global_rsv = NULL; 4614 global_rsv = NULL;
4643 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv, 4615 block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
@@ -5916,24 +5888,16 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5916{ 5888{
5917 struct btrfs_delayed_ref_head *head; 5889 struct btrfs_delayed_ref_head *head;
5918 struct btrfs_delayed_ref_root *delayed_refs; 5890 struct btrfs_delayed_ref_root *delayed_refs;
5919 struct btrfs_delayed_ref_node *ref;
5920 struct rb_node *node;
5921 int ret = 0; 5891 int ret = 0;
5922 5892
5923 delayed_refs = &trans->transaction->delayed_refs; 5893 delayed_refs = &trans->transaction->delayed_refs;
5924 spin_lock(&delayed_refs->lock); 5894 spin_lock(&delayed_refs->lock);
5925 head = btrfs_find_delayed_ref_head(trans, bytenr); 5895 head = btrfs_find_delayed_ref_head(trans, bytenr);
5926 if (!head) 5896 if (!head)
5927 goto out; 5897 goto out_delayed_unlock;
5928 5898
5929 node = rb_prev(&head->node.rb_node); 5899 spin_lock(&head->lock);
5930 if (!node) 5900 if (rb_first(&head->ref_root))
5931 goto out;
5932
5933 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
5934
5935 /* there are still entries for this ref, we can't drop it */
5936 if (ref->bytenr == bytenr)
5937 goto out; 5901 goto out;
5938 5902
5939 if (head->extent_op) { 5903 if (head->extent_op) {
@@ -5955,19 +5919,19 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5955 * ahead and process it. 5919 * ahead and process it.
5956 */ 5920 */
5957 head->node.in_tree = 0; 5921 head->node.in_tree = 0;
5958 rb_erase(&head->node.rb_node, &delayed_refs->root); 5922 rb_erase(&head->href_node, &delayed_refs->href_root);
5959 5923
5960 delayed_refs->num_entries--; 5924 atomic_dec(&delayed_refs->num_entries);
5961 5925
5962 /* 5926 /*
5963 * we don't take a ref on the node because we're removing it from the 5927 * we don't take a ref on the node because we're removing it from the
5964 * tree, so we just steal the ref the tree was holding. 5928 * tree, so we just steal the ref the tree was holding.
5965 */ 5929 */
5966 delayed_refs->num_heads--; 5930 delayed_refs->num_heads--;
5967 if (list_empty(&head->cluster)) 5931 if (head->processing == 0)
5968 delayed_refs->num_heads_ready--; 5932 delayed_refs->num_heads_ready--;
5969 5933 head->processing = 0;
5970 list_del_init(&head->cluster); 5934 spin_unlock(&head->lock);
5971 spin_unlock(&delayed_refs->lock); 5935 spin_unlock(&delayed_refs->lock);
5972 5936
5973 BUG_ON(head->extent_op); 5937 BUG_ON(head->extent_op);
@@ -5978,6 +5942,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5978 btrfs_put_delayed_ref(&head->node); 5942 btrfs_put_delayed_ref(&head->node);
5979 return ret; 5943 return ret;
5980out: 5944out:
5945 spin_unlock(&head->lock);
5946
5947out_delayed_unlock:
5981 spin_unlock(&delayed_refs->lock); 5948 spin_unlock(&delayed_refs->lock);
5982 return 0; 5949 return 0;
5983} 5950}
@@ -6145,11 +6112,29 @@ int __get_raid_index(u64 flags)
6145 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ 6112 return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
6146} 6113}
6147 6114
6148static int get_block_group_index(struct btrfs_block_group_cache *cache) 6115int get_block_group_index(struct btrfs_block_group_cache *cache)
6149{ 6116{
6150 return __get_raid_index(cache->flags); 6117 return __get_raid_index(cache->flags);
6151} 6118}
6152 6119
6120static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
6121 [BTRFS_RAID_RAID10] = "raid10",
6122 [BTRFS_RAID_RAID1] = "raid1",
6123 [BTRFS_RAID_DUP] = "dup",
6124 [BTRFS_RAID_RAID0] = "raid0",
6125 [BTRFS_RAID_SINGLE] = "single",
6126 [BTRFS_RAID_RAID5] = "raid5",
6127 [BTRFS_RAID_RAID6] = "raid6",
6128};
6129
6130static const char *get_raid_name(enum btrfs_raid_types type)
6131{
6132 if (type >= BTRFS_NR_RAID_TYPES)
6133 return NULL;
6134
6135 return btrfs_raid_type_names[type];
6136}
6137
6153enum btrfs_loop_type { 6138enum btrfs_loop_type {
6154 LOOP_CACHING_NOWAIT = 0, 6139 LOOP_CACHING_NOWAIT = 0,
6155 LOOP_CACHING_WAIT = 1, 6140 LOOP_CACHING_WAIT = 1,
@@ -6177,7 +6162,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6177 struct btrfs_root *root = orig_root->fs_info->extent_root; 6162 struct btrfs_root *root = orig_root->fs_info->extent_root;
6178 struct btrfs_free_cluster *last_ptr = NULL; 6163 struct btrfs_free_cluster *last_ptr = NULL;
6179 struct btrfs_block_group_cache *block_group = NULL; 6164 struct btrfs_block_group_cache *block_group = NULL;
6180 struct btrfs_block_group_cache *used_block_group;
6181 u64 search_start = 0; 6165 u64 search_start = 0;
6182 u64 max_extent_size = 0; 6166 u64 max_extent_size = 0;
6183 int empty_cluster = 2 * 1024 * 1024; 6167 int empty_cluster = 2 * 1024 * 1024;
@@ -6186,7 +6170,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6186 int index = __get_raid_index(flags); 6170 int index = __get_raid_index(flags);
6187 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? 6171 int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
6188 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 6172 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
6189 bool found_uncached_bg = false;
6190 bool failed_cluster_refill = false; 6173 bool failed_cluster_refill = false;
6191 bool failed_alloc = false; 6174 bool failed_alloc = false;
6192 bool use_cluster = true; 6175 bool use_cluster = true;
@@ -6239,7 +6222,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6239 if (search_start == hint_byte) { 6222 if (search_start == hint_byte) {
6240 block_group = btrfs_lookup_block_group(root->fs_info, 6223 block_group = btrfs_lookup_block_group(root->fs_info,
6241 search_start); 6224 search_start);
6242 used_block_group = block_group;
6243 /* 6225 /*
6244 * we don't want to use the block group if it doesn't match our 6226 * we don't want to use the block group if it doesn't match our
6245 * allocation bits, or if its not cached. 6227 * allocation bits, or if its not cached.
@@ -6276,7 +6258,6 @@ search:
6276 u64 offset; 6258 u64 offset;
6277 int cached; 6259 int cached;
6278 6260
6279 used_block_group = block_group;
6280 btrfs_get_block_group(block_group); 6261 btrfs_get_block_group(block_group);
6281 search_start = block_group->key.objectid; 6262 search_start = block_group->key.objectid;
6282 6263
@@ -6304,7 +6285,6 @@ search:
6304have_block_group: 6285have_block_group:
6305 cached = block_group_cache_done(block_group); 6286 cached = block_group_cache_done(block_group);
6306 if (unlikely(!cached)) { 6287 if (unlikely(!cached)) {
6307 found_uncached_bg = true;
6308 ret = cache_block_group(block_group, 0); 6288 ret = cache_block_group(block_group, 0);
6309 BUG_ON(ret < 0); 6289 BUG_ON(ret < 0);
6310 ret = 0; 6290 ret = 0;
@@ -6320,6 +6300,7 @@ have_block_group:
6320 * lets look there 6300 * lets look there
6321 */ 6301 */
6322 if (last_ptr) { 6302 if (last_ptr) {
6303 struct btrfs_block_group_cache *used_block_group;
6323 unsigned long aligned_cluster; 6304 unsigned long aligned_cluster;
6324 /* 6305 /*
6325 * the refill lock keeps out other 6306 * the refill lock keeps out other
@@ -6330,10 +6311,8 @@ have_block_group:
6330 if (used_block_group != block_group && 6311 if (used_block_group != block_group &&
6331 (!used_block_group || 6312 (!used_block_group ||
6332 used_block_group->ro || 6313 used_block_group->ro ||
6333 !block_group_bits(used_block_group, flags))) { 6314 !block_group_bits(used_block_group, flags)))
6334 used_block_group = block_group;
6335 goto refill_cluster; 6315 goto refill_cluster;
6336 }
6337 6316
6338 if (used_block_group != block_group) 6317 if (used_block_group != block_group)
6339 btrfs_get_block_group(used_block_group); 6318 btrfs_get_block_group(used_block_group);
@@ -6347,17 +6326,19 @@ have_block_group:
6347 /* we have a block, we're done */ 6326 /* we have a block, we're done */
6348 spin_unlock(&last_ptr->refill_lock); 6327 spin_unlock(&last_ptr->refill_lock);
6349 trace_btrfs_reserve_extent_cluster(root, 6328 trace_btrfs_reserve_extent_cluster(root,
6350 block_group, search_start, num_bytes); 6329 used_block_group,
6330 search_start, num_bytes);
6331 if (used_block_group != block_group) {
6332 btrfs_put_block_group(block_group);
6333 block_group = used_block_group;
6334 }
6351 goto checks; 6335 goto checks;
6352 } 6336 }
6353 6337
6354 WARN_ON(last_ptr->block_group != used_block_group); 6338 WARN_ON(last_ptr->block_group != used_block_group);
6355 if (used_block_group != block_group) { 6339 if (used_block_group != block_group)
6356 btrfs_put_block_group(used_block_group); 6340 btrfs_put_block_group(used_block_group);
6357 used_block_group = block_group;
6358 }
6359refill_cluster: 6341refill_cluster:
6360 BUG_ON(used_block_group != block_group);
6361 /* If we are on LOOP_NO_EMPTY_SIZE, we can't 6342 /* If we are on LOOP_NO_EMPTY_SIZE, we can't
6362 * set up new clusters, so let's just skip it 6343 * set up new clusters, so let's just skip it
6363 * and let the allocator find whatever block 6344 * and let the allocator find whatever block
@@ -6476,25 +6457,25 @@ unclustered_alloc:
6476 goto loop; 6457 goto loop;
6477 } 6458 }
6478checks: 6459checks:
6479 search_start = stripe_align(root, used_block_group, 6460 search_start = stripe_align(root, block_group,
6480 offset, num_bytes); 6461 offset, num_bytes);
6481 6462
6482 /* move on to the next group */ 6463 /* move on to the next group */
6483 if (search_start + num_bytes > 6464 if (search_start + num_bytes >
6484 used_block_group->key.objectid + used_block_group->key.offset) { 6465 block_group->key.objectid + block_group->key.offset) {
6485 btrfs_add_free_space(used_block_group, offset, num_bytes); 6466 btrfs_add_free_space(block_group, offset, num_bytes);
6486 goto loop; 6467 goto loop;
6487 } 6468 }
6488 6469
6489 if (offset < search_start) 6470 if (offset < search_start)
6490 btrfs_add_free_space(used_block_group, offset, 6471 btrfs_add_free_space(block_group, offset,
6491 search_start - offset); 6472 search_start - offset);
6492 BUG_ON(offset > search_start); 6473 BUG_ON(offset > search_start);
6493 6474
6494 ret = btrfs_update_reserved_bytes(used_block_group, num_bytes, 6475 ret = btrfs_update_reserved_bytes(block_group, num_bytes,
6495 alloc_type); 6476 alloc_type);
6496 if (ret == -EAGAIN) { 6477 if (ret == -EAGAIN) {
6497 btrfs_add_free_space(used_block_group, offset, num_bytes); 6478 btrfs_add_free_space(block_group, offset, num_bytes);
6498 goto loop; 6479 goto loop;
6499 } 6480 }
6500 6481
@@ -6504,16 +6485,12 @@ checks:
6504 6485
6505 trace_btrfs_reserve_extent(orig_root, block_group, 6486 trace_btrfs_reserve_extent(orig_root, block_group,
6506 search_start, num_bytes); 6487 search_start, num_bytes);
6507 if (used_block_group != block_group)
6508 btrfs_put_block_group(used_block_group);
6509 btrfs_put_block_group(block_group); 6488 btrfs_put_block_group(block_group);
6510 break; 6489 break;
6511loop: 6490loop:
6512 failed_cluster_refill = false; 6491 failed_cluster_refill = false;
6513 failed_alloc = false; 6492 failed_alloc = false;
6514 BUG_ON(index != get_block_group_index(block_group)); 6493 BUG_ON(index != get_block_group_index(block_group));
6515 if (used_block_group != block_group)
6516 btrfs_put_block_group(used_block_group);
6517 btrfs_put_block_group(block_group); 6494 btrfs_put_block_group(block_group);
6518 } 6495 }
6519 up_read(&space_info->groups_sem); 6496 up_read(&space_info->groups_sem);
@@ -6584,12 +6561,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6584 int index = 0; 6561 int index = 0;
6585 6562
6586 spin_lock(&info->lock); 6563 spin_lock(&info->lock);
6587 printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n", 6564 printk(KERN_INFO "BTRFS: space_info %llu has %llu free, is %sfull\n",
6588 info->flags, 6565 info->flags,
6589 info->total_bytes - info->bytes_used - info->bytes_pinned - 6566 info->total_bytes - info->bytes_used - info->bytes_pinned -
6590 info->bytes_reserved - info->bytes_readonly, 6567 info->bytes_reserved - info->bytes_readonly,
6591 (info->full) ? "" : "not "); 6568 (info->full) ? "" : "not ");
6592 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 6569 printk(KERN_INFO "BTRFS: space_info total=%llu, used=%llu, pinned=%llu, "
6593 "reserved=%llu, may_use=%llu, readonly=%llu\n", 6570 "reserved=%llu, may_use=%llu, readonly=%llu\n",
6594 info->total_bytes, info->bytes_used, info->bytes_pinned, 6571 info->total_bytes, info->bytes_used, info->bytes_pinned,
6595 info->bytes_reserved, info->bytes_may_use, 6572 info->bytes_reserved, info->bytes_may_use,
@@ -6603,7 +6580,9 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
6603again: 6580again:
6604 list_for_each_entry(cache, &info->block_groups[index], list) { 6581 list_for_each_entry(cache, &info->block_groups[index], list) {
6605 spin_lock(&cache->lock); 6582 spin_lock(&cache->lock);
6606 printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", 6583 printk(KERN_INFO "BTRFS: "
6584 "block group %llu has %llu bytes, "
6585 "%llu used %llu pinned %llu reserved %s\n",
6607 cache->key.objectid, cache->key.offset, 6586 cache->key.objectid, cache->key.offset,
6608 btrfs_block_group_used(&cache->item), cache->pinned, 6587 btrfs_block_group_used(&cache->item), cache->pinned,
6609 cache->reserved, cache->ro ? "[readonly]" : ""); 6588 cache->reserved, cache->ro ? "[readonly]" : "");
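The printk() calls in dump_space_info() pick up the capitalized "BTRFS: " prefix used throughout this series. Where an fs_info pointer is at hand, the btrfs_<level> helpers seen elsewhere in this diff (btrfs_err(), btrfs_info()) are the preferred form, since they supply the prefix and trailing newline themselves. A minimal sketch, assuming the helpers behave as they do at the other call sites in this patch:

/* Equivalent message via the helper: prefix and newline are
 * added by btrfs_info() itself, so neither appears in the format. */
btrfs_info(cache->fs_info,
	   "block group %llu has %llu bytes, %llu used",
	   cache->key.objectid, cache->key.offset,
	   btrfs_block_group_used(&cache->item));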
@@ -6966,7 +6945,7 @@ again:
6966 /*DEFAULT_RATELIMIT_BURST*/ 1); 6945 /*DEFAULT_RATELIMIT_BURST*/ 1);
6967 if (__ratelimit(&_rs)) 6946 if (__ratelimit(&_rs))
6968 WARN(1, KERN_DEBUG 6947 WARN(1, KERN_DEBUG
6969 "btrfs: block rsv returned %d\n", ret); 6948 "BTRFS: block rsv returned %d\n", ret);
6970 } 6949 }
6971try_reserve: 6950try_reserve:
6972 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6951 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
@@ -7714,7 +7693,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7714 7693
7715 btrfs_end_transaction_throttle(trans, tree_root); 7694 btrfs_end_transaction_throttle(trans, tree_root);
7716 if (!for_reloc && btrfs_need_cleaner_sleep(root)) { 7695 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
7717 pr_debug("btrfs: drop snapshot early exit\n"); 7696 pr_debug("BTRFS: drop snapshot early exit\n");
7718 err = -EAGAIN; 7697 err = -EAGAIN;
7719 goto out_free; 7698 goto out_free;
7720 } 7699 }
@@ -7779,7 +7758,7 @@ out:
7779 */ 7758 */
7780 if (!for_reloc && root_dropped == false) 7759 if (!for_reloc && root_dropped == false)
7781 btrfs_add_dead_root(root); 7760 btrfs_add_dead_root(root);
7782 if (err) 7761 if (err && err != -EAGAIN)
7783 btrfs_std_error(root->fs_info, err); 7762 btrfs_std_error(root->fs_info, err);
7784 return err; 7763 return err;
7785} 7764}
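The hunk above stops reporting -EAGAIN through btrfs_std_error(): an early exit because the cleaner needs to sleep is an expected outcome of btrfs_drop_snapshot(), not a filesystem error, and the root has already been re-queued via btrfs_add_dead_root(). A hedged sketch of how a periodic caller might consume that contract (illustrative only, not the actual cleaner-thread code):

/* Hypothetical caller: -EAGAIN means "try again on a later pass",
 * anything else is a real failure. Assumes the four-argument
 * btrfs_drop_snapshot() signature used in this tree. */
static void try_drop_dead_root(struct btrfs_root *root)
{
	int ret = btrfs_drop_snapshot(root, NULL, 0, 0);

	if (ret == -EAGAIN)
		return;		/* re-queued; retried next pass */
	if (ret)
		pr_err("BTRFS: drop snapshot failed: %d\n", ret);
}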
@@ -8333,6 +8312,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8333 release_global_block_rsv(info); 8312 release_global_block_rsv(info);
8334 8313
8335 while (!list_empty(&info->space_info)) { 8314 while (!list_empty(&info->space_info)) {
8315 int i;
8316
8336 space_info = list_entry(info->space_info.next, 8317 space_info = list_entry(info->space_info.next,
8337 struct btrfs_space_info, 8318 struct btrfs_space_info,
8338 list); 8319 list);
@@ -8343,9 +8324,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
8343 dump_space_info(space_info, 0, 0); 8324 dump_space_info(space_info, 0, 0);
8344 } 8325 }
8345 } 8326 }
8346 percpu_counter_destroy(&space_info->total_bytes_pinned);
8347 list_del(&space_info->list); 8327 list_del(&space_info->list);
8348 kfree(space_info); 8328 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
8329 struct kobject *kobj;
8330 kobj = &space_info->block_group_kobjs[i];
8331 if (kobj->parent) {
8332 kobject_del(kobj);
8333 kobject_put(kobj);
8334 }
8335 }
8336 kobject_del(&space_info->kobj);
8337 kobject_put(&space_info->kobj);
8349 } 8338 }
8350 return 0; 8339 return 0;
8351} 8340}
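btrfs_free_block_groups() now tears space_info down through the kobject layer: each registered per-RAID-type kobject is deleted and put, then the space_info's own kobject. Note that kfree(space_info) and percpu_counter_destroy() disappear from this path; with kobject lifetimes, the final kobject_put() is what frees the object, via the ktype's release callback (added by the sysfs.c changes in this series, not shown in this hunk). A sketch of that release pattern, with illustrative names:

/* Sketch of the release callback the final kobject_put() relies on.
 * Function and ktype names here are illustrative. */
static void space_info_release(struct kobject *kobj)
{
	struct btrfs_space_info *sinfo;

	sinfo = container_of(kobj, struct btrfs_space_info, kobj);
	percpu_counter_destroy(&sinfo->total_bytes_pinned);
	kfree(sinfo);
}

static struct kobj_type space_info_ktype = {
	.sysfs_ops	= &kobj_sysfs_ops,
	.release	= space_info_release,
};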
@@ -8356,10 +8345,57 @@ static void __link_block_group(struct btrfs_space_info *space_info,
8356 int index = get_block_group_index(cache); 8345 int index = get_block_group_index(cache);
8357 8346
8358 down_write(&space_info->groups_sem); 8347 down_write(&space_info->groups_sem);
8348 if (list_empty(&space_info->block_groups[index])) {
8349 struct kobject *kobj = &space_info->block_group_kobjs[index];
8350 int ret;
8351
8352 kobject_get(&space_info->kobj); /* put in release */
8353 ret = kobject_add(kobj, &space_info->kobj, "%s",
8354 get_raid_name(index));
8355 if (ret) {
8356 pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
8357 kobject_put(&space_info->kobj);
8358 }
8359 }
8359 list_add_tail(&cache->list, &space_info->block_groups[index]); 8360 list_add_tail(&cache->list, &space_info->block_groups[index]);
8360 up_write(&space_info->groups_sem); 8361 up_write(&space_info->groups_sem);
8361} 8362}
8362 8363
8364static struct btrfs_block_group_cache *
8365btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
8366{
8367 struct btrfs_block_group_cache *cache;
8368
8369 cache = kzalloc(sizeof(*cache), GFP_NOFS);
8370 if (!cache)
8371 return NULL;
8372
8373 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8374 GFP_NOFS);
8375 if (!cache->free_space_ctl) {
8376 kfree(cache);
8377 return NULL;
8378 }
8379
8380 cache->key.objectid = start;
8381 cache->key.offset = size;
8382 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8383
8384 cache->sectorsize = root->sectorsize;
8385 cache->fs_info = root->fs_info;
8386 cache->full_stripe_len = btrfs_full_stripe_len(root,
8387 &root->fs_info->mapping_tree,
8388 start);
8389 atomic_set(&cache->count, 1);
8390 spin_lock_init(&cache->lock);
8391 INIT_LIST_HEAD(&cache->list);
8392 INIT_LIST_HEAD(&cache->cluster_list);
8393 INIT_LIST_HEAD(&cache->new_bg_list);
8394 btrfs_init_free_space_ctl(cache);
8395
8396 return cache;
8397}
8398
8363int btrfs_read_block_groups(struct btrfs_root *root) 8399int btrfs_read_block_groups(struct btrfs_root *root)
8364{ 8400{
8365 struct btrfs_path *path; 8401 struct btrfs_path *path;
@@ -8395,26 +8431,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8395 break; 8431 break;
8396 if (ret != 0) 8432 if (ret != 0)
8397 goto error; 8433 goto error;
8434
8398 leaf = path->nodes[0]; 8435 leaf = path->nodes[0];
8399 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 8436 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8400 cache = kzalloc(sizeof(*cache), GFP_NOFS); 8437
8438 cache = btrfs_create_block_group_cache(root, found_key.objectid,
8439 found_key.offset);
8401 if (!cache) { 8440 if (!cache) {
8402 ret = -ENOMEM; 8441 ret = -ENOMEM;
8403 goto error; 8442 goto error;
8404 } 8443 }
8405 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8406 GFP_NOFS);
8407 if (!cache->free_space_ctl) {
8408 kfree(cache);
8409 ret = -ENOMEM;
8410 goto error;
8411 }
8412
8413 atomic_set(&cache->count, 1);
8414 spin_lock_init(&cache->lock);
8415 cache->fs_info = info;
8416 INIT_LIST_HEAD(&cache->list);
8417 INIT_LIST_HEAD(&cache->cluster_list);
8418 8444
8419 if (need_clear) { 8445 if (need_clear) {
8420 /* 8446 /*
@@ -8435,16 +8461,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8435 read_extent_buffer(leaf, &cache->item, 8461 read_extent_buffer(leaf, &cache->item,
8436 btrfs_item_ptr_offset(leaf, path->slots[0]), 8462 btrfs_item_ptr_offset(leaf, path->slots[0]),
8437 sizeof(cache->item)); 8463 sizeof(cache->item));
8438 memcpy(&cache->key, &found_key, sizeof(found_key)); 8464 cache->flags = btrfs_block_group_flags(&cache->item);
8439 8465
8440 key.objectid = found_key.objectid + found_key.offset; 8466 key.objectid = found_key.objectid + found_key.offset;
8441 btrfs_release_path(path); 8467 btrfs_release_path(path);
8442 cache->flags = btrfs_block_group_flags(&cache->item);
8443 cache->sectorsize = root->sectorsize;
8444 cache->full_stripe_len = btrfs_full_stripe_len(root,
8445 &root->fs_info->mapping_tree,
8446 found_key.objectid);
8447 btrfs_init_free_space_ctl(cache);
8448 8468
8449 /* 8469 /*
8450 * We need to exclude the super stripes now so that the space 8470 * We need to exclude the super stripes now so that the space
@@ -8458,8 +8478,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8458 * case. 8478 * case.
8459 */ 8479 */
8460 free_excluded_extents(root, cache); 8480 free_excluded_extents(root, cache);
8461 kfree(cache->free_space_ctl); 8481 btrfs_put_block_group(cache);
8462 kfree(cache);
8463 goto error; 8482 goto error;
8464 } 8483 }
8465 8484
@@ -8590,38 +8609,15 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8590 8609
8591 root->fs_info->last_trans_log_full_commit = trans->transid; 8610 root->fs_info->last_trans_log_full_commit = trans->transid;
8592 8611
8593 cache = kzalloc(sizeof(*cache), GFP_NOFS); 8612 cache = btrfs_create_block_group_cache(root, chunk_offset, size);
8594 if (!cache) 8613 if (!cache)
8595 return -ENOMEM; 8614 return -ENOMEM;
8596 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
8597 GFP_NOFS);
8598 if (!cache->free_space_ctl) {
8599 kfree(cache);
8600 return -ENOMEM;
8601 }
8602
8603 cache->key.objectid = chunk_offset;
8604 cache->key.offset = size;
8605 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8606 cache->sectorsize = root->sectorsize;
8607 cache->fs_info = root->fs_info;
8608 cache->full_stripe_len = btrfs_full_stripe_len(root,
8609 &root->fs_info->mapping_tree,
8610 chunk_offset);
8611
8612 atomic_set(&cache->count, 1);
8613 spin_lock_init(&cache->lock);
8614 INIT_LIST_HEAD(&cache->list);
8615 INIT_LIST_HEAD(&cache->cluster_list);
8616 INIT_LIST_HEAD(&cache->new_bg_list);
8617
8618 btrfs_init_free_space_ctl(cache);
8619 8615
8620 btrfs_set_block_group_used(&cache->item, bytes_used); 8616 btrfs_set_block_group_used(&cache->item, bytes_used);
8621 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 8617 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8622 cache->flags = type;
8623 btrfs_set_block_group_flags(&cache->item, type); 8618 btrfs_set_block_group_flags(&cache->item, type);
8624 8619
8620 cache->flags = type;
8625 cache->last_byte_to_unpin = (u64)-1; 8621 cache->last_byte_to_unpin = (u64)-1;
8626 cache->cached = BTRFS_CACHE_FINISHED; 8622 cache->cached = BTRFS_CACHE_FINISHED;
8627 ret = exclude_super_stripes(root, cache); 8623 ret = exclude_super_stripes(root, cache);
@@ -8631,8 +8627,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8631 * case. 8627 * case.
8632 */ 8628 */
8633 free_excluded_extents(root, cache); 8629 free_excluded_extents(root, cache);
8634 kfree(cache->free_space_ctl); 8630 btrfs_put_block_group(cache);
8635 kfree(cache);
8636 return ret; 8631 return ret;
8637 } 8632 }
8638 8633
@@ -8796,8 +8791,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8796 * are still on the list after taking the semaphore 8791 * are still on the list after taking the semaphore
8797 */ 8792 */
8798 list_del_init(&block_group->list); 8793 list_del_init(&block_group->list);
8799 if (list_empty(&block_group->space_info->block_groups[index])) 8794 if (list_empty(&block_group->space_info->block_groups[index])) {
8795 kobject_del(&block_group->space_info->block_group_kobjs[index]);
8796 kobject_put(&block_group->space_info->block_group_kobjs[index]);
8800 clear_avail_alloc_bits(root->fs_info, block_group->flags); 8797 clear_avail_alloc_bits(root->fs_info, block_group->flags);
8798 }
8801 up_write(&block_group->space_info->groups_sem); 8799 up_write(&block_group->space_info->groups_sem);
8802 8800
8803 if (block_group->cached == BTRFS_CACHE_STARTED) 8801 if (block_group->cached == BTRFS_CACHE_STARTED)
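The recurring theme in the extent-tree.c hunks above is the new btrfs_create_block_group_cache() factory: btrfs_read_block_groups() and btrfs_make_block_group() previously each open-coded the same ~20 lines of cache and free_space_ctl initialization, and each carried its own kfree() pair on the error path. With the helper, both call sites shrink to the shape below, and errors unwind through btrfs_put_block_group(), whose final put is expected to free free_space_ctl along with the cache (fragment condensed from the two users above):

/* Common call-site shape after the refactor: */
cache = btrfs_create_block_group_cache(root, start, size);
if (!cache)
	return -ENOMEM;

ret = exclude_super_stripes(root, cache);
if (ret) {
	/* we may have excluded something; unwind via the refcount */
	free_excluded_extents(root, cache);
	btrfs_put_block_group(cache);
	return ret;
}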
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ff43802a7c88..85bbd01f1271 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -59,7 +59,7 @@ void btrfs_leak_debug_check(void)
59 59
60 while (!list_empty(&states)) { 60 while (!list_empty(&states)) {
61 state = list_entry(states.next, struct extent_state, leak_list); 61 state = list_entry(states.next, struct extent_state, leak_list);
62 printk(KERN_ERR "btrfs state leak: start %llu end %llu " 62 printk(KERN_ERR "BTRFS: state leak: start %llu end %llu "
63 "state %lu in tree %p refs %d\n", 63 "state %lu in tree %p refs %d\n",
64 state->start, state->end, state->state, state->tree, 64 state->start, state->end, state->state, state->tree,
65 atomic_read(&state->refs)); 65 atomic_read(&state->refs));
@@ -69,7 +69,7 @@ void btrfs_leak_debug_check(void)
69 69
70 while (!list_empty(&buffers)) { 70 while (!list_empty(&buffers)) {
71 eb = list_entry(buffers.next, struct extent_buffer, leak_list); 71 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
72 printk(KERN_ERR "btrfs buffer leak start %llu len %lu " 72 printk(KERN_ERR "BTRFS: buffer leak start %llu len %lu "
73 "refs %d\n", 73 "refs %d\n",
74 eb->start, eb->len, atomic_read(&eb->refs)); 74 eb->start, eb->len, atomic_read(&eb->refs));
75 list_del(&eb->leak_list); 75 list_del(&eb->leak_list);
@@ -77,16 +77,22 @@ void btrfs_leak_debug_check(void)
77 } 77 }
78} 78}
79 79
80#define btrfs_debug_check_extent_io_range(inode, start, end) \ 80#define btrfs_debug_check_extent_io_range(tree, start, end) \
81 __btrfs_debug_check_extent_io_range(__func__, (inode), (start), (end)) 81 __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
82static inline void __btrfs_debug_check_extent_io_range(const char *caller, 82static inline void __btrfs_debug_check_extent_io_range(const char *caller,
83 struct inode *inode, u64 start, u64 end) 83 struct extent_io_tree *tree, u64 start, u64 end)
84{ 84{
85 u64 isize = i_size_read(inode); 85 struct inode *inode;
86 u64 isize;
87
88 if (!tree->mapping)
89 return;
86 90
91 inode = tree->mapping->host;
92 isize = i_size_read(inode);
87 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) { 93 if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
88 printk_ratelimited(KERN_DEBUG 94 printk_ratelimited(KERN_DEBUG
89 "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n", 95 "BTRFS: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
90 caller, btrfs_ino(inode), isize, start, end); 96 caller, btrfs_ino(inode), isize, start, end);
91 } 97 }
92} 98}
@@ -124,6 +130,8 @@ static noinline void flush_write_bio(void *data);
124static inline struct btrfs_fs_info * 130static inline struct btrfs_fs_info *
125tree_fs_info(struct extent_io_tree *tree) 131tree_fs_info(struct extent_io_tree *tree)
126{ 132{
133 if (!tree->mapping)
134 return NULL;
127 return btrfs_sb(tree->mapping->host->i_sb); 135 return btrfs_sb(tree->mapping->host->i_sb);
128} 136}
129 137
@@ -186,11 +194,9 @@ void extent_io_tree_init(struct extent_io_tree *tree,
186 struct address_space *mapping) 194 struct address_space *mapping)
187{ 195{
188 tree->state = RB_ROOT; 196 tree->state = RB_ROOT;
189 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
190 tree->ops = NULL; 197 tree->ops = NULL;
191 tree->dirty_bytes = 0; 198 tree->dirty_bytes = 0;
192 spin_lock_init(&tree->lock); 199 spin_lock_init(&tree->lock);
193 spin_lock_init(&tree->buffer_lock);
194 tree->mapping = mapping; 200 tree->mapping = mapping;
195} 201}
196 202
@@ -224,12 +230,20 @@ void free_extent_state(struct extent_state *state)
224} 230}
225 231
226static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 232static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
227 struct rb_node *node) 233 struct rb_node *node,
234 struct rb_node ***p_in,
235 struct rb_node **parent_in)
228{ 236{
229 struct rb_node **p = &root->rb_node; 237 struct rb_node **p = &root->rb_node;
230 struct rb_node *parent = NULL; 238 struct rb_node *parent = NULL;
231 struct tree_entry *entry; 239 struct tree_entry *entry;
232 240
241 if (p_in && parent_in) {
242 p = *p_in;
243 parent = *parent_in;
244 goto do_insert;
245 }
246
233 while (*p) { 247 while (*p) {
234 parent = *p; 248 parent = *p;
235 entry = rb_entry(parent, struct tree_entry, rb_node); 249 entry = rb_entry(parent, struct tree_entry, rb_node);
@@ -242,35 +256,43 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
242 return parent; 256 return parent;
243 } 257 }
244 258
259do_insert:
245 rb_link_node(node, parent, p); 260 rb_link_node(node, parent, p);
246 rb_insert_color(node, root); 261 rb_insert_color(node, root);
247 return NULL; 262 return NULL;
248} 263}
249 264
250static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, 265static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
251 struct rb_node **prev_ret, 266 struct rb_node **prev_ret,
252 struct rb_node **next_ret) 267 struct rb_node **next_ret,
268 struct rb_node ***p_ret,
269 struct rb_node **parent_ret)
253{ 270{
254 struct rb_root *root = &tree->state; 271 struct rb_root *root = &tree->state;
255 struct rb_node *n = root->rb_node; 272 struct rb_node **n = &root->rb_node;
256 struct rb_node *prev = NULL; 273 struct rb_node *prev = NULL;
257 struct rb_node *orig_prev = NULL; 274 struct rb_node *orig_prev = NULL;
258 struct tree_entry *entry; 275 struct tree_entry *entry;
259 struct tree_entry *prev_entry = NULL; 276 struct tree_entry *prev_entry = NULL;
260 277
261 while (n) { 278 while (*n) {
262 entry = rb_entry(n, struct tree_entry, rb_node); 279 prev = *n;
263 prev = n; 280 entry = rb_entry(prev, struct tree_entry, rb_node);
264 prev_entry = entry; 281 prev_entry = entry;
265 282
266 if (offset < entry->start) 283 if (offset < entry->start)
267 n = n->rb_left; 284 n = &(*n)->rb_left;
268 else if (offset > entry->end) 285 else if (offset > entry->end)
269 n = n->rb_right; 286 n = &(*n)->rb_right;
270 else 287 else
271 return n; 288 return *n;
272 } 289 }
273 290
291 if (p_ret)
292 *p_ret = n;
293 if (parent_ret)
294 *parent_ret = prev;
295
274 if (prev_ret) { 296 if (prev_ret) {
275 orig_prev = prev; 297 orig_prev = prev;
276 while (prev && offset > prev_entry->end) { 298 while (prev && offset > prev_entry->end) {
@@ -292,18 +314,27 @@ static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
292 return NULL; 314 return NULL;
293} 315}
294 316
295static inline struct rb_node *tree_search(struct extent_io_tree *tree, 317static inline struct rb_node *
296 u64 offset) 318tree_search_for_insert(struct extent_io_tree *tree,
319 u64 offset,
320 struct rb_node ***p_ret,
321 struct rb_node **parent_ret)
297{ 322{
298 struct rb_node *prev = NULL; 323 struct rb_node *prev = NULL;
299 struct rb_node *ret; 324 struct rb_node *ret;
300 325
301 ret = __etree_search(tree, offset, &prev, NULL); 326 ret = __etree_search(tree, offset, &prev, NULL, p_ret, parent_ret);
302 if (!ret) 327 if (!ret)
303 return prev; 328 return prev;
304 return ret; 329 return ret;
305} 330}
306 331
332static inline struct rb_node *tree_search(struct extent_io_tree *tree,
333 u64 offset)
334{
335 return tree_search_for_insert(tree, offset, NULL, NULL);
336}
337
307static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, 338static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
308 struct extent_state *other) 339 struct extent_state *other)
309{ 340{
@@ -385,23 +416,25 @@ static void set_state_bits(struct extent_io_tree *tree,
385 */ 416 */
386static int insert_state(struct extent_io_tree *tree, 417static int insert_state(struct extent_io_tree *tree,
387 struct extent_state *state, u64 start, u64 end, 418 struct extent_state *state, u64 start, u64 end,
419 struct rb_node ***p,
420 struct rb_node **parent,
388 unsigned long *bits) 421 unsigned long *bits)
389{ 422{
390 struct rb_node *node; 423 struct rb_node *node;
391 424
392 if (end < start) 425 if (end < start)
393 WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", 426 WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
394 end, start); 427 end, start);
395 state->start = start; 428 state->start = start;
396 state->end = end; 429 state->end = end;
397 430
398 set_state_bits(tree, state, bits); 431 set_state_bits(tree, state, bits);
399 432
400 node = tree_insert(&tree->state, end, &state->rb_node); 433 node = tree_insert(&tree->state, end, &state->rb_node, p, parent);
401 if (node) { 434 if (node) {
402 struct extent_state *found; 435 struct extent_state *found;
403 found = rb_entry(node, struct extent_state, rb_node); 436 found = rb_entry(node, struct extent_state, rb_node);
404 printk(KERN_ERR "btrfs found node %llu %llu on insert of " 437 printk(KERN_ERR "BTRFS: found node %llu %llu on insert of "
405 "%llu %llu\n", 438 "%llu %llu\n",
406 found->start, found->end, start, end); 439 found->start, found->end, start, end);
407 return -EEXIST; 440 return -EEXIST;
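tree_search_for_insert() and the new p_in/parent_in arguments to tree_insert() implement a classic rbtree optimization: a failed search already knows the link slot and parent where the new node belongs, so the subsequent insert can link directly instead of walking the tree a second time. A self-contained sketch of the idiom, with illustrative types:

#include <linux/rbtree.h>
#include <linux/types.h>

struct item {
	struct rb_node rb;
	u64 key;
};

/* Search once; on a miss, hand back the link slot and parent so
 * the caller can insert without a second walk. */
static struct item *search_for_insert(struct rb_root *root, u64 key,
				      struct rb_node ***p_ret,
				      struct rb_node **parent_ret)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;

	while (*p) {
		struct item *it = rb_entry(*p, struct item, rb);

		parent = *p;
		if (key < it->key)
			p = &(*p)->rb_left;
		else if (key > it->key)
			p = &(*p)->rb_right;
		else
			return it;	/* hit: nothing to insert */
	}
	*p_ret = p;
	*parent_ret = parent;
	return NULL;
}

static void insert_at(struct rb_root *root, struct item *new,
		      struct rb_node **p, struct rb_node *parent)
{
	rb_link_node(&new->rb, parent, p);
	rb_insert_color(&new->rb, root);
}

This is exactly why the "!node" paths in __set_extent_bit() and convert_extent_bit() further down can pass &p and &parent straight into insert_state().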
@@ -444,7 +477,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
444 prealloc->state = orig->state; 477 prealloc->state = orig->state;
445 orig->start = split; 478 orig->start = split;
446 479
447 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); 480 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node,
481 NULL, NULL);
448 if (node) { 482 if (node) {
449 free_extent_state(prealloc); 483 free_extent_state(prealloc);
450 return -EEXIST; 484 return -EEXIST;
@@ -542,7 +576,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
542 int err; 576 int err;
543 int clear = 0; 577 int clear = 0;
544 578
545 btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); 579 btrfs_debug_check_extent_io_range(tree, start, end);
546 580
547 if (bits & EXTENT_DELALLOC) 581 if (bits & EXTENT_DELALLOC)
548 bits |= EXTENT_NORESERVE; 582 bits |= EXTENT_NORESERVE;
@@ -702,7 +736,7 @@ static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
702 struct extent_state *state; 736 struct extent_state *state;
703 struct rb_node *node; 737 struct rb_node *node;
704 738
705 btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); 739 btrfs_debug_check_extent_io_range(tree, start, end);
706 740
707 spin_lock(&tree->lock); 741 spin_lock(&tree->lock);
708again: 742again:
@@ -783,11 +817,13 @@ __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
783 struct extent_state *state; 817 struct extent_state *state;
784 struct extent_state *prealloc = NULL; 818 struct extent_state *prealloc = NULL;
785 struct rb_node *node; 819 struct rb_node *node;
820 struct rb_node **p;
821 struct rb_node *parent;
786 int err = 0; 822 int err = 0;
787 u64 last_start; 823 u64 last_start;
788 u64 last_end; 824 u64 last_end;
789 825
790 btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); 826 btrfs_debug_check_extent_io_range(tree, start, end);
791 827
792 bits |= EXTENT_FIRST_DELALLOC; 828 bits |= EXTENT_FIRST_DELALLOC;
793again: 829again:
@@ -809,14 +845,16 @@ again:
809 * this search will find all the extents that end after 845 * this search will find all the extents that end after
810 * our range starts. 846 * our range starts.
811 */ 847 */
812 node = tree_search(tree, start); 848 node = tree_search_for_insert(tree, start, &p, &parent);
813 if (!node) { 849 if (!node) {
814 prealloc = alloc_extent_state_atomic(prealloc); 850 prealloc = alloc_extent_state_atomic(prealloc);
815 BUG_ON(!prealloc); 851 BUG_ON(!prealloc);
816 err = insert_state(tree, prealloc, start, end, &bits); 852 err = insert_state(tree, prealloc, start, end,
853 &p, &parent, &bits);
817 if (err) 854 if (err)
818 extent_io_tree_panic(tree, err); 855 extent_io_tree_panic(tree, err);
819 856
857 cache_state(prealloc, cached_state);
820 prealloc = NULL; 858 prealloc = NULL;
821 goto out; 859 goto out;
822 } 860 }
@@ -919,7 +957,7 @@ hit_next:
919 * the later extent. 957 * the later extent.
920 */ 958 */
921 err = insert_state(tree, prealloc, start, this_end, 959 err = insert_state(tree, prealloc, start, this_end,
922 &bits); 960 NULL, NULL, &bits);
923 if (err) 961 if (err)
924 extent_io_tree_panic(tree, err); 962 extent_io_tree_panic(tree, err);
925 963
@@ -1005,11 +1043,13 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
1005 struct extent_state *state; 1043 struct extent_state *state;
1006 struct extent_state *prealloc = NULL; 1044 struct extent_state *prealloc = NULL;
1007 struct rb_node *node; 1045 struct rb_node *node;
1046 struct rb_node **p;
1047 struct rb_node *parent;
1008 int err = 0; 1048 int err = 0;
1009 u64 last_start; 1049 u64 last_start;
1010 u64 last_end; 1050 u64 last_end;
1011 1051
1012 btrfs_debug_check_extent_io_range(tree->mapping->host, start, end); 1052 btrfs_debug_check_extent_io_range(tree, start, end);
1013 1053
1014again: 1054again:
1015 if (!prealloc && (mask & __GFP_WAIT)) { 1055 if (!prealloc && (mask & __GFP_WAIT)) {
@@ -1032,17 +1072,19 @@ again:
1032 * this search will find all the extents that end after 1072 * this search will find all the extents that end after
1033 * our range starts. 1073 * our range starts.
1034 */ 1074 */
1035 node = tree_search(tree, start); 1075 node = tree_search_for_insert(tree, start, &p, &parent);
1036 if (!node) { 1076 if (!node) {
1037 prealloc = alloc_extent_state_atomic(prealloc); 1077 prealloc = alloc_extent_state_atomic(prealloc);
1038 if (!prealloc) { 1078 if (!prealloc) {
1039 err = -ENOMEM; 1079 err = -ENOMEM;
1040 goto out; 1080 goto out;
1041 } 1081 }
1042 err = insert_state(tree, prealloc, start, end, &bits); 1082 err = insert_state(tree, prealloc, start, end,
1043 prealloc = NULL; 1083 &p, &parent, &bits);
1044 if (err) 1084 if (err)
1045 extent_io_tree_panic(tree, err); 1085 extent_io_tree_panic(tree, err);
1086 cache_state(prealloc, cached_state);
1087 prealloc = NULL;
1046 goto out; 1088 goto out;
1047 } 1089 }
1048 state = rb_entry(node, struct extent_state, rb_node); 1090 state = rb_entry(node, struct extent_state, rb_node);
@@ -1135,7 +1177,7 @@ hit_next:
1135 * the later extent. 1177 * the later extent.
1136 */ 1178 */
1137 err = insert_state(tree, prealloc, start, this_end, 1179 err = insert_state(tree, prealloc, start, this_end,
1138 &bits); 1180 NULL, NULL, &bits);
1139 if (err) 1181 if (err)
1140 extent_io_tree_panic(tree, err); 1182 extent_io_tree_panic(tree, err);
1141 cache_state(prealloc, cached_state); 1183 cache_state(prealloc, cached_state);
@@ -1984,7 +2026,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
1984 bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 2026 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1985 if (!bio) 2027 if (!bio)
1986 return -EIO; 2028 return -EIO;
1987 bio->bi_size = 0; 2029 bio->bi_iter.bi_size = 0;
1988 map_length = length; 2030 map_length = length;
1989 2031
1990 ret = btrfs_map_block(fs_info, WRITE, logical, 2032 ret = btrfs_map_block(fs_info, WRITE, logical,
@@ -1995,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
1995 } 2037 }
1996 BUG_ON(mirror_num != bbio->mirror_num); 2038 BUG_ON(mirror_num != bbio->mirror_num);
1997 sector = bbio->stripes[mirror_num-1].physical >> 9; 2039 sector = bbio->stripes[mirror_num-1].physical >> 9;
1998 bio->bi_sector = sector; 2040 bio->bi_iter.bi_sector = sector;
1999 dev = bbio->stripes[mirror_num-1].dev; 2041 dev = bbio->stripes[mirror_num-1].dev;
2000 kfree(bbio); 2042 kfree(bbio);
2001 if (!dev || !dev->bdev || !dev->writeable) { 2043 if (!dev || !dev->bdev || !dev->writeable) {
@@ -2012,9 +2054,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
2012 return -EIO; 2054 return -EIO;
2013 } 2055 }
2014 2056
2015 printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " 2057 printk_ratelimited_in_rcu(KERN_INFO
2016 "(dev %s sector %llu)\n", page->mapping->host->i_ino, 2058 "BTRFS: read error corrected: ino %lu off %llu "
2017 start, rcu_str_deref(dev->name), sector); 2059 "(dev %s sector %llu)\n", page->mapping->host->i_ino,
2060 start, rcu_str_deref(dev->name), sector);
2018 2061
2019 bio_put(bio); 2062 bio_put(bio);
2020 return 0; 2063 return 0;
@@ -2156,7 +2199,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2156 return -EIO; 2199 return -EIO;
2157 } 2200 }
2158 2201
2159 if (em->start > start || em->start + em->len < start) { 2202 if (em->start > start || em->start + em->len <= start) {
2160 free_extent_map(em); 2203 free_extent_map(em);
2161 em = NULL; 2204 em = NULL;
2162 } 2205 }
@@ -2268,9 +2311,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
2268 return -EIO; 2311 return -EIO;
2269 } 2312 }
2270 bio->bi_end_io = failed_bio->bi_end_io; 2313 bio->bi_end_io = failed_bio->bi_end_io;
2271 bio->bi_sector = failrec->logical >> 9; 2314 bio->bi_iter.bi_sector = failrec->logical >> 9;
2272 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 2315 bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
2273 bio->bi_size = 0; 2316 bio->bi_iter.bi_size = 0;
2274 2317
2275 btrfs_failed_bio = btrfs_io_bio(failed_bio); 2318 btrfs_failed_bio = btrfs_io_bio(failed_bio);
2276 if (btrfs_failed_bio->csum) { 2319 if (btrfs_failed_bio->csum) {
@@ -2332,37 +2375,39 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
2332 */ 2375 */
2333static void end_bio_extent_writepage(struct bio *bio, int err) 2376static void end_bio_extent_writepage(struct bio *bio, int err)
2334{ 2377{
2335 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 2378 struct bio_vec *bvec;
2336 struct extent_io_tree *tree;
2337 u64 start; 2379 u64 start;
2338 u64 end; 2380 u64 end;
2381 int i;
2339 2382
2340 do { 2383 bio_for_each_segment_all(bvec, bio, i) {
2341 struct page *page = bvec->bv_page; 2384 struct page *page = bvec->bv_page;
2342 tree = &BTRFS_I(page->mapping->host)->io_tree;
2343 2385
2344 /* We always issue full-page reads, but if some block 2386 /* We always issue full-page reads, but if some block
2345 * in a page fails to read, blk_update_request() will 2387 * in a page fails to read, blk_update_request() will
2346 * advance bv_offset and adjust bv_len to compensate. 2388 * advance bv_offset and adjust bv_len to compensate.
2347 * Print a warning for nonzero offsets, and an error 2389 * Print a warning for nonzero offsets, and an error
2348 * if they don't add up to a full page. */ 2390 * if they don't add up to a full page. */
2349 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) 2391 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
2350 printk("%s page write in btrfs with offset %u and length %u\n", 2392 if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
2351 bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE 2393 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2352 ? KERN_ERR "partial" : KERN_INFO "incomplete", 2394 "partial page write in btrfs with offset %u and length %u",
2353 bvec->bv_offset, bvec->bv_len); 2395 bvec->bv_offset, bvec->bv_len);
2396 else
2397 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2398 "incomplete page write in btrfs with offset %u and "
2399 "length %u",
2400 bvec->bv_offset, bvec->bv_len);
2401 }
2354 2402
2355 start = page_offset(page); 2403 start = page_offset(page);
2356 end = start + bvec->bv_offset + bvec->bv_len - 1; 2404 end = start + bvec->bv_offset + bvec->bv_len - 1;
2357 2405
2358 if (--bvec >= bio->bi_io_vec)
2359 prefetchw(&bvec->bv_page->flags);
2360
2361 if (end_extent_writepage(page, err, start, end)) 2406 if (end_extent_writepage(page, err, start, end))
2362 continue; 2407 continue;
2363 2408
2364 end_page_writeback(page); 2409 end_page_writeback(page);
2365 } while (bvec >= bio->bi_io_vec); 2410 }
2366 2411
2367 bio_put(bio); 2412 bio_put(bio);
2368} 2413}
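The write-completion path above is converted from a hand-rolled backwards walk over bi_io_vec (with prefetchw() of the next page) to bio_for_each_segment_all(), part of the immutable-biovec conversion visible throughout this diff. Completion handlers must use the _all variant: it visits every bvec in the bio regardless of how far bi_iter was advanced during submission. A minimal sketch of the resulting endio shape (hypothetical handler, using the two-argument endio prototype seen in this file):

/* Per-segment completion with bio_for_each_segment_all(). */
static void my_write_endio(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;

		if (err)
			SetPageError(page);
		end_page_writeback(page);
	}
	bio_put(bio);
}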
@@ -2392,9 +2437,8 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
2392 */ 2437 */
2393static void end_bio_extent_readpage(struct bio *bio, int err) 2438static void end_bio_extent_readpage(struct bio *bio, int err)
2394{ 2439{
2440 struct bio_vec *bvec;
2395 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 2441 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
2396 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
2397 struct bio_vec *bvec = bio->bi_io_vec;
2398 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 2442 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
2399 struct extent_io_tree *tree; 2443 struct extent_io_tree *tree;
2400 u64 offset = 0; 2444 u64 offset = 0;
@@ -2405,16 +2449,17 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2405 u64 extent_len = 0; 2449 u64 extent_len = 0;
2406 int mirror; 2450 int mirror;
2407 int ret; 2451 int ret;
2452 int i;
2408 2453
2409 if (err) 2454 if (err)
2410 uptodate = 0; 2455 uptodate = 0;
2411 2456
2412 do { 2457 bio_for_each_segment_all(bvec, bio, i) {
2413 struct page *page = bvec->bv_page; 2458 struct page *page = bvec->bv_page;
2414 struct inode *inode = page->mapping->host; 2459 struct inode *inode = page->mapping->host;
2415 2460
2416 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " 2461 pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
2417 "mirror=%lu\n", (u64)bio->bi_sector, err, 2462 "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err,
2418 io_bio->mirror_num); 2463 io_bio->mirror_num);
2419 tree = &BTRFS_I(inode)->io_tree; 2464 tree = &BTRFS_I(inode)->io_tree;
2420 2465
@@ -2423,19 +2468,22 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2423 * advance bv_offset and adjust bv_len to compensate. 2468 * advance bv_offset and adjust bv_len to compensate.
2424 * Print a warning for nonzero offsets, and an error 2469 * Print a warning for nonzero offsets, and an error
2425 * if they don't add up to a full page. */ 2470 * if they don't add up to a full page. */
2426 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) 2471 if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
2427 printk("%s page read in btrfs with offset %u and length %u\n", 2472 if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
2428 bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE 2473 btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
2429 ? KERN_ERR "partial" : KERN_INFO "incomplete", 2474 "partial page read in btrfs with offset %u and length %u",
2430 bvec->bv_offset, bvec->bv_len); 2475 bvec->bv_offset, bvec->bv_len);
2476 else
2477 btrfs_info(BTRFS_I(page->mapping->host)->root->fs_info,
2478 "incomplete page read in btrfs with offset %u and "
2479 "length %u",
2480 bvec->bv_offset, bvec->bv_len);
2481 }
2431 2482
2432 start = page_offset(page); 2483 start = page_offset(page);
2433 end = start + bvec->bv_offset + bvec->bv_len - 1; 2484 end = start + bvec->bv_offset + bvec->bv_len - 1;
2434 len = bvec->bv_len; 2485 len = bvec->bv_len;
2435 2486
2436 if (++bvec <= bvec_end)
2437 prefetchw(&bvec->bv_page->flags);
2438
2439 mirror = io_bio->mirror_num; 2487 mirror = io_bio->mirror_num;
2440 if (likely(uptodate && tree->ops && 2488 if (likely(uptodate && tree->ops &&
2441 tree->ops->readpage_end_io_hook)) { 2489 tree->ops->readpage_end_io_hook)) {
@@ -2516,7 +2564,7 @@ readpage_ok:
2516 extent_start = start; 2564 extent_start = start;
2517 extent_len = end + 1 - start; 2565 extent_len = end + 1 - start;
2518 } 2566 }
2519 } while (bvec <= bvec_end); 2567 }
2520 2568
2521 if (extent_len) 2569 if (extent_len)
2522 endio_readpage_release_extent(tree, extent_start, extent_len, 2570 endio_readpage_release_extent(tree, extent_start, extent_len,
@@ -2547,9 +2595,8 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
2547 } 2595 }
2548 2596
2549 if (bio) { 2597 if (bio) {
2550 bio->bi_size = 0;
2551 bio->bi_bdev = bdev; 2598 bio->bi_bdev = bdev;
2552 bio->bi_sector = first_sector; 2599 bio->bi_iter.bi_sector = first_sector;
2553 btrfs_bio = btrfs_io_bio(bio); 2600 btrfs_bio = btrfs_io_bio(bio);
2554 btrfs_bio->csum = NULL; 2601 btrfs_bio->csum = NULL;
2555 btrfs_bio->csum_allocated = NULL; 2602 btrfs_bio->csum_allocated = NULL;
@@ -2643,7 +2690,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
2643 if (bio_ret && *bio_ret) { 2690 if (bio_ret && *bio_ret) {
2644 bio = *bio_ret; 2691 bio = *bio_ret;
2645 if (old_compressed) 2692 if (old_compressed)
2646 contig = bio->bi_sector == sector; 2693 contig = bio->bi_iter.bi_sector == sector;
2647 else 2694 else
2648 contig = bio_end_sector(bio) == sector; 2695 contig = bio_end_sector(bio) == sector;
2649 2696
@@ -3287,8 +3334,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
3287 3334
3288 set_range_writeback(tree, cur, cur + iosize - 1); 3335 set_range_writeback(tree, cur, cur + iosize - 1);
3289 if (!PageWriteback(page)) { 3336 if (!PageWriteback(page)) {
3290 printk(KERN_ERR "btrfs warning page %lu not " 3337 btrfs_err(BTRFS_I(inode)->root->fs_info,
3291 "writeback, cur %llu end %llu\n", 3338 "page %lu not writeback, cur %llu end %llu",
3292 page->index, cur, end); 3339 page->index, cur, end);
3293 } 3340 }
3294 3341
@@ -3410,20 +3457,18 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
3410 3457
3411static void end_bio_extent_buffer_writepage(struct bio *bio, int err) 3458static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
3412{ 3459{
3413 int uptodate = err == 0; 3460 struct bio_vec *bvec;
3414 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
3415 struct extent_buffer *eb; 3461 struct extent_buffer *eb;
3416 int done; 3462 int i, done;
3417 3463
3418 do { 3464 bio_for_each_segment_all(bvec, bio, i) {
3419 struct page *page = bvec->bv_page; 3465 struct page *page = bvec->bv_page;
3420 3466
3421 bvec--;
3422 eb = (struct extent_buffer *)page->private; 3467 eb = (struct extent_buffer *)page->private;
3423 BUG_ON(!eb); 3468 BUG_ON(!eb);
3424 done = atomic_dec_and_test(&eb->io_pages); 3469 done = atomic_dec_and_test(&eb->io_pages);
3425 3470
3426 if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { 3471 if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
3427 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); 3472 set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
3428 ClearPageUptodate(page); 3473 ClearPageUptodate(page);
3429 SetPageError(page); 3474 SetPageError(page);
@@ -3435,10 +3480,9 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
3435 continue; 3480 continue;
3436 3481
3437 end_extent_buffer_writeback(eb); 3482 end_extent_buffer_writeback(eb);
3438 } while (bvec >= bio->bi_io_vec); 3483 }
3439 3484
3440 bio_put(bio); 3485 bio_put(bio);
3441
3442} 3486}
3443 3487
3444static int write_one_eb(struct extent_buffer *eb, 3488static int write_one_eb(struct extent_buffer *eb,
@@ -3447,6 +3491,7 @@ static int write_one_eb(struct extent_buffer *eb,
3447 struct extent_page_data *epd) 3491 struct extent_page_data *epd)
3448{ 3492{
3449 struct block_device *bdev = fs_info->fs_devices->latest_bdev; 3493 struct block_device *bdev = fs_info->fs_devices->latest_bdev;
3494 struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
3450 u64 offset = eb->start; 3495 u64 offset = eb->start;
3451 unsigned long i, num_pages; 3496 unsigned long i, num_pages;
3452 unsigned long bio_flags = 0; 3497 unsigned long bio_flags = 0;
@@ -3464,7 +3509,7 @@ static int write_one_eb(struct extent_buffer *eb,
3464 3509
3465 clear_page_dirty_for_io(p); 3510 clear_page_dirty_for_io(p);
3466 set_page_writeback(p); 3511 set_page_writeback(p);
3467 ret = submit_extent_page(rw, eb->tree, p, offset >> 9, 3512 ret = submit_extent_page(rw, tree, p, offset >> 9,
3468 PAGE_CACHE_SIZE, 0, bdev, &epd->bio, 3513 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
3469 -1, end_bio_extent_buffer_writepage, 3514 -1, end_bio_extent_buffer_writepage,
3470 0, epd->bio_flags, bio_flags); 3515 0, epd->bio_flags, bio_flags);
@@ -4082,12 +4127,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4082 struct extent_map *em = NULL; 4127 struct extent_map *em = NULL;
4083 struct extent_state *cached_state = NULL; 4128 struct extent_state *cached_state = NULL;
4084 struct btrfs_path *path; 4129 struct btrfs_path *path;
4085 struct btrfs_file_extent_item *item;
4086 int end = 0; 4130 int end = 0;
4087 u64 em_start = 0; 4131 u64 em_start = 0;
4088 u64 em_len = 0; 4132 u64 em_len = 0;
4089 u64 em_end = 0; 4133 u64 em_end = 0;
4090 unsigned long emflags;
4091 4134
4092 if (len == 0) 4135 if (len == 0)
4093 return -EINVAL; 4136 return -EINVAL;
@@ -4112,8 +4155,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4112 } 4155 }
4113 WARN_ON(!ret); 4156 WARN_ON(!ret);
4114 path->slots[0]--; 4157 path->slots[0]--;
4115 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
4116 struct btrfs_file_extent_item);
4117 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 4158 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
4118 found_type = btrfs_key_type(&found_key); 4159 found_type = btrfs_key_type(&found_key);
4119 4160
@@ -4181,7 +4222,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4181 offset_in_extent = em_start - em->start; 4222 offset_in_extent = em_start - em->start;
4182 em_end = extent_map_end(em); 4223 em_end = extent_map_end(em);
4183 em_len = em_end - em_start; 4224 em_len = em_end - em_start;
4184 emflags = em->flags;
4185 disko = 0; 4225 disko = 0;
4186 flags = 0; 4226 flags = 0;
4187 4227
@@ -4333,10 +4373,9 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
4333 __free_extent_buffer(eb); 4373 __free_extent_buffer(eb);
4334} 4374}
4335 4375
4336static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 4376static struct extent_buffer *
4337 u64 start, 4377__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
4338 unsigned long len, 4378 unsigned long len, gfp_t mask)
4339 gfp_t mask)
4340{ 4379{
4341 struct extent_buffer *eb = NULL; 4380 struct extent_buffer *eb = NULL;
4342 4381
@@ -4345,7 +4384,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
4345 return NULL; 4384 return NULL;
4346 eb->start = start; 4385 eb->start = start;
4347 eb->len = len; 4386 eb->len = len;
4348 eb->tree = tree; 4387 eb->fs_info = fs_info;
4349 eb->bflags = 0; 4388 eb->bflags = 0;
4350 rwlock_init(&eb->lock); 4389 rwlock_init(&eb->lock);
4351 atomic_set(&eb->write_locks, 0); 4390 atomic_set(&eb->write_locks, 0);
@@ -4477,13 +4516,14 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
4477 } 4516 }
4478} 4517}
4479 4518
4480struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 4519struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
4481 u64 start) 4520 u64 start)
4482{ 4521{
4483 struct extent_buffer *eb; 4522 struct extent_buffer *eb;
4484 4523
4485 rcu_read_lock(); 4524 rcu_read_lock();
4486 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 4525 eb = radix_tree_lookup(&fs_info->buffer_radix,
4526 start >> PAGE_CACHE_SHIFT);
4487 if (eb && atomic_inc_not_zero(&eb->refs)) { 4527 if (eb && atomic_inc_not_zero(&eb->refs)) {
4488 rcu_read_unlock(); 4528 rcu_read_unlock();
4489 mark_extent_buffer_accessed(eb); 4529 mark_extent_buffer_accessed(eb);
@@ -4494,7 +4534,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
4494 return NULL; 4534 return NULL;
4495} 4535}
4496 4536
4497struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 4537struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
4498 u64 start, unsigned long len) 4538 u64 start, unsigned long len)
4499{ 4539{
4500 unsigned long num_pages = num_extent_pages(start, len); 4540 unsigned long num_pages = num_extent_pages(start, len);
@@ -4503,16 +4543,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
4503 struct extent_buffer *eb; 4543 struct extent_buffer *eb;
4504 struct extent_buffer *exists = NULL; 4544 struct extent_buffer *exists = NULL;
4505 struct page *p; 4545 struct page *p;
4506 struct address_space *mapping = tree->mapping; 4546 struct address_space *mapping = fs_info->btree_inode->i_mapping;
4507 int uptodate = 1; 4547 int uptodate = 1;
4508 int ret; 4548 int ret;
4509 4549
4510 4550 eb = find_extent_buffer(fs_info, start);
4511 eb = find_extent_buffer(tree, start);
4512 if (eb) 4551 if (eb)
4513 return eb; 4552 return eb;
4514 4553
4515 eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS); 4554 eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS);
4516 if (!eb) 4555 if (!eb)
4517 return NULL; 4556 return NULL;
4518 4557
@@ -4567,12 +4606,13 @@ again:
4567 if (ret) 4606 if (ret)
4568 goto free_eb; 4607 goto free_eb;
4569 4608
4570 spin_lock(&tree->buffer_lock); 4609 spin_lock(&fs_info->buffer_lock);
4571 ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb); 4610 ret = radix_tree_insert(&fs_info->buffer_radix,
4572 spin_unlock(&tree->buffer_lock); 4611 start >> PAGE_CACHE_SHIFT, eb);
4612 spin_unlock(&fs_info->buffer_lock);
4573 radix_tree_preload_end(); 4613 radix_tree_preload_end();
4574 if (ret == -EEXIST) { 4614 if (ret == -EEXIST) {
4575 exists = find_extent_buffer(tree, start); 4615 exists = find_extent_buffer(fs_info, start);
4576 if (exists) 4616 if (exists)
4577 goto free_eb; 4617 goto free_eb;
4578 else 4618 else
@@ -4580,6 +4620,7 @@ again:
4580 } 4620 }
4581 /* add one reference for the tree */ 4621 /* add one reference for the tree */
4582 check_buffer_tree_ref(eb); 4622 check_buffer_tree_ref(eb);
4623 set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
4583 4624
4584 /* 4625 /*
4585 * there is a race where release page may have 4626 * there is a race where release page may have
@@ -4623,17 +4664,17 @@ static int release_extent_buffer(struct extent_buffer *eb)
4623{ 4664{
4624 WARN_ON(atomic_read(&eb->refs) == 0); 4665 WARN_ON(atomic_read(&eb->refs) == 0);
4625 if (atomic_dec_and_test(&eb->refs)) { 4666 if (atomic_dec_and_test(&eb->refs)) {
4626 if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) { 4667 if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
4627 spin_unlock(&eb->refs_lock); 4668 struct btrfs_fs_info *fs_info = eb->fs_info;
4628 } else {
4629 struct extent_io_tree *tree = eb->tree;
4630 4669
4631 spin_unlock(&eb->refs_lock); 4670 spin_unlock(&eb->refs_lock);
4632 4671
4633 spin_lock(&tree->buffer_lock); 4672 spin_lock(&fs_info->buffer_lock);
4634 radix_tree_delete(&tree->buffer, 4673 radix_tree_delete(&fs_info->buffer_radix,
4635 eb->start >> PAGE_CACHE_SHIFT); 4674 eb->start >> PAGE_CACHE_SHIFT);
4636 spin_unlock(&tree->buffer_lock); 4675 spin_unlock(&fs_info->buffer_lock);
4676 } else {
4677 spin_unlock(&eb->refs_lock);
4637 } 4678 }
4638 4679
4639 /* Should be safe to release our pages at this point */ 4680 /* Should be safe to release our pages at this point */
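Since extent buffers now live in the fs_info-wide radix tree, release_extent_buffer() keys the unlink on the new EXTENT_BUFFER_IN_TREE bit rather than on the DUMMY bit. Using test_and_clear_bit() makes the radix delete happen exactly once: whichever path clears the bit first owns the removal, so a racing final put cannot delete the same slot twice. The shape of the idiom, lifted from the hunk above:

/* At most one caller observes the bit set; only that caller
 * unlinks the buffer from the radix tree. */
if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
	spin_lock(&fs_info->buffer_lock);
	radix_tree_delete(&fs_info->buffer_radix,
			  eb->start >> PAGE_CACHE_SHIFT);
	spin_unlock(&fs_info->buffer_lock);
}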
@@ -5112,12 +5153,12 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5112 unsigned long src_i; 5153 unsigned long src_i;
5113 5154
5114 if (src_offset + len > dst->len) { 5155 if (src_offset + len > dst->len) {
5115 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " 5156 printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
5116 "len %lu dst len %lu\n", src_offset, len, dst->len); 5157 "len %lu dst len %lu\n", src_offset, len, dst->len);
5117 BUG_ON(1); 5158 BUG_ON(1);
5118 } 5159 }
5119 if (dst_offset + len > dst->len) { 5160 if (dst_offset + len > dst->len) {
5120 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " 5161 printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
5121 "len %lu dst len %lu\n", dst_offset, len, dst->len); 5162 "len %lu dst len %lu\n", dst_offset, len, dst->len);
5122 BUG_ON(1); 5163 BUG_ON(1);
5123 } 5164 }
@@ -5159,12 +5200,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
5159 unsigned long src_i; 5200 unsigned long src_i;
5160 5201
5161 if (src_offset + len > dst->len) { 5202 if (src_offset + len > dst->len) {
5162 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " 5203 printk(KERN_ERR "BTRFS: memmove bogus src_offset %lu move "
5163 "len %lu len %lu\n", src_offset, len, dst->len); 5204 "len %lu len %lu\n", src_offset, len, dst->len);
5164 BUG_ON(1); 5205 BUG_ON(1);
5165 } 5206 }
5166 if (dst_offset + len > dst->len) { 5207 if (dst_offset + len > dst->len) {
5167 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " 5208 printk(KERN_ERR "BTRFS: memmove bogus dst_offset %lu move "
5168 "len %lu len %lu\n", dst_offset, len, dst->len); 5209 "len %lu len %lu\n", dst_offset, len, dst->len);
5169 BUG_ON(1); 5210 BUG_ON(1);
5170 } 5211 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 19620c58f096..58b27e5ab521 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -43,6 +43,7 @@
43#define EXTENT_BUFFER_WRITEBACK 7 43#define EXTENT_BUFFER_WRITEBACK 7
44#define EXTENT_BUFFER_IOERR 8 44#define EXTENT_BUFFER_IOERR 8
45#define EXTENT_BUFFER_DUMMY 9 45#define EXTENT_BUFFER_DUMMY 9
46#define EXTENT_BUFFER_IN_TREE 10
46 47
47/* these are flags for extent_clear_unlock_delalloc */ 48/* these are flags for extent_clear_unlock_delalloc */
48#define PAGE_UNLOCK (1 << 0) 49#define PAGE_UNLOCK (1 << 0)
@@ -94,12 +95,10 @@ struct extent_io_ops {
94 95
95struct extent_io_tree { 96struct extent_io_tree {
96 struct rb_root state; 97 struct rb_root state;
97 struct radix_tree_root buffer;
98 struct address_space *mapping; 98 struct address_space *mapping;
99 u64 dirty_bytes; 99 u64 dirty_bytes;
100 int track_uptodate; 100 int track_uptodate;
101 spinlock_t lock; 101 spinlock_t lock;
102 spinlock_t buffer_lock;
103 struct extent_io_ops *ops; 102 struct extent_io_ops *ops;
104}; 103};
105 104
@@ -130,7 +129,7 @@ struct extent_buffer {
130 unsigned long map_start; 129 unsigned long map_start;
131 unsigned long map_len; 130 unsigned long map_len;
132 unsigned long bflags; 131 unsigned long bflags;
133 struct extent_io_tree *tree; 132 struct btrfs_fs_info *fs_info;
134 spinlock_t refs_lock; 133 spinlock_t refs_lock;
135 atomic_t refs; 134 atomic_t refs;
136 atomic_t io_pages; 135 atomic_t io_pages;
@@ -266,11 +265,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
266int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); 265int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
267void set_page_extent_mapped(struct page *page); 266void set_page_extent_mapped(struct page *page);
268 267
269struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 268struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
270 u64 start, unsigned long len); 269 u64 start, unsigned long len);
271struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); 270struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
272struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); 271struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
273struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 272struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
274 u64 start); 273 u64 start);
275void free_extent_buffer(struct extent_buffer *eb); 274void free_extent_buffer(struct extent_buffer *eb);
276void free_extent_buffer_stale(struct extent_buffer *eb); 275void free_extent_buffer_stale(struct extent_buffer *eb);
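The header change completes the move of the extent-buffer cache from per-extent_io_tree state (tree->buffer plus buffer_lock) to a single fs_info->buffer_radix, which is why find_extent_buffer() and alloc_extent_buffer() now take a btrfs_fs_info. Lookups stay lockless: RCU protects the radix walk, and atomic_inc_not_zero() only takes a reference if the buffer is not already on its way to zero refs, so a dying buffer cannot be resurrected. A condensed sketch of that lookup (function name illustrative, mirroring find_extent_buffer() above):

/* RCU lookup + conditional ref: fails cleanly if the buffer is
 * concurrently being released. */
static struct extent_buffer *eb_lookup(struct btrfs_fs_info *fs_info,
				       u64 start)
{
	struct extent_buffer *eb;

	rcu_read_lock();
	eb = radix_tree_lookup(&fs_info->buffer_radix,
			       start >> PAGE_CACHE_SHIFT);
	if (eb && atomic_inc_not_zero(&eb->refs)) {
		rcu_read_unlock();
		return eb;	/* caller now holds a reference */
	}
	rcu_read_unlock();
	return NULL;
}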
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a4a7a1a8da95..996ad56b57db 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -79,12 +79,21 @@ void free_extent_map(struct extent_map *em)
79 } 79 }
80} 80}
81 81
82static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 82/* simple helper to do math around the end of an extent, handling wrap */
83 struct rb_node *node) 83static u64 range_end(u64 start, u64 len)
84{
85 if (start + len < start)
86 return (u64)-1;
87 return start + len;
88}
89
90static int tree_insert(struct rb_root *root, struct extent_map *em)
84{ 91{
85 struct rb_node **p = &root->rb_node; 92 struct rb_node **p = &root->rb_node;
86 struct rb_node *parent = NULL; 93 struct rb_node *parent = NULL;
87 struct extent_map *entry; 94 struct extent_map *entry = NULL;
95 struct rb_node *orig_parent = NULL;
96 u64 end = range_end(em->start, em->len);
88 97
89 while (*p) { 98 while (*p) {
90 parent = *p; 99 parent = *p;
@@ -92,19 +101,37 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
92 101
93 WARN_ON(!entry->in_tree); 102 WARN_ON(!entry->in_tree);
94 103
95 if (offset < entry->start) 104 if (em->start < entry->start)
96 p = &(*p)->rb_left; 105 p = &(*p)->rb_left;
97 else if (offset >= extent_map_end(entry)) 106 else if (em->start >= extent_map_end(entry))
98 p = &(*p)->rb_right; 107 p = &(*p)->rb_right;
99 else 108 else
100 return parent; 109 return -EEXIST;
101 } 110 }
102 111
103 entry = rb_entry(node, struct extent_map, rb_node); 112 orig_parent = parent;
104 entry->in_tree = 1; 113 while (parent && em->start >= extent_map_end(entry)) {
105 rb_link_node(node, parent, p); 114 parent = rb_next(parent);
106 rb_insert_color(node, root); 115 entry = rb_entry(parent, struct extent_map, rb_node);
107 return NULL; 116 }
117 if (parent)
118 if (end > entry->start && em->start < extent_map_end(entry))
119 return -EEXIST;
120
121 parent = orig_parent;
122 entry = rb_entry(parent, struct extent_map, rb_node);
123 while (parent && em->start < entry->start) {
124 parent = rb_prev(parent);
125 entry = rb_entry(parent, struct extent_map, rb_node);
126 }
127 if (parent)
128 if (end > entry->start && em->start < extent_map_end(entry))
129 return -EEXIST;
130
131 em->in_tree = 1;
132 rb_link_node(&em->rb_node, orig_parent, p);
133 rb_insert_color(&em->rb_node, root);
134 return 0;
108} 135}
109 136
110/* 137/*
@@ -228,7 +255,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
228 merge = rb_entry(rb, struct extent_map, rb_node); 255 merge = rb_entry(rb, struct extent_map, rb_node);
229 if (rb && mergable_maps(em, merge)) { 256 if (rb && mergable_maps(em, merge)) {
230 em->len += merge->len; 257 em->len += merge->len;
231 em->block_len += merge->len; 258 em->block_len += merge->block_len;
232 rb_erase(&merge->rb_node, &tree->map); 259 rb_erase(&merge->rb_node, &tree->map);
233 merge->in_tree = 0; 260 merge->in_tree = 0;
234 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; 261 em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
@@ -310,20 +337,11 @@ int add_extent_mapping(struct extent_map_tree *tree,
310 struct extent_map *em, int modified) 337 struct extent_map *em, int modified)
311{ 338{
312 int ret = 0; 339 int ret = 0;
313 struct rb_node *rb;
314 struct extent_map *exist;
315 340
316 exist = lookup_extent_mapping(tree, em->start, em->len); 341 ret = tree_insert(&tree->map, em);
317 if (exist) { 342 if (ret)
318 free_extent_map(exist);
319 ret = -EEXIST;
320 goto out;
321 }
322 rb = tree_insert(&tree->map, em->start, &em->rb_node);
323 if (rb) {
324 ret = -EEXIST;
325 goto out; 343 goto out;
326 } 344
327 atomic_inc(&em->refs); 345 atomic_inc(&em->refs);
328 346
329 em->mod_start = em->start; 347 em->mod_start = em->start;
@@ -337,14 +355,6 @@ out:
337 return ret; 355 return ret;
338} 356}
339 357
340/* simple helper to do math around the end of an extent, handling wrap */
341static u64 range_end(u64 start, u64 len)
342{
343 if (start + len < start)
344 return (u64)-1;
345 return start + len;
346}
347
348static struct extent_map * 358static struct extent_map *
349__lookup_extent_mapping(struct extent_map_tree *tree, 359__lookup_extent_mapping(struct extent_map_tree *tree,
350 u64 start, u64 len, int strict) 360 u64 start, u64 len, int strict)
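Two things happen in the extent_map.c hunks: tree_insert() now detects range overlap itself and returns -EEXIST, letting add_extent_mapping() drop its separate lookup_extent_mapping() pre-check (one tree walk instead of two), and try_merge_map() is fixed to grow block_len by the merged extent's block_len rather than its logical len. The overlap detection checks both neighbors of the insertion point, using the relocated range_end() helper to saturate on u64 wraparound. A self-contained restatement of that test:

#include <linux/types.h>

/* end-of-range with wrap protection, as in the hunk above */
static u64 range_end(u64 start, u64 len)
{
	if (start + len < start)
		return (u64)-1;
	return start + len;
}

/* half-open overlap test, applied to each neighbor of the
 * insertion point */
static bool ranges_overlap(u64 a_start, u64 a_len,
			   u64 b_start, u64 b_len)
{
	return range_end(a_start, a_len) > b_start &&
	       range_end(b_start, b_len) > a_start;
}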
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 6f3848860283..127555b29f58 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -182,7 +182,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
182 if (!path) 182 if (!path)
183 return -ENOMEM; 183 return -ENOMEM;
184 184
185 nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits; 185 nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
186 if (!dst) { 186 if (!dst) {
187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { 187 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, 188 btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
@@ -201,7 +201,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
201 csum = (u8 *)dst; 201 csum = (u8 *)dst;
202 } 202 }
203 203
204 if (bio->bi_size > PAGE_CACHE_SIZE * 8) 204 if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
205 path->reada = 2; 205 path->reada = 2;
206 206
207 WARN_ON(bio->bi_vcnt <= 0); 207 WARN_ON(bio->bi_vcnt <= 0);
@@ -217,7 +217,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
217 path->skip_locking = 1; 217 path->skip_locking = 1;
218 } 218 }
219 219
220 disk_bytenr = (u64)bio->bi_sector << 9; 220 disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
221 if (dio) 221 if (dio)
222 offset = logical_offset; 222 offset = logical_offset;
223 while (bio_index < bio->bi_vcnt) { 223 while (bio_index < bio->bi_vcnt) {
@@ -246,8 +246,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
246 offset + bvec->bv_len - 1, 246 offset + bvec->bv_len - 1,
247 EXTENT_NODATASUM, GFP_NOFS); 247 EXTENT_NODATASUM, GFP_NOFS);
248 } else { 248 } else {
249 printk(KERN_INFO "btrfs no csum found " 249 btrfs_info(BTRFS_I(inode)->root->fs_info,
250 "for inode %llu start %llu\n", 250 "no csum found for inode %llu start %llu",
251 btrfs_ino(inode), offset); 251 btrfs_ino(inode), offset);
252 } 252 }
253 item = NULL; 253 item = NULL;
@@ -302,7 +302,7 @@ int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
302 struct btrfs_dio_private *dip, struct bio *bio, 302 struct btrfs_dio_private *dip, struct bio *bio,
303 u64 offset) 303 u64 offset)
304{ 304{
305 int len = (bio->bi_sector << 9) - dip->disk_bytenr; 305 int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
306 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 306 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
307 int ret; 307 int ret;
308 308
@@ -447,11 +447,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
447 u64 offset; 447 u64 offset;
448 448
449 WARN_ON(bio->bi_vcnt <= 0); 449 WARN_ON(bio->bi_vcnt <= 0);
450 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); 450 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_iter.bi_size),
451 GFP_NOFS);
451 if (!sums) 452 if (!sums)
452 return -ENOMEM; 453 return -ENOMEM;
453 454
454 sums->len = bio->bi_size; 455 sums->len = bio->bi_iter.bi_size;
455 INIT_LIST_HEAD(&sums->list); 456 INIT_LIST_HEAD(&sums->list);
456 457
457 if (contig) 458 if (contig)
@@ -461,7 +462,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
461 462
462 ordered = btrfs_lookup_ordered_extent(inode, offset); 463 ordered = btrfs_lookup_ordered_extent(inode, offset);
463 BUG_ON(!ordered); /* Logic error */ 464 BUG_ON(!ordered); /* Logic error */
464 sums->bytenr = (u64)bio->bi_sector << 9; 465 sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
465 index = 0; 466 index = 0;
466 467
467 while (bio_index < bio->bi_vcnt) { 468 while (bio_index < bio->bi_vcnt) {
@@ -476,7 +477,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
476 btrfs_add_ordered_sum(inode, ordered, sums); 477 btrfs_add_ordered_sum(inode, ordered, sums);
477 btrfs_put_ordered_extent(ordered); 478 btrfs_put_ordered_extent(ordered);
478 479
479 bytes_left = bio->bi_size - total_bytes; 480 bytes_left = bio->bi_iter.bi_size - total_bytes;
480 481
481 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left), 482 sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
482 GFP_NOFS); 483 GFP_NOFS);
@@ -484,7 +485,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
484 sums->len = bytes_left; 485 sums->len = bytes_left;
485 ordered = btrfs_lookup_ordered_extent(inode, offset); 486 ordered = btrfs_lookup_ordered_extent(inode, offset);
486 BUG_ON(!ordered); /* Logic error */ 487 BUG_ON(!ordered); /* Logic error */
487 sums->bytenr = ((u64)bio->bi_sector << 9) + 488 sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +
488 total_bytes; 489 total_bytes;
489 index = 0; 490 index = 0;
490 } 491 }
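
The file-item.c conversions above follow the block layer's immutable biovec work: bi_sector and bi_size moved from struct bio into the embedded struct bvec_iter, so every accessor becomes bio->bi_iter.bi_*. A small stand-alone sketch of the accounting, with simplified stand-ins for the kernel structs (the shift by 9 converts 512-byte sectors to a byte offset):

#include <stdio.h>

/* simplified stand-ins, not the kernel definitions */
struct bvec_iter {
	unsigned long long bi_sector;	/* device address, 512-byte sectors */
	unsigned int bi_size;		/* residual I/O size, bytes */
};

struct bio {
	struct bvec_iter bi_iter;	/* was: bi_sector/bi_size on the bio */
};

int main(void)
{
	struct bio bio = { { 1024, 8192 } };

	/* sector-to-byte conversion, as in "bi_sector << 9" above */
	unsigned long long disk_bytenr =
			(unsigned long long)bio.bi_iter.bi_sector << 9;

	printf("bytenr=%llu size=%u\n", disk_bytenr, bio.bi_iter.bi_size);
	return 0;
}
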
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 82d0342763c5..0165b8672f09 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -692,7 +692,10 @@ next:
692int __btrfs_drop_extents(struct btrfs_trans_handle *trans, 692int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
693 struct btrfs_root *root, struct inode *inode, 693 struct btrfs_root *root, struct inode *inode,
694 struct btrfs_path *path, u64 start, u64 end, 694 struct btrfs_path *path, u64 start, u64 end,
695 u64 *drop_end, int drop_cache) 695 u64 *drop_end, int drop_cache,
696 int replace_extent,
697 u32 extent_item_size,
698 int *key_inserted)
696{ 699{
697 struct extent_buffer *leaf; 700 struct extent_buffer *leaf;
698 struct btrfs_file_extent_item *fi; 701 struct btrfs_file_extent_item *fi;
@@ -712,6 +715,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
712 int modify_tree = -1; 715 int modify_tree = -1;
713 int update_refs = (root->ref_cows || root == root->fs_info->tree_root); 716 int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
714 int found = 0; 717 int found = 0;
718 int leafs_visited = 0;
715 719
716 if (drop_cache) 720 if (drop_cache)
717 btrfs_drop_extent_cache(inode, start, end - 1, 0); 721 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -733,6 +737,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
733 path->slots[0]--; 737 path->slots[0]--;
734 } 738 }
735 ret = 0; 739 ret = 0;
740 leafs_visited++;
736next_slot: 741next_slot:
737 leaf = path->nodes[0]; 742 leaf = path->nodes[0];
738 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 743 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -744,6 +749,7 @@ next_slot:
744 ret = 0; 749 ret = 0;
745 break; 750 break;
746 } 751 }
752 leafs_visited++;
747 leaf = path->nodes[0]; 753 leaf = path->nodes[0];
748 recow = 1; 754 recow = 1;
749 } 755 }
@@ -766,7 +772,8 @@ next_slot:
766 btrfs_file_extent_num_bytes(leaf, fi); 772 btrfs_file_extent_num_bytes(leaf, fi);
767 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 773 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
768 extent_end = key.offset + 774 extent_end = key.offset +
769 btrfs_file_extent_inline_len(leaf, fi); 775 btrfs_file_extent_inline_len(leaf,
776 path->slots[0], fi);
770 } else { 777 } else {
771 WARN_ON(1); 778 WARN_ON(1);
772 extent_end = search_start; 779 extent_end = search_start;
@@ -927,14 +934,44 @@ next_slot:
927 } 934 }
928 935
929 if (!ret && del_nr > 0) { 936 if (!ret && del_nr > 0) {
937 /*
938 * Set path->slots[0] to first slot, so that after the delete
 939 * if items are moved off from our leaf to its immediate left or
 940 * right neighbor leaves, we end up with a correct and adjusted
941 * path->slots[0] for our insertion.
942 */
943 path->slots[0] = del_slot;
930 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 944 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
931 if (ret) 945 if (ret)
932 btrfs_abort_transaction(trans, root, ret); 946 btrfs_abort_transaction(trans, root, ret);
947
948 leaf = path->nodes[0];
949 /*
950 * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that
951 * is, its contents got pushed to its neighbors), in which case
952 * it means path->locks[0] == 0
953 */
954 if (!ret && replace_extent && leafs_visited == 1 &&
955 path->locks[0] &&
956 btrfs_leaf_free_space(root, leaf) >=
957 sizeof(struct btrfs_item) + extent_item_size) {
958
959 key.objectid = ino;
960 key.type = BTRFS_EXTENT_DATA_KEY;
961 key.offset = start;
962 setup_items_for_insert(root, path, &key,
963 &extent_item_size,
964 extent_item_size,
965 sizeof(struct btrfs_item) +
966 extent_item_size, 1);
967 *key_inserted = 1;
968 }
933 } 969 }
934 970
971 if (!replace_extent || !(*key_inserted))
972 btrfs_release_path(path);
935 if (drop_end) 973 if (drop_end)
936 *drop_end = found ? min(end, extent_end) : end; 974 *drop_end = found ? min(end, extent_end) : end;
937 btrfs_release_path(path);
938 return ret; 975 return ret;
939} 976}
940 977
@@ -949,7 +986,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
949 if (!path) 986 if (!path)
950 return -ENOMEM; 987 return -ENOMEM;
951 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, 988 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
952 drop_cache); 989 drop_cache, 0, 0, NULL);
953 btrfs_free_path(path); 990 btrfs_free_path(path);
954 return ret; 991 return ret;
955} 992}
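
__btrfs_drop_extents() gains a replace_extent mode: a caller that will immediately insert a new file extent item at the dropped position can have the helper do the insertion itself while it still holds a path to the right leaf, reported back through *key_inserted, so the caller skips a second btrfs_search_slot(). A toy sketch of that contract, using a flat array as the "leaf" (illustrative names, not the real data structures):

#include <stdio.h>

#define CAP 8			/* toy "leaf" capacity */
static int items[CAP];
static int nitems;

/*
 * Drop items in [lo, hi) and, when asked, place the replacement value
 * straight into the freed space.  *inserted tells the caller whether a
 * second insert pass is still needed -- the same contract the diff adds
 * via the replace_extent and *key_inserted arguments.
 */
static void drop_range(int lo, int hi, int replace, int newval, int *inserted)
{
	int i, j = 0;

	*inserted = 0;
	for (i = 0; i < nitems; i++)
		if (items[i] < lo || items[i] >= hi)
			items[j++] = items[i];
	nitems = j;

	if (replace && nitems < CAP) {	/* enough room: insert in place */
		items[nitems++] = newval;
		*inserted = 1;
	}
}

int main(void)
{
	int ins;

	items[0] = 1; items[1] = 5; items[2] = 9; nitems = 3;
	drop_range(4, 10, 1, 7, &ins);
	printf("inserted=%d nitems=%d\n", ins, nitems);
	if (!ins)
		printf("caller would search and insert separately\n");
	return 0;
}
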
@@ -1235,29 +1272,18 @@ static int prepare_uptodate_page(struct page *page, u64 pos,
1235} 1272}
1236 1273
1237/* 1274/*
1238 * this gets pages into the page cache and locks them down, it also properly 1275 * this just gets pages into the page cache and locks them down.
1239 * waits for data=ordered extents to finish before allowing the pages to be
1240 * modified.
1241 */ 1276 */
1242static noinline int prepare_pages(struct btrfs_root *root, struct file *file, 1277static noinline int prepare_pages(struct inode *inode, struct page **pages,
1243 struct page **pages, size_t num_pages, 1278 size_t num_pages, loff_t pos,
1244 loff_t pos, unsigned long first_index, 1279 size_t write_bytes, bool force_uptodate)
1245 size_t write_bytes, bool force_uptodate)
1246{ 1280{
1247 struct extent_state *cached_state = NULL;
1248 int i; 1281 int i;
1249 unsigned long index = pos >> PAGE_CACHE_SHIFT; 1282 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1250 struct inode *inode = file_inode(file);
1251 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 1283 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
1252 int err = 0; 1284 int err = 0;
1253 int faili = 0; 1285 int faili;
1254 u64 start_pos;
1255 u64 last_pos;
1256
1257 start_pos = pos & ~((u64)root->sectorsize - 1);
1258 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
1259 1286
1260again:
1261 for (i = 0; i < num_pages; i++) { 1287 for (i = 0; i < num_pages; i++) {
1262 pages[i] = find_or_create_page(inode->i_mapping, index + i, 1288 pages[i] = find_or_create_page(inode->i_mapping, index + i,
1263 mask | __GFP_WRITE); 1289 mask | __GFP_WRITE);
@@ -1280,57 +1306,85 @@ again:
1280 } 1306 }
1281 wait_on_page_writeback(pages[i]); 1307 wait_on_page_writeback(pages[i]);
1282 } 1308 }
1283 faili = num_pages - 1; 1309
1284 err = 0; 1310 return 0;
1311fail:
1312 while (faili >= 0) {
1313 unlock_page(pages[faili]);
1314 page_cache_release(pages[faili]);
1315 faili--;
1316 }
1317 return err;
1318
1319}
1320
1321/*
1322 * This function locks the extent and properly waits for data=ordered extents
 1323 * to finish before allowing the pages to be modified if needed.
1324 *
1325 * The return value:
1326 * 1 - the extent is locked
1327 * 0 - the extent is not locked, and everything is OK
 1328 * -EAGAIN - need to re-prepare the pages
 1329 * any other < 0 number - something went wrong
1330 */
1331static noinline int
1332lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
1333 size_t num_pages, loff_t pos,
1334 u64 *lockstart, u64 *lockend,
1335 struct extent_state **cached_state)
1336{
1337 u64 start_pos;
1338 u64 last_pos;
1339 int i;
1340 int ret = 0;
1341
1342 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1343 last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
1344
1285 if (start_pos < inode->i_size) { 1345 if (start_pos < inode->i_size) {
1286 struct btrfs_ordered_extent *ordered; 1346 struct btrfs_ordered_extent *ordered;
1287 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1347 lock_extent_bits(&BTRFS_I(inode)->io_tree,
1288 start_pos, last_pos - 1, 0, &cached_state); 1348 start_pos, last_pos, 0, cached_state);
1289 ordered = btrfs_lookup_first_ordered_extent(inode, 1349 ordered = btrfs_lookup_first_ordered_extent(inode, last_pos);
1290 last_pos - 1);
1291 if (ordered && 1350 if (ordered &&
1292 ordered->file_offset + ordered->len > start_pos && 1351 ordered->file_offset + ordered->len > start_pos &&
1293 ordered->file_offset < last_pos) { 1352 ordered->file_offset <= last_pos) {
1294 btrfs_put_ordered_extent(ordered); 1353 btrfs_put_ordered_extent(ordered);
1295 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1354 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1296 start_pos, last_pos - 1, 1355 start_pos, last_pos,
1297 &cached_state, GFP_NOFS); 1356 cached_state, GFP_NOFS);
1298 for (i = 0; i < num_pages; i++) { 1357 for (i = 0; i < num_pages; i++) {
1299 unlock_page(pages[i]); 1358 unlock_page(pages[i]);
1300 page_cache_release(pages[i]); 1359 page_cache_release(pages[i]);
1301 } 1360 }
1302 err = btrfs_wait_ordered_range(inode, start_pos, 1361 ret = btrfs_wait_ordered_range(inode, start_pos,
1303 last_pos - start_pos); 1362 last_pos - start_pos + 1);
1304 if (err) 1363 if (ret)
1305 goto fail; 1364 return ret;
1306 goto again; 1365 else
1366 return -EAGAIN;
1307 } 1367 }
1308 if (ordered) 1368 if (ordered)
1309 btrfs_put_ordered_extent(ordered); 1369 btrfs_put_ordered_extent(ordered);
1310 1370
1311 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, 1371 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
1312 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 1372 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
1313 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1373 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
1314 0, 0, &cached_state, GFP_NOFS); 1374 0, 0, cached_state, GFP_NOFS);
1315 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1375 *lockstart = start_pos;
1316 start_pos, last_pos - 1, &cached_state, 1376 *lockend = last_pos;
1317 GFP_NOFS); 1377 ret = 1;
1318 } 1378 }
1379
1319 for (i = 0; i < num_pages; i++) { 1380 for (i = 0; i < num_pages; i++) {
1320 if (clear_page_dirty_for_io(pages[i])) 1381 if (clear_page_dirty_for_io(pages[i]))
1321 account_page_redirty(pages[i]); 1382 account_page_redirty(pages[i]);
1322 set_page_extent_mapped(pages[i]); 1383 set_page_extent_mapped(pages[i]);
1323 WARN_ON(!PageLocked(pages[i])); 1384 WARN_ON(!PageLocked(pages[i]));
1324 } 1385 }
1325 return 0;
1326fail:
1327 while (faili >= 0) {
1328 unlock_page(pages[faili]);
1329 page_cache_release(pages[faili]);
1330 faili--;
1331 }
1332 return err;
1333 1386
1387 return ret;
1334} 1388}
1335 1389
1336static noinline int check_can_nocow(struct inode *inode, loff_t pos, 1390static noinline int check_can_nocow(struct inode *inode, loff_t pos,
@@ -1381,13 +1435,17 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1381 struct inode *inode = file_inode(file); 1435 struct inode *inode = file_inode(file);
1382 struct btrfs_root *root = BTRFS_I(inode)->root; 1436 struct btrfs_root *root = BTRFS_I(inode)->root;
1383 struct page **pages = NULL; 1437 struct page **pages = NULL;
1438 struct extent_state *cached_state = NULL;
1384 u64 release_bytes = 0; 1439 u64 release_bytes = 0;
1440 u64 lockstart;
1441 u64 lockend;
1385 unsigned long first_index; 1442 unsigned long first_index;
1386 size_t num_written = 0; 1443 size_t num_written = 0;
1387 int nrptrs; 1444 int nrptrs;
1388 int ret = 0; 1445 int ret = 0;
1389 bool only_release_metadata = false; 1446 bool only_release_metadata = false;
1390 bool force_page_uptodate = false; 1447 bool force_page_uptodate = false;
1448 bool need_unlock;
1391 1449
1392 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1450 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
1393 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 1451 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -1456,18 +1514,31 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1456 } 1514 }
1457 1515
1458 release_bytes = reserve_bytes; 1516 release_bytes = reserve_bytes;
1459 1517 need_unlock = false;
1518again:
1460 /* 1519 /*
1461 * This is going to setup the pages array with the number of 1520 * This is going to setup the pages array with the number of
1462 * pages we want, so we don't really need to worry about the 1521 * pages we want, so we don't really need to worry about the
1463 * contents of pages from loop to loop 1522 * contents of pages from loop to loop
1464 */ 1523 */
1465 ret = prepare_pages(root, file, pages, num_pages, 1524 ret = prepare_pages(inode, pages, num_pages,
1466 pos, first_index, write_bytes, 1525 pos, write_bytes,
1467 force_page_uptodate); 1526 force_page_uptodate);
1468 if (ret) 1527 if (ret)
1469 break; 1528 break;
1470 1529
1530 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
1531 pos, &lockstart, &lockend,
1532 &cached_state);
1533 if (ret < 0) {
1534 if (ret == -EAGAIN)
1535 goto again;
1536 break;
1537 } else if (ret > 0) {
1538 need_unlock = true;
1539 ret = 0;
1540 }
1541
1471 copied = btrfs_copy_from_user(pos, num_pages, 1542 copied = btrfs_copy_from_user(pos, num_pages,
1472 write_bytes, pages, i); 1543 write_bytes, pages, i);
1473 1544
@@ -1512,19 +1583,21 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1512 } 1583 }
1513 1584
1514 release_bytes = dirty_pages << PAGE_CACHE_SHIFT; 1585 release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
1515 if (copied > 0) { 1586
1587 if (copied > 0)
1516 ret = btrfs_dirty_pages(root, inode, pages, 1588 ret = btrfs_dirty_pages(root, inode, pages,
1517 dirty_pages, pos, copied, 1589 dirty_pages, pos, copied,
1518 NULL); 1590 NULL);
1519 if (ret) { 1591 if (need_unlock)
1520 btrfs_drop_pages(pages, num_pages); 1592 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1521 break; 1593 lockstart, lockend, &cached_state,
1522 } 1594 GFP_NOFS);
1595 if (ret) {
1596 btrfs_drop_pages(pages, num_pages);
1597 break;
1523 } 1598 }
1524 1599
1525 release_bytes = 0; 1600 release_bytes = 0;
1526 btrfs_drop_pages(pages, num_pages);
1527
1528 if (only_release_metadata && copied > 0) { 1601 if (only_release_metadata && copied > 0) {
1529 u64 lockstart = round_down(pos, root->sectorsize); 1602 u64 lockstart = round_down(pos, root->sectorsize);
1530 u64 lockend = lockstart + 1603 u64 lockend = lockstart +
@@ -1536,6 +1609,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1536 only_release_metadata = false; 1609 only_release_metadata = false;
1537 } 1610 }
1538 1611
1612 btrfs_drop_pages(pages, num_pages);
1613
1539 cond_resched(); 1614 cond_resched();
1540 1615
1541 balance_dirty_pages_ratelimited(inode->i_mapping); 1616 balance_dirty_pages_ratelimited(inode->i_mapping);
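
The buffered-write loop now splits page preparation from extent locking and retries on -EAGAIN, which lock_and_cleanup_extent_if_need() returns after dropping the pages to wait for ordered I/O; a positive return means the extent is locked and must be unlocked after the copy. A compact user-space sketch of that retry protocol (stub helper, illustrative only):

#include <errno.h>
#include <stdio.h>

static int attempts;

/* stub: pretend the first attempt races with ordered I/O in flight */
static int lock_and_cleanup_if_need(void)
{
	return ++attempts == 1 ? -EAGAIN : 1;
}

int main(void)
{
	int ret, need_unlock = 0;

again:
	/* prepare_pages() would run here on every pass */
	ret = lock_and_cleanup_if_need();
	if (ret < 0) {
		if (ret == -EAGAIN)
			goto again;	/* pages were dropped; redo them */
		return 1;
	} else if (ret > 0) {
		need_unlock = 1;	/* extent locked; unlock after copy */
	}
	printf("attempts=%d need_unlock=%d\n", attempts, need_unlock);
	return 0;
}
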
@@ -1857,12 +1932,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1857 if (file->private_data) 1932 if (file->private_data)
1858 btrfs_ioctl_trans_end(file); 1933 btrfs_ioctl_trans_end(file);
1859 1934
1935 /*
1936 * We use start here because we will need to wait on the IO to complete
1937 * in btrfs_sync_log, which could require joining a transaction (for
1938 * example checking cross references in the nocow path). If we use join
1939 * here we could get into a situation where we're waiting on IO to
1940 * happen that is blocked on a transaction trying to commit. With start
1941 * we inc the extwriter counter, so we wait for all extwriters to exit
1942 * before we start blocking join'ers. This comment is to keep somebody
1943 * from thinking they are super smart and changing this to
1944 * btrfs_join_transaction *cough*Josef*cough*.
1945 */
1860 trans = btrfs_start_transaction(root, 0); 1946 trans = btrfs_start_transaction(root, 0);
1861 if (IS_ERR(trans)) { 1947 if (IS_ERR(trans)) {
1862 ret = PTR_ERR(trans); 1948 ret = PTR_ERR(trans);
1863 mutex_unlock(&inode->i_mutex); 1949 mutex_unlock(&inode->i_mutex);
1864 goto out; 1950 goto out;
1865 } 1951 }
1952 trans->sync = true;
1866 1953
1867 ret = btrfs_log_dentry_safe(trans, root, dentry); 1954 ret = btrfs_log_dentry_safe(trans, root, dentry);
1868 if (ret < 0) { 1955 if (ret < 0) {
@@ -1963,11 +2050,13 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
1963 struct btrfs_key key; 2050 struct btrfs_key key;
1964 int ret; 2051 int ret;
1965 2052
2053 if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
2054 goto out;
2055
1966 key.objectid = btrfs_ino(inode); 2056 key.objectid = btrfs_ino(inode);
1967 key.type = BTRFS_EXTENT_DATA_KEY; 2057 key.type = BTRFS_EXTENT_DATA_KEY;
1968 key.offset = offset; 2058 key.offset = offset;
1969 2059
1970
1971 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 2060 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1972 if (ret < 0) 2061 if (ret < 0)
1973 return ret; 2062 return ret;
@@ -2064,8 +2153,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2064 u64 drop_end; 2153 u64 drop_end;
2065 int ret = 0; 2154 int ret = 0;
2066 int err = 0; 2155 int err = 0;
2156 int rsv_count;
2067 bool same_page = ((offset >> PAGE_CACHE_SHIFT) == 2157 bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
2068 ((offset + len - 1) >> PAGE_CACHE_SHIFT)); 2158 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
2159 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2069 2160
2070 ret = btrfs_wait_ordered_range(inode, offset, len); 2161 ret = btrfs_wait_ordered_range(inode, offset, len);
2071 if (ret) 2162 if (ret)
@@ -2125,7 +2216,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2125 * we need to try again. 2216 * we need to try again.
2126 */ 2217 */
2127 if ((!ordered || 2218 if ((!ordered ||
2128 (ordered->file_offset + ordered->len < lockstart || 2219 (ordered->file_offset + ordered->len <= lockstart ||
2129 ordered->file_offset > lockend)) && 2220 ordered->file_offset > lockend)) &&
2130 !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, 2221 !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
2131 lockend, EXTENT_UPTODATE, 0, 2222 lockend, EXTENT_UPTODATE, 0,
@@ -2163,9 +2254,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2163 /* 2254 /*
2164 * 1 - update the inode 2255 * 1 - update the inode
2165 * 1 - removing the extents in the range 2256 * 1 - removing the extents in the range
2166 * 1 - adding the hole extent 2257 * 1 - adding the hole extent if no_holes isn't set
2167 */ 2258 */
2168 trans = btrfs_start_transaction(root, 3); 2259 rsv_count = no_holes ? 2 : 3;
2260 trans = btrfs_start_transaction(root, rsv_count);
2169 if (IS_ERR(trans)) { 2261 if (IS_ERR(trans)) {
2170 err = PTR_ERR(trans); 2262 err = PTR_ERR(trans);
2171 goto out_free; 2263 goto out_free;
@@ -2179,7 +2271,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2179 while (cur_offset < lockend) { 2271 while (cur_offset < lockend) {
2180 ret = __btrfs_drop_extents(trans, root, inode, path, 2272 ret = __btrfs_drop_extents(trans, root, inode, path,
2181 cur_offset, lockend + 1, 2273 cur_offset, lockend + 1,
2182 &drop_end, 1); 2274 &drop_end, 1, 0, 0, NULL);
2183 if (ret != -ENOSPC) 2275 if (ret != -ENOSPC)
2184 break; 2276 break;
2185 2277
@@ -2202,7 +2294,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2202 btrfs_end_transaction(trans, root); 2294 btrfs_end_transaction(trans, root);
2203 btrfs_btree_balance_dirty(root); 2295 btrfs_btree_balance_dirty(root);
2204 2296
2205 trans = btrfs_start_transaction(root, 3); 2297 trans = btrfs_start_transaction(root, rsv_count);
2206 if (IS_ERR(trans)) { 2298 if (IS_ERR(trans)) {
2207 ret = PTR_ERR(trans); 2299 ret = PTR_ERR(trans);
2208 trans = NULL; 2300 trans = NULL;
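
With the NO_HOLES incompat feature no hole extent item is inserted on punch, so the hole-punching path above reserves one fewer item per transaction. A trivial sketch of the flag check and reservation sizing (the flag bit value and names are assumed, for illustration only):

#include <stdio.h>

#define FEATURE_NO_HOLES (1u << 0)	/* assumed bit, for illustration */

int main(void)
{
	unsigned int incompat = FEATURE_NO_HOLES;
	int no_holes = !!(incompat & FEATURE_NO_HOLES);

	/*
	 * 1 - update the inode
	 * 1 - removing the extents in the range
	 * 1 - adding the hole extent, only if no_holes isn't set
	 */
	int rsv_count = no_holes ? 2 : 3;

	printf("reserving %d items\n", rsv_count);
	return 0;
}
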
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 057be95b1e1e..73f3de7a083c 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -347,8 +347,8 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
347 btrfs_readpage(NULL, page); 347 btrfs_readpage(NULL, page);
348 lock_page(page); 348 lock_page(page);
349 if (!PageUptodate(page)) { 349 if (!PageUptodate(page)) {
350 printk(KERN_ERR "btrfs: error reading free " 350 btrfs_err(BTRFS_I(inode)->root->fs_info,
351 "space cache\n"); 351 "error reading free space cache");
352 io_ctl_drop_pages(io_ctl); 352 io_ctl_drop_pages(io_ctl);
353 return -EIO; 353 return -EIO;
354 } 354 }
@@ -405,7 +405,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
405 405
406 gen = io_ctl->cur; 406 gen = io_ctl->cur;
407 if (le64_to_cpu(*gen) != generation) { 407 if (le64_to_cpu(*gen) != generation) {
408 printk_ratelimited(KERN_ERR "btrfs: space cache generation " 408 printk_ratelimited(KERN_ERR "BTRFS: space cache generation "
409 "(%Lu) does not match inode (%Lu)\n", *gen, 409 "(%Lu) does not match inode (%Lu)\n", *gen,
410 generation); 410 generation);
411 io_ctl_unmap_page(io_ctl); 411 io_ctl_unmap_page(io_ctl);
@@ -463,7 +463,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
463 PAGE_CACHE_SIZE - offset); 463 PAGE_CACHE_SIZE - offset);
464 btrfs_csum_final(crc, (char *)&crc); 464 btrfs_csum_final(crc, (char *)&crc);
465 if (val != crc) { 465 if (val != crc) {
466 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free " 466 printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free "
467 "space cache\n"); 467 "space cache\n");
468 io_ctl_unmap_page(io_ctl); 468 io_ctl_unmap_page(io_ctl);
469 return -EIO; 469 return -EIO;
@@ -1902,7 +1902,7 @@ out:
1902 spin_unlock(&ctl->tree_lock); 1902 spin_unlock(&ctl->tree_lock);
1903 1903
1904 if (ret) { 1904 if (ret) {
1905 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1905 printk(KERN_CRIT "BTRFS: unable to add free space :%d\n", ret);
1906 ASSERT(ret != -EEXIST); 1906 ASSERT(ret != -EEXIST);
1907 } 1907 }
1908 1908
@@ -2011,14 +2011,15 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
2011 info = rb_entry(n, struct btrfs_free_space, offset_index); 2011 info = rb_entry(n, struct btrfs_free_space, offset_index);
2012 if (info->bytes >= bytes && !block_group->ro) 2012 if (info->bytes >= bytes && !block_group->ro)
2013 count++; 2013 count++;
2014 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 2014 btrfs_crit(block_group->fs_info,
2015 info->offset, info->bytes, 2015 "entry offset %llu, bytes %llu, bitmap %s",
2016 info->offset, info->bytes,
2016 (info->bitmap) ? "yes" : "no"); 2017 (info->bitmap) ? "yes" : "no");
2017 } 2018 }
2018 printk(KERN_INFO "block group has cluster?: %s\n", 2019 btrfs_info(block_group->fs_info, "block group has cluster?: %s",
2019 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2020 list_empty(&block_group->cluster_list) ? "no" : "yes");
2020 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 2021 btrfs_info(block_group->fs_info,
2021 "\n", count); 2022 "%d blocks of free space at or bigger than bytes is", count);
2022} 2023}
2023 2024
2024void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 2025void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
@@ -2421,7 +2422,6 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2421 struct btrfs_free_space *entry = NULL; 2422 struct btrfs_free_space *entry = NULL;
2422 struct btrfs_free_space *last; 2423 struct btrfs_free_space *last;
2423 struct rb_node *node; 2424 struct rb_node *node;
2424 u64 window_start;
2425 u64 window_free; 2425 u64 window_free;
2426 u64 max_extent; 2426 u64 max_extent;
2427 u64 total_size = 0; 2427 u64 total_size = 0;
@@ -2443,7 +2443,6 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2443 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2443 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2444 } 2444 }
2445 2445
2446 window_start = entry->offset;
2447 window_free = entry->bytes; 2446 window_free = entry->bytes;
2448 max_extent = entry->bytes; 2447 max_extent = entry->bytes;
2449 first = entry; 2448 first = entry;
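
The free-space-cache.c messages move from bare printk() to the btrfs_err()/btrfs_info()/btrfs_crit() helpers, which take an fs_info so every line identifies the filesystem it concerns. A user-space imitation of such a wrapper macro (not the kernel's real definition):

#include <stdio.h>

struct fs_info { const char *name; };

/* user-space imitation of the kernel helper, not its real definition */
#define btrfs_err(fs, fmt, ...) \
	fprintf(stderr, "BTRFS error (device %s): " fmt "\n", \
		(fs)->name, ##__VA_ARGS__)

int main(void)
{
	struct fs_info fs = { "sda1" };

	btrfs_err(&fs, "error reading free space cache");
	btrfs_err(&fs, "unable to add free space :%d", -17);
	return 0;
}
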
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
new file mode 100644
index 000000000000..85889aa82c62
--- /dev/null
+++ b/fs/btrfs/hash.c
@@ -0,0 +1,50 @@
1/*
2 * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13
14#include <crypto/hash.h>
15#include <linux/err.h>
16#include "hash.h"
17
18static struct crypto_shash *tfm;
19
20int __init btrfs_hash_init(void)
21{
22 tfm = crypto_alloc_shash("crc32c", 0, 0);
23 if (IS_ERR(tfm))
24 return PTR_ERR(tfm);
25
26 return 0;
27}
28
29void btrfs_hash_exit(void)
30{
31 crypto_free_shash(tfm);
32}
33
34u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length)
35{
36 struct {
37 struct shash_desc shash;
38 char ctx[crypto_shash_descsize(tfm)];
39 } desc;
40 int err;
41
42 desc.shash.tfm = tfm;
43 desc.shash.flags = 0;
44 *(u32 *)desc.ctx = crc;
45
46 err = crypto_shash_update(&desc.shash, address, length);
47 BUG_ON(err);
48
49 return *(u32 *)desc.ctx;
50}
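
The new hash.c routes btrfs_crc32c() through the crypto layer's "crc32c" shash instead of calling lib/crc32c directly; reading the descriptor context right after crypto_shash_update() yields the running CRC with no final inversion. A slow bitwise user-space sketch of the same function (CRC32C, Castagnoli polynomial), seeded the way btrfs_name_hash() below seeds it:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* bitwise CRC32C update: no pre/post inversion, matching a raw read of
 * the shash context after crypto_shash_update() */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	const char *name = "foo";

	/* btrfs_name_hash() seeds with (u32)~1, then hashes the name */
	printf("%08x\n", crc32c((uint32_t)~1, name, strlen(name)));
	return 0;
}
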
diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h
index 1d982812ab67..118a2316e5d3 100644
--- a/fs/btrfs/hash.h
+++ b/fs/btrfs/hash.h
@@ -19,10 +19,15 @@
19#ifndef __HASH__ 19#ifndef __HASH__
20#define __HASH__ 20#define __HASH__
21 21
22#include <linux/crc32c.h> 22int __init btrfs_hash_init(void);
23
24void btrfs_hash_exit(void);
25
26u32 btrfs_crc32c(u32 crc, const void *address, unsigned int length);
27
23static inline u64 btrfs_name_hash(const char *name, int len) 28static inline u64 btrfs_name_hash(const char *name, int len)
24{ 29{
25 return crc32c((u32)~1, name, len); 30 return btrfs_crc32c((u32)~1, name, len);
26} 31}
27 32
28/* 33/*
@@ -31,7 +36,7 @@ static inline u64 btrfs_name_hash(const char *name, int len)
31static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name, 36static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
32 int len) 37 int len)
33{ 38{
34 return (u64) crc32c(parent_objectid, name, len); 39 return (u64) btrfs_crc32c(parent_objectid, name, len);
35} 40}
36 41
37#endif 42#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index ec82fae07097..2be38df703c9 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -91,32 +91,6 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path, u64 ref_objectid,
91 return 0; 91 return 0;
92} 92}
93 93
94static struct btrfs_inode_ref *
95btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans,
96 struct btrfs_root *root,
97 struct btrfs_path *path,
98 const char *name, int name_len,
99 u64 inode_objectid, u64 ref_objectid, int ins_len,
100 int cow)
101{
102 int ret;
103 struct btrfs_key key;
104 struct btrfs_inode_ref *ref;
105
106 key.objectid = inode_objectid;
107 key.type = BTRFS_INODE_REF_KEY;
108 key.offset = ref_objectid;
109
110 ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
111 if (ret < 0)
112 return ERR_PTR(ret);
113 if (ret > 0)
114 return NULL;
115 if (!find_name_in_backref(path, name, name_len, &ref))
116 return NULL;
117 return ref;
118}
119
120/* Returns NULL if no extref found */ 94/* Returns NULL if no extref found */
121struct btrfs_inode_extref * 95struct btrfs_inode_extref *
122btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans, 96btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
@@ -144,45 +118,6 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
144 return extref; 118 return extref;
145} 119}
146 120
147int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
148 struct btrfs_root *root,
149 struct btrfs_path *path,
150 const char *name, int name_len,
151 u64 inode_objectid, u64 ref_objectid, int mod,
152 u64 *ret_index)
153{
154 struct btrfs_inode_ref *ref;
155 struct btrfs_inode_extref *extref;
156 int ins_len = mod < 0 ? -1 : 0;
157 int cow = mod != 0;
158
159 ref = btrfs_lookup_inode_ref(trans, root, path, name, name_len,
160 inode_objectid, ref_objectid, ins_len,
161 cow);
162 if (IS_ERR(ref))
163 return PTR_ERR(ref);
164
165 if (ref != NULL) {
166 *ret_index = btrfs_inode_ref_index(path->nodes[0], ref);
167 return 0;
168 }
169
170 btrfs_release_path(path);
171
172 extref = btrfs_lookup_inode_extref(trans, root, path, name,
173 name_len, inode_objectid,
174 ref_objectid, ins_len, cow);
175 if (IS_ERR(extref))
176 return PTR_ERR(extref);
177
178 if (extref) {
179 *ret_index = btrfs_inode_extref_index(path->nodes[0], extref);
180 return 0;
181 }
182
183 return -ENOENT;
184}
185
186static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans, 121static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
187 struct btrfs_root *root, 122 struct btrfs_root *root,
188 const char *name, int name_len, 123 const char *name, int name_len,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f1a77449d032..d3d44486290b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -58,9 +58,10 @@
58#include "inode-map.h" 58#include "inode-map.h"
59#include "backref.h" 59#include "backref.h"
60#include "hash.h" 60#include "hash.h"
61#include "props.h"
61 62
62struct btrfs_iget_args { 63struct btrfs_iget_args {
63 u64 ino; 64 struct btrfs_key *location;
64 struct btrfs_root *root; 65 struct btrfs_root *root;
65}; 66};
66 67
@@ -125,13 +126,12 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
125 * no overlapping inline items exist in the btree 126 * no overlapping inline items exist in the btree
126 */ 127 */
127static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, 128static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
129 struct btrfs_path *path, int extent_inserted,
128 struct btrfs_root *root, struct inode *inode, 130 struct btrfs_root *root, struct inode *inode,
129 u64 start, size_t size, size_t compressed_size, 131 u64 start, size_t size, size_t compressed_size,
130 int compress_type, 132 int compress_type,
131 struct page **compressed_pages) 133 struct page **compressed_pages)
132{ 134{
133 struct btrfs_key key;
134 struct btrfs_path *path;
135 struct extent_buffer *leaf; 135 struct extent_buffer *leaf;
136 struct page *page = NULL; 136 struct page *page = NULL;
137 char *kaddr; 137 char *kaddr;
@@ -140,29 +140,29 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
140 int err = 0; 140 int err = 0;
141 int ret; 141 int ret;
142 size_t cur_size = size; 142 size_t cur_size = size;
143 size_t datasize;
144 unsigned long offset; 143 unsigned long offset;
145 144
146 if (compressed_size && compressed_pages) 145 if (compressed_size && compressed_pages)
147 cur_size = compressed_size; 146 cur_size = compressed_size;
148 147
149 path = btrfs_alloc_path(); 148 inode_add_bytes(inode, size);
150 if (!path)
151 return -ENOMEM;
152 149
153 path->leave_spinning = 1; 150 if (!extent_inserted) {
151 struct btrfs_key key;
152 size_t datasize;
154 153
155 key.objectid = btrfs_ino(inode); 154 key.objectid = btrfs_ino(inode);
156 key.offset = start; 155 key.offset = start;
157 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 156 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
158 datasize = btrfs_file_extent_calc_inline_size(cur_size);
159 157
160 inode_add_bytes(inode, size); 158 datasize = btrfs_file_extent_calc_inline_size(cur_size);
161 ret = btrfs_insert_empty_item(trans, root, path, &key, 159 path->leave_spinning = 1;
162 datasize); 160 ret = btrfs_insert_empty_item(trans, root, path, &key,
163 if (ret) { 161 datasize);
164 err = ret; 162 if (ret) {
165 goto fail; 163 err = ret;
164 goto fail;
165 }
166 } 166 }
167 leaf = path->nodes[0]; 167 leaf = path->nodes[0];
168 ei = btrfs_item_ptr(leaf, path->slots[0], 168 ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -203,7 +203,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
203 page_cache_release(page); 203 page_cache_release(page);
204 } 204 }
205 btrfs_mark_buffer_dirty(leaf); 205 btrfs_mark_buffer_dirty(leaf);
206 btrfs_free_path(path); 206 btrfs_release_path(path);
207 207
208 /* 208 /*
209 * we're an inline extent, so nobody can 209 * we're an inline extent, so nobody can
@@ -219,7 +219,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
219 219
220 return ret; 220 return ret;
221fail: 221fail:
222 btrfs_free_path(path);
223 return err; 222 return err;
224} 223}
225 224
@@ -242,6 +241,9 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
242 u64 aligned_end = ALIGN(end, root->sectorsize); 241 u64 aligned_end = ALIGN(end, root->sectorsize);
243 u64 data_len = inline_len; 242 u64 data_len = inline_len;
244 int ret; 243 int ret;
244 struct btrfs_path *path;
245 int extent_inserted = 0;
246 u32 extent_item_size;
245 247
246 if (compressed_size) 248 if (compressed_size)
247 data_len = compressed_size; 249 data_len = compressed_size;
@@ -256,12 +258,27 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
256 return 1; 258 return 1;
257 } 259 }
258 260
261 path = btrfs_alloc_path();
262 if (!path)
263 return -ENOMEM;
264
259 trans = btrfs_join_transaction(root); 265 trans = btrfs_join_transaction(root);
260 if (IS_ERR(trans)) 266 if (IS_ERR(trans)) {
267 btrfs_free_path(path);
261 return PTR_ERR(trans); 268 return PTR_ERR(trans);
269 }
262 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 270 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
263 271
264 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); 272 if (compressed_size && compressed_pages)
273 extent_item_size = btrfs_file_extent_calc_inline_size(
274 compressed_size);
275 else
276 extent_item_size = btrfs_file_extent_calc_inline_size(
277 inline_len);
278
279 ret = __btrfs_drop_extents(trans, root, inode, path,
280 start, aligned_end, NULL,
281 1, 1, extent_item_size, &extent_inserted);
265 if (ret) { 282 if (ret) {
266 btrfs_abort_transaction(trans, root, ret); 283 btrfs_abort_transaction(trans, root, ret);
267 goto out; 284 goto out;
@@ -269,7 +286,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
269 286
270 if (isize > actual_end) 287 if (isize > actual_end)
271 inline_len = min_t(u64, isize, actual_end); 288 inline_len = min_t(u64, isize, actual_end);
272 ret = insert_inline_extent(trans, root, inode, start, 289 ret = insert_inline_extent(trans, path, extent_inserted,
290 root, inode, start,
273 inline_len, compressed_size, 291 inline_len, compressed_size,
274 compress_type, compressed_pages); 292 compress_type, compressed_pages);
275 if (ret && ret != -ENOSPC) { 293 if (ret && ret != -ENOSPC) {
@@ -284,6 +302,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
284 btrfs_delalloc_release_metadata(inode, end + 1 - start); 302 btrfs_delalloc_release_metadata(inode, end + 1 - start);
285 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 303 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
286out: 304out:
305 btrfs_free_path(path);
287 btrfs_end_transaction(trans, root); 306 btrfs_end_transaction(trans, root);
288 return ret; 307 return ret;
289} 308}
@@ -1262,7 +1281,8 @@ next_slot:
1262 nocow = 1; 1281 nocow = 1;
1263 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 1282 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1264 extent_end = found_key.offset + 1283 extent_end = found_key.offset +
1265 btrfs_file_extent_inline_len(leaf, fi); 1284 btrfs_file_extent_inline_len(leaf,
1285 path->slots[0], fi);
1266 extent_end = ALIGN(extent_end, root->sectorsize); 1286 extent_end = ALIGN(extent_end, root->sectorsize);
1267 } else { 1287 } else {
1268 BUG_ON(1); 1288 BUG_ON(1);
@@ -1577,7 +1597,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1577 unsigned long bio_flags) 1597 unsigned long bio_flags)
1578{ 1598{
1579 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; 1599 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
1580 u64 logical = (u64)bio->bi_sector << 9; 1600 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
1581 u64 length = 0; 1601 u64 length = 0;
1582 u64 map_length; 1602 u64 map_length;
1583 int ret; 1603 int ret;
@@ -1585,7 +1605,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1585 if (bio_flags & EXTENT_BIO_COMPRESSED) 1605 if (bio_flags & EXTENT_BIO_COMPRESSED)
1586 return 0; 1606 return 0;
1587 1607
1588 length = bio->bi_size; 1608 length = bio->bi_iter.bi_size;
1589 map_length = length; 1609 map_length = length;
1590 ret = btrfs_map_block(root->fs_info, rw, logical, 1610 ret = btrfs_map_block(root->fs_info, rw, logical,
1591 &map_length, NULL, 0); 1611 &map_length, NULL, 0);
@@ -1841,14 +1861,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1841 struct btrfs_path *path; 1861 struct btrfs_path *path;
1842 struct extent_buffer *leaf; 1862 struct extent_buffer *leaf;
1843 struct btrfs_key ins; 1863 struct btrfs_key ins;
1864 int extent_inserted = 0;
1844 int ret; 1865 int ret;
1845 1866
1846 path = btrfs_alloc_path(); 1867 path = btrfs_alloc_path();
1847 if (!path) 1868 if (!path)
1848 return -ENOMEM; 1869 return -ENOMEM;
1849 1870
1850 path->leave_spinning = 1;
1851
1852 /* 1871 /*
1853 * we may be replacing one extent in the tree with another. 1872 * we may be replacing one extent in the tree with another.
1854 * The new extent is pinned in the extent map, and we don't want 1873 * The new extent is pinned in the extent map, and we don't want
@@ -1858,17 +1877,23 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1858 * the caller is expected to unpin it and allow it to be merged 1877 * the caller is expected to unpin it and allow it to be merged
1859 * with the others. 1878 * with the others.
1860 */ 1879 */
1861 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1880 ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
1862 file_pos + num_bytes, 0); 1881 file_pos + num_bytes, NULL, 0,
1882 1, sizeof(*fi), &extent_inserted);
1863 if (ret) 1883 if (ret)
1864 goto out; 1884 goto out;
1865 1885
1866 ins.objectid = btrfs_ino(inode); 1886 if (!extent_inserted) {
1867 ins.offset = file_pos; 1887 ins.objectid = btrfs_ino(inode);
1868 ins.type = BTRFS_EXTENT_DATA_KEY; 1888 ins.offset = file_pos;
1869 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1889 ins.type = BTRFS_EXTENT_DATA_KEY;
1870 if (ret) 1890
1871 goto out; 1891 path->leave_spinning = 1;
1892 ret = btrfs_insert_empty_item(trans, root, path, &ins,
1893 sizeof(*fi));
1894 if (ret)
1895 goto out;
1896 }
1872 leaf = path->nodes[0]; 1897 leaf = path->nodes[0];
1873 fi = btrfs_item_ptr(leaf, path->slots[0], 1898 fi = btrfs_item_ptr(leaf, path->slots[0],
1874 struct btrfs_file_extent_item); 1899 struct btrfs_file_extent_item);
@@ -2290,7 +2315,7 @@ again:
2290 u64 extent_len; 2315 u64 extent_len;
2291 struct btrfs_key found_key; 2316 struct btrfs_key found_key;
2292 2317
2293 ret = btrfs_search_slot(trans, root, &key, path, 1, 1); 2318 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2294 if (ret < 0) 2319 if (ret < 0)
2295 goto out_free_path; 2320 goto out_free_path;
2296 2321
@@ -2543,12 +2568,6 @@ out_kfree:
2543 return NULL; 2568 return NULL;
2544} 2569}
2545 2570
2546/*
2547 * helper function for btrfs_finish_ordered_io, this
2548 * just reads in some of the csum leaves to prime them into ram
2549 * before we start the transaction. It limits the amount of btree
2550 * reads required while inside the transaction.
2551 */
2552/* as ordered data IO finishes, this gets called so we can finish 2571/* as ordered data IO finishes, this gets called so we can finish
2553 * an ordered extent if the range of bytes in the file it covers are 2572 * an ordered extent if the range of bytes in the file it covers are
2554 * fully written. 2573 * fully written.
@@ -2610,7 +2629,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2610 EXTENT_DEFRAG, 1, cached_state); 2629 EXTENT_DEFRAG, 1, cached_state);
2611 if (ret) { 2630 if (ret) {
2612 u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 2631 u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
2613 if (last_snapshot >= BTRFS_I(inode)->generation) 2632 if (0 && last_snapshot >= BTRFS_I(inode)->generation)
2614 /* the inode is shared */ 2633 /* the inode is shared */
2615 new = record_old_file_extents(inode, ordered_extent); 2634 new = record_old_file_extents(inode, ordered_extent);
2616 2635
@@ -3248,7 +3267,8 @@ out:
3248 * slot is the slot the inode is in, objectid is the objectid of the inode 3267 * slot is the slot the inode is in, objectid is the objectid of the inode
3249 */ 3268 */
3250static noinline int acls_after_inode_item(struct extent_buffer *leaf, 3269static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3251 int slot, u64 objectid) 3270 int slot, u64 objectid,
3271 int *first_xattr_slot)
3252{ 3272{
3253 u32 nritems = btrfs_header_nritems(leaf); 3273 u32 nritems = btrfs_header_nritems(leaf);
3254 struct btrfs_key found_key; 3274 struct btrfs_key found_key;
@@ -3264,6 +3284,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3264 } 3284 }
3265 3285
3266 slot++; 3286 slot++;
3287 *first_xattr_slot = -1;
3267 while (slot < nritems) { 3288 while (slot < nritems) {
3268 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3289 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3269 3290
@@ -3273,6 +3294,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3273 3294
3274 /* we found an xattr, assume we've got an acl */ 3295 /* we found an xattr, assume we've got an acl */
3275 if (found_key.type == BTRFS_XATTR_ITEM_KEY) { 3296 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3297 if (*first_xattr_slot == -1)
3298 *first_xattr_slot = slot;
3276 if (found_key.offset == xattr_access || 3299 if (found_key.offset == xattr_access ||
3277 found_key.offset == xattr_default) 3300 found_key.offset == xattr_default)
3278 return 1; 3301 return 1;
@@ -3301,6 +3324,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3301 * something larger than an xattr. We have to assume the inode 3324 * something larger than an xattr. We have to assume the inode
3302 * has acls 3325 * has acls
3303 */ 3326 */
3327 if (*first_xattr_slot == -1)
3328 *first_xattr_slot = slot;
3304 return 1; 3329 return 1;
3305} 3330}
3306 3331
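
acls_after_inode_item() now also reports the first xattr slot it encounters, so btrfs_read_locked_inode() can position the path there and load inode properties without another search. A toy scan over an array of key types standing in for leaf slots (only the xattr key value 24 is taken from btrfs, the other values are arbitrary, and the real function additionally checks ACL name offsets):

#include <stdio.h>

#define BTRFS_XATTR_ITEM_KEY 24	/* real btrfs key type value */

/* key types in consecutive leaf slots; values other than 24 arbitrary */
static int key_type[] = { 1, 12, 24, 24, 96 };

static int scan_after_inode_item(int slot, int *first_xattr_slot)
{
	int n = sizeof(key_type) / sizeof(key_type[0]);

	*first_xattr_slot = -1;
	for (; slot < n; slot++) {
		if (key_type[slot] == BTRFS_XATTR_ITEM_KEY) {
			if (*first_xattr_slot == -1)
				*first_xattr_slot = slot;
			return 1;	/* may carry ACLs */
		}
	}
	return 0;
}

int main(void)
{
	int first;
	int maybe_acls = scan_after_inode_item(1, &first);

	printf("maybe_acls=%d first_xattr_slot=%d\n", maybe_acls, first);
	return 0;
}
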
@@ -3315,10 +3340,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
3315 struct btrfs_timespec *tspec; 3340 struct btrfs_timespec *tspec;
3316 struct btrfs_root *root = BTRFS_I(inode)->root; 3341 struct btrfs_root *root = BTRFS_I(inode)->root;
3317 struct btrfs_key location; 3342 struct btrfs_key location;
3343 unsigned long ptr;
3318 int maybe_acls; 3344 int maybe_acls;
3319 u32 rdev; 3345 u32 rdev;
3320 int ret; 3346 int ret;
3321 bool filled = false; 3347 bool filled = false;
3348 int first_xattr_slot;
3322 3349
3323 ret = btrfs_fill_inode(inode, &rdev); 3350 ret = btrfs_fill_inode(inode, &rdev);
3324 if (!ret) 3351 if (!ret)
@@ -3328,7 +3355,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
3328 if (!path) 3355 if (!path)
3329 goto make_bad; 3356 goto make_bad;
3330 3357
3331 path->leave_spinning = 1;
3332 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 3358 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3333 3359
3334 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 3360 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -3338,7 +3364,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
3338 leaf = path->nodes[0]; 3364 leaf = path->nodes[0];
3339 3365
3340 if (filled) 3366 if (filled)
3341 goto cache_acl; 3367 goto cache_index;
3342 3368
3343 inode_item = btrfs_item_ptr(leaf, path->slots[0], 3369 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3344 struct btrfs_inode_item); 3370 struct btrfs_inode_item);
@@ -3381,18 +3407,51 @@ static void btrfs_read_locked_inode(struct inode *inode)
3381 3407
3382 BTRFS_I(inode)->index_cnt = (u64)-1; 3408 BTRFS_I(inode)->index_cnt = (u64)-1;
3383 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 3409 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
3410
3411cache_index:
3412 path->slots[0]++;
3413 if (inode->i_nlink != 1 ||
3414 path->slots[0] >= btrfs_header_nritems(leaf))
3415 goto cache_acl;
3416
3417 btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
3418 if (location.objectid != btrfs_ino(inode))
3419 goto cache_acl;
3420
3421 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3422 if (location.type == BTRFS_INODE_REF_KEY) {
3423 struct btrfs_inode_ref *ref;
3424
3425 ref = (struct btrfs_inode_ref *)ptr;
3426 BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
3427 } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
3428 struct btrfs_inode_extref *extref;
3429
3430 extref = (struct btrfs_inode_extref *)ptr;
3431 BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
3432 extref);
3433 }
3384cache_acl: 3434cache_acl:
3385 /* 3435 /*
3386 * try to precache a NULL acl entry for files that don't have 3436 * try to precache a NULL acl entry for files that don't have
3387 * any xattrs or acls 3437 * any xattrs or acls
3388 */ 3438 */
3389 maybe_acls = acls_after_inode_item(leaf, path->slots[0], 3439 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
3390 btrfs_ino(inode)); 3440 btrfs_ino(inode), &first_xattr_slot);
3441 if (first_xattr_slot != -1) {
3442 path->slots[0] = first_xattr_slot;
3443 ret = btrfs_load_inode_props(inode, path);
3444 if (ret)
3445 btrfs_err(root->fs_info,
3446 "error loading props for ino %llu (root %llu): %d\n",
3447 btrfs_ino(inode),
3448 root->root_key.objectid, ret);
3449 }
3450 btrfs_free_path(path);
3451
3391 if (!maybe_acls) 3452 if (!maybe_acls)
3392 cache_no_acl(inode); 3453 cache_no_acl(inode);
3393 3454
3394 btrfs_free_path(path);
3395
3396 switch (inode->i_mode & S_IFMT) { 3455 switch (inode->i_mode & S_IFMT) {
3397 case S_IFREG: 3456 case S_IFREG:
3398 inode->i_mapping->a_ops = &btrfs_aops; 3457 inode->i_mapping->a_ops = &btrfs_aops;
@@ -3496,7 +3555,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3496 goto failed; 3555 goto failed;
3497 } 3556 }
3498 3557
3499 btrfs_unlock_up_safe(path, 1);
3500 leaf = path->nodes[0]; 3558 leaf = path->nodes[0];
3501 inode_item = btrfs_item_ptr(leaf, path->slots[0], 3559 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3502 struct btrfs_inode_item); 3560 struct btrfs_inode_item);
@@ -3593,6 +3651,24 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3593 goto err; 3651 goto err;
3594 btrfs_release_path(path); 3652 btrfs_release_path(path);
3595 3653
3654 /*
 3655 * If we don't have the dir index, we have to get it by looking up
 3656 * the inode ref; and since that lookup gives us the inode ref, we
 3657 * remove it directly, so delayed deletion is unnecessary.
 3658 *
 3659 * But if we do have the dir index, there is no need to search the
 3660 * inode ref to get it. Since the inode ref is close to the inode
 3661 * item, it is better to delay its deletion and do it when we
 3662 * update the inode item.
3663 */
3664 if (BTRFS_I(inode)->dir_index) {
3665 ret = btrfs_delayed_delete_inode_ref(inode);
3666 if (!ret) {
3667 index = BTRFS_I(inode)->dir_index;
3668 goto skip_backref;
3669 }
3670 }
3671
3596 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino, 3672 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
3597 dir_ino, &index); 3673 dir_ino, &index);
3598 if (ret) { 3674 if (ret) {
@@ -3602,7 +3678,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3602 btrfs_abort_transaction(trans, root, ret); 3678 btrfs_abort_transaction(trans, root, ret);
3603 goto err; 3679 goto err;
3604 } 3680 }
3605 3681skip_backref:
3606 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index); 3682 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3607 if (ret) { 3683 if (ret) {
3608 btrfs_abort_transaction(trans, root, ret); 3684 btrfs_abort_transaction(trans, root, ret);
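
Together, the cache_index block in btrfs_read_locked_inode() and the skip_backref label above mean unlink can usually skip the inode ref search entirely: the dir index cached at inode-read time feeds btrfs_delayed_delete_inode_ref() directly. A toy model of that fast path (stub function and illustrative names):

#include <stdio.h>

struct toy_inode { unsigned long long dir_index; };

/* stub: pretend the delayed deletion item was queued successfully */
static int delayed_delete_inode_ref(struct toy_inode *inode)
{
	(void)inode;
	return 0;
}

static unsigned long long unlink_get_index(struct toy_inode *inode)
{
	unsigned long long index;

	if (inode->dir_index && !delayed_delete_inode_ref(inode)) {
		index = inode->dir_index;	/* fast path: cached index */
		goto skip_backref;
	}
	index = 42;	/* slow path: found by searching the inode ref */
skip_backref:
	return index;
}

int main(void)
{
	struct toy_inode i = { 7 };

	printf("index=%llu\n", unlink_get_index(&i));
	return 0;
}
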
@@ -3948,7 +4024,7 @@ search_again:
3948 btrfs_file_extent_num_bytes(leaf, fi); 4024 btrfs_file_extent_num_bytes(leaf, fi);
3949 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 4025 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
3950 item_end += btrfs_file_extent_inline_len(leaf, 4026 item_end += btrfs_file_extent_inline_len(leaf,
3951 fi); 4027 path->slots[0], fi);
3952 } 4028 }
3953 item_end--; 4029 item_end--;
3954 } 4030 }
@@ -4018,6 +4094,12 @@ search_again:
4018 inode_sub_bytes(inode, item_end + 1 - 4094 inode_sub_bytes(inode, item_end + 1 -
4019 new_size); 4095 new_size);
4020 } 4096 }
4097
4098 /*
4099 * update the ram bytes to properly reflect
4100 * the new size of our item
4101 */
4102 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
4021 size = 4103 size =
4022 btrfs_file_extent_calc_inline_size(size); 4104 btrfs_file_extent_calc_inline_size(size);
4023 btrfs_truncate_item(root, path, size, 1); 4105 btrfs_truncate_item(root, path, size, 1);
@@ -4203,6 +4285,49 @@ out:
4203 return ret; 4285 return ret;
4204} 4286}
4205 4287
4288static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
4289 u64 offset, u64 len)
4290{
4291 struct btrfs_trans_handle *trans;
4292 int ret;
4293
4294 /*
4295 * Still need to make sure the inode looks like it's been updated so
4296 * that any holes get logged if we fsync.
4297 */
4298 if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
4299 BTRFS_I(inode)->last_trans = root->fs_info->generation;
4300 BTRFS_I(inode)->last_sub_trans = root->log_transid;
4301 BTRFS_I(inode)->last_log_commit = root->last_log_commit;
4302 return 0;
4303 }
4304
4305 /*
4306 * 1 - for the one we're dropping
4307 * 1 - for the one we're adding
4308 * 1 - for updating the inode.
4309 */
4310 trans = btrfs_start_transaction(root, 3);
4311 if (IS_ERR(trans))
4312 return PTR_ERR(trans);
4313
4314 ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
4315 if (ret) {
4316 btrfs_abort_transaction(trans, root, ret);
4317 btrfs_end_transaction(trans, root);
4318 return ret;
4319 }
4320
4321 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
4322 0, 0, len, 0, len, 0, 0, 0);
4323 if (ret)
4324 btrfs_abort_transaction(trans, root, ret);
4325 else
4326 btrfs_update_inode(trans, root, inode);
4327 btrfs_end_transaction(trans, root);
4328 return ret;
4329}
4330
4206/* 4331/*
4207 * This function puts in dummy file extents for the area we're creating a hole 4332 * This function puts in dummy file extents for the area we're creating a hole
4208 * for. So if we are truncating this file to a larger size we need to insert 4333 * for. So if we are truncating this file to a larger size we need to insert
@@ -4211,7 +4336,6 @@ out:
4211 */ 4336 */
4212int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) 4337int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4213{ 4338{
4214 struct btrfs_trans_handle *trans;
4215 struct btrfs_root *root = BTRFS_I(inode)->root; 4339 struct btrfs_root *root = BTRFS_I(inode)->root;
4216 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 4340 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4217 struct extent_map *em = NULL; 4341 struct extent_map *em = NULL;
@@ -4266,31 +4390,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4266 struct extent_map *hole_em; 4390 struct extent_map *hole_em;
4267 hole_size = last_byte - cur_offset; 4391 hole_size = last_byte - cur_offset;
4268 4392
4269 trans = btrfs_start_transaction(root, 3); 4393 err = maybe_insert_hole(root, inode, cur_offset,
4270 if (IS_ERR(trans)) { 4394 hole_size);
4271 err = PTR_ERR(trans); 4395 if (err)
4272 break;
4273 }
4274
4275 err = btrfs_drop_extents(trans, root, inode,
4276 cur_offset,
4277 cur_offset + hole_size, 1);
4278 if (err) {
4279 btrfs_abort_transaction(trans, root, err);
4280 btrfs_end_transaction(trans, root);
4281 break;
4282 }
4283
4284 err = btrfs_insert_file_extent(trans, root,
4285 btrfs_ino(inode), cur_offset, 0,
4286 0, hole_size, 0, hole_size,
4287 0, 0, 0);
4288 if (err) {
4289 btrfs_abort_transaction(trans, root, err);
4290 btrfs_end_transaction(trans, root);
4291 break; 4396 break;
4292 }
4293
4294 btrfs_drop_extent_cache(inode, cur_offset, 4397 btrfs_drop_extent_cache(inode, cur_offset,
4295 cur_offset + hole_size - 1, 0); 4398 cur_offset + hole_size - 1, 0);
4296 hole_em = alloc_extent_map(); 4399 hole_em = alloc_extent_map();
@@ -4309,7 +4412,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4309 hole_em->ram_bytes = hole_size; 4412 hole_em->ram_bytes = hole_size;
4310 hole_em->bdev = root->fs_info->fs_devices->latest_bdev; 4413 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
4311 hole_em->compress_type = BTRFS_COMPRESS_NONE; 4414 hole_em->compress_type = BTRFS_COMPRESS_NONE;
4312 hole_em->generation = trans->transid; 4415 hole_em->generation = root->fs_info->generation;
4313 4416
4314 while (1) { 4417 while (1) {
4315 write_lock(&em_tree->lock); 4418 write_lock(&em_tree->lock);
@@ -4322,17 +4425,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4322 hole_size - 1, 0); 4425 hole_size - 1, 0);
4323 } 4426 }
4324 free_extent_map(hole_em); 4427 free_extent_map(hole_em);
4325next:
4326 btrfs_update_inode(trans, root, inode);
4327 btrfs_end_transaction(trans, root);
4328 } 4428 }
4429next:
4329 free_extent_map(em); 4430 free_extent_map(em);
4330 em = NULL; 4431 em = NULL;
4331 cur_offset = last_byte; 4432 cur_offset = last_byte;
4332 if (cur_offset >= block_end) 4433 if (cur_offset >= block_end)
4333 break; 4434 break;
4334 } 4435 }
4335
4336 free_extent_map(em); 4436 free_extent_map(em);
4337 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, 4437 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
4338 GFP_NOFS); 4438 GFP_NOFS);
@@ -4354,8 +4454,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4354 * these flags set. For all other operations the VFS set these flags 4454 * these flags set. For all other operations the VFS set these flags
4355 * explicitly if it wants a timestamp update. 4455 * explicitly if it wants a timestamp update.
4356 */ 4456 */
4357 if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) 4457 if (newsize != oldsize) {
4358 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); 4458 inode_inc_iversion(inode);
4459 if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
4460 inode->i_ctime = inode->i_mtime =
4461 current_fs_time(inode->i_sb);
4462 }
4359 4463
4360 if (newsize > oldsize) { 4464 if (newsize > oldsize) {
4361 truncate_pagecache(inode, newsize); 4465 truncate_pagecache(inode, newsize);
@@ -4464,12 +4568,70 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
4464 err = btrfs_dirty_inode(inode); 4568 err = btrfs_dirty_inode(inode);
4465 4569
4466 if (!err && attr->ia_valid & ATTR_MODE) 4570 if (!err && attr->ia_valid & ATTR_MODE)
4467 err = btrfs_acl_chmod(inode); 4571 err = posix_acl_chmod(inode, inode->i_mode);
4468 } 4572 }
4469 4573
4470 return err; 4574 return err;
4471} 4575}
4472 4576
4577/*
4578 * While truncating the inode pages during eviction, we get the VFS calling
4579 * btrfs_invalidatepage() against each page of the inode. This is slow because
4580 * the calls to btrfs_invalidatepage() result in a huge amount of calls to
4581 * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
4582 * extent_state structures over and over, wasting lots of time.
4583 *
4584 * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
4585 * those expensive operations on a per page basis and do only the ordered io
4586 * finishing, while we release here the extent_map and extent_state structures,
4587 * without the excessive merging and splitting.
4588 */
4589static void evict_inode_truncate_pages(struct inode *inode)
4590{
4591 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4592 struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
4593 struct rb_node *node;
4594
4595 ASSERT(inode->i_state & I_FREEING);
4596 truncate_inode_pages(&inode->i_data, 0);
4597
4598 write_lock(&map_tree->lock);
4599 while (!RB_EMPTY_ROOT(&map_tree->map)) {
4600 struct extent_map *em;
4601
4602 node = rb_first(&map_tree->map);
4603 em = rb_entry(node, struct extent_map, rb_node);
4604 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
4605 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
4606 remove_extent_mapping(map_tree, em);
4607 free_extent_map(em);
4608 }
4609 write_unlock(&map_tree->lock);
4610
4611 spin_lock(&io_tree->lock);
4612 while (!RB_EMPTY_ROOT(&io_tree->state)) {
4613 struct extent_state *state;
4614 struct extent_state *cached_state = NULL;
4615
4616 node = rb_first(&io_tree->state);
4617 state = rb_entry(node, struct extent_state, rb_node);
4618 atomic_inc(&state->refs);
4619 spin_unlock(&io_tree->lock);
4620
4621 lock_extent_bits(io_tree, state->start, state->end,
4622 0, &cached_state);
4623 clear_extent_bit(io_tree, state->start, state->end,
4624 EXTENT_LOCKED | EXTENT_DIRTY |
4625 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
4626 EXTENT_DEFRAG, 1, 1,
4627 &cached_state, GFP_NOFS);
4628 free_extent_state(state);
4629
4630 spin_lock(&io_tree->lock);
4631 }
4632 spin_unlock(&io_tree->lock);
4633}
4634
4473void btrfs_evict_inode(struct inode *inode) 4635void btrfs_evict_inode(struct inode *inode)
4474{ 4636{
4475 struct btrfs_trans_handle *trans; 4637 struct btrfs_trans_handle *trans;
@@ -4480,7 +4642,8 @@ void btrfs_evict_inode(struct inode *inode)
4480 4642
4481 trace_btrfs_inode_evict(inode); 4643 trace_btrfs_inode_evict(inode);
4482 4644
4483 truncate_inode_pages(&inode->i_data, 0); 4645 evict_inode_truncate_pages(inode);
4646
4484 if (inode->i_nlink && 4647 if (inode->i_nlink &&
4485 ((btrfs_root_refs(&root->root_item) != 0 && 4648 ((btrfs_root_refs(&root->root_item) != 0 &&
4486 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) || 4649 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
@@ -4655,9 +4818,9 @@ static int fixup_tree_root_location(struct btrfs_root *root,
4655 } 4818 }
4656 4819
4657 err = -ENOENT; 4820 err = -ENOENT;
4658 ret = btrfs_find_root_ref(root->fs_info->tree_root, path, 4821 ret = btrfs_find_item(root->fs_info->tree_root, path,
4659 BTRFS_I(dir)->root->root_key.objectid, 4822 BTRFS_I(dir)->root->root_key.objectid,
4660 location->objectid); 4823 location->objectid, BTRFS_ROOT_REF_KEY, NULL);
4661 if (ret) { 4824 if (ret) {
4662 if (ret < 0) 4825 if (ret < 0)
4663 err = ret; 4826 err = ret;
@@ -4818,7 +4981,9 @@ again:
4818static int btrfs_init_locked_inode(struct inode *inode, void *p) 4981static int btrfs_init_locked_inode(struct inode *inode, void *p)
4819{ 4982{
4820 struct btrfs_iget_args *args = p; 4983 struct btrfs_iget_args *args = p;
4821 inode->i_ino = args->ino; 4984 inode->i_ino = args->location->objectid;
4985 memcpy(&BTRFS_I(inode)->location, args->location,
4986 sizeof(*args->location));
4822 BTRFS_I(inode)->root = args->root; 4987 BTRFS_I(inode)->root = args->root;
4823 return 0; 4988 return 0;
4824} 4989}
@@ -4826,19 +4991,19 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
4826static int btrfs_find_actor(struct inode *inode, void *opaque) 4991static int btrfs_find_actor(struct inode *inode, void *opaque)
4827{ 4992{
4828 struct btrfs_iget_args *args = opaque; 4993 struct btrfs_iget_args *args = opaque;
4829 return args->ino == btrfs_ino(inode) && 4994 return args->location->objectid == BTRFS_I(inode)->location.objectid &&
4830 args->root == BTRFS_I(inode)->root; 4995 args->root == BTRFS_I(inode)->root;
4831} 4996}
4832 4997
4833static struct inode *btrfs_iget_locked(struct super_block *s, 4998static struct inode *btrfs_iget_locked(struct super_block *s,
4834 u64 objectid, 4999 struct btrfs_key *location,
4835 struct btrfs_root *root) 5000 struct btrfs_root *root)
4836{ 5001{
4837 struct inode *inode; 5002 struct inode *inode;
4838 struct btrfs_iget_args args; 5003 struct btrfs_iget_args args;
4839 unsigned long hashval = btrfs_inode_hash(objectid, root); 5004 unsigned long hashval = btrfs_inode_hash(location->objectid, root);
4840 5005
4841 args.ino = objectid; 5006 args.location = location;
4842 args.root = root; 5007 args.root = root;
4843 5008
4844 inode = iget5_locked(s, hashval, btrfs_find_actor, 5009 inode = iget5_locked(s, hashval, btrfs_find_actor,
@@ -4855,13 +5020,11 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
4855{ 5020{
4856 struct inode *inode; 5021 struct inode *inode;
4857 5022
4858 inode = btrfs_iget_locked(s, location->objectid, root); 5023 inode = btrfs_iget_locked(s, location, root);
4859 if (!inode) 5024 if (!inode)
4860 return ERR_PTR(-ENOMEM); 5025 return ERR_PTR(-ENOMEM);
4861 5026
4862 if (inode->i_state & I_NEW) { 5027 if (inode->i_state & I_NEW) {
4863 BTRFS_I(inode)->root = root;
4864 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
4865 btrfs_read_locked_inode(inode); 5028 btrfs_read_locked_inode(inode);
4866 if (!is_bad_inode(inode)) { 5029 if (!is_bad_inode(inode)) {
4867 inode_tree_add(inode); 5030 inode_tree_add(inode);
@@ -4917,7 +5080,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4917 return ERR_PTR(ret); 5080 return ERR_PTR(ret);
4918 5081
4919 if (location.objectid == 0) 5082 if (location.objectid == 0)
4920 return NULL; 5083 return ERR_PTR(-ENOENT);
4921 5084
4922 if (location.type == BTRFS_INODE_ITEM_KEY) { 5085 if (location.type == BTRFS_INODE_ITEM_KEY) {
4923 inode = btrfs_iget(dir->i_sb, &location, root, NULL); 5086 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
@@ -4981,10 +5144,17 @@ static void btrfs_dentry_release(struct dentry *dentry)
4981static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 5144static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4982 unsigned int flags) 5145 unsigned int flags)
4983{ 5146{
4984 struct dentry *ret; 5147 struct inode *inode;
4985 5148
4986 ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); 5149 inode = btrfs_lookup_dentry(dir, dentry);
4987 return ret; 5150 if (IS_ERR(inode)) {
5151 if (PTR_ERR(inode) == -ENOENT)
5152 inode = NULL;
5153 else
5154 return ERR_CAST(inode);
5155 }
5156
5157 return d_materialise_unique(dentry, inode);
4988} 5158}
4989 5159
4990unsigned char btrfs_filetype_table[] = { 5160unsigned char btrfs_filetype_table[] = {
@@ -5354,7 +5524,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5354 u32 sizes[2]; 5524 u32 sizes[2];
5355 unsigned long ptr; 5525 unsigned long ptr;
5356 int ret; 5526 int ret;
5357 int owner;
5358 5527
5359 path = btrfs_alloc_path(); 5528 path = btrfs_alloc_path();
5360 if (!path) 5529 if (!path)
@@ -5388,6 +5557,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5388 * number 5557 * number
5389 */ 5558 */
5390 BTRFS_I(inode)->index_cnt = 2; 5559 BTRFS_I(inode)->index_cnt = 2;
5560 BTRFS_I(inode)->dir_index = *index;
5391 BTRFS_I(inode)->root = root; 5561 BTRFS_I(inode)->root = root;
5392 BTRFS_I(inode)->generation = trans->transid; 5562 BTRFS_I(inode)->generation = trans->transid;
5393 inode->i_generation = BTRFS_I(inode)->generation; 5563 inode->i_generation = BTRFS_I(inode)->generation;
@@ -5400,11 +5570,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5400 */ 5570 */
5401 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); 5571 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
5402 5572
5403 if (S_ISDIR(mode))
5404 owner = 0;
5405 else
5406 owner = 1;
5407
5408 key[0].objectid = objectid; 5573 key[0].objectid = objectid;
5409 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 5574 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
5410 key[0].offset = 0; 5575 key[0].offset = 0;
@@ -5469,6 +5634,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5469 5634
5470 btrfs_update_root_times(trans, root); 5635 btrfs_update_root_times(trans, root);
5471 5636
5637 ret = btrfs_inode_inherit_props(trans, inode, dir);
5638 if (ret)
5639 btrfs_err(root->fs_info,
5640 "error inheriting props for ino %llu (root %llu): %d",
5641 btrfs_ino(inode), root->root_key.objectid, ret);
5642
5472 return inode; 5643 return inode;
5473fail: 5644fail:
5474 if (dir) 5645 if (dir)
@@ -5737,6 +5908,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
5737 goto fail; 5908 goto fail;
5738 } 5909 }
5739 5910
5911 /* There are several dir indexes for this inode, clear the cache. */
5912 BTRFS_I(inode)->dir_index = 0ULL;
5740 inc_nlink(inode); 5913 inc_nlink(inode);
5741 inode_inc_iversion(inode); 5914 inode_inc_iversion(inode);
5742 inode->i_ctime = CURRENT_TIME; 5915 inode->i_ctime = CURRENT_TIME;
@@ -6000,7 +6173,7 @@ again:
6000 btrfs_file_extent_num_bytes(leaf, item); 6173 btrfs_file_extent_num_bytes(leaf, item);
6001 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 6174 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
6002 size_t size; 6175 size_t size;
6003 size = btrfs_file_extent_inline_len(leaf, item); 6176 size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
6004 extent_end = ALIGN(extent_start + size, root->sectorsize); 6177 extent_end = ALIGN(extent_start + size, root->sectorsize);
6005 } 6178 }
6006next: 6179next:
@@ -6069,7 +6242,7 @@ next:
6069 goto out; 6242 goto out;
6070 } 6243 }
6071 6244
6072 size = btrfs_file_extent_inline_len(leaf, item); 6245 size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
6073 extent_offset = page_offset(page) + pg_offset - extent_start; 6246 extent_offset = page_offset(page) + pg_offset - extent_start;
6074 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset, 6247 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
6075 size - extent_offset); 6248 size - extent_offset);
@@ -6386,6 +6559,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6386 int slot; 6559 int slot;
6387 int found_type; 6560 int found_type;
6388 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW); 6561 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
6562
6389 path = btrfs_alloc_path(); 6563 path = btrfs_alloc_path();
6390 if (!path) 6564 if (!path)
6391 return -ENOMEM; 6565 return -ENOMEM;
@@ -6429,6 +6603,10 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6429 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG) 6603 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
6430 goto out; 6604 goto out;
6431 6605
6606 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6607 if (extent_end <= offset)
6608 goto out;
6609
6432 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6610 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
6433 if (disk_bytenr == 0) 6611 if (disk_bytenr == 0)
6434 goto out; 6612 goto out;
@@ -6446,8 +6624,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6446 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 6624 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
6447 } 6625 }
6448 6626
6449 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
6450
6451 if (btrfs_extent_readonly(root, disk_bytenr)) 6627 if (btrfs_extent_readonly(root, disk_bytenr))
6452 goto out; 6628 goto out;
6453 btrfs_release_path(path); 6629 btrfs_release_path(path);
@@ -6779,17 +6955,16 @@ unlock_err:
6779static void btrfs_endio_direct_read(struct bio *bio, int err) 6955static void btrfs_endio_direct_read(struct bio *bio, int err)
6780{ 6956{
6781 struct btrfs_dio_private *dip = bio->bi_private; 6957 struct btrfs_dio_private *dip = bio->bi_private;
6782 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 6958 struct bio_vec *bvec;
6783 struct bio_vec *bvec = bio->bi_io_vec;
6784 struct inode *inode = dip->inode; 6959 struct inode *inode = dip->inode;
6785 struct btrfs_root *root = BTRFS_I(inode)->root; 6960 struct btrfs_root *root = BTRFS_I(inode)->root;
6786 struct bio *dio_bio; 6961 struct bio *dio_bio;
6787 u32 *csums = (u32 *)dip->csum; 6962 u32 *csums = (u32 *)dip->csum;
6788 int index = 0;
6789 u64 start; 6963 u64 start;
6964 int i;
6790 6965
6791 start = dip->logical_offset; 6966 start = dip->logical_offset;
6792 do { 6967 bio_for_each_segment_all(bvec, bio, i) {
6793 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { 6968 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
6794 struct page *page = bvec->bv_page; 6969 struct page *page = bvec->bv_page;
6795 char *kaddr; 6970 char *kaddr;
@@ -6805,18 +6980,16 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
6805 local_irq_restore(flags); 6980 local_irq_restore(flags);
6806 6981
6807 flush_dcache_page(bvec->bv_page); 6982 flush_dcache_page(bvec->bv_page);
6808 if (csum != csums[index]) { 6983 if (csum != csums[i]) {
6809 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u", 6984 btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
6810 btrfs_ino(inode), start, csum, 6985 btrfs_ino(inode), start, csum,
6811 csums[index]); 6986 csums[i]);
6812 err = -EIO; 6987 err = -EIO;
6813 } 6988 }
6814 } 6989 }
6815 6990
6816 start += bvec->bv_len; 6991 start += bvec->bv_len;
6817 bvec++; 6992 }
6818 index++;
6819 } while (bvec <= bvec_end);
6820 6993
6821 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 6994 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
6822 dip->logical_offset + dip->bytes - 1); 6995 dip->logical_offset + dip->bytes - 1);
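This hunk and the bi_iter conversions below track the block layer's immutable-biovec rework: bi_sector and bi_size move into bio->bi_iter, and completion handlers walk segments with bio_for_each_segment_all() instead of hand-maintained bvec and index pointers. The shape of the new pattern, with illustrative names:

static void example_endio(struct bio *bio, int err)
{
	struct bio_vec *bvec;
	int i;
	u64 offset = (u64)bio->bi_iter.bi_sector << 9; /* was bio->bi_sector */

	/* visits every segment the bio carries, suitable for endio */
	bio_for_each_segment_all(bvec, bio, i) {
		/* bvec->bv_page, bv_offset and bv_len are unchanged */
		offset += bvec->bv_len;	/* advance to the next page's offset */
	}
}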
@@ -6894,10 +7067,11 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
6894 struct btrfs_dio_private *dip = bio->bi_private; 7067 struct btrfs_dio_private *dip = bio->bi_private;
6895 7068
6896 if (err) { 7069 if (err) {
6897 printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " 7070 btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
6898 "sector %#Lx len %u err no %d\n", 7071 "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
6899 btrfs_ino(dip->inode), bio->bi_rw, 7072 btrfs_ino(dip->inode), bio->bi_rw,
6900 (unsigned long long)bio->bi_sector, bio->bi_size, err); 7073 (unsigned long long)bio->bi_iter.bi_sector,
7074 bio->bi_iter.bi_size, err);
6901 dip->errors = 1; 7075 dip->errors = 1;
6902 7076
6903 /* 7077 /*
@@ -6988,7 +7162,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6988 struct bio *bio; 7162 struct bio *bio;
6989 struct bio *orig_bio = dip->orig_bio; 7163 struct bio *orig_bio = dip->orig_bio;
6990 struct bio_vec *bvec = orig_bio->bi_io_vec; 7164 struct bio_vec *bvec = orig_bio->bi_io_vec;
6991 u64 start_sector = orig_bio->bi_sector; 7165 u64 start_sector = orig_bio->bi_iter.bi_sector;
6992 u64 file_offset = dip->logical_offset; 7166 u64 file_offset = dip->logical_offset;
6993 u64 submit_len = 0; 7167 u64 submit_len = 0;
6994 u64 map_length; 7168 u64 map_length;
@@ -6996,7 +7170,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6996 int ret = 0; 7170 int ret = 0;
6997 int async_submit = 0; 7171 int async_submit = 0;
6998 7172
6999 map_length = orig_bio->bi_size; 7173 map_length = orig_bio->bi_iter.bi_size;
7000 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9, 7174 ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
7001 &map_length, NULL, 0); 7175 &map_length, NULL, 0);
7002 if (ret) { 7176 if (ret) {
@@ -7004,7 +7178,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7004 return -EIO; 7178 return -EIO;
7005 } 7179 }
7006 7180
7007 if (map_length >= orig_bio->bi_size) { 7181 if (map_length >= orig_bio->bi_iter.bi_size) {
7008 bio = orig_bio; 7182 bio = orig_bio;
7009 goto submit; 7183 goto submit;
7010 } 7184 }
@@ -7056,7 +7230,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
7056 bio->bi_private = dip; 7230 bio->bi_private = dip;
7057 bio->bi_end_io = btrfs_end_dio_bio; 7231 bio->bi_end_io = btrfs_end_dio_bio;
7058 7232
7059 map_length = orig_bio->bi_size; 7233 map_length = orig_bio->bi_iter.bi_size;
7060 ret = btrfs_map_block(root->fs_info, rw, 7234 ret = btrfs_map_block(root->fs_info, rw,
7061 start_sector << 9, 7235 start_sector << 9,
7062 &map_length, NULL, 0); 7236 &map_length, NULL, 0);
@@ -7114,7 +7288,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7114 7288
7115 if (!skip_sum && !write) { 7289 if (!skip_sum && !write) {
7116 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); 7290 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
7117 sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits; 7291 sum_len = dio_bio->bi_iter.bi_size >>
7292 inode->i_sb->s_blocksize_bits;
7118 sum_len *= csum_size; 7293 sum_len *= csum_size;
7119 } else { 7294 } else {
7120 sum_len = 0; 7295 sum_len = 0;
@@ -7129,8 +7304,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7129 dip->private = dio_bio->bi_private; 7304 dip->private = dio_bio->bi_private;
7130 dip->inode = inode; 7305 dip->inode = inode;
7131 dip->logical_offset = file_offset; 7306 dip->logical_offset = file_offset;
7132 dip->bytes = dio_bio->bi_size; 7307 dip->bytes = dio_bio->bi_iter.bi_size;
7133 dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; 7308 dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
7134 io_bio->bi_private = dip; 7309 io_bio->bi_private = dip;
7135 dip->errors = 0; 7310 dip->errors = 0;
7136 dip->orig_bio = io_bio; 7311 dip->orig_bio = io_bio;
@@ -7367,6 +7542,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7367 struct extent_state *cached_state = NULL; 7542 struct extent_state *cached_state = NULL;
7368 u64 page_start = page_offset(page); 7543 u64 page_start = page_offset(page);
7369 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 7544 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
7545 int inode_evicting = inode->i_state & I_FREEING;
7370 7546
7371 /* 7547 /*
7372 * we have the page locked, so new writeback can't start, 7548 * we have the page locked, so new writeback can't start,
@@ -7382,17 +7558,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7382 btrfs_releasepage(page, GFP_NOFS); 7558 btrfs_releasepage(page, GFP_NOFS);
7383 return; 7559 return;
7384 } 7560 }
7385 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 7561
7386 ordered = btrfs_lookup_ordered_extent(inode, page_offset(page)); 7562 if (!inode_evicting)
7563 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
7564 ordered = btrfs_lookup_ordered_extent(inode, page_start);
7387 if (ordered) { 7565 if (ordered) {
7388 /* 7566 /*
7389 * IO on this page will never be started, so we need 7567 * IO on this page will never be started, so we need
7390 * to account for any ordered extents now 7568 * to account for any ordered extents now
7391 */ 7569 */
7392 clear_extent_bit(tree, page_start, page_end, 7570 if (!inode_evicting)
7393 EXTENT_DIRTY | EXTENT_DELALLOC | 7571 clear_extent_bit(tree, page_start, page_end,
7394 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | 7572 EXTENT_DIRTY | EXTENT_DELALLOC |
7395 EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS); 7573 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
7574 EXTENT_DEFRAG, 1, 0, &cached_state,
7575 GFP_NOFS);
7396 /* 7576 /*
7397 * whoever cleared the private bit is responsible 7577 * whoever cleared the private bit is responsible
7398 * for the finish_ordered_io 7578 * for the finish_ordered_io
@@ -7416,14 +7596,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
7416 btrfs_finish_ordered_io(ordered); 7596 btrfs_finish_ordered_io(ordered);
7417 } 7597 }
7418 btrfs_put_ordered_extent(ordered); 7598 btrfs_put_ordered_extent(ordered);
7419 cached_state = NULL; 7599 if (!inode_evicting) {
7420 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 7600 cached_state = NULL;
7601 lock_extent_bits(tree, page_start, page_end, 0,
7602 &cached_state);
7603 }
7604 }
7605
7606 if (!inode_evicting) {
7607 clear_extent_bit(tree, page_start, page_end,
7608 EXTENT_LOCKED | EXTENT_DIRTY |
7609 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
7610 EXTENT_DEFRAG, 1, 1,
7611 &cached_state, GFP_NOFS);
7612
7613 __btrfs_releasepage(page, GFP_NOFS);
7421 } 7614 }
7422 clear_extent_bit(tree, page_start, page_end,
7423 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
7424 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
7425 &cached_state, GFP_NOFS);
7426 __btrfs_releasepage(page, GFP_NOFS);
7427 7615
7428 ClearPageChecked(page); 7616 ClearPageChecked(page);
7429 if (PagePrivate(page)) { 7617 if (PagePrivate(page)) {
@@ -7733,7 +7921,9 @@ out:
7733 * create a new subvolume directory/inode (helper for the ioctl). 7921 * create a new subvolume directory/inode (helper for the ioctl).
7734 */ 7922 */
7735int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 7923int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
7736 struct btrfs_root *new_root, u64 new_dirid) 7924 struct btrfs_root *new_root,
7925 struct btrfs_root *parent_root,
7926 u64 new_dirid)
7737{ 7927{
7738 struct inode *inode; 7928 struct inode *inode;
7739 int err; 7929 int err;
@@ -7751,6 +7941,12 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
7751 set_nlink(inode, 1); 7941 set_nlink(inode, 1);
7752 btrfs_i_size_write(inode, 0); 7942 btrfs_i_size_write(inode, 0);
7753 7943
7944 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
7945 if (err)
7946 btrfs_err(new_root->fs_info,
 7947 "error inheriting subvolume %llu properties: %d",
7948 new_root->root_key.objectid, err);
7949
7754 err = btrfs_update_inode(trans, new_root, inode); 7950 err = btrfs_update_inode(trans, new_root, inode);
7755 7951
7756 iput(inode); 7952 iput(inode);
@@ -7776,6 +7972,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
7776 ei->flags = 0; 7972 ei->flags = 0;
7777 ei->csum_bytes = 0; 7973 ei->csum_bytes = 0;
7778 ei->index_cnt = (u64)-1; 7974 ei->index_cnt = (u64)-1;
7975 ei->dir_index = 0;
7779 ei->last_unlink_trans = 0; 7976 ei->last_unlink_trans = 0;
7780 ei->last_log_commit = 0; 7977 ei->last_log_commit = 0;
7781 7978
@@ -8063,6 +8260,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8063 if (ret) 8260 if (ret)
8064 goto out_fail; 8261 goto out_fail;
8065 8262
8263 BTRFS_I(old_inode)->dir_index = 0ULL;
8066 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) { 8264 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
8067 /* force full log commit if subvolume involved. */ 8265 /* force full log commit if subvolume involved. */
8068 root->fs_info->last_trans_log_full_commit = trans->transid; 8266 root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -8151,6 +8349,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8151 goto out_fail; 8349 goto out_fail;
8152 } 8350 }
8153 8351
8352 if (old_inode->i_nlink == 1)
8353 BTRFS_I(old_inode)->dir_index = index;
8354
8154 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { 8355 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
8155 struct dentry *parent = new_dentry->d_parent; 8356 struct dentry *parent = new_dentry->d_parent;
8156 btrfs_log_new_name(trans, old_inode, old_dir, parent); 8357 btrfs_log_new_name(trans, old_inode, old_dir, parent);
@@ -8286,7 +8487,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8286{ 8487{
8287 int ret; 8488 int ret;
8288 8489
8289 if (root->fs_info->sb->s_flags & MS_RDONLY) 8490 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
8290 return -EROFS; 8491 return -EROFS;
8291 8492
8292 ret = __start_delalloc_inodes(root, delay_iput); 8493 ret = __start_delalloc_inodes(root, delay_iput);
@@ -8312,7 +8513,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
8312 struct list_head splice; 8513 struct list_head splice;
8313 int ret; 8514 int ret;
8314 8515
8315 if (fs_info->sb->s_flags & MS_RDONLY) 8516 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
8316 return -EROFS; 8517 return -EROFS;
8317 8518
8318 INIT_LIST_HEAD(&splice); 8519 INIT_LIST_HEAD(&splice);
@@ -8649,12 +8850,14 @@ static const struct inode_operations btrfs_dir_inode_operations = {
8649 .removexattr = btrfs_removexattr, 8850 .removexattr = btrfs_removexattr,
8650 .permission = btrfs_permission, 8851 .permission = btrfs_permission,
8651 .get_acl = btrfs_get_acl, 8852 .get_acl = btrfs_get_acl,
8853 .set_acl = btrfs_set_acl,
8652 .update_time = btrfs_update_time, 8854 .update_time = btrfs_update_time,
8653}; 8855};
8654static const struct inode_operations btrfs_dir_ro_inode_operations = { 8856static const struct inode_operations btrfs_dir_ro_inode_operations = {
8655 .lookup = btrfs_lookup, 8857 .lookup = btrfs_lookup,
8656 .permission = btrfs_permission, 8858 .permission = btrfs_permission,
8657 .get_acl = btrfs_get_acl, 8859 .get_acl = btrfs_get_acl,
8860 .set_acl = btrfs_set_acl,
8658 .update_time = btrfs_update_time, 8861 .update_time = btrfs_update_time,
8659}; 8862};
8660 8863
@@ -8724,6 +8927,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
8724 .permission = btrfs_permission, 8927 .permission = btrfs_permission,
8725 .fiemap = btrfs_fiemap, 8928 .fiemap = btrfs_fiemap,
8726 .get_acl = btrfs_get_acl, 8929 .get_acl = btrfs_get_acl,
8930 .set_acl = btrfs_set_acl,
8727 .update_time = btrfs_update_time, 8931 .update_time = btrfs_update_time,
8728}; 8932};
8729static const struct inode_operations btrfs_special_inode_operations = { 8933static const struct inode_operations btrfs_special_inode_operations = {
@@ -8735,6 +8939,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
8735 .listxattr = btrfs_listxattr, 8939 .listxattr = btrfs_listxattr,
8736 .removexattr = btrfs_removexattr, 8940 .removexattr = btrfs_removexattr,
8737 .get_acl = btrfs_get_acl, 8941 .get_acl = btrfs_get_acl,
8942 .set_acl = btrfs_set_acl,
8738 .update_time = btrfs_update_time, 8943 .update_time = btrfs_update_time,
8739}; 8944};
8740static const struct inode_operations btrfs_symlink_inode_operations = { 8945static const struct inode_operations btrfs_symlink_inode_operations = {
@@ -8748,7 +8953,6 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
8748 .getxattr = btrfs_getxattr, 8953 .getxattr = btrfs_getxattr,
8749 .listxattr = btrfs_listxattr, 8954 .listxattr = btrfs_listxattr,
8750 .removexattr = btrfs_removexattr, 8955 .removexattr = btrfs_removexattr,
8751 .get_acl = btrfs_get_acl,
8752 .update_time = btrfs_update_time, 8956 .update_time = btrfs_update_time,
8753}; 8957};
8754 8958
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 21da5762b0b1..a6d8efa46bfe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -56,6 +56,8 @@
56#include "rcu-string.h" 56#include "rcu-string.h"
57#include "send.h" 57#include "send.h"
58#include "dev-replace.h" 58#include "dev-replace.h"
59#include "props.h"
60#include "sysfs.h"
59 61
60static int btrfs_clone(struct inode *src, struct inode *inode, 62static int btrfs_clone(struct inode *src, struct inode *inode,
61 u64 off, u64 olen, u64 olen_aligned, u64 destoff); 63 u64 off, u64 olen, u64 olen_aligned, u64 destoff);
@@ -190,6 +192,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
190 unsigned int i_oldflags; 192 unsigned int i_oldflags;
191 umode_t mode; 193 umode_t mode;
192 194
195 if (!inode_owner_or_capable(inode))
196 return -EPERM;
197
193 if (btrfs_root_readonly(root)) 198 if (btrfs_root_readonly(root))
194 return -EROFS; 199 return -EROFS;
195 200
@@ -200,9 +205,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
200 if (ret) 205 if (ret)
201 return ret; 206 return ret;
202 207
203 if (!inode_owner_or_capable(inode))
204 return -EACCES;
205
206 ret = mnt_want_write_file(file); 208 ret = mnt_want_write_file(file);
207 if (ret) 209 if (ret)
208 return ret; 210 return ret;
@@ -280,9 +282,25 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
280 if (flags & FS_NOCOMP_FL) { 282 if (flags & FS_NOCOMP_FL) {
281 ip->flags &= ~BTRFS_INODE_COMPRESS; 283 ip->flags &= ~BTRFS_INODE_COMPRESS;
282 ip->flags |= BTRFS_INODE_NOCOMPRESS; 284 ip->flags |= BTRFS_INODE_NOCOMPRESS;
285
286 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
287 if (ret && ret != -ENODATA)
288 goto out_drop;
283 } else if (flags & FS_COMPR_FL) { 289 } else if (flags & FS_COMPR_FL) {
290 const char *comp;
291
284 ip->flags |= BTRFS_INODE_COMPRESS; 292 ip->flags |= BTRFS_INODE_COMPRESS;
285 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 293 ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
294
295 if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
296 comp = "lzo";
297 else
298 comp = "zlib";
299 ret = btrfs_set_prop(inode, "btrfs.compression",
300 comp, strlen(comp), 0);
301 if (ret)
302 goto out_drop;
303
286 } else { 304 } else {
287 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 305 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
288 } 306 }
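FS_COMPR_FL and FS_NOCOMP_FL are now mirrored into the new btrfs.compression property from props.c (FS_NOCOMP_FL removes the property, hence the tolerated -ENODATA). Since properties live in the btrfs. xattr namespace, the same state should be reachable from userspace along these lines; the path and the exact equivalence are assumptions, not something this hunk shows:

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/btrfs/file";	/* example path */
	const char *algo = "lzo";		/* or "zlib" */

	/* removing the xattr would correspond to FS_NOCOMP_FL above */
	if (setxattr(path, "btrfs.compression", algo, strlen(algo), 0)) {
		perror("setxattr");
		return 1;
	}
	return 0;
}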
@@ -392,6 +410,7 @@ static noinline int create_subvol(struct inode *dir,
392 struct btrfs_root *new_root; 410 struct btrfs_root *new_root;
393 struct btrfs_block_rsv block_rsv; 411 struct btrfs_block_rsv block_rsv;
394 struct timespec cur_time = CURRENT_TIME; 412 struct timespec cur_time = CURRENT_TIME;
413 struct inode *inode;
395 int ret; 414 int ret;
396 int err; 415 int err;
397 u64 objectid; 416 u64 objectid;
@@ -417,7 +436,9 @@ static noinline int create_subvol(struct inode *dir,
417 trans = btrfs_start_transaction(root, 0); 436 trans = btrfs_start_transaction(root, 0);
418 if (IS_ERR(trans)) { 437 if (IS_ERR(trans)) {
419 ret = PTR_ERR(trans); 438 ret = PTR_ERR(trans);
420 goto out; 439 btrfs_subvolume_release_metadata(root, &block_rsv,
440 qgroup_reserved);
441 return ret;
421 } 442 }
422 trans->block_rsv = &block_rsv; 443 trans->block_rsv = &block_rsv;
423 trans->bytes_reserved = block_rsv.size; 444 trans->bytes_reserved = block_rsv.size;
@@ -500,7 +521,7 @@ static noinline int create_subvol(struct inode *dir,
500 521
501 btrfs_record_root_in_trans(trans, new_root); 522 btrfs_record_root_in_trans(trans, new_root);
502 523
503 ret = btrfs_create_subvol_root(trans, new_root, new_dirid); 524 ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
504 if (ret) { 525 if (ret) {
505 /* We potentially lose an unused inode item here */ 526 /* We potentially lose an unused inode item here */
506 btrfs_abort_transaction(trans, root, ret); 527 btrfs_abort_transaction(trans, root, ret);
@@ -542,6 +563,8 @@ static noinline int create_subvol(struct inode *dir,
542fail: 563fail:
543 trans->block_rsv = NULL; 564 trans->block_rsv = NULL;
544 trans->bytes_reserved = 0; 565 trans->bytes_reserved = 0;
566 btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
567
545 if (async_transid) { 568 if (async_transid) {
546 *async_transid = trans->transid; 569 *async_transid = trans->transid;
547 err = btrfs_commit_transaction_async(trans, root, 1); 570 err = btrfs_commit_transaction_async(trans, root, 1);
@@ -553,10 +576,12 @@ fail:
553 if (err && !ret) 576 if (err && !ret)
554 ret = err; 577 ret = err;
555 578
556 if (!ret) 579 if (!ret) {
557 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 580 inode = btrfs_lookup_dentry(dir, dentry);
558out: 581 if (IS_ERR(inode))
559 btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); 582 return PTR_ERR(inode);
583 d_instantiate(dentry, inode);
584 }
560 return ret; 585 return ret;
561} 586}
562 587
@@ -642,7 +667,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
642 ret = PTR_ERR(inode); 667 ret = PTR_ERR(inode);
643 goto fail; 668 goto fail;
644 } 669 }
645 BUG_ON(!inode); 670
646 d_instantiate(dentry, inode); 671 d_instantiate(dentry, inode);
647 ret = 0; 672 ret = 0;
648fail: 673fail:
@@ -1011,7 +1036,7 @@ out:
1011static int cluster_pages_for_defrag(struct inode *inode, 1036static int cluster_pages_for_defrag(struct inode *inode,
1012 struct page **pages, 1037 struct page **pages,
1013 unsigned long start_index, 1038 unsigned long start_index,
1014 int num_pages) 1039 unsigned long num_pages)
1015{ 1040{
1016 unsigned long file_end; 1041 unsigned long file_end;
1017 u64 isize = i_size_read(inode); 1042 u64 isize = i_size_read(inode);
@@ -1169,8 +1194,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1169 int defrag_count = 0; 1194 int defrag_count = 0;
1170 int compress_type = BTRFS_COMPRESS_ZLIB; 1195 int compress_type = BTRFS_COMPRESS_ZLIB;
1171 int extent_thresh = range->extent_thresh; 1196 int extent_thresh = range->extent_thresh;
1172 int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; 1197 unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
1173 int cluster = max_cluster; 1198 unsigned long cluster = max_cluster;
1174 u64 new_align = ~((u64)128 * 1024 - 1); 1199 u64 new_align = ~((u64)128 * 1024 - 1);
1175 struct page **pages = NULL; 1200 struct page **pages = NULL;
1176 1201
@@ -1254,7 +1279,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1254 break; 1279 break;
1255 1280
1256 if (btrfs_defrag_cancelled(root->fs_info)) { 1281 if (btrfs_defrag_cancelled(root->fs_info)) {
1257 printk(KERN_DEBUG "btrfs: defrag_file cancelled\n"); 1282 printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n");
1258 ret = -EAGAIN; 1283 ret = -EAGAIN;
1259 break; 1284 break;
1260 } 1285 }
@@ -1416,20 +1441,20 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1416 ret = -EINVAL; 1441 ret = -EINVAL;
1417 goto out_free; 1442 goto out_free;
1418 } 1443 }
1419 printk(KERN_INFO "btrfs: resizing devid %llu\n", devid); 1444 btrfs_info(root->fs_info, "resizing devid %llu", devid);
1420 } 1445 }
1421 1446
1422 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1447 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1423 if (!device) { 1448 if (!device) {
1424 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1449 btrfs_info(root->fs_info, "resizer unable to find device %llu",
1425 devid); 1450 devid);
1426 ret = -ENODEV; 1451 ret = -ENODEV;
1427 goto out_free; 1452 goto out_free;
1428 } 1453 }
1429 1454
1430 if (!device->writeable) { 1455 if (!device->writeable) {
1431 printk(KERN_INFO "btrfs: resizer unable to apply on " 1456 btrfs_info(root->fs_info,
1432 "readonly device %llu\n", 1457 "resizer unable to apply on readonly device %llu",
1433 devid); 1458 devid);
1434 ret = -EPERM; 1459 ret = -EPERM;
1435 goto out_free; 1460 goto out_free;
@@ -1466,6 +1491,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1466 } 1491 }
1467 new_size = old_size - new_size; 1492 new_size = old_size - new_size;
1468 } else if (mod > 0) { 1493 } else if (mod > 0) {
1494 if (new_size > ULLONG_MAX - old_size) {
1495 ret = -EINVAL;
1496 goto out_free;
1497 }
1469 new_size = old_size + new_size; 1498 new_size = old_size + new_size;
1470 } 1499 }
1471 1500
@@ -1481,7 +1510,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1481 do_div(new_size, root->sectorsize); 1510 do_div(new_size, root->sectorsize);
1482 new_size *= root->sectorsize; 1511 new_size *= root->sectorsize;
1483 1512
1484 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", 1513 printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
1485 rcu_str_deref(device->name), new_size); 1514 rcu_str_deref(device->name), new_size);
1486 1515
1487 if (new_size > old_size) { 1516 if (new_size > old_size) {
@@ -1542,9 +1571,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1542 1571
1543 src_inode = file_inode(src.file); 1572 src_inode = file_inode(src.file);
1544 if (src_inode->i_sb != file_inode(file)->i_sb) { 1573 if (src_inode->i_sb != file_inode(file)->i_sb) {
1545 printk(KERN_INFO "btrfs: Snapshot src from " 1574 btrfs_info(BTRFS_I(src_inode)->root->fs_info,
1546 "another FS\n"); 1575 "Snapshot src from another FS");
1547 ret = -EINVAL; 1576 ret = -EINVAL;
1577 } else if (!inode_owner_or_capable(src_inode)) {
1578 /*
1579 * Subvolume creation is not restricted, but snapshots
1580 * are limited to own subvolumes only
1581 */
1582 ret = -EPERM;
1548 } else { 1583 } else {
1549 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1584 ret = btrfs_mksubvol(&file->f_path, name, namelen,
1550 BTRFS_I(src_inode)->root, 1585 BTRFS_I(src_inode)->root,
@@ -1662,6 +1697,9 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1662 u64 flags; 1697 u64 flags;
1663 int ret = 0; 1698 int ret = 0;
1664 1699
1700 if (!inode_owner_or_capable(inode))
1701 return -EPERM;
1702
1665 ret = mnt_want_write_file(file); 1703 ret = mnt_want_write_file(file);
1666 if (ret) 1704 if (ret)
1667 goto out; 1705 goto out;
@@ -1686,11 +1724,6 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1686 goto out_drop_write; 1724 goto out_drop_write;
1687 } 1725 }
1688 1726
1689 if (!inode_owner_or_capable(inode)) {
1690 ret = -EACCES;
1691 goto out_drop_write;
1692 }
1693
1694 down_write(&root->fs_info->subvol_sem); 1727 down_write(&root->fs_info->subvol_sem);
1695 1728
1696 /* nothing to do */ 1729 /* nothing to do */
@@ -1698,12 +1731,28 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1698 goto out_drop_sem; 1731 goto out_drop_sem;
1699 1732
1700 root_flags = btrfs_root_flags(&root->root_item); 1733 root_flags = btrfs_root_flags(&root->root_item);
1701 if (flags & BTRFS_SUBVOL_RDONLY) 1734 if (flags & BTRFS_SUBVOL_RDONLY) {
1702 btrfs_set_root_flags(&root->root_item, 1735 btrfs_set_root_flags(&root->root_item,
1703 root_flags | BTRFS_ROOT_SUBVOL_RDONLY); 1736 root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
1704 else 1737 } else {
1705 btrfs_set_root_flags(&root->root_item, 1738 /*
1739 * Block RO -> RW transition if this subvolume is involved in
1740 * send
1741 */
1742 spin_lock(&root->root_item_lock);
1743 if (root->send_in_progress == 0) {
1744 btrfs_set_root_flags(&root->root_item,
1706 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); 1745 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
1746 spin_unlock(&root->root_item_lock);
1747 } else {
1748 spin_unlock(&root->root_item_lock);
1749 btrfs_warn(root->fs_info,
1750 "Attempt to set subvolume %llu read-write during send",
1751 root->root_key.objectid);
1752 ret = -EPERM;
1753 goto out_drop_sem;
1754 }
1755 }
1707 1756
1708 trans = btrfs_start_transaction(root, 1); 1757 trans = btrfs_start_transaction(root, 1);
1709 if (IS_ERR(trans)) { 1758 if (IS_ERR(trans)) {
@@ -1910,7 +1959,7 @@ static noinline int search_ioctl(struct inode *inode,
1910 key.offset = (u64)-1; 1959 key.offset = (u64)-1;
1911 root = btrfs_read_fs_root_no_name(info, &key); 1960 root = btrfs_read_fs_root_no_name(info, &key);
1912 if (IS_ERR(root)) { 1961 if (IS_ERR(root)) {
1913 printk(KERN_ERR "could not find root %llu\n", 1962 printk(KERN_ERR "BTRFS: could not find root %llu\n",
1914 sk->tree_id); 1963 sk->tree_id);
1915 btrfs_free_path(path); 1964 btrfs_free_path(path);
1916 return -ENOENT; 1965 return -ENOENT;
@@ -2000,7 +2049,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
2000 key.offset = (u64)-1; 2049 key.offset = (u64)-1;
2001 root = btrfs_read_fs_root_no_name(info, &key); 2050 root = btrfs_read_fs_root_no_name(info, &key);
2002 if (IS_ERR(root)) { 2051 if (IS_ERR(root)) {
2003 printk(KERN_ERR "could not find root %llu\n", tree_id); 2052 printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id);
2004 ret = -ENOENT; 2053 ret = -ENOENT;
2005 goto out; 2054 goto out;
2006 } 2055 }
@@ -2686,14 +2735,11 @@ out_unlock:
2686#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) 2735#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
2687 2736
2688static long btrfs_ioctl_file_extent_same(struct file *file, 2737static long btrfs_ioctl_file_extent_same(struct file *file,
2689 void __user *argp) 2738 struct btrfs_ioctl_same_args __user *argp)
2690{ 2739{
2691 struct btrfs_ioctl_same_args tmp;
2692 struct btrfs_ioctl_same_args *same; 2740 struct btrfs_ioctl_same_args *same;
2693 struct btrfs_ioctl_same_extent_info *info; 2741 struct btrfs_ioctl_same_extent_info *info;
2694 struct inode *src = file->f_dentry->d_inode; 2742 struct inode *src = file_inode(file);
2695 struct file *dst_file = NULL;
2696 struct inode *dst;
2697 u64 off; 2743 u64 off;
2698 u64 len; 2744 u64 len;
2699 int i; 2745 int i;
@@ -2701,6 +2747,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2701 unsigned long size; 2747 unsigned long size;
2702 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 2748 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
2703 bool is_admin = capable(CAP_SYS_ADMIN); 2749 bool is_admin = capable(CAP_SYS_ADMIN);
2750 u16 count;
2704 2751
2705 if (!(file->f_mode & FMODE_READ)) 2752 if (!(file->f_mode & FMODE_READ))
2706 return -EINVAL; 2753 return -EINVAL;
@@ -2709,17 +2756,14 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2709 if (ret) 2756 if (ret)
2710 return ret; 2757 return ret;
2711 2758
2712 if (copy_from_user(&tmp, 2759 if (get_user(count, &argp->dest_count)) {
2713 (struct btrfs_ioctl_same_args __user *)argp,
2714 sizeof(tmp))) {
2715 ret = -EFAULT; 2760 ret = -EFAULT;
2716 goto out; 2761 goto out;
2717 } 2762 }
2718 2763
2719 size = sizeof(tmp) + 2764 size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
2720 tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
2721 2765
2722 same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size); 2766 same = memdup_user(argp, size);
2723 2767
2724 if (IS_ERR(same)) { 2768 if (IS_ERR(same)) {
2725 ret = PTR_ERR(same); 2769 ret = PTR_ERR(same);
@@ -2756,52 +2800,35 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2756 goto out; 2800 goto out;
2757 2801
2758 /* pre-format output fields to sane values */ 2802 /* pre-format output fields to sane values */
2759 for (i = 0; i < same->dest_count; i++) { 2803 for (i = 0; i < count; i++) {
2760 same->info[i].bytes_deduped = 0ULL; 2804 same->info[i].bytes_deduped = 0ULL;
2761 same->info[i].status = 0; 2805 same->info[i].status = 0;
2762 } 2806 }
2763 2807
2764 ret = 0; 2808 for (i = 0, info = same->info; i < count; i++, info++) {
2765 for (i = 0; i < same->dest_count; i++) { 2809 struct inode *dst;
2766 info = &same->info[i]; 2810 struct fd dst_file = fdget(info->fd);
2767 2811 if (!dst_file.file) {
2768 dst_file = fget(info->fd);
2769 if (!dst_file) {
2770 info->status = -EBADF; 2812 info->status = -EBADF;
2771 goto next; 2813 continue;
2772 } 2814 }
2815 dst = file_inode(dst_file.file);
2773 2816
2774 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { 2817 if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
2775 info->status = -EINVAL; 2818 info->status = -EINVAL;
2776 goto next; 2819 } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
2777 } 2820 info->status = -EXDEV;
2778 2821 } else if (S_ISDIR(dst->i_mode)) {
2779 info->status = -EXDEV;
2780 if (file->f_path.mnt != dst_file->f_path.mnt)
2781 goto next;
2782
2783 dst = dst_file->f_dentry->d_inode;
2784 if (src->i_sb != dst->i_sb)
2785 goto next;
2786
2787 if (S_ISDIR(dst->i_mode)) {
2788 info->status = -EISDIR; 2822 info->status = -EISDIR;
2789 goto next; 2823 } else if (!S_ISREG(dst->i_mode)) {
2790 }
2791
2792 if (!S_ISREG(dst->i_mode)) {
2793 info->status = -EACCES; 2824 info->status = -EACCES;
2794 goto next; 2825 } else {
2826 info->status = btrfs_extent_same(src, off, len, dst,
2827 info->logical_offset);
2828 if (info->status == 0)
2829 info->bytes_deduped += len;
2795 } 2830 }
2796 2831 fdput(dst_file);
2797 info->status = btrfs_extent_same(src, off, len, dst,
2798 info->logical_offset);
2799 if (info->status == 0)
2800 info->bytes_deduped += len;
2801
2802next:
2803 if (dst_file)
2804 fput(dst_file);
2805 } 2832 }
2806 2833
2807 ret = copy_to_user(argp, same, size); 2834 ret = copy_to_user(argp, same, size);
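Two things change in the dedupe ioctl above: fget()/fput() become fdget()/fdput(), and the buffer size is computed as offsetof(struct btrfs_ioctl_same_args, info[count]) after reading only dest_count with get_user(), instead of copying a whole header first. With a trailing flexible array, the offsetof form equals sizeof(header) + count * sizeof(element), and the variable array index relies on the __builtin_offsetof extension that GCC and clang provide. A standalone illustration with stand-in types (not the real ioctl layout):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct info {
	int64_t fd;
	uint64_t logical_offset;
	uint64_t bytes_deduped;
	int32_t status;
	uint32_t reserved;
};

struct args {
	uint64_t logical_offset;
	uint64_t length;
	uint16_t dest_count;
	uint16_t reserved1;
	uint32_t reserved2;
	struct info info[];	/* flexible array member */
};

int main(void)
{
	uint16_t count = 3;	/* as fetched with get_user() above */
	size_t a = offsetof(struct args, info[count]);
	size_t b = sizeof(struct args) + count * sizeof(struct info);

	printf("%zu %zu\n", a, b);	/* 120 120 on common LP64 ABIs */
	return 0;
}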
@@ -2860,12 +2887,14 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2860 * note the key will change type as we walk through the 2887 * note the key will change type as we walk through the
2861 * tree. 2888 * tree.
2862 */ 2889 */
2890 path->leave_spinning = 1;
2863 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 2891 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
2864 0, 0); 2892 0, 0);
2865 if (ret < 0) 2893 if (ret < 0)
2866 goto out; 2894 goto out;
2867 2895
2868 nritems = btrfs_header_nritems(path->nodes[0]); 2896 nritems = btrfs_header_nritems(path->nodes[0]);
2897process_slot:
2869 if (path->slots[0] >= nritems) { 2898 if (path->slots[0] >= nritems) {
2870 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 2899 ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
2871 if (ret < 0) 2900 if (ret < 0)
@@ -2892,11 +2921,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2892 u8 comp; 2921 u8 comp;
2893 u64 endoff; 2922 u64 endoff;
2894 2923
2895 size = btrfs_item_size_nr(leaf, slot);
2896 read_extent_buffer(leaf, buf,
2897 btrfs_item_ptr_offset(leaf, slot),
2898 size);
2899
2900 extent = btrfs_item_ptr(leaf, slot, 2924 extent = btrfs_item_ptr(leaf, slot,
2901 struct btrfs_file_extent_item); 2925 struct btrfs_file_extent_item);
2902 comp = btrfs_file_extent_compression(leaf, extent); 2926 comp = btrfs_file_extent_compression(leaf, extent);
@@ -2915,11 +2939,20 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
2915 datal = btrfs_file_extent_ram_bytes(leaf, 2939 datal = btrfs_file_extent_ram_bytes(leaf,
2916 extent); 2940 extent);
2917 } 2941 }
2918 btrfs_release_path(path);
2919 2942
2920 if (key.offset + datal <= off || 2943 if (key.offset + datal <= off ||
2921 key.offset >= off + len - 1) 2944 key.offset >= off + len - 1) {
2922 goto next; 2945 path->slots[0]++;
2946 goto process_slot;
2947 }
2948
2949 size = btrfs_item_size_nr(leaf, slot);
2950 read_extent_buffer(leaf, buf,
2951 btrfs_item_ptr_offset(leaf, slot),
2952 size);
2953
2954 btrfs_release_path(path);
2955 path->leave_spinning = 0;
2923 2956
2924 memcpy(&new_key, &key, sizeof(new_key)); 2957 memcpy(&new_key, &key, sizeof(new_key));
2925 new_key.objectid = btrfs_ino(inode); 2958 new_key.objectid = btrfs_ino(inode);
@@ -3090,7 +3123,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
3090 } 3123 }
3091 ret = btrfs_end_transaction(trans, root); 3124 ret = btrfs_end_transaction(trans, root);
3092 } 3125 }
3093next:
3094 btrfs_release_path(path); 3126 btrfs_release_path(path);
3095 key.offset++; 3127 key.offset++;
3096 } 3128 }
@@ -3218,9 +3250,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
3218 3250
3219 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 3251 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
3220out_unlock: 3252out_unlock:
3221 mutex_unlock(&src->i_mutex); 3253 if (!same_inode) {
3222 if (!same_inode) 3254 if (inode < src) {
3223 mutex_unlock(&inode->i_mutex); 3255 mutex_unlock(&src->i_mutex);
3256 mutex_unlock(&inode->i_mutex);
3257 } else {
3258 mutex_unlock(&inode->i_mutex);
3259 mutex_unlock(&src->i_mutex);
3260 }
3261 } else {
3262 mutex_unlock(&src->i_mutex);
3263 }
3224out_fput: 3264out_fput:
3225 fdput(src_file); 3265 fdput(src_file);
3226out_drop_write: 3266out_drop_write:
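The unlock path above now releases the two i_mutexes in the reverse of an address-ordered locking scheme. Presumably the matching lock side (outside this hunk) orders the pair by inode address, so concurrent clones between the same two inodes always acquire the mutexes in one global order and cannot deadlock; a sketch with an illustrative name:

static void clone_lock_two_inodes(struct inode *inode, struct inode *src)
{
	/* assumed lock side matching the unlock order in this hunk */
	if (inode < src) {
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
	} else {
		mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
	}
}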
@@ -3343,8 +3383,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
3343 if (IS_ERR_OR_NULL(di)) { 3383 if (IS_ERR_OR_NULL(di)) {
3344 btrfs_free_path(path); 3384 btrfs_free_path(path);
3345 btrfs_end_transaction(trans, root); 3385 btrfs_end_transaction(trans, root);
 3346 printk(KERN_ERR "Umm, you don't have the default dir item, " 3386 btrfs_err(new_root->fs_info, "Umm, you don't have the default dir "
 3347 "this isn't going to work\n"); 3387 "item, this isn't going to work");
3348 ret = -ENOENT; 3388 ret = -ENOENT;
3349 goto out; 3389 goto out;
3350 } 3390 }
@@ -4325,6 +4365,9 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
4325 int ret = 0; 4365 int ret = 0;
4326 int received_uuid_changed; 4366 int received_uuid_changed;
4327 4367
4368 if (!inode_owner_or_capable(inode))
4369 return -EPERM;
4370
4328 ret = mnt_want_write_file(file); 4371 ret = mnt_want_write_file(file);
4329 if (ret < 0) 4372 if (ret < 0)
4330 return ret; 4373 return ret;
@@ -4341,11 +4384,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
4341 goto out; 4384 goto out;
4342 } 4385 }
4343 4386
4344 if (!inode_owner_or_capable(inode)) {
4345 ret = -EACCES;
4346 goto out;
4347 }
4348
4349 sa = memdup_user(arg, sizeof(*sa)); 4387 sa = memdup_user(arg, sizeof(*sa));
4350 if (IS_ERR(sa)) { 4388 if (IS_ERR(sa)) {
4351 ret = PTR_ERR(sa); 4389 ret = PTR_ERR(sa);
@@ -4431,8 +4469,8 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
4431 len = strnlen(label, BTRFS_LABEL_SIZE); 4469 len = strnlen(label, BTRFS_LABEL_SIZE);
4432 4470
4433 if (len == BTRFS_LABEL_SIZE) { 4471 if (len == BTRFS_LABEL_SIZE) {
4434 pr_warn("btrfs: label is too long, return the first %zu bytes\n", 4472 btrfs_warn(root->fs_info,
4435 --len); 4473 "label is too long, return the first %zu bytes", --len);
4436 } 4474 }
4437 4475
4438 ret = copy_to_user(arg, label, len); 4476 ret = copy_to_user(arg, label, len);
@@ -4455,7 +4493,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
4455 return -EFAULT; 4493 return -EFAULT;
4456 4494
4457 if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { 4495 if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
4458 pr_err("btrfs: unable to set label with more than %d bytes\n", 4496 btrfs_err(root->fs_info, "unable to set label with more than %d bytes",
4459 BTRFS_LABEL_SIZE - 1); 4497 BTRFS_LABEL_SIZE - 1);
4460 return -EINVAL; 4498 return -EINVAL;
4461 } 4499 }
@@ -4473,13 +4511,173 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
4473 spin_lock(&root->fs_info->super_lock); 4511 spin_lock(&root->fs_info->super_lock);
4474 strcpy(super_block->label, label); 4512 strcpy(super_block->label, label);
4475 spin_unlock(&root->fs_info->super_lock); 4513 spin_unlock(&root->fs_info->super_lock);
4476 ret = btrfs_end_transaction(trans, root); 4514 ret = btrfs_commit_transaction(trans, root);
4477 4515
4478out_unlock: 4516out_unlock:
4479 mnt_drop_write_file(file); 4517 mnt_drop_write_file(file);
4480 return ret; 4518 return ret;
4481} 4519}
4482 4520
4521#define INIT_FEATURE_FLAGS(suffix) \
4522 { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
4523 .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
4524 .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
4525
4526static int btrfs_ioctl_get_supported_features(struct file *file,
4527 void __user *arg)
4528{
4529 static struct btrfs_ioctl_feature_flags features[3] = {
4530 INIT_FEATURE_FLAGS(SUPP),
4531 INIT_FEATURE_FLAGS(SAFE_SET),
4532 INIT_FEATURE_FLAGS(SAFE_CLEAR)
4533 };
4534
4535 if (copy_to_user(arg, &features, sizeof(features)))
4536 return -EFAULT;
4537
4538 return 0;
4539}
4540
4541static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
4542{
4543 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
4544 struct btrfs_super_block *super_block = root->fs_info->super_copy;
4545 struct btrfs_ioctl_feature_flags features;
4546
4547 features.compat_flags = btrfs_super_compat_flags(super_block);
4548 features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block);
4549 features.incompat_flags = btrfs_super_incompat_flags(super_block);
4550
4551 if (copy_to_user(arg, &features, sizeof(features)))
4552 return -EFAULT;
4553
4554 return 0;
4555}
4556
4557static int check_feature_bits(struct btrfs_root *root,
4558 enum btrfs_feature_set set,
4559 u64 change_mask, u64 flags, u64 supported_flags,
4560 u64 safe_set, u64 safe_clear)
4561{
4562 const char *type = btrfs_feature_set_names[set];
4563 char *names;
4564 u64 disallowed, unsupported;
4565 u64 set_mask = flags & change_mask;
4566 u64 clear_mask = ~flags & change_mask;
4567
4568 unsupported = set_mask & ~supported_flags;
4569 if (unsupported) {
4570 names = btrfs_printable_features(set, unsupported);
4571 if (names) {
4572 btrfs_warn(root->fs_info,
4573 "this kernel does not support the %s feature bit%s",
4574 names, strchr(names, ',') ? "s" : "");
4575 kfree(names);
4576 } else
4577 btrfs_warn(root->fs_info,
4578 "this kernel does not support %s bits 0x%llx",
4579 type, unsupported);
4580 return -EOPNOTSUPP;
4581 }
4582
4583 disallowed = set_mask & ~safe_set;
4584 if (disallowed) {
4585 names = btrfs_printable_features(set, disallowed);
4586 if (names) {
4587 btrfs_warn(root->fs_info,
4588 "can't set the %s feature bit%s while mounted",
4589 names, strchr(names, ',') ? "s" : "");
4590 kfree(names);
4591 } else
4592 btrfs_warn(root->fs_info,
4593 "can't set %s bits 0x%llx while mounted",
4594 type, disallowed);
4595 return -EPERM;
4596 }
4597
4598 disallowed = clear_mask & ~safe_clear;
4599 if (disallowed) {
4600 names = btrfs_printable_features(set, disallowed);
4601 if (names) {
4602 btrfs_warn(root->fs_info,
4603 "can't clear the %s feature bit%s while mounted",
4604 names, strchr(names, ',') ? "s" : "");
4605 kfree(names);
4606 } else
4607 btrfs_warn(root->fs_info,
4608 "can't clear %s bits 0x%llx while mounted",
4609 type, disallowed);
4610 return -EPERM;
4611 }
4612
4613 return 0;
4614}
4615
4616#define check_feature(root, change_mask, flags, mask_base) \
4617check_feature_bits(root, FEAT_##mask_base, change_mask, flags, \
4618 BTRFS_FEATURE_ ## mask_base ## _SUPP, \
4619 BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \
4620 BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR)
4621
4622static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
4623{
4624 struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
4625 struct btrfs_super_block *super_block = root->fs_info->super_copy;
4626 struct btrfs_ioctl_feature_flags flags[2];
4627 struct btrfs_trans_handle *trans;
4628 u64 newflags;
4629 int ret;
4630
4631 if (!capable(CAP_SYS_ADMIN))
4632 return -EPERM;
4633
4634 if (copy_from_user(flags, arg, sizeof(flags)))
4635 return -EFAULT;
4636
4637 /* Nothing to do */
4638 if (!flags[0].compat_flags && !flags[0].compat_ro_flags &&
4639 !flags[0].incompat_flags)
4640 return 0;
4641
4642 ret = check_feature(root, flags[0].compat_flags,
4643 flags[1].compat_flags, COMPAT);
4644 if (ret)
4645 return ret;
4646
4647 ret = check_feature(root, flags[0].compat_ro_flags,
4648 flags[1].compat_ro_flags, COMPAT_RO);
4649 if (ret)
4650 return ret;
4651
4652 ret = check_feature(root, flags[0].incompat_flags,
4653 flags[1].incompat_flags, INCOMPAT);
4654 if (ret)
4655 return ret;
4656
4657 trans = btrfs_start_transaction(root, 0);
4658 if (IS_ERR(trans))
4659 return PTR_ERR(trans);
4660
4661 spin_lock(&root->fs_info->super_lock);
4662 newflags = btrfs_super_compat_flags(super_block);
4663 newflags |= flags[0].compat_flags & flags[1].compat_flags;
4664 newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags);
4665 btrfs_set_super_compat_flags(super_block, newflags);
4666
4667 newflags = btrfs_super_compat_ro_flags(super_block);
4668 newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags;
4669 newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags);
4670 btrfs_set_super_compat_ro_flags(super_block, newflags);
4671
4672 newflags = btrfs_super_incompat_flags(super_block);
4673 newflags |= flags[0].incompat_flags & flags[1].incompat_flags;
4674 newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags);
4675 btrfs_set_super_incompat_flags(super_block, newflags);
4676 spin_unlock(&root->fs_info->super_lock);
4677
4678 return btrfs_commit_transaction(trans, root);
4679}
4680
4483long btrfs_ioctl(struct file *file, unsigned int 4681long btrfs_ioctl(struct file *file, unsigned int
4484 cmd, unsigned long arg) 4682 cmd, unsigned long arg)
4485{ 4683{
@@ -4598,6 +4796,12 @@ long btrfs_ioctl(struct file *file, unsigned int
4598 return btrfs_ioctl_set_fslabel(file, argp); 4796 return btrfs_ioctl_set_fslabel(file, argp);
4599 case BTRFS_IOC_FILE_EXTENT_SAME: 4797 case BTRFS_IOC_FILE_EXTENT_SAME:
4600 return btrfs_ioctl_file_extent_same(file, argp); 4798 return btrfs_ioctl_file_extent_same(file, argp);
4799 case BTRFS_IOC_GET_SUPPORTED_FEATURES:
4800 return btrfs_ioctl_get_supported_features(file, argp);
4801 case BTRFS_IOC_GET_FEATURES:
4802 return btrfs_ioctl_get_features(file, argp);
4803 case BTRFS_IOC_SET_FEATURES:
4804 return btrfs_ioctl_set_features(file, argp);
4601 } 4805 }
4602 4806
4603 return -ENOTTY; 4807 return -ENOTTY;
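
The mask/value convention of btrfs_ioctl_set_features() above is easiest to
see from userspace: flags[0] selects which feature bits to change and
flags[1] supplies their new values. A minimal sketch, assuming the ioctl and
struct btrfs_ioctl_feature_flags are exported through <linux/btrfs.h> and
that the extended-iref bit is within the SAFE_SET mask:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Set the extended-iref incompat bit on a mounted filesystem. */
int set_extended_iref(const char *mnt)
{
        struct btrfs_ioctl_feature_flags flags[2];
        int fd = open(mnt, O_RDONLY);

        if (fd < 0)
                return -1;
        memset(flags, 0, sizeof(flags));
        /* flags[0]: the bits we want to change ... */
        flags[0].incompat_flags = BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF;
        /* ... flags[1]: their new values (bit set = enable) */
        flags[1].incompat_flags = BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF;
        if (ioctl(fd, BTRFS_IOC_SET_FEATURES, flags) < 0)
                perror("BTRFS_IOC_SET_FEATURES");
        close(fd);
        return 0;
}
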
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index b6a6f07c5ce2..b47f669aca75 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -141,7 +141,7 @@ static int lzo_compress_pages(struct list_head *ws,
141 ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf, 141 ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
142 &out_len, workspace->mem); 142 &out_len, workspace->mem);
143 if (ret != LZO_E_OK) { 143 if (ret != LZO_E_OK) {
144 printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", 144 printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
145 ret); 145 ret);
146 ret = -1; 146 ret = -1;
147 goto out; 147 goto out;
@@ -357,7 +357,7 @@ cont:
357 if (need_unmap) 357 if (need_unmap)
358 kunmap(pages_in[page_in_index - 1]); 358 kunmap(pages_in[page_in_index - 1]);
359 if (ret != LZO_E_OK) { 359 if (ret != LZO_E_OK) {
360 printk(KERN_WARNING "btrfs decompress failed\n"); 360 printk(KERN_WARNING "BTRFS: decompress failed\n");
361 ret = -1; 361 ret = -1;
362 break; 362 break;
363 } 363 }
@@ -401,7 +401,7 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
401 out_len = PAGE_CACHE_SIZE; 401 out_len = PAGE_CACHE_SIZE;
402 ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len); 402 ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
403 if (ret != LZO_E_OK) { 403 if (ret != LZO_E_OK) {
404 printk(KERN_WARNING "btrfs decompress failed!\n"); 404 printk(KERN_WARNING "BTRFS: decompress failed!\n");
405 ret = -1; 405 ret = -1;
406 goto out; 406 goto out;
407 } 407 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 69582d5b69d1..b16450b840e7 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -336,13 +336,14 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
336 entry->len); 336 entry->len);
337 *file_offset = dec_end; 337 *file_offset = dec_end;
338 if (dec_start > dec_end) { 338 if (dec_start > dec_end) {
339 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n", 339 btrfs_crit(BTRFS_I(inode)->root->fs_info,
340 dec_start, dec_end); 340 "bad ordering dec_start %llu end %llu", dec_start, dec_end);
341 } 341 }
342 to_dec = dec_end - dec_start; 342 to_dec = dec_end - dec_start;
343 if (to_dec > entry->bytes_left) { 343 if (to_dec > entry->bytes_left) {
344 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", 344 btrfs_crit(BTRFS_I(inode)->root->fs_info,
345 entry->bytes_left, to_dec); 345 "bad ordered accounting left %llu size %llu",
346 entry->bytes_left, to_dec);
346 } 347 }
347 entry->bytes_left -= to_dec; 348 entry->bytes_left -= to_dec;
348 if (!uptodate) 349 if (!uptodate)
@@ -401,7 +402,8 @@ have_entry:
401 } 402 }
402 403
403 if (io_size > entry->bytes_left) { 404 if (io_size > entry->bytes_left) {
404 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", 405 btrfs_crit(BTRFS_I(inode)->root->fs_info,
406 "bad ordered accounting left %llu size %llu",
405 entry->bytes_left, io_size); 407 entry->bytes_left, io_size);
406 } 408 }
407 entry->bytes_left -= io_size; 409 entry->bytes_left -= io_size;
@@ -520,7 +522,8 @@ void btrfs_remove_ordered_extent(struct inode *inode,
520 spin_lock_irq(&tree->lock); 522 spin_lock_irq(&tree->lock);
521 node = &entry->rb_node; 523 node = &entry->rb_node;
522 rb_erase(node, &tree->tree); 524 rb_erase(node, &tree->tree);
523 tree->last = NULL; 525 if (tree->last == node)
526 tree->last = NULL;
524 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 527 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
525 spin_unlock_irq(&tree->lock); 528 spin_unlock_irq(&tree->lock);
526 529
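
The ordered-data change above fixes a general caching pattern: tree->last
caches the rbtree node returned by the previous lookup, and the old code
discarded that cache on every erase. The repaired logic invalidates it only
when it actually points at the node being removed, as in this standalone
sketch (illustrative names, not from the patch):

#include <linux/rbtree.h>

/* Erase @node from @root; clear @cached only if it refers to @node. */
static void cache_aware_erase(struct rb_root *root, struct rb_node *node,
                              struct rb_node **cached)
{
        rb_erase(node, root);
        if (*cached == node)
                *cached = NULL; /* a cache of any other node stays valid */
}
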
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 24cad1695af7..65793edb38ca 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -69,23 +69,3 @@ out:
69 btrfs_free_path(path); 69 btrfs_free_path(path);
70 return ret; 70 return ret;
71} 71}
72
73int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
74{
75 struct btrfs_path *path;
76 struct btrfs_key key;
77 int ret;
78
79 key.objectid = BTRFS_ORPHAN_OBJECTID;
80 key.type = BTRFS_ORPHAN_ITEM_KEY;
81 key.offset = offset;
82
83 path = btrfs_alloc_path();
84 if (!path)
85 return -ENOMEM;
86
87 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
88
89 btrfs_free_path(path);
90 return ret;
91}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 417053b17181..6efd70d3b64f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -154,7 +154,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
154 u32 item_size) 154 u32 item_size)
155{ 155{
156 if (!IS_ALIGNED(item_size, sizeof(u64))) { 156 if (!IS_ALIGNED(item_size, sizeof(u64))) {
157 pr_warn("btrfs: uuid item with illegal size %lu!\n", 157 pr_warn("BTRFS: uuid item with illegal size %lu!\n",
158 (unsigned long)item_size); 158 (unsigned long)item_size);
159 return; 159 return;
160 } 160 }
@@ -249,7 +249,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
249 BTRFS_FILE_EXTENT_INLINE) { 249 BTRFS_FILE_EXTENT_INLINE) {
250 printk(KERN_INFO "\t\tinline extent data " 250 printk(KERN_INFO "\t\tinline extent data "
251 "size %u\n", 251 "size %u\n",
252 btrfs_file_extent_inline_len(l, fi)); 252 btrfs_file_extent_inline_len(l, i, fi));
253 break; 253 break;
254 } 254 }
255 printk(KERN_INFO "\t\textent data disk bytenr %llu " 255 printk(KERN_INFO "\t\textent data disk bytenr %llu "
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
new file mode 100644
index 000000000000..129b1dd28527
--- /dev/null
+++ b/fs/btrfs/props.c
@@ -0,0 +1,427 @@
1/*
2 * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/hashtable.h>
20#include "props.h"
21#include "btrfs_inode.h"
22#include "hash.h"
23#include "transaction.h"
24#include "xattr.h"
25
26#define BTRFS_PROP_HANDLERS_HT_BITS 8
27static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
28
29struct prop_handler {
30 struct hlist_node node;
31 const char *xattr_name;
32 int (*validate)(const char *value, size_t len);
33 int (*apply)(struct inode *inode, const char *value, size_t len);
34 const char *(*extract)(struct inode *inode);
35 int inheritable;
36};
37
38static int prop_compression_validate(const char *value, size_t len);
39static int prop_compression_apply(struct inode *inode,
40 const char *value,
41 size_t len);
42static const char *prop_compression_extract(struct inode *inode);
43
44static struct prop_handler prop_handlers[] = {
45 {
46 .xattr_name = XATTR_BTRFS_PREFIX "compression",
47 .validate = prop_compression_validate,
48 .apply = prop_compression_apply,
49 .extract = prop_compression_extract,
50 .inheritable = 1
51 },
52 {
53 .xattr_name = NULL
54 }
55};
56
57void __init btrfs_props_init(void)
58{
59 struct prop_handler *p;
60
61 hash_init(prop_handlers_ht);
62
63 for (p = &prop_handlers[0]; p->xattr_name; p++) {
64 u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
65
66 hash_add(prop_handlers_ht, &p->node, h);
67 }
68}
69
70static const struct hlist_head *find_prop_handlers_by_hash(const u64 hash)
71{
72 struct hlist_head *h;
73
74 h = &prop_handlers_ht[hash_min(hash, BTRFS_PROP_HANDLERS_HT_BITS)];
75 if (hlist_empty(h))
76 return NULL;
77
78 return h;
79}
80
81static const struct prop_handler *
82find_prop_handler(const char *name,
83 const struct hlist_head *handlers)
84{
85 struct prop_handler *h;
86
87 if (!handlers) {
88 u64 hash = btrfs_name_hash(name, strlen(name));
89
90 handlers = find_prop_handlers_by_hash(hash);
91 if (!handlers)
92 return NULL;
93 }
94
95 hlist_for_each_entry(h, handlers, node)
96 if (!strcmp(h->xattr_name, name))
97 return h;
98
99 return NULL;
100}
101
102static int __btrfs_set_prop(struct btrfs_trans_handle *trans,
103 struct inode *inode,
104 const char *name,
105 const char *value,
106 size_t value_len,
107 int flags)
108{
109 const struct prop_handler *handler;
110 int ret;
111
112 if (strlen(name) <= XATTR_BTRFS_PREFIX_LEN)
113 return -EINVAL;
114
115 handler = find_prop_handler(name, NULL);
116 if (!handler)
117 return -EINVAL;
118
119 if (value_len == 0) {
120 ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
121 NULL, 0, flags);
122 if (ret)
123 return ret;
124
125 ret = handler->apply(inode, NULL, 0);
126 ASSERT(ret == 0);
127
128 return ret;
129 }
130
131 ret = handler->validate(value, value_len);
132 if (ret)
133 return ret;
134 ret = __btrfs_setxattr(trans, inode, handler->xattr_name,
135 value, value_len, flags);
136 if (ret)
137 return ret;
138 ret = handler->apply(inode, value, value_len);
139 if (ret) {
140 __btrfs_setxattr(trans, inode, handler->xattr_name,
141 NULL, 0, flags);
142 return ret;
143 }
144
145 set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
146
147 return 0;
148}
149
150int btrfs_set_prop(struct inode *inode,
151 const char *name,
152 const char *value,
153 size_t value_len,
154 int flags)
155{
156 return __btrfs_set_prop(NULL, inode, name, value, value_len, flags);
157}
158
159static int iterate_object_props(struct btrfs_root *root,
160 struct btrfs_path *path,
161 u64 objectid,
162 void (*iterator)(void *,
163 const struct prop_handler *,
164 const char *,
165 size_t),
166 void *ctx)
167{
168 int ret;
169 char *name_buf = NULL;
170 char *value_buf = NULL;
171 int name_buf_len = 0;
172 int value_buf_len = 0;
173
174 while (1) {
175 struct btrfs_key key;
176 struct btrfs_dir_item *di;
177 struct extent_buffer *leaf;
178 u32 total_len, cur, this_len;
179 int slot;
180 const struct hlist_head *handlers;
181
182 slot = path->slots[0];
183 leaf = path->nodes[0];
184
185 if (slot >= btrfs_header_nritems(leaf)) {
186 ret = btrfs_next_leaf(root, path);
187 if (ret < 0)
188 goto out;
189 else if (ret > 0)
190 break;
191 continue;
192 }
193
194 btrfs_item_key_to_cpu(leaf, &key, slot);
195 if (key.objectid != objectid)
196 break;
197 if (key.type != BTRFS_XATTR_ITEM_KEY)
198 break;
199
200 handlers = find_prop_handlers_by_hash(key.offset);
201 if (!handlers)
202 goto next_slot;
203
204 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
205 cur = 0;
206 total_len = btrfs_item_size_nr(leaf, slot);
207
208 while (cur < total_len) {
209 u32 name_len = btrfs_dir_name_len(leaf, di);
210 u32 data_len = btrfs_dir_data_len(leaf, di);
211 unsigned long name_ptr, data_ptr;
212 const struct prop_handler *handler;
213
214 this_len = sizeof(*di) + name_len + data_len;
215 name_ptr = (unsigned long)(di + 1);
216 data_ptr = name_ptr + name_len;
217
218 if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
219 memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
220 name_ptr,
221 XATTR_BTRFS_PREFIX_LEN))
222 goto next_dir_item;
223
224 if (name_len >= name_buf_len) {
225 kfree(name_buf);
226 name_buf_len = name_len + 1;
227 name_buf = kmalloc(name_buf_len, GFP_NOFS);
228 if (!name_buf) {
229 ret = -ENOMEM;
230 goto out;
231 }
232 }
233 read_extent_buffer(leaf, name_buf, name_ptr, name_len);
234 name_buf[name_len] = '\0';
235
236 handler = find_prop_handler(name_buf, handlers);
237 if (!handler)
238 goto next_dir_item;
239
240 if (data_len > value_buf_len) {
241 kfree(value_buf);
242 value_buf_len = data_len;
243 value_buf = kmalloc(data_len, GFP_NOFS);
244 if (!value_buf) {
245 ret = -ENOMEM;
246 goto out;
247 }
248 }
249 read_extent_buffer(leaf, value_buf, data_ptr, data_len);
250
251 iterator(ctx, handler, value_buf, data_len);
252next_dir_item:
253 cur += this_len;
254 di = (struct btrfs_dir_item *)((char *) di + this_len);
255 }
256
257next_slot:
258 path->slots[0]++;
259 }
260
261 ret = 0;
262out:
263 btrfs_release_path(path);
264 kfree(name_buf);
265 kfree(value_buf);
266
267 return ret;
268}
269
270static void inode_prop_iterator(void *ctx,
271 const struct prop_handler *handler,
272 const char *value,
273 size_t len)
274{
275 struct inode *inode = ctx;
276 struct btrfs_root *root = BTRFS_I(inode)->root;
277 int ret;
278
279 ret = handler->apply(inode, value, len);
280 if (unlikely(ret))
281 btrfs_warn(root->fs_info,
282 "error applying prop %s to ino %llu (root %llu): %d",
283 handler->xattr_name, btrfs_ino(inode),
284 root->root_key.objectid, ret);
285 else
286 set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
287}
288
289int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
290{
291 struct btrfs_root *root = BTRFS_I(inode)->root;
292 u64 ino = btrfs_ino(inode);
293 int ret;
294
295 ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
296
297 return ret;
298}
299
300static int inherit_props(struct btrfs_trans_handle *trans,
301 struct inode *inode,
302 struct inode *parent)
303{
304 const struct prop_handler *h;
305 struct btrfs_root *root = BTRFS_I(inode)->root;
306 int ret;
307
308 if (!test_bit(BTRFS_INODE_HAS_PROPS,
309 &BTRFS_I(parent)->runtime_flags))
310 return 0;
311
312 for (h = &prop_handlers[0]; h->xattr_name; h++) {
313 const char *value;
314 u64 num_bytes;
315
316 if (!h->inheritable)
317 continue;
318
319 value = h->extract(parent);
320 if (!value)
321 continue;
322
323 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
324 ret = btrfs_block_rsv_add(root, trans->block_rsv,
325 num_bytes, BTRFS_RESERVE_NO_FLUSH);
326 if (ret)
327 goto out;
328 ret = __btrfs_set_prop(trans, inode, h->xattr_name,
329 value, strlen(value), 0);
330 btrfs_block_rsv_release(root, trans->block_rsv, num_bytes);
331 if (ret)
332 goto out;
333 }
334 ret = 0;
335out:
336 return ret;
337}
338
339int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
340 struct inode *inode,
341 struct inode *dir)
342{
343 if (!dir)
344 return 0;
345
346 return inherit_props(trans, inode, dir);
347}
348
349int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
350 struct btrfs_root *root,
351 struct btrfs_root *parent_root)
352{
353 struct btrfs_key key;
354 struct inode *parent_inode, *child_inode;
355 int ret;
356
357 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
358 key.type = BTRFS_INODE_ITEM_KEY;
359 key.offset = 0;
360
361 parent_inode = btrfs_iget(parent_root->fs_info->sb, &key,
362 parent_root, NULL);
363 if (IS_ERR(parent_inode))
364 return PTR_ERR(parent_inode);
365
366 child_inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
367 if (IS_ERR(child_inode)) {
368 iput(parent_inode);
369 return PTR_ERR(child_inode);
370 }
371
372 ret = inherit_props(trans, child_inode, parent_inode);
373 iput(child_inode);
374 iput(parent_inode);
375
376 return ret;
377}
378
379static int prop_compression_validate(const char *value, size_t len)
380{
381 if (!strncmp("lzo", value, len))
382 return 0;
383 else if (!strncmp("zlib", value, len))
384 return 0;
385
386 return -EINVAL;
387}
388
389static int prop_compression_apply(struct inode *inode,
390 const char *value,
391 size_t len)
392{
393 int type;
394
395 if (len == 0) {
396 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
397 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
398 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
399
400 return 0;
401 }
402
403 if (!strncmp("lzo", value, len))
404 type = BTRFS_COMPRESS_LZO;
405 else if (!strncmp("zlib", value, len))
406 type = BTRFS_COMPRESS_ZLIB;
407 else
408 return -EINVAL;
409
410 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
411 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
412 BTRFS_I(inode)->force_compress = type;
413
414 return 0;
415}
416
417static const char *prop_compression_extract(struct inode *inode)
418{
419 switch (BTRFS_I(inode)->force_compress) {
420 case BTRFS_COMPRESS_ZLIB:
421 return "zlib";
422 case BTRFS_COMPRESS_LZO:
423 return "lzo";
424 }
425
426 return NULL;
427}
diff --git a/fs/btrfs/props.h b/fs/btrfs/props.h
new file mode 100644
index 000000000000..100f18829d50
--- /dev/null
+++ b/fs/btrfs/props.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) 2014 Filipe David Borba Manana <fdmanana@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#ifndef __BTRFS_PROPS_H
20#define __BTRFS_PROPS_H
21
22#include "ctree.h"
23
24void __init btrfs_props_init(void);
25
26int btrfs_set_prop(struct inode *inode,
27 const char *name,
28 const char *value,
29 size_t value_len,
30 int flags);
31
32int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
33
34int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
35 struct inode *inode,
36 struct inode *dir);
37
38int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
39 struct btrfs_root *root,
40 struct btrfs_root *parent_root);
41
42#endif
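
Because properties live in the "btrfs." xattr namespace (XATTR_BTRFS_PREFIX),
the compression property implemented in props.c needs no dedicated ioctl; a
plain setxattr() drives it, with the name and the accepted values "lzo" and
"zlib" coming from prop_compression_validate() above. A minimal userspace
sketch, assuming a file on a mounted btrfs at the path shown:

#include <stdio.h>
#include <sys/xattr.h>

int main(void)
{
        /* request lzo compression for future writes to this file */
        if (setxattr("/mnt/btrfs/file", "btrfs.compression", "lzo", 3, 0))
                perror("setxattr");
        return 0;
}
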
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4e6ef490619e..472302a2d745 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -301,16 +301,16 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
301 301
302 if (btrfs_qgroup_status_version(l, ptr) != 302 if (btrfs_qgroup_status_version(l, ptr) !=
303 BTRFS_QGROUP_STATUS_VERSION) { 303 BTRFS_QGROUP_STATUS_VERSION) {
304 printk(KERN_ERR 304 btrfs_err(fs_info,
305 "btrfs: old qgroup version, quota disabled\n"); 305 "old qgroup version, quota disabled");
306 goto out; 306 goto out;
307 } 307 }
308 if (btrfs_qgroup_status_generation(l, ptr) != 308 if (btrfs_qgroup_status_generation(l, ptr) !=
309 fs_info->generation) { 309 fs_info->generation) {
310 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 310 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
311 printk(KERN_ERR 311 btrfs_err(fs_info,
312 "btrfs: qgroup generation mismatch, " 312 "qgroup generation mismatch, "
313 "marked as inconsistent\n"); 313 "marked as inconsistent");
314 } 314 }
315 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 315 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
316 ptr); 316 ptr);
@@ -325,7 +325,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
325 qgroup = find_qgroup_rb(fs_info, found_key.offset); 325 qgroup = find_qgroup_rb(fs_info, found_key.offset);
326 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 326 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
327 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 327 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
 328 printk(KERN_ERR "btrfs: inconsitent qgroup config\n"); 328 btrfs_err(fs_info, "inconsistent qgroup config");
329 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 329 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330 } 330 }
331 if (!qgroup) { 331 if (!qgroup) {
@@ -396,8 +396,8 @@ next1:
396 ret = add_relation_rb(fs_info, found_key.objectid, 396 ret = add_relation_rb(fs_info, found_key.objectid,
397 found_key.offset); 397 found_key.offset);
398 if (ret == -ENOENT) { 398 if (ret == -ENOENT) {
399 printk(KERN_WARNING 399 btrfs_warn(fs_info,
400 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", 400 "orphan qgroup relation 0x%llx->0x%llx",
401 found_key.objectid, found_key.offset); 401 found_key.objectid, found_key.offset);
402 ret = 0; /* ignore the error */ 402 ret = 0; /* ignore the error */
403 } 403 }
@@ -644,8 +644,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
644 644
645 l = path->nodes[0]; 645 l = path->nodes[0];
646 slot = path->slots[0]; 646 slot = path->slots[0];
647 qgroup_limit = btrfs_item_ptr(l, path->slots[0], 647 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
648 struct btrfs_qgroup_limit_item);
649 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); 648 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
650 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); 649 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
651 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); 650 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
@@ -687,8 +686,7 @@ static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
687 686
688 l = path->nodes[0]; 687 l = path->nodes[0];
689 slot = path->slots[0]; 688 slot = path->slots[0];
690 qgroup_info = btrfs_item_ptr(l, path->slots[0], 689 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
691 struct btrfs_qgroup_info_item);
692 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 690 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
693 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 691 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
694 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 692 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
@@ -1161,7 +1159,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1161 limit->rsv_excl); 1159 limit->rsv_excl);
1162 if (ret) { 1160 if (ret) {
1163 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1161 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1164 printk(KERN_INFO "unable to update quota limit for %llu\n", 1162 btrfs_info(fs_info, "unable to update quota limit for %llu",
1165 qgroupid); 1163 qgroupid);
1166 } 1164 }
1167 1165
@@ -1349,7 +1347,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1349 struct btrfs_delayed_ref_node *node, 1347 struct btrfs_delayed_ref_node *node,
1350 struct btrfs_delayed_extent_op *extent_op) 1348 struct btrfs_delayed_extent_op *extent_op)
1351{ 1349{
1352 struct btrfs_key ins;
1353 struct btrfs_root *quota_root; 1350 struct btrfs_root *quota_root;
1354 u64 ref_root; 1351 u64 ref_root;
1355 struct btrfs_qgroup *qgroup; 1352 struct btrfs_qgroup *qgroup;
@@ -1363,10 +1360,6 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1363 1360
1364 BUG_ON(!fs_info->quota_root); 1361 BUG_ON(!fs_info->quota_root);
1365 1362
1366 ins.objectid = node->bytenr;
1367 ins.offset = node->num_bytes;
1368 ins.type = BTRFS_EXTENT_ITEM_KEY;
1369
1370 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1363 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1371 node->type == BTRFS_SHARED_BLOCK_REF_KEY) { 1364 node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1372 struct btrfs_delayed_tree_ref *ref; 1365 struct btrfs_delayed_tree_ref *ref;
@@ -1840,7 +1833,9 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1840{ 1833{
1841 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 1834 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1842 return; 1835 return;
1843 pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n", 1836 btrfs_err(trans->root->fs_info,
1837 "qgroups not uptodate in trans handle %p: list is%s empty, "
1838 "seq is %#x.%x",
1844 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 1839 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1845 (u32)(trans->delayed_ref_elem.seq >> 32), 1840 (u32)(trans->delayed_ref_elem.seq >> 32),
1846 (u32)trans->delayed_ref_elem.seq); 1841 (u32)trans->delayed_ref_elem.seq);
@@ -1902,9 +1897,17 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1902 mutex_unlock(&fs_info->qgroup_rescan_lock); 1897 mutex_unlock(&fs_info->qgroup_rescan_lock);
1903 1898
1904 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 1899 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1900 u64 num_bytes;
1901
1905 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 1902 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1906 if (found.type != BTRFS_EXTENT_ITEM_KEY) 1903 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
1904 found.type != BTRFS_METADATA_ITEM_KEY)
1907 continue; 1905 continue;
1906 if (found.type == BTRFS_METADATA_ITEM_KEY)
1907 num_bytes = fs_info->extent_root->leafsize;
1908 else
1909 num_bytes = found.offset;
1910
1908 ret = btrfs_find_all_roots(trans, fs_info, found.objectid, 1911 ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1909 tree_mod_seq_elem.seq, &roots); 1912 tree_mod_seq_elem.seq, &roots);
1910 if (ret < 0) 1913 if (ret < 0)
@@ -1949,12 +1952,12 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1949 struct btrfs_qgroup_list *glist; 1952 struct btrfs_qgroup_list *glist;
1950 1953
1951 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux; 1954 qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1952 qg->rfer += found.offset; 1955 qg->rfer += num_bytes;
1953 qg->rfer_cmpr += found.offset; 1956 qg->rfer_cmpr += num_bytes;
1954 WARN_ON(qg->tag >= seq); 1957 WARN_ON(qg->tag >= seq);
1955 if (qg->refcnt - seq == roots->nnodes) { 1958 if (qg->refcnt - seq == roots->nnodes) {
1956 qg->excl += found.offset; 1959 qg->excl += num_bytes;
1957 qg->excl_cmpr += found.offset; 1960 qg->excl_cmpr += num_bytes;
1958 } 1961 }
1959 qgroup_dirty(fs_info, qg); 1962 qgroup_dirty(fs_info, qg);
1960 1963
@@ -2037,10 +2040,10 @@ out:
2037 mutex_unlock(&fs_info->qgroup_rescan_lock); 2040 mutex_unlock(&fs_info->qgroup_rescan_lock);
2038 2041
2039 if (err >= 0) { 2042 if (err >= 0) {
2040 pr_info("btrfs: qgroup scan completed%s\n", 2043 btrfs_info(fs_info, "qgroup scan completed%s",
2041 err == 2 ? " (inconsistency flag cleared)" : ""); 2044 err == 2 ? " (inconsistency flag cleared)" : "");
2042 } else { 2045 } else {
2043 pr_err("btrfs: qgroup scan failed with %d\n", err); 2046 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2044 } 2047 }
2045 2048
2046 complete_all(&fs_info->qgroup_rescan_completion); 2049 complete_all(&fs_info->qgroup_rescan_completion);
@@ -2096,7 +2099,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2096 2099
2097 if (ret) { 2100 if (ret) {
2098err: 2101err:
2099 pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret); 2102 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2100 return ret; 2103 return ret;
2101 } 2104 }
2102 2105
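
The rescan hunk above rests on a key-layout detail worth spelling out: in
the skinny-metadata format a BTRFS_METADATA_ITEM_KEY stores the tree level
of the block in its offset, not a byte count, so the accounted size must
come from the fixed metadata block size instead. The rule, as a small
sketch with assumed surroundings:

/* Byte count to account for one extent-tree item during rescan. */
static u64 rescan_extent_bytes(const struct btrfs_key *key, u64 leafsize)
{
        if (key->type == BTRFS_METADATA_ITEM_KEY)
                return leafsize;        /* offset holds the level, not a size */
        return key->offset;             /* EXTENT_ITEM_KEY: offset is the length */
}
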
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 24ac21840a9a..9af0b25d991a 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1032,8 +1032,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1032 1032
1033 /* see if we can add this page onto our existing bio */ 1033 /* see if we can add this page onto our existing bio */
1034 if (last) { 1034 if (last) {
1035 last_end = (u64)last->bi_sector << 9; 1035 last_end = (u64)last->bi_iter.bi_sector << 9;
1036 last_end += last->bi_size; 1036 last_end += last->bi_iter.bi_size;
1037 1037
1038 /* 1038 /*
1039 * we can't merge these if they are from different 1039 * we can't merge these if they are from different
@@ -1053,9 +1053,9 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
1053 if (!bio) 1053 if (!bio)
1054 return -ENOMEM; 1054 return -ENOMEM;
1055 1055
1056 bio->bi_size = 0; 1056 bio->bi_iter.bi_size = 0;
1057 bio->bi_bdev = stripe->dev->bdev; 1057 bio->bi_bdev = stripe->dev->bdev;
1058 bio->bi_sector = disk_start >> 9; 1058 bio->bi_iter.bi_sector = disk_start >> 9;
1059 set_bit(BIO_UPTODATE, &bio->bi_flags); 1059 set_bit(BIO_UPTODATE, &bio->bi_flags);
1060 1060
1061 bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); 1061 bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
@@ -1111,7 +1111,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1111 1111
1112 spin_lock_irq(&rbio->bio_list_lock); 1112 spin_lock_irq(&rbio->bio_list_lock);
1113 bio_list_for_each(bio, &rbio->bio_list) { 1113 bio_list_for_each(bio, &rbio->bio_list) {
1114 start = (u64)bio->bi_sector << 9; 1114 start = (u64)bio->bi_iter.bi_sector << 9;
1115 stripe_offset = start - rbio->raid_map[0]; 1115 stripe_offset = start - rbio->raid_map[0];
1116 page_index = stripe_offset >> PAGE_CACHE_SHIFT; 1116 page_index = stripe_offset >> PAGE_CACHE_SHIFT;
1117 1117
@@ -1272,7 +1272,7 @@ cleanup:
1272static int find_bio_stripe(struct btrfs_raid_bio *rbio, 1272static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1273 struct bio *bio) 1273 struct bio *bio)
1274{ 1274{
1275 u64 physical = bio->bi_sector; 1275 u64 physical = bio->bi_iter.bi_sector;
1276 u64 stripe_start; 1276 u64 stripe_start;
1277 int i; 1277 int i;
1278 struct btrfs_bio_stripe *stripe; 1278 struct btrfs_bio_stripe *stripe;
@@ -1298,7 +1298,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1298static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio, 1298static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
1299 struct bio *bio) 1299 struct bio *bio)
1300{ 1300{
1301 u64 logical = bio->bi_sector; 1301 u64 logical = bio->bi_iter.bi_sector;
1302 u64 stripe_start; 1302 u64 stripe_start;
1303 int i; 1303 int i;
1304 1304
@@ -1602,8 +1602,8 @@ static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
1602 plug_list); 1602 plug_list);
1603 struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio, 1603 struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
1604 plug_list); 1604 plug_list);
1605 u64 a_sector = ra->bio_list.head->bi_sector; 1605 u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
1606 u64 b_sector = rb->bio_list.head->bi_sector; 1606 u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
1607 1607
1608 if (a_sector < b_sector) 1608 if (a_sector < b_sector)
1609 return -1; 1609 return -1;
@@ -1691,7 +1691,7 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
1691 if (IS_ERR(rbio)) 1691 if (IS_ERR(rbio))
1692 return PTR_ERR(rbio); 1692 return PTR_ERR(rbio);
1693 bio_list_add(&rbio->bio_list, bio); 1693 bio_list_add(&rbio->bio_list, bio);
1694 rbio->bio_list_bytes = bio->bi_size; 1694 rbio->bio_list_bytes = bio->bi_iter.bi_size;
1695 1695
1696 /* 1696 /*
1697 * don't plug on full rbios, just get them out the door 1697 * don't plug on full rbios, just get them out the door
@@ -2044,7 +2044,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
2044 2044
2045 rbio->read_rebuild = 1; 2045 rbio->read_rebuild = 1;
2046 bio_list_add(&rbio->bio_list, bio); 2046 bio_list_add(&rbio->bio_list, bio);
2047 rbio->bio_list_bytes = bio->bi_size; 2047 rbio->bio_list_bytes = bio->bi_iter.bi_size;
2048 2048
2049 rbio->faila = find_logical_bio_stripe(rbio, bio); 2049 rbio->faila = find_logical_bio_stripe(rbio, bio);
2050 if (rbio->faila == -1) { 2050 if (rbio->faila == -1) {
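
The raid56 hunks above are mechanical fallout from the block layer's
immutable-biovec conversion: the sector and size fields moved from struct
bio itself into its embedded struct bvec_iter. The end-offset computation
used by rbio_add_io_page() reduces to this (a sketch, assuming the
post-conversion API):

#include <linux/bio.h>

/* Byte offset just past the end of @bio, post immutable-biovec. */
static u64 bio_end_offset(struct bio *bio)
{
        return ((u64)bio->bi_iter.bi_sector << 9) + bio->bi_iter.bi_size;
}
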
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 1031b69252c5..31c797c48c3e 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -189,8 +189,8 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
189 */ 189 */
190#ifdef DEBUG 190#ifdef DEBUG
191 if (rec->generation != generation) { 191 if (rec->generation != generation) {
192 printk(KERN_DEBUG "generation mismatch for " 192 btrfs_debug(root->fs_info,
193 "(%llu,%d,%llu) %llu != %llu\n", 193 "generation mismatch for (%llu,%d,%llu) %llu != %llu",
194 key.objectid, key.type, key.offset, 194 key.objectid, key.type, key.offset,
195 rec->generation, generation); 195 rec->generation, generation);
196 } 196 }
@@ -365,8 +365,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
365 goto error; 365 goto error;
366 366
367 if (bbio->num_stripes > BTRFS_MAX_MIRRORS) { 367 if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
368 printk(KERN_ERR "btrfs readahead: more than %d copies not " 368 btrfs_err(root->fs_info,
369 "supported", BTRFS_MAX_MIRRORS); 369 "readahead: more than %d copies not supported",
370 BTRFS_MAX_MIRRORS);
370 goto error; 371 goto error;
371 } 372 }
372 373
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 429c73c374b8..07b3b36f40ee 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -94,6 +94,7 @@ struct backref_edge {
94 94
95#define LOWER 0 95#define LOWER 0
96#define UPPER 1 96#define UPPER 1
97#define RELOCATION_RESERVED_NODES 256
97 98
98struct backref_cache { 99struct backref_cache {
99 /* red black tree of all backref nodes in the cache */ 100 /* red black tree of all backref nodes in the cache */
@@ -176,6 +177,8 @@ struct reloc_control {
176 u64 merging_rsv_size; 177 u64 merging_rsv_size;
177 /* size of relocated tree nodes */ 178 /* size of relocated tree nodes */
178 u64 nodes_relocated; 179 u64 nodes_relocated;
 180 /* reserved size for block group relocation */
181 u64 reserved_bytes;
179 182
180 u64 search_start; 183 u64 search_start;
181 u64 extents_found; 184 u64 extents_found;
@@ -184,7 +187,6 @@ struct reloc_control {
184 unsigned int create_reloc_tree:1; 187 unsigned int create_reloc_tree:1;
185 unsigned int merge_reloc_tree:1; 188 unsigned int merge_reloc_tree:1;
186 unsigned int found_file_extent:1; 189 unsigned int found_file_extent:1;
187 unsigned int commit_transaction:1;
188}; 190};
189 191
190/* stages of data relocation */ 192/* stages of data relocation */
@@ -2309,9 +2311,6 @@ void free_reloc_roots(struct list_head *list)
2309 reloc_root = list_entry(list->next, struct btrfs_root, 2311 reloc_root = list_entry(list->next, struct btrfs_root,
2310 root_list); 2312 root_list);
2311 __del_reloc_root(reloc_root); 2313 __del_reloc_root(reloc_root);
2312 free_extent_buffer(reloc_root->node);
2313 free_extent_buffer(reloc_root->commit_root);
2314 kfree(reloc_root);
2315 } 2314 }
2316} 2315}
2317 2316
@@ -2353,10 +2352,9 @@ again:
2353 2352
2354 ret = merge_reloc_root(rc, root); 2353 ret = merge_reloc_root(rc, root);
2355 if (ret) { 2354 if (ret) {
2356 __del_reloc_root(reloc_root); 2355 if (list_empty(&reloc_root->root_list))
2357 free_extent_buffer(reloc_root->node); 2356 list_add_tail(&reloc_root->root_list,
2358 free_extent_buffer(reloc_root->commit_root); 2357 &reloc_roots);
2359 kfree(reloc_root);
2360 goto out; 2358 goto out;
2361 } 2359 }
2362 } else { 2360 } else {
@@ -2452,7 +2450,7 @@ static noinline_for_stack
2452struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, 2450struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2453 struct reloc_control *rc, 2451 struct reloc_control *rc,
2454 struct backref_node *node, 2452 struct backref_node *node,
2455 struct backref_edge *edges[], int *nr) 2453 struct backref_edge *edges[])
2456{ 2454{
2457 struct backref_node *next; 2455 struct backref_node *next;
2458 struct btrfs_root *root; 2456 struct btrfs_root *root;
@@ -2494,7 +2492,6 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2494 if (!root) 2492 if (!root)
2495 return NULL; 2493 return NULL;
2496 2494
2497 *nr = index;
2498 next = node; 2495 next = node;
2499 /* setup backref node path for btrfs_reloc_cow_block */ 2496 /* setup backref node path for btrfs_reloc_cow_block */
2500 while (1) { 2497 while (1) {
@@ -2590,28 +2587,36 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2590 struct btrfs_root *root = rc->extent_root; 2587 struct btrfs_root *root = rc->extent_root;
2591 u64 num_bytes; 2588 u64 num_bytes;
2592 int ret; 2589 int ret;
2590 u64 tmp;
2593 2591
2594 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2592 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2595 2593
2596 trans->block_rsv = rc->block_rsv; 2594 trans->block_rsv = rc->block_rsv;
2597 ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, 2595 rc->reserved_bytes += num_bytes;
2598 BTRFS_RESERVE_FLUSH_ALL); 2596 ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
2597 BTRFS_RESERVE_FLUSH_ALL);
2599 if (ret) { 2598 if (ret) {
2600 if (ret == -EAGAIN) 2599 if (ret == -EAGAIN) {
2601 rc->commit_transaction = 1; 2600 tmp = rc->extent_root->nodesize *
2601 RELOCATION_RESERVED_NODES;
2602 while (tmp <= rc->reserved_bytes)
2603 tmp <<= 1;
2604 /*
2605 * only one thread can access block_rsv at this point,
 2606 * so we don't need to hold a lock to protect block_rsv.
 2607 * we expand the reservation size here to allow enough
 2608 * space for relocation and we will return earlier in
 2609 * the enospc case.
2610 */
2611 rc->block_rsv->size = tmp + rc->extent_root->nodesize *
2612 RELOCATION_RESERVED_NODES;
2613 }
2602 return ret; 2614 return ret;
2603 } 2615 }
2604 2616
2605 return 0; 2617 return 0;
2606} 2618}
2607 2619
2608static void release_metadata_space(struct reloc_control *rc,
2609 struct backref_node *node)
2610{
2611 u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
2612 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
2613}
2614
2615/* 2620/*
2616 * relocate a block tree, and then update pointers in upper level 2621 * relocate a block tree, and then update pointers in upper level
2617 * blocks that reference the block to point to the new location. 2622 * blocks that reference the block to point to the new location.
@@ -2633,7 +2638,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2633 u32 blocksize; 2638 u32 blocksize;
2634 u64 bytenr; 2639 u64 bytenr;
2635 u64 generation; 2640 u64 generation;
2636 int nr;
2637 int slot; 2641 int slot;
2638 int ret; 2642 int ret;
2639 int err = 0; 2643 int err = 0;
@@ -2646,7 +2650,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2646 cond_resched(); 2650 cond_resched();
2647 2651
2648 upper = edge->node[UPPER]; 2652 upper = edge->node[UPPER];
2649 root = select_reloc_root(trans, rc, upper, edges, &nr); 2653 root = select_reloc_root(trans, rc, upper, edges);
2650 BUG_ON(!root); 2654 BUG_ON(!root);
2651 2655
2652 if (upper->eb && !upper->locked) { 2656 if (upper->eb && !upper->locked) {
@@ -2898,7 +2902,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
2898 struct btrfs_path *path) 2902 struct btrfs_path *path)
2899{ 2903{
2900 struct btrfs_root *root; 2904 struct btrfs_root *root;
2901 int release = 0;
2902 int ret = 0; 2905 int ret = 0;
2903 2906
2904 if (!node) 2907 if (!node)
@@ -2915,7 +2918,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
2915 ret = reserve_metadata_space(trans, rc, node); 2918 ret = reserve_metadata_space(trans, rc, node);
2916 if (ret) 2919 if (ret)
2917 goto out; 2920 goto out;
2918 release = 1;
2919 } 2921 }
2920 2922
2921 if (root) { 2923 if (root) {
@@ -2940,11 +2942,8 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
2940 ret = do_relocation(trans, rc, node, key, path, 1); 2942 ret = do_relocation(trans, rc, node, key, path, 1);
2941 } 2943 }
2942out: 2944out:
2943 if (ret || node->level == 0 || node->cowonly) { 2945 if (ret || node->level == 0 || node->cowonly)
2944 if (release)
2945 release_metadata_space(rc, node);
2946 remove_backref_node(&rc->backref_cache, node); 2946 remove_backref_node(&rc->backref_cache, node);
2947 }
2948 return ret; 2947 return ret;
2949} 2948}
2950 2949
@@ -3867,29 +3866,20 @@ static noinline_for_stack
3867int prepare_to_relocate(struct reloc_control *rc) 3866int prepare_to_relocate(struct reloc_control *rc)
3868{ 3867{
3869 struct btrfs_trans_handle *trans; 3868 struct btrfs_trans_handle *trans;
3870 int ret;
3871 3869
3872 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, 3870 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
3873 BTRFS_BLOCK_RSV_TEMP); 3871 BTRFS_BLOCK_RSV_TEMP);
3874 if (!rc->block_rsv) 3872 if (!rc->block_rsv)
3875 return -ENOMEM; 3873 return -ENOMEM;
3876 3874
3877 /*
3878 * reserve some space for creating reloc trees.
3879 * btrfs_init_reloc_root will use them when there
3880 * is no reservation in transaction handle.
3881 */
3882 ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
3883 rc->extent_root->nodesize * 256,
3884 BTRFS_RESERVE_FLUSH_ALL);
3885 if (ret)
3886 return ret;
3887
3888 memset(&rc->cluster, 0, sizeof(rc->cluster)); 3875 memset(&rc->cluster, 0, sizeof(rc->cluster));
3889 rc->search_start = rc->block_group->key.objectid; 3876 rc->search_start = rc->block_group->key.objectid;
3890 rc->extents_found = 0; 3877 rc->extents_found = 0;
3891 rc->nodes_relocated = 0; 3878 rc->nodes_relocated = 0;
3892 rc->merging_rsv_size = 0; 3879 rc->merging_rsv_size = 0;
3880 rc->reserved_bytes = 0;
3881 rc->block_rsv->size = rc->extent_root->nodesize *
3882 RELOCATION_RESERVED_NODES;
3893 3883
3894 rc->create_reloc_tree = 1; 3884 rc->create_reloc_tree = 1;
3895 set_reloc_control(rc); 3885 set_reloc_control(rc);
@@ -3933,6 +3923,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3933 } 3923 }
3934 3924
3935 while (1) { 3925 while (1) {
3926 rc->reserved_bytes = 0;
3927 ret = btrfs_block_rsv_refill(rc->extent_root,
3928 rc->block_rsv, rc->block_rsv->size,
3929 BTRFS_RESERVE_FLUSH_ALL);
3930 if (ret) {
3931 err = ret;
3932 break;
3933 }
3936 progress++; 3934 progress++;
3937 trans = btrfs_start_transaction(rc->extent_root, 0); 3935 trans = btrfs_start_transaction(rc->extent_root, 0);
3938 if (IS_ERR(trans)) { 3936 if (IS_ERR(trans)) {
@@ -4011,6 +4009,12 @@ restart:
4011 if (!RB_EMPTY_ROOT(&blocks)) { 4009 if (!RB_EMPTY_ROOT(&blocks)) {
4012 ret = relocate_tree_blocks(trans, rc, &blocks); 4010 ret = relocate_tree_blocks(trans, rc, &blocks);
4013 if (ret < 0) { 4011 if (ret < 0) {
4012 /*
 4013 * if we fail to relocate tree blocks, force an update of
 4014 * the backref cache when committing the transaction.
4015 */
4016 rc->backref_cache.last_trans = trans->transid - 1;
4017
4014 if (ret != -EAGAIN) { 4018 if (ret != -EAGAIN) {
4015 err = ret; 4019 err = ret;
4016 break; 4020 break;
@@ -4020,14 +4024,8 @@ restart:
4020 } 4024 }
4021 } 4025 }
4022 4026
4023 if (rc->commit_transaction) { 4027 btrfs_end_transaction_throttle(trans, rc->extent_root);
4024 rc->commit_transaction = 0; 4028 btrfs_btree_balance_dirty(rc->extent_root);
4025 ret = btrfs_commit_transaction(trans, rc->extent_root);
4026 BUG_ON(ret);
4027 } else {
4028 btrfs_end_transaction_throttle(trans, rc->extent_root);
4029 btrfs_btree_balance_dirty(rc->extent_root);
4030 }
4031 trans = NULL; 4029 trans = NULL;
4032 4030
4033 if (rc->stage == MOVE_DATA_EXTENTS && 4031 if (rc->stage == MOVE_DATA_EXTENTS &&
@@ -4247,7 +4245,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4247 goto out; 4245 goto out;
4248 } 4246 }
4249 4247
4250 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", 4248 btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu",
4251 rc->block_group->key.objectid, rc->block_group->flags); 4249 rc->block_group->key.objectid, rc->block_group->flags);
4252 4250
4253 ret = btrfs_start_delalloc_roots(fs_info, 0); 4251 ret = btrfs_start_delalloc_roots(fs_info, 0);
@@ -4269,7 +4267,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4269 if (rc->extents_found == 0) 4267 if (rc->extents_found == 0)
4270 break; 4268 break;
4271 4269
4272 printk(KERN_INFO "btrfs: found %llu extents\n", 4270 btrfs_info(extent_root->fs_info, "found %llu extents",
4273 rc->extents_found); 4271 rc->extents_found);
4274 4272
4275 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4273 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
@@ -4285,11 +4283,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4285 } 4283 }
4286 } 4284 }
4287 4285
4288 filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
4289 rc->block_group->key.objectid,
4290 rc->block_group->key.objectid +
4291 rc->block_group->key.offset - 1);
4292
4293 WARN_ON(rc->block_group->pinned > 0); 4286 WARN_ON(rc->block_group->pinned > 0);
4294 WARN_ON(rc->block_group->reserved > 0); 4287 WARN_ON(rc->block_group->reserved > 0);
4295 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); 4288 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
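
The new reservation policy in reserve_metadata_space() grows the rsv
geometrically on -EAGAIN: the target doubles until it passes what the
current pass has already consumed, then adds one more 256-node cushion.
With 16K nodes the base is 16K * 256 = 4 MiB; once a pass has consumed
5 MiB, tmp doubles 4 -> 8 MiB and the new target becomes 12 MiB. The
growth step, extracted as a standalone sketch:

/* New block_rsv->size after -EAGAIN, mirroring the hunk above. */
static u64 grow_reloc_rsv(u64 nodesize, u64 reserved_bytes)
{
        u64 tmp = nodesize * 256;       /* RELOCATION_RESERVED_NODES */

        while (tmp <= reserved_bytes)
                tmp <<= 1;
        return tmp + nodesize * 256;
}
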
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ec71ea44d2b4..1389b69059de 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -44,7 +44,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
44 if (!need_reset && btrfs_root_generation(item) 44 if (!need_reset && btrfs_root_generation(item)
45 != btrfs_root_generation_v2(item)) { 45 != btrfs_root_generation_v2(item)) {
46 if (btrfs_root_generation_v2(item) != 0) { 46 if (btrfs_root_generation_v2(item) != 0) {
47 printk(KERN_WARNING "btrfs: mismatching " 47 printk(KERN_WARNING "BTRFS: mismatching "
48 "generation and generation_v2 " 48 "generation and generation_v2 "
49 "found in root item. This root " 49 "found in root item. This root "
50 "was probably mounted with an " 50 "was probably mounted with an "
@@ -154,7 +154,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
154 154
155 if (ret != 0) { 155 if (ret != 0) {
156 btrfs_print_leaf(root, path->nodes[0]); 156 btrfs_print_leaf(root, path->nodes[0]);
157 printk(KERN_CRIT "unable to update root key %llu %u %llu\n", 157 btrfs_crit(root->fs_info, "unable to update root key %llu %u %llu",
158 key->objectid, key->type, key->offset); 158 key->objectid, key->type, key->offset);
159 BUG_ON(1); 159 BUG_ON(1);
160 } 160 }
@@ -400,21 +400,6 @@ out:
400 return err; 400 return err;
401} 401}
402 402
403int btrfs_find_root_ref(struct btrfs_root *tree_root,
404 struct btrfs_path *path,
405 u64 root_id, u64 ref_id)
406{
407 struct btrfs_key key;
408 int ret;
409
410 key.objectid = root_id;
411 key.type = BTRFS_ROOT_REF_KEY;
412 key.offset = ref_id;
413
414 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
415 return ret;
416}
417
418/* 403/*
419 * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY 404 * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY
420 * or BTRFS_ROOT_BACKREF_KEY. 405 * or BTRFS_ROOT_BACKREF_KEY.
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 1fd3f33c330a..efba5d1282ee 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -256,6 +256,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
256static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 256static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
257 int mirror_num, u64 physical_for_dev_replace); 257 int mirror_num, u64 physical_for_dev_replace);
258static void copy_nocow_pages_worker(struct btrfs_work *work); 258static void copy_nocow_pages_worker(struct btrfs_work *work);
259static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
260static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
259 261
260 262
261static void scrub_pending_bio_inc(struct scrub_ctx *sctx) 263static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@ -269,6 +271,29 @@ static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
269 wake_up(&sctx->list_wait); 271 wake_up(&sctx->list_wait);
270} 272}
271 273
274static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
275{
276 while (atomic_read(&fs_info->scrub_pause_req)) {
277 mutex_unlock(&fs_info->scrub_lock);
278 wait_event(fs_info->scrub_pause_wait,
279 atomic_read(&fs_info->scrub_pause_req) == 0);
280 mutex_lock(&fs_info->scrub_lock);
281 }
282}
283
284static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
285{
286 atomic_inc(&fs_info->scrubs_paused);
287 wake_up(&fs_info->scrub_pause_wait);
288
289 mutex_lock(&fs_info->scrub_lock);
290 __scrub_blocked_if_needed(fs_info);
291 atomic_dec(&fs_info->scrubs_paused);
292 mutex_unlock(&fs_info->scrub_lock);
293
294 wake_up(&fs_info->scrub_pause_wait);
295}
296
272/* 297/*
273 * used for workers that require transaction commits (i.e., for the 298 * used for workers that require transaction commits (i.e., for the
274 * NOCOW case) 299 * NOCOW case)
@@ -480,7 +505,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
480 * hold all of the paths here 505 * hold all of the paths here
481 */ 506 */
482 for (i = 0; i < ipath->fspath->elem_cnt; ++i) 507 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
483 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev " 508 printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
484 "%s, sector %llu, root %llu, inode %llu, offset %llu, " 509 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
485 "length %llu, links %u (path: %s)\n", swarn->errstr, 510 "length %llu, links %u (path: %s)\n", swarn->errstr,
486 swarn->logical, rcu_str_deref(swarn->dev->name), 511 swarn->logical, rcu_str_deref(swarn->dev->name),
@@ -492,7 +517,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
492 return 0; 517 return 0;
493 518
494err: 519err:
495 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev " 520 printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
496 "%s, sector %llu, root %llu, inode %llu, offset %llu: path " 521 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
497 "resolving failed with ret=%d\n", swarn->errstr, 522 "resolving failed with ret=%d\n", swarn->errstr,
498 swarn->logical, rcu_str_deref(swarn->dev->name), 523 swarn->logical, rcu_str_deref(swarn->dev->name),
@@ -555,7 +580,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
555 ret = tree_backref_for_extent(&ptr, eb, ei, item_size, 580 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
556 &ref_root, &ref_level); 581 &ref_root, &ref_level);
557 printk_in_rcu(KERN_WARNING 582 printk_in_rcu(KERN_WARNING
558 "btrfs: %s at logical %llu on dev %s, " 583 "BTRFS: %s at logical %llu on dev %s, "
559 "sector %llu: metadata %s (level %d) in tree " 584 "sector %llu: metadata %s (level %d) in tree "
560 "%llu\n", errstr, swarn.logical, 585 "%llu\n", errstr, swarn.logical,
561 rcu_str_deref(dev->name), 586 rcu_str_deref(dev->name),
@@ -704,13 +729,11 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work)
704 struct scrub_fixup_nodatasum *fixup; 729 struct scrub_fixup_nodatasum *fixup;
705 struct scrub_ctx *sctx; 730 struct scrub_ctx *sctx;
706 struct btrfs_trans_handle *trans = NULL; 731 struct btrfs_trans_handle *trans = NULL;
707 struct btrfs_fs_info *fs_info;
708 struct btrfs_path *path; 732 struct btrfs_path *path;
709 int uncorrectable = 0; 733 int uncorrectable = 0;
710 734
711 fixup = container_of(work, struct scrub_fixup_nodatasum, work); 735 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
712 sctx = fixup->sctx; 736 sctx = fixup->sctx;
713 fs_info = fixup->root->fs_info;
714 737
715 path = btrfs_alloc_path(); 738 path = btrfs_alloc_path();
716 if (!path) { 739 if (!path) {
@@ -759,8 +782,8 @@ out:
759 btrfs_dev_replace_stats_inc( 782 btrfs_dev_replace_stats_inc(
760 &sctx->dev_root->fs_info->dev_replace. 783 &sctx->dev_root->fs_info->dev_replace.
761 num_uncorrectable_read_errors); 784 num_uncorrectable_read_errors);
762 printk_ratelimited_in_rcu(KERN_ERR 785 printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
763 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", 786 "unable to fixup (nodatasum) error at logical %llu on dev %s\n",
764 fixup->logical, rcu_str_deref(fixup->dev->name)); 787 fixup->logical, rcu_str_deref(fixup->dev->name));
765 } 788 }
766 789
@@ -1161,7 +1184,7 @@ corrected_error:
1161 sctx->stat.corrected_errors++; 1184 sctx->stat.corrected_errors++;
1162 spin_unlock(&sctx->stat_lock); 1185 spin_unlock(&sctx->stat_lock);
1163 printk_ratelimited_in_rcu(KERN_ERR 1186 printk_ratelimited_in_rcu(KERN_ERR
1164 "btrfs: fixed up error at logical %llu on dev %s\n", 1187 "BTRFS: fixed up error at logical %llu on dev %s\n",
1165 logical, rcu_str_deref(dev->name)); 1188 logical, rcu_str_deref(dev->name));
1166 } 1189 }
1167 } else { 1190 } else {
@@ -1170,7 +1193,7 @@ did_not_correct_error:
1170 sctx->stat.uncorrectable_errors++; 1193 sctx->stat.uncorrectable_errors++;
1171 spin_unlock(&sctx->stat_lock); 1194 spin_unlock(&sctx->stat_lock);
1172 printk_ratelimited_in_rcu(KERN_ERR 1195 printk_ratelimited_in_rcu(KERN_ERR
1173 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", 1196 "BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
1174 logical, rcu_str_deref(dev->name)); 1197 logical, rcu_str_deref(dev->name));
1175 } 1198 }
1176 1199
@@ -1308,7 +1331,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1308 continue; 1331 continue;
1309 } 1332 }
1310 bio->bi_bdev = page->dev->bdev; 1333 bio->bi_bdev = page->dev->bdev;
1311 bio->bi_sector = page->physical >> 9; 1334 bio->bi_iter.bi_sector = page->physical >> 9;
1312 1335
1313 bio_add_page(bio, page->page, PAGE_SIZE, 0); 1336 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1314 if (btrfsic_submit_bio_wait(READ, bio)) 1337 if (btrfsic_submit_bio_wait(READ, bio))
@@ -1418,8 +1441,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1418 int ret; 1441 int ret;
1419 1442
1420 if (!page_bad->dev->bdev) { 1443 if (!page_bad->dev->bdev) {
1421 printk_ratelimited(KERN_WARNING 1444 printk_ratelimited(KERN_WARNING "BTRFS: "
1422 "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n"); 1445 "scrub_repair_page_from_good_copy(bdev == NULL) "
1446 "is unexpected!\n");
1423 return -EIO; 1447 return -EIO;
1424 } 1448 }
1425 1449
@@ -1427,7 +1451,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1427 if (!bio) 1451 if (!bio)
1428 return -EIO; 1452 return -EIO;
1429 bio->bi_bdev = page_bad->dev->bdev; 1453 bio->bi_bdev = page_bad->dev->bdev;
1430 bio->bi_sector = page_bad->physical >> 9; 1454 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1431 1455
1432 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); 1456 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1433 if (PAGE_SIZE != ret) { 1457 if (PAGE_SIZE != ret) {
@@ -1520,7 +1544,7 @@ again:
1520 bio->bi_private = sbio; 1544 bio->bi_private = sbio;
1521 bio->bi_end_io = scrub_wr_bio_end_io; 1545 bio->bi_end_io = scrub_wr_bio_end_io;
1522 bio->bi_bdev = sbio->dev->bdev; 1546 bio->bi_bdev = sbio->dev->bdev;
1523 bio->bi_sector = sbio->physical >> 9; 1547 bio->bi_iter.bi_sector = sbio->physical >> 9;
1524 sbio->err = 0; 1548 sbio->err = 0;
1525 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 1549 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1526 spage->physical_for_dev_replace || 1550 spage->physical_for_dev_replace ||
@@ -1877,7 +1901,7 @@ static void scrub_submit(struct scrub_ctx *sctx)
1877 * This case is handled correctly (but _very_ slowly). 1901 * This case is handled correctly (but _very_ slowly).
1878 */ 1902 */
1879 printk_ratelimited(KERN_WARNING 1903 printk_ratelimited(KERN_WARNING
1880 "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n"); 1904 "BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
1881 bio_endio(sbio->bio, -EIO); 1905 bio_endio(sbio->bio, -EIO);
1882 } else { 1906 } else {
1883 btrfsic_submit_bio(READ, sbio->bio); 1907 btrfsic_submit_bio(READ, sbio->bio);
@@ -1926,7 +1950,7 @@ again:
1926 bio->bi_private = sbio; 1950 bio->bi_private = sbio;
1927 bio->bi_end_io = scrub_bio_end_io; 1951 bio->bi_end_io = scrub_bio_end_io;
1928 bio->bi_bdev = sbio->dev->bdev; 1952 bio->bi_bdev = sbio->dev->bdev;
1929 bio->bi_sector = sbio->physical >> 9; 1953 bio->bi_iter.bi_sector = sbio->physical >> 9;
1930 sbio->err = 0; 1954 sbio->err = 0;
1931 } else if (sbio->physical + sbio->page_count * PAGE_SIZE != 1955 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1932 spage->physical || 1956 spage->physical ||
@@ -2286,8 +2310,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2286 2310
2287 wait_event(sctx->list_wait, 2311 wait_event(sctx->list_wait,
2288 atomic_read(&sctx->bios_in_flight) == 0); 2312 atomic_read(&sctx->bios_in_flight) == 0);
2289 atomic_inc(&fs_info->scrubs_paused); 2313 scrub_blocked_if_needed(fs_info);
2290 wake_up(&fs_info->scrub_pause_wait);
2291 2314
2292 /* FIXME it might be better to start readahead at commit root */ 2315 /* FIXME it might be better to start readahead at commit root */
2293 key_start.objectid = logical; 2316 key_start.objectid = logical;
@@ -2311,16 +2334,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2311 if (!IS_ERR(reada2)) 2334 if (!IS_ERR(reada2))
2312 btrfs_reada_wait(reada2); 2335 btrfs_reada_wait(reada2);
2313 2336
2314 mutex_lock(&fs_info->scrub_lock);
2315 while (atomic_read(&fs_info->scrub_pause_req)) {
2316 mutex_unlock(&fs_info->scrub_lock);
2317 wait_event(fs_info->scrub_pause_wait,
2318 atomic_read(&fs_info->scrub_pause_req) == 0);
2319 mutex_lock(&fs_info->scrub_lock);
2320 }
2321 atomic_dec(&fs_info->scrubs_paused);
2322 mutex_unlock(&fs_info->scrub_lock);
2323 wake_up(&fs_info->scrub_pause_wait);
2324 2337
2325 /* 2338 /*
2326 * collect all data csums for the stripe to avoid seeking during 2339 * collect all data csums for the stripe to avoid seeking during
@@ -2357,22 +2370,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2357 wait_event(sctx->list_wait, 2370 wait_event(sctx->list_wait,
2358 atomic_read(&sctx->bios_in_flight) == 0); 2371 atomic_read(&sctx->bios_in_flight) == 0);
2359 atomic_set(&sctx->wr_ctx.flush_all_writes, 0); 2372 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2360 atomic_inc(&fs_info->scrubs_paused); 2373 scrub_blocked_if_needed(fs_info);
2361 wake_up(&fs_info->scrub_pause_wait);
2362 mutex_lock(&fs_info->scrub_lock);
2363 while (atomic_read(&fs_info->scrub_pause_req)) {
2364 mutex_unlock(&fs_info->scrub_lock);
2365 wait_event(fs_info->scrub_pause_wait,
2366 atomic_read(&fs_info->scrub_pause_req) == 0);
2367 mutex_lock(&fs_info->scrub_lock);
2368 }
2369 atomic_dec(&fs_info->scrubs_paused);
2370 mutex_unlock(&fs_info->scrub_lock);
2371 wake_up(&fs_info->scrub_pause_wait);
2372 } 2374 }
2373 2375
2376 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2377 key.type = BTRFS_METADATA_ITEM_KEY;
2378 else
2379 key.type = BTRFS_EXTENT_ITEM_KEY;
2374 key.objectid = logical; 2380 key.objectid = logical;
2375 key.type = BTRFS_EXTENT_ITEM_KEY;
2376 key.offset = (u64)-1; 2381 key.offset = (u64)-1;
2377 2382
2378 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2383 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
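
Annotation: the two hunks above in scrub_stripe() (and a third in scrub_enumerate_chunks() further down) collapse the same open-coded pause handshake into the new scrub_blocked_if_needed() helper. A minimal userspace model of that handshake, using pthreads; the names mirror the fs_info fields, but this is an illustrative sketch, not kernel code:

	#include <pthread.h>

	struct pause_model {
		pthread_mutex_t lock;	/* models fs_info->scrub_lock */
		pthread_cond_t wait;	/* models scrub_pause_wait */
		int pause_req;		/* models scrub_pause_req */
		int scrubs_paused;	/* models scrubs_paused */
	};

	static void scrub_blocked_if_needed(struct pause_model *m)
	{
		pthread_mutex_lock(&m->lock);
		m->scrubs_paused++;
		pthread_cond_broadcast(&m->wait);	/* committer may proceed */
		while (m->pause_req)
			pthread_cond_wait(&m->wait, &m->lock);
		m->scrubs_paused--;
		pthread_mutex_unlock(&m->lock);
		pthread_cond_broadcast(&m->wait);
	}

The win is not just brevity: one helper keeps the paused-counter accounting symmetric across all the call sites instead of three hand-rolled copies.
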
@@ -2380,8 +2385,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2380 goto out; 2385 goto out;
2381 2386
2382 if (ret > 0) { 2387 if (ret > 0) {
2383 ret = btrfs_previous_item(root, path, 0, 2388 ret = btrfs_previous_extent_item(root, path, 0);
2384 BTRFS_EXTENT_ITEM_KEY);
2385 if (ret < 0) 2389 if (ret < 0)
2386 goto out; 2390 goto out;
2387 if (ret > 0) { 2391 if (ret > 0) {
@@ -2439,9 +2443,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2439 2443
2440 if (key.objectid < logical && 2444 if (key.objectid < logical &&
2441 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { 2445 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2442 printk(KERN_ERR 2446 btrfs_err(fs_info,
2443 "btrfs scrub: tree block %llu spanning " 2447 "scrub: tree block %llu spanning "
2444 "stripes, ignored. logical=%llu\n", 2448 "stripes, ignored. logical=%llu",
2445 key.objectid, logical); 2449 key.objectid, logical);
2446 goto next; 2450 goto next;
2447 } 2451 }
@@ -2683,21 +2687,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2683 wait_event(sctx->list_wait, 2687 wait_event(sctx->list_wait,
2684 atomic_read(&sctx->bios_in_flight) == 0); 2688 atomic_read(&sctx->bios_in_flight) == 0);
2685 atomic_set(&sctx->wr_ctx.flush_all_writes, 0); 2689 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2686 atomic_inc(&fs_info->scrubs_paused);
2687 wake_up(&fs_info->scrub_pause_wait);
2688 wait_event(sctx->list_wait, 2690 wait_event(sctx->list_wait,
2689 atomic_read(&sctx->workers_pending) == 0); 2691 atomic_read(&sctx->workers_pending) == 0);
2690 2692 scrub_blocked_if_needed(fs_info);
2691 mutex_lock(&fs_info->scrub_lock);
2692 while (atomic_read(&fs_info->scrub_pause_req)) {
2693 mutex_unlock(&fs_info->scrub_lock);
2694 wait_event(fs_info->scrub_pause_wait,
2695 atomic_read(&fs_info->scrub_pause_req) == 0);
2696 mutex_lock(&fs_info->scrub_lock);
2697 }
2698 atomic_dec(&fs_info->scrubs_paused);
2699 mutex_unlock(&fs_info->scrub_lock);
2700 wake_up(&fs_info->scrub_pause_wait);
2701 2693
2702 btrfs_put_block_group(cache); 2694 btrfs_put_block_group(cache);
2703 if (ret) 2695 if (ret)
@@ -2823,8 +2815,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2823 * check some assumptions 2815 * check some assumptions
2824 */ 2816 */
2825 if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) { 2817 if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
2826 printk(KERN_ERR 2818 btrfs_err(fs_info,
2827 "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n", 2819 "scrub: size assumption nodesize == leafsize (%d == %d) fails",
2828 fs_info->chunk_root->nodesize, 2820 fs_info->chunk_root->nodesize,
2829 fs_info->chunk_root->leafsize); 2821 fs_info->chunk_root->leafsize);
2830 return -EINVAL; 2822 return -EINVAL;
@@ -2836,16 +2828,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2836 * the way scrub is implemented. Do not handle this 2828 * the way scrub is implemented. Do not handle this
2837 * situation at all because it won't ever happen. 2829 * situation at all because it won't ever happen.
2838 */ 2830 */
2839 printk(KERN_ERR 2831 btrfs_err(fs_info,
2840 "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n", 2832 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
2841 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN); 2833 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
2842 return -EINVAL; 2834 return -EINVAL;
2843 } 2835 }
2844 2836
2845 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { 2837 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
2846 /* not supported for data w/o checksums */ 2838 /* not supported for data w/o checksums */
2847 printk(KERN_ERR 2839 btrfs_err(fs_info,
2848 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n", 2840 "scrub: size assumption sectorsize != PAGE_SIZE "
2841 "(%d != %lu) fails",
2849 fs_info->chunk_root->sectorsize, PAGE_SIZE); 2842 fs_info->chunk_root->sectorsize, PAGE_SIZE);
2850 return -EINVAL; 2843 return -EINVAL;
2851 } 2844 }
@@ -2858,7 +2851,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2858 * would exhaust the array bounds of pagev member in 2851 * would exhaust the array bounds of pagev member in
2859 * struct scrub_block 2852 * struct scrub_block
2860 */ 2853 */
2861 pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n", 2854 btrfs_err(fs_info, "scrub: size assumption nodesize and sectorsize "
2855 "<= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
2862 fs_info->chunk_root->nodesize, 2856 fs_info->chunk_root->nodesize,
2863 SCRUB_MAX_PAGES_PER_BLOCK, 2857 SCRUB_MAX_PAGES_PER_BLOCK,
2864 fs_info->chunk_root->sectorsize, 2858 fs_info->chunk_root->sectorsize,
@@ -2908,7 +2902,13 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2908 } 2902 }
2909 sctx->readonly = readonly; 2903 sctx->readonly = readonly;
2910 dev->scrub_device = sctx; 2904 dev->scrub_device = sctx;
2905 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2911 2906
2907 /*
2908 * By checking @scrub_pause_req here, we can avoid a
2909 * race between committing a transaction and scrubbing.
2910 */
2911 __scrub_blocked_if_needed(fs_info);
2912 atomic_inc(&fs_info->scrubs_running); 2912 atomic_inc(&fs_info->scrubs_running);
2913 mutex_unlock(&fs_info->scrub_lock); 2913 mutex_unlock(&fs_info->scrub_lock);
2914 2914
@@ -2917,9 +2917,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2917 * by holding device list mutex, we can 2917 * by holding device list mutex, we can
2918 * kick off writing super in log tree sync. 2918 * kick off writing super in log tree sync.
2919 */ 2919 */
2920 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2920 ret = scrub_supers(sctx, dev); 2921 ret = scrub_supers(sctx, dev);
2922 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2921 } 2923 }
2922 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2923 2924
2924 if (!ret) 2925 if (!ret)
2925 ret = scrub_enumerate_chunks(sctx, dev, start, end, 2926 ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@ -3167,7 +3168,8 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3167 ret = iterate_inodes_from_logical(logical, fs_info, path, 3168 ret = iterate_inodes_from_logical(logical, fs_info, path,
3168 record_inode_for_nocow, nocow_ctx); 3169 record_inode_for_nocow, nocow_ctx);
3169 if (ret != 0 && ret != -ENOENT) { 3170 if (ret != 0 && ret != -ENOENT) {
3170 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n", 3171 btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, "
3172 "phys %llu, len %llu, mir %u, ret %d",
3171 logical, physical_for_dev_replace, len, mirror_num, 3173 logical, physical_for_dev_replace, len, mirror_num,
3172 ret); 3174 ret);
3173 not_written = 1; 3175 not_written = 1;
@@ -3289,7 +3291,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3289again: 3291again:
3290 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 3292 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
3291 if (!page) { 3293 if (!page) {
3292 pr_err("find_or_create_page() failed\n"); 3294 btrfs_err(fs_info, "find_or_create_page() failed");
3293 ret = -ENOMEM; 3295 ret = -ENOMEM;
3294 goto out; 3296 goto out;
3295 } 3297 }
@@ -3361,7 +3363,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
3361 return -EIO; 3363 return -EIO;
3362 if (!dev->bdev) { 3364 if (!dev->bdev) {
3363 printk_ratelimited(KERN_WARNING 3365 printk_ratelimited(KERN_WARNING
3364 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); 3366 "BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
3365 return -EIO; 3367 return -EIO;
3366 } 3368 }
3367 bio = btrfs_io_bio_alloc(GFP_NOFS, 1); 3369 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
@@ -3371,8 +3373,8 @@ static int write_page_nocow(struct scrub_ctx *sctx,
3371 spin_unlock(&sctx->stat_lock); 3373 spin_unlock(&sctx->stat_lock);
3372 return -ENOMEM; 3374 return -ENOMEM;
3373 } 3375 }
3374 bio->bi_size = 0; 3376 bio->bi_iter.bi_size = 0;
3375 bio->bi_sector = physical_for_dev_replace >> 9; 3377 bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
3376 bio->bi_bdev = dev->bdev; 3378 bio->bi_bdev = dev->bdev;
3377 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); 3379 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
3378 if (ret != PAGE_CACHE_SIZE) { 3380 if (ret != PAGE_CACHE_SIZE) {
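
Annotation: all the bi_sector/bi_size churn in this file tracks the block-layer immutable-biovec work, which moved a bio's position into an embedded iterator. A reduced sketch of the new shape; the real definition lives in the block headers and has more fields, so treat the exact layout here as an assumption:

	/* stand-ins for the real block-layer types */
	typedef unsigned long long sector_t;

	struct bvec_iter {
		sector_t bi_sector;	/* device address in 512-byte sectors */
		unsigned int bi_size;	/* residual I/O count, in bytes */
		/* index/completion fields omitted */
	};

	/* before: bio->bi_sector = physical >> 9;
	 * after:  bio->bi_iter.bi_sector = physical >> 9;  */

The >> 9 byte-to-sector conversions themselves are unchanged; only the field paths move.
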
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 945d1db98f26..9dde9717c1b9 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -24,12 +24,12 @@
24#include <linux/xattr.h> 24#include <linux/xattr.h>
25#include <linux/posix_acl_xattr.h> 25#include <linux/posix_acl_xattr.h>
26#include <linux/radix-tree.h> 26#include <linux/radix-tree.h>
27#include <linux/crc32c.h>
28#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
29#include <linux/string.h> 28#include <linux/string.h>
30 29
31#include "send.h" 30#include "send.h"
32#include "backref.h" 31#include "backref.h"
32#include "hash.h"
33#include "locking.h" 33#include "locking.h"
34#include "disk-io.h" 34#include "disk-io.h"
35#include "btrfs_inode.h" 35#include "btrfs_inode.h"
@@ -88,8 +88,6 @@ struct send_ctx {
88 u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; 88 u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
89 u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */ 89 u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
90 90
91 struct vfsmount *mnt;
92
93 struct btrfs_root *send_root; 91 struct btrfs_root *send_root;
94 struct btrfs_root *parent_root; 92 struct btrfs_root *parent_root;
95 struct clone_root *clone_roots; 93 struct clone_root *clone_roots;
@@ -111,6 +109,7 @@ struct send_ctx {
111 int cur_inode_deleted; 109 int cur_inode_deleted;
112 u64 cur_inode_size; 110 u64 cur_inode_size;
113 u64 cur_inode_mode; 111 u64 cur_inode_mode;
112 u64 cur_inode_last_extent;
114 113
115 u64 send_progress; 114 u64 send_progress;
116 115
@@ -122,6 +121,74 @@ struct send_ctx {
122 int name_cache_size; 121 int name_cache_size;
123 122
124 char *read_buf; 123 char *read_buf;
124
125 /*
126 * We process inodes in increasing order, so if before an
127 * incremental send we reverse the parent/child relationship of
128 * directories such that a directory with a lower inode number was
129 * the parent of a directory with a higher inode number, and the one
130 * becoming the new parent got renamed too, we can't rename/move the
131 * directory with lower inode number when we finish processing it - we
132 * must process the directory with higher inode number first, then
133 * rename/move it and then rename/move the directory with lower inode
134 * number. Example follows.
135 *
136 * Tree state when the first send was performed:
137 *
138 * .
139 * |-- a (ino 257)
140 * |-- b (ino 258)
141 * |
142 * |
143 * |-- c (ino 259)
144 * | |-- d (ino 260)
145 * |
146 * |-- c2 (ino 261)
147 *
148 * Tree state when the second (incremental) send is performed:
149 *
150 * .
151 * |-- a (ino 257)
152 * |-- b (ino 258)
153 * |-- c2 (ino 261)
154 * |-- d2 (ino 260)
155 * |-- cc (ino 259)
156 *
157 * The sequence of steps that lead to the second state was:
158 *
159 * mv /a/b/c/d /a/b/c2/d2
160 * mv /a/b/c /a/b/c2/d2/cc
161 *
162 * "c" has lower inode number, but we can't move it (2nd mv operation)
163 * before we move "d", which has higher inode number.
164 *
165 * So we just memorize which move/rename operations must be performed
166 * later when their respective parent is processed and moved/renamed.
167 */
168
169 /* Indexed by parent directory inode number. */
170 struct rb_root pending_dir_moves;
171
172 /*
173 * Reverse index, indexed by the inode number of a directory that
174 * is waiting for the move/rename of its immediate parent before its
175 * own move/rename can be performed.
176 */
177 struct rb_root waiting_dir_moves;
178};
179
180struct pending_dir_move {
181 struct rb_node node;
182 struct list_head list;
183 u64 parent_ino;
184 u64 ino;
185 u64 gen;
186 struct list_head update_refs;
187};
188
189struct waiting_dir_move {
190 struct rb_node node;
191 u64 ino;
125}; 192};
126 193
127struct name_cache_entry { 194struct name_cache_entry {
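
Annotation: applied to the example trees in the comment above, the new machinery produces an incremental stream roughly like this (paths abbreviated, TLV details omitted; a sketch, not verbatim receiver input):

	process ino 259 ("c"):  its new parent (ino 260) has not moved yet
	                        -> no rename emitted; queue the move in
	                           pending_dir_moves keyed by 260 and mark
	                           259 in waiting_dir_moves
	process ino 260 ("d"):  rename a/b/c/d -> a/b/c2/d2
	                        -> apply_children_dir_moves() then replays
	                           the queued move:
	                           rename a/b/c -> a/b/c2/d2/cc
	                        -> utimes on the affected old and new parents

Without the deferral, the very first rename attempted would target a/b/c2/d2/cc while d2 does not exist on the receiving side yet.
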
@@ -145,6 +212,15 @@ struct name_cache_entry {
145 char name[]; 212 char name[];
146}; 213};
147 214
215static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
216
217static int need_send_hole(struct send_ctx *sctx)
218{
219 return (sctx->parent_root && !sctx->cur_inode_new &&
220 !sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
221 S_ISREG(sctx->cur_inode_mode));
222}
223
148static void fs_path_reset(struct fs_path *p) 224static void fs_path_reset(struct fs_path *p)
149{ 225{
150 if (p->reversed) { 226 if (p->reversed) {
@@ -336,16 +412,6 @@ out:
336 return ret; 412 return ret;
337} 413}
338 414
339#if 0
340static void fs_path_remove(struct fs_path *p)
341{
342 BUG_ON(p->reversed);
343 while (p->start != p->end && *p->end != '/')
344 p->end--;
345 *p->end = 0;
346}
347#endif
348
349static int fs_path_copy(struct fs_path *p, struct fs_path *from) 415static int fs_path_copy(struct fs_path *p, struct fs_path *from)
350{ 416{
351 int ret; 417 int ret;
@@ -436,30 +502,15 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
436 return 0; 502 return 0;
437} 503}
438 504
439#if 0 505#define TLV_PUT_DEFINE_INT(bits) \
440static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value) 506 static int tlv_put_u##bits(struct send_ctx *sctx, \
441{ 507 u##bits attr, u##bits value) \
442 return tlv_put(sctx, attr, &value, sizeof(value)); 508 { \
443} 509 __le##bits __tmp = cpu_to_le##bits(value); \
444 510 return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \
445static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value) 511 }
446{
447 __le16 tmp = cpu_to_le16(value);
448 return tlv_put(sctx, attr, &tmp, sizeof(tmp));
449}
450
451static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
452{
453 __le32 tmp = cpu_to_le32(value);
454 return tlv_put(sctx, attr, &tmp, sizeof(tmp));
455}
456#endif
457 512
458static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value) 513TLV_PUT_DEFINE_INT(64)
459{
460 __le64 tmp = cpu_to_le64(value);
461 return tlv_put(sctx, attr, &tmp, sizeof(tmp));
462}
463 514
464static int tlv_put_string(struct send_ctx *sctx, u16 attr, 515static int tlv_put_string(struct send_ctx *sctx, u16 attr,
465 const char *str, int len) 516 const char *str, int len)
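
Annotation: for reference, TLV_PUT_DEFINE_INT(64) expands, modulo whitespace, to:

	static int tlv_put_u64(struct send_ctx *sctx, u64 attr, u64 value)
	{
		__le64 __tmp = cpu_to_le64(value);
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));
	}

Note that the macro types attr as u##bits, where the deleted hand-written helpers took a u16; callers pass small attribute ids either way, so the widening is harmless, and the dead u8/u16/u32 variants simply stop being carried around inside #if 0.
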
@@ -475,17 +526,6 @@ static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
475 return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE); 526 return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
476} 527}
477 528
478#if 0
479static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
480 struct timespec *ts)
481{
482 struct btrfs_timespec bts;
483 bts.sec = cpu_to_le64(ts->tv_sec);
484 bts.nsec = cpu_to_le32(ts->tv_nsec);
485 return tlv_put(sctx, attr, &bts, sizeof(bts));
486}
487#endif
488
489static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr, 529static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
490 struct extent_buffer *eb, 530 struct extent_buffer *eb,
491 struct btrfs_timespec *ts) 531 struct btrfs_timespec *ts)
@@ -533,12 +573,6 @@ static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
533 if (ret < 0) \ 573 if (ret < 0) \
534 goto tlv_put_failure; \ 574 goto tlv_put_failure; \
535 } while (0) 575 } while (0)
536#define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
537 do { \
538 ret = tlv_put_timespec(sctx, attrtype, ts); \
539 if (ret < 0) \
540 goto tlv_put_failure; \
541 } while (0)
542#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \ 576#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
543 do { \ 577 do { \
544 ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \ 578 ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
@@ -586,7 +620,7 @@ static int send_cmd(struct send_ctx *sctx)
586 hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr)); 620 hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
587 hdr->crc = 0; 621 hdr->crc = 0;
588 622
589 crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size); 623 crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
590 hdr->crc = cpu_to_le32(crc); 624 hdr->crc = cpu_to_le32(crc);
591 625
592 ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size, 626 ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
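
Annotation: the checksum dance above is worth spelling out, since the crc field must be zero while the digest runs (the digest covers the header too). A userspace sketch, assuming some crc32c() implementation is linked in; the header struct below models struct btrfs_cmd_header rather than quoting it:

	#include <stdint.h>
	#include <stddef.h>

	struct cmd_hdr {		/* models struct btrfs_cmd_header */
		uint32_t len;
		uint16_t cmd;
		uint32_t crc;
	} __attribute__((packed));

	uint32_t crc32c(uint32_t seed, const void *buf, size_t len);

	static void seal_cmd(void *send_buf, uint32_t send_size)
	{
		struct cmd_hdr *hdr = send_buf;

		hdr->len = send_size - sizeof(*hdr); /* LE conversion omitted */
		hdr->crc = 0;			/* must be zero while hashing */
		hdr->crc = crc32c(0, send_buf, send_size);
	}

Clearing rather than skipping the field lets the receiver verify by recomputing over the same bytes with crc zeroed again. The switch from the generic crc32c() to btrfs_crc32c() goes with the new hash.c/hash.h wrapper this series adds (note the "hash.h" include above), which routes the checksum through the kernel crypto API's crc32c instead of the libcrc32c dependency.
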
@@ -1270,7 +1304,7 @@ static int find_extent_clone(struct send_ctx *sctx,
1270 if (!backref_ctx->found_itself) { 1304 if (!backref_ctx->found_itself) {
1271 /* found a bug in backref code? */ 1305 /* found a bug in backref code? */
1272 ret = -EIO; 1306 ret = -EIO;
1273 printk(KERN_ERR "btrfs: ERROR did not find backref in " 1307 btrfs_err(sctx->send_root->fs_info, "did not find backref in "
1274 "send_root. inode=%llu, offset=%llu, " 1308 "send_root. inode=%llu, offset=%llu, "
1275 "disk_byte=%llu found extent=%llu\n", 1309 "disk_byte=%llu found extent=%llu\n",
1276 ino, data_offset, disk_byte, found_key.objectid); 1310 ino, data_offset, disk_byte, found_key.objectid);
@@ -1298,6 +1332,16 @@ verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1298 } 1332 }
1299 1333
1300 if (cur_clone_root) { 1334 if (cur_clone_root) {
1335 if (compressed != BTRFS_COMPRESS_NONE) {
1336 /*
1337 * Offsets given by iterate_extent_inodes() are relative
1338 * to the start of the extent, we need to add logical
1339 * offset from the file extent item.
1340 * (See why at backref.c:check_extent_in_eb())
1341 */
1342 cur_clone_root->offset += btrfs_file_extent_offset(eb,
1343 fi);
1344 }
1301 *found = cur_clone_root; 1345 *found = cur_clone_root;
1302 ret = 0; 1346 ret = 0;
1303 } else { 1347 } else {
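
Annotation, with made-up numbers: if iterate_extent_inodes() reported a clone candidate at offset 0 (extent-relative, because the extent is compressed) and the file extent item's offset into that extent is 64K, the clone offset that actually names the same data is 0 + 64K = 64K; without the adjustment the receiver would clone from 64K too early in the source file.
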
@@ -1343,7 +1387,7 @@ static int read_symlink(struct btrfs_root *root,
1343 BUG_ON(compression); 1387 BUG_ON(compression);
1344 1388
1345 off = btrfs_file_extent_inline_start(ei); 1389 off = btrfs_file_extent_inline_start(ei);
1346 len = btrfs_file_extent_inline_len(path->nodes[0], ei); 1390 len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
1347 1391
1348 ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); 1392 ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1349 1393
@@ -1372,7 +1416,7 @@ static int gen_unique_name(struct send_ctx *sctx,
1372 return -ENOMEM; 1416 return -ENOMEM;
1373 1417
1374 while (1) { 1418 while (1) {
1375 len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu", 1419 len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
1376 ino, gen, idx); 1420 ino, gen, idx);
1377 if (len >= sizeof(tmp)) { 1421 if (len >= sizeof(tmp)) {
1378 /* should really not happen */ 1422 /* should really not happen */
@@ -1933,6 +1977,7 @@ static void name_cache_free(struct send_ctx *sctx)
1933 */ 1977 */
1934static int __get_cur_name_and_parent(struct send_ctx *sctx, 1978static int __get_cur_name_and_parent(struct send_ctx *sctx,
1935 u64 ino, u64 gen, 1979 u64 ino, u64 gen,
1980 int skip_name_cache,
1936 u64 *parent_ino, 1981 u64 *parent_ino,
1937 u64 *parent_gen, 1982 u64 *parent_gen,
1938 struct fs_path *dest) 1983 struct fs_path *dest)
@@ -1942,6 +1987,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
1942 struct btrfs_path *path = NULL; 1987 struct btrfs_path *path = NULL;
1943 struct name_cache_entry *nce = NULL; 1988 struct name_cache_entry *nce = NULL;
1944 1989
1990 if (skip_name_cache)
1991 goto get_ref;
1945 /* 1992 /*
1946 * First check if we already did a call to this function with the same 1993 * First check if we already did a call to this function with the same
1947 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes 1994 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
@@ -1986,11 +2033,12 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
1986 goto out_cache; 2033 goto out_cache;
1987 } 2034 }
1988 2035
2036get_ref:
1989 /* 2037 /*
1990 * Depending on whether the inode was already processed or not, use 2038 * Depending on whether the inode was already processed or not, use
1991 * send_root or parent_root for ref lookup. 2039 * send_root or parent_root for ref lookup.
1992 */ 2040 */
1993 if (ino < sctx->send_progress) 2041 if (ino < sctx->send_progress && !skip_name_cache)
1994 ret = get_first_ref(sctx->send_root, ino, 2042 ret = get_first_ref(sctx->send_root, ino,
1995 parent_ino, parent_gen, dest); 2043 parent_ino, parent_gen, dest);
1996 else 2044 else
@@ -2014,6 +2062,8 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
2014 goto out; 2062 goto out;
2015 ret = 1; 2063 ret = 1;
2016 } 2064 }
2065 if (skip_name_cache)
2066 goto out;
2017 2067
2018out_cache: 2068out_cache:
2019 /* 2069 /*
@@ -2081,6 +2131,9 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2081 u64 parent_inode = 0; 2131 u64 parent_inode = 0;
2082 u64 parent_gen = 0; 2132 u64 parent_gen = 0;
2083 int stop = 0; 2133 int stop = 0;
2134 u64 start_ino = ino;
2135 u64 start_gen = gen;
2136 int skip_name_cache = 0;
2084 2137
2085 name = fs_path_alloc(); 2138 name = fs_path_alloc();
2086 if (!name) { 2139 if (!name) {
@@ -2088,19 +2141,32 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2088 goto out; 2141 goto out;
2089 } 2142 }
2090 2143
2144 if (is_waiting_for_move(sctx, ino))
2145 skip_name_cache = 1;
2146
2147again:
2091 dest->reversed = 1; 2148 dest->reversed = 1;
2092 fs_path_reset(dest); 2149 fs_path_reset(dest);
2093 2150
2094 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { 2151 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2095 fs_path_reset(name); 2152 fs_path_reset(name);
2096 2153
2097 ret = __get_cur_name_and_parent(sctx, ino, gen, 2154 ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache,
2098 &parent_inode, &parent_gen, name); 2155 &parent_inode, &parent_gen, name);
2099 if (ret < 0) 2156 if (ret < 0)
2100 goto out; 2157 goto out;
2101 if (ret) 2158 if (ret)
2102 stop = 1; 2159 stop = 1;
2103 2160
2161 if (!skip_name_cache &&
2162 is_waiting_for_move(sctx, parent_inode)) {
2163 ino = start_ino;
2164 gen = start_gen;
2165 stop = 0;
2166 skip_name_cache = 1;
2167 goto again;
2168 }
2169
2104 ret = fs_path_add_path(dest, name); 2170 ret = fs_path_add_path(dest, name);
2105 if (ret < 0) 2171 if (ret < 0)
2106 goto out; 2172 goto out;
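
Annotation on the restart above: the name cache stores paths computed on the assumption that every earlier rename has already been emitted. Once any ancestor turns out to be in waiting_dir_moves, every cached component above it is suspect, so the loop starts over from the original inode with the cache bypassed; __get_cur_name_and_parent() then resolves each ref from the parent root, which still reflects the pre-move layout.
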
@@ -2131,7 +2197,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
2131 char *name = NULL; 2197 char *name = NULL;
2132 int namelen; 2198 int namelen;
2133 2199
2134 path = alloc_path_for_send(); 2200 path = btrfs_alloc_path();
2135 if (!path) 2201 if (!path)
2136 return -ENOMEM; 2202 return -ENOMEM;
2137 2203
@@ -2180,12 +2246,12 @@ static int send_subvol_begin(struct send_ctx *sctx)
2180 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2246 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2181 sctx->send_root->root_item.uuid); 2247 sctx->send_root->root_item.uuid);
2182 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, 2248 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2183 sctx->send_root->root_item.ctransid); 2249 le64_to_cpu(sctx->send_root->root_item.ctransid));
2184 if (parent_root) { 2250 if (parent_root) {
2185 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 2251 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2186 sctx->parent_root->root_item.uuid); 2252 sctx->parent_root->root_item.uuid);
2187 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 2253 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2188 sctx->parent_root->root_item.ctransid); 2254 le64_to_cpu(sctx->parent_root->root_item.ctransid));
2189 } 2255 }
2190 2256
2191 ret = send_cmd(sctx); 2257 ret = send_cmd(sctx);
@@ -2672,10 +2738,347 @@ out:
2672 return ret; 2738 return ret;
2673} 2739}
2674 2740
2741static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
2742{
2743 struct rb_node *n = sctx->waiting_dir_moves.rb_node;
2744 struct waiting_dir_move *entry;
2745
2746 while (n) {
2747 entry = rb_entry(n, struct waiting_dir_move, node);
2748 if (ino < entry->ino)
2749 n = n->rb_left;
2750 else if (ino > entry->ino)
2751 n = n->rb_right;
2752 else
2753 return 1;
2754 }
2755 return 0;
2756}
2757
2758static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2759{
2760 struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
2761 struct rb_node *parent = NULL;
2762 struct waiting_dir_move *entry, *dm;
2763
2764 dm = kmalloc(sizeof(*dm), GFP_NOFS);
2765 if (!dm)
2766 return -ENOMEM;
2767 dm->ino = ino;
2768
2769 while (*p) {
2770 parent = *p;
2771 entry = rb_entry(parent, struct waiting_dir_move, node);
2772 if (ino < entry->ino) {
2773 p = &(*p)->rb_left;
2774 } else if (ino > entry->ino) {
2775 p = &(*p)->rb_right;
2776 } else {
2777 kfree(dm);
2778 return -EEXIST;
2779 }
2780 }
2781
2782 rb_link_node(&dm->node, parent, p);
2783 rb_insert_color(&dm->node, &sctx->waiting_dir_moves);
2784 return 0;
2785}
2786
2787static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino)
2788{
2789 struct rb_node *n = sctx->waiting_dir_moves.rb_node;
2790 struct waiting_dir_move *entry;
2791
2792 while (n) {
2793 entry = rb_entry(n, struct waiting_dir_move, node);
2794 if (ino < entry->ino) {
2795 n = n->rb_left;
2796 } else if (ino > entry->ino) {
2797 n = n->rb_right;
2798 } else {
2799 rb_erase(&entry->node, &sctx->waiting_dir_moves);
2800 kfree(entry);
2801 return 0;
2802 }
2803 }
2804 return -ENOENT;
2805}
2806
2807static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
2808{
2809 struct rb_node **p = &sctx->pending_dir_moves.rb_node;
2810 struct rb_node *parent = NULL;
2811 struct pending_dir_move *entry, *pm;
2812 struct recorded_ref *cur;
2813 int exists = 0;
2814 int ret;
2815
2816 pm = kmalloc(sizeof(*pm), GFP_NOFS);
2817 if (!pm)
2818 return -ENOMEM;
2819 pm->parent_ino = parent_ino;
2820 pm->ino = sctx->cur_ino;
2821 pm->gen = sctx->cur_inode_gen;
2822 INIT_LIST_HEAD(&pm->list);
2823 INIT_LIST_HEAD(&pm->update_refs);
2824 RB_CLEAR_NODE(&pm->node);
2825
2826 while (*p) {
2827 parent = *p;
2828 entry = rb_entry(parent, struct pending_dir_move, node);
2829 if (parent_ino < entry->parent_ino) {
2830 p = &(*p)->rb_left;
2831 } else if (parent_ino > entry->parent_ino) {
2832 p = &(*p)->rb_right;
2833 } else {
2834 exists = 1;
2835 break;
2836 }
2837 }
2838
2839 list_for_each_entry(cur, &sctx->deleted_refs, list) {
2840 ret = dup_ref(cur, &pm->update_refs);
2841 if (ret < 0)
2842 goto out;
2843 }
2844 list_for_each_entry(cur, &sctx->new_refs, list) {
2845 ret = dup_ref(cur, &pm->update_refs);
2846 if (ret < 0)
2847 goto out;
2848 }
2849
2850 ret = add_waiting_dir_move(sctx, pm->ino);
2851 if (ret)
2852 goto out;
2853
2854 if (exists) {
2855 list_add_tail(&pm->list, &entry->list);
2856 } else {
2857 rb_link_node(&pm->node, parent, p);
2858 rb_insert_color(&pm->node, &sctx->pending_dir_moves);
2859 }
2860 ret = 0;
2861out:
2862 if (ret) {
2863 __free_recorded_refs(&pm->update_refs);
2864 kfree(pm);
2865 }
2866 return ret;
2867}
2868
2869static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
2870 u64 parent_ino)
2871{
2872 struct rb_node *n = sctx->pending_dir_moves.rb_node;
2873 struct pending_dir_move *entry;
2874
2875 while (n) {
2876 entry = rb_entry(n, struct pending_dir_move, node);
2877 if (parent_ino < entry->parent_ino)
2878 n = n->rb_left;
2879 else if (parent_ino > entry->parent_ino)
2880 n = n->rb_right;
2881 else
2882 return entry;
2883 }
2884 return NULL;
2885}
2886
2887static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
2888{
2889 struct fs_path *from_path = NULL;
2890 struct fs_path *to_path = NULL;
2891 u64 orig_progress = sctx->send_progress;
2892 struct recorded_ref *cur;
2893 int ret;
2894
2895 from_path = fs_path_alloc();
2896 if (!from_path)
2897 return -ENOMEM;
2898
2899 sctx->send_progress = pm->ino;
2900 ret = get_cur_path(sctx, pm->ino, pm->gen, from_path);
2901 if (ret < 0)
2902 goto out;
2903
2904 to_path = fs_path_alloc();
2905 if (!to_path) {
2906 ret = -ENOMEM;
2907 goto out;
2908 }
2909
2910 sctx->send_progress = sctx->cur_ino + 1;
2911 ret = del_waiting_dir_move(sctx, pm->ino);
2912 ASSERT(ret == 0);
2913
2914 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
2915 if (ret < 0)
2916 goto out;
2917
2918 ret = send_rename(sctx, from_path, to_path);
2919 if (ret < 0)
2920 goto out;
2921
2922 ret = send_utimes(sctx, pm->ino, pm->gen);
2923 if (ret < 0)
2924 goto out;
2925
2926 /*
2927 * After rename/move, need to update the utimes of both new parent(s)
2928 * and old parent(s).
2929 */
2930 list_for_each_entry(cur, &pm->update_refs, list) {
2931 ret = send_utimes(sctx, cur->dir, cur->dir_gen);
2932 if (ret < 0)
2933 goto out;
2934 }
2935
2936out:
2937 fs_path_free(from_path);
2938 fs_path_free(to_path);
2939 sctx->send_progress = orig_progress;
2940
2941 return ret;
2942}
2943
2944static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
2945{
2946 if (!list_empty(&m->list))
2947 list_del(&m->list);
2948 if (!RB_EMPTY_NODE(&m->node))
2949 rb_erase(&m->node, &sctx->pending_dir_moves);
2950 __free_recorded_refs(&m->update_refs);
2951 kfree(m);
2952}
2953
2954static void tail_append_pending_moves(struct pending_dir_move *moves,
2955 struct list_head *stack)
2956{
2957 if (list_empty(&moves->list)) {
2958 list_add_tail(&moves->list, stack);
2959 } else {
2960 LIST_HEAD(list);
2961 list_splice_init(&moves->list, &list);
2962 list_add_tail(&moves->list, stack);
2963 list_splice_tail(&list, stack);
2964 }
2965}
2966
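
Annotation: the splice dance in tail_append_pending_moves() preserves ordering when a pending move arrives carrying siblings (moves queued under the same parent_ino hang off the rb node's list). A worked trace for a node X whose list already holds Y and Z:

	stack:  [ ... ]          X->list = (Y, Z)
	list_splice_init         detaches (Y, Z); X->list now empty
	list_add_tail(X)         stack: [ ..., X ]
	list_splice_tail         stack: [ ..., X, Y, Z ]

A bare list_add_tail() on a non-empty X->list would relink X's list node into the stack while Y and Z still point at it, corrupting the sibling ring.
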
2967static int apply_children_dir_moves(struct send_ctx *sctx)
2968{
2969 struct pending_dir_move *pm;
2970 struct list_head stack;
2971 u64 parent_ino = sctx->cur_ino;
2972 int ret = 0;
2973
2974 pm = get_pending_dir_moves(sctx, parent_ino);
2975 if (!pm)
2976 return 0;
2977
2978 INIT_LIST_HEAD(&stack);
2979 tail_append_pending_moves(pm, &stack);
2980
2981 while (!list_empty(&stack)) {
2982 pm = list_first_entry(&stack, struct pending_dir_move, list);
2983 parent_ino = pm->ino;
2984 ret = apply_dir_move(sctx, pm);
2985 free_pending_move(sctx, pm);
2986 if (ret)
2987 goto out;
2988 pm = get_pending_dir_moves(sctx, parent_ino);
2989 if (pm)
2990 tail_append_pending_moves(pm, &stack);
2991 }
2992 return 0;
2993
2994out:
2995 while (!list_empty(&stack)) {
2996 pm = list_first_entry(&stack, struct pending_dir_move, list);
2997 free_pending_move(sctx, pm);
2998 }
2999 return ret;
3000}
3001
3002static int wait_for_parent_move(struct send_ctx *sctx,
3003 struct recorded_ref *parent_ref)
3004{
3005 int ret;
3006 u64 ino = parent_ref->dir;
3007 u64 parent_ino_before, parent_ino_after;
3008 u64 new_gen, old_gen;
3009 struct fs_path *path_before = NULL;
3010 struct fs_path *path_after = NULL;
3011 int len1, len2;
3012
3013 if (parent_ref->dir <= sctx->cur_ino)
3014 return 0;
3015
3016 if (is_waiting_for_move(sctx, ino))
3017 return 1;
3018
3019 ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen,
3020 NULL, NULL, NULL, NULL);
3021 if (ret == -ENOENT)
3022 return 0;
3023 else if (ret < 0)
3024 return ret;
3025
3026 ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen,
3027 NULL, NULL, NULL, NULL);
3028 if (ret < 0)
3029 return ret;
3030
3031 if (new_gen != old_gen)
3032 return 0;
3033
3034 path_before = fs_path_alloc();
3035 if (!path_before)
3036 return -ENOMEM;
3037
3038 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3039 NULL, path_before);
3040 if (ret == -ENOENT) {
3041 ret = 0;
3042 goto out;
3043 } else if (ret < 0) {
3044 goto out;
3045 }
3046
3047 path_after = fs_path_alloc();
3048 if (!path_after) {
3049 ret = -ENOMEM;
3050 goto out;
3051 }
3052
3053 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3054 NULL, path_after);
3055 if (ret == -ENOENT) {
3056 ret = 0;
3057 goto out;
3058 } else if (ret < 0) {
3059 goto out;
3060 }
3061
3062 len1 = fs_path_len(path_before);
3063 len2 = fs_path_len(path_after);
3064 if ((parent_ino_before != parent_ino_after) && (len1 != len2 ||
3065 memcmp(path_before->start, path_after->start, len1))) {
3066 ret = 1;
3067 goto out;
3068 }
3069 ret = 0;
3070
3071out:
3072 fs_path_free(path_before);
3073 fs_path_free(path_after);
3074
3075 return ret;
3076}
3077
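
Annotation: traced against the example tree in the comment at the top of this file, while processing ino 259 its new ref hangs under ino 260. 260 is greater than cur_ino, its generation matches across the two roots, and its first ref changes both parent (259 before, 261 after) and name ("d" before, "d2" after), so the function returns 1 and 259's rename is deferred via add_pending_dir_move(). The generation check guards against inode number reuse: had 260 been deleted and recreated, comparing its refs across roots would be comparing two unrelated inodes, and the function correctly returns 0 instead.
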
2675/* 3078/*
2676 * This does all the move/link/unlink/rmdir magic. 3079 * This does all the move/link/unlink/rmdir magic.
2677 */ 3080 */
2678static int process_recorded_refs(struct send_ctx *sctx) 3081static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
2679{ 3082{
2680 int ret = 0; 3083 int ret = 0;
2681 struct recorded_ref *cur; 3084 struct recorded_ref *cur;
@@ -2824,11 +3227,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2824 * dirs, we always have one new and one deleted 3227 * dirs, we always have one new and one deleted
2825 * ref. The deleted ref is ignored later. 3228 * ref. The deleted ref is ignored later.
2826 */ 3229 */
2827 ret = send_rename(sctx, valid_path, 3230 if (wait_for_parent_move(sctx, cur)) {
2828 cur->full_path); 3231 ret = add_pending_dir_move(sctx,
2829 if (ret < 0) 3232 cur->dir);
2830 goto out; 3233 *pending_move = 1;
2831 ret = fs_path_copy(valid_path, cur->full_path); 3234 } else {
3235 ret = send_rename(sctx, valid_path,
3236 cur->full_path);
3237 if (!ret)
3238 ret = fs_path_copy(valid_path,
3239 cur->full_path);
3240 }
2832 if (ret < 0) 3241 if (ret < 0)
2833 goto out; 3242 goto out;
2834 } else { 3243 } else {
@@ -3197,6 +3606,7 @@ static int process_all_refs(struct send_ctx *sctx,
3197 struct extent_buffer *eb; 3606 struct extent_buffer *eb;
3198 int slot; 3607 int slot;
3199 iterate_inode_ref_t cb; 3608 iterate_inode_ref_t cb;
3609 int pending_move = 0;
3200 3610
3201 path = alloc_path_for_send(); 3611 path = alloc_path_for_send();
3202 if (!path) 3612 if (!path)
@@ -3240,7 +3650,9 @@ static int process_all_refs(struct send_ctx *sctx,
3240 } 3650 }
3241 btrfs_release_path(path); 3651 btrfs_release_path(path);
3242 3652
3243 ret = process_recorded_refs(sctx); 3653 ret = process_recorded_refs(sctx, &pending_move);
3654 /* Only applicable to an incremental send. */
3655 ASSERT(pending_move == 0);
3244 3656
3245out: 3657out:
3246 btrfs_free_path(path); 3658 btrfs_free_path(path);
@@ -3706,7 +4118,7 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
3706 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4118 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
3707 clone_root->root->root_item.uuid); 4119 clone_root->root->root_item.uuid);
3708 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 4120 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
3709 clone_root->root->root_item.ctransid); 4121 le64_to_cpu(clone_root->root->root_item.ctransid));
3710 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); 4122 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
3711 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, 4123 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
3712 clone_root->offset); 4124 clone_root->offset);
@@ -3752,6 +4164,39 @@ out:
3752 return ret; 4164 return ret;
3753} 4165}
3754 4166
4167static int send_hole(struct send_ctx *sctx, u64 end)
4168{
4169 struct fs_path *p = NULL;
4170 u64 offset = sctx->cur_inode_last_extent;
4171 u64 len;
4172 int ret = 0;
4173
4174 p = fs_path_alloc();
4175 if (!p)
4176 return -ENOMEM;
4177 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
4178 while (offset < end) {
4179 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
4180
4181 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
4182 if (ret < 0)
4183 break;
4184 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
4185 if (ret < 0)
4186 break;
4187 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
4188 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
4189 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len);
4190 ret = send_cmd(sctx);
4191 if (ret < 0)
4192 break;
4193 offset += len;
4194 }
4195tlv_put_failure:
4196 fs_path_free(p);
4197 return ret;
4198}
4199
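
Annotation: send_hole() fakes the hole as ordinary zero-filled WRITE commands, chunked by the shared read buffer. Worked numbers, assuming BTRFS_SEND_READ_SIZE is 48K (the value in send.h as I recall it; treat the constant as an assumption): a hole spanning [64K, 208K) is 144K long and becomes three commands, 48K of zeros at offsets 64K, 112K and 160K. A protocol revision could encode holes compactly, but reusing WRITE keeps existing receivers working unmodified.
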
3755static int send_write_or_clone(struct send_ctx *sctx, 4200static int send_write_or_clone(struct send_ctx *sctx,
3756 struct btrfs_path *path, 4201 struct btrfs_path *path,
3757 struct btrfs_key *key, 4202 struct btrfs_key *key,
@@ -3764,12 +4209,14 @@ static int send_write_or_clone(struct send_ctx *sctx,
3764 u64 len; 4209 u64 len;
3765 u32 l; 4210 u32 l;
3766 u8 type; 4211 u8 type;
4212 u64 bs = sctx->send_root->fs_info->sb->s_blocksize;
3767 4213
3768 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4214 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3769 struct btrfs_file_extent_item); 4215 struct btrfs_file_extent_item);
3770 type = btrfs_file_extent_type(path->nodes[0], ei); 4216 type = btrfs_file_extent_type(path->nodes[0], ei);
3771 if (type == BTRFS_FILE_EXTENT_INLINE) { 4217 if (type == BTRFS_FILE_EXTENT_INLINE) {
3772 len = btrfs_file_extent_inline_len(path->nodes[0], ei); 4218 len = btrfs_file_extent_inline_len(path->nodes[0],
4219 path->slots[0], ei);
3773 /* 4220 /*
3774 * it is possible the inline item won't cover the whole page, 4221 * it is possible the inline item won't cover the whole page,
3775 * but there may be items after this page. Make 4222 * but there may be items after this page. Make
@@ -3787,7 +4234,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
3787 goto out; 4234 goto out;
3788 } 4235 }
3789 4236
3790 if (clone_root) { 4237 if (clone_root && IS_ALIGNED(offset + len, bs)) {
3791 ret = send_clone(sctx, offset, len, clone_root); 4238 ret = send_clone(sctx, offset, len, clone_root);
3792 } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { 4239 } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
3793 ret = send_update_extent(sctx, offset, len); 4240 ret = send_update_extent(sctx, offset, len);
@@ -3979,6 +4426,101 @@ out:
3979 return ret; 4426 return ret;
3980} 4427}
3981 4428
4429static int get_last_extent(struct send_ctx *sctx, u64 offset)
4430{
4431 struct btrfs_path *path;
4432 struct btrfs_root *root = sctx->send_root;
4433 struct btrfs_file_extent_item *fi;
4434 struct btrfs_key key;
4435 u64 extent_end;
4436 u8 type;
4437 int ret;
4438
4439 path = alloc_path_for_send();
4440 if (!path)
4441 return -ENOMEM;
4442
4443 sctx->cur_inode_last_extent = 0;
4444
4445 key.objectid = sctx->cur_ino;
4446 key.type = BTRFS_EXTENT_DATA_KEY;
4447 key.offset = offset;
4448 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
4449 if (ret < 0)
4450 goto out;
4451 ret = 0;
4452 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4453 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
4454 goto out;
4455
4456 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4457 struct btrfs_file_extent_item);
4458 type = btrfs_file_extent_type(path->nodes[0], fi);
4459 if (type == BTRFS_FILE_EXTENT_INLINE) {
4460 u64 size = btrfs_file_extent_inline_len(path->nodes[0],
4461 path->slots[0], fi);
4462 extent_end = ALIGN(key.offset + size,
4463 sctx->send_root->sectorsize);
4464 } else {
4465 extent_end = key.offset +
4466 btrfs_file_extent_num_bytes(path->nodes[0], fi);
4467 }
4468 sctx->cur_inode_last_extent = extent_end;
4469out:
4470 btrfs_free_path(path);
4471 return ret;
4472}
4473
4474static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
4475 struct btrfs_key *key)
4476{
4477 struct btrfs_file_extent_item *fi;
4478 u64 extent_end;
4479 u8 type;
4480 int ret = 0;
4481
4482 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
4483 return 0;
4484
4485 if (sctx->cur_inode_last_extent == (u64)-1) {
4486 ret = get_last_extent(sctx, key->offset - 1);
4487 if (ret)
4488 return ret;
4489 }
4490
4491 fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
4492 struct btrfs_file_extent_item);
4493 type = btrfs_file_extent_type(path->nodes[0], fi);
4494 if (type == BTRFS_FILE_EXTENT_INLINE) {
4495 u64 size = btrfs_file_extent_inline_len(path->nodes[0],
4496 path->slots[0], fi);
4497 extent_end = ALIGN(key->offset + size,
4498 sctx->send_root->sectorsize);
4499 } else {
4500 extent_end = key->offset +
4501 btrfs_file_extent_num_bytes(path->nodes[0], fi);
4502 }
4503
4504 if (path->slots[0] == 0 &&
4505 sctx->cur_inode_last_extent < key->offset) {
4506 /*
4507 * We might have skipped entire leafs that contained only
4508 * file extent items for our current inode. These leafs have
4509 * a generation number smaller (older) than the one in the
4510 * current leaf and the leaf our last extent came from, and
4511 * are located between these 2 leafs.
4512 */
4513 ret = get_last_extent(sctx, key->offset - 1);
4514 if (ret)
4515 return ret;
4516 }
4517
4518 if (sctx->cur_inode_last_extent < key->offset)
4519 ret = send_hole(sctx, key->offset);
4520 sctx->cur_inode_last_extent = extent_end;
4521 return ret;
4522}
4523
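
Annotation, a worked example for the inline rounding, with made-up numbers on a 4K-sector filesystem: an inline extent holding 3000 bytes at file offset 0 yields extent_end = ALIGN(0 + 3000, 4096) = 4096. If the next file extent item then arrives with key->offset == 8192, cur_inode_last_extent (4096) is below it, so send_hole() fills [4096, 8192) with zeros before the new extent is processed; inline extents are accounted at whole-sector granularity.
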
3982static int process_extent(struct send_ctx *sctx, 4524static int process_extent(struct send_ctx *sctx,
3983 struct btrfs_path *path, 4525 struct btrfs_path *path,
3984 struct btrfs_key *key) 4526 struct btrfs_key *key)
@@ -3995,7 +4537,7 @@ static int process_extent(struct send_ctx *sctx,
3995 goto out; 4537 goto out;
3996 if (ret) { 4538 if (ret) {
3997 ret = 0; 4539 ret = 0;
3998 goto out; 4540 goto out_hole;
3999 } 4541 }
4000 } else { 4542 } else {
4001 struct btrfs_file_extent_item *ei; 4543 struct btrfs_file_extent_item *ei;
@@ -4031,7 +4573,10 @@ static int process_extent(struct send_ctx *sctx,
4031 goto out; 4573 goto out;
4032 4574
4033 ret = send_write_or_clone(sctx, path, key, found_clone); 4575 ret = send_write_or_clone(sctx, path, key, found_clone);
4034 4576 if (ret)
4577 goto out;
4578out_hole:
4579 ret = maybe_send_hole(sctx, path, key);
4035out: 4580out:
4036 return ret; 4581 return ret;
4037} 4582}
@@ -4054,17 +4599,25 @@ static int process_all_extents(struct send_ctx *sctx)
4054 key.objectid = sctx->cmp_key->objectid; 4599 key.objectid = sctx->cmp_key->objectid;
4055 key.type = BTRFS_EXTENT_DATA_KEY; 4600 key.type = BTRFS_EXTENT_DATA_KEY;
4056 key.offset = 0; 4601 key.offset = 0;
4057 while (1) { 4602 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4058 ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); 4603 if (ret < 0)
4059 if (ret < 0) 4604 goto out;
4060 goto out;
4061 if (ret) {
4062 ret = 0;
4063 goto out;
4064 }
4065 4605
4606 while (1) {
4066 eb = path->nodes[0]; 4607 eb = path->nodes[0];
4067 slot = path->slots[0]; 4608 slot = path->slots[0];
4609
4610 if (slot >= btrfs_header_nritems(eb)) {
4611 ret = btrfs_next_leaf(root, path);
4612 if (ret < 0) {
4613 goto out;
4614 } else if (ret > 0) {
4615 ret = 0;
4616 break;
4617 }
4618 continue;
4619 }
4620
4068 btrfs_item_key_to_cpu(eb, &found_key, slot); 4621 btrfs_item_key_to_cpu(eb, &found_key, slot);
4069 4622
4070 if (found_key.objectid != key.objectid || 4623 if (found_key.objectid != key.objectid ||
@@ -4077,8 +4630,7 @@ static int process_all_extents(struct send_ctx *sctx)
4077 if (ret < 0) 4630 if (ret < 0)
4078 goto out; 4631 goto out;
4079 4632
4080 btrfs_release_path(path); 4633 path->slots[0]++;
4081 key.offset = found_key.offset + 1;
4082 } 4634 }
4083 4635
4084out: 4636out:
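
Annotation: the rewritten loop is the standard btrfs leaf walk. It searches once, then advances path->slots[0] item by item and hops to the next leaf with btrfs_next_leaf() when the slot runs past the leaf's item count. The old loop released the path and re-descended from the tree root (btrfs_search_slot_for_read with key.offset bumped by one) for every single file extent item; the visited items are the same, but each step is now constant time instead of a full tree search.
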
@@ -4086,7 +4638,9 @@ out:
4086 return ret; 4638 return ret;
4087} 4639}
4088 4640
4089static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) 4641static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
4642 int *pending_move,
4643 int *refs_processed)
4090{ 4644{
4091 int ret = 0; 4645 int ret = 0;
4092 4646
@@ -4098,17 +4652,11 @@ static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
4098 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 4652 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
4099 goto out; 4653 goto out;
4100 4654
4101 ret = process_recorded_refs(sctx); 4655 ret = process_recorded_refs(sctx, pending_move);
4102 if (ret < 0) 4656 if (ret < 0)
4103 goto out; 4657 goto out;
4104 4658
4105 /* 4659 *refs_processed = 1;
4106 * We have processed the refs and thus need to advance send_progress.
4107 * Now, calls to get_cur_xxx will take the updated refs of the current
4108 * inode into account.
4109 */
4110 sctx->send_progress = sctx->cur_ino + 1;
4111
4112out: 4660out:
4113 return ret; 4661 return ret;
4114} 4662}
@@ -4124,11 +4672,29 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4124 u64 right_gid; 4672 u64 right_gid;
4125 int need_chmod = 0; 4673 int need_chmod = 0;
4126 int need_chown = 0; 4674 int need_chown = 0;
4675 int pending_move = 0;
4676 int refs_processed = 0;
4127 4677
4128 ret = process_recorded_refs_if_needed(sctx, at_end); 4678 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
4679 &refs_processed);
4129 if (ret < 0) 4680 if (ret < 0)
4130 goto out; 4681 goto out;
4131 4682
4683 /*
4684 * We have processed the refs and thus need to advance send_progress.
4685 * Now, calls to get_cur_xxx will take the updated refs of the current
4686 * inode into account.
4687 *
4688 * On the other hand, if our current inode is a directory and couldn't
4689 * be moved/renamed because its parent was renamed/moved too and it has
4690 * a higher inode number, we can only move/rename our current inode
4691 * after we moved/renamed its parent. Therefore in this case operate on
4692 * the old path (pre move/rename) of our current inode, and the
4693 * move/rename will be performed later.
4694 */
4695 if (refs_processed && !pending_move)
4696 sctx->send_progress = sctx->cur_ino + 1;
4697
4132 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 4698 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4133 goto out; 4699 goto out;
4134 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 4700 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
@@ -4157,6 +4723,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4157 } 4723 }
4158 4724
4159 if (S_ISREG(sctx->cur_inode_mode)) { 4725 if (S_ISREG(sctx->cur_inode_mode)) {
4726 if (need_send_hole(sctx)) {
4727 if (sctx->cur_inode_last_extent == (u64)-1) {
4728 ret = get_last_extent(sctx, (u64)-1);
4729 if (ret)
4730 goto out;
4731 }
4732 if (sctx->cur_inode_last_extent <
4733 sctx->cur_inode_size) {
4734 ret = send_hole(sctx, sctx->cur_inode_size);
4735 if (ret)
4736 goto out;
4737 }
4738 }
4160 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, 4739 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4161 sctx->cur_inode_size); 4740 sctx->cur_inode_size);
4162 if (ret < 0) 4741 if (ret < 0)
@@ -4177,9 +4756,21 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4177 } 4756 }
4178 4757
4179 /* 4758 /*
4180 * Need to send that every time, no matter if it actually changed 4759 * If other directory inodes depended on our current directory
4181 * between the two trees as we have done changes to the inode before. 4760 * inode's move/rename, now do their move/rename operations.
4761 */
4762 if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
4763 ret = apply_children_dir_moves(sctx);
4764 if (ret)
4765 goto out;
4766 }
4767
4768 /*
4769 * Need to send that every time, no matter if it actually
4770 * changed between the two trees as we have done changes to
4771 * the inode before.
4182 */ 4772 */
4773 sctx->send_progress = sctx->cur_ino + 1;
4183 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); 4774 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4184 if (ret < 0) 4775 if (ret < 0)
4185 goto out; 4776 goto out;
@@ -4200,6 +4791,7 @@ static int changed_inode(struct send_ctx *sctx,
4200 4791
4201 sctx->cur_ino = key->objectid; 4792 sctx->cur_ino = key->objectid;
4202 sctx->cur_inode_new_gen = 0; 4793 sctx->cur_inode_new_gen = 0;
4794 sctx->cur_inode_last_extent = (u64)-1;
4203 4795
4204 /* 4796 /*
4205 * Set send_progress to current inode. This will tell all get_cur_xxx 4797 * Set send_progress to current inode. This will tell all get_cur_xxx
@@ -4480,14 +5072,18 @@ static int changed_cb(struct btrfs_root *left_root,
4480 struct send_ctx *sctx = ctx; 5072 struct send_ctx *sctx = ctx;
4481 5073
4482 if (result == BTRFS_COMPARE_TREE_SAME) { 5074 if (result == BTRFS_COMPARE_TREE_SAME) {
4483 if (key->type != BTRFS_INODE_REF_KEY && 5075 if (key->type == BTRFS_INODE_REF_KEY ||
4484 key->type != BTRFS_INODE_EXTREF_KEY) 5076 key->type == BTRFS_INODE_EXTREF_KEY) {
4485 return 0; 5077 ret = compare_refs(sctx, left_path, key);
4486 ret = compare_refs(sctx, left_path, key); 5078 if (!ret)
4487 if (!ret) 5079 return 0;
5080 if (ret < 0)
5081 return ret;
5082 } else if (key->type == BTRFS_EXTENT_DATA_KEY) {
5083 return maybe_send_hole(sctx, left_path, key);
5084 } else {
4488 return 0; 5085 return 0;
4489 if (ret < 0) 5086 }
4490 return ret;
4491 result = BTRFS_COMPARE_TREE_CHANGED; 5087 result = BTRFS_COMPARE_TREE_CHANGED;
4492 ret = 0; 5088 ret = 0;
4493 } 5089 }
@@ -4522,7 +5118,6 @@ out:
4522static int full_send_tree(struct send_ctx *sctx) 5118static int full_send_tree(struct send_ctx *sctx)
4523{ 5119{
4524 int ret; 5120 int ret;
4525 struct btrfs_trans_handle *trans = NULL;
4526 struct btrfs_root *send_root = sctx->send_root; 5121 struct btrfs_root *send_root = sctx->send_root;
4527 struct btrfs_key key; 5122 struct btrfs_key key;
4528 struct btrfs_key found_key; 5123 struct btrfs_key found_key;
@@ -4544,19 +5139,6 @@ static int full_send_tree(struct send_ctx *sctx)
4544 key.type = BTRFS_INODE_ITEM_KEY; 5139 key.type = BTRFS_INODE_ITEM_KEY;
4545 key.offset = 0; 5140 key.offset = 0;
4546 5141
4547join_trans:
4548 /*
4549 * We need to make sure the transaction does not get committed
4550 * while we do anything on commit roots. Join a transaction to prevent
4551 * this.
4552 */
4553 trans = btrfs_join_transaction(send_root);
4554 if (IS_ERR(trans)) {
4555 ret = PTR_ERR(trans);
4556 trans = NULL;
4557 goto out;
4558 }
4559
4560 /* 5142 /*
4561 * Make sure the tree has not changed after re-joining. We detect this 5143 * Make sure the tree has not changed after re-joining. We detect this
4562 * by comparing start_ctransid and ctransid. They should always match. 5144 * by comparing start_ctransid and ctransid. They should always match.
@@ -4566,7 +5148,7 @@ join_trans:
4566 spin_unlock(&send_root->root_item_lock); 5148 spin_unlock(&send_root->root_item_lock);
4567 5149
4568 if (ctransid != start_ctransid) { 5150 if (ctransid != start_ctransid) {
4569 WARN(1, KERN_WARNING "btrfs: the root that you're trying to " 5151 WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
4570 "send was modified in between. This is " 5152 "send was modified in between. This is "
4571 "probably a bug.\n"); 5153 "probably a bug.\n");
4572 ret = -EIO; 5154 ret = -EIO;
@@ -4580,19 +5162,6 @@ join_trans:
4580 goto out_finish; 5162 goto out_finish;
4581 5163
4582 while (1) { 5164 while (1) {
4583 /*
4584 * When someone want to commit while we iterate, end the
4585 * joined transaction and rejoin.
4586 */
4587 if (btrfs_should_end_transaction(trans, send_root)) {
4588 ret = btrfs_end_transaction(trans, send_root);
4589 trans = NULL;
4590 if (ret < 0)
4591 goto out;
4592 btrfs_release_path(path);
4593 goto join_trans;
4594 }
4595
4596 eb = path->nodes[0]; 5165 eb = path->nodes[0];
4597 slot = path->slots[0]; 5166 slot = path->slots[0];
4598 btrfs_item_key_to_cpu(eb, &found_key, slot); 5167 btrfs_item_key_to_cpu(eb, &found_key, slot);
@@ -4620,12 +5189,6 @@ out_finish:
4620 5189
4621out: 5190out:
4622 btrfs_free_path(path); 5191 btrfs_free_path(path);
4623 if (trans) {
4624 if (!ret)
4625 ret = btrfs_end_transaction(trans, send_root);
4626 else
4627 btrfs_end_transaction(trans, send_root);
4628 }
4629 return ret; 5192 return ret;
4630} 5193}
4631 5194
@@ -4662,6 +5225,21 @@ out:
4662 return ret; 5225 return ret;
4663} 5226}
4664 5227
5228static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
5229{
5230 spin_lock(&root->root_item_lock);
5231 root->send_in_progress--;
5232 /*
5233 * Not much left to do; we don't know why it's unbalanced and
5234 * can't blindly reset it to 0.
5235 */
5236 if (root->send_in_progress < 0)
5237 btrfs_err(root->fs_info,
5238 "send_in_progress unbalanced %d root %llu\n",
5239 root->send_in_progress, root->root_key.objectid);
5240 spin_unlock(&root->root_item_lock);
5241}
5242
4665long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) 5243long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4666{ 5244{
4667 int ret = 0; 5245 int ret = 0;
@@ -4673,6 +5251,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4673 struct send_ctx *sctx = NULL; 5251 struct send_ctx *sctx = NULL;
4674 u32 i; 5252 u32 i;
4675 u64 *clone_sources_tmp = NULL; 5253 u64 *clone_sources_tmp = NULL;
5254 int clone_sources_to_rollback = 0;
5255 int sort_clone_roots = 0;
5256 int index;
4676 5257
4677 if (!capable(CAP_SYS_ADMIN)) 5258 if (!capable(CAP_SYS_ADMIN))
4678 return -EPERM; 5259 return -EPERM;
@@ -4681,38 +5262,26 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4681 fs_info = send_root->fs_info; 5262 fs_info = send_root->fs_info;
4682 5263
4683 /* 5264 /*
5265 * The subvolume must remain read-only during send, protect against
5266 * making it RW.
5267 */
5268 spin_lock(&send_root->root_item_lock);
5269 send_root->send_in_progress++;
5270 spin_unlock(&send_root->root_item_lock);
5271
5272 /*
4684 * This is done when we lookup the root, it should already be complete 5273 * This is done when we lookup the root, it should already be complete
4685 * by the time we get here. 5274 * by the time we get here.
4686 */ 5275 */
4687 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); 5276 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
4688 5277
4689 /* 5278 /*
4690 * If we just created this root we need to make sure that the orphan 5279 * Userspace tools do the checks and warn the user if it's
4691 * cleanup has been done and committed since we search the commit root, 5280 * not RO.
4692 * so check its commit root transid with our otransid and if they match
4693 * commit the transaction to make sure everything is updated.
4694 */ 5281 */
4695 down_read(&send_root->fs_info->extent_commit_sem); 5282 if (!btrfs_root_readonly(send_root)) {
4696 if (btrfs_header_generation(send_root->commit_root) == 5283 ret = -EPERM;
4697 btrfs_root_otransid(&send_root->root_item)) { 5284 goto out;
4698 struct btrfs_trans_handle *trans;
4699
4700 up_read(&send_root->fs_info->extent_commit_sem);
4701
4702 trans = btrfs_attach_transaction_barrier(send_root);
4703 if (IS_ERR(trans)) {
4704 if (PTR_ERR(trans) != -ENOENT) {
4705 ret = PTR_ERR(trans);
4706 goto out;
4707 }
4708 /* ENOENT means theres no transaction */
4709 } else {
4710 ret = btrfs_commit_transaction(trans, send_root);
4711 if (ret)
4712 goto out;
4713 }
4714 } else {
4715 up_read(&send_root->fs_info->extent_commit_sem);
4716 } 5285 }
4717 5286
4718 arg = memdup_user(arg_, sizeof(*arg)); 5287 arg = memdup_user(arg_, sizeof(*arg));
@@ -4753,8 +5322,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4753 goto out; 5322 goto out;
4754 } 5323 }
4755 5324
4756 sctx->mnt = mnt_file->f_path.mnt;
4757
4758 sctx->send_root = send_root; 5325 sctx->send_root = send_root;
4759 sctx->clone_roots_cnt = arg->clone_sources_count; 5326 sctx->clone_roots_cnt = arg->clone_sources_count;
4760 5327
@@ -4771,6 +5338,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4771 goto out; 5338 goto out;
4772 } 5339 }
4773 5340
5341 sctx->pending_dir_moves = RB_ROOT;
5342 sctx->waiting_dir_moves = RB_ROOT;
5343
4774 sctx->clone_roots = vzalloc(sizeof(struct clone_root) * 5344 sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
4775 (arg->clone_sources_count + 1)); 5345 (arg->clone_sources_count + 1));
4776 if (!sctx->clone_roots) { 5346 if (!sctx->clone_roots) {
@@ -4798,11 +5368,27 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4798 key.objectid = clone_sources_tmp[i]; 5368 key.objectid = clone_sources_tmp[i];
4799 key.type = BTRFS_ROOT_ITEM_KEY; 5369 key.type = BTRFS_ROOT_ITEM_KEY;
4800 key.offset = (u64)-1; 5370 key.offset = (u64)-1;
5371
5372 index = srcu_read_lock(&fs_info->subvol_srcu);
5373
4801 clone_root = btrfs_read_fs_root_no_name(fs_info, &key); 5374 clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
4802 if (IS_ERR(clone_root)) { 5375 if (IS_ERR(clone_root)) {
5376 srcu_read_unlock(&fs_info->subvol_srcu, index);
4803 ret = PTR_ERR(clone_root); 5377 ret = PTR_ERR(clone_root);
4804 goto out; 5378 goto out;
4805 } 5379 }
5380 clone_sources_to_rollback = i + 1;
5381 spin_lock(&clone_root->root_item_lock);
5382 clone_root->send_in_progress++;
5383 if (!btrfs_root_readonly(clone_root)) {
5384 spin_unlock(&clone_root->root_item_lock);
5385 srcu_read_unlock(&fs_info->subvol_srcu, index);
5386 ret = -EPERM;
5387 goto out;
5388 }
5389 spin_unlock(&clone_root->root_item_lock);
5390 srcu_read_unlock(&fs_info->subvol_srcu, index);
5391
4806 sctx->clone_roots[i].root = clone_root; 5392 sctx->clone_roots[i].root = clone_root;
4807 } 5393 }
4808 vfree(clone_sources_tmp); 5394 vfree(clone_sources_tmp);
@@ -4813,11 +5399,27 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4813 key.objectid = arg->parent_root; 5399 key.objectid = arg->parent_root;
4814 key.type = BTRFS_ROOT_ITEM_KEY; 5400 key.type = BTRFS_ROOT_ITEM_KEY;
4815 key.offset = (u64)-1; 5401 key.offset = (u64)-1;
5402
5403 index = srcu_read_lock(&fs_info->subvol_srcu);
5404
4816 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); 5405 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
4817 if (IS_ERR(sctx->parent_root)) { 5406 if (IS_ERR(sctx->parent_root)) {
5407 srcu_read_unlock(&fs_info->subvol_srcu, index);
4818 ret = PTR_ERR(sctx->parent_root); 5408 ret = PTR_ERR(sctx->parent_root);
4819 goto out; 5409 goto out;
4820 } 5410 }
5411
5412 spin_lock(&sctx->parent_root->root_item_lock);
5413 sctx->parent_root->send_in_progress++;
5414 if (!btrfs_root_readonly(sctx->parent_root)) {
5415 spin_unlock(&sctx->parent_root->root_item_lock);
5416 srcu_read_unlock(&fs_info->subvol_srcu, index);
5417 ret = -EPERM;
5418 goto out;
5419 }
5420 spin_unlock(&sctx->parent_root->root_item_lock);
5421
5422 srcu_read_unlock(&fs_info->subvol_srcu, index);
4821 } 5423 }
4822 5424
4823 /* 5425 /*
@@ -4831,6 +5433,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4831 sort(sctx->clone_roots, sctx->clone_roots_cnt, 5433 sort(sctx->clone_roots, sctx->clone_roots_cnt,
4832 sizeof(*sctx->clone_roots), __clone_root_cmp_sort, 5434 sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
4833 NULL); 5435 NULL);
5436 sort_clone_roots = 1;
4834 5437
4835 ret = send_subvol(sctx); 5438 ret = send_subvol(sctx);
4836 if (ret < 0) 5439 if (ret < 0)
@@ -4846,6 +5449,48 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4846 } 5449 }
4847 5450
4848out: 5451out:
5452 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
5453 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
5454 struct rb_node *n;
5455 struct pending_dir_move *pm;
5456
5457 n = rb_first(&sctx->pending_dir_moves);
5458 pm = rb_entry(n, struct pending_dir_move, node);
5459 while (!list_empty(&pm->list)) {
5460 struct pending_dir_move *pm2;
5461
5462 pm2 = list_first_entry(&pm->list,
5463 struct pending_dir_move, list);
5464 free_pending_move(sctx, pm2);
5465 }
5466 free_pending_move(sctx, pm);
5467 }
5468
5469 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
5470 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
5471 struct rb_node *n;
5472 struct waiting_dir_move *dm;
5473
5474 n = rb_first(&sctx->waiting_dir_moves);
5475 dm = rb_entry(n, struct waiting_dir_move, node);
5476 rb_erase(&dm->node, &sctx->waiting_dir_moves);
5477 kfree(dm);
5478 }
5479
5480 if (sort_clone_roots) {
5481 for (i = 0; i < sctx->clone_roots_cnt; i++)
5482 btrfs_root_dec_send_in_progress(
5483 sctx->clone_roots[i].root);
5484 } else {
5485 for (i = 0; sctx && i < clone_sources_to_rollback; i++)
5486 btrfs_root_dec_send_in_progress(
5487 sctx->clone_roots[i].root);
5488
5489 btrfs_root_dec_send_in_progress(send_root);
5490 }
5491 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
5492 btrfs_root_dec_send_in_progress(sctx->parent_root);
5493
4849 kfree(arg); 5494 kfree(arg);
4850 vfree(clone_sources_tmp); 5495 vfree(clone_sources_tmp);
4851 5496
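
The send_in_progress counter introduced above follows a guarded-counter pattern: bump it under the root's spinlock before the long-running send starts, drop it on every exit path, and warn rather than silently reset on underflow so an imbalance stays visible. A minimal userspace model of the same pattern, with a pthread mutex standing in for root_item_lock (names are illustrative, not btrfs API):

    #include <pthread.h>
    #include <stdio.h>

    struct root {
            pthread_mutex_t lock;        /* stands in for root_item_lock */
            int send_in_progress;        /* sends currently using this root */
    };

    static void root_get_send(struct root *r)
    {
            pthread_mutex_lock(&r->lock);
            r->send_in_progress++;
            pthread_mutex_unlock(&r->lock);
    }

    static void root_put_send(struct root *r)
    {
            pthread_mutex_lock(&r->lock);
            r->send_in_progress--;
            /* Underflow means a missing get/put pair; report, don't "fix". */
            if (r->send_in_progress < 0)
                    fprintf(stderr, "send_in_progress unbalanced: %d\n",
                            r->send_in_progress);
            pthread_mutex_unlock(&r->lock);
    }

    int main(void)
    {
            struct root r = { .lock = PTHREAD_MUTEX_INITIALIZER };
            root_get_send(&r);
            root_put_send(&r);
            root_put_send(&r);        /* triggers the unbalanced warning */
            return 0;
    }

This is why the out: label in btrfs_ioctl_send() walks the clone roots, the parent root and the send root separately: every increment taken on the way in must be paired exactly once on the way out.
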
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d71a11d13dfa..d04db817be5c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -48,6 +48,8 @@
48#include "transaction.h" 48#include "transaction.h"
49#include "btrfs_inode.h" 49#include "btrfs_inode.h"
50#include "print-tree.h" 50#include "print-tree.h"
51#include "hash.h"
52#include "props.h"
51#include "xattr.h" 53#include "xattr.h"
52#include "volumes.h" 54#include "volumes.h"
53#include "export.h" 55#include "export.h"
@@ -152,11 +154,12 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
152 vaf.fmt = fmt; 154 vaf.fmt = fmt;
153 vaf.va = &args; 155 vaf.va = &args;
154 156
155 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s (%pV)\n", 157 printk(KERN_CRIT
158 "BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
156 sb->s_id, function, line, errno, errstr, &vaf); 159 sb->s_id, function, line, errno, errstr, &vaf);
157 va_end(args); 160 va_end(args);
158 } else { 161 } else {
159 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s\n", 162 printk(KERN_CRIT "BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
160 sb->s_id, function, line, errno, errstr); 163 sb->s_id, function, line, errno, errstr);
161 } 164 }
162 165
@@ -250,7 +253,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
250 */ 253 */
251 if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, 254 if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
252 &root->fs_info->fs_state)) { 255 &root->fs_info->fs_state)) {
253 WARN(1, KERN_DEBUG "btrfs: Transaction aborted (error %d)\n", 256 WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n",
254 errno); 257 errno);
255 } 258 }
256 trans->aborted = errno; 259 trans->aborted = errno;
@@ -294,8 +297,8 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
294 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n", 297 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
295 s_id, function, line, &vaf, errno, errstr); 298 s_id, function, line, &vaf, errno, errstr);
296 299
297 printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n", 300 btrfs_crit(fs_info, "panic in %s:%d: %pV (errno=%d %s)",
298 s_id, function, line, &vaf, errno, errstr); 301 function, line, &vaf, errno, errstr);
299 va_end(args); 302 va_end(args);
300 /* Caller calls BUG() */ 303 /* Caller calls BUG() */
301} 304}
@@ -322,7 +325,9 @@ enum {
322 Opt_no_space_cache, Opt_recovery, Opt_skip_balance, 325 Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
323 Opt_check_integrity, Opt_check_integrity_including_extent_data, 326 Opt_check_integrity, Opt_check_integrity_including_extent_data,
324 Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, 327 Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
325 Opt_commit_interval, 328 Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
329 Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
330 Opt_datasum, Opt_treelog, Opt_noinode_cache,
326 Opt_err, 331 Opt_err,
327}; 332};
328 333
@@ -332,8 +337,11 @@ static match_table_t tokens = {
332 {Opt_subvolid, "subvolid=%s"}, 337 {Opt_subvolid, "subvolid=%s"},
333 {Opt_device, "device=%s"}, 338 {Opt_device, "device=%s"},
334 {Opt_nodatasum, "nodatasum"}, 339 {Opt_nodatasum, "nodatasum"},
340 {Opt_datasum, "datasum"},
335 {Opt_nodatacow, "nodatacow"}, 341 {Opt_nodatacow, "nodatacow"},
342 {Opt_datacow, "datacow"},
336 {Opt_nobarrier, "nobarrier"}, 343 {Opt_nobarrier, "nobarrier"},
344 {Opt_barrier, "barrier"},
337 {Opt_max_inline, "max_inline=%s"}, 345 {Opt_max_inline, "max_inline=%s"},
338 {Opt_alloc_start, "alloc_start=%s"}, 346 {Opt_alloc_start, "alloc_start=%s"},
339 {Opt_thread_pool, "thread_pool=%d"}, 347 {Opt_thread_pool, "thread_pool=%d"},
@@ -344,18 +352,25 @@ static match_table_t tokens = {
344 {Opt_ssd, "ssd"}, 352 {Opt_ssd, "ssd"},
345 {Opt_ssd_spread, "ssd_spread"}, 353 {Opt_ssd_spread, "ssd_spread"},
346 {Opt_nossd, "nossd"}, 354 {Opt_nossd, "nossd"},
355 {Opt_acl, "acl"},
347 {Opt_noacl, "noacl"}, 356 {Opt_noacl, "noacl"},
348 {Opt_notreelog, "notreelog"}, 357 {Opt_notreelog, "notreelog"},
358 {Opt_treelog, "treelog"},
349 {Opt_flushoncommit, "flushoncommit"}, 359 {Opt_flushoncommit, "flushoncommit"},
360 {Opt_noflushoncommit, "noflushoncommit"},
350 {Opt_ratio, "metadata_ratio=%d"}, 361 {Opt_ratio, "metadata_ratio=%d"},
351 {Opt_discard, "discard"}, 362 {Opt_discard, "discard"},
363 {Opt_nodiscard, "nodiscard"},
352 {Opt_space_cache, "space_cache"}, 364 {Opt_space_cache, "space_cache"},
353 {Opt_clear_cache, "clear_cache"}, 365 {Opt_clear_cache, "clear_cache"},
354 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 366 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
355 {Opt_enospc_debug, "enospc_debug"}, 367 {Opt_enospc_debug, "enospc_debug"},
368 {Opt_noenospc_debug, "noenospc_debug"},
356 {Opt_subvolrootid, "subvolrootid=%d"}, 369 {Opt_subvolrootid, "subvolrootid=%d"},
357 {Opt_defrag, "autodefrag"}, 370 {Opt_defrag, "autodefrag"},
371 {Opt_nodefrag, "noautodefrag"},
358 {Opt_inode_cache, "inode_cache"}, 372 {Opt_inode_cache, "inode_cache"},
373 {Opt_noinode_cache, "noinode_cache"},
359 {Opt_no_space_cache, "nospace_cache"}, 374 {Opt_no_space_cache, "nospace_cache"},
360 {Opt_recovery, "recovery"}, 375 {Opt_recovery, "recovery"},
361 {Opt_skip_balance, "skip_balance"}, 376 {Opt_skip_balance, "skip_balance"},
@@ -368,6 +383,20 @@ static match_table_t tokens = {
368 {Opt_err, NULL}, 383 {Opt_err, NULL},
369}; 384};
370 385
386#define btrfs_set_and_info(root, opt, fmt, args...) \
387{ \
388 if (!btrfs_test_opt(root, opt)) \
389 btrfs_info(root->fs_info, fmt, ##args); \
390 btrfs_set_opt(root->fs_info->mount_opt, opt); \
391}
392
393#define btrfs_clear_and_info(root, opt, fmt, args...) \
394{ \
395 if (btrfs_test_opt(root, opt)) \
396 btrfs_info(root->fs_info, fmt, ##args); \
397 btrfs_clear_opt(root->fs_info->mount_opt, opt); \
398}
399
371/* 400/*
372 * Regular mount options parser. Everything that is needed only when 401 * Regular mount options parser. Everything that is needed only when
373 * reading in a new superblock is parsed here. 402 * reading in a new superblock is parsed here.
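
The btrfs_set_and_info()/btrfs_clear_and_info() macros defined above exist so that a remount repeating an already-active option stays quiet: the message is printed only when the bit actually changes state, and the bit is then set or cleared unconditionally. A standalone model of the same idea, using a plain bitmask instead of the btrfs mount_opt helpers:

    #include <stdio.h>

    #define OPT_DISCARD (1u << 0)

    static unsigned int mount_opt;

    /* Log only on a state change, but always leave the bit in the
     * requested state -- mirrors btrfs_set_and_info() above. */
    static void set_and_info(unsigned int opt, const char *msg)
    {
            if (!(mount_opt & opt))
                    printf("info: %s\n", msg);
            mount_opt |= opt;
    }

    static void clear_and_info(unsigned int opt, const char *msg)
    {
            if (mount_opt & opt)
                    printf("info: %s\n", msg);
            mount_opt &= ~opt;
    }

    int main(void)
    {
            set_and_info(OPT_DISCARD, "turning on discard");    /* prints */
            set_and_info(OPT_DISCARD, "turning on discard");    /* silent */
            clear_and_info(OPT_DISCARD, "turning off discard"); /* prints */
            return 0;
    }
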
@@ -383,6 +412,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
383 int ret = 0; 412 int ret = 0;
384 char *compress_type; 413 char *compress_type;
385 bool compress_force = false; 414 bool compress_force = false;
415 bool compress = false;
386 416
387 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); 417 cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
388 if (cache_gen) 418 if (cache_gen)
@@ -409,7 +439,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
409 token = match_token(p, tokens, args); 439 token = match_token(p, tokens, args);
410 switch (token) { 440 switch (token) {
411 case Opt_degraded: 441 case Opt_degraded:
412 printk(KERN_INFO "btrfs: allowing degraded mounts\n"); 442 btrfs_info(root->fs_info, "allowing degraded mounts");
413 btrfs_set_opt(info->mount_opt, DEGRADED); 443 btrfs_set_opt(info->mount_opt, DEGRADED);
414 break; 444 break;
415 case Opt_subvol: 445 case Opt_subvol:
@@ -422,27 +452,45 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
422 */ 452 */
423 break; 453 break;
424 case Opt_nodatasum: 454 case Opt_nodatasum:
425 printk(KERN_INFO "btrfs: setting nodatasum\n"); 455 btrfs_set_and_info(root, NODATASUM,
426 btrfs_set_opt(info->mount_opt, NODATASUM); 456 "setting nodatasum");
457 break;
458 case Opt_datasum:
459 if (btrfs_test_opt(root, NODATASUM)) {
460 if (btrfs_test_opt(root, NODATACOW))
461 btrfs_info(root->fs_info, "setting datasum, datacow enabled");
462 else
463 btrfs_info(root->fs_info, "setting datasum");
464 }
465 btrfs_clear_opt(info->mount_opt, NODATACOW);
466 btrfs_clear_opt(info->mount_opt, NODATASUM);
427 break; 467 break;
428 case Opt_nodatacow: 468 case Opt_nodatacow:
429 if (!btrfs_test_opt(root, COMPRESS) || 469 if (!btrfs_test_opt(root, NODATACOW)) {
430 !btrfs_test_opt(root, FORCE_COMPRESS)) { 470 if (!btrfs_test_opt(root, COMPRESS) ||
431 printk(KERN_INFO "btrfs: setting nodatacow, compression disabled\n"); 471 !btrfs_test_opt(root, FORCE_COMPRESS)) {
432 } else { 472 btrfs_info(root->fs_info,
433 printk(KERN_INFO "btrfs: setting nodatacow\n"); 473 "setting nodatacow, compression disabled");
474 } else {
475 btrfs_info(root->fs_info, "setting nodatacow");
476 }
434 } 477 }
435 btrfs_clear_opt(info->mount_opt, COMPRESS); 478 btrfs_clear_opt(info->mount_opt, COMPRESS);
436 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); 479 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
437 btrfs_set_opt(info->mount_opt, NODATACOW); 480 btrfs_set_opt(info->mount_opt, NODATACOW);
438 btrfs_set_opt(info->mount_opt, NODATASUM); 481 btrfs_set_opt(info->mount_opt, NODATASUM);
439 break; 482 break;
483 case Opt_datacow:
484 btrfs_clear_and_info(root, NODATACOW,
485 "setting datacow");
486 break;
440 case Opt_compress_force: 487 case Opt_compress_force:
441 case Opt_compress_force_type: 488 case Opt_compress_force_type:
442 compress_force = true; 489 compress_force = true;
443 /* Fallthrough */ 490 /* Fallthrough */
444 case Opt_compress: 491 case Opt_compress:
445 case Opt_compress_type: 492 case Opt_compress_type:
493 compress = true;
446 if (token == Opt_compress || 494 if (token == Opt_compress ||
447 token == Opt_compress_force || 495 token == Opt_compress_force ||
448 strcmp(args[0].from, "zlib") == 0) { 496 strcmp(args[0].from, "zlib") == 0) {
@@ -469,34 +517,36 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
469 } 517 }
470 518
471 if (compress_force) { 519 if (compress_force) {
472 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 520 btrfs_set_and_info(root, FORCE_COMPRESS,
473 pr_info("btrfs: force %s compression\n", 521 "force %s compression",
474 compress_type); 522 compress_type);
475 } else if (btrfs_test_opt(root, COMPRESS)) { 523 } else if (compress) {
476 pr_info("btrfs: use %s compression\n", 524 if (!btrfs_test_opt(root, COMPRESS))
477 compress_type); 525 btrfs_info(root->fs_info,
526 "btrfs: use %s compression\n",
527 compress_type);
478 } 528 }
479 break; 529 break;
480 case Opt_ssd: 530 case Opt_ssd:
481 printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); 531 btrfs_set_and_info(root, SSD,
482 btrfs_set_opt(info->mount_opt, SSD); 532 "use ssd allocation scheme");
483 break; 533 break;
484 case Opt_ssd_spread: 534 case Opt_ssd_spread:
485 printk(KERN_INFO "btrfs: use spread ssd " 535 btrfs_set_and_info(root, SSD_SPREAD,
486 "allocation scheme\n"); 536 "use spread ssd allocation scheme");
487 btrfs_set_opt(info->mount_opt, SSD);
488 btrfs_set_opt(info->mount_opt, SSD_SPREAD);
489 break; 537 break;
490 case Opt_nossd: 538 case Opt_nossd:
491 printk(KERN_INFO "btrfs: not using ssd allocation " 539 btrfs_clear_and_info(root, NOSSD,
492 "scheme\n"); 540 "not using ssd allocation scheme");
493 btrfs_set_opt(info->mount_opt, NOSSD);
494 btrfs_clear_opt(info->mount_opt, SSD); 541 btrfs_clear_opt(info->mount_opt, SSD);
495 btrfs_clear_opt(info->mount_opt, SSD_SPREAD); 542 break;
543 case Opt_barrier:
544 btrfs_clear_and_info(root, NOBARRIER,
545 "turning on barriers");
496 break; 546 break;
497 case Opt_nobarrier: 547 case Opt_nobarrier:
498 printk(KERN_INFO "btrfs: turning off barriers\n"); 548 btrfs_set_and_info(root, NOBARRIER,
499 btrfs_set_opt(info->mount_opt, NOBARRIER); 549 "turning off barriers");
500 break; 550 break;
501 case Opt_thread_pool: 551 case Opt_thread_pool:
502 ret = match_int(&args[0], &intarg); 552 ret = match_int(&args[0], &intarg);
@@ -516,11 +566,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
516 kfree(num); 566 kfree(num);
517 567
518 if (info->max_inline) { 568 if (info->max_inline) {
519 info->max_inline = max_t(u64, 569 info->max_inline = min_t(u64,
520 info->max_inline, 570 info->max_inline,
521 root->sectorsize); 571 root->sectorsize);
522 } 572 }
523 printk(KERN_INFO "btrfs: max_inline at %llu\n", 573 btrfs_info(root->fs_info, "max_inline at %llu",
524 info->max_inline); 574 info->max_inline);
525 } else { 575 } else {
526 ret = -ENOMEM; 576 ret = -ENOMEM;
@@ -534,24 +584,34 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
534 info->alloc_start = memparse(num, NULL); 584 info->alloc_start = memparse(num, NULL);
535 mutex_unlock(&info->chunk_mutex); 585 mutex_unlock(&info->chunk_mutex);
536 kfree(num); 586 kfree(num);
537 printk(KERN_INFO 587 btrfs_info(root->fs_info, "allocations start at %llu",
538 "btrfs: allocations start at %llu\n",
539 info->alloc_start); 588 info->alloc_start);
540 } else { 589 } else {
541 ret = -ENOMEM; 590 ret = -ENOMEM;
542 goto out; 591 goto out;
543 } 592 }
544 break; 593 break;
594 case Opt_acl:
595 root->fs_info->sb->s_flags |= MS_POSIXACL;
596 break;
545 case Opt_noacl: 597 case Opt_noacl:
546 root->fs_info->sb->s_flags &= ~MS_POSIXACL; 598 root->fs_info->sb->s_flags &= ~MS_POSIXACL;
547 break; 599 break;
548 case Opt_notreelog: 600 case Opt_notreelog:
549 printk(KERN_INFO "btrfs: disabling tree log\n"); 601 btrfs_set_and_info(root, NOTREELOG,
550 btrfs_set_opt(info->mount_opt, NOTREELOG); 602 "disabling tree log");
603 break;
604 case Opt_treelog:
605 btrfs_clear_and_info(root, NOTREELOG,
606 "enabling tree log");
551 break; 607 break;
552 case Opt_flushoncommit: 608 case Opt_flushoncommit:
553 printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); 609 btrfs_set_and_info(root, FLUSHONCOMMIT,
554 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); 610 "turning on flush-on-commit");
611 break;
612 case Opt_noflushoncommit:
613 btrfs_clear_and_info(root, FLUSHONCOMMIT,
614 "turning off flush-on-commit");
555 break; 615 break;
556 case Opt_ratio: 616 case Opt_ratio:
557 ret = match_int(&args[0], &intarg); 617 ret = match_int(&args[0], &intarg);
@@ -559,7 +619,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
559 goto out; 619 goto out;
560 } else if (intarg >= 0) { 620 } else if (intarg >= 0) {
561 info->metadata_ratio = intarg; 621 info->metadata_ratio = intarg;
562 printk(KERN_INFO "btrfs: metadata ratio %d\n", 622 btrfs_info(root->fs_info, "metadata ratio %d",
563 info->metadata_ratio); 623 info->metadata_ratio);
564 } else { 624 } else {
565 ret = -EINVAL; 625 ret = -EINVAL;
@@ -567,25 +627,35 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
567 } 627 }
568 break; 628 break;
569 case Opt_discard: 629 case Opt_discard:
570 btrfs_set_opt(info->mount_opt, DISCARD); 630 btrfs_set_and_info(root, DISCARD,
631 "turning on discard");
632 break;
633 case Opt_nodiscard:
634 btrfs_clear_and_info(root, DISCARD,
635 "turning off discard");
571 break; 636 break;
572 case Opt_space_cache: 637 case Opt_space_cache:
573 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 638 btrfs_set_and_info(root, SPACE_CACHE,
639 "enabling disk space caching");
574 break; 640 break;
575 case Opt_rescan_uuid_tree: 641 case Opt_rescan_uuid_tree:
576 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); 642 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
577 break; 643 break;
578 case Opt_no_space_cache: 644 case Opt_no_space_cache:
579 printk(KERN_INFO "btrfs: disabling disk space caching\n"); 645 btrfs_clear_and_info(root, SPACE_CACHE,
580 btrfs_clear_opt(info->mount_opt, SPACE_CACHE); 646 "disabling disk space caching");
581 break; 647 break;
582 case Opt_inode_cache: 648 case Opt_inode_cache:
583 printk(KERN_INFO "btrfs: enabling inode map caching\n"); 649 btrfs_set_and_info(root, CHANGE_INODE_CACHE,
584 btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE); 650 "enabling inode map caching");
651 break;
652 case Opt_noinode_cache:
653 btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
654 "disabling inode map caching");
585 break; 655 break;
586 case Opt_clear_cache: 656 case Opt_clear_cache:
587 printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 657 btrfs_set_and_info(root, CLEAR_CACHE,
588 btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 658 "force clearing of disk cache");
589 break; 659 break;
590 case Opt_user_subvol_rm_allowed: 660 case Opt_user_subvol_rm_allowed:
591 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 661 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
@@ -593,12 +663,19 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
593 case Opt_enospc_debug: 663 case Opt_enospc_debug:
594 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); 664 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
595 break; 665 break;
666 case Opt_noenospc_debug:
667 btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
668 break;
596 case Opt_defrag: 669 case Opt_defrag:
597 printk(KERN_INFO "btrfs: enabling auto defrag\n"); 670 btrfs_set_and_info(root, AUTO_DEFRAG,
598 btrfs_set_opt(info->mount_opt, AUTO_DEFRAG); 671 "enabling auto defrag");
672 break;
673 case Opt_nodefrag:
674 btrfs_clear_and_info(root, AUTO_DEFRAG,
675 "disabling auto defrag");
599 break; 676 break;
600 case Opt_recovery: 677 case Opt_recovery:
601 printk(KERN_INFO "btrfs: enabling auto recovery\n"); 678 btrfs_info(root->fs_info, "enabling auto recovery");
602 btrfs_set_opt(info->mount_opt, RECOVERY); 679 btrfs_set_opt(info->mount_opt, RECOVERY);
603 break; 680 break;
604 case Opt_skip_balance: 681 case Opt_skip_balance:
@@ -606,14 +683,14 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
606 break; 683 break;
607#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 684#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
608 case Opt_check_integrity_including_extent_data: 685 case Opt_check_integrity_including_extent_data:
609 printk(KERN_INFO "btrfs: enabling check integrity" 686 btrfs_info(root->fs_info,
610 " including extent data\n"); 687 "enabling check integrity including extent data");
611 btrfs_set_opt(info->mount_opt, 688 btrfs_set_opt(info->mount_opt,
612 CHECK_INTEGRITY_INCLUDING_EXTENT_DATA); 689 CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
613 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 690 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
614 break; 691 break;
615 case Opt_check_integrity: 692 case Opt_check_integrity:
616 printk(KERN_INFO "btrfs: enabling check integrity\n"); 693 btrfs_info(root->fs_info, "enabling check integrity");
617 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 694 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
618 break; 695 break;
619 case Opt_check_integrity_print_mask: 696 case Opt_check_integrity_print_mask:
@@ -622,8 +699,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
622 goto out; 699 goto out;
623 } else if (intarg >= 0) { 700 } else if (intarg >= 0) {
624 info->check_integrity_print_mask = intarg; 701 info->check_integrity_print_mask = intarg;
625 printk(KERN_INFO "btrfs:" 702 btrfs_info(root->fs_info, "check_integrity_print_mask 0x%x",
626 " check_integrity_print_mask 0x%x\n",
627 info->check_integrity_print_mask); 703 info->check_integrity_print_mask);
628 } else { 704 } else {
629 ret = -EINVAL; 705 ret = -EINVAL;
@@ -634,8 +710,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
634 case Opt_check_integrity_including_extent_data: 710 case Opt_check_integrity_including_extent_data:
635 case Opt_check_integrity: 711 case Opt_check_integrity:
636 case Opt_check_integrity_print_mask: 712 case Opt_check_integrity_print_mask:
637 printk(KERN_ERR "btrfs: support for check_integrity*" 713 btrfs_err(root->fs_info,
638 " not compiled in!\n"); 714 "support for check_integrity* not compiled in!");
639 ret = -EINVAL; 715 ret = -EINVAL;
640 goto out; 716 goto out;
641#endif 717#endif
@@ -655,28 +731,24 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
655 intarg = 0; 731 intarg = 0;
656 ret = match_int(&args[0], &intarg); 732 ret = match_int(&args[0], &intarg);
657 if (ret < 0) { 733 if (ret < 0) {
658 printk(KERN_ERR 734 btrfs_err(root->fs_info, "invalid commit interval");
659 "btrfs: invalid commit interval\n");
660 ret = -EINVAL; 735 ret = -EINVAL;
661 goto out; 736 goto out;
662 } 737 }
663 if (intarg > 0) { 738 if (intarg > 0) {
664 if (intarg > 300) { 739 if (intarg > 300) {
665 printk(KERN_WARNING 740 btrfs_warn(root->fs_info, "excessive commit interval %d",
666 "btrfs: excessive commit interval %d\n",
667 intarg); 741 intarg);
668 } 742 }
669 info->commit_interval = intarg; 743 info->commit_interval = intarg;
670 } else { 744 } else {
671 printk(KERN_INFO 745 btrfs_info(root->fs_info, "using default commit interval %ds",
672 "btrfs: using default commit interval %ds\n",
673 BTRFS_DEFAULT_COMMIT_INTERVAL); 746 BTRFS_DEFAULT_COMMIT_INTERVAL);
674 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; 747 info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
675 } 748 }
676 break; 749 break;
677 case Opt_err: 750 case Opt_err:
678 printk(KERN_INFO "btrfs: unrecognized mount option " 751 btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
679 "'%s'\n", p);
680 ret = -EINVAL; 752 ret = -EINVAL;
681 goto out; 753 goto out;
682 default: 754 default:
@@ -685,7 +757,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
685 } 757 }
686out: 758out:
687 if (!ret && btrfs_test_opt(root, SPACE_CACHE)) 759 if (!ret && btrfs_test_opt(root, SPACE_CACHE))
688 printk(KERN_INFO "btrfs: disk space caching is enabled\n"); 760 btrfs_info(root->fs_info, "disk space caching is enabled");
689 kfree(orig); 761 kfree(orig);
690 return ret; 762 return ret;
691} 763}
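
The switch above is driven by the match_table_t at the top of the file: the option string is split on commas, each token is looked up in the table, and patterns like "thread_pool=%d" capture their argument into a substring_t for match_int()/match_strdup(). A compact userspace approximation of that flow -- the kernel's match_token() also handles the %s/%d patterns, this sketch only mimics the fixed-token case:

    #include <stdio.h>
    #include <string.h>

    enum { Opt_discard, Opt_nodiscard, Opt_err };

    static const struct { int token; const char *pattern; } tokens[] = {
            { Opt_discard,   "discard"   },
            { Opt_nodiscard, "nodiscard" },
            { Opt_err,       NULL        },
    };

    static int match_token(const char *p)
    {
            for (int i = 0; tokens[i].pattern; i++)
                    if (strcmp(p, tokens[i].pattern) == 0)
                            return tokens[i].token;
            return Opt_err;
    }

    int main(void)
    {
            char options[] = "discard,nodiscard,bogus";

            /* Walk comma-separated options as btrfs_parse_options does. */
            for (char *p = strtok(options, ","); p; p = strtok(NULL, ","))
                    printf("'%s' -> token %d\n", p, match_token(p));
            return 0;
    }
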
@@ -748,7 +820,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
748 break; 820 break;
749 case Opt_subvolrootid: 821 case Opt_subvolrootid:
750 printk(KERN_WARNING 822 printk(KERN_WARNING
751 "btrfs: 'subvolrootid' mount option is deprecated and has no effect\n"); 823 "BTRFS: 'subvolrootid' mount option is deprecated and has "
824 "no effect\n");
752 break; 825 break;
753 case Opt_device: 826 case Opt_device:
754 device_name = match_strdup(&args[0]); 827 device_name = match_strdup(&args[0]);
@@ -782,6 +855,7 @@ static struct dentry *get_default_root(struct super_block *sb,
782 struct btrfs_path *path; 855 struct btrfs_path *path;
783 struct btrfs_key location; 856 struct btrfs_key location;
784 struct inode *inode; 857 struct inode *inode;
858 struct dentry *dentry;
785 u64 dir_id; 859 u64 dir_id;
786 int new = 0; 860 int new = 0;
787 861
@@ -852,7 +926,13 @@ setup_root:
852 return dget(sb->s_root); 926 return dget(sb->s_root);
853 } 927 }
854 928
855 return d_obtain_alias(inode); 929 dentry = d_obtain_alias(inode);
930 if (!IS_ERR(dentry)) {
931 spin_lock(&dentry->d_lock);
932 dentry->d_flags &= ~DCACHE_DISCONNECTED;
933 spin_unlock(&dentry->d_lock);
934 }
935 return dentry;
856} 936}
857 937
858static int btrfs_fill_super(struct super_block *sb, 938static int btrfs_fill_super(struct super_block *sb,
@@ -877,7 +957,7 @@ static int btrfs_fill_super(struct super_block *sb,
877 sb->s_flags |= MS_I_VERSION; 957 sb->s_flags |= MS_I_VERSION;
878 err = open_ctree(sb, fs_devices, (char *)data); 958 err = open_ctree(sb, fs_devices, (char *)data);
879 if (err) { 959 if (err) {
880 printk("btrfs: open_ctree failed\n"); 960 printk(KERN_ERR "BTRFS: open_ctree failed\n");
881 return err; 961 return err;
882 } 962 }
883 963
@@ -1115,7 +1195,7 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
1115 dput(root); 1195 dput(root);
1116 root = ERR_PTR(-EINVAL); 1196 root = ERR_PTR(-EINVAL);
1117 deactivate_locked_super(s); 1197 deactivate_locked_super(s);
1118 printk(KERN_ERR "btrfs: '%s' is not a valid subvolume\n", 1198 printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n",
1119 subvol_name); 1199 subvol_name);
1120 } 1200 }
1121 1201
@@ -1240,7 +1320,7 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1240 1320
1241 fs_info->thread_pool_size = new_pool_size; 1321 fs_info->thread_pool_size = new_pool_size;
1242 1322
1243 printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n", 1323 btrfs_info(fs_info, "resize thread pool %d -> %d",
1244 old_pool_size, new_pool_size); 1324 old_pool_size, new_pool_size);
1245 1325
1246 btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); 1326 btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size);
@@ -1346,7 +1426,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1346 } else { 1426 } else {
1347 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) { 1427 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
1348 btrfs_err(fs_info, 1428 btrfs_err(fs_info,
1349 "Remounting read-write after error is not allowed\n"); 1429 "Remounting read-write after error is not allowed");
1350 ret = -EINVAL; 1430 ret = -EINVAL;
1351 goto restore; 1431 goto restore;
1352 } 1432 }
@@ -1358,8 +1438,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1358 if (fs_info->fs_devices->missing_devices > 1438 if (fs_info->fs_devices->missing_devices >
1359 fs_info->num_tolerated_disk_barrier_failures && 1439 fs_info->num_tolerated_disk_barrier_failures &&
1360 !(*flags & MS_RDONLY)) { 1440 !(*flags & MS_RDONLY)) {
1361 printk(KERN_WARNING 1441 btrfs_warn(fs_info,
1362 "Btrfs: too many missing devices, writeable remount is not allowed\n"); 1442 "too many missing devices, writeable remount is not allowed");
1363 ret = -EACCES; 1443 ret = -EACCES;
1364 goto restore; 1444 goto restore;
1365 } 1445 }
@@ -1384,16 +1464,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1384 1464
1385 ret = btrfs_resume_dev_replace_async(fs_info); 1465 ret = btrfs_resume_dev_replace_async(fs_info);
1386 if (ret) { 1466 if (ret) {
1387 pr_warn("btrfs: failed to resume dev_replace\n"); 1467 btrfs_warn(fs_info, "failed to resume dev_replace");
1388 goto restore; 1468 goto restore;
1389 } 1469 }
1390 1470
1391 if (!fs_info->uuid_root) { 1471 if (!fs_info->uuid_root) {
1392 pr_info("btrfs: creating UUID tree\n"); 1472 btrfs_info(fs_info, "creating UUID tree");
1393 ret = btrfs_create_uuid_tree(fs_info); 1473 ret = btrfs_create_uuid_tree(fs_info);
1394 if (ret) { 1474 if (ret) {
1395 pr_warn("btrfs: failed to create the uuid tree" 1475 btrfs_warn(fs_info, "failed to create the UUID tree %d", ret);
1396 "%d\n", ret);
1397 goto restore; 1476 goto restore;
1398 } 1477 }
1399 } 1478 }
@@ -1773,7 +1852,7 @@ static int btrfs_interface_init(void)
1773static void btrfs_interface_exit(void) 1852static void btrfs_interface_exit(void)
1774{ 1853{
1775 if (misc_deregister(&btrfs_misc) < 0) 1854 if (misc_deregister(&btrfs_misc) < 0)
1776 printk(KERN_INFO "btrfs: misc_deregister failed for control device\n"); 1855 printk(KERN_INFO "BTRFS: misc_deregister failed for control device\n");
1777} 1856}
1778 1857
1779static void btrfs_print_info(void) 1858static void btrfs_print_info(void)
@@ -1818,10 +1897,16 @@ static int __init init_btrfs_fs(void)
1818{ 1897{
1819 int err; 1898 int err;
1820 1899
1821 err = btrfs_init_sysfs(); 1900 err = btrfs_hash_init();
1822 if (err) 1901 if (err)
1823 return err; 1902 return err;
1824 1903
1904 btrfs_props_init();
1905
1906 err = btrfs_init_sysfs();
1907 if (err)
1908 goto free_hash;
1909
1825 btrfs_init_compress(); 1910 btrfs_init_compress();
1826 1911
1827 err = btrfs_init_cachep(); 1912 err = btrfs_init_cachep();
@@ -1895,6 +1980,8 @@ free_cachep:
1895free_compress: 1980free_compress:
1896 btrfs_exit_compress(); 1981 btrfs_exit_compress();
1897 btrfs_exit_sysfs(); 1982 btrfs_exit_sysfs();
1983free_hash:
1984 btrfs_hash_exit();
1898 return err; 1985 return err;
1899} 1986}
1900 1987
@@ -1913,9 +2000,10 @@ static void __exit exit_btrfs_fs(void)
1913 btrfs_exit_sysfs(); 2000 btrfs_exit_sysfs();
1914 btrfs_cleanup_fs_uuids(); 2001 btrfs_cleanup_fs_uuids();
1915 btrfs_exit_compress(); 2002 btrfs_exit_compress();
2003 btrfs_hash_exit();
1916} 2004}
1917 2005
1918module_init(init_btrfs_fs) 2006late_initcall(init_btrfs_fs);
1919module_exit(exit_btrfs_fs) 2007module_exit(exit_btrfs_fs)
1920 2008
1921MODULE_LICENSE("GPL"); 2009MODULE_LICENSE("GPL");
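
Two things change in the init path above: btrfs_hash_init() must now run before anything that computes checksums, and a failure anywhere later has to unwind it, hence the new free_hash label at the end of the existing goto chain. Switching module_init() to late_initcall() additionally defers built-in initialization until the crypto layer providing crc32c has registered. The constructor-chain idiom itself, sketched standalone:

    #include <stdio.h>

    static int  init_a(void) { puts("init a"); return 0; }
    static void exit_a(void) { puts("exit a"); }
    static int  init_b(void) { puts("init b"); return -1; /* simulated failure */ }

    /* Kernel idiom: initialize in order, unwind in reverse on error. */
    static int init_all(void)
    {
            int err;

            err = init_a();
            if (err)
                    return err;
            err = init_b();
            if (err)
                    goto free_a;
            return 0;

    free_a:
            exit_a();
            return err;
    }

    int main(void) { return init_all() ? 1 : 0; }
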
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 5b326cd60a4a..865f4cf9a769 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -22,24 +22,647 @@
22#include <linux/completion.h> 22#include <linux/completion.h>
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/kobject.h> 24#include <linux/kobject.h>
25#include <linux/bug.h>
26#include <linux/genhd.h>
25 27
26#include "ctree.h" 28#include "ctree.h"
27#include "disk-io.h" 29#include "disk-io.h"
28#include "transaction.h" 30#include "transaction.h"
31#include "sysfs.h"
32#include "volumes.h"
33
34static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
35
36static u64 get_features(struct btrfs_fs_info *fs_info,
37 enum btrfs_feature_set set)
38{
39 struct btrfs_super_block *disk_super = fs_info->super_copy;
40 if (set == FEAT_COMPAT)
41 return btrfs_super_compat_flags(disk_super);
42 else if (set == FEAT_COMPAT_RO)
43 return btrfs_super_compat_ro_flags(disk_super);
44 else
45 return btrfs_super_incompat_flags(disk_super);
46}
47
48static void set_features(struct btrfs_fs_info *fs_info,
49 enum btrfs_feature_set set, u64 features)
50{
51 struct btrfs_super_block *disk_super = fs_info->super_copy;
52 if (set == FEAT_COMPAT)
53 btrfs_set_super_compat_flags(disk_super, features);
54 else if (set == FEAT_COMPAT_RO)
55 btrfs_set_super_compat_ro_flags(disk_super, features);
56 else
57 btrfs_set_super_incompat_flags(disk_super, features);
58}
59
60static int can_modify_feature(struct btrfs_feature_attr *fa)
61{
62 int val = 0;
63 u64 set, clear;
64 switch (fa->feature_set) {
65 case FEAT_COMPAT:
66 set = BTRFS_FEATURE_COMPAT_SAFE_SET;
67 clear = BTRFS_FEATURE_COMPAT_SAFE_CLEAR;
68 break;
69 case FEAT_COMPAT_RO:
70 set = BTRFS_FEATURE_COMPAT_RO_SAFE_SET;
71 clear = BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR;
72 break;
73 case FEAT_INCOMPAT:
74 set = BTRFS_FEATURE_INCOMPAT_SAFE_SET;
75 clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
76 break;
77 default:
78 printk(KERN_WARNING "btrfs: sysfs: unknown feature set %d\n",
79 fa->feature_set);
80 return 0;
81 }
82
83 if (set & fa->feature_bit)
84 val |= 1;
85 if (clear & fa->feature_bit)
86 val |= 2;
87
88 return val;
89}
90
91static ssize_t btrfs_feature_attr_show(struct kobject *kobj,
92 struct kobj_attribute *a, char *buf)
93{
94 int val = 0;
95 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
96 struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a);
97 if (fs_info) {
98 u64 features = get_features(fs_info, fa->feature_set);
99 if (features & fa->feature_bit)
100 val = 1;
101 } else
102 val = can_modify_feature(fa);
103
104 return snprintf(buf, PAGE_SIZE, "%d\n", val);
105}
106
107static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
108 struct kobj_attribute *a,
109 const char *buf, size_t count)
110{
111 struct btrfs_fs_info *fs_info;
112 struct btrfs_feature_attr *fa = to_btrfs_feature_attr(a);
113 struct btrfs_trans_handle *trans;
114 u64 features, set, clear;
115 unsigned long val;
116 int ret;
117
118 fs_info = to_fs_info(kobj);
119 if (!fs_info)
120 return -EPERM;
121
122 ret = kstrtoul(skip_spaces(buf), 0, &val);
123 if (ret)
124 return ret;
125
126 if (fa->feature_set == FEAT_COMPAT) {
127 set = BTRFS_FEATURE_COMPAT_SAFE_SET;
128 clear = BTRFS_FEATURE_COMPAT_SAFE_CLEAR;
129 } else if (fa->feature_set == FEAT_COMPAT_RO) {
130 set = BTRFS_FEATURE_COMPAT_RO_SAFE_SET;
131 clear = BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR;
132 } else {
133 set = BTRFS_FEATURE_INCOMPAT_SAFE_SET;
134 clear = BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR;
135 }
136
137 features = get_features(fs_info, fa->feature_set);
138
139 /* Nothing to do */
140 if ((val && (features & fa->feature_bit)) ||
141 (!val && !(features & fa->feature_bit)))
142 return count;
143
144 if ((val && !(set & fa->feature_bit)) ||
145 (!val && !(clear & fa->feature_bit))) {
146 btrfs_info(fs_info,
147 "%sabling feature %s on mounted fs is not supported.",
148 val ? "En" : "Dis", fa->kobj_attr.attr.name);
149 return -EPERM;
150 }
151
152 btrfs_info(fs_info, "%s %s feature flag",
153 val ? "Setting" : "Clearing", fa->kobj_attr.attr.name);
154
155 trans = btrfs_start_transaction(fs_info->fs_root, 0);
156 if (IS_ERR(trans))
157 return PTR_ERR(trans);
158
159 spin_lock(&fs_info->super_lock);
160 features = get_features(fs_info, fa->feature_set);
161 if (val)
162 features |= fa->feature_bit;
163 else
164 features &= ~fa->feature_bit;
165 set_features(fs_info, fa->feature_set, features);
166 spin_unlock(&fs_info->super_lock);
167
168 ret = btrfs_commit_transaction(trans, fs_info->fs_root);
169 if (ret)
170 return ret;
171
172 return count;
173}
174
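
Whether a feature bit may be flipped on a mounted filesystem is decided entirely by the per-set SAFE_SET/SAFE_CLEAR masks above: setting needs the bit in the set mask, clearing needs it in the clear mask, anything else returns -EPERM. A self-contained model of that gate (the mask values here are invented for illustration):

    #include <stdio.h>
    #include <stdint.h>
    #include <errno.h>

    #define FEAT_A (1ull << 0)
    #define FEAT_B (1ull << 1)

    /* Hypothetical policy: A may be enabled live, B may only be cleared. */
    static const uint64_t safe_set   = FEAT_A;
    static const uint64_t safe_clear = FEAT_B;

    static int toggle_feature(uint64_t *features, uint64_t bit, int val)
    {
            /* Nothing to do if already in the requested state. */
            if ((val && (*features & bit)) || (!val && !(*features & bit)))
                    return 0;
            /* Refuse transitions not whitelisted for a mounted fs. */
            if ((val && !(safe_set & bit)) || (!val && !(safe_clear & bit)))
                    return -EPERM;
            if (val)
                    *features |= bit;
            else
                    *features &= ~bit;
            return 0;
    }

    int main(void)
    {
            uint64_t features = FEAT_B;

            printf("enable A: %d\n", toggle_feature(&features, FEAT_A, 1)); /* 0 */
            printf("clear  B: %d\n", toggle_feature(&features, FEAT_B, 0)); /* 0 */
            printf("enable B: %d\n", toggle_feature(&features, FEAT_B, 1)); /* -EPERM */
            return 0;
    }

The real store path does the second check, logs the transition, then rewrites the superblock flags inside a committed transaction so the new state is persistent.
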
175static umode_t btrfs_feature_visible(struct kobject *kobj,
176 struct attribute *attr, int unused)
177{
178 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
179 umode_t mode = attr->mode;
180
181 if (fs_info) {
182 struct btrfs_feature_attr *fa;
183 u64 features;
184
185 fa = attr_to_btrfs_feature_attr(attr);
186 features = get_features(fs_info, fa->feature_set);
187
188 if (can_modify_feature(fa))
189 mode |= S_IWUSR;
190 else if (!(features & fa->feature_bit))
191 mode = 0;
192 }
193
194 return mode;
195}
196
197BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF);
198BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL);
199BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS);
200BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO);
201BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA);
202BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
203BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
204BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
205BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
206
207static struct attribute *btrfs_supported_feature_attrs[] = {
208 BTRFS_FEAT_ATTR_PTR(mixed_backref),
209 BTRFS_FEAT_ATTR_PTR(default_subvol),
210 BTRFS_FEAT_ATTR_PTR(mixed_groups),
211 BTRFS_FEAT_ATTR_PTR(compress_lzo),
212 BTRFS_FEAT_ATTR_PTR(big_metadata),
213 BTRFS_FEAT_ATTR_PTR(extended_iref),
214 BTRFS_FEAT_ATTR_PTR(raid56),
215 BTRFS_FEAT_ATTR_PTR(skinny_metadata),
216 BTRFS_FEAT_ATTR_PTR(no_holes),
217 NULL
218};
219
220static const struct attribute_group btrfs_feature_attr_group = {
221 .name = "features",
222 .is_visible = btrfs_feature_visible,
223 .attrs = btrfs_supported_feature_attrs,
224};
225
226static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
227{
228 u64 val;
229 if (lock)
230 spin_lock(lock);
231 val = *value_ptr;
232 if (lock)
233 spin_unlock(lock);
234 return snprintf(buf, PAGE_SIZE, "%llu\n", val);
235}
236
237static ssize_t global_rsv_size_show(struct kobject *kobj,
238 struct kobj_attribute *ka, char *buf)
239{
240 struct btrfs_fs_info *fs_info = to_fs_info(kobj->parent);
241 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
242 return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf);
243}
244BTRFS_ATTR(global_rsv_size, 0444, global_rsv_size_show);
245
246static ssize_t global_rsv_reserved_show(struct kobject *kobj,
247 struct kobj_attribute *a, char *buf)
248{
249 struct btrfs_fs_info *fs_info = to_fs_info(kobj->parent);
250 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
251 return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf);
252}
253BTRFS_ATTR(global_rsv_reserved, 0444, global_rsv_reserved_show);
254
255#define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj)
256
257static ssize_t raid_bytes_show(struct kobject *kobj,
258 struct kobj_attribute *attr, char *buf);
259BTRFS_RAID_ATTR(total_bytes, raid_bytes_show);
260BTRFS_RAID_ATTR(used_bytes, raid_bytes_show);
261
262static ssize_t raid_bytes_show(struct kobject *kobj,
263 struct kobj_attribute *attr, char *buf)
264
265{
266 struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
267 struct btrfs_block_group_cache *block_group;
268 int index = kobj - sinfo->block_group_kobjs;
269 u64 val = 0;
270
271 down_read(&sinfo->groups_sem);
272 list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
273 if (&attr->attr == BTRFS_RAID_ATTR_PTR(total_bytes))
274 val += block_group->key.offset;
275 else
276 val += btrfs_block_group_used(&block_group->item);
277 }
278 up_read(&sinfo->groups_sem);
279 return snprintf(buf, PAGE_SIZE, "%llu\n", val);
280}
281
282static struct attribute *raid_attributes[] = {
283 BTRFS_RAID_ATTR_PTR(total_bytes),
284 BTRFS_RAID_ATTR_PTR(used_bytes),
285 NULL
286};
287
288static void release_raid_kobj(struct kobject *kobj)
289{
290 kobject_put(kobj->parent);
291}
292
293struct kobj_type btrfs_raid_ktype = {
294 .sysfs_ops = &kobj_sysfs_ops,
295 .release = release_raid_kobj,
296 .default_attrs = raid_attributes,
297};
298
299#define SPACE_INFO_ATTR(field) \
300static ssize_t btrfs_space_info_show_##field(struct kobject *kobj, \
301 struct kobj_attribute *a, \
302 char *buf) \
303{ \
304 struct btrfs_space_info *sinfo = to_space_info(kobj); \
305 return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \
306} \
307BTRFS_ATTR(field, 0444, btrfs_space_info_show_##field)
308
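
SPACE_INFO_ATTR(field) stamps out one show function and one attribute per struct member via ##-token pasting, so exposing another counter from struct btrfs_space_info costs a single macro invocation. The generator pattern in isolation, as a tiny standalone analogue:

    #include <stdio.h>

    struct stats { unsigned long total_bytes, bytes_used; };

    /* Generate one getter per field, as SPACE_INFO_ATTR does for sysfs. */
    #define STATS_GETTER(field) \
    static unsigned long get_##field(const struct stats *s) \
    { return s->field; }

    STATS_GETTER(total_bytes)
    STATS_GETTER(bytes_used)

    int main(void)
    {
            struct stats s = { .total_bytes = 1024, .bytes_used = 512 };

            printf("%lu %lu\n", get_total_bytes(&s), get_bytes_used(&s));
            return 0;
    }
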
309static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
310 struct kobj_attribute *a,
311 char *buf)
312{
313 struct btrfs_space_info *sinfo = to_space_info(kobj);
314 s64 val = percpu_counter_sum(&sinfo->total_bytes_pinned);
315 return snprintf(buf, PAGE_SIZE, "%lld\n", val);
316}
317
318SPACE_INFO_ATTR(flags);
319SPACE_INFO_ATTR(total_bytes);
320SPACE_INFO_ATTR(bytes_used);
321SPACE_INFO_ATTR(bytes_pinned);
322SPACE_INFO_ATTR(bytes_reserved);
323SPACE_INFO_ATTR(bytes_may_use);
324SPACE_INFO_ATTR(disk_used);
325SPACE_INFO_ATTR(disk_total);
326BTRFS_ATTR(total_bytes_pinned, 0444, btrfs_space_info_show_total_bytes_pinned);
327
328static struct attribute *space_info_attrs[] = {
329 BTRFS_ATTR_PTR(flags),
330 BTRFS_ATTR_PTR(total_bytes),
331 BTRFS_ATTR_PTR(bytes_used),
332 BTRFS_ATTR_PTR(bytes_pinned),
333 BTRFS_ATTR_PTR(bytes_reserved),
334 BTRFS_ATTR_PTR(bytes_may_use),
335 BTRFS_ATTR_PTR(disk_used),
336 BTRFS_ATTR_PTR(disk_total),
337 BTRFS_ATTR_PTR(total_bytes_pinned),
338 NULL,
339};
340
341static void space_info_release(struct kobject *kobj)
342{
343 struct btrfs_space_info *sinfo = to_space_info(kobj);
344 percpu_counter_destroy(&sinfo->total_bytes_pinned);
345 kfree(sinfo);
346}
347
348struct kobj_type space_info_ktype = {
349 .sysfs_ops = &kobj_sysfs_ops,
350 .release = space_info_release,
351 .default_attrs = space_info_attrs,
352};
353
354static const struct attribute *allocation_attrs[] = {
355 BTRFS_ATTR_PTR(global_rsv_reserved),
356 BTRFS_ATTR_PTR(global_rsv_size),
357 NULL,
358};
359
360static ssize_t btrfs_label_show(struct kobject *kobj,
361 struct kobj_attribute *a, char *buf)
362{
363 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
364 return snprintf(buf, PAGE_SIZE, "%s\n", fs_info->super_copy->label);
365}
366
367static ssize_t btrfs_label_store(struct kobject *kobj,
368 struct kobj_attribute *a,
369 const char *buf, size_t len)
370{
371 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
372 struct btrfs_trans_handle *trans;
373 struct btrfs_root *root = fs_info->fs_root;
374 int ret;
375
376 if (len >= BTRFS_LABEL_SIZE) {
377 pr_err("BTRFS: unable to set label with more than %d bytes\n",
378 BTRFS_LABEL_SIZE - 1);
379 return -EINVAL;
380 }
381
382 trans = btrfs_start_transaction(root, 0);
383 if (IS_ERR(trans))
384 return PTR_ERR(trans);
385
386 spin_lock(&root->fs_info->super_lock);
387 strcpy(fs_info->super_copy->label, buf);
388 spin_unlock(&root->fs_info->super_lock);
389 ret = btrfs_commit_transaction(trans, root);
390
391 if (!ret)
392 return len;
393
394 return ret;
395}
396BTRFS_ATTR_RW(label, 0644, btrfs_label_show, btrfs_label_store);
397
398static struct attribute *btrfs_attrs[] = {
399 BTRFS_ATTR_PTR(label),
400 NULL,
401};
402
403static void btrfs_release_super_kobj(struct kobject *kobj)
404{
405 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
406 complete(&fs_info->kobj_unregister);
407}
408
409static struct kobj_type btrfs_ktype = {
410 .sysfs_ops = &kobj_sysfs_ops,
411 .release = btrfs_release_super_kobj,
412 .default_attrs = btrfs_attrs,
413};
414
415static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
416{
417 if (kobj->ktype != &btrfs_ktype)
418 return NULL;
419 return container_of(kobj, struct btrfs_fs_info, super_kobj);
420}
421
422#define NUM_FEATURE_BITS 64
423static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
424static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
425
426static u64 supported_feature_masks[3] = {
427 [FEAT_COMPAT] = BTRFS_FEATURE_COMPAT_SUPP,
428 [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
429 [FEAT_INCOMPAT] = BTRFS_FEATURE_INCOMPAT_SUPP,
430};
431
432static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
433{
434 int set;
435
436 for (set = 0; set < FEAT_MAX; set++) {
437 int i;
438 struct attribute *attrs[2];
439 struct attribute_group agroup = {
440 .name = "features",
441 .attrs = attrs,
442 };
443 u64 features = get_features(fs_info, set);
444 features &= ~supported_feature_masks[set];
445
446 if (!features)
447 continue;
448
449 attrs[1] = NULL;
450 for (i = 0; i < NUM_FEATURE_BITS; i++) {
451 struct btrfs_feature_attr *fa;
452
453 if (!(features & (1ULL << i)))
454 continue;
455
456 fa = &btrfs_feature_attrs[set][i];
457 attrs[0] = &fa->kobj_attr.attr;
458 if (add) {
459 int ret;
460 ret = sysfs_merge_group(&fs_info->super_kobj,
461 &agroup);
462 if (ret)
463 return ret;
464 } else
465 sysfs_unmerge_group(&fs_info->super_kobj,
466 &agroup);
467 }
468
469 }
470 return 0;
471}
472
473static void __btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
474{
475 kobject_del(&fs_info->super_kobj);
476 kobject_put(&fs_info->super_kobj);
477 wait_for_completion(&fs_info->kobj_unregister);
478}
479
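
__btrfs_sysfs_remove_one() relies on the kobject release/completion handshake: kobject_put() may drop the last reference asynchronously, the ktype's release callback fires complete(), and wait_for_completion() blocks until that happens so fs_info cannot be freed while sysfs still references it (the matching init_completion() is in btrfs_sysfs_add_one() below). The same handshake modeled with pthreads -- illustrative structure, not kernel API:

    #include <pthread.h>
    #include <stdio.h>

    struct obj {
            int refcount;
            int released;
            pthread_mutex_t lock;
            pthread_cond_t cond;
    };

    static void obj_put(struct obj *o)
    {
            pthread_mutex_lock(&o->lock);
            if (--o->refcount == 0) {
                    o->released = 1;              /* the "release callback" */
                    pthread_cond_signal(&o->cond);
            }
            pthread_mutex_unlock(&o->lock);
    }

    static void obj_wait_released(struct obj *o)
    {
            pthread_mutex_lock(&o->lock);
            while (!o->released)
                    pthread_cond_wait(&o->cond, &o->lock);
            pthread_mutex_unlock(&o->lock);
    }

    int main(void)
    {
            struct obj o = { .refcount = 1,
                             .lock = PTHREAD_MUTEX_INITIALIZER,
                             .cond = PTHREAD_COND_INITIALIZER };

            obj_put(&o);            /* last reference goes away... */
            obj_wait_released(&o);  /* ...so the waiter can proceed */
            puts("safe to free");
            return 0;
    }
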
480void btrfs_sysfs_remove_one(struct btrfs_fs_info *fs_info)
481{
482 if (fs_info->space_info_kobj) {
483 sysfs_remove_files(fs_info->space_info_kobj, allocation_attrs);
484 kobject_del(fs_info->space_info_kobj);
485 kobject_put(fs_info->space_info_kobj);
486 }
487 kobject_del(fs_info->device_dir_kobj);
488 kobject_put(fs_info->device_dir_kobj);
489 addrm_unknown_feature_attrs(fs_info, false);
490 sysfs_remove_group(&fs_info->super_kobj, &btrfs_feature_attr_group);
491 __btrfs_sysfs_remove_one(fs_info);
492}
493
494const char * const btrfs_feature_set_names[3] = {
495 [FEAT_COMPAT] = "compat",
496 [FEAT_COMPAT_RO] = "compat_ro",
497 [FEAT_INCOMPAT] = "incompat",
498};
499
500char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags)
501{
502 size_t bufsize = 4096; /* safe max, 64 names * 64 bytes */
503 int len = 0;
504 int i;
505 char *str;
506
507 str = kmalloc(bufsize, GFP_KERNEL);
508 if (!str)
509 return str;
510
511 for (i = 0; i < ARRAY_SIZE(btrfs_feature_attrs[set]); i++) {
512 const char *name;
513
514 if (!(flags & (1ULL << i)))
515 continue;
516
517 name = btrfs_feature_attrs[set][i].kobj_attr.attr.name;
518 len += snprintf(str + len, bufsize - len, "%s%s",
519 len ? "," : "", name);
520 }
521
522 return str;
523}
524
525static void init_feature_attrs(void)
526{
527 struct btrfs_feature_attr *fa;
528 int set, i;
529
530 BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
531 ARRAY_SIZE(btrfs_feature_attrs));
532 BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
533 ARRAY_SIZE(btrfs_feature_attrs[0]));
534
535 memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
536 memset(btrfs_unknown_feature_names, 0,
537 sizeof(btrfs_unknown_feature_names));
538
539 for (i = 0; btrfs_supported_feature_attrs[i]; i++) {
540 struct btrfs_feature_attr *sfa;
541 struct attribute *a = btrfs_supported_feature_attrs[i];
542 int bit;
543 sfa = attr_to_btrfs_feature_attr(a);
544 bit = ilog2(sfa->feature_bit);
545 fa = &btrfs_feature_attrs[sfa->feature_set][bit];
546
547 fa->kobj_attr.attr.name = sfa->kobj_attr.attr.name;
548 }
549
550 for (set = 0; set < FEAT_MAX; set++) {
551 for (i = 0; i < ARRAY_SIZE(btrfs_feature_attrs[set]); i++) {
552 char *name = btrfs_unknown_feature_names[set][i];
553 fa = &btrfs_feature_attrs[set][i];
554
555 if (fa->kobj_attr.attr.name)
556 continue;
557
558 snprintf(name, 13, "%s:%u",
559 btrfs_feature_set_names[set], i);
560
561 fa->kobj_attr.attr.name = name;
562 fa->kobj_attr.attr.mode = S_IRUGO;
563 fa->feature_set = set;
564 fa->feature_bit = 1ULL << i;
565 }
566 }
567}
568
569static int add_device_membership(struct btrfs_fs_info *fs_info)
570{
571 int error = 0;
572 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
573 struct btrfs_device *dev;
574
575 fs_info->device_dir_kobj = kobject_create_and_add("devices",
576 &fs_info->super_kobj);
577 if (!fs_info->device_dir_kobj)
578 return -ENOMEM;
579
580 list_for_each_entry(dev, &fs_devices->devices, dev_list) {
581 struct hd_struct *disk;
582 struct kobject *disk_kobj;
583
584 if (!dev->bdev)
585 continue;
586
587 disk = dev->bdev->bd_part;
588 disk_kobj = &part_to_dev(disk)->kobj;
589
590 error = sysfs_create_link(fs_info->device_dir_kobj,
591 disk_kobj, disk_kobj->name);
592 if (error)
593 break;
594 }
595
596 return error;
597}
29 598
30/* /sys/fs/btrfs/ entry */ 599/* /sys/fs/btrfs/ entry */
31static struct kset *btrfs_kset; 600static struct kset *btrfs_kset;
32 601
602int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info)
603{
604 int error;
605
606 init_completion(&fs_info->kobj_unregister);
607 fs_info->super_kobj.kset = btrfs_kset;
608 error = kobject_init_and_add(&fs_info->super_kobj, &btrfs_ktype, NULL,
609 "%pU", fs_info->fsid);
610 if (error)
611 return error;
612
613 error = sysfs_create_group(&fs_info->super_kobj,
614 &btrfs_feature_attr_group);
615 if (error) {
616 __btrfs_sysfs_remove_one(fs_info);
617 return error;
618 }
619
620 error = addrm_unknown_feature_attrs(fs_info, true);
621 if (error)
622 goto failure;
623
624 error = add_device_membership(fs_info);
625 if (error)
626 goto failure;
627
628 fs_info->space_info_kobj = kobject_create_and_add("allocation",
629 &fs_info->super_kobj);
630 if (!fs_info->space_info_kobj) {
631 error = -ENOMEM;
632 goto failure;
633 }
634
635 error = sysfs_create_files(fs_info->space_info_kobj, allocation_attrs);
636 if (error)
637 goto failure;
638
639 return 0;
640failure:
641 btrfs_sysfs_remove_one(fs_info);
642 return error;
643}
644
33int btrfs_init_sysfs(void) 645int btrfs_init_sysfs(void)
34{ 646{
647 int ret;
35 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); 648 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
36 if (!btrfs_kset) 649 if (!btrfs_kset)
37 return -ENOMEM; 650 return -ENOMEM;
651
652 init_feature_attrs();
653
654 ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
655 if (ret) {
656 kset_unregister(btrfs_kset);
657 return ret;
658 }
659
38 return 0; 660 return 0;
39} 661}
40 662
41void btrfs_exit_sysfs(void) 663void btrfs_exit_sysfs(void)
42{ 664{
665 sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
43 kset_unregister(btrfs_kset); 666 kset_unregister(btrfs_kset);
44} 667}
45 668
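
init_feature_attrs() pre-builds an attribute for all 64 possible bits in each of the three sets; bits this kernel doesn't recognize get a generated fallback name such as "compat_ro:17", which is also what appears for unknown on-disk flags. The naming scheme in isolation -- 13 bytes per slot covers the longest prefix "compat_ro:" plus two digits and the NUL:

    #include <stdio.h>

    static const char * const set_names[3] = {
            "compat", "compat_ro", "incompat"
    };
    static char names[3][64][13];        /* 3 sets x 64 bits x 13 bytes */

    int main(void)
    {
            /* Generate the fallback name for every (set, bit) pair. */
            for (int set = 0; set < 3; set++)
                    for (int i = 0; i < 64; i++)
                            snprintf(names[set][i], sizeof(names[set][i]),
                                     "%s:%u", set_names[set], i);

            printf("%s\n", names[1][17]);        /* -> "compat_ro:17" */
            return 0;
    }
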
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
new file mode 100644
index 000000000000..f3cea3710d44
--- /dev/null
+++ b/fs/btrfs/sysfs.h
@@ -0,0 +1,64 @@
1#ifndef _BTRFS_SYSFS_H_
2#define _BTRFS_SYSFS_H_
3
4enum btrfs_feature_set {
5 FEAT_COMPAT,
6 FEAT_COMPAT_RO,
7 FEAT_INCOMPAT,
8 FEAT_MAX
9};
10
11#define __INIT_KOBJ_ATTR(_name, _mode, _show, _store) \
12{ \
13 .attr = { .name = __stringify(_name), .mode = _mode }, \
14 .show = _show, \
15 .store = _store, \
16}
17
18#define BTRFS_ATTR_RW(_name, _mode, _show, _store) \
19static struct kobj_attribute btrfs_attr_##_name = \
20 __INIT_KOBJ_ATTR(_name, _mode, _show, _store)
21#define BTRFS_ATTR(_name, _mode, _show) \
22 BTRFS_ATTR_RW(_name, _mode, _show, NULL)
23#define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr)
24
25#define BTRFS_RAID_ATTR(_name, _show) \
26static struct kobj_attribute btrfs_raid_attr_##_name = \
27 __INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
28#define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr)
29
30
31struct btrfs_feature_attr {
32 struct kobj_attribute kobj_attr;
33 enum btrfs_feature_set feature_set;
34 u64 feature_bit;
35};
36
37#define BTRFS_FEAT_ATTR(_name, _feature_set, _prefix, _feature_bit) \
38static struct btrfs_feature_attr btrfs_attr_##_name = { \
39 .kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO, \
40 btrfs_feature_attr_show, \
41 btrfs_feature_attr_store), \
42 .feature_set = _feature_set, \
43 .feature_bit = _prefix ##_## _feature_bit, \
44}
45#define BTRFS_FEAT_ATTR_PTR(_name) (&btrfs_attr_##_name.kobj_attr.attr)
46
47#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
48 BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
49#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
50 BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
51#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
52 BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
53
54/* convert from attribute */
55#define to_btrfs_feature_attr(a) \
56 container_of(a, struct btrfs_feature_attr, kobj_attr)
57#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr)
58#define attr_to_btrfs_feature_attr(a) \
59 to_btrfs_feature_attr(attr_to_btrfs_attr(a))
60char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
61extern const char * const btrfs_feature_set_names[3];
62extern struct kobj_type space_info_ktype;
63extern struct kobj_type btrfs_raid_ktype;
64#endif /* _BTRFS_SYSFS_H_ */
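
The conversion macros at the bottom of this header apply container_of() twice: sysfs callbacks receive a bare struct attribute, which is the first member of the embedded kobj_attribute, which in turn sits inside btrfs_feature_attr. A standalone demonstration of the same two-step recovery (simplified struct layouts, not the kernel definitions):

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct attribute { const char *name; };
    struct kobj_attribute { struct attribute attr; };
    struct feature_attr {
            struct kobj_attribute kobj_attr;
            int feature_bit;
    };

    int main(void)
    {
            struct feature_attr fa = {
                    .kobj_attr = { .attr = { .name = "no_holes" } },
                    .feature_bit = 42,
            };
            /* A sysfs callback would only see this inner pointer: */
            struct attribute *a = &fa.kobj_attr.attr;

            /* Recover the outer structures, as the macros above do. */
            struct kobj_attribute *ka =
                    container_of(a, struct kobj_attribute, attr);
            struct feature_attr *back =
                    container_of(ka, struct feature_attr, kobj_attr);

            printf("%s -> bit %d\n", back->kobj_attr.attr.name,
                   back->feature_bit);
            return 0;
    }
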
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index b353bc806ca0..312560a9123d 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -21,7 +21,7 @@
21 21
22#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 22#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
23 23
24#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__) 24#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
25 25
26int btrfs_test_free_space_cache(void); 26int btrfs_test_free_space_cache(void);
27int btrfs_test_extent_buffer_operations(void); 27int btrfs_test_extent_buffer_operations(void);
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 6fc82010dc15..c8d9ddf84c69 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -101,7 +101,7 @@ static int test_extents(struct btrfs_block_group_cache *cache)
101 101
102 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); 102 ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
103 if (ret) { 103 if (ret) {
104 test_msg("Error removing middle peice %d\n", ret); 104 test_msg("Error removing middle piece %d\n", ret);
105 return ret; 105 return ret;
106 } 106 }
107 107
@@ -266,7 +266,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
266 } 266 }
267 267
268 if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { 268 if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
269 test_msg("Left over peices after removing overlapping\n"); 269 test_msg("Left over pieces after removing overlapping\n");
270 return -1; 270 return -1;
271 } 271 }
272 272
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c6a872a8a468..34cd83184c4a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -62,7 +62,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
62 WARN_ON(atomic_read(&transaction->use_count) == 0); 62 WARN_ON(atomic_read(&transaction->use_count) == 0);
63 if (atomic_dec_and_test(&transaction->use_count)) { 63 if (atomic_dec_and_test(&transaction->use_count)) {
64 BUG_ON(!list_empty(&transaction->list)); 64 BUG_ON(!list_empty(&transaction->list));
65 WARN_ON(transaction->delayed_refs.root.rb_node); 65 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
66 while (!list_empty(&transaction->pending_chunks)) { 66 while (!list_empty(&transaction->pending_chunks)) {
67 struct extent_map *em; 67 struct extent_map *em;
68 68
@@ -183,8 +183,8 @@ loop:
183 atomic_set(&cur_trans->use_count, 2); 183 atomic_set(&cur_trans->use_count, 2);
184 cur_trans->start_time = get_seconds(); 184 cur_trans->start_time = get_seconds();
185 185
186 cur_trans->delayed_refs.root = RB_ROOT; 186 cur_trans->delayed_refs.href_root = RB_ROOT;
187 cur_trans->delayed_refs.num_entries = 0; 187 atomic_set(&cur_trans->delayed_refs.num_entries, 0);
188 cur_trans->delayed_refs.num_heads_ready = 0; 188 cur_trans->delayed_refs.num_heads_ready = 0;
189 cur_trans->delayed_refs.num_heads = 0; 189 cur_trans->delayed_refs.num_heads = 0;
190 cur_trans->delayed_refs.flushing = 0; 190 cur_trans->delayed_refs.flushing = 0;
@@ -196,17 +196,14 @@ loop:
196 */ 196 */
197 smp_mb(); 197 smp_mb();
198 if (!list_empty(&fs_info->tree_mod_seq_list)) 198 if (!list_empty(&fs_info->tree_mod_seq_list))
199 WARN(1, KERN_ERR "btrfs: tree_mod_seq_list not empty when " 199 WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when "
200 "creating a fresh transaction\n"); 200 "creating a fresh transaction\n");
201 if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) 201 if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
202 WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " 202 WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when "
203 "creating a fresh transaction\n"); 203 "creating a fresh transaction\n");
204 atomic64_set(&fs_info->tree_mod_seq, 0); 204 atomic64_set(&fs_info->tree_mod_seq, 0);
205 205
206 spin_lock_init(&cur_trans->delayed_refs.lock); 206 spin_lock_init(&cur_trans->delayed_refs.lock);
207 atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
208 atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
209 init_waitqueue_head(&cur_trans->delayed_refs.wait);
210 207
211 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 208 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
212 INIT_LIST_HEAD(&cur_trans->ordered_operations); 209 INIT_LIST_HEAD(&cur_trans->ordered_operations);
@@ -472,6 +469,7 @@ again:
472 h->type = type; 469 h->type = type;
473 h->allocating_chunk = false; 470 h->allocating_chunk = false;
474 h->reloc_reserved = false; 471 h->reloc_reserved = false;
472 h->sync = false;
475 INIT_LIST_HEAD(&h->qgroup_ref_list); 473 INIT_LIST_HEAD(&h->qgroup_ref_list);
476 INIT_LIST_HEAD(&h->new_bgs); 474 INIT_LIST_HEAD(&h->new_bgs);
477 475
@@ -647,7 +645,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans,
647 struct btrfs_root *root) 645 struct btrfs_root *root)
648{ 646{
649 if (root->fs_info->global_block_rsv.space_info->full && 647 if (root->fs_info->global_block_rsv.space_info->full &&
650 btrfs_should_throttle_delayed_refs(trans, root)) 648 btrfs_check_space_for_delayed_refs(trans, root))
651 return 1; 649 return 1;
652 650
653 return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); 651 return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
@@ -711,8 +709,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
711 btrfs_create_pending_block_groups(trans, root); 709 btrfs_create_pending_block_groups(trans, root);
712 710
713 trans->delayed_ref_updates = 0; 711 trans->delayed_ref_updates = 0;
714 if (btrfs_should_throttle_delayed_refs(trans, root)) { 712 if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
715 cur = max_t(unsigned long, cur, 1); 713 cur = max_t(unsigned long, cur, 32);
716 trans->delayed_ref_updates = 0; 714 trans->delayed_ref_updates = 0;
717 btrfs_run_delayed_refs(trans, root, cur); 715 btrfs_run_delayed_refs(trans, root, cur);
718 } 716 }
@@ -788,12 +786,6 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
788 return __btrfs_end_transaction(trans, root, 1); 786 return __btrfs_end_transaction(trans, root, 1);
789} 787}
790 788
791int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
792 struct btrfs_root *root)
793{
794 return __btrfs_end_transaction(trans, root, 1);
795}
796
797/* 789/*
798 * when btree blocks are allocated, they have some corresponding bits set for 790 * when btree blocks are allocated, they have some corresponding bits set for
799 * them in one of two extent_io trees. This is used to make sure all of 791 * them in one of two extent_io trees. This is used to make sure all of
@@ -1105,7 +1097,7 @@ int btrfs_defrag_root(struct btrfs_root *root)
1105 break; 1097 break;
1106 1098
1107 if (btrfs_defrag_cancelled(root->fs_info)) { 1099 if (btrfs_defrag_cancelled(root->fs_info)) {
1108 printk(KERN_DEBUG "btrfs: defrag_root cancelled\n"); 1100 pr_debug("BTRFS: defrag_root cancelled\n");
1109 ret = -EAGAIN; 1101 ret = -EAGAIN;
1110 break; 1102 break;
1111 } 1103 }
@@ -1746,6 +1738,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1746 goto cleanup_transaction; 1738 goto cleanup_transaction;
1747 1739
1748 btrfs_wait_delalloc_flush(root->fs_info); 1740 btrfs_wait_delalloc_flush(root->fs_info);
1741
1742 btrfs_scrub_pause(root);
1749 /* 1743 /*
1750 * Ok now we need to make sure to block out any other joins while we 1744 * Ok now we need to make sure to block out any other joins while we
1751 * commit the transaction. We could have started a join before setting 1745 * commit the transaction. We could have started a join before setting
@@ -1810,7 +1804,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1810 1804
1811 WARN_ON(cur_trans != trans->transaction); 1805 WARN_ON(cur_trans != trans->transaction);
1812 1806
1813 btrfs_scrub_pause(root);
1814 /* btrfs_commit_tree_roots is responsible for getting the 1807 /* btrfs_commit_tree_roots is responsible for getting the
1815 * various roots consistent with each other. Every pointer 1808 * various roots consistent with each other. Every pointer
1816 * in the tree of tree roots has to point to the most up to date 1809 * in the tree of tree roots has to point to the most up to date
@@ -1833,6 +1826,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1833 goto cleanup_transaction; 1826 goto cleanup_transaction;
1834 } 1827 }
1835 1828
1829 /*
1830 * Since the transaction is done, we should set the inode map cache flag
 1831 * before any other coming transaction.
1832 */
1833 if (btrfs_test_opt(root, CHANGE_INODE_CACHE))
1834 btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
1835 else
1836 btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE);
1837
1836 /* commit_fs_roots gets rid of all the tree log roots, it is now 1838 /* commit_fs_roots gets rid of all the tree log roots, it is now
1837 * safe to free the root of tree log roots 1839 * safe to free the root of tree log roots
1838 */ 1840 */
@@ -1975,10 +1977,23 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
1975 } 1977 }
1976 root = list_first_entry(&fs_info->dead_roots, 1978 root = list_first_entry(&fs_info->dead_roots,
1977 struct btrfs_root, root_list); 1979 struct btrfs_root, root_list);
1980 /*
 1981 * Make sure the root is not involved in send;
 1982 * if the first root is in use by send, we return
 1983 * directly rather than continuing.
1984 */
1985 spin_lock(&root->root_item_lock);
1986 if (root->send_in_progress) {
1987 spin_unlock(&fs_info->trans_lock);
1988 spin_unlock(&root->root_item_lock);
1989 return 0;
1990 }
1991 spin_unlock(&root->root_item_lock);
1992
1978 list_del_init(&root->root_list); 1993 list_del_init(&root->root_list);
1979 spin_unlock(&fs_info->trans_lock); 1994 spin_unlock(&fs_info->trans_lock);
1980 1995
1981 pr_debug("btrfs: cleaner removing %llu\n", root->objectid); 1996 pr_debug("BTRFS: cleaner removing %llu\n", root->objectid);
1982 1997
1983 btrfs_kill_all_delayed_nodes(root); 1998 btrfs_kill_all_delayed_nodes(root);
1984 1999
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7657d115067d..6ac037e9f9f0 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -93,6 +93,7 @@ struct btrfs_trans_handle {
93 short adding_csums; 93 short adding_csums;
94 bool allocating_chunk; 94 bool allocating_chunk;
95 bool reloc_reserved; 95 bool reloc_reserved;
96 bool sync;
96 unsigned int type; 97 unsigned int type;
97 /* 98 /*
98 * this root is only needed to validate that the root passed to 99 * this root is only needed to validate that the root passed to
@@ -154,8 +155,6 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
154 int wait_for_unblock); 155 int wait_for_unblock);
155int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 156int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
156 struct btrfs_root *root); 157 struct btrfs_root *root);
157int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
158 struct btrfs_root *root);
159int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 158int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
160 struct btrfs_root *root); 159 struct btrfs_root *root);
161void btrfs_throttle(struct btrfs_root *root); 160void btrfs_throttle(struct btrfs_root *root);
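
The new handle flag above gates the delayed-ref throttling in __btrfs_end_transaction(). A hedged sketch of the intended caller pattern (fsync is the likely user; error handling is elided and the reservation argument to btrfs_start_transaction() is illustrative):

	struct btrfs_trans_handle *trans;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
	/* latency-sensitive path: don't batch-run delayed refs on end */
	trans->sync = true;
	/* ... log the inode and sync the log ... */
	return btrfs_end_transaction(trans, root);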
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9f7fc51ca334..39d83da03e03 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -570,7 +570,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
570 if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 570 if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
571 nbytes = 0; 571 nbytes = 0;
572 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { 572 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
573 size = btrfs_file_extent_inline_len(eb, item); 573 size = btrfs_file_extent_inline_len(eb, slot, item);
574 nbytes = btrfs_file_extent_ram_bytes(eb, item); 574 nbytes = btrfs_file_extent_ram_bytes(eb, item);
575 extent_end = ALIGN(start + size, root->sectorsize); 575 extent_end = ALIGN(start + size, root->sectorsize);
576 } else { 576 } else {
@@ -1238,7 +1238,8 @@ static int insert_orphan_item(struct btrfs_trans_handle *trans,
1238 struct btrfs_root *root, u64 offset) 1238 struct btrfs_root *root, u64 offset)
1239{ 1239{
1240 int ret; 1240 int ret;
1241 ret = btrfs_find_orphan_item(root, offset); 1241 ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
1242 offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
1242 if (ret > 0) 1243 if (ret > 0)
1243 ret = btrfs_insert_orphan_item(trans, root, offset); 1244 ret = btrfs_insert_orphan_item(trans, root, offset);
1244 return ret; 1245 return ret;
@@ -3194,7 +3195,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
3194static noinline int copy_items(struct btrfs_trans_handle *trans, 3195static noinline int copy_items(struct btrfs_trans_handle *trans,
3195 struct inode *inode, 3196 struct inode *inode,
3196 struct btrfs_path *dst_path, 3197 struct btrfs_path *dst_path,
3197 struct extent_buffer *src, 3198 struct btrfs_path *src_path, u64 *last_extent,
3198 int start_slot, int nr, int inode_only) 3199 int start_slot, int nr, int inode_only)
3199{ 3200{
3200 unsigned long src_offset; 3201 unsigned long src_offset;
@@ -3202,6 +3203,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3202 struct btrfs_root *log = BTRFS_I(inode)->root->log_root; 3203 struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
3203 struct btrfs_file_extent_item *extent; 3204 struct btrfs_file_extent_item *extent;
3204 struct btrfs_inode_item *inode_item; 3205 struct btrfs_inode_item *inode_item;
3206 struct extent_buffer *src = src_path->nodes[0];
3207 struct btrfs_key first_key, last_key, key;
3205 int ret; 3208 int ret;
3206 struct btrfs_key *ins_keys; 3209 struct btrfs_key *ins_keys;
3207 u32 *ins_sizes; 3210 u32 *ins_sizes;
@@ -3209,6 +3212,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3209 int i; 3212 int i;
3210 struct list_head ordered_sums; 3213 struct list_head ordered_sums;
3211 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3214 int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3215 bool has_extents = false;
3216 bool need_find_last_extent = (*last_extent == 0);
3217 bool done = false;
3212 3218
3213 INIT_LIST_HEAD(&ordered_sums); 3219 INIT_LIST_HEAD(&ordered_sums);
3214 3220
@@ -3217,6 +3223,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3217 if (!ins_data) 3223 if (!ins_data)
3218 return -ENOMEM; 3224 return -ENOMEM;
3219 3225
3226 first_key.objectid = (u64)-1;
3227
3220 ins_sizes = (u32 *)ins_data; 3228 ins_sizes = (u32 *)ins_data;
3221 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); 3229 ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
3222 3230
@@ -3237,6 +3245,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3237 3245
3238 src_offset = btrfs_item_ptr_offset(src, start_slot + i); 3246 src_offset = btrfs_item_ptr_offset(src, start_slot + i);
3239 3247
 3248 if (i == (nr - 1))
3249 last_key = ins_keys[i];
3250
3240 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { 3251 if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
3241 inode_item = btrfs_item_ptr(dst_path->nodes[0], 3252 inode_item = btrfs_item_ptr(dst_path->nodes[0],
3242 dst_path->slots[0], 3253 dst_path->slots[0],
@@ -3248,6 +3259,21 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3248 src_offset, ins_sizes[i]); 3259 src_offset, ins_sizes[i]);
3249 } 3260 }
3250 3261
3262 /*
 3263 * We set need_find_last_extent here in case we were processing
 3264 * other items and then walked into the first extent in the inode.
 3265 * If we don't hit an extent then nothing changes; we'll do the
 3266 * last search the next time around.
3267 */
3268 if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
3269 has_extents = true;
3270 if (need_find_last_extent &&
3271 first_key.objectid == (u64)-1)
3272 first_key = ins_keys[i];
3273 } else {
3274 need_find_last_extent = false;
3275 }
3276
3251 /* take a reference on file data extents so that truncates 3277 /* take a reference on file data extents so that truncates
3252 * or deletes of this inode don't have to relog the inode 3278 * or deletes of this inode don't have to relog the inode
3253 * again 3279 * again
@@ -3312,6 +3338,128 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3312 list_del(&sums->list); 3338 list_del(&sums->list);
3313 kfree(sums); 3339 kfree(sums);
3314 } 3340 }
3341
3342 if (!has_extents)
3343 return ret;
3344
3345 /*
3346 * Because we use btrfs_search_forward we could skip leaves that were
3347 * not modified and then assume *last_extent is valid when it really
3348 * isn't. So back up to the previous leaf and read the end of the last
3349 * extent before we go and fill in holes.
3350 */
3351 if (need_find_last_extent) {
3352 u64 len;
3353
3354 ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
3355 if (ret < 0)
3356 return ret;
3357 if (ret)
3358 goto fill_holes;
3359 if (src_path->slots[0])
3360 src_path->slots[0]--;
3361 src = src_path->nodes[0];
3362 btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
3363 if (key.objectid != btrfs_ino(inode) ||
3364 key.type != BTRFS_EXTENT_DATA_KEY)
3365 goto fill_holes;
3366 extent = btrfs_item_ptr(src, src_path->slots[0],
3367 struct btrfs_file_extent_item);
3368 if (btrfs_file_extent_type(src, extent) ==
3369 BTRFS_FILE_EXTENT_INLINE) {
3370 len = btrfs_file_extent_inline_len(src,
3371 src_path->slots[0],
3372 extent);
3373 *last_extent = ALIGN(key.offset + len,
3374 log->sectorsize);
3375 } else {
3376 len = btrfs_file_extent_num_bytes(src, extent);
3377 *last_extent = key.offset + len;
3378 }
3379 }
3380fill_holes:
3381 /* So we did prev_leaf, now we need to move to the next leaf, but a few
3382 * things could have happened
3383 *
3384 * 1) A merge could have happened, so we could currently be on a leaf
3385 * that holds what we were copying in the first place.
3386 * 2) A split could have happened, and now not all of the items we want
3387 * are on the same leaf.
3388 *
 3389 * So we need to adjust how we search for holes: drop the
 3390 * path, re-search for the first extent key we found, and then walk
3391 * forward until we hit the last one we copied.
3392 */
3393 if (need_find_last_extent) {
3394 /* btrfs_prev_leaf could return 1 without releasing the path */
3395 btrfs_release_path(src_path);
3396 ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
3397 src_path, 0, 0);
3398 if (ret < 0)
3399 return ret;
3400 ASSERT(ret == 0);
3401 src = src_path->nodes[0];
3402 i = src_path->slots[0];
3403 } else {
3404 i = start_slot;
3405 }
3406
3407 /*
3408 * Ok so here we need to go through and fill in any holes we may have
3409 * to make sure that holes are punched for those areas in case they had
3410 * extents previously.
3411 */
3412 while (!done) {
3413 u64 offset, len;
3414 u64 extent_end;
3415
3416 if (i >= btrfs_header_nritems(src_path->nodes[0])) {
3417 ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
3418 if (ret < 0)
3419 return ret;
3420 ASSERT(ret == 0);
3421 src = src_path->nodes[0];
3422 i = 0;
3423 }
3424
3425 btrfs_item_key_to_cpu(src, &key, i);
3426 if (!btrfs_comp_cpu_keys(&key, &last_key))
3427 done = true;
3428 if (key.objectid != btrfs_ino(inode) ||
3429 key.type != BTRFS_EXTENT_DATA_KEY) {
3430 i++;
3431 continue;
3432 }
3433 extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
3434 if (btrfs_file_extent_type(src, extent) ==
3435 BTRFS_FILE_EXTENT_INLINE) {
3436 len = btrfs_file_extent_inline_len(src, i, extent);
3437 extent_end = ALIGN(key.offset + len, log->sectorsize);
3438 } else {
3439 len = btrfs_file_extent_num_bytes(src, extent);
3440 extent_end = key.offset + len;
3441 }
3442 i++;
3443
3444 if (*last_extent == key.offset) {
3445 *last_extent = extent_end;
3446 continue;
3447 }
3448 offset = *last_extent;
3449 len = key.offset - *last_extent;
3450 ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
3451 offset, 0, 0, len, 0, len, 0,
3452 0, 0);
3453 if (ret)
3454 break;
3455 *last_extent = offset + len;
3456 }
3457 /*
3458 * Need to let the callers know we dropped the path so they should
3459 * re-search.
3460 */
3461 if (!ret && need_find_last_extent)
3462 ret = 1;
3315 return ret; 3463 return ret;
3316} 3464}
3317 3465
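
Condensed from the loop above, the per-item hole-punching step keeps one invariant: *last_extent is the end of the previous extent that made it into the log, and any gap in front of the next extent's key.offset is logged as a file extent with a zero disk bytenr. A sketch of that step in isolation, using the patch's own names (not a drop-in function):

	u64 offset, len;

	if (*last_extent == key.offset) {
		*last_extent = extent_end;	/* contiguous with the last extent */
	} else {
		offset = *last_extent;
		len = key.offset - *last_extent;
		/* a zero disk_bytenr extent logs the gap as a hole */
		ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
					       offset, 0, 0, len, 0, len,
					       0, 0, 0);
		if (!ret)
			*last_extent = offset + len;
	}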
@@ -3349,21 +3497,27 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3349 int ret; 3497 int ret;
3350 int index = log->log_transid % 2; 3498 int index = log->log_transid % 2;
3351 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3499 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3352 3500 int extent_inserted = 0;
3353 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3354 em->start + em->len, NULL, 0);
3355 if (ret)
3356 return ret;
3357 3501
3358 INIT_LIST_HEAD(&ordered_sums); 3502 INIT_LIST_HEAD(&ordered_sums);
3359 btrfs_init_map_token(&token); 3503 btrfs_init_map_token(&token);
3360 key.objectid = btrfs_ino(inode);
3361 key.type = BTRFS_EXTENT_DATA_KEY;
3362 key.offset = em->start;
3363 3504
3364 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); 3505 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3506 em->start + em->len, NULL, 0, 1,
3507 sizeof(*fi), &extent_inserted);
3365 if (ret) 3508 if (ret)
3366 return ret; 3509 return ret;
3510
3511 if (!extent_inserted) {
3512 key.objectid = btrfs_ino(inode);
3513 key.type = BTRFS_EXTENT_DATA_KEY;
3514 key.offset = em->start;
3515
3516 ret = btrfs_insert_empty_item(trans, log, path, &key,
3517 sizeof(*fi));
3518 if (ret)
3519 return ret;
3520 }
3367 leaf = path->nodes[0]; 3521 leaf = path->nodes[0];
3368 fi = btrfs_item_ptr(leaf, path->slots[0], 3522 fi = btrfs_item_ptr(leaf, path->slots[0],
3369 struct btrfs_file_extent_item); 3523 struct btrfs_file_extent_item);
@@ -3485,7 +3639,11 @@ again:
3485 * start over after this. 3639 * start over after this.
3486 */ 3640 */
3487 3641
3488 wait_event(ordered->wait, ordered->csum_bytes_left == 0); 3642 if (ordered->csum_bytes_left) {
3643 btrfs_start_ordered_extent(inode, ordered, 0);
3644 wait_event(ordered->wait,
3645 ordered->csum_bytes_left == 0);
3646 }
3489 3647
3490 list_for_each_entry(sum, &ordered->list, list) { 3648 list_for_each_entry(sum, &ordered->list, list) {
3491 ret = btrfs_csum_file_blocks(trans, log, sum); 3649 ret = btrfs_csum_file_blocks(trans, log, sum);
@@ -3630,6 +3788,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3630 struct btrfs_key max_key; 3788 struct btrfs_key max_key;
3631 struct btrfs_root *log = root->log_root; 3789 struct btrfs_root *log = root->log_root;
3632 struct extent_buffer *src = NULL; 3790 struct extent_buffer *src = NULL;
3791 u64 last_extent = 0;
3633 int err = 0; 3792 int err = 0;
3634 int ret; 3793 int ret;
3635 int nritems; 3794 int nritems;
@@ -3745,11 +3904,15 @@ again:
3745 goto next_slot; 3904 goto next_slot;
3746 } 3905 }
3747 3906
3748 ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 3907 ret = copy_items(trans, inode, dst_path, path, &last_extent,
3749 ins_nr, inode_only); 3908 ins_start_slot, ins_nr, inode_only);
3750 if (ret) { 3909 if (ret < 0) {
3751 err = ret; 3910 err = ret;
3752 goto out_unlock; 3911 goto out_unlock;
 3912 } else if (ret) {
3913 ins_nr = 0;
3914 btrfs_release_path(path);
3915 continue;
3753 } 3916 }
3754 ins_nr = 1; 3917 ins_nr = 1;
3755 ins_start_slot = path->slots[0]; 3918 ins_start_slot = path->slots[0];
@@ -3763,13 +3926,14 @@ next_slot:
3763 goto again; 3926 goto again;
3764 } 3927 }
3765 if (ins_nr) { 3928 if (ins_nr) {
3766 ret = copy_items(trans, inode, dst_path, src, 3929 ret = copy_items(trans, inode, dst_path, path,
3767 ins_start_slot, 3930 &last_extent, ins_start_slot,
3768 ins_nr, inode_only); 3931 ins_nr, inode_only);
3769 if (ret) { 3932 if (ret < 0) {
3770 err = ret; 3933 err = ret;
3771 goto out_unlock; 3934 goto out_unlock;
3772 } 3935 }
3936 ret = 0;
3773 ins_nr = 0; 3937 ins_nr = 0;
3774 } 3938 }
3775 btrfs_release_path(path); 3939 btrfs_release_path(path);
@@ -3784,12 +3948,13 @@ next_slot:
3784 } 3948 }
3785 } 3949 }
3786 if (ins_nr) { 3950 if (ins_nr) {
3787 ret = copy_items(trans, inode, dst_path, src, ins_start_slot, 3951 ret = copy_items(trans, inode, dst_path, path, &last_extent,
3788 ins_nr, inode_only); 3952 ins_start_slot, ins_nr, inode_only);
3789 if (ret) { 3953 if (ret < 0) {
3790 err = ret; 3954 err = ret;
3791 goto out_unlock; 3955 goto out_unlock;
3792 } 3956 }
3957 ret = 0;
3793 ins_nr = 0; 3958 ins_nr = 0;
3794 } 3959 }
3795 3960
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index b0a523b2c60e..840a38b2778a 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -5,8 +5,8 @@
5 */ 5 */
6 6
7#include <linux/slab.h> 7#include <linux/slab.h>
8#include <linux/export.h>
9#include "ulist.h" 8#include "ulist.h"
9#include "ctree.h"
10 10
11/* 11/*
12 * ulist is a generic data structure to hold a collection of unique u64 12 * ulist is a generic data structure to hold a collection of unique u64
@@ -14,10 +14,6 @@
14 * enumerating it. 14 * enumerating it.
15 * It is possible to store an auxiliary value along with the key. 15 * It is possible to store an auxiliary value along with the key.
16 * 16 *
17 * The implementation is preliminary and can probably be sped up
18 * significantly. A first step would be to store the values in an rbtree
19 * as soon as ULIST_SIZE is exceeded.
20 *
21 * A sample usage for ulists is the enumeration of directed graphs without 17 * A sample usage for ulists is the enumeration of directed graphs without
22 * visiting a node twice. The pseudo-code could look like this: 18 * visiting a node twice. The pseudo-code could look like this:
23 * 19 *
@@ -50,12 +46,10 @@
50 */ 46 */
51void ulist_init(struct ulist *ulist) 47void ulist_init(struct ulist *ulist)
52{ 48{
53 ulist->nnodes = 0; 49 INIT_LIST_HEAD(&ulist->nodes);
54 ulist->nodes = ulist->int_nodes;
55 ulist->nodes_alloced = ULIST_SIZE;
56 ulist->root = RB_ROOT; 50 ulist->root = RB_ROOT;
51 ulist->nnodes = 0;
57} 52}
58EXPORT_SYMBOL(ulist_init);
59 53
60/** 54/**
61 * ulist_fini - free up additionally allocated memory for the ulist 55 * ulist_fini - free up additionally allocated memory for the ulist
@@ -64,18 +58,17 @@ EXPORT_SYMBOL(ulist_init);
64 * This is useful in cases where the base 'struct ulist' has been statically 58 * This is useful in cases where the base 'struct ulist' has been statically
65 * allocated. 59 * allocated.
66 */ 60 */
67void ulist_fini(struct ulist *ulist) 61static void ulist_fini(struct ulist *ulist)
68{ 62{
69 /* 63 struct ulist_node *node;
70 * The first ULIST_SIZE elements are stored inline in struct ulist. 64 struct ulist_node *next;
71 * Only if more elements are alocated they need to be freed. 65
72 */ 66 list_for_each_entry_safe(node, next, &ulist->nodes, list) {
73 if (ulist->nodes_alloced > ULIST_SIZE) 67 kfree(node);
74 kfree(ulist->nodes); 68 }
75 ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
76 ulist->root = RB_ROOT; 69 ulist->root = RB_ROOT;
70 INIT_LIST_HEAD(&ulist->nodes);
77} 71}
78EXPORT_SYMBOL(ulist_fini);
79 72
80/** 73/**
81 * ulist_reinit - prepare a ulist for reuse 74 * ulist_reinit - prepare a ulist for reuse
@@ -89,7 +82,6 @@ void ulist_reinit(struct ulist *ulist)
89 ulist_fini(ulist); 82 ulist_fini(ulist);
90 ulist_init(ulist); 83 ulist_init(ulist);
91} 84}
92EXPORT_SYMBOL(ulist_reinit);
93 85
94/** 86/**
95 * ulist_alloc - dynamically allocate a ulist 87 * ulist_alloc - dynamically allocate a ulist
@@ -108,7 +100,6 @@ struct ulist *ulist_alloc(gfp_t gfp_mask)
108 100
109 return ulist; 101 return ulist;
110} 102}
111EXPORT_SYMBOL(ulist_alloc);
112 103
113/** 104/**
114 * ulist_free - free dynamically allocated ulist 105 * ulist_free - free dynamically allocated ulist
@@ -123,7 +114,6 @@ void ulist_free(struct ulist *ulist)
123 ulist_fini(ulist); 114 ulist_fini(ulist);
124 kfree(ulist); 115 kfree(ulist);
125} 116}
126EXPORT_SYMBOL(ulist_free);
127 117
128static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val) 118static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
129{ 119{
@@ -192,63 +182,32 @@ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
192int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, 182int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
193 u64 *old_aux, gfp_t gfp_mask) 183 u64 *old_aux, gfp_t gfp_mask)
194{ 184{
195 int ret = 0; 185 int ret;
196 struct ulist_node *node = NULL; 186 struct ulist_node *node;
187
197 node = ulist_rbtree_search(ulist, val); 188 node = ulist_rbtree_search(ulist, val);
198 if (node) { 189 if (node) {
199 if (old_aux) 190 if (old_aux)
200 *old_aux = node->aux; 191 *old_aux = node->aux;
201 return 0; 192 return 0;
202 } 193 }
194 node = kmalloc(sizeof(*node), gfp_mask);
195 if (!node)
196 return -ENOMEM;
203 197
204 if (ulist->nnodes >= ulist->nodes_alloced) { 198 node->val = val;
205 u64 new_alloced = ulist->nodes_alloced + 128; 199 node->aux = aux;
206 struct ulist_node *new_nodes; 200#ifdef CONFIG_BTRFS_DEBUG
207 void *old = NULL; 201 node->seqnum = ulist->nnodes;
208 int i; 202#endif
209
210 for (i = 0; i < ulist->nnodes; i++)
211 rb_erase(&ulist->nodes[i].rb_node, &ulist->root);
212
213 /*
214 * if nodes_alloced == ULIST_SIZE no memory has been allocated
215 * yet, so pass NULL to krealloc
216 */
217 if (ulist->nodes_alloced > ULIST_SIZE)
218 old = ulist->nodes;
219 203
220 new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced, 204 ret = ulist_rbtree_insert(ulist, node);
221 gfp_mask); 205 ASSERT(!ret);
222 if (!new_nodes) 206 list_add_tail(&node->list, &ulist->nodes);
223 return -ENOMEM; 207 ulist->nnodes++;
224
225 if (!old)
226 memcpy(new_nodes, ulist->int_nodes,
227 sizeof(ulist->int_nodes));
228
229 ulist->nodes = new_nodes;
230 ulist->nodes_alloced = new_alloced;
231
232 /*
233 * krealloc actually uses memcpy, which does not copy rb_node
234 * pointers, so we have to do it ourselves. Otherwise we may
235 * be bitten by crashes.
236 */
237 for (i = 0; i < ulist->nnodes; i++) {
238 ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]);
239 if (ret < 0)
240 return ret;
241 }
242 }
243 ulist->nodes[ulist->nnodes].val = val;
244 ulist->nodes[ulist->nnodes].aux = aux;
245 ret = ulist_rbtree_insert(ulist, &ulist->nodes[ulist->nnodes]);
246 BUG_ON(ret);
247 ++ulist->nnodes;
248 208
249 return 1; 209 return 1;
250} 210}
251EXPORT_SYMBOL(ulist_add);
252 211
253/** 212/**
254 * ulist_next - iterate ulist 213 * ulist_next - iterate ulist
@@ -268,11 +227,25 @@ EXPORT_SYMBOL(ulist_add);
268 */ 227 */
269struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter) 228struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
270{ 229{
271 if (ulist->nnodes == 0) 230 struct ulist_node *node;
231
232 if (list_empty(&ulist->nodes))
272 return NULL; 233 return NULL;
273 if (uiter->i < 0 || uiter->i >= ulist->nnodes) 234 if (uiter->cur_list && uiter->cur_list->next == &ulist->nodes)
274 return NULL; 235 return NULL;
275 236 if (uiter->cur_list) {
276 return &ulist->nodes[uiter->i++]; 237 uiter->cur_list = uiter->cur_list->next;
238 } else {
239 uiter->cur_list = ulist->nodes.next;
240#ifdef CONFIG_BTRFS_DEBUG
241 uiter->i = 0;
242#endif
243 }
244 node = list_entry(uiter->cur_list, struct ulist_node, list);
245#ifdef CONFIG_BTRFS_DEBUG
246 ASSERT(node->seqnum == uiter->i);
247 ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
248 uiter->i++;
249#endif
250 return node;
277} 251}
278EXPORT_SYMBOL(ulist_next);
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index fb36731074b5..7f78cbf5cf41 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -17,18 +17,12 @@
17 * enumerating it. 17 * enumerating it.
18 * It is possible to store an auxiliary value along with the key. 18 * It is possible to store an auxiliary value along with the key.
19 * 19 *
20 * The implementation is preliminary and can probably be sped up
21 * significantly. A first step would be to store the values in an rbtree
22 * as soon as ULIST_SIZE is exceeded.
23 */ 20 */
24
25/*
26 * number of elements statically allocated inside struct ulist
27 */
28#define ULIST_SIZE 16
29
30struct ulist_iterator { 21struct ulist_iterator {
22#ifdef CONFIG_BTRFS_DEBUG
31 int i; 23 int i;
24#endif
25 struct list_head *cur_list; /* hint to start search */
32}; 26};
33 27
34/* 28/*
@@ -37,6 +31,12 @@ struct ulist_iterator {
37struct ulist_node { 31struct ulist_node {
38 u64 val; /* value to store */ 32 u64 val; /* value to store */
39 u64 aux; /* auxiliary value saved along with the val */ 33 u64 aux; /* auxiliary value saved along with the val */
34
35#ifdef CONFIG_BTRFS_DEBUG
36 int seqnum; /* sequence number this node is added */
37#endif
38
39 struct list_head list; /* used to link node */
40 struct rb_node rb_node; /* used to speed up search */ 40 struct rb_node rb_node; /* used to speed up search */
41}; 41};
42 42
@@ -46,28 +46,11 @@ struct ulist {
46 */ 46 */
47 unsigned long nnodes; 47 unsigned long nnodes;
48 48
49 /* 49 struct list_head nodes;
50 * number of nodes we already have room for
51 */
52 unsigned long nodes_alloced;
53
54 /*
55 * pointer to the array storing the elements. The first ULIST_SIZE
56 * elements are stored inline. In this case the it points to int_nodes.
57 * After exceeding ULIST_SIZE, dynamic memory is allocated.
58 */
59 struct ulist_node *nodes;
60
61 struct rb_root root; 50 struct rb_root root;
62
63 /*
64 * inline storage space for the first ULIST_SIZE entries
65 */
66 struct ulist_node int_nodes[ULIST_SIZE];
67}; 51};
68 52
69void ulist_init(struct ulist *ulist); 53void ulist_init(struct ulist *ulist);
70void ulist_fini(struct ulist *ulist);
71void ulist_reinit(struct ulist *ulist); 54void ulist_reinit(struct ulist *ulist);
72struct ulist *ulist_alloc(gfp_t gfp_mask); 55struct ulist *ulist_alloc(gfp_t gfp_mask);
73void ulist_free(struct ulist *ulist); 56void ulist_free(struct ulist *ulist);
@@ -77,6 +60,6 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
77struct ulist_node *ulist_next(struct ulist *ulist, 60struct ulist_node *ulist_next(struct ulist *ulist,
78 struct ulist_iterator *uiter); 61 struct ulist_iterator *uiter);
79 62
80#define ULIST_ITER_INIT(uiter) ((uiter)->i = 0) 63#define ULIST_ITER_INIT(uiter) ((uiter)->cur_list = NULL)
81 64
82#endif 65#endif
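
Taken together, the two ulist changes keep the public API stable while replacing the grow-by-realloc array with a list_head chained through each node, plus the existing rbtree for duplicate detection. A hedged usage sketch (bytenr and aux are illustrative values; handle() is a hypothetical consumer):

	struct ulist *ul = ulist_alloc(GFP_NOFS);
	struct ulist_iterator uiter;
	struct ulist_node *node;

	if (!ul)
		return -ENOMEM;
	/* returns 1 for a new value, 0 if it was already present */
	ulist_add(ul, bytenr, aux, GFP_NOFS);

	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(ul, &uiter)))
		handle(node->val, node->aux);
	ulist_free(ul);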
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index fbda90004fe9..f6a4c03ee7d8 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -69,7 +69,7 @@ static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
69 ret = -ENOENT; 69 ret = -ENOENT;
70 70
71 if (!IS_ALIGNED(item_size, sizeof(u64))) { 71 if (!IS_ALIGNED(item_size, sizeof(u64))) {
72 pr_warn("btrfs: uuid item with illegal size %lu!\n", 72 btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!",
73 (unsigned long)item_size); 73 (unsigned long)item_size);
74 goto out; 74 goto out;
75 } 75 }
@@ -137,7 +137,8 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
137 offset = btrfs_item_ptr_offset(eb, slot); 137 offset = btrfs_item_ptr_offset(eb, slot);
138 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le); 138 offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
139 } else if (ret < 0) { 139 } else if (ret < 0) {
140 pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n", 140 btrfs_warn(uuid_root->fs_info, "insert uuid item failed %d "
141 "(0x%016llx, 0x%016llx) type %u!",
141 ret, (unsigned long long)key.objectid, 142 ret, (unsigned long long)key.objectid,
142 (unsigned long long)key.offset, type); 143 (unsigned long long)key.offset, type);
143 goto out; 144 goto out;
@@ -183,7 +184,7 @@ int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
183 184
184 ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1); 185 ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
185 if (ret < 0) { 186 if (ret < 0) {
186 pr_warn("btrfs: error %d while searching for uuid item!\n", 187 btrfs_warn(uuid_root->fs_info, "error %d while searching for uuid item!",
187 ret); 188 ret);
188 goto out; 189 goto out;
189 } 190 }
@@ -197,7 +198,7 @@ int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
197 offset = btrfs_item_ptr_offset(eb, slot); 198 offset = btrfs_item_ptr_offset(eb, slot);
198 item_size = btrfs_item_size_nr(eb, slot); 199 item_size = btrfs_item_size_nr(eb, slot);
199 if (!IS_ALIGNED(item_size, sizeof(u64))) { 200 if (!IS_ALIGNED(item_size, sizeof(u64))) {
200 pr_warn("btrfs: uuid item with illegal size %lu!\n", 201 btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!",
201 (unsigned long)item_size); 202 (unsigned long)item_size);
202 ret = -ENOENT; 203 ret = -ENOENT;
203 goto out; 204 goto out;
@@ -299,7 +300,7 @@ again_search_slot:
299 offset = btrfs_item_ptr_offset(leaf, slot); 300 offset = btrfs_item_ptr_offset(leaf, slot);
300 item_size = btrfs_item_size_nr(leaf, slot); 301 item_size = btrfs_item_size_nr(leaf, slot);
301 if (!IS_ALIGNED(item_size, sizeof(u64))) { 302 if (!IS_ALIGNED(item_size, sizeof(u64))) {
302 pr_warn("btrfs: uuid item with illegal size %lu!\n", 303 btrfs_warn(fs_info, "uuid item with illegal size %lu!",
303 (unsigned long)item_size); 304 (unsigned long)item_size);
304 goto skip; 305 goto skip;
305 } 306 }
@@ -349,6 +350,6 @@ skip:
349out: 350out:
350 btrfs_free_path(path); 351 btrfs_free_path(path);
351 if (ret) 352 if (ret)
352 pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret); 353 btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
353 return 0; 354 return 0;
354} 355}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 92303f42baaa..bab0b84d8f80 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,7 +125,7 @@ static void btrfs_kobject_uevent(struct block_device *bdev,
125 125
126 ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action); 126 ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
127 if (ret) 127 if (ret)
128 pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n", 128 pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
129 action, 129 action,
130 kobject_name(&disk_to_dev(bdev->bd_disk)->kobj), 130 kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
131 &disk_to_dev(bdev->bd_disk)->kobj); 131 &disk_to_dev(bdev->bd_disk)->kobj);
@@ -200,7 +200,7 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
200 200
201 if (IS_ERR(*bdev)) { 201 if (IS_ERR(*bdev)) {
202 ret = PTR_ERR(*bdev); 202 ret = PTR_ERR(*bdev);
203 printk(KERN_INFO "btrfs: open %s failed\n", device_path); 203 printk(KERN_INFO "BTRFS: open %s failed\n", device_path);
204 goto error; 204 goto error;
205 } 205 }
206 206
@@ -912,9 +912,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
912 if (disk_super->label[0]) { 912 if (disk_super->label[0]) {
913 if (disk_super->label[BTRFS_LABEL_SIZE - 1]) 913 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
914 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; 914 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
915 printk(KERN_INFO "btrfs: device label %s ", disk_super->label); 915 printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
916 } else { 916 } else {
917 printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid); 917 printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
918 } 918 }
919 919
920 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); 920 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
@@ -1813,7 +1813,7 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
1813 } 1813 }
1814 1814
1815 if (!*device) { 1815 if (!*device) {
1816 pr_err("btrfs: no missing device found\n"); 1816 btrfs_err(root->fs_info, "no missing device found");
1817 return -ENOENT; 1817 return -ENOENT;
1818 } 1818 }
1819 1819
@@ -3052,7 +3052,7 @@ loop:
3052error: 3052error:
3053 btrfs_free_path(path); 3053 btrfs_free_path(path);
3054 if (enospc_errors) { 3054 if (enospc_errors) {
3055 printk(KERN_INFO "btrfs: %d enospc errors during balance\n", 3055 btrfs_info(fs_info, "%d enospc errors during balance",
3056 enospc_errors); 3056 enospc_errors);
3057 if (!ret) 3057 if (!ret)
3058 ret = -ENOSPC; 3058 ret = -ENOSPC;
@@ -3138,8 +3138,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3138 if (!(bctl->flags & BTRFS_BALANCE_DATA) || 3138 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
3139 !(bctl->flags & BTRFS_BALANCE_METADATA) || 3139 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
3140 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { 3140 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
3141 printk(KERN_ERR "btrfs: with mixed groups data and " 3141 btrfs_err(fs_info, "with mixed groups data and "
3142 "metadata balance options must be the same\n"); 3142 "metadata balance options must be the same");
3143 ret = -EINVAL; 3143 ret = -EINVAL;
3144 goto out; 3144 goto out;
3145 } 3145 }
@@ -3165,8 +3165,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3165 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3165 if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3166 (!alloc_profile_is_valid(bctl->data.target, 1) || 3166 (!alloc_profile_is_valid(bctl->data.target, 1) ||
3167 (bctl->data.target & ~allowed))) { 3167 (bctl->data.target & ~allowed))) {
3168 printk(KERN_ERR "btrfs: unable to start balance with target " 3168 btrfs_err(fs_info, "unable to start balance with target "
3169 "data profile %llu\n", 3169 "data profile %llu",
3170 bctl->data.target); 3170 bctl->data.target);
3171 ret = -EINVAL; 3171 ret = -EINVAL;
3172 goto out; 3172 goto out;
@@ -3174,8 +3174,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3174 if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3174 if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3175 (!alloc_profile_is_valid(bctl->meta.target, 1) || 3175 (!alloc_profile_is_valid(bctl->meta.target, 1) ||
3176 (bctl->meta.target & ~allowed))) { 3176 (bctl->meta.target & ~allowed))) {
3177 printk(KERN_ERR "btrfs: unable to start balance with target " 3177 btrfs_err(fs_info,
3178 "metadata profile %llu\n", 3178 "unable to start balance with target metadata profile %llu",
3179 bctl->meta.target); 3179 bctl->meta.target);
3180 ret = -EINVAL; 3180 ret = -EINVAL;
3181 goto out; 3181 goto out;
@@ -3183,8 +3183,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3183 if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3183 if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3184 (!alloc_profile_is_valid(bctl->sys.target, 1) || 3184 (!alloc_profile_is_valid(bctl->sys.target, 1) ||
3185 (bctl->sys.target & ~allowed))) { 3185 (bctl->sys.target & ~allowed))) {
3186 printk(KERN_ERR "btrfs: unable to start balance with target " 3186 btrfs_err(fs_info,
3187 "system profile %llu\n", 3187 "unable to start balance with target system profile %llu",
3188 bctl->sys.target); 3188 bctl->sys.target);
3189 ret = -EINVAL; 3189 ret = -EINVAL;
3190 goto out; 3190 goto out;
@@ -3193,7 +3193,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3193 /* allow dup'ed data chunks only in mixed mode */ 3193 /* allow dup'ed data chunks only in mixed mode */
3194 if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3194 if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3195 (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) { 3195 (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
3196 printk(KERN_ERR "btrfs: dup for data is not allowed\n"); 3196 btrfs_err(fs_info, "dup for data is not allowed");
3197 ret = -EINVAL; 3197 ret = -EINVAL;
3198 goto out; 3198 goto out;
3199 } 3199 }
@@ -3213,11 +3213,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3213 (fs_info->avail_metadata_alloc_bits & allowed) && 3213 (fs_info->avail_metadata_alloc_bits & allowed) &&
3214 !(bctl->meta.target & allowed))) { 3214 !(bctl->meta.target & allowed))) {
3215 if (bctl->flags & BTRFS_BALANCE_FORCE) { 3215 if (bctl->flags & BTRFS_BALANCE_FORCE) {
3216 printk(KERN_INFO "btrfs: force reducing metadata " 3216 btrfs_info(fs_info, "force reducing metadata integrity");
3217 "integrity\n");
3218 } else { 3217 } else {
3219 printk(KERN_ERR "btrfs: balance will reduce metadata " 3218 btrfs_err(fs_info, "balance will reduce metadata "
3220 "integrity, use force if you want this\n"); 3219 "integrity, use force if you want this");
3221 ret = -EINVAL; 3220 ret = -EINVAL;
3222 goto out; 3221 goto out;
3223 } 3222 }
@@ -3303,7 +3302,7 @@ static int balance_kthread(void *data)
3303 mutex_lock(&fs_info->balance_mutex); 3302 mutex_lock(&fs_info->balance_mutex);
3304 3303
3305 if (fs_info->balance_ctl) { 3304 if (fs_info->balance_ctl) {
3306 printk(KERN_INFO "btrfs: continuing balance\n"); 3305 btrfs_info(fs_info, "continuing balance");
3307 ret = btrfs_balance(fs_info->balance_ctl, NULL); 3306 ret = btrfs_balance(fs_info->balance_ctl, NULL);
3308 } 3307 }
3309 3308
@@ -3325,7 +3324,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
3325 spin_unlock(&fs_info->balance_lock); 3324 spin_unlock(&fs_info->balance_lock);
3326 3325
3327 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { 3326 if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
3328 printk(KERN_INFO "btrfs: force skipping balance\n"); 3327 btrfs_info(fs_info, "force skipping balance");
3329 return 0; 3328 return 0;
3330 } 3329 }
3331 3330
@@ -3543,7 +3542,7 @@ update_tree:
3543 BTRFS_UUID_KEY_SUBVOL, 3542 BTRFS_UUID_KEY_SUBVOL,
3544 key.objectid); 3543 key.objectid);
3545 if (ret < 0) { 3544 if (ret < 0) {
3546 pr_warn("btrfs: uuid_tree_add failed %d\n", 3545 btrfs_warn(fs_info, "uuid_tree_add failed %d",
3547 ret); 3546 ret);
3548 break; 3547 break;
3549 } 3548 }
@@ -3555,7 +3554,7 @@ update_tree:
3555 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 3554 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
3556 key.objectid); 3555 key.objectid);
3557 if (ret < 0) { 3556 if (ret < 0) {
3558 pr_warn("btrfs: uuid_tree_add failed %d\n", 3557 btrfs_warn(fs_info, "uuid_tree_add failed %d",
3559 ret); 3558 ret);
3560 break; 3559 break;
3561 } 3560 }
@@ -3590,7 +3589,7 @@ out:
3590 if (trans && !IS_ERR(trans)) 3589 if (trans && !IS_ERR(trans))
3591 btrfs_end_transaction(trans, fs_info->uuid_root); 3590 btrfs_end_transaction(trans, fs_info->uuid_root);
3592 if (ret) 3591 if (ret)
3593 pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); 3592 btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
3594 else 3593 else
3595 fs_info->update_uuid_tree_gen = 1; 3594 fs_info->update_uuid_tree_gen = 1;
3596 up(&fs_info->uuid_tree_rescan_sem); 3595 up(&fs_info->uuid_tree_rescan_sem);
@@ -3654,7 +3653,7 @@ static int btrfs_uuid_rescan_kthread(void *data)
3654 */ 3653 */
3655 ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry); 3654 ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
3656 if (ret < 0) { 3655 if (ret < 0) {
3657 pr_warn("btrfs: iterating uuid_tree failed %d\n", ret); 3656 btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
3658 up(&fs_info->uuid_tree_rescan_sem); 3657 up(&fs_info->uuid_tree_rescan_sem);
3659 return ret; 3658 return ret;
3660 } 3659 }
@@ -3695,7 +3694,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
3695 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); 3694 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
3696 if (IS_ERR(task)) { 3695 if (IS_ERR(task)) {
3697 /* fs_info->update_uuid_tree_gen remains 0 in all error case */ 3696 /* fs_info->update_uuid_tree_gen remains 0 in all error case */
3698 pr_warn("btrfs: failed to start uuid_scan task\n"); 3697 btrfs_warn(fs_info, "failed to start uuid_scan task");
3699 up(&fs_info->uuid_tree_rescan_sem); 3698 up(&fs_info->uuid_tree_rescan_sem);
3700 return PTR_ERR(task); 3699 return PTR_ERR(task);
3701 } 3700 }
@@ -3711,7 +3710,7 @@ int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
3711 task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); 3710 task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
3712 if (IS_ERR(task)) { 3711 if (IS_ERR(task)) {
3713 /* fs_info->update_uuid_tree_gen remains 0 in all error case */ 3712 /* fs_info->update_uuid_tree_gen remains 0 in all error case */
3714 pr_warn("btrfs: failed to start uuid_rescan task\n"); 3713 btrfs_warn(fs_info, "failed to start uuid_rescan task");
3715 up(&fs_info->uuid_tree_rescan_sem); 3714 up(&fs_info->uuid_tree_rescan_sem);
3716 return PTR_ERR(task); 3715 return PTR_ERR(task);
3717 } 3716 }
@@ -4033,7 +4032,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4033 max_stripe_size = 32 * 1024 * 1024; 4032 max_stripe_size = 32 * 1024 * 1024;
4034 max_chunk_size = 2 * max_stripe_size; 4033 max_chunk_size = 2 * max_stripe_size;
4035 } else { 4034 } else {
 4036 printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", 4035 btrfs_err(info, "invalid chunk type 0x%llx requested",
4037 type); 4036 type);
4038 BUG_ON(1); 4037 BUG_ON(1);
4039 } 4038 }
@@ -4065,7 +4064,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4065 4064
4066 if (!device->writeable) { 4065 if (!device->writeable) {
4067 WARN(1, KERN_ERR 4066 WARN(1, KERN_ERR
4068 "btrfs: read-only device in alloc_list\n"); 4067 "BTRFS: read-only device in alloc_list\n");
4069 continue; 4068 continue;
4070 } 4069 }
4071 4070
@@ -5193,13 +5192,13 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
5193 read_unlock(&em_tree->lock); 5192 read_unlock(&em_tree->lock);
5194 5193
5195 if (!em) { 5194 if (!em) {
5196 printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n", 5195 printk(KERN_ERR "BTRFS: couldn't find em for chunk %Lu\n",
5197 chunk_start); 5196 chunk_start);
5198 return -EIO; 5197 return -EIO;
5199 } 5198 }
5200 5199
5201 if (em->start != chunk_start) { 5200 if (em->start != chunk_start) {
5202 printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n", 5201 printk(KERN_ERR "BTRFS: bad chunk start, em=%Lu, wanted=%Lu\n",
5203 em->start, chunk_start); 5202 em->start, chunk_start);
5204 free_extent_map(em); 5203 free_extent_map(em);
5205 return -EIO; 5204 return -EIO;
@@ -5298,6 +5297,13 @@ static void btrfs_end_bio(struct bio *bio, int err)
5298 bio_put(bio); 5297 bio_put(bio);
5299 bio = bbio->orig_bio; 5298 bio = bbio->orig_bio;
5300 } 5299 }
5300
5301 /*
 5302 * We have the original bio now, so increment bi_remaining to
 5303 * account for it in the endio path.
5304 */
5305 atomic_inc(&bio->bi_remaining);
5306
5301 bio->bi_private = bbio->private; 5307 bio->bi_private = bbio->private;
5302 bio->bi_end_io = bbio->end_io; 5308 bio->bi_end_io = bbio->end_io;
5303 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5309 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
@@ -5411,7 +5417,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
5411 if (!q->merge_bvec_fn) 5417 if (!q->merge_bvec_fn)
5412 return 1; 5418 return 1;
5413 5419
5414 bvm.bi_size = bio->bi_size - prev->bv_len; 5420 bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len;
5415 if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) 5421 if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
5416 return 0; 5422 return 0;
5417 return 1; 5423 return 1;
@@ -5426,7 +5432,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
5426 bio->bi_private = bbio; 5432 bio->bi_private = bbio;
5427 btrfs_io_bio(bio)->stripe_index = dev_nr; 5433 btrfs_io_bio(bio)->stripe_index = dev_nr;
5428 bio->bi_end_io = btrfs_end_bio; 5434 bio->bi_end_io = btrfs_end_bio;
5429 bio->bi_sector = physical >> 9; 5435 bio->bi_iter.bi_sector = physical >> 9;
5430#ifdef DEBUG 5436#ifdef DEBUG
5431 { 5437 {
5432 struct rcu_string *name; 5438 struct rcu_string *name;
@@ -5464,7 +5470,7 @@ again:
5464 while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { 5470 while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
5465 if (bio_add_page(bio, bvec->bv_page, bvec->bv_len, 5471 if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
5466 bvec->bv_offset) < bvec->bv_len) { 5472 bvec->bv_offset) < bvec->bv_len) {
5467 u64 len = bio->bi_size; 5473 u64 len = bio->bi_iter.bi_size;
5468 5474
5469 atomic_inc(&bbio->stripes_pending); 5475 atomic_inc(&bbio->stripes_pending);
5470 submit_stripe_bio(root, bbio, bio, physical, dev_nr, 5476 submit_stripe_bio(root, bbio, bio, physical, dev_nr,
@@ -5486,7 +5492,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
5486 bio->bi_private = bbio->private; 5492 bio->bi_private = bbio->private;
5487 bio->bi_end_io = bbio->end_io; 5493 bio->bi_end_io = bbio->end_io;
5488 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; 5494 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
5489 bio->bi_sector = logical >> 9; 5495 bio->bi_iter.bi_sector = logical >> 9;
5490 kfree(bbio); 5496 kfree(bbio);
5491 bio_endio(bio, -EIO); 5497 bio_endio(bio, -EIO);
5492 } 5498 }
@@ -5497,7 +5503,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5497{ 5503{
5498 struct btrfs_device *dev; 5504 struct btrfs_device *dev;
5499 struct bio *first_bio = bio; 5505 struct bio *first_bio = bio;
5500 u64 logical = (u64)bio->bi_sector << 9; 5506 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
5501 u64 length = 0; 5507 u64 length = 0;
5502 u64 map_length; 5508 u64 map_length;
5503 u64 *raid_map = NULL; 5509 u64 *raid_map = NULL;
@@ -5506,7 +5512,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
5506 int total_devs = 1; 5512 int total_devs = 1;
5507 struct btrfs_bio *bbio = NULL; 5513 struct btrfs_bio *bbio = NULL;
5508 5514
5509 length = bio->bi_size; 5515 length = bio->bi_iter.bi_size;
5510 map_length = length; 5516 map_length = length;
5511 5517
5512 ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, 5518 ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
@@ -6123,7 +6129,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
6123 BUG_ON(!path); 6129 BUG_ON(!path);
6124 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); 6130 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
6125 if (ret < 0) { 6131 if (ret < 0) {
6126 printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", 6132 printk_in_rcu(KERN_WARNING "BTRFS: "
6133 "error %d while searching for dev_stats item for device %s!\n",
6127 ret, rcu_str_deref(device->name)); 6134 ret, rcu_str_deref(device->name));
6128 goto out; 6135 goto out;
6129 } 6136 }
@@ -6133,7 +6140,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
6133 /* need to delete old one and insert a new one */ 6140 /* need to delete old one and insert a new one */
6134 ret = btrfs_del_item(trans, dev_root, path); 6141 ret = btrfs_del_item(trans, dev_root, path);
6135 if (ret != 0) { 6142 if (ret != 0) {
6136 printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", 6143 printk_in_rcu(KERN_WARNING "BTRFS: "
6144 "delete too small dev_stats item for device %s failed %d!\n",
6137 rcu_str_deref(device->name), ret); 6145 rcu_str_deref(device->name), ret);
6138 goto out; 6146 goto out;
6139 } 6147 }
@@ -6146,7 +6154,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
6146 ret = btrfs_insert_empty_item(trans, dev_root, path, 6154 ret = btrfs_insert_empty_item(trans, dev_root, path,
6147 &key, sizeof(*ptr)); 6155 &key, sizeof(*ptr));
6148 if (ret < 0) { 6156 if (ret < 0) {
6149 printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", 6157 printk_in_rcu(KERN_WARNING "BTRFS: "
6158 "insert dev_stats item for device %s failed %d!\n",
6150 rcu_str_deref(device->name), ret); 6159 rcu_str_deref(device->name), ret);
6151 goto out; 6160 goto out;
6152 } 6161 }
@@ -6199,16 +6208,14 @@ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
6199{ 6208{
6200 if (!dev->dev_stats_valid) 6209 if (!dev->dev_stats_valid)
6201 return; 6210 return;
6202 printk_ratelimited_in_rcu(KERN_ERR 6211 printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
6203 "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 6212 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
6204 rcu_str_deref(dev->name), 6213 rcu_str_deref(dev->name),
6205 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 6214 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
6206 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 6215 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
6207 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), 6216 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
6208 btrfs_dev_stat_read(dev, 6217 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
6209 BTRFS_DEV_STAT_CORRUPTION_ERRS), 6218 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
6210 btrfs_dev_stat_read(dev,
6211 BTRFS_DEV_STAT_GENERATION_ERRS));
6212} 6219}
6213 6220
6214static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) 6221static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
@@ -6221,7 +6228,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
6221 if (i == BTRFS_DEV_STAT_VALUES_MAX) 6228 if (i == BTRFS_DEV_STAT_VALUES_MAX)
6222 return; /* all values == 0, suppress message */ 6229 return; /* all values == 0, suppress message */
6223 6230
6224 printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", 6231 printk_in_rcu(KERN_INFO "BTRFS: "
6232 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
6225 rcu_str_deref(dev->name), 6233 rcu_str_deref(dev->name),
6226 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), 6234 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
6227 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), 6235 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
@@ -6242,12 +6250,10 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
 	mutex_unlock(&fs_devices->device_list_mutex);
 
 	if (!dev) {
-		printk(KERN_WARNING
-		       "btrfs: get dev_stats failed, device not found\n");
+		btrfs_warn(root->fs_info, "get dev_stats failed, device not found");
 		return -ENODEV;
 	} else if (!dev->dev_stats_valid) {
-		printk(KERN_WARNING
-		       "btrfs: get dev_stats failed, not yet valid\n");
+		btrfs_warn(root->fs_info, "get dev_stats failed, not yet valid");
 		return -ENODEV;
 	} else if (stats->flags & BTRFS_DEV_STATS_RESET) {
 		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
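The volumes.c hunks above replace open-coded printk calls with btrfs_warn(), which takes the fs_info so every message carries a uniform "BTRFS" prefix and identifies the filesystem it concerns. A minimal sketch of what such a wrapper can look like; this is an illustration only, not the kernel's actual definition, which also prints the device identity:

/* Illustrative sketch only -- not fs/btrfs's real helper. */
#define btrfs_warn(fs_info, fmt, ...)					\
do {									\
	(void)(fs_info);	/* real helper prints fs identity */	\
	printk(KERN_WARNING "BTRFS: " fmt "\n", ##__VA_ARGS__);		\
} while (0)

Call sites then collapse to one line, exactly as in the hunk above:
	btrfs_warn(root->fs_info, "get dev_stats failed, device not found");
Centralizing the prefix in one place also prevents the lowercase/uppercase drift ("btrfs:" vs "BTRFS:") that the rest of this series is cleaning up by hand.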
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 05740b9789e4..ad8328d797ea 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -22,11 +22,13 @@
 #include <linux/rwsem.h>
 #include <linux/xattr.h>
 #include <linux/security.h>
+#include <linux/posix_acl_xattr.h>
 #include "ctree.h"
 #include "btrfs_inode.h"
 #include "transaction.h"
 #include "xattr.h"
 #include "disk-io.h"
+#include "props.h"
 
 
 ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -313,8 +315,8 @@ err:
  */
 const struct xattr_handler *btrfs_xattr_handlers[] = {
 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
-	&btrfs_xattr_acl_access_handler,
-	&btrfs_xattr_acl_default_handler,
+	&posix_acl_access_xattr_handler,
+	&posix_acl_default_xattr_handler,
 #endif
 	NULL,
 };
@@ -331,7 +333,8 @@ static bool btrfs_is_valid_xattr(const char *name)
 			XATTR_SECURITY_PREFIX_LEN) ||
 		!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
 		!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
-		!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
+		!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
+		!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
 }
 
 ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
@@ -373,6 +376,10 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 	if (!btrfs_is_valid_xattr(name))
 		return -EOPNOTSUPP;
 
+	if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+		return btrfs_set_prop(dentry->d_inode, name,
+				      value, size, flags);
+
 	if (size == 0)
 		value = ""; /* empty EA, do not remove */
 
@@ -402,6 +409,10 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
 	if (!btrfs_is_valid_xattr(name))
 		return -EOPNOTSUPP;
 
+	if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+		return btrfs_set_prop(dentry->d_inode, name,
+				      NULL, 0, XATTR_REPLACE);
+
 	return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
 				XATTR_REPLACE);
 }
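Routing names under XATTR_BTRFS_PREFIX ("btrfs.") to btrfs_set_prop() means btrfs properties can be set and cleared through the ordinary xattr syscalls, with no new ioctl. A userspace sketch, assuming a kernel that carries this patch and that "btrfs.compression" is among the properties the new props code accepts; the file path is hypothetical:

/* Userspace sketch: set and clear a btrfs property via xattrs. */
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "/mnt/btrfs/file";	/* hypothetical mount */
	const char *value = "lzo";

	/* Dispatched to btrfs_set_prop() because of the "btrfs." prefix. */
	if (setxattr(path, "btrfs.compression", value, strlen(value), 0))
		perror("setxattr");

	/* Removing the xattr clears the property (the XATTR_REPLACE path). */
	if (removexattr(path, "btrfs.compression"))
		perror("removexattr");
	return 0;
}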
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index b3cc8039134b..5049608d1388 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -21,8 +21,6 @@
 
 #include <linux/xattr.h>
 
-extern const struct xattr_handler btrfs_xattr_acl_access_handler;
-extern const struct xattr_handler btrfs_xattr_acl_default_handler;
 extern const struct xattr_handler *btrfs_xattr_handlers[];
 
 extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
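The two btrfs-private ACL handler declarations can go because the xattr.c hunk above switches to the generic VFS handlers, which parse system.posix_acl_access and system.posix_acl_default centrally. The pattern any filesystem can follow is sketched below; the example_ name stands in for a hypothetical filesystem, and the table is what would typically be assigned to sb->s_xattr at mount time:

#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>

/* Sketch: list the generic ACL handlers; the VFS does the parsing. */
static const struct xattr_handler *example_xattr_handlers[] = {
	&posix_acl_access_xattr_handler,	/* system.posix_acl_access */
	&posix_acl_default_xattr_handler,	/* system.posix_acl_default */
	NULL,
};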
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 9acb846c3e7f..8e57191950cb 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -97,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws,
 	*total_in = 0;
 
 	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
-		printk(KERN_WARNING "btrfs: deflateInit failed\n");
+		printk(KERN_WARNING "BTRFS: deflateInit failed\n");
 		ret = -1;
 		goto out;
 	}
@@ -125,7 +125,7 @@ static int zlib_compress_pages(struct list_head *ws,
 	while (workspace->def_strm.total_in < len) {
 		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
 		if (ret != Z_OK) {
-			printk(KERN_DEBUG "btrfs: deflate in loop returned %d\n",
+			printk(KERN_DEBUG "BTRFS: deflate in loop returned %d\n",
 			       ret);
 			zlib_deflateEnd(&workspace->def_strm);
 			ret = -1;
@@ -252,7 +252,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
 	}
 
 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
-		printk(KERN_WARNING "btrfs: inflateInit failed\n");
+		printk(KERN_WARNING "BTRFS: inflateInit failed\n");
 		return -1;
 	}
 	while (workspace->inf_strm.total_in < srclen) {
@@ -336,7 +336,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
 	}
 
 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
-		printk(KERN_WARNING "btrfs: inflateInit failed\n");
+		printk(KERN_WARNING "BTRFS: inflateInit failed\n");
 		return -1;
 	}
 
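All four zlib.c hunks only recase the message prefix from "btrfs:" to "BTRFS:". The kernel's pr_fmt idiom makes such sweeps unnecessary by stamping one prefix on every message in a file; sketched here as a general alternative, not as something this patch does:

/* Sketch: pr_fmt must be defined before the first #include so the
 * pr_* helpers pick it up; every message then gets the prefix for free. */
#define pr_fmt(fmt) "BTRFS: " fmt

#include <linux/printk.h>

static void report_deflate_failure(void)
{
	pr_warn("deflateInit failed\n");	/* emits: BTRFS: deflateInit failed */
}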