about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fs/btrfs/Makefile 4
-rw-r--r-- fs/btrfs/btrfs_inode.h 3
-rw-r--r-- fs/btrfs/ctree.c 685
-rw-r--r-- fs/btrfs/ctree.h 308
-rw-r--r-- fs/btrfs/delayed-ref.c 509
-rw-r--r-- fs/btrfs/delayed-ref.h 85
-rw-r--r-- fs/btrfs/disk-io.c 95
-rw-r--r-- fs/btrfs/export.c 4
-rw-r--r-- fs/btrfs/extent-tree.c 2614
-rw-r--r-- fs/btrfs/file.c 76
-rw-r--r-- fs/btrfs/inode.c 132
-rw-r--r-- fs/btrfs/ioctl.c 21
-rw-r--r-- fs/btrfs/print-tree.c 155
-rw-r--r-- fs/btrfs/relocation.c 3711
-rw-r--r-- fs/btrfs/root-tree.c 17
-rw-r--r-- fs/btrfs/super.c 26
-rw-r--r-- fs/btrfs/transaction.c 410
-rw-r--r-- fs/btrfs/transaction.h 12
-rw-r--r-- fs/btrfs/tree-log.c 102
-rw-r--r-- fs/btrfs/volumes.c 2
20 files changed, 6928 insertions, 2043 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 94212844a9bc..a35eb36b32fd 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ 9 export.o tree-log.o acl.o free-space-cache.o zlib.o \
10 compression.o delayed-ref.o 10 compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b30986f00b9d..ecf5f7d8166f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -72,6 +72,9 @@ struct btrfs_inode {
72 */ 72 */
73 struct list_head ordered_operations; 73 struct list_head ordered_operations;
74 74
75 /* node for the red-black tree that links inodes in subvolume root */
76 struct rb_node rb_node;
77
75 /* the space_info for where this inode's data allocations are done */ 78 /* the space_info for where this inode's data allocations are done */
76 struct btrfs_space_info *space_info; 79 struct btrfs_space_info *space_info;
77 80
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index fedf8b9f03a2..2b960278a2f9 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
197 u32 nritems; 197 u32 nritems;
198 int ret = 0; 198 int ret = 0;
199 int level; 199 int level;
200 struct btrfs_root *new_root; 200 struct btrfs_disk_key disk_key;
201
202 new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
203 if (!new_root)
204 return -ENOMEM;
205
206 memcpy(new_root, root, sizeof(*new_root));
207 new_root->root_key.objectid = new_root_objectid;
208 201
209 WARN_ON(root->ref_cows && trans->transid != 202 WARN_ON(root->ref_cows && trans->transid !=
210 root->fs_info->running_transaction->transid); 203 root->fs_info->running_transaction->transid);
@@ -212,28 +205,37 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
212 205
213 level = btrfs_header_level(buf); 206 level = btrfs_header_level(buf);
214 nritems = btrfs_header_nritems(buf); 207 nritems = btrfs_header_nritems(buf);
208 if (level == 0)
209 btrfs_item_key(buf, &disk_key, 0);
210 else
211 btrfs_node_key(buf, &disk_key, 0);
215 212
216 cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, 213 cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
217 new_root_objectid, trans->transid, 214 new_root_objectid, &disk_key, level,
218 level, buf->start, 0); 215 buf->start, 0);
219 if (IS_ERR(cow)) { 216 if (IS_ERR(cow))
220 kfree(new_root);
221 return PTR_ERR(cow); 217 return PTR_ERR(cow);
222 }
223 218
224 copy_extent_buffer(cow, buf, 0, 0, cow->len); 219 copy_extent_buffer(cow, buf, 0, 0, cow->len);
225 btrfs_set_header_bytenr(cow, cow->start); 220 btrfs_set_header_bytenr(cow, cow->start);
226 btrfs_set_header_generation(cow, trans->transid); 221 btrfs_set_header_generation(cow, trans->transid);
227 btrfs_set_header_owner(cow, new_root_objectid); 222 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
228 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 223 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
224 BTRFS_HEADER_FLAG_RELOC);
225 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
226 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
227 else
228 btrfs_set_header_owner(cow, new_root_objectid);
229 229
230 write_extent_buffer(cow, root->fs_info->fsid, 230 write_extent_buffer(cow, root->fs_info->fsid,
231 (unsigned long)btrfs_header_fsid(cow), 231 (unsigned long)btrfs_header_fsid(cow),
232 BTRFS_FSID_SIZE); 232 BTRFS_FSID_SIZE);
233 233
234 WARN_ON(btrfs_header_generation(buf) > trans->transid); 234 WARN_ON(btrfs_header_generation(buf) > trans->transid);
235 ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); 235 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
236 kfree(new_root); 236 ret = btrfs_inc_ref(trans, root, cow, 1);
237 else
238 ret = btrfs_inc_ref(trans, root, cow, 0);
237 239
238 if (ret) 240 if (ret)
239 return ret; 241 return ret;
@@ -244,6 +246,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
244} 246}
245 247
246/* 248/*
249 * check if the tree block can be shared by multiple trees
250 */
251int btrfs_block_can_be_shared(struct btrfs_root *root,
252 struct extent_buffer *buf)
253{
254 /*
255 * Tree blocks not in reference counted trees and tree roots
256 * are never shared. If a block was allocated after the last
257 * snapshot and the block was not allocated by tree relocation,
258 * we know the block is not shared.
259 */
260 if (root->ref_cows &&
261 buf != root->node && buf != root->commit_root &&
262 (btrfs_header_generation(buf) <=
263 btrfs_root_last_snapshot(&root->root_item) ||
264 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
265 return 1;
266#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
267 if (root->ref_cows &&
268 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
269 return 1;
270#endif
271 return 0;
272}
273
274static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
275 struct btrfs_root *root,
276 struct extent_buffer *buf,
277 struct extent_buffer *cow)
278{
279 u64 refs;
280 u64 owner;
281 u64 flags;
282 u64 new_flags = 0;
283 int ret;
284
285 /*
286 * Backrefs update rules:
287 *
288 * Always use full backrefs for extent pointers in tree block
289 * allocated by tree relocation.
290 *
291 * If a shared tree block is no longer referenced by its owner
292 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
293 * use full backrefs for extent pointers in tree block.
294 *
295 * If a tree block is being relocated
296 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
297 * use full backrefs for extent pointers in tree block.
298 * The reason for this is some operations (such as drop tree)
299 * are only allowed for blocks that use full backrefs.
300 */
301
302 if (btrfs_block_can_be_shared(root, buf)) {
303 ret = btrfs_lookup_extent_info(trans, root, buf->start,
304 buf->len, &refs, &flags);
305 BUG_ON(ret);
306 BUG_ON(refs == 0);
307 } else {
308 refs = 1;
309 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
310 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
311 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
312 else
313 flags = 0;
314 }
315
316 owner = btrfs_header_owner(buf);
317 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
318 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
319
320 if (refs > 1) {
321 if ((owner == root->root_key.objectid ||
322 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
323 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
324 ret = btrfs_inc_ref(trans, root, buf, 1);
325 BUG_ON(ret);
326
327 if (root->root_key.objectid ==
328 BTRFS_TREE_RELOC_OBJECTID) {
329 ret = btrfs_dec_ref(trans, root, buf, 0);
330 BUG_ON(ret);
331 ret = btrfs_inc_ref(trans, root, cow, 1);
332 BUG_ON(ret);
333 }
334 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
335 } else {
336
337 if (root->root_key.objectid ==
338 BTRFS_TREE_RELOC_OBJECTID)
339 ret = btrfs_inc_ref(trans, root, cow, 1);
340 else
341 ret = btrfs_inc_ref(trans, root, cow, 0);
342 BUG_ON(ret);
343 }
344 if (new_flags != 0) {
345 ret = btrfs_set_disk_extent_flags(trans, root,
346 buf->start,
347 buf->len,
348 new_flags, 0);
349 BUG_ON(ret);
350 }
351 } else {
352 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
353 if (root->root_key.objectid ==
354 BTRFS_TREE_RELOC_OBJECTID)
355 ret = btrfs_inc_ref(trans, root, cow, 1);
356 else
357 ret = btrfs_inc_ref(trans, root, cow, 0);
358 BUG_ON(ret);
359 ret = btrfs_dec_ref(trans, root, buf, 1);
360 BUG_ON(ret);
361 }
362 clean_tree_block(trans, root, buf);
363 }
364 return 0;
365}
366
367/*
247 * does the dirty work in cow of a single block. The parent block (if 368 * does the dirty work in cow of a single block. The parent block (if
248 * supplied) is updated to point to the new cow copy. The new buffer is marked 369 * supplied) is updated to point to the new cow copy. The new buffer is marked
249 * dirty and returned locked. If you modify the block it needs to be marked 370 * dirty and returned locked. If you modify the block it needs to be marked
@@ -262,34 +383,39 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
262 struct extent_buffer **cow_ret, 383 struct extent_buffer **cow_ret,
263 u64 search_start, u64 empty_size) 384 u64 search_start, u64 empty_size)
264{ 385{
265 u64 parent_start; 386 struct btrfs_disk_key disk_key;
266 struct extent_buffer *cow; 387 struct extent_buffer *cow;
267 u32 nritems;
268 int ret = 0;
269 int level; 388 int level;
270 int unlock_orig = 0; 389 int unlock_orig = 0;
390 u64 parent_start;
271 391
272 if (*cow_ret == buf) 392 if (*cow_ret == buf)
273 unlock_orig = 1; 393 unlock_orig = 1;
274 394
275 btrfs_assert_tree_locked(buf); 395 btrfs_assert_tree_locked(buf);
276 396
277 if (parent)
278 parent_start = parent->start;
279 else
280 parent_start = 0;
281
282 WARN_ON(root->ref_cows && trans->transid != 397 WARN_ON(root->ref_cows && trans->transid !=
283 root->fs_info->running_transaction->transid); 398 root->fs_info->running_transaction->transid);
284 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 399 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
285 400
286 level = btrfs_header_level(buf); 401 level = btrfs_header_level(buf);
287 nritems = btrfs_header_nritems(buf);
288 402
289 cow = btrfs_alloc_free_block(trans, root, buf->len, 403 if (level == 0)
290 parent_start, root->root_key.objectid, 404 btrfs_item_key(buf, &disk_key, 0);
291 trans->transid, level, 405 else
292 search_start, empty_size); 406 btrfs_node_key(buf, &disk_key, 0);
407
408 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
409 if (parent)
410 parent_start = parent->start;
411 else
412 parent_start = 0;
413 } else
414 parent_start = 0;
415
416 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
417 root->root_key.objectid, &disk_key,
418 level, search_start, empty_size);
293 if (IS_ERR(cow)) 419 if (IS_ERR(cow))
294 return PTR_ERR(cow); 420 return PTR_ERR(cow);
295 421
@@ -298,83 +424,53 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
298 copy_extent_buffer(cow, buf, 0, 0, cow->len); 424 copy_extent_buffer(cow, buf, 0, 0, cow->len);
299 btrfs_set_header_bytenr(cow, cow->start); 425 btrfs_set_header_bytenr(cow, cow->start);
300 btrfs_set_header_generation(cow, trans->transid); 426 btrfs_set_header_generation(cow, trans->transid);
301 btrfs_set_header_owner(cow, root->root_key.objectid); 427 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
302 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 428 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
429 BTRFS_HEADER_FLAG_RELOC);
430 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
431 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
432 else
433 btrfs_set_header_owner(cow, root->root_key.objectid);
303 434
304 write_extent_buffer(cow, root->fs_info->fsid, 435 write_extent_buffer(cow, root->fs_info->fsid,
305 (unsigned long)btrfs_header_fsid(cow), 436 (unsigned long)btrfs_header_fsid(cow),
306 BTRFS_FSID_SIZE); 437 BTRFS_FSID_SIZE);
307 438
308 WARN_ON(btrfs_header_generation(buf) > trans->transid); 439 update_ref_for_cow(trans, root, buf, cow);
309 if (btrfs_header_generation(buf) != trans->transid) {
310 u32 nr_extents;
311 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
312 if (ret)
313 return ret;
314
315 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
316 WARN_ON(ret);
317 } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
318 /*
319 * There are only two places that can drop reference to
320 * tree blocks owned by living reloc trees, one is here,
321 * the other place is btrfs_drop_subtree. In both places,
322 * we check reference count while tree block is locked.
323 * Furthermore, if reference count is one, it won't get
324 * increased by someone else.
325 */
326 u32 refs;
327 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
328 buf->len, &refs);
329 BUG_ON(ret);
330 if (refs == 1) {
331 ret = btrfs_update_ref(trans, root, buf, cow,
332 0, nritems);
333 clean_tree_block(trans, root, buf);
334 } else {
335 ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
336 }
337 BUG_ON(ret);
338 } else {
339 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
340 if (ret)
341 return ret;
342 clean_tree_block(trans, root, buf);
343 }
344
345 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
346 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
347 WARN_ON(ret);
348 }
349 440
350 if (buf == root->node) { 441 if (buf == root->node) {
351 WARN_ON(parent && parent != buf); 442 WARN_ON(parent && parent != buf);
443 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
444 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
445 parent_start = buf->start;
446 else
447 parent_start = 0;
352 448
353 spin_lock(&root->node_lock); 449 spin_lock(&root->node_lock);
354 root->node = cow; 450 root->node = cow;
355 extent_buffer_get(cow); 451 extent_buffer_get(cow);
356 spin_unlock(&root->node_lock); 452 spin_unlock(&root->node_lock);
357 453
358 if (buf != root->commit_root) { 454 btrfs_free_extent(trans, root, buf->start, buf->len,
359 btrfs_free_extent(trans, root, buf->start, 455 parent_start, root->root_key.objectid,
360 buf->len, buf->start, 456 level, 0);
361 root->root_key.objectid,
362 btrfs_header_generation(buf),
363 level, 1);
364 }
365 free_extent_buffer(buf); 457 free_extent_buffer(buf);
366 add_root_to_dirty_list(root); 458 add_root_to_dirty_list(root);
367 } else { 459 } else {
460 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
461 parent_start = parent->start;
462 else
463 parent_start = 0;
464
465 WARN_ON(trans->transid != btrfs_header_generation(parent));
368 btrfs_set_node_blockptr(parent, parent_slot, 466 btrfs_set_node_blockptr(parent, parent_slot,
369 cow->start); 467 cow->start);
370 WARN_ON(trans->transid == 0);
371 btrfs_set_node_ptr_generation(parent, parent_slot, 468 btrfs_set_node_ptr_generation(parent, parent_slot,
372 trans->transid); 469 trans->transid);
373 btrfs_mark_buffer_dirty(parent); 470 btrfs_mark_buffer_dirty(parent);
374 WARN_ON(btrfs_header_generation(parent) != trans->transid);
375 btrfs_free_extent(trans, root, buf->start, buf->len, 471 btrfs_free_extent(trans, root, buf->start, buf->len,
376 parent_start, btrfs_header_owner(parent), 472 parent_start, root->root_key.objectid,
377 btrfs_header_generation(parent), level, 1); 473 level, 0);
378 } 474 }
379 if (unlock_orig) 475 if (unlock_orig)
380 btrfs_tree_unlock(buf); 476 btrfs_tree_unlock(buf);
@@ -384,6 +480,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
384 return 0; 480 return 0;
385} 481}
386 482
483static inline int should_cow_block(struct btrfs_trans_handle *trans,
484 struct btrfs_root *root,
485 struct extent_buffer *buf)
486{
487 if (btrfs_header_generation(buf) == trans->transid &&
488 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
489 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
490 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
491 return 0;
492 return 1;
493}
494
387/* 495/*
388 * cows a single block, see __btrfs_cow_block for the real work. 496 * cows a single block, see __btrfs_cow_block for the real work.
389 * This version of it has extra checks so that a block isn't cow'd more than 497 * This version of it has extra checks so that a block isn't cow'd more than
@@ -411,9 +519,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
411 WARN_ON(1); 519 WARN_ON(1);
412 } 520 }
413 521
414 if (btrfs_header_generation(buf) == trans->transid && 522 if (!should_cow_block(trans, root, buf)) {
415 btrfs_header_owner(buf) == root->root_key.objectid &&
416 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
417 *cow_ret = buf; 523 *cow_ret = buf;
418 return 0; 524 return 0;
419 } 525 }
@@ -469,7 +575,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
469/* 575/*
470 * same as comp_keys only with two btrfs_key's 576 * same as comp_keys only with two btrfs_key's
471 */ 577 */
472static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) 578int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
473{ 579{
474 if (k1->objectid > k2->objectid) 580 if (k1->objectid > k2->objectid)
475 return 1; 581 return 1;
@@ -845,6 +951,12 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
845 return -1; 951 return -1;
846} 952}
847 953
954int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
955 int level, int *slot)
956{
957 return bin_search(eb, key, level, slot);
958}
959
848/* given a node and slot number, this reads the blocks it points to. The 960/* given a node and slot number, this reads the blocks it points to. The
849 * extent buffer is returned with a reference taken (but unlocked). 961 * extent buffer is returned with a reference taken (but unlocked).
850 * NULL is returned on error. 962 * NULL is returned on error.
@@ -921,13 +1033,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
921 root->node = child; 1033 root->node = child;
922 spin_unlock(&root->node_lock); 1034 spin_unlock(&root->node_lock);
923 1035
924 ret = btrfs_update_extent_ref(trans, root, child->start,
925 child->len,
926 mid->start, child->start,
927 root->root_key.objectid,
928 trans->transid, level - 1);
929 BUG_ON(ret);
930
931 add_root_to_dirty_list(root); 1036 add_root_to_dirty_list(root);
932 btrfs_tree_unlock(child); 1037 btrfs_tree_unlock(child);
933 1038
@@ -938,9 +1043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
938 /* once for the path */ 1043 /* once for the path */
939 free_extent_buffer(mid); 1044 free_extent_buffer(mid);
940 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1045 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
941 mid->start, root->root_key.objectid, 1046 0, root->root_key.objectid, level, 1);
942 btrfs_header_generation(mid),
943 level, 1);
944 /* once for the root ptr */ 1047 /* once for the root ptr */
945 free_extent_buffer(mid); 1048 free_extent_buffer(mid);
946 return ret; 1049 return ret;
@@ -998,7 +1101,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
998 ret = wret; 1101 ret = wret;
999 if (btrfs_header_nritems(right) == 0) { 1102 if (btrfs_header_nritems(right) == 0) {
1000 u64 bytenr = right->start; 1103 u64 bytenr = right->start;
1001 u64 generation = btrfs_header_generation(parent);
1002 u32 blocksize = right->len; 1104 u32 blocksize = right->len;
1003 1105
1004 clean_tree_block(trans, root, right); 1106 clean_tree_block(trans, root, right);
@@ -1010,9 +1112,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1010 if (wret) 1112 if (wret)
1011 ret = wret; 1113 ret = wret;
1012 wret = btrfs_free_extent(trans, root, bytenr, 1114 wret = btrfs_free_extent(trans, root, bytenr,
1013 blocksize, parent->start, 1115 blocksize, 0,
1014 btrfs_header_owner(parent), 1116 root->root_key.objectid,
1015 generation, level, 1); 1117 level, 0);
1016 if (wret) 1118 if (wret)
1017 ret = wret; 1119 ret = wret;
1018 } else { 1120 } else {
@@ -1047,7 +1149,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1047 } 1149 }
1048 if (btrfs_header_nritems(mid) == 0) { 1150 if (btrfs_header_nritems(mid) == 0) {
1049 /* we've managed to empty the middle node, drop it */ 1151 /* we've managed to empty the middle node, drop it */
1050 u64 root_gen = btrfs_header_generation(parent);
1051 u64 bytenr = mid->start; 1152 u64 bytenr = mid->start;
1052 u32 blocksize = mid->len; 1153 u32 blocksize = mid->len;
1053 1154
@@ -1059,9 +1160,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1059 if (wret) 1160 if (wret)
1060 ret = wret; 1161 ret = wret;
1061 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1162 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1062 parent->start, 1163 0, root->root_key.objectid,
1063 btrfs_header_owner(parent), 1164 level, 0);
1064 root_gen, level, 1);
1065 if (wret) 1165 if (wret)
1066 ret = wret; 1166 ret = wret;
1067 } else { 1167 } else {
@@ -1437,7 +1537,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1437{ 1537{
1438 int i; 1538 int i;
1439 1539
1440 if (path->keep_locks || path->lowest_level) 1540 if (path->keep_locks)
1441 return; 1541 return;
1442 1542
1443 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1543 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1614,10 +1714,17 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1614 lowest_unlock = 2; 1714 lowest_unlock = 2;
1615 1715
1616again: 1716again:
1617 if (p->skip_locking) 1717 if (p->search_commit_root) {
1618 b = btrfs_root_node(root); 1718 b = root->commit_root;
1619 else 1719 extent_buffer_get(b);
1620 b = btrfs_lock_root_node(root); 1720 if (!p->skip_locking)
1721 btrfs_tree_lock(b);
1722 } else {
1723 if (p->skip_locking)
1724 b = btrfs_root_node(root);
1725 else
1726 b = btrfs_lock_root_node(root);
1727 }
1621 1728
1622 while (b) { 1729 while (b) {
1623 level = btrfs_header_level(b); 1730 level = btrfs_header_level(b);
@@ -1638,11 +1745,9 @@ again:
1638 * then we don't want to set the path blocking, 1745 * then we don't want to set the path blocking,
1639 * so we test it here 1746 * so we test it here
1640 */ 1747 */
1641 if (btrfs_header_generation(b) == trans->transid && 1748 if (!should_cow_block(trans, root, b))
1642 btrfs_header_owner(b) == root->root_key.objectid &&
1643 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1644 goto cow_done; 1749 goto cow_done;
1645 } 1750
1646 btrfs_set_path_blocking(p); 1751 btrfs_set_path_blocking(p);
1647 1752
1648 wret = btrfs_cow_block(trans, root, b, 1753 wret = btrfs_cow_block(trans, root, b,
@@ -1764,138 +1869,6 @@ done:
1764 return ret; 1869 return ret;
1765} 1870}
1766 1871
1767int btrfs_merge_path(struct btrfs_trans_handle *trans,
1768 struct btrfs_root *root,
1769 struct btrfs_key *node_keys,
1770 u64 *nodes, int lowest_level)
1771{
1772 struct extent_buffer *eb;
1773 struct extent_buffer *parent;
1774 struct btrfs_key key;
1775 u64 bytenr;
1776 u64 generation;
1777 u32 blocksize;
1778 int level;
1779 int slot;
1780 int key_match;
1781 int ret;
1782
1783 eb = btrfs_lock_root_node(root);
1784 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
1785 BUG_ON(ret);
1786
1787 btrfs_set_lock_blocking(eb);
1788
1789 parent = eb;
1790 while (1) {
1791 level = btrfs_header_level(parent);
1792 if (level == 0 || level <= lowest_level)
1793 break;
1794
1795 ret = bin_search(parent, &node_keys[lowest_level], level,
1796 &slot);
1797 if (ret && slot > 0)
1798 slot--;
1799
1800 bytenr = btrfs_node_blockptr(parent, slot);
1801 if (nodes[level - 1] == bytenr)
1802 break;
1803
1804 blocksize = btrfs_level_size(root, level - 1);
1805 generation = btrfs_node_ptr_generation(parent, slot);
1806 btrfs_node_key_to_cpu(eb, &key, slot);
1807 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1808
1809 if (generation == trans->transid) {
1810 eb = read_tree_block(root, bytenr, blocksize,
1811 generation);
1812 btrfs_tree_lock(eb);
1813 btrfs_set_lock_blocking(eb);
1814 }
1815
1816 /*
1817 * if node keys match and node pointer hasn't been modified
1818 * in the running transaction, we can merge the path. for
1819 * blocks owned by reloc trees, the node pointer check is
1820 * skipped, this is because these blocks are fully controlled
1821 * by the space balance code, no one else can modify them.
1822 */
1823 if (!nodes[level - 1] || !key_match ||
1824 (generation == trans->transid &&
1825 btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1826 if (level == 1 || level == lowest_level + 1) {
1827 if (generation == trans->transid) {
1828 btrfs_tree_unlock(eb);
1829 free_extent_buffer(eb);
1830 }
1831 break;
1832 }
1833
1834 if (generation != trans->transid) {
1835 eb = read_tree_block(root, bytenr, blocksize,
1836 generation);
1837 btrfs_tree_lock(eb);
1838 btrfs_set_lock_blocking(eb);
1839 }
1840
1841 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1842 &eb);
1843 BUG_ON(ret);
1844
1845 if (root->root_key.objectid ==
1846 BTRFS_TREE_RELOC_OBJECTID) {
1847 if (!nodes[level - 1]) {
1848 nodes[level - 1] = eb->start;
1849 memcpy(&node_keys[level - 1], &key,
1850 sizeof(node_keys[0]));
1851 } else {
1852 WARN_ON(1);
1853 }
1854 }
1855
1856 btrfs_tree_unlock(parent);
1857 free_extent_buffer(parent);
1858 parent = eb;
1859 continue;
1860 }
1861
1862 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1863 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1864 btrfs_mark_buffer_dirty(parent);
1865
1866 ret = btrfs_inc_extent_ref(trans, root,
1867 nodes[level - 1],
1868 blocksize, parent->start,
1869 btrfs_header_owner(parent),
1870 btrfs_header_generation(parent),
1871 level - 1);
1872 BUG_ON(ret);
1873
1874 /*
1875 * If the block was created in the running transaction,
1876 * it's possible this is the last reference to it, so we
1877 * should drop the subtree.
1878 */
1879 if (generation == trans->transid) {
1880 ret = btrfs_drop_subtree(trans, root, eb, parent);
1881 BUG_ON(ret);
1882 btrfs_tree_unlock(eb);
1883 free_extent_buffer(eb);
1884 } else {
1885 ret = btrfs_free_extent(trans, root, bytenr,
1886 blocksize, parent->start,
1887 btrfs_header_owner(parent),
1888 btrfs_header_generation(parent),
1889 level - 1, 1);
1890 BUG_ON(ret);
1891 }
1892 break;
1893 }
1894 btrfs_tree_unlock(parent);
1895 free_extent_buffer(parent);
1896 return 0;
1897}
1898
1899/* 1872/*
1900 * adjust the pointers going up the tree, starting at level 1873 * adjust the pointers going up the tree, starting at level
1901 * making sure the right key of each node points to 'key'. 1874 * making sure the right key of each node points to 'key'.
@@ -2021,9 +1994,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2021 btrfs_mark_buffer_dirty(src); 1994 btrfs_mark_buffer_dirty(src);
2022 btrfs_mark_buffer_dirty(dst); 1995 btrfs_mark_buffer_dirty(dst);
2023 1996
2024 ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
2025 BUG_ON(ret);
2026
2027 return ret; 1997 return ret;
2028} 1998}
2029 1999
@@ -2083,9 +2053,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
2083 btrfs_mark_buffer_dirty(src); 2053 btrfs_mark_buffer_dirty(src);
2084 btrfs_mark_buffer_dirty(dst); 2054 btrfs_mark_buffer_dirty(dst);
2085 2055
2086 ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
2087 BUG_ON(ret);
2088
2089 return ret; 2056 return ret;
2090} 2057}
2091 2058
@@ -2105,7 +2072,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2105 struct extent_buffer *c; 2072 struct extent_buffer *c;
2106 struct extent_buffer *old; 2073 struct extent_buffer *old;
2107 struct btrfs_disk_key lower_key; 2074 struct btrfs_disk_key lower_key;
2108 int ret;
2109 2075
2110 BUG_ON(path->nodes[level]); 2076 BUG_ON(path->nodes[level]);
2111 BUG_ON(path->nodes[level-1] != root->node); 2077 BUG_ON(path->nodes[level-1] != root->node);
@@ -2117,16 +2083,17 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2117 btrfs_node_key(lower, &lower_key, 0); 2083 btrfs_node_key(lower, &lower_key, 0);
2118 2084
2119 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2085 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2120 root->root_key.objectid, trans->transid, 2086 root->root_key.objectid, &lower_key,
2121 level, root->node->start, 0); 2087 level, root->node->start, 0);
2122 if (IS_ERR(c)) 2088 if (IS_ERR(c))
2123 return PTR_ERR(c); 2089 return PTR_ERR(c);
2124 2090
2125 memset_extent_buffer(c, 0, 0, root->nodesize); 2091 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
2126 btrfs_set_header_nritems(c, 1); 2092 btrfs_set_header_nritems(c, 1);
2127 btrfs_set_header_level(c, level); 2093 btrfs_set_header_level(c, level);
2128 btrfs_set_header_bytenr(c, c->start); 2094 btrfs_set_header_bytenr(c, c->start);
2129 btrfs_set_header_generation(c, trans->transid); 2095 btrfs_set_header_generation(c, trans->transid);
2096 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
2130 btrfs_set_header_owner(c, root->root_key.objectid); 2097 btrfs_set_header_owner(c, root->root_key.objectid);
2131 2098
2132 write_extent_buffer(c, root->fs_info->fsid, 2099 write_extent_buffer(c, root->fs_info->fsid,
@@ -2151,12 +2118,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2151 root->node = c; 2118 root->node = c;
2152 spin_unlock(&root->node_lock); 2119 spin_unlock(&root->node_lock);
2153 2120
2154 ret = btrfs_update_extent_ref(trans, root, lower->start,
2155 lower->len, lower->start, c->start,
2156 root->root_key.objectid,
2157 trans->transid, level - 1);
2158 BUG_ON(ret);
2159
2160 /* the super has an extra ref to root->node */ 2121 /* the super has an extra ref to root->node */
2161 free_extent_buffer(old); 2122 free_extent_buffer(old);
2162 2123
@@ -2244,20 +2205,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2244 } 2205 }
2245 2206
2246 c_nritems = btrfs_header_nritems(c); 2207 c_nritems = btrfs_header_nritems(c);
2208 mid = (c_nritems + 1) / 2;
2209 btrfs_node_key(c, &disk_key, mid);
2247 2210
2248 split = btrfs_alloc_free_block(trans, root, root->nodesize, 2211 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2249 path->nodes[level + 1]->start,
2250 root->root_key.objectid, 2212 root->root_key.objectid,
2251 trans->transid, level, c->start, 0); 2213 &disk_key, level, c->start, 0);
2252 if (IS_ERR(split)) 2214 if (IS_ERR(split))
2253 return PTR_ERR(split); 2215 return PTR_ERR(split);
2254 2216
2255 btrfs_set_header_flags(split, btrfs_header_flags(c)); 2217 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
2256 btrfs_set_header_level(split, btrfs_header_level(c)); 2218 btrfs_set_header_level(split, btrfs_header_level(c));
2257 btrfs_set_header_bytenr(split, split->start); 2219 btrfs_set_header_bytenr(split, split->start);
2258 btrfs_set_header_generation(split, trans->transid); 2220 btrfs_set_header_generation(split, trans->transid);
2221 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
2259 btrfs_set_header_owner(split, root->root_key.objectid); 2222 btrfs_set_header_owner(split, root->root_key.objectid);
2260 btrfs_set_header_flags(split, 0);
2261 write_extent_buffer(split, root->fs_info->fsid, 2223 write_extent_buffer(split, root->fs_info->fsid,
2262 (unsigned long)btrfs_header_fsid(split), 2224 (unsigned long)btrfs_header_fsid(split),
2263 BTRFS_FSID_SIZE); 2225 BTRFS_FSID_SIZE);
@@ -2265,7 +2227,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2265 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2227 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2266 BTRFS_UUID_SIZE); 2228 BTRFS_UUID_SIZE);
2267 2229
2268 mid = (c_nritems + 1) / 2;
2269 2230
2270 copy_extent_buffer(split, c, 2231 copy_extent_buffer(split, c,
2271 btrfs_node_key_ptr_offset(0), 2232 btrfs_node_key_ptr_offset(0),
@@ -2278,16 +2239,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2278 btrfs_mark_buffer_dirty(c); 2239 btrfs_mark_buffer_dirty(c);
2279 btrfs_mark_buffer_dirty(split); 2240 btrfs_mark_buffer_dirty(split);
2280 2241
2281 btrfs_node_key(split, &disk_key, 0);
2282 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2242 wret = insert_ptr(trans, root, path, &disk_key, split->start,
2283 path->slots[level + 1] + 1, 2243 path->slots[level + 1] + 1,
2284 level + 1); 2244 level + 1);
2285 if (wret) 2245 if (wret)
2286 ret = wret; 2246 ret = wret;
2287 2247
2288 ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2289 BUG_ON(ret);
2290
2291 if (path->slots[level] >= mid) { 2248 if (path->slots[level] >= mid) {
2292 path->slots[level] -= mid; 2249 path->slots[level] -= mid;
2293 btrfs_tree_unlock(c); 2250 btrfs_tree_unlock(c);
@@ -2360,7 +2317,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2360 u32 right_nritems; 2317 u32 right_nritems;
2361 u32 data_end; 2318 u32 data_end;
2362 u32 this_item_size; 2319 u32 this_item_size;
2363 int ret;
2364 2320
2365 if (empty) 2321 if (empty)
2366 nr = 0; 2322 nr = 0;
@@ -2473,9 +2429,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2473 btrfs_mark_buffer_dirty(left); 2429 btrfs_mark_buffer_dirty(left);
2474 btrfs_mark_buffer_dirty(right); 2430 btrfs_mark_buffer_dirty(right);
2475 2431
2476 ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2477 BUG_ON(ret);
2478
2479 btrfs_item_key(right, &disk_key, 0); 2432 btrfs_item_key(right, &disk_key, 0);
2480 btrfs_set_node_key(upper, &disk_key, slot + 1); 2433 btrfs_set_node_key(upper, &disk_key, slot + 1);
2481 btrfs_mark_buffer_dirty(upper); 2434 btrfs_mark_buffer_dirty(upper);
@@ -2720,10 +2673,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2720 if (right_nritems) 2673 if (right_nritems)
2721 btrfs_mark_buffer_dirty(right); 2674 btrfs_mark_buffer_dirty(right);
2722 2675
2723 ret = btrfs_update_ref(trans, root, right, left,
2724 old_left_nritems, push_items);
2725 BUG_ON(ret);
2726
2727 btrfs_item_key(right, &disk_key, 0); 2676 btrfs_item_key(right, &disk_key, 0);
2728 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2677 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2729 if (wret) 2678 if (wret)
@@ -2880,9 +2829,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
2880 btrfs_mark_buffer_dirty(l); 2829 btrfs_mark_buffer_dirty(l);
2881 BUG_ON(path->slots[0] != slot); 2830 BUG_ON(path->slots[0] != slot);
2882 2831
2883 ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2884 BUG_ON(ret);
2885
2886 if (mid <= slot) { 2832 if (mid <= slot) {
2887 btrfs_tree_unlock(path->nodes[0]); 2833 btrfs_tree_unlock(path->nodes[0]);
2888 free_extent_buffer(path->nodes[0]); 2834 free_extent_buffer(path->nodes[0]);
@@ -2911,6 +2857,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2911 struct btrfs_path *path, int data_size, 2857 struct btrfs_path *path, int data_size,
2912 int extend) 2858 int extend)
2913{ 2859{
2860 struct btrfs_disk_key disk_key;
2914 struct extent_buffer *l; 2861 struct extent_buffer *l;
2915 u32 nritems; 2862 u32 nritems;
2916 int mid; 2863 int mid;
@@ -2918,7 +2865,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2918 struct extent_buffer *right; 2865 struct extent_buffer *right;
2919 int ret = 0; 2866 int ret = 0;
2920 int wret; 2867 int wret;
2921 int double_split; 2868 int split;
2922 int num_doubles = 0; 2869 int num_doubles = 0;
2923 2870
2924 /* first try to make some room by pushing left and right */ 2871 /* first try to make some room by pushing left and right */
@@ -2945,16 +2892,53 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2945 return ret; 2892 return ret;
2946 } 2893 }
2947again: 2894again:
2948 double_split = 0; 2895 split = 1;
2949 l = path->nodes[0]; 2896 l = path->nodes[0];
2950 slot = path->slots[0]; 2897 slot = path->slots[0];
2951 nritems = btrfs_header_nritems(l); 2898 nritems = btrfs_header_nritems(l);
2952 mid = (nritems + 1) / 2; 2899 mid = (nritems + 1) / 2;
2953 2900
2954 right = btrfs_alloc_free_block(trans, root, root->leafsize, 2901 if (mid <= slot) {
2955 path->nodes[1]->start, 2902 if (nritems == 1 ||
2903 leaf_space_used(l, mid, nritems - mid) + data_size >
2904 BTRFS_LEAF_DATA_SIZE(root)) {
2905 if (slot >= nritems) {
2906 split = 0;
2907 } else {
2908 mid = slot;
2909 if (mid != nritems &&
2910 leaf_space_used(l, mid, nritems - mid) +
2911 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2912 split = 2;
2913 }
2914 }
2915 }
2916 } else {
2917 if (leaf_space_used(l, 0, mid) + data_size >
2918 BTRFS_LEAF_DATA_SIZE(root)) {
2919 if (!extend && data_size && slot == 0) {
2920 split = 0;
2921 } else if ((extend || !data_size) && slot == 0) {
2922 mid = 1;
2923 } else {
2924 mid = slot;
2925 if (mid != nritems &&
2926 leaf_space_used(l, mid, nritems - mid) +
2927 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2928 split = 2 ;
2929 }
2930 }
2931 }
2932 }
2933
2934 if (split == 0)
2935 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2936 else
2937 btrfs_item_key(l, &disk_key, mid);
2938
2939 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
2956 root->root_key.objectid, 2940 root->root_key.objectid,
2957 trans->transid, 0, l->start, 0); 2941 &disk_key, 0, l->start, 0);
2958 if (IS_ERR(right)) { 2942 if (IS_ERR(right)) {
2959 BUG_ON(1); 2943 BUG_ON(1);
2960 return PTR_ERR(right); 2944 return PTR_ERR(right);
@@ -2963,6 +2947,7 @@ again:
2963 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 2947 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2964 btrfs_set_header_bytenr(right, right->start); 2948 btrfs_set_header_bytenr(right, right->start);
2965 btrfs_set_header_generation(right, trans->transid); 2949 btrfs_set_header_generation(right, trans->transid);
2950 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
2966 btrfs_set_header_owner(right, root->root_key.objectid); 2951 btrfs_set_header_owner(right, root->root_key.objectid);
2967 btrfs_set_header_level(right, 0); 2952 btrfs_set_header_level(right, 0);
2968 write_extent_buffer(right, root->fs_info->fsid, 2953 write_extent_buffer(right, root->fs_info->fsid,
@@ -2973,79 +2958,47 @@ again:
2973 (unsigned long)btrfs_header_chunk_tree_uuid(right), 2958 (unsigned long)btrfs_header_chunk_tree_uuid(right),
2974 BTRFS_UUID_SIZE); 2959 BTRFS_UUID_SIZE);
2975 2960
2976 if (mid <= slot) { 2961 if (split == 0) {
2977 if (nritems == 1 || 2962 if (mid <= slot) {
2978 leaf_space_used(l, mid, nritems - mid) + data_size > 2963 btrfs_set_header_nritems(right, 0);
2979 BTRFS_LEAF_DATA_SIZE(root)) { 2964 wret = insert_ptr(trans, root, path,
2980 if (slot >= nritems) { 2965 &disk_key, right->start,
2981 struct btrfs_disk_key disk_key; 2966 path->slots[1] + 1, 1);
2982 2967 if (wret)
2983 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2968 ret = wret;
2984 btrfs_set_header_nritems(right, 0);
2985 wret = insert_ptr(trans, root, path,
2986 &disk_key, right->start,
2987 path->slots[1] + 1, 1);
2988 if (wret)
2989 ret = wret;
2990 2969
2991 btrfs_tree_unlock(path->nodes[0]); 2970 btrfs_tree_unlock(path->nodes[0]);
2992 free_extent_buffer(path->nodes[0]); 2971 free_extent_buffer(path->nodes[0]);
2993 path->nodes[0] = right; 2972 path->nodes[0] = right;
2994 path->slots[0] = 0; 2973 path->slots[0] = 0;
2995 path->slots[1] += 1; 2974 path->slots[1] += 1;
2996 btrfs_mark_buffer_dirty(right); 2975 } else {
2997 return ret; 2976 btrfs_set_header_nritems(right, 0);
2998 } 2977 wret = insert_ptr(trans, root, path,
2999 mid = slot; 2978 &disk_key,
3000 if (mid != nritems && 2979 right->start,
3001 leaf_space_used(l, mid, nritems - mid) + 2980 path->slots[1], 1);
3002 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2981 if (wret)
3003 double_split = 1; 2982 ret = wret;
3004 } 2983 btrfs_tree_unlock(path->nodes[0]);
3005 } 2984 free_extent_buffer(path->nodes[0]);
3006 } else { 2985 path->nodes[0] = right;
3007 if (leaf_space_used(l, 0, mid) + data_size > 2986 path->slots[0] = 0;
3008 BTRFS_LEAF_DATA_SIZE(root)) { 2987 if (path->slots[1] == 0) {
3009 if (!extend && data_size && slot == 0) { 2988 wret = fixup_low_keys(trans, root,
3010 struct btrfs_disk_key disk_key; 2989 path, &disk_key, 1);
3011
3012 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3013 btrfs_set_header_nritems(right, 0);
3014 wret = insert_ptr(trans, root, path,
3015 &disk_key,
3016 right->start,
3017 path->slots[1], 1);
3018 if (wret) 2990 if (wret)
3019 ret = wret; 2991 ret = wret;
3020 btrfs_tree_unlock(path->nodes[0]);
3021 free_extent_buffer(path->nodes[0]);
3022 path->nodes[0] = right;
3023 path->slots[0] = 0;
3024 if (path->slots[1] == 0) {
3025 wret = fixup_low_keys(trans, root,
3026 path, &disk_key, 1);
3027 if (wret)
3028 ret = wret;
3029 }
3030 btrfs_mark_buffer_dirty(right);
3031 return ret;
3032 } else if ((extend || !data_size) && slot == 0) {
3033 mid = 1;
3034 } else {
3035 mid = slot;
3036 if (mid != nritems &&
3037 leaf_space_used(l, mid, nritems - mid) +
3038 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3039 double_split = 1;
3040 }
3041 } 2992 }
3042 } 2993 }
2994 btrfs_mark_buffer_dirty(right);
2995 return ret;
3043 } 2996 }
3044 2997
3045 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); 2998 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
3046 BUG_ON(ret); 2999 BUG_ON(ret);
3047 3000
3048 if (double_split) { 3001 if (split == 2) {
3049 BUG_ON(num_doubles != 0); 3002 BUG_ON(num_doubles != 0);
3050 num_doubles++; 3003 num_doubles++;
3051 goto again; 3004 goto again;
@@ -3447,7 +3400,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3447 /* figure out how many keys we can insert in here */ 3400 /* figure out how many keys we can insert in here */
3448 total_data = data_size[0]; 3401 total_data = data_size[0];
3449 for (i = 1; i < nr; i++) { 3402 for (i = 1; i < nr; i++) {
3450 if (comp_cpu_keys(&found_key, cpu_key + i) <= 0) 3403 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3451 break; 3404 break;
3452 total_data += data_size[i]; 3405 total_data += data_size[i];
3453 } 3406 }
@@ -3745,9 +3698,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3745 3698
3746/* 3699/*
3747 * a helper function to delete the leaf pointed to by path->slots[1] and 3700 * a helper function to delete the leaf pointed to by path->slots[1] and
3748 * path->nodes[1]. bytenr is the node block pointer, but since the callers 3701 * path->nodes[1].
3749 * already know it, it is faster to have them pass it down than to
3750 * read it out of the node again.
3751 * 3702 *
3752 * This deletes the pointer in path->nodes[1] and frees the leaf 3703 * This deletes the pointer in path->nodes[1] and frees the leaf
3753 * block extent. zero is returned if it all worked out, < 0 otherwise. 3704 * block extent. zero is returned if it all worked out, < 0 otherwise.
@@ -3755,15 +3706,14 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3755 * The path must have already been setup for deleting the leaf, including 3706 * The path must have already been setup for deleting the leaf, including
3756 * all the proper balancing. path->nodes[1] must be locked. 3707 * all the proper balancing. path->nodes[1] must be locked.
3757 */ 3708 */
3758noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3709static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3759 struct btrfs_root *root, 3710 struct btrfs_root *root,
3760 struct btrfs_path *path, u64 bytenr) 3711 struct btrfs_path *path,
3712 struct extent_buffer *leaf)
3761{ 3713{
3762 int ret; 3714 int ret;
3763 u64 root_gen = btrfs_header_generation(path->nodes[1]);
3764 u64 parent_start = path->nodes[1]->start;
3765 u64 parent_owner = btrfs_header_owner(path->nodes[1]);
3766 3715
3716 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
3767 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3717 ret = del_ptr(trans, root, path, 1, path->slots[1]);
3768 if (ret) 3718 if (ret)
3769 return ret; 3719 return ret;
@@ -3774,10 +3724,8 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3774 */ 3724 */
3775 btrfs_unlock_up_safe(path, 0); 3725 btrfs_unlock_up_safe(path, 0);
3776 3726
3777 ret = btrfs_free_extent(trans, root, bytenr, 3727 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
3778 btrfs_level_size(root, 0), 3728 0, root->root_key.objectid, 0, 0);
3779 parent_start, parent_owner,
3780 root_gen, 0, 1);
3781 return ret; 3729 return ret;
3782} 3730}
3783/* 3731/*
@@ -3845,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3845 if (leaf == root->node) { 3793 if (leaf == root->node) {
3846 btrfs_set_header_level(leaf, 0); 3794 btrfs_set_header_level(leaf, 0);
3847 } else { 3795 } else {
3848 ret = btrfs_del_leaf(trans, root, path, leaf->start); 3796 ret = btrfs_del_leaf(trans, root, path, leaf);
3849 BUG_ON(ret); 3797 BUG_ON(ret);
3850 } 3798 }
3851 } else { 3799 } else {
@@ -3884,8 +3832,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3884 3832
3885 if (btrfs_header_nritems(leaf) == 0) { 3833 if (btrfs_header_nritems(leaf) == 0) {
3886 path->slots[1] = slot; 3834 path->slots[1] = slot;
3887 ret = btrfs_del_leaf(trans, root, path, 3835 ret = btrfs_del_leaf(trans, root, path, leaf);
3888 leaf->start);
3889 BUG_ON(ret); 3836 BUG_ON(ret);
3890 free_extent_buffer(leaf); 3837 free_extent_buffer(leaf);
3891 } else { 3838 } else {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4414a5d9983a..ce3ab4e13064 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,8 @@ struct btrfs_ordered_sum;
45 45
46#define BTRFS_MAX_LEVEL 8 46#define BTRFS_MAX_LEVEL 8
47 47
48#define BTRFS_COMPAT_EXTENT_TREE_V0
49
48/* 50/*
49 * files bigger than this get some pre-flushing when they are added 51 * files bigger than this get some pre-flushing when they are added
50 * to the ordered operations list. That way we limit the total 52 * to the ordered operations list. That way we limit the total
@@ -267,7 +269,18 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
267} 269}
268 270
269#define BTRFS_FSID_SIZE 16 271#define BTRFS_FSID_SIZE 16
270#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0) 272#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
273#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
274#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
275#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
276
277#define BTRFS_BACKREF_REV_MAX 256
278#define BTRFS_BACKREF_REV_SHIFT 56
279#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
280 BTRFS_BACKREF_REV_SHIFT)
281
282#define BTRFS_OLD_BACKREF_REV 0
283#define BTRFS_MIXED_BACKREF_REV 1
271 284
272/* 285/*
273 * every tree block (leaf or node) starts with this header. 286 * every tree block (leaf or node) starts with this header.
@@ -296,7 +309,6 @@ struct btrfs_header {
296 sizeof(struct btrfs_item) - \ 309 sizeof(struct btrfs_item) - \
297 sizeof(struct btrfs_file_extent_item)) 310 sizeof(struct btrfs_file_extent_item))
298 311
299#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
300 312
301/* 313/*
302 * this is a very generous portion of the super block, giving us 314 * this is a very generous portion of the super block, giving us
@@ -355,9 +367,12 @@ struct btrfs_super_block {
355 * Compat flags that we support. If any incompat flags are set other than the 367 * Compat flags that we support. If any incompat flags are set other than the
356 * ones specified below then we will fail to mount 368 * ones specified below then we will fail to mount
357 */ 369 */
358#define BTRFS_FEATURE_COMPAT_SUPP 0x0 370#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
359#define BTRFS_FEATURE_COMPAT_RO_SUPP 0x0 371
360#define BTRFS_FEATURE_INCOMPAT_SUPP 0x0 372#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
373#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
374#define BTRFS_FEATURE_INCOMPAT_SUPP \
375 BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF
361 376
362/* 377/*
363 * A leaf is full of items. offset and size tell us where to find 378 * A leaf is full of items. offset and size tell us where to find
@@ -421,23 +436,65 @@ struct btrfs_path {
421 unsigned int keep_locks:1; 436 unsigned int keep_locks:1;
422 unsigned int skip_locking:1; 437 unsigned int skip_locking:1;
423 unsigned int leave_spinning:1; 438 unsigned int leave_spinning:1;
439 unsigned int search_commit_root:1;
424}; 440};
425 441
426/* 442/*
427 * items in the extent btree are used to record the objectid of the 443 * items in the extent btree are used to record the objectid of the
428 * owner of the block and the number of references 444 * owner of the block and the number of references
429 */ 445 */
446
430struct btrfs_extent_item { 447struct btrfs_extent_item {
448 __le64 refs;
449 __le64 generation;
450 __le64 flags;
451} __attribute__ ((__packed__));
452
453struct btrfs_extent_item_v0 {
431 __le32 refs; 454 __le32 refs;
432} __attribute__ ((__packed__)); 455} __attribute__ ((__packed__));
433 456
434struct btrfs_extent_ref { 457#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
458 sizeof(struct btrfs_item))
459
460#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
461#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
462
463/* following flags only apply to tree blocks */
464
465/* use full backrefs for extent pointers in the block */
466#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
467
468struct btrfs_tree_block_info {
469 struct btrfs_disk_key key;
470 u8 level;
471} __attribute__ ((__packed__));
472
473struct btrfs_extent_data_ref {
474 __le64 root;
475 __le64 objectid;
476 __le64 offset;
477 __le32 count;
478} __attribute__ ((__packed__));
479
480struct btrfs_shared_data_ref {
481 __le32 count;
482} __attribute__ ((__packed__));
483
484struct btrfs_extent_inline_ref {
485 u8 type;
486 u64 offset;
487} __attribute__ ((__packed__));
488
489/* old style backrefs item */
490struct btrfs_extent_ref_v0 {
435 __le64 root; 491 __le64 root;
436 __le64 generation; 492 __le64 generation;
437 __le64 objectid; 493 __le64 objectid;
438 __le32 num_refs; 494 __le32 count;
439} __attribute__ ((__packed__)); 495} __attribute__ ((__packed__));
440 496
497
441/* dev extents record free space on individual devices. The owner 498/* dev extents record free space on individual devices. The owner
442 * field points back to the chunk allocation mapping tree that allocated 499 * field points back to the chunk allocation mapping tree that allocated
443 * the extent. The chunk tree uuid field is a way to double check the owner 500 * the extent. The chunk tree uuid field is a way to double check the owner
@@ -695,12 +752,7 @@ struct btrfs_block_group_cache {
695 struct list_head cluster_list; 752 struct list_head cluster_list;
696}; 753};
697 754
698struct btrfs_leaf_ref_tree { 755struct reloc_control;
699 struct rb_root root;
700 struct list_head list;
701 spinlock_t lock;
702};
703
704struct btrfs_device; 756struct btrfs_device;
705struct btrfs_fs_devices; 757struct btrfs_fs_devices;
706struct btrfs_fs_info { 758struct btrfs_fs_info {
@@ -831,18 +883,11 @@ struct btrfs_fs_info {
831 struct task_struct *cleaner_kthread; 883 struct task_struct *cleaner_kthread;
832 int thread_pool_size; 884 int thread_pool_size;
833 885
834 /* tree relocation relocated fields */
835 struct list_head dead_reloc_roots;
836 struct btrfs_leaf_ref_tree reloc_ref_tree;
837 struct btrfs_leaf_ref_tree shared_ref_tree;
838
839 struct kobject super_kobj; 886 struct kobject super_kobj;
840 struct completion kobj_unregister; 887 struct completion kobj_unregister;
841 int do_barriers; 888 int do_barriers;
842 int closing; 889 int closing;
843 int log_root_recovering; 890 int log_root_recovering;
844 atomic_t throttles;
845 atomic_t throttle_gen;
846 891
847 u64 total_pinned; 892 u64 total_pinned;
848 893
@@ -861,6 +906,8 @@ struct btrfs_fs_info {
861 */ 906 */
862 struct list_head space_info; 907 struct list_head space_info;
863 908
909 struct reloc_control *reloc_ctl;
910
864 spinlock_t delalloc_lock; 911 spinlock_t delalloc_lock;
865 spinlock_t new_trans_lock; 912 spinlock_t new_trans_lock;
866 u64 delalloc_bytes; 913 u64 delalloc_bytes;
@@ -891,7 +938,6 @@ struct btrfs_fs_info {
891 * in ram representation of the tree. extent_root is used for all allocations 938 * in ram representation of the tree. extent_root is used for all allocations
892 * and for the extent tree extent_root root. 939 * and for the extent tree extent_root root.
893 */ 940 */
894struct btrfs_dirty_root;
895struct btrfs_root { 941struct btrfs_root {
896 struct extent_buffer *node; 942 struct extent_buffer *node;
897 943
@@ -899,9 +945,6 @@ struct btrfs_root {
899 spinlock_t node_lock; 945 spinlock_t node_lock;
900 946
901 struct extent_buffer *commit_root; 947 struct extent_buffer *commit_root;
902 struct btrfs_leaf_ref_tree *ref_tree;
903 struct btrfs_leaf_ref_tree ref_tree_struct;
904 struct btrfs_dirty_root *dirty_root;
905 struct btrfs_root *log_root; 948 struct btrfs_root *log_root;
906 struct btrfs_root *reloc_root; 949 struct btrfs_root *reloc_root;
907 950
@@ -952,10 +995,15 @@ struct btrfs_root {
952 /* the dirty list is only used by non-reference counted roots */ 995 /* the dirty list is only used by non-reference counted roots */
953 struct list_head dirty_list; 996 struct list_head dirty_list;
954 997
998 struct list_head root_list;
999
955 spinlock_t list_lock; 1000 spinlock_t list_lock;
956 struct list_head dead_list;
957 struct list_head orphan_list; 1001 struct list_head orphan_list;
958 1002
1003 spinlock_t inode_lock;
1004 /* red-black tree that keeps track of in-memory inodes */
1005 struct rb_root inode_tree;
1006
959 /* 1007 /*
960 * right now this just gets used so that a root has its own devid 1008 * right now this just gets used so that a root has its own devid
961 * for stat. It may be used for more later 1009 * for stat. It may be used for more later
@@ -1017,7 +1065,16 @@ struct btrfs_root {
1017 * are used, and how many references there are to each block 1065 * are used, and how many references there are to each block
1018 */ 1066 */
1019#define BTRFS_EXTENT_ITEM_KEY 168 1067#define BTRFS_EXTENT_ITEM_KEY 168
1020#define BTRFS_EXTENT_REF_KEY 180 1068
1069#define BTRFS_TREE_BLOCK_REF_KEY 176
1070
1071#define BTRFS_EXTENT_DATA_REF_KEY 178
1072
1073#define BTRFS_EXTENT_REF_V0_KEY 180
1074
1075#define BTRFS_SHARED_BLOCK_REF_KEY 182
1076
1077#define BTRFS_SHARED_DATA_REF_KEY 184
1021 1078
1022/* 1079/*
1023 * block groups give us hints into the extent allocation trees. Which 1080 * block groups give us hints into the extent allocation trees. Which
@@ -1317,24 +1374,67 @@ static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
1317 return (u8 *)((unsigned long)dev + ptr); 1374 return (u8 *)((unsigned long)dev + ptr);
1318} 1375}
1319 1376
1320/* struct btrfs_extent_ref */ 1377BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
1321BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); 1378BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
1322BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); 1379 generation, 64);
1323BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); 1380BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
1324BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
1325 1381
1326BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); 1382BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1327BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, 1383
1328 generation, 64); 1384
1329BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, 1385BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1330 objectid, 64); 1386
1331BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, 1387static inline void btrfs_tree_block_key(struct extent_buffer *eb,
1332 num_refs, 32); 1388 struct btrfs_tree_block_info *item,
1389 struct btrfs_disk_key *key)
1390{
1391 read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
1392}
1393
1394static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
1395 struct btrfs_tree_block_info *item,
1396 struct btrfs_disk_key *key)
1397{
1398 write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
1399}
1333 1400
1334/* struct btrfs_extent_item */ 1401BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
1335BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); 1402 root, 64);
1336BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, 1403BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
1337 refs, 32); 1404 objectid, 64);
1405BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
1406 offset, 64);
1407BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
1408 count, 32);
1409
1410BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
1411 count, 32);
1412
1413BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
1414 type, 8);
1415BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
1416 offset, 64);
1417
1418static inline u32 btrfs_extent_inline_ref_size(int type)
1419{
1420 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
1421 type == BTRFS_SHARED_BLOCK_REF_KEY)
1422 return sizeof(struct btrfs_extent_inline_ref);
1423 if (type == BTRFS_SHARED_DATA_REF_KEY)
1424 return sizeof(struct btrfs_shared_data_ref) +
1425 sizeof(struct btrfs_extent_inline_ref);
1426 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1427 return sizeof(struct btrfs_extent_data_ref) +
1428 offsetof(struct btrfs_extent_inline_ref, offset);
1429 BUG();
1430 return 0;
1431}
1432
1433BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
1434BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
1435 generation, 64);
1436BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
1437BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
1338 1438
1339/* struct btrfs_node */ 1439/* struct btrfs_node */
1340BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); 1440BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
@@ -1558,6 +1658,21 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
1558 return (flags & flag) == flag; 1658 return (flags & flag) == flag;
1559} 1659}
1560 1660
1661static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
1662{
1663 u64 flags = btrfs_header_flags(eb);
1664 return flags >> BTRFS_BACKREF_REV_SHIFT;
1665}
1666
1667static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
1668 int rev)
1669{
1670 u64 flags = btrfs_header_flags(eb);
1671 flags &= ~BTRFS_BACKREF_REV_MASK;
1672 flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT;
1673 btrfs_set_header_flags(eb, flags);
1674}
1675
1561static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) 1676static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
1562{ 1677{
1563 unsigned long ptr = offsetof(struct btrfs_header, fsid); 1678 unsigned long ptr = offsetof(struct btrfs_header, fsid);
@@ -1790,39 +1905,32 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
1790int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 1905int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1791 struct btrfs_root *root, struct extent_buffer *leaf); 1906 struct btrfs_root *root, struct extent_buffer *leaf);
1792int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1907int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1793 struct btrfs_root *root, u64 objectid, u64 bytenr); 1908 struct btrfs_root *root,
1909 u64 objectid, u64 offset, u64 bytenr);
1794int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); 1910int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
1795struct btrfs_block_group_cache *btrfs_lookup_block_group( 1911struct btrfs_block_group_cache *btrfs_lookup_block_group(
1796 struct btrfs_fs_info *info, 1912 struct btrfs_fs_info *info,
1797 u64 bytenr); 1913 u64 bytenr);
1914void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1798u64 btrfs_find_block_group(struct btrfs_root *root, 1915u64 btrfs_find_block_group(struct btrfs_root *root,
1799 u64 search_start, u64 search_hint, int owner); 1916 u64 search_start, u64 search_hint, int owner);
1800struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1917struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1801 struct btrfs_root *root, 1918 struct btrfs_root *root, u32 blocksize,
1802 u32 blocksize, u64 parent, 1919 u64 parent, u64 root_objectid,
1803 u64 root_objectid, 1920 struct btrfs_disk_key *key, int level,
1804 u64 ref_generation, 1921 u64 hint, u64 empty_size);
1805 int level,
1806 u64 hint,
1807 u64 empty_size);
1808struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1922struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1809 struct btrfs_root *root, 1923 struct btrfs_root *root,
1810 u64 bytenr, u32 blocksize, 1924 u64 bytenr, u32 blocksize,
1811 int level); 1925 int level);
1812int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1926int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
1813 struct btrfs_root *root, 1927 struct btrfs_root *root,
1814 u64 num_bytes, u64 parent, u64 min_bytes, 1928 u64 root_objectid, u64 owner,
1815 u64 root_objectid, u64 ref_generation, 1929 u64 offset, struct btrfs_key *ins);
1816 u64 owner, u64 empty_size, u64 hint_byte, 1930int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
1817 u64 search_end, struct btrfs_key *ins, u64 data); 1931 struct btrfs_root *root,
1818int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 1932 u64 root_objectid, u64 owner, u64 offset,
1819 struct btrfs_root *root, u64 parent, 1933 struct btrfs_key *ins);
1820 u64 root_objectid, u64 ref_generation,
1821 u64 owner, struct btrfs_key *ins);
1822int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
1823 struct btrfs_root *root, u64 parent,
1824 u64 root_objectid, u64 ref_generation,
1825 u64 owner, struct btrfs_key *ins);
1826int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 1934int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1827 struct btrfs_root *root, 1935 struct btrfs_root *root,
1828 u64 num_bytes, u64 min_alloc_size, 1936 u64 num_bytes, u64 min_alloc_size,
@@ -1830,18 +1938,18 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1830 u64 search_end, struct btrfs_key *ins, 1938 u64 search_end, struct btrfs_key *ins,
1831 u64 data); 1939 u64 data);
1832int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1940int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1833 struct extent_buffer *orig_buf, struct extent_buffer *buf, 1941 struct extent_buffer *buf, int full_backref);
1834 u32 *nr_extents); 1942int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1835int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1943 struct extent_buffer *buf, int full_backref);
1836 struct extent_buffer *buf, u32 nr_extents); 1944int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
1837int btrfs_update_ref(struct btrfs_trans_handle *trans, 1945 struct btrfs_root *root,
1838 struct btrfs_root *root, struct extent_buffer *orig_buf, 1946 u64 bytenr, u64 num_bytes, u64 flags,
1839 struct extent_buffer *buf, int start_slot, int nr); 1947 int is_data);
1840int btrfs_free_extent(struct btrfs_trans_handle *trans, 1948int btrfs_free_extent(struct btrfs_trans_handle *trans,
1841 struct btrfs_root *root, 1949 struct btrfs_root *root,
1842 u64 bytenr, u64 num_bytes, u64 parent, 1950 u64 bytenr, u64 num_bytes, u64 parent,
1843 u64 root_objectid, u64 ref_generation, 1951 u64 root_objectid, u64 owner, u64 offset);
1844 u64 owner_objectid, int pin); 1952
1845int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 1953int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
1846int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 1954int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1847 struct btrfs_root *root, 1955 struct btrfs_root *root,
@@ -1849,13 +1957,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1849int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1957int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1850 struct btrfs_root *root, 1958 struct btrfs_root *root,
1851 u64 bytenr, u64 num_bytes, u64 parent, 1959 u64 bytenr, u64 num_bytes, u64 parent,
1852 u64 root_objectid, u64 ref_generation, 1960 u64 root_objectid, u64 owner, u64 offset);
1853 u64 owner_objectid); 1961
1854int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1855 struct btrfs_root *root, u64 bytenr, u64 num_bytes,
1856 u64 orig_parent, u64 parent,
1857 u64 root_objectid, u64 ref_generation,
1858 u64 owner_objectid);
1859int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 1962int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1860 struct btrfs_root *root); 1963 struct btrfs_root *root);
1861int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 1964int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
@@ -1867,16 +1970,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1867 u64 size); 1970 u64 size);
1868int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 1971int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
1869 struct btrfs_root *root, u64 group_start); 1972 struct btrfs_root *root, u64 group_start);
1870int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); 1973int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
1871int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 1974 struct btrfs_block_group_cache *group);
1872 struct btrfs_root *root); 1975
1873int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
1874int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
1875 struct btrfs_root *root,
1876 struct extent_buffer *buf, u64 orig_start);
1877int btrfs_add_dead_reloc_root(struct btrfs_root *root);
1878int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
1879int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
1880u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 1976u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
1881void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 1977void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
1882void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 1978void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
@@ -1891,13 +1987,12 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
1891void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 1987void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1892 u64 bytes); 1988 u64 bytes);
1893/* ctree.c */ 1989/* ctree.c */
1990int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1991 int level, int *slot);
1992int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
1894int btrfs_previous_item(struct btrfs_root *root, 1993int btrfs_previous_item(struct btrfs_root *root,
1895 struct btrfs_path *path, u64 min_objectid, 1994 struct btrfs_path *path, u64 min_objectid,
1896 int type); 1995 int type);
1897int btrfs_merge_path(struct btrfs_trans_handle *trans,
1898 struct btrfs_root *root,
1899 struct btrfs_key *node_keys,
1900 u64 *nodes, int lowest_level);
1901int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 1996int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1902 struct btrfs_root *root, struct btrfs_path *path, 1997 struct btrfs_root *root, struct btrfs_path *path,
1903 struct btrfs_key *new_key); 1998 struct btrfs_key *new_key);
@@ -1918,6 +2013,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
1918 struct btrfs_root *root, 2013 struct btrfs_root *root,
1919 struct extent_buffer *buf, 2014 struct extent_buffer *buf,
1920 struct extent_buffer **cow_ret, u64 new_root_objectid); 2015 struct extent_buffer **cow_ret, u64 new_root_objectid);
2016int btrfs_block_can_be_shared(struct btrfs_root *root,
2017 struct extent_buffer *buf);
1921int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root 2018int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1922 *root, struct btrfs_path *path, u32 data_size); 2019 *root, struct btrfs_path *path, u32 data_size);
1923int btrfs_truncate_item(struct btrfs_trans_handle *trans, 2020int btrfs_truncate_item(struct btrfs_trans_handle *trans,
@@ -1944,9 +2041,6 @@ void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
1944 2041
1945int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2042int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1946 struct btrfs_path *path, int slot, int nr); 2043 struct btrfs_path *path, int slot, int nr);
1947int btrfs_del_leaf(struct btrfs_trans_handle *trans,
1948 struct btrfs_root *root,
1949 struct btrfs_path *path, u64 bytenr);
1950static inline int btrfs_del_item(struct btrfs_trans_handle *trans, 2044static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
1951 struct btrfs_root *root, 2045 struct btrfs_root *root,
1952 struct btrfs_path *path) 2046 struct btrfs_path *path)
@@ -2005,8 +2099,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2005 btrfs_root_item *item, struct btrfs_key *key); 2099 btrfs_root_item *item, struct btrfs_key *key);
2006int btrfs_search_root(struct btrfs_root *root, u64 search_start, 2100int btrfs_search_root(struct btrfs_root *root, u64 search_start,
2007 u64 *found_objectid); 2101 u64 *found_objectid);
2008int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, 2102int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2009 struct btrfs_root *latest_root); 2103int btrfs_set_root_node(struct btrfs_root_item *item,
2104 struct extent_buffer *node);
2010/* dir-item.c */ 2105/* dir-item.c */
2011int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, 2106int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
2012 struct btrfs_root *root, const char *name, 2107 struct btrfs_root *root, const char *name,
@@ -2139,7 +2234,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2139int btrfs_readpage(struct file *file, struct page *page); 2234int btrfs_readpage(struct file *file, struct page *page);
2140void btrfs_delete_inode(struct inode *inode); 2235void btrfs_delete_inode(struct inode *inode);
2141void btrfs_put_inode(struct inode *inode); 2236void btrfs_put_inode(struct inode *inode);
2142void btrfs_read_locked_inode(struct inode *inode);
2143int btrfs_write_inode(struct inode *inode, int wait); 2237int btrfs_write_inode(struct inode *inode, int wait);
2144void btrfs_dirty_inode(struct inode *inode); 2238void btrfs_dirty_inode(struct inode *inode);
2145struct inode *btrfs_alloc_inode(struct super_block *sb); 2239struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2147,12 +2241,8 @@ void btrfs_destroy_inode(struct inode *inode);
2147int btrfs_init_cachep(void); 2241int btrfs_init_cachep(void);
2148void btrfs_destroy_cachep(void); 2242void btrfs_destroy_cachep(void);
2149long btrfs_ioctl_trans_end(struct file *file); 2243long btrfs_ioctl_trans_end(struct file *file);
2150struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
2151 struct btrfs_root *root, int wait);
2152struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
2153 struct btrfs_root *root);
2154struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 2244struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2155 struct btrfs_root *root, int *is_new); 2245 struct btrfs_root *root);
2156int btrfs_commit_write(struct file *file, struct page *page, 2246int btrfs_commit_write(struct file *file, struct page *page,
2157 unsigned from, unsigned to); 2247 unsigned from, unsigned to);
2158struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 2248struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2209,4 +2299,12 @@ int btrfs_check_acl(struct inode *inode, int mask);
2209int btrfs_init_acl(struct inode *inode, struct inode *dir); 2299int btrfs_init_acl(struct inode *inode, struct inode *dir);
2210int btrfs_acl_chmod(struct inode *inode); 2300int btrfs_acl_chmod(struct inode *inode);
2211 2301
2302/* relocation.c */
2303int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
2304int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
2305 struct btrfs_root *root);
2306int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
2307 struct btrfs_root *root);
2308int btrfs_recover_relocation(struct btrfs_root *root);
2309int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
2212#endif 2310#endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index d6c01c096a40..84e6781413b1 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -29,27 +29,87 @@
29 * add extents in the middle of btrfs_search_slot, and it allows 29 * add extents in the middle of btrfs_search_slot, and it allows
30 * us to buffer up frequently modified backrefs in an rb tree instead 30 * us to buffer up frequently modified backrefs in an rb tree instead
31 * of hammering updates on the extent allocation tree. 31 * of hammering updates on the extent allocation tree.
32 *
33 * Right now this code is only used for reference counted trees, but
34 * the long term goal is to get rid of the similar code for delayed
35 * extent tree modifications.
36 */ 32 */
37 33
38/* 34/*
39 * entries in the rb tree are ordered by the byte number of the extent 35 * compare two delayed tree backrefs with same bytenr and type
40 * and by the byte number of the parent block. 36 */
37static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
38 struct btrfs_delayed_tree_ref *ref1)
39{
40 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
41 if (ref1->root < ref2->root)
42 return -1;
43 if (ref1->root > ref2->root)
44 return 1;
45 } else {
46 if (ref1->parent < ref2->parent)
47 return -1;
48 if (ref1->parent > ref2->parent)
49 return 1;
50 }
51 return 0;
52}
53
54/*
55 * compare two delayed data backrefs with same bytenr and type
41 */ 56 */
42static int comp_entry(struct btrfs_delayed_ref_node *ref, 57static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
43 u64 bytenr, u64 parent) 58 struct btrfs_delayed_data_ref *ref1)
44{ 59{
45 if (bytenr < ref->bytenr) 60 if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
61 if (ref1->root < ref2->root)
62 return -1;
63 if (ref1->root > ref2->root)
64 return 1;
65 if (ref1->objectid < ref2->objectid)
66 return -1;
67 if (ref1->objectid > ref2->objectid)
68 return 1;
69 if (ref1->offset < ref2->offset)
70 return -1;
71 if (ref1->offset > ref2->offset)
72 return 1;
73 } else {
74 if (ref1->parent < ref2->parent)
75 return -1;
76 if (ref1->parent > ref2->parent)
77 return 1;
78 }
79 return 0;
80}
81
82/*
83 * entries in the rb tree are ordered by the byte number of the extent,
84 * type of the delayed backrefs and content of delayed backrefs.
85 */
86static int comp_entry(struct btrfs_delayed_ref_node *ref2,
87 struct btrfs_delayed_ref_node *ref1)
88{
89 if (ref1->bytenr < ref2->bytenr)
46 return -1; 90 return -1;
47 if (bytenr > ref->bytenr) 91 if (ref1->bytenr > ref2->bytenr)
48 return 1; 92 return 1;
49 if (parent < ref->parent) 93 if (ref1->is_head && ref2->is_head)
94 return 0;
95 if (ref2->is_head)
50 return -1; 96 return -1;
51 if (parent > ref->parent) 97 if (ref1->is_head)
52 return 1; 98 return 1;
99 if (ref1->type < ref2->type)
100 return -1;
101 if (ref1->type > ref2->type)
102 return 1;
103 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
104 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
105 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
106 btrfs_delayed_node_to_tree_ref(ref1));
107 } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
108 ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
109 return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
110 btrfs_delayed_node_to_data_ref(ref1));
111 }
112 BUG();
53 return 0; 113 return 0;
54} 114}
55 115
@@ -59,20 +119,21 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref,
59 * inserted. 119 * inserted.
60 */ 120 */
61static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, 121static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
62 u64 bytenr, u64 parent,
63 struct rb_node *node) 122 struct rb_node *node)
64{ 123{
65 struct rb_node **p = &root->rb_node; 124 struct rb_node **p = &root->rb_node;
66 struct rb_node *parent_node = NULL; 125 struct rb_node *parent_node = NULL;
67 struct btrfs_delayed_ref_node *entry; 126 struct btrfs_delayed_ref_node *entry;
127 struct btrfs_delayed_ref_node *ins;
68 int cmp; 128 int cmp;
69 129
130 ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
70 while (*p) { 131 while (*p) {
71 parent_node = *p; 132 parent_node = *p;
72 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, 133 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
73 rb_node); 134 rb_node);
74 135
75 cmp = comp_entry(entry, bytenr, parent); 136 cmp = comp_entry(entry, ins);
76 if (cmp < 0) 137 if (cmp < 0)
77 p = &(*p)->rb_left; 138 p = &(*p)->rb_left;
78 else if (cmp > 0) 139 else if (cmp > 0)
@@ -81,18 +142,17 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
81 return entry; 142 return entry;
82 } 143 }
83 144
84 entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
85 rb_link_node(node, parent_node, p); 145 rb_link_node(node, parent_node, p);
86 rb_insert_color(node, root); 146 rb_insert_color(node, root);
87 return NULL; 147 return NULL;
88} 148}
89 149
90/* 150/*
91 * find an entry based on (bytenr,parent). This returns the delayed 151 * find an head entry based on bytenr. This returns the delayed ref
92 * ref if it was able to find one, or NULL if nothing was in that spot 152 * head if it was able to find one, or NULL if nothing was in that spot
93 */ 153 */
94static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, 154static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
95 u64 bytenr, u64 parent, 155 u64 bytenr,
96 struct btrfs_delayed_ref_node **last) 156 struct btrfs_delayed_ref_node **last)
97{ 157{
98 struct rb_node *n = root->rb_node; 158 struct rb_node *n = root->rb_node;
@@ -105,7 +165,15 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
105 if (last) 165 if (last)
106 *last = entry; 166 *last = entry;
107 167
108 cmp = comp_entry(entry, bytenr, parent); 168 if (bytenr < entry->bytenr)
169 cmp = -1;
170 else if (bytenr > entry->bytenr)
171 cmp = 1;
172 else if (!btrfs_delayed_ref_is_head(entry))
173 cmp = 1;
174 else
175 cmp = 0;
176
109 if (cmp < 0) 177 if (cmp < 0)
110 n = n->rb_left; 178 n = n->rb_left;
111 else if (cmp > 0) 179 else if (cmp > 0)
@@ -154,7 +222,7 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
154 node = rb_first(&delayed_refs->root); 222 node = rb_first(&delayed_refs->root);
155 } else { 223 } else {
156 ref = NULL; 224 ref = NULL;
157 tree_search(&delayed_refs->root, start, (u64)-1, &ref); 225 find_ref_head(&delayed_refs->root, start, &ref);
158 if (ref) { 226 if (ref) {
159 struct btrfs_delayed_ref_node *tmp; 227 struct btrfs_delayed_ref_node *tmp;
160 228
@@ -234,7 +302,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
234 delayed_refs = &trans->transaction->delayed_refs; 302 delayed_refs = &trans->transaction->delayed_refs;
235 spin_lock(&delayed_refs->lock); 303 spin_lock(&delayed_refs->lock);
236 304
237 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 305 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
238 if (ref) { 306 if (ref) {
239 prev_node = rb_prev(&ref->rb_node); 307 prev_node = rb_prev(&ref->rb_node);
240 if (!prev_node) 308 if (!prev_node)
@@ -250,25 +318,28 @@ out:
250} 318}
251 319
252/* 320/*
253 * helper function to lookup reference count 321 * helper function to lookup reference count and flags of extent.
254 * 322 *
255 * the head node for delayed ref is used to store the sum of all the 323 * the head node for delayed ref is used to store the sum of all the
256 * reference count modifications queued up in the rbtree. This way you 324 * reference count modifications queued up in the rbtree. the head
257 * can check to see what the reference count would be if all of the 325 * node may also store the extent flags to set. This way you can check
258 * delayed refs are processed. 326 * to see what the reference count and extent flags would be if all of
327 * the delayed refs are not processed.
259 */ 328 */
260int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 329int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
261 struct btrfs_root *root, u64 bytenr, 330 struct btrfs_root *root, u64 bytenr,
262 u64 num_bytes, u32 *refs) 331 u64 num_bytes, u64 *refs, u64 *flags)
263{ 332{
264 struct btrfs_delayed_ref_node *ref; 333 struct btrfs_delayed_ref_node *ref;
265 struct btrfs_delayed_ref_head *head; 334 struct btrfs_delayed_ref_head *head;
266 struct btrfs_delayed_ref_root *delayed_refs; 335 struct btrfs_delayed_ref_root *delayed_refs;
267 struct btrfs_path *path; 336 struct btrfs_path *path;
268 struct extent_buffer *leaf;
269 struct btrfs_extent_item *ei; 337 struct btrfs_extent_item *ei;
338 struct extent_buffer *leaf;
270 struct btrfs_key key; 339 struct btrfs_key key;
271 u32 num_refs; 340 u32 item_size;
341 u64 num_refs;
342 u64 extent_flags;
272 int ret; 343 int ret;
273 344
274 path = btrfs_alloc_path(); 345 path = btrfs_alloc_path();
@@ -287,37 +358,60 @@ again:
287 358
288 if (ret == 0) { 359 if (ret == 0) {
289 leaf = path->nodes[0]; 360 leaf = path->nodes[0];
290 ei = btrfs_item_ptr(leaf, path->slots[0], 361 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
291 struct btrfs_extent_item); 362 if (item_size >= sizeof(*ei)) {
292 num_refs = btrfs_extent_refs(leaf, ei); 363 ei = btrfs_item_ptr(leaf, path->slots[0],
364 struct btrfs_extent_item);
365 num_refs = btrfs_extent_refs(leaf, ei);
366 extent_flags = btrfs_extent_flags(leaf, ei);
367 } else {
368#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
369 struct btrfs_extent_item_v0 *ei0;
370 BUG_ON(item_size != sizeof(*ei0));
371 ei0 = btrfs_item_ptr(leaf, path->slots[0],
372 struct btrfs_extent_item_v0);
373 num_refs = btrfs_extent_refs_v0(leaf, ei0);
374 /* FIXME: this isn't correct for data */
375 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
376#else
377 BUG();
378#endif
379 }
380 BUG_ON(num_refs == 0);
293 } else { 381 } else {
294 num_refs = 0; 382 num_refs = 0;
383 extent_flags = 0;
295 ret = 0; 384 ret = 0;
296 } 385 }
297 386
298 spin_lock(&delayed_refs->lock); 387 spin_lock(&delayed_refs->lock);
299 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 388 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
300 if (ref) { 389 if (ref) {
301 head = btrfs_delayed_node_to_head(ref); 390 head = btrfs_delayed_node_to_head(ref);
302 if (mutex_trylock(&head->mutex)) { 391 if (!mutex_trylock(&head->mutex)) {
303 num_refs += ref->ref_mod; 392 atomic_inc(&ref->refs);
304 mutex_unlock(&head->mutex); 393 spin_unlock(&delayed_refs->lock);
305 *refs = num_refs;
306 goto out;
307 }
308 394
309 atomic_inc(&ref->refs); 395 btrfs_release_path(root->fs_info->extent_root, path);
310 spin_unlock(&delayed_refs->lock);
311 396
312 btrfs_release_path(root->fs_info->extent_root, path); 397 mutex_lock(&head->mutex);
398 mutex_unlock(&head->mutex);
399 btrfs_put_delayed_ref(ref);
400 goto again;
401 }
402 if (head->extent_op && head->extent_op->update_flags)
403 extent_flags |= head->extent_op->flags_to_set;
404 else
405 BUG_ON(num_refs == 0);
313 406
314 mutex_lock(&head->mutex); 407 num_refs += ref->ref_mod;
315 mutex_unlock(&head->mutex); 408 mutex_unlock(&head->mutex);
316 btrfs_put_delayed_ref(ref);
317 goto again;
318 } else {
319 *refs = num_refs;
320 } 409 }
410 WARN_ON(num_refs == 0);
411 if (refs)
412 *refs = num_refs;
413 if (flags)
414 *flags = extent_flags;
321out: 415out:
322 spin_unlock(&delayed_refs->lock); 416 spin_unlock(&delayed_refs->lock);
323 btrfs_free_path(path); 417 btrfs_free_path(path);
@@ -338,16 +432,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
338 struct btrfs_delayed_ref_node *existing, 432 struct btrfs_delayed_ref_node *existing,
339 struct btrfs_delayed_ref_node *update) 433 struct btrfs_delayed_ref_node *update)
340{ 434{
341 struct btrfs_delayed_ref *existing_ref; 435 if (update->action != existing->action) {
342 struct btrfs_delayed_ref *ref;
343
344 existing_ref = btrfs_delayed_node_to_ref(existing);
345 ref = btrfs_delayed_node_to_ref(update);
346
347 if (ref->pin)
348 existing_ref->pin = 1;
349
350 if (ref->action != existing_ref->action) {
351 /* 436 /*
352 * this is effectively undoing either an add or a 437 * this is effectively undoing either an add or a
353 * drop. We decrement the ref_mod, and if it goes 438 * drop. We decrement the ref_mod, and if it goes
@@ -363,20 +448,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
363 delayed_refs->num_entries--; 448 delayed_refs->num_entries--;
364 if (trans->delayed_ref_updates) 449 if (trans->delayed_ref_updates)
365 trans->delayed_ref_updates--; 450 trans->delayed_ref_updates--;
451 } else {
452 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
453 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
366 } 454 }
367 } else { 455 } else {
368 if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { 456 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
369 /* if we're adding refs, make sure all the 457 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
370 * details match up. The extent could
371 * have been totally freed and reallocated
372 * by a different owner before the delayed
373 * ref entries were removed.
374 */
375 existing_ref->owner_objectid = ref->owner_objectid;
376 existing_ref->generation = ref->generation;
377 existing_ref->root = ref->root;
378 existing->num_bytes = update->num_bytes;
379 }
380 /* 458 /*
381 * the action on the existing ref matches 459 * the action on the existing ref matches
382 * the action on the ref we're trying to add. 460 * the action on the ref we're trying to add.
@@ -401,6 +479,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
401 479
402 existing_ref = btrfs_delayed_node_to_head(existing); 480 existing_ref = btrfs_delayed_node_to_head(existing);
403 ref = btrfs_delayed_node_to_head(update); 481 ref = btrfs_delayed_node_to_head(update);
482 BUG_ON(existing_ref->is_data != ref->is_data);
404 483
405 if (ref->must_insert_reserved) { 484 if (ref->must_insert_reserved) {
406 /* if the extent was freed and then 485 /* if the extent was freed and then
@@ -420,6 +499,24 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
420 499
421 } 500 }
422 501
502 if (ref->extent_op) {
503 if (!existing_ref->extent_op) {
504 existing_ref->extent_op = ref->extent_op;
505 } else {
506 if (ref->extent_op->update_key) {
507 memcpy(&existing_ref->extent_op->key,
508 &ref->extent_op->key,
509 sizeof(ref->extent_op->key));
510 existing_ref->extent_op->update_key = 1;
511 }
512 if (ref->extent_op->update_flags) {
513 existing_ref->extent_op->flags_to_set |=
514 ref->extent_op->flags_to_set;
515 existing_ref->extent_op->update_flags = 1;
516 }
517 kfree(ref->extent_op);
518 }
519 }
423 /* 520 /*
424 * update the reference mod on the head to reflect this new operation 521 * update the reference mod on the head to reflect this new operation
425 */ 522 */
@@ -427,19 +524,16 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
427} 524}
428 525
429/* 526/*
430 * helper function to actually insert a delayed ref into the rbtree. 527 * helper function to actually insert a head node into the rbtree.
431 * this does all the dirty work in terms of maintaining the correct 528 * this does all the dirty work in terms of maintaining the correct
432 * overall modification count in the head node and properly dealing 529 * overall modification count.
433 * with updating existing nodes as new modifications are queued.
434 */ 530 */
435static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 531static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
436 struct btrfs_delayed_ref_node *ref, 532 struct btrfs_delayed_ref_node *ref,
437 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 533 u64 bytenr, u64 num_bytes,
438 u64 ref_generation, u64 owner_objectid, int action, 534 int action, int is_data)
439 int pin)
440{ 535{
441 struct btrfs_delayed_ref_node *existing; 536 struct btrfs_delayed_ref_node *existing;
442 struct btrfs_delayed_ref *full_ref;
443 struct btrfs_delayed_ref_head *head_ref = NULL; 537 struct btrfs_delayed_ref_head *head_ref = NULL;
444 struct btrfs_delayed_ref_root *delayed_refs; 538 struct btrfs_delayed_ref_root *delayed_refs;
445 int count_mod = 1; 539 int count_mod = 1;
@@ -449,12 +543,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
449 * the head node stores the sum of all the mods, so dropping a ref 543 * the head node stores the sum of all the mods, so dropping a ref
450 * should drop the sum in the head node by one. 544 * should drop the sum in the head node by one.
451 */ 545 */
452 if (parent == (u64)-1) { 546 if (action == BTRFS_UPDATE_DELAYED_HEAD)
453 if (action == BTRFS_DROP_DELAYED_REF) 547 count_mod = 0;
454 count_mod = -1; 548 else if (action == BTRFS_DROP_DELAYED_REF)
455 else if (action == BTRFS_UPDATE_DELAYED_HEAD) 549 count_mod = -1;
456 count_mod = 0;
457 }
458 550
459 /* 551 /*
460 * BTRFS_ADD_DELAYED_EXTENT means that we need to update 552 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
@@ -467,57 +559,148 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
467 * Once we record must_insert_reserved, switch the action to 559 * Once we record must_insert_reserved, switch the action to
468 * BTRFS_ADD_DELAYED_REF because other special casing is not required. 560 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
469 */ 561 */
470 if (action == BTRFS_ADD_DELAYED_EXTENT) { 562 if (action == BTRFS_ADD_DELAYED_EXTENT)
471 must_insert_reserved = 1; 563 must_insert_reserved = 1;
472 action = BTRFS_ADD_DELAYED_REF; 564 else
473 } else {
474 must_insert_reserved = 0; 565 must_insert_reserved = 0;
475 }
476
477 566
478 delayed_refs = &trans->transaction->delayed_refs; 567 delayed_refs = &trans->transaction->delayed_refs;
479 568
480 /* first set the basic ref node struct up */ 569 /* first set the basic ref node struct up */
481 atomic_set(&ref->refs, 1); 570 atomic_set(&ref->refs, 1);
482 ref->bytenr = bytenr; 571 ref->bytenr = bytenr;
483 ref->parent = parent; 572 ref->num_bytes = num_bytes;
484 ref->ref_mod = count_mod; 573 ref->ref_mod = count_mod;
574 ref->type = 0;
575 ref->action = 0;
576 ref->is_head = 1;
485 ref->in_tree = 1; 577 ref->in_tree = 1;
578
579 head_ref = btrfs_delayed_node_to_head(ref);
580 head_ref->must_insert_reserved = must_insert_reserved;
581 head_ref->is_data = is_data;
582
583 INIT_LIST_HEAD(&head_ref->cluster);
584 mutex_init(&head_ref->mutex);
585
586 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
587
588 if (existing) {
589 update_existing_head_ref(existing, ref);
590 /*
591 * we've updated the existing ref, free the newly
592 * allocated ref
593 */
594 kfree(ref);
595 } else {
596 delayed_refs->num_heads++;
597 delayed_refs->num_heads_ready++;
598 delayed_refs->num_entries++;
599 trans->delayed_ref_updates++;
600 }
601 return 0;
602}
603
604/*
605 * helper to insert a delayed tree ref into the rbtree.
606 */
607static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
608 struct btrfs_delayed_ref_node *ref,
609 u64 bytenr, u64 num_bytes, u64 parent,
610 u64 ref_root, int level, int action)
611{
612 struct btrfs_delayed_ref_node *existing;
613 struct btrfs_delayed_tree_ref *full_ref;
614 struct btrfs_delayed_ref_root *delayed_refs;
615
616 if (action == BTRFS_ADD_DELAYED_EXTENT)
617 action = BTRFS_ADD_DELAYED_REF;
618
619 delayed_refs = &trans->transaction->delayed_refs;
620
621 /* first set the basic ref node struct up */
622 atomic_set(&ref->refs, 1);
623 ref->bytenr = bytenr;
486 ref->num_bytes = num_bytes; 624 ref->num_bytes = num_bytes;
625 ref->ref_mod = 1;
626 ref->action = action;
627 ref->is_head = 0;
628 ref->in_tree = 1;
487 629
488 if (btrfs_delayed_ref_is_head(ref)) { 630 full_ref = btrfs_delayed_node_to_tree_ref(ref);
489 head_ref = btrfs_delayed_node_to_head(ref); 631 if (parent) {
490 head_ref->must_insert_reserved = must_insert_reserved; 632 full_ref->parent = parent;
491 INIT_LIST_HEAD(&head_ref->cluster); 633 ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
492 mutex_init(&head_ref->mutex);
493 } else { 634 } else {
494 full_ref = btrfs_delayed_node_to_ref(ref);
495 full_ref->root = ref_root; 635 full_ref->root = ref_root;
496 full_ref->generation = ref_generation; 636 ref->type = BTRFS_TREE_BLOCK_REF_KEY;
497 full_ref->owner_objectid = owner_objectid;
498 full_ref->pin = pin;
499 full_ref->action = action;
500 } 637 }
638 full_ref->level = level;
501 639
502 existing = tree_insert(&delayed_refs->root, bytenr, 640 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
503 parent, &ref->rb_node);
504 641
505 if (existing) { 642 if (existing) {
506 if (btrfs_delayed_ref_is_head(ref)) 643 update_existing_ref(trans, delayed_refs, existing, ref);
507 update_existing_head_ref(existing, ref); 644 /*
508 else 645 * we've updated the existing ref, free the newly
509 update_existing_ref(trans, delayed_refs, existing, ref); 646 * allocated ref
647 */
648 kfree(ref);
649 } else {
650 delayed_refs->num_entries++;
651 trans->delayed_ref_updates++;
652 }
653 return 0;
654}
655
656/*
657 * helper to insert a delayed data ref into the rbtree.
658 */
659static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
660 struct btrfs_delayed_ref_node *ref,
661 u64 bytenr, u64 num_bytes, u64 parent,
662 u64 ref_root, u64 owner, u64 offset,
663 int action)
664{
665 struct btrfs_delayed_ref_node *existing;
666 struct btrfs_delayed_data_ref *full_ref;
667 struct btrfs_delayed_ref_root *delayed_refs;
668
669 if (action == BTRFS_ADD_DELAYED_EXTENT)
670 action = BTRFS_ADD_DELAYED_REF;
671
672 delayed_refs = &trans->transaction->delayed_refs;
673
674 /* first set the basic ref node struct up */
675 atomic_set(&ref->refs, 1);
676 ref->bytenr = bytenr;
677 ref->num_bytes = num_bytes;
678 ref->ref_mod = 1;
679 ref->action = action;
680 ref->is_head = 0;
681 ref->in_tree = 1;
682
683 full_ref = btrfs_delayed_node_to_data_ref(ref);
684 if (parent) {
685 full_ref->parent = parent;
686 ref->type = BTRFS_SHARED_DATA_REF_KEY;
687 } else {
688 full_ref->root = ref_root;
689 ref->type = BTRFS_EXTENT_DATA_REF_KEY;
690 }
691 full_ref->objectid = owner;
692 full_ref->offset = offset;
510 693
694 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
695
696 if (existing) {
697 update_existing_ref(trans, delayed_refs, existing, ref);
511 /* 698 /*
512 * we've updated the existing ref, free the newly 699 * we've updated the existing ref, free the newly
513 * allocated ref 700 * allocated ref
514 */ 701 */
515 kfree(ref); 702 kfree(ref);
516 } else { 703 } else {
517 if (btrfs_delayed_ref_is_head(ref)) {
518 delayed_refs->num_heads++;
519 delayed_refs->num_heads_ready++;
520 }
521 delayed_refs->num_entries++; 704 delayed_refs->num_entries++;
522 trans->delayed_ref_updates++; 705 trans->delayed_ref_updates++;
523 } 706 }
@@ -525,37 +708,78 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
525} 708}
526 709
527/* 710/*
528 * add a delayed ref to the tree. This does all of the accounting required 711 * add a delayed tree ref. This does all of the accounting required
529 * to make sure the delayed ref is eventually processed before this 712 * to make sure the delayed ref is eventually processed before this
530 * transaction commits. 713 * transaction commits.
531 */ 714 */
532int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 715int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
533 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 716 u64 bytenr, u64 num_bytes, u64 parent,
534 u64 ref_generation, u64 owner_objectid, int action, 717 u64 ref_root, int level, int action,
535 int pin) 718 struct btrfs_delayed_extent_op *extent_op)
536{ 719{
537 struct btrfs_delayed_ref *ref; 720 struct btrfs_delayed_tree_ref *ref;
538 struct btrfs_delayed_ref_head *head_ref; 721 struct btrfs_delayed_ref_head *head_ref;
539 struct btrfs_delayed_ref_root *delayed_refs; 722 struct btrfs_delayed_ref_root *delayed_refs;
540 int ret; 723 int ret;
541 724
725 BUG_ON(extent_op && extent_op->is_data);
542 ref = kmalloc(sizeof(*ref), GFP_NOFS); 726 ref = kmalloc(sizeof(*ref), GFP_NOFS);
543 if (!ref) 727 if (!ref)
544 return -ENOMEM; 728 return -ENOMEM;
545 729
730 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
731 if (!head_ref) {
732 kfree(ref);
733 return -ENOMEM;
734 }
735
736 head_ref->extent_op = extent_op;
737
738 delayed_refs = &trans->transaction->delayed_refs;
739 spin_lock(&delayed_refs->lock);
740
546 /* 741 /*
547 * the parent = 0 case comes from cases where we don't actually 742 * insert both the head node and the new ref without dropping
548 * know the parent yet. It will get updated later via a add/drop 743 * the spin lock
549 * pair.
550 */ 744 */
551 if (parent == 0) 745 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
552 parent = bytenr; 746 action, 0);
747 BUG_ON(ret);
748
749 ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
750 parent, ref_root, level, action);
751 BUG_ON(ret);
752 spin_unlock(&delayed_refs->lock);
753 return 0;
754}
755
756/*
757 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
758 */
759int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
760 u64 bytenr, u64 num_bytes,
761 u64 parent, u64 ref_root,
762 u64 owner, u64 offset, int action,
763 struct btrfs_delayed_extent_op *extent_op)
764{
765 struct btrfs_delayed_data_ref *ref;
766 struct btrfs_delayed_ref_head *head_ref;
767 struct btrfs_delayed_ref_root *delayed_refs;
768 int ret;
769
770 BUG_ON(extent_op && !extent_op->is_data);
771 ref = kmalloc(sizeof(*ref), GFP_NOFS);
772 if (!ref)
773 return -ENOMEM;
553 774
554 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); 775 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
555 if (!head_ref) { 776 if (!head_ref) {
556 kfree(ref); 777 kfree(ref);
557 return -ENOMEM; 778 return -ENOMEM;
558 } 779 }
780
781 head_ref->extent_op = extent_op;
782
559 delayed_refs = &trans->transaction->delayed_refs; 783 delayed_refs = &trans->transaction->delayed_refs;
560 spin_lock(&delayed_refs->lock); 784 spin_lock(&delayed_refs->lock);
561 785
@@ -563,14 +787,39 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
563 * insert both the head node and the new ref without dropping 787 * insert both the head node and the new ref without dropping
564 * the spin lock 788 * the spin lock
565 */ 789 */
566 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, 790 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
567 (u64)-1, 0, 0, 0, action, pin); 791 action, 1);
568 BUG_ON(ret); 792 BUG_ON(ret);
569 793
570 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, 794 ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
571 parent, ref_root, ref_generation, 795 parent, ref_root, owner, offset, action);
572 owner_objectid, action, pin); 796 BUG_ON(ret);
797 spin_unlock(&delayed_refs->lock);
798 return 0;
799}
800
801int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
802 u64 bytenr, u64 num_bytes,
803 struct btrfs_delayed_extent_op *extent_op)
804{
805 struct btrfs_delayed_ref_head *head_ref;
806 struct btrfs_delayed_ref_root *delayed_refs;
807 int ret;
808
809 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
810 if (!head_ref)
811 return -ENOMEM;
812
813 head_ref->extent_op = extent_op;
814
815 delayed_refs = &trans->transaction->delayed_refs;
816 spin_lock(&delayed_refs->lock);
817
818 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
819 num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
820 extent_op->is_data);
573 BUG_ON(ret); 821 BUG_ON(ret);
822
574 spin_unlock(&delayed_refs->lock); 823 spin_unlock(&delayed_refs->lock);
575 return 0; 824 return 0;
576} 825}
@@ -587,7 +836,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
587 struct btrfs_delayed_ref_root *delayed_refs; 836 struct btrfs_delayed_ref_root *delayed_refs;
588 837
589 delayed_refs = &trans->transaction->delayed_refs; 838 delayed_refs = &trans->transaction->delayed_refs;
590 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 839 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
591 if (ref) 840 if (ref)
592 return btrfs_delayed_node_to_head(ref); 841 return btrfs_delayed_node_to_head(ref);
593 return NULL; 842 return NULL;
@@ -603,6 +852,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
603 * 852 *
604 * It is the same as doing a ref add and delete in two separate calls. 853 * It is the same as doing a ref add and delete in two separate calls.
605 */ 854 */
855#if 0
606int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, 856int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
607 u64 bytenr, u64 num_bytes, u64 orig_parent, 857 u64 bytenr, u64 num_bytes, u64 orig_parent,
608 u64 parent, u64 orig_ref_root, u64 ref_root, 858 u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -666,3 +916,4 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
666 spin_unlock(&delayed_refs->lock); 916 spin_unlock(&delayed_refs->lock);
667 return 0; 917 return 0;
668} 918}
919#endif
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 3bec2ff0b15c..f6fc67ddad36 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -30,9 +30,6 @@ struct btrfs_delayed_ref_node {
30 /* the starting bytenr of the extent */ 30 /* the starting bytenr of the extent */
31 u64 bytenr; 31 u64 bytenr;
32 32
33 /* the parent our backref will point to */
34 u64 parent;
35
36 /* the size of the extent */ 33 /* the size of the extent */
37 u64 num_bytes; 34 u64 num_bytes;
38 35
@@ -50,10 +47,21 @@ struct btrfs_delayed_ref_node {
50 */ 47 */
51 int ref_mod; 48 int ref_mod;
52 49
50 unsigned int action:8;
51 unsigned int type:8;
53 /* is this node still in the rbtree? */ 52 /* is this node still in the rbtree? */
53 unsigned int is_head:1;
54 unsigned int in_tree:1; 54 unsigned int in_tree:1;
55}; 55};
56 56
57struct btrfs_delayed_extent_op {
58 struct btrfs_disk_key key;
59 u64 flags_to_set;
60 unsigned int update_key:1;
61 unsigned int update_flags:1;
62 unsigned int is_data:1;
63};
64
57/* 65/*
58 * the head refs are used to hold a lock on a given extent, which allows us 66 * the head refs are used to hold a lock on a given extent, which allows us
59 * to make sure that only one process is running the delayed refs 67 * to make sure that only one process is running the delayed refs
@@ -71,6 +79,7 @@ struct btrfs_delayed_ref_head {
71 79
72 struct list_head cluster; 80 struct list_head cluster;
73 81
82 struct btrfs_delayed_extent_op *extent_op;
74 /* 83 /*
75 * when a new extent is allocated, it is just reserved in memory 84 * when a new extent is allocated, it is just reserved in memory
76 * The actual extent isn't inserted into the extent allocation tree 85 * The actual extent isn't inserted into the extent allocation tree
@@ -84,27 +93,26 @@ struct btrfs_delayed_ref_head {
84 * the free has happened. 93 * the free has happened.
85 */ 94 */
86 unsigned int must_insert_reserved:1; 95 unsigned int must_insert_reserved:1;
96 unsigned int is_data:1;
87}; 97};
88 98
89struct btrfs_delayed_ref { 99struct btrfs_delayed_tree_ref {
90 struct btrfs_delayed_ref_node node; 100 struct btrfs_delayed_ref_node node;
101 union {
102 u64 root;
103 u64 parent;
104 };
105 int level;
106};
91 107
92 /* the root objectid our ref will point to */ 108struct btrfs_delayed_data_ref {
93 u64 root; 109 struct btrfs_delayed_ref_node node;
94 110 union {
95 /* the generation for the backref */ 111 u64 root;
96 u64 generation; 112 u64 parent;
97 113 };
98 /* owner_objectid of the backref */ 114 u64 objectid;
99 u64 owner_objectid; 115 u64 offset;
100
101 /* operation done by this entry in the rbtree */
102 u8 action;
103
104 /* if pin == 1, when the extent is freed it will be pinned until
105 * transaction commit
106 */
107 unsigned int pin:1;
108}; 116};
109 117
110struct btrfs_delayed_ref_root { 118struct btrfs_delayed_ref_root {
@@ -143,17 +151,25 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
143 } 151 }
144} 152}
145 153
146int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 154int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
147 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 155 u64 bytenr, u64 num_bytes, u64 parent,
148 u64 ref_generation, u64 owner_objectid, int action, 156 u64 ref_root, int level, int action,
149 int pin); 157 struct btrfs_delayed_extent_op *extent_op);
158int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
159 u64 bytenr, u64 num_bytes,
160 u64 parent, u64 ref_root,
161 u64 owner, u64 offset, int action,
162 struct btrfs_delayed_extent_op *extent_op);
163int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
164 u64 bytenr, u64 num_bytes,
165 struct btrfs_delayed_extent_op *extent_op);
150 166
151struct btrfs_delayed_ref_head * 167struct btrfs_delayed_ref_head *
152btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
153int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); 169int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
154int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 170int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
155 struct btrfs_root *root, u64 bytenr, 171 struct btrfs_root *root, u64 bytenr,
156 u64 num_bytes, u32 *refs); 172 u64 num_bytes, u64 *refs, u64 *flags);
157int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, 173int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
158 u64 bytenr, u64 num_bytes, u64 orig_parent, 174 u64 bytenr, u64 num_bytes, u64 orig_parent,
159 u64 parent, u64 orig_ref_root, u64 ref_root, 175 u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -169,18 +185,24 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
169 */ 185 */
170static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) 186static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
171{ 187{
172 return node->parent == (u64)-1; 188 return node->is_head;
173} 189}
174 190
175/* 191/*
176 * helper functions to cast a node into its container 192 * helper functions to cast a node into its container
177 */ 193 */
178static inline struct btrfs_delayed_ref * 194static inline struct btrfs_delayed_tree_ref *
179btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) 195btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
180{ 196{
181 WARN_ON(btrfs_delayed_ref_is_head(node)); 197 WARN_ON(btrfs_delayed_ref_is_head(node));
182 return container_of(node, struct btrfs_delayed_ref, node); 198 return container_of(node, struct btrfs_delayed_tree_ref, node);
199}
183 200
201static inline struct btrfs_delayed_data_ref *
202btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
203{
204 WARN_ON(btrfs_delayed_ref_is_head(node));
205 return container_of(node, struct btrfs_delayed_data_ref, node);
184} 206}
185 207
186static inline struct btrfs_delayed_ref_head * 208static inline struct btrfs_delayed_ref_head *
@@ -188,6 +210,5 @@ btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
188{ 210{
189 WARN_ON(!btrfs_delayed_ref_is_head(node)); 211 WARN_ON(!btrfs_delayed_ref_is_head(node));
190 return container_of(node, struct btrfs_delayed_ref_head, node); 212 return container_of(node, struct btrfs_delayed_ref_head, node);
191
192} 213}
193#endif 214#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4b0ea0b80c23..7f5c6e3e9992 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -36,7 +36,6 @@
36#include "print-tree.h" 36#include "print-tree.h"
37#include "async-thread.h" 37#include "async-thread.h"
38#include "locking.h" 38#include "locking.h"
39#include "ref-cache.h"
40#include "tree-log.h" 39#include "tree-log.h"
41#include "free-space-cache.h" 40#include "free-space-cache.h"
42 41
@@ -884,7 +883,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
884{ 883{
885 root->node = NULL; 884 root->node = NULL;
886 root->commit_root = NULL; 885 root->commit_root = NULL;
887 root->ref_tree = NULL;
888 root->sectorsize = sectorsize; 886 root->sectorsize = sectorsize;
889 root->nodesize = nodesize; 887 root->nodesize = nodesize;
890 root->leafsize = leafsize; 888 root->leafsize = leafsize;
@@ -899,12 +897,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
899 root->last_inode_alloc = 0; 897 root->last_inode_alloc = 0;
900 root->name = NULL; 898 root->name = NULL;
901 root->in_sysfs = 0; 899 root->in_sysfs = 0;
900 root->inode_tree.rb_node = NULL;
902 901
903 INIT_LIST_HEAD(&root->dirty_list); 902 INIT_LIST_HEAD(&root->dirty_list);
904 INIT_LIST_HEAD(&root->orphan_list); 903 INIT_LIST_HEAD(&root->orphan_list);
905 INIT_LIST_HEAD(&root->dead_list); 904 INIT_LIST_HEAD(&root->root_list);
906 spin_lock_init(&root->node_lock); 905 spin_lock_init(&root->node_lock);
907 spin_lock_init(&root->list_lock); 906 spin_lock_init(&root->list_lock);
907 spin_lock_init(&root->inode_lock);
908 mutex_init(&root->objectid_mutex); 908 mutex_init(&root->objectid_mutex);
909 mutex_init(&root->log_mutex); 909 mutex_init(&root->log_mutex);
910 init_waitqueue_head(&root->log_writer_wait); 910 init_waitqueue_head(&root->log_writer_wait);
@@ -918,9 +918,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
918 extent_io_tree_init(&root->dirty_log_pages, 918 extent_io_tree_init(&root->dirty_log_pages,
919 fs_info->btree_inode->i_mapping, GFP_NOFS); 919 fs_info->btree_inode->i_mapping, GFP_NOFS);
920 920
921 btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
922 root->ref_tree = &root->ref_tree_struct;
923
924 memset(&root->root_key, 0, sizeof(root->root_key)); 921 memset(&root->root_key, 0, sizeof(root->root_key));
925 memset(&root->root_item, 0, sizeof(root->root_item)); 922 memset(&root->root_item, 0, sizeof(root->root_item));
926 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 923 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -959,6 +956,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
959 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 956 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
960 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 957 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
961 blocksize, generation); 958 blocksize, generation);
959 root->commit_root = btrfs_root_node(root);
962 BUG_ON(!root->node); 960 BUG_ON(!root->node);
963 return 0; 961 return 0;
964} 962}
@@ -1025,20 +1023,19 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1025 */ 1023 */
1026 root->ref_cows = 0; 1024 root->ref_cows = 0;
1027 1025
1028 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 1026 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
1029 0, BTRFS_TREE_LOG_OBJECTID, 1027 BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
1030 trans->transid, 0, 0, 0);
1031 if (IS_ERR(leaf)) { 1028 if (IS_ERR(leaf)) {
1032 kfree(root); 1029 kfree(root);
1033 return ERR_CAST(leaf); 1030 return ERR_CAST(leaf);
1034 } 1031 }
1035 1032
1033 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
1034 btrfs_set_header_bytenr(leaf, leaf->start);
1035 btrfs_set_header_generation(leaf, trans->transid);
1036 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
1037 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
1036 root->node = leaf; 1038 root->node = leaf;
1037 btrfs_set_header_nritems(root->node, 0);
1038 btrfs_set_header_level(root->node, 0);
1039 btrfs_set_header_bytenr(root->node, root->node->start);
1040 btrfs_set_header_generation(root->node, trans->transid);
1041 btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
1042 1039
1043 write_extent_buffer(root->node, root->fs_info->fsid, 1040 write_extent_buffer(root->node, root->fs_info->fsid,
1044 (unsigned long)btrfs_header_fsid(root->node), 1041 (unsigned long)btrfs_header_fsid(root->node),
@@ -1081,8 +1078,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1081 inode_item->nbytes = cpu_to_le64(root->leafsize); 1078 inode_item->nbytes = cpu_to_le64(root->leafsize);
1082 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 1079 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
1083 1080
1084 btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); 1081 btrfs_set_root_node(&log_root->root_item, log_root->node);
1085 btrfs_set_root_generation(&log_root->root_item, trans->transid);
1086 1082
1087 WARN_ON(root->log_root); 1083 WARN_ON(root->log_root);
1088 root->log_root = log_root; 1084 root->log_root = log_root;
@@ -1144,6 +1140,7 @@ out:
1144 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1140 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1145 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1141 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1146 blocksize, generation); 1142 blocksize, generation);
1143 root->commit_root = btrfs_root_node(root);
1147 BUG_ON(!root->node); 1144 BUG_ON(!root->node);
1148insert: 1145insert:
1149 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1146 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -1210,7 +1207,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1210 } 1207 }
1211 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 1208 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
1212 ret = btrfs_find_dead_roots(fs_info->tree_root, 1209 ret = btrfs_find_dead_roots(fs_info->tree_root,
1213 root->root_key.objectid, root); 1210 root->root_key.objectid);
1214 BUG_ON(ret); 1211 BUG_ON(ret);
1215 btrfs_orphan_cleanup(root); 1212 btrfs_orphan_cleanup(root);
1216 } 1213 }
@@ -1569,8 +1566,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1569 atomic_set(&fs_info->async_delalloc_pages, 0); 1566 atomic_set(&fs_info->async_delalloc_pages, 0);
1570 atomic_set(&fs_info->async_submit_draining, 0); 1567 atomic_set(&fs_info->async_submit_draining, 0);
1571 atomic_set(&fs_info->nr_async_bios, 0); 1568 atomic_set(&fs_info->nr_async_bios, 0);
1572 atomic_set(&fs_info->throttles, 0);
1573 atomic_set(&fs_info->throttle_gen, 0);
1574 fs_info->sb = sb; 1569 fs_info->sb = sb;
1575 fs_info->max_extent = (u64)-1; 1570 fs_info->max_extent = (u64)-1;
1576 fs_info->max_inline = 8192 * 1024; 1571 fs_info->max_inline = 8192 * 1024;
@@ -1598,6 +1593,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1598 fs_info->btree_inode->i_mapping->a_ops = &btree_aops; 1593 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
1599 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; 1594 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
1600 1595
1596 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1601 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1597 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1602 fs_info->btree_inode->i_mapping, 1598 fs_info->btree_inode->i_mapping,
1603 GFP_NOFS); 1599 GFP_NOFS);
@@ -1613,10 +1609,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1613 fs_info->btree_inode->i_mapping, GFP_NOFS); 1609 fs_info->btree_inode->i_mapping, GFP_NOFS);
1614 fs_info->do_barriers = 1; 1610 fs_info->do_barriers = 1;
1615 1611
1616 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
1617 btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
1618 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
1619
1620 BTRFS_I(fs_info->btree_inode)->root = tree_root; 1612 BTRFS_I(fs_info->btree_inode)->root = tree_root;
1621 memset(&BTRFS_I(fs_info->btree_inode)->location, 0, 1613 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
1622 sizeof(struct btrfs_key)); 1614 sizeof(struct btrfs_key));
@@ -1674,6 +1666,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1674 goto fail_iput; 1666 goto fail_iput;
1675 } 1667 }
1676 1668
1669 features = btrfs_super_incompat_flags(disk_super);
1670 if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
1671 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
1672 btrfs_set_super_incompat_flags(disk_super, features);
1673 }
1674
1677 features = btrfs_super_compat_ro_flags(disk_super) & 1675 features = btrfs_super_compat_ro_flags(disk_super) &
1678 ~BTRFS_FEATURE_COMPAT_RO_SUPP; 1676 ~BTRFS_FEATURE_COMPAT_RO_SUPP;
1679 if (!(sb->s_flags & MS_RDONLY) && features) { 1677 if (!(sb->s_flags & MS_RDONLY) && features) {
@@ -1771,7 +1769,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1771 if (ret) { 1769 if (ret) {
1772 printk(KERN_WARNING "btrfs: failed to read the system " 1770 printk(KERN_WARNING "btrfs: failed to read the system "
1773 "array on %s\n", sb->s_id); 1771 "array on %s\n", sb->s_id);
1774 goto fail_sys_array; 1772 goto fail_sb_buffer;
1775 } 1773 }
1776 1774
1777 blocksize = btrfs_level_size(tree_root, 1775 blocksize = btrfs_level_size(tree_root,
@@ -1785,6 +1783,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1785 btrfs_super_chunk_root(disk_super), 1783 btrfs_super_chunk_root(disk_super),
1786 blocksize, generation); 1784 blocksize, generation);
1787 BUG_ON(!chunk_root->node); 1785 BUG_ON(!chunk_root->node);
1786 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
1787 chunk_root->commit_root = btrfs_root_node(chunk_root);
1788 1788
1789 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, 1789 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
1790 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), 1790 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
@@ -1810,7 +1810,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1810 blocksize, generation); 1810 blocksize, generation);
1811 if (!tree_root->node) 1811 if (!tree_root->node)
1812 goto fail_chunk_root; 1812 goto fail_chunk_root;
1813 1813 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
1814 tree_root->commit_root = btrfs_root_node(tree_root);
1814 1815
1815 ret = find_and_setup_root(tree_root, fs_info, 1816 ret = find_and_setup_root(tree_root, fs_info,
1816 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 1817 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
@@ -1820,14 +1821,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1820 1821
1821 ret = find_and_setup_root(tree_root, fs_info, 1822 ret = find_and_setup_root(tree_root, fs_info,
1822 BTRFS_DEV_TREE_OBJECTID, dev_root); 1823 BTRFS_DEV_TREE_OBJECTID, dev_root);
1823 dev_root->track_dirty = 1;
1824 if (ret) 1824 if (ret)
1825 goto fail_extent_root; 1825 goto fail_extent_root;
1826 dev_root->track_dirty = 1;
1826 1827
1827 ret = find_and_setup_root(tree_root, fs_info, 1828 ret = find_and_setup_root(tree_root, fs_info,
1828 BTRFS_CSUM_TREE_OBJECTID, csum_root); 1829 BTRFS_CSUM_TREE_OBJECTID, csum_root);
1829 if (ret) 1830 if (ret)
1830 goto fail_extent_root; 1831 goto fail_dev_root;
1831 1832
1832 csum_root->track_dirty = 1; 1833 csum_root->track_dirty = 1;
1833 1834
@@ -1881,7 +1882,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1881 } 1882 }
1882 1883
1883 if (!(sb->s_flags & MS_RDONLY)) { 1884 if (!(sb->s_flags & MS_RDONLY)) {
1884 ret = btrfs_cleanup_reloc_trees(tree_root); 1885 ret = btrfs_recover_relocation(tree_root);
1885 BUG_ON(ret); 1886 BUG_ON(ret);
1886 } 1887 }
1887 1888
@@ -1908,14 +1909,19 @@ fail_cleaner:
1908 1909
1909fail_csum_root: 1910fail_csum_root:
1910 free_extent_buffer(csum_root->node); 1911 free_extent_buffer(csum_root->node);
1912 free_extent_buffer(csum_root->commit_root);
1913fail_dev_root:
1914 free_extent_buffer(dev_root->node);
1915 free_extent_buffer(dev_root->commit_root);
1911fail_extent_root: 1916fail_extent_root:
1912 free_extent_buffer(extent_root->node); 1917 free_extent_buffer(extent_root->node);
1918 free_extent_buffer(extent_root->commit_root);
1913fail_tree_root: 1919fail_tree_root:
1914 free_extent_buffer(tree_root->node); 1920 free_extent_buffer(tree_root->node);
1921 free_extent_buffer(tree_root->commit_root);
1915fail_chunk_root: 1922fail_chunk_root:
1916 free_extent_buffer(chunk_root->node); 1923 free_extent_buffer(chunk_root->node);
1917fail_sys_array: 1924 free_extent_buffer(chunk_root->commit_root);
1918 free_extent_buffer(dev_root->node);
1919fail_sb_buffer: 1925fail_sb_buffer:
1920 btrfs_stop_workers(&fs_info->fixup_workers); 1926 btrfs_stop_workers(&fs_info->fixup_workers);
1921 btrfs_stop_workers(&fs_info->delalloc_workers); 1927 btrfs_stop_workers(&fs_info->delalloc_workers);
@@ -2173,6 +2179,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2173 2179
2174int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2180int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2175{ 2181{
2182 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2176 radix_tree_delete(&fs_info->fs_roots_radix, 2183 radix_tree_delete(&fs_info->fs_roots_radix,
2177 (unsigned long)root->root_key.objectid); 2184 (unsigned long)root->root_key.objectid);
2178 if (root->anon_super.s_dev) { 2185 if (root->anon_super.s_dev) {
@@ -2219,10 +2226,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2219 ARRAY_SIZE(gang)); 2226 ARRAY_SIZE(gang));
2220 if (!ret) 2227 if (!ret)
2221 break; 2228 break;
2229
2230 root_objectid = gang[ret - 1]->root_key.objectid + 1;
2222 for (i = 0; i < ret; i++) { 2231 for (i = 0; i < ret; i++) {
2223 root_objectid = gang[i]->root_key.objectid; 2232 root_objectid = gang[i]->root_key.objectid;
2224 ret = btrfs_find_dead_roots(fs_info->tree_root, 2233 ret = btrfs_find_dead_roots(fs_info->tree_root,
2225 root_objectid, gang[i]); 2234 root_objectid);
2226 BUG_ON(ret); 2235 BUG_ON(ret);
2227 btrfs_orphan_cleanup(gang[i]); 2236 btrfs_orphan_cleanup(gang[i]);
2228 } 2237 }
@@ -2278,20 +2287,16 @@ int close_ctree(struct btrfs_root *root)
2278 (unsigned long long)fs_info->total_ref_cache_size); 2287 (unsigned long long)fs_info->total_ref_cache_size);
2279 } 2288 }
2280 2289
2281 if (fs_info->extent_root->node) 2290 free_extent_buffer(fs_info->extent_root->node);
2282 free_extent_buffer(fs_info->extent_root->node); 2291 free_extent_buffer(fs_info->extent_root->commit_root);
2283 2292 free_extent_buffer(fs_info->tree_root->node);
2284 if (fs_info->tree_root->node) 2293 free_extent_buffer(fs_info->tree_root->commit_root);
2285 free_extent_buffer(fs_info->tree_root->node); 2294 free_extent_buffer(root->fs_info->chunk_root->node);
2286 2295 free_extent_buffer(root->fs_info->chunk_root->commit_root);
2287 if (root->fs_info->chunk_root->node) 2296 free_extent_buffer(root->fs_info->dev_root->node);
2288 free_extent_buffer(root->fs_info->chunk_root->node); 2297 free_extent_buffer(root->fs_info->dev_root->commit_root);
2289 2298 free_extent_buffer(root->fs_info->csum_root->node);
2290 if (root->fs_info->dev_root->node) 2299 free_extent_buffer(root->fs_info->csum_root->commit_root);
2291 free_extent_buffer(root->fs_info->dev_root->node);
2292
2293 if (root->fs_info->csum_root->node)
2294 free_extent_buffer(root->fs_info->csum_root->node);
2295 2300
2296 btrfs_free_block_groups(root->fs_info); 2301 btrfs_free_block_groups(root->fs_info);
2297 2302
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 85315d2c90de..9596b40caa4e 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -78,7 +78,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
78 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 78 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
79 key.offset = 0; 79 key.offset = 0;
80 80
81 inode = btrfs_iget(sb, &key, root, NULL); 81 inode = btrfs_iget(sb, &key, root);
82 if (IS_ERR(inode)) 82 if (IS_ERR(inode))
83 return (void *)inode; 83 return (void *)inode;
84 84
@@ -192,7 +192,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
192 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 192 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
193 key.offset = 0; 193 key.offset = 0;
194 194
195 return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); 195 return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
196} 196}
197 197
198const struct export_operations btrfs_export_ops = { 198const struct export_operations btrfs_export_ops = {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 35af93355063..a42419c276e2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -30,43 +30,33 @@
30#include "transaction.h" 30#include "transaction.h"
31#include "volumes.h" 31#include "volumes.h"
32#include "locking.h" 32#include "locking.h"
33#include "ref-cache.h"
34#include "free-space-cache.h" 33#include "free-space-cache.h"
35 34
36#define PENDING_EXTENT_INSERT 0
37#define PENDING_EXTENT_DELETE 1
38#define PENDING_BACKREF_UPDATE 2
39
40struct pending_extent_op {
41 int type;
42 u64 bytenr;
43 u64 num_bytes;
44 u64 parent;
45 u64 orig_parent;
46 u64 generation;
47 u64 orig_generation;
48 int level;
49 struct list_head list;
50 int del;
51};
52
53static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
54 struct btrfs_root *root, u64 parent,
55 u64 root_objectid, u64 ref_generation,
56 u64 owner, struct btrfs_key *ins,
57 int ref_mod);
58static int update_reserved_extents(struct btrfs_root *root, 35static int update_reserved_extents(struct btrfs_root *root,
59 u64 bytenr, u64 num, int reserve); 36 u64 bytenr, u64 num, int reserve);
60static int update_block_group(struct btrfs_trans_handle *trans, 37static int update_block_group(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, 38 struct btrfs_root *root,
62 u64 bytenr, u64 num_bytes, int alloc, 39 u64 bytenr, u64 num_bytes, int alloc,
63 int mark_free); 40 int mark_free);
64static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, 41static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
65 struct btrfs_root *root, 42 struct btrfs_root *root,
66 u64 bytenr, u64 num_bytes, u64 parent, 43 u64 bytenr, u64 num_bytes, u64 parent,
67 u64 root_objectid, u64 ref_generation, 44 u64 root_objectid, u64 owner_objectid,
68 u64 owner_objectid, int pin, 45 u64 owner_offset, int refs_to_drop,
69 int ref_to_drop); 46 struct btrfs_delayed_extent_op *extra_op);
47static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
48 struct extent_buffer *leaf,
49 struct btrfs_extent_item *ei);
50static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
51 struct btrfs_root *root,
52 u64 parent, u64 root_objectid,
53 u64 flags, u64 owner, u64 offset,
54 struct btrfs_key *ins, int ref_mod);
55static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
56 struct btrfs_root *root,
57 u64 parent, u64 root_objectid,
58 u64 flags, struct btrfs_disk_key *key,
59 int level, struct btrfs_key *ins);
70 60
71static int do_chunk_alloc(struct btrfs_trans_handle *trans, 61static int do_chunk_alloc(struct btrfs_trans_handle *trans,
72 struct btrfs_root *extent_root, u64 alloc_bytes, 62 struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -453,196 +443,973 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
453 * maintenance. This is actually the same as #2, but with a slightly 443 * maintenance. This is actually the same as #2, but with a slightly
454 * different use case. 444 * different use case.
455 * 445 *
446 * There are two kinds of back refs. The implicit back refs is optimized
447 * for pointers in non-shared tree blocks. For a given pointer in a block,
448 * back refs of this kind provide information about the block's owner tree
449 * and the pointer's key. This information allows us to find the block by
450 * b-tree searching. The full back refs is for pointers in tree blocks not
451 * referenced by their owner trees. The location of tree block is recorded
452 * in the back refs. Actually the full back refs is generic, and can be
453 * used in all cases the implicit back refs is used. The major shortcoming
454 * of the full back refs is its overhead. Every time a tree block gets
455 * COWed, we have to update back refs entry for all pointers in it.
456 *
457 * For a newly allocated tree block, we use implicit back refs for
458 * pointers in it. This means most tree related operations only involve
459 * implicit back refs. For a tree block created in old transaction, the
460 * only way to drop a reference to it is COW it. So we can detect the
461 * event that tree block loses its owner tree's reference and do the
462 * back refs conversion.
463 *
464 * When a tree block is COW'd through a tree, there are four cases:
465 *
466 * The reference count of the block is one and the tree is the block's
467 * owner tree. Nothing to do in this case.
468 *
469 * The reference count of the block is one and the tree is not the
470 * block's owner tree. In this case, full back refs is used for pointers
471 * in the block. Remove these full back refs, add implicit back refs for
472 * every pointer in the new block.
473 *
474 * The reference count of the block is greater than one and the tree is
475 * the block's owner tree. In this case, implicit back refs is used for
476 * pointers in the block. Add full back refs for every pointer in the
477 * block, increase lower level extents' reference counts. The original
478 * implicit back refs are carried over to the new block.
479 *
480 * The reference count of the block is greater than one and the tree is
481 * not the block's owner tree. Add implicit back refs for every pointer in
482 * the new block, increase lower level extents' reference count.
483 *
484 * Back Reference Key composing:
485 *
486 * The key objectid corresponds to the first byte in the extent,
487 * The key type is used to differentiate between types of back refs.
488 * There are different meanings of the key offset for different types
489 * of back refs.
490 *
456 * File extents can be referenced by: 491 * File extents can be referenced by:
457 * 492 *
458 * - multiple snapshots, subvolumes, or different generations in one subvol 493 * - multiple snapshots, subvolumes, or different generations in one subvol
459 * - different files inside a single subvolume 494 * - different files inside a single subvolume
460 * - different offsets inside a file (bookend extents in file.c) 495 * - different offsets inside a file (bookend extents in file.c)
461 * 496 *
462 * The extent ref structure has fields for: 497 * The extent ref structure for the implicit back refs has fields for:
463 * 498 *
464 * - Objectid of the subvolume root 499 * - Objectid of the subvolume root
465 * - Generation number of the tree holding the reference
466 * - objectid of the file holding the reference 500 * - objectid of the file holding the reference
467 * - number of references holding by parent node (alway 1 for tree blocks) 501 * - original offset in the file
468 * 502 * - how many bookend extents
469 * Btree leaf may hold multiple references to a file extent. In most cases,
470 * these references are from same file and the corresponding offsets inside
471 * the file are close together.
472 *
473 * When a file extent is allocated the fields are filled in:
474 * (root_key.objectid, trans->transid, inode objectid, 1)
475 * 503 *
476 * When a leaf is cow'd new references are added for every file extent found 504 * The key offset for the implicit back refs is hash of the first
477 * in the leaf. It looks similar to the create case, but trans->transid will 505 * three fields.
478 * be different when the block is cow'd.
479 * 506 *
480 * (root_key.objectid, trans->transid, inode objectid, 507 * The extent ref structure for the full back refs has field for:
481 * number of references in the leaf)
482 * 508 *
483 * When a file extent is removed either during snapshot deletion or 509 * - number of pointers in the tree leaf
484 * file truncation, we find the corresponding back reference and check
485 * the following fields:
486 * 510 *
487 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), 511 * The key offset for the full back refs is the first byte of
488 * inode objectid) 512 * the tree leaf
489 * 513 *
490 * Btree extents can be referenced by: 514 * When a file extent is allocated, The implicit back refs is used.
491 * 515 * the fields are filled in:
492 * - Different subvolumes
493 * - Different generations of the same subvolume
494 *
495 * When a tree block is created, back references are inserted:
496 * 516 *
497 * (root->root_key.objectid, trans->transid, level, 1) 517 * (root_key.objectid, inode objectid, offset in file, 1)
498 * 518 *
499 * When a tree block is cow'd, new back references are added for all the 519 * When a file extent is removed by file truncation, we find the
500 * blocks it points to. If the tree block isn't in reference counted root, 520 * corresponding implicit back refs and check the following fields:
501 * the old back references are removed. These new back references are of
502 * the form (trans->transid will have increased since creation):
503 * 521 *
504 * (root->root_key.objectid, trans->transid, level, 1) 522 * (btrfs_header_owner(leaf), inode objectid, offset in file)
505 * 523 *
506 * When a backref is in deleting, the following fields are checked: 524 * Btree extents can be referenced by:
507 * 525 *
508 * if backref was for a tree root: 526 * - Different subvolumes
509 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
510 * else
511 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
512 * 527 *
513 * Back Reference Key composing: 528 * Both the implicit back refs and the full back refs for tree blocks
529 * only consist of key. The key offset for the implicit back refs is
530 * objectid of block's owner tree. The key offset for the full back refs
531 * is the first byte of parent block.
514 * 532 *
515 * The key objectid corresponds to the first byte in the extent, the key 533 * When implicit back refs is used, information about the lowest key and
516 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first 534 * level of the tree block are required. These information are stored in
517 * byte of parent extent. If a extent is tree root, the key offset is set 535 * tree block info structure.
518 * to the key objectid.
519 */ 536 */
520 537
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Convert the v0 extent item at path->slots[0] to the current
 * struct btrfs_extent_item layout, growing the item in place.
 *
 * @owner:      owner objectid of the extent, or (u64)-1 when the caller
 *              does not know it; in that case it is read from the first
 *              BTRFS_EXTENT_REF_V0_KEY item found after the extent item.
 * @extra_size: additional free space requested from the re-search, on top
 *              of what the conversion itself needs.
 *
 * The path is released and re-searched, so on success it points at the
 * converted item.  Returns 0 on success or a negative error from
 * btrfs_next_leaf() / btrfs_search_slot(); other unexpected conditions
 * hit BUG_ON().
 */
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	/* remember the key and the old ref count before the path is dropped */
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		/*
		 * owner unknown: walk forward over the items of this extent
		 * (same key.objectid) until a v0 ref item supplies it.
		 */
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				/* running out of leaves here is unexpected */
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(root, path);

	/* tree blocks carry a btrfs_tree_block_info after the extent item */
	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	/* from here on new_size is the number of bytes the item must grow */
	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	ret = btrfs_extend_item(trans, root, path, new_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		/* for tree blocks the owner value is stored as the level */
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif
620
621static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
622{
623 u32 high_crc = ~(u32)0;
624 u32 low_crc = ~(u32)0;
625 __le64 lenum;
626
627 lenum = cpu_to_le64(root_objectid);
628 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
629 lenum = cpu_to_le64(owner);
630 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
631 lenum = cpu_to_le64(offset);
632 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
633
634 return ((u64)high_crc << 31) ^ (u64)low_crc;
635}
636
637static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
638 struct btrfs_extent_data_ref *ref)
639{
640 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
641 btrfs_extent_data_ref_objectid(leaf, ref),
642 btrfs_extent_data_ref_offset(leaf, ref));
643}
644
645static int match_extent_data_ref(struct extent_buffer *leaf,
646 struct btrfs_extent_data_ref *ref,
647 u64 root_objectid, u64 owner, u64 offset)
648{
649 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
650 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
651 btrfs_extent_data_ref_offset(leaf, ref) != offset)
652 return 0;
653 return 1;
654}
655
/*
 * Find the keyed (non-inline) data back ref item for extent @bytenr.
 *
 * With a non-zero @parent the item searched for is a
 * BTRFS_SHARED_DATA_REF_KEY keyed by @parent (falling back to a
 * BTRFS_EXTENT_REF_V0_KEY item when v0 compatibility is compiled in).
 * Otherwise it is a BTRFS_EXTENT_DATA_REF_KEY item whose key offset starts
 * at hash_extent_data_ref(root_objectid, owner, offset); since different
 * refs may hash to the same value, items are scanned forward until one
 * matches all three fields.
 *
 * Returns 0 with @path pointing at the item, -ENOENT if there is none, or
 * a negative error from the tree search.
 */
static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		/* shared refs are keyed exactly; no scanning needed */
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			/*
			 * NOTE(review): presumably the leaf reached through
			 * btrfs_next_leaf() was not prepared by the cow
			 * search above, hence the re-search below - confirm.
			 */
			recow = 1;
		}

		/* key now holds the key of the item being examined */
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				/*
				 * search again using the exact key of the
				 * matching item (stored in key just above)
				 */
				btrfs_release_path(root, path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}
561 742
/*
 * Add @refs_to_add references to the keyed data back ref item for extent
 * @bytenr, creating the item if it does not exist yet.
 *
 * With a non-zero @parent a BTRFS_SHARED_DATA_REF_KEY item keyed by
 * @parent is used.  Otherwise a BTRFS_EXTENT_DATA_REF_KEY item is used,
 * keyed by the hash of (root_objectid, owner, offset); when an existing
 * item at that key describes a different ref, the key offset is
 * incremented and the insert retried until a free or matching slot is
 * found.
 *
 * The path is always released before returning.  Returns 0 on success or
 * a negative error from btrfs_insert_empty_item().
 */
static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	/* -EEXIST is not an error here: it means "update the existing item" */
	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		/*
		 * hash collision handling: probe successive key offsets until
		 * the insert succeeds or the existing item is really ours
		 */
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(root, path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			/* freshly inserted item: fill in every field */
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			/* matching item already present: bump its count */
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(root, path);
	return ret;
}
623 821
/*
 * Drop @refs_to_drop references from the data back ref item that @path
 * currently points at.
 *
 * The item may be an extent data ref, a shared data ref or (with v0
 * compatibility compiled in) a v0 extent ref; any other key type hits
 * BUG().  When the count reaches zero the item is deleted, otherwise the
 * reduced count is written back and the leaf marked dirty.
 *
 * Returns 0, or the result of btrfs_del_item() when the item is removed.
 */
static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	/* read the current count from whichever ref layout this item uses */
	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	/* dropping more references than the item holds is a fatal error */
	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			/* only the v0 key type can reach this branch */
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}
878
879static noinline u32 extent_data_ref_count(struct btrfs_root *root,
880 struct btrfs_path *path,
881 struct btrfs_extent_inline_ref *iref)
882{
883 struct btrfs_key key;
884 struct extent_buffer *leaf;
885 struct btrfs_extent_data_ref *ref1;
886 struct btrfs_shared_data_ref *ref2;
887 u32 num_refs = 0;
888
889 leaf = path->nodes[0];
890 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
891 if (iref) {
892 if (btrfs_extent_inline_ref_type(leaf, iref) ==
893 BTRFS_EXTENT_DATA_REF_KEY) {
894 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
895 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
896 } else {
897 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
898 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
899 }
900 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
901 ref1 = btrfs_item_ptr(leaf, path->slots[0],
902 struct btrfs_extent_data_ref);
903 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
904 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
905 ref2 = btrfs_item_ptr(leaf, path->slots[0],
906 struct btrfs_shared_data_ref);
907 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
908#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
909 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
910 struct btrfs_extent_ref_v0 *ref0;
911 ref0 = btrfs_item_ptr(leaf, path->slots[0],
912 struct btrfs_extent_ref_v0);
913 num_refs = btrfs_ref_count_v0(leaf, ref0);
914#endif
915 } else {
916 WARN_ON(1);
917 }
918 return num_refs;
919}
920
921static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
922 struct btrfs_root *root,
923 struct btrfs_path *path,
924 u64 bytenr, u64 parent,
925 u64 root_objectid)
926{
927 struct btrfs_key key;
928 int ret;
929
930 key.objectid = bytenr;
931 if (parent) {
932 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
933 key.offset = parent;
934 } else {
935 key.type = BTRFS_TREE_BLOCK_REF_KEY;
936 key.offset = root_objectid;
937 }
938
939 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
940 if (ret > 0)
941 ret = -ENOENT;
942#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
943 if (ret == -ENOENT && parent) {
944 btrfs_release_path(root, path);
945 key.type = BTRFS_EXTENT_REF_V0_KEY;
946 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
947 if (ret > 0)
948 ret = -ENOENT;
949 }
950#endif
951 return ret;
952}
953
954static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
955 struct btrfs_root *root,
956 struct btrfs_path *path,
957 u64 bytenr, u64 parent,
958 u64 root_objectid)
959{
960 struct btrfs_key key;
961 int ret;
962
963 key.objectid = bytenr;
964 if (parent) {
965 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
966 key.offset = parent;
967 } else {
968 key.type = BTRFS_TREE_BLOCK_REF_KEY;
969 key.offset = root_objectid;
970 }
971
972 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
973 btrfs_release_path(root, path);
974 return ret;
975}
976
977static inline int extent_ref_type(u64 parent, u64 owner)
978{
979 int type;
980 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
981 if (parent > 0)
982 type = BTRFS_SHARED_BLOCK_REF_KEY;
983 else
984 type = BTRFS_TREE_BLOCK_REF_KEY;
985 } else {
986 if (parent > 0)
987 type = BTRFS_SHARED_DATA_REF_KEY;
988 else
989 type = BTRFS_EXTENT_DATA_REF_KEY;
990 }
991 return type;
992}
993
994static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
995
996{
997 int level;
998 BUG_ON(!path->keep_locks);
999 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
1000 if (!path->nodes[level])
1001 break;
1002 btrfs_assert_tree_locked(path->nodes[level]);
1003 if (path->slots[level] + 1 >=
1004 btrfs_header_nritems(path->nodes[level]))
1005 continue;
1006 if (level == 0)
1007 btrfs_item_key_to_cpu(path->nodes[level], key,
1008 path->slots[level] + 1);
1009 else
1010 btrfs_node_key_to_cpu(path->nodes[level], key,
1011 path->slots[level] + 1);
1012 return 0;
1013 }
1014 return 1;
1015}
1016
/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 * items in the tree are ordered.
 *
 * The extent item is looked up by (bytenr, BTRFS_EXTENT_ITEM_KEY,
 * num_bytes); not finding it hits BUG_ON().  The wanted ref type is
 * derived from parent and owner via extent_ref_type().
 *
 * *ref_ret is only written when the scan over the inline refs completes.
 * It is left untouched on -EAGAIN, on search/conversion errors, and on
 * the -ENOENT returned for a v0 item when insert is false.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		/* ask the search for room for one more inline ref */
		extra_size = btrfs_extent_inline_ref_size(want);
		/* find_next_key() below requires keep_locks for data refs */
		if (owner >= BTRFS_FIRST_FREE_OBJECTID)
			path->keep_locks = 1;
	} else
		extra_size = -1;
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	BUG_ON(ret);

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	/* an item smaller than the current layout is a v0 extent item */
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	/* tree block refs: bail out early when the item cannot grow */
	if (owner < BTRFS_FIRST_FREE_OBJECTID && insert &&
	    item_size + extra_size >= BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
		err = -EAGAIN;
		goto out;
	}

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	/* inline refs start right after the extent item ... */
	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		/* ... or after the tree block info for tree blocks */
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		/* refs are sorted by type: past the wanted type means absent */
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		/* same type: compare the type specific identity */
		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (owner >= BTRFS_FIRST_FREE_OBJECTID &&
		    find_next_key(path, &key) == 0 && key.objectid == bytenr) {
			err = -EAGAIN;
			goto out;
		}
	}
	/* found: address of the ref; not found: where it would be inserted */
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	/* undo the keep_locks setting made above for data ref inserts */
	if (insert && owner >= BTRFS_FIRST_FREE_OBJECTID) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}
1183
/*
 * helper to add new inline back ref
 *
 * @path must point at the extent item and @iref at the position inside
 * that item where the new ref belongs.  The item is extended by the size
 * of the new ref, the extent's ref count is raised by @refs_to_add, an
 * optional @extent_op is applied to the extent item, and the new ref is
 * written after shifting any following inline refs towards the end.
 *
 * Always returns 0; failure of btrfs_extend_item() hits BUG_ON().
 */
static noinline_for_stack
int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	/* remember the insert position relative to the item start */
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(trans, root, path, size);
	BUG_ON(ret);

	/* the item pointer is looked up again after the item was extended */
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	/* open a gap of 'size' bytes unless inserting at the old end */
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		/* the data ref body starts at the inline ref's offset field */
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		/* the shared data ref count follows the inline ref header */
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		/* remaining type is the tree block ref, keyed by root */
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
1251
/*
 * Find the back reference for the extent at bytenr.
 *
 * The inline form is tried first through lookup_inline_extent_backref()
 * (called with a final argument of 0).  Any result other than -ENOENT,
 * success included, is returned as is, with *ref_ret as that helper left it.
 *
 * On -ENOENT the path is released, *ref_ret is set to NULL and the lookup
 * falls back to lookup_tree_block_ref() when owner is below
 * BTRFS_FIRST_FREE_OBJECTID, or to lookup_extent_data_ref() otherwise.
 * The return value is then whatever that helper returned.
 *
 * NOTE(review): presumably the fallback helpers search for a back ref stored
 * as its own item rather than inline -- confirm against their definitions,
 * which are outside this chunk.
 */
1252static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1253 struct btrfs_root *root,
1254 struct btrfs_path *path,
1255 struct btrfs_extent_inline_ref **ref_ret,
1256 u64 bytenr, u64 num_bytes, u64 parent,
1257 u64 root_objectid, u64 owner, u64 offset)
1258{
1259 int ret;
1260
1261 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1262 bytenr, num_bytes, parent,
1263 root_objectid, owner, offset, 0);
1264 if (ret != -ENOENT)
1265 return ret;
1266
/* no inline ref: drop the path before the second search */
645 btrfs_release_path(root, path); 1267 btrfs_release_path(root, path);
1268 *ref_ret = NULL;
1269
/* owner below BTRFS_FIRST_FREE_OBJECTID selects the tree block lookup */
1270 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1271 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1272 root_objectid);
1273 } else {
1274 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1275 root_objectid, owner, offset);
1276 }
1277 return ret;
1278}
1279
1280/*
1281 * helper to update/remove inline back ref
1282 */
/*
 * Apply refs_to_mod (positive or negative) to the extent item found at
 * path->slots[0] of path->nodes[0] and to the inline ref iref inside it.
 *
 * Steps, in order:
 *  - the extent item's total ref count is adjusted by refs_to_mod;
 *  - extent_op, when non-NULL, is applied via __run_delayed_extent_op();
 *  - the count carried by the inline ref itself is adjusted; if it drops
 *    to zero the inline ref is cut out of the item and the item is
 *    truncated by the size of that ref.
 *
 * Always returns 0; a btrfs_truncate_item() failure hits BUG_ON().
 */
1283static noinline_for_stack
1284int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1285 struct btrfs_root *root,
1286 struct btrfs_path *path,
1287 struct btrfs_extent_inline_ref *iref,
1288 int refs_to_mod,
1289 struct btrfs_delayed_extent_op *extent_op)
1290{
1291 struct extent_buffer *leaf;
1292 struct btrfs_extent_item *ei;
1293 struct btrfs_extent_data_ref *dref = NULL;
1294 struct btrfs_shared_data_ref *sref = NULL;
1295 unsigned long ptr;
1296 unsigned long end;
1297 u32 item_size;
1298 int size;
1299 int type;
1300 int ret;
1301 u64 refs;
1302
1303 leaf = path->nodes[0];
1304 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1305 refs = btrfs_extent_refs(leaf, ei);
/*
 * NOTE(review): refs is u64, so refs + refs_to_mod is evaluated as an
 * unsigned value and "<= 0" can only be true when the sum is exactly 0.
 * A drop larger than the current count wraps and does not trigger this
 * warning -- confirm that is the intended check.
 */
1306 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1307 refs += refs_to_mod;
1308 btrfs_set_extent_refs(leaf, ei, refs);
1309 if (extent_op)
1310 __run_delayed_extent_op(extent_op, leaf, ei);
1311
1312 type = btrfs_extent_inline_ref_type(leaf, iref);
1313
/* from here on refs holds the count of this one inline ref */
1314 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1315 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1316 refs = btrfs_extent_data_ref_count(leaf, dref);
1317 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1318 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1319 refs = btrfs_shared_data_ref_count(leaf, sref);
1320 } else {
/*
 * any other ref type is treated as a count of exactly one, and the
 * only modification accepted for it is dropping that one reference
 */
1321 refs = 1;
1322 BUG_ON(refs_to_mod != -1);
1323 }
1324
1325 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1326 refs += refs_to_mod;
1327
1328 if (refs > 0) {
/*
 * only the two data ref types can get here: the other types were
 * forced to refs == 1 with refs_to_mod == -1 above, so refs is 0
 * for them and sref is never used while still NULL
 */
1329 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1330 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1331 else
1332 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1333 } else {
/*
 * count reached zero: move the bytes that follow the inline ref
 * down over it, then shrink the item by the size of the ref
 */
1334 size = btrfs_extent_inline_ref_size(type);
1335 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1336 ptr = (unsigned long)iref;
1337 end = (unsigned long)ei + item_size;
1338 if (ptr + size < end)
1339 memmove_extent_buffer(leaf, ptr, ptr + size,
1340 end - ptr - size);
1341 item_size -= size;
1342 ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1343 BUG_ON(ret);
1344 }
1345 btrfs_mark_buffer_dirty(leaf);
1346 return 0;
1347}
1348
/*
 * Add refs_to_add references to the extent at bytenr using an inline
 * back ref.
 *
 * lookup_inline_extent_backref() is called with a final argument of 1.
 *  - 0: a matching inline ref already exists.  This is only accepted for
 *    owner >= BTRFS_FIRST_FREE_OBJECTID (BUG_ON otherwise); its count is
 *    raised through update_inline_extent_backref().
 *  - -ENOENT: no matching inline ref; setup_inline_extent_backref() is
 *    called with the iref value the lookup produced.
 *  - anything else is returned unchanged.  The caller visible in this file,
 *    __btrfs_inc_extent_ref(), handles -EAGAIN by falling back to
 *    insert_extent_backref().
 *
 * extent_op is forwarded untouched to whichever helper runs.
 *
 * NOTE(review): presumably the lookup leaves iref at the insertion position
 * on -ENOENT -- confirm against lookup_inline_extent_backref(), which is
 * outside this chunk.
 */
1349static noinline_for_stack
1350int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1351 struct btrfs_root *root,
1352 struct btrfs_path *path,
1353 u64 bytenr, u64 num_bytes, u64 parent,
1354 u64 root_objectid, u64 owner,
1355 u64 offset, int refs_to_add,
1356 struct btrfs_delayed_extent_op *extent_op)
1357{
1358 struct btrfs_extent_inline_ref *iref;
1359 int ret;
1360
1361 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1362 bytenr, num_bytes, parent,
1363 root_objectid, owner, offset, 1);
1364 if (ret == 0) {
1365 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1366 ret = update_inline_extent_backref(trans, root, path, iref,
1367 refs_to_add, extent_op);
1368 } else if (ret == -ENOENT) {
1369 ret = setup_inline_extent_backref(trans, root, path, iref,
1370 parent, root_objectid,
1371 owner, offset, refs_to_add,
1372 extent_op);
1373 }
1374 return ret;
1375}
1376
/*
 * Insert a back ref for the extent at bytenr, picking the helper by owner.
 *
 * owner below BTRFS_FIRST_FREE_OBJECTID goes to insert_tree_block_ref(),
 * and refs_to_add must be exactly 1 in that case (BUG_ON otherwise).
 * Every other owner goes to insert_extent_data_ref(), which also receives
 * owner, offset and refs_to_add.
 *
 * Returns the helper's return value.
 */
1377static int insert_extent_backref(struct btrfs_trans_handle *trans,
1378 struct btrfs_root *root,
1379 struct btrfs_path *path,
1380 u64 bytenr, u64 parent, u64 root_objectid,
1381 u64 owner, u64 offset, int refs_to_add)
1382{
1383 int ret;
1384 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1385 BUG_ON(refs_to_add != 1);
1386 ret = insert_tree_block_ref(trans, root, path, bytenr,
1387 parent, root_objectid);
1388 } else {
1389 ret = insert_extent_data_ref(trans, root, path, bytenr,
1390 parent, root_objectid,
1391 owner, offset, refs_to_add);
1392 }
1393 return ret;
1394}
1395
/*
 * Drop refs_to_drop references from a back ref that the caller has
 * already located.
 *
 * Non-data extents (!is_data) may only drop exactly one reference;
 * anything else hits BUG_ON().
 *
 *  - iref non-NULL: the ref is inline; update_inline_extent_backref() is
 *    called with -refs_to_drop and no extent_op.
 *  - iref NULL and is_data: remove_extent_data_ref() handles the drop.
 *  - iref NULL and !is_data: the item at path is deleted with
 *    btrfs_del_item().
 *
 * Returns the return value of whichever helper ran.
 */
1396static int remove_extent_backref(struct btrfs_trans_handle *trans,
1397 struct btrfs_root *root,
1398 struct btrfs_path *path,
1399 struct btrfs_extent_inline_ref *iref,
1400 int refs_to_drop, int is_data)
1401{
1402 int ret;
1403
1404 BUG_ON(!is_data && refs_to_drop != 1);
1405 if (iref) {
1406 ret = update_inline_extent_backref(trans, root, path, iref,
1407 -refs_to_drop, NULL);
1408 } else if (is_data) {
1409 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1410 } else {
1411 ret = btrfs_del_item(trans, root, path);
1412 }
646 return ret; 1413 return ret;
647} 1414}
648 1415
@@ -686,71 +1453,40 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
686#endif 1453#endif
687} 1454}
688 1455
689static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1456int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
690 struct btrfs_root *root, u64 bytenr, 1457 struct btrfs_root *root,
691 u64 num_bytes, 1458 u64 bytenr, u64 num_bytes, u64 parent,
692 u64 orig_parent, u64 parent, 1459 u64 root_objectid, u64 owner, u64 offset)
693 u64 orig_root, u64 ref_root,
694 u64 orig_generation, u64 ref_generation,
695 u64 owner_objectid)
696{ 1460{
697 int ret; 1461 int ret;
698 int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; 1462 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1463 root_objectid == BTRFS_TREE_LOG_OBJECTID);
699 1464
700 ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, 1465 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
701 orig_parent, parent, orig_root, 1466 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
702 ref_root, orig_generation, 1467 parent, root_objectid, (int)owner,
703 ref_generation, owner_objectid, pin); 1468 BTRFS_ADD_DELAYED_REF, NULL);
704 BUG_ON(ret); 1469 } else {
1470 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
1471 parent, root_objectid, owner, offset,
1472 BTRFS_ADD_DELAYED_REF, NULL);
1473 }
705 return ret; 1474 return ret;
706} 1475}
707 1476
708int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
709 struct btrfs_root *root, u64 bytenr,
710 u64 num_bytes, u64 orig_parent, u64 parent,
711 u64 ref_root, u64 ref_generation,
712 u64 owner_objectid)
713{
714 int ret;
715 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
716 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
717 return 0;
718
719 ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
720 orig_parent, parent, ref_root,
721 ref_root, ref_generation,
722 ref_generation, owner_objectid);
723 return ret;
724}
725static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1477static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
726 struct btrfs_root *root, u64 bytenr, 1478 struct btrfs_root *root,
727 u64 num_bytes, 1479 u64 bytenr, u64 num_bytes,
728 u64 orig_parent, u64 parent, 1480 u64 parent, u64 root_objectid,
729 u64 orig_root, u64 ref_root, 1481 u64 owner, u64 offset, int refs_to_add,
730 u64 orig_generation, u64 ref_generation, 1482 struct btrfs_delayed_extent_op *extent_op)
731 u64 owner_objectid)
732{
733 int ret;
734
735 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
736 ref_generation, owner_objectid,
737 BTRFS_ADD_DELAYED_REF, 0);
738 BUG_ON(ret);
739 return ret;
740}
741
742static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
743 struct btrfs_root *root, u64 bytenr,
744 u64 num_bytes, u64 parent, u64 ref_root,
745 u64 ref_generation, u64 owner_objectid,
746 int refs_to_add)
747{ 1483{
748 struct btrfs_path *path; 1484 struct btrfs_path *path;
749 int ret; 1485 struct extent_buffer *leaf;
750 struct btrfs_key key;
751 struct extent_buffer *l;
752 struct btrfs_extent_item *item; 1486 struct btrfs_extent_item *item;
753 u32 refs; 1487 u64 refs;
1488 int ret;
1489 int err = 0;
754 1490
755 path = btrfs_alloc_path(); 1491 path = btrfs_alloc_path();
756 if (!path) 1492 if (!path)
@@ -758,43 +1494,27 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
758 1494
759 path->reada = 1; 1495 path->reada = 1;
760 path->leave_spinning = 1; 1496 path->leave_spinning = 1;
761 key.objectid = bytenr; 1497 /* this will setup the path even if it fails to insert the back ref */
762 key.type = BTRFS_EXTENT_ITEM_KEY; 1498 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
763 key.offset = num_bytes; 1499 path, bytenr, num_bytes, parent,
764 1500 root_objectid, owner, offset,
765 /* first find the extent item and update its reference count */ 1501 refs_to_add, extent_op);
766 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1502 if (ret == 0)
767 path, 0, 1); 1503 goto out;
768 if (ret < 0) {
769 btrfs_set_path_blocking(path);
770 return ret;
771 }
772
773 if (ret > 0) {
774 WARN_ON(1);
775 btrfs_free_path(path);
776 return -EIO;
777 }
778 l = path->nodes[0];
779 1504
780 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 1505 if (ret != -EAGAIN) {
781 if (key.objectid != bytenr) { 1506 err = ret;
782 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); 1507 goto out;
783 printk(KERN_ERR "btrfs wanted %llu found %llu\n",
784 (unsigned long long)bytenr,
785 (unsigned long long)key.objectid);
786 BUG();
787 } 1508 }
788 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
789 1509
790 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 1510 leaf = path->nodes[0];
791 1511 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
792 refs = btrfs_extent_refs(l, item); 1512 refs = btrfs_extent_refs(leaf, item);
793 btrfs_set_extent_refs(l, item, refs + refs_to_add); 1513 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
794 btrfs_unlock_up_safe(path, 1); 1514 if (extent_op)
795 1515 __run_delayed_extent_op(extent_op, leaf, item);
796 btrfs_mark_buffer_dirty(path->nodes[0]);
797 1516
1517 btrfs_mark_buffer_dirty(leaf);
798 btrfs_release_path(root->fs_info->extent_root, path); 1518 btrfs_release_path(root->fs_info->extent_root, path);
799 1519
800 path->reada = 1; 1520 path->reada = 1;
@@ -802,56 +1522,197 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
802 1522
803 /* now insert the actual backref */ 1523 /* now insert the actual backref */
804 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1524 ret = insert_extent_backref(trans, root->fs_info->extent_root,
805 path, bytenr, parent, 1525 path, bytenr, parent, root_objectid,
806 ref_root, ref_generation, 1526 owner, offset, refs_to_add);
807 owner_objectid, refs_to_add);
808 BUG_ON(ret); 1527 BUG_ON(ret);
1528out:
809 btrfs_free_path(path); 1529 btrfs_free_path(path);
810 return 0; 1530 return err;
811} 1531}
812 1532
813int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1533static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
814 struct btrfs_root *root, 1534 struct btrfs_root *root,
815 u64 bytenr, u64 num_bytes, u64 parent, 1535 struct btrfs_delayed_ref_node *node,
816 u64 ref_root, u64 ref_generation, 1536 struct btrfs_delayed_extent_op *extent_op,
817 u64 owner_objectid) 1537 int insert_reserved)
818{ 1538{
819 int ret; 1539 int ret = 0;
820 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1540 struct btrfs_delayed_data_ref *ref;
821 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 1541 struct btrfs_key ins;
822 return 0; 1542 u64 parent = 0;
1543 u64 ref_root = 0;
1544 u64 flags = 0;
1545
1546 ins.objectid = node->bytenr;
1547 ins.offset = node->num_bytes;
1548 ins.type = BTRFS_EXTENT_ITEM_KEY;
1549
1550 ref = btrfs_delayed_node_to_data_ref(node);
1551 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1552 parent = ref->parent;
1553 else
1554 ref_root = ref->root;
823 1555
824 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent, 1556 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
825 0, ref_root, 0, ref_generation, 1557 if (extent_op) {
826 owner_objectid); 1558 BUG_ON(extent_op->update_key);
1559 flags |= extent_op->flags_to_set;
1560 }
1561 ret = alloc_reserved_file_extent(trans, root,
1562 parent, ref_root, flags,
1563 ref->objectid, ref->offset,
1564 &ins, node->ref_mod);
1565 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1566 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1567 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1568 node->num_bytes, parent,
1569 ref_root, ref->objectid,
1570 ref->offset, node->ref_mod,
1571 extent_op);
1572 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1573 ret = __btrfs_free_extent(trans, root, node->bytenr,
1574 node->num_bytes, parent,
1575 ref_root, ref->objectid,
1576 ref->offset, node->ref_mod,
1577 extent_op);
1578 } else {
1579 BUG();
1580 }
827 return ret; 1581 return ret;
828} 1582}
829 1583
830static int drop_delayed_ref(struct btrfs_trans_handle *trans, 1584static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
831 struct btrfs_root *root, 1585 struct extent_buffer *leaf,
832 struct btrfs_delayed_ref_node *node) 1586 struct btrfs_extent_item *ei)
1587{
1588 u64 flags = btrfs_extent_flags(leaf, ei);
1589 if (extent_op->update_flags) {
1590 flags |= extent_op->flags_to_set;
1591 btrfs_set_extent_flags(leaf, ei, flags);
1592 }
1593
1594 if (extent_op->update_key) {
1595 struct btrfs_tree_block_info *bi;
1596 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1597 bi = (struct btrfs_tree_block_info *)(ei + 1);
1598 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1599 }
1600}
1601
/*
 * Apply a delayed extent_op to the extent item described by node.
 *
 * The item is searched for in root->fs_info->extent_root with the key
 * (node->bytenr, BTRFS_EXTENT_ITEM_KEY, node->num_bytes).  Once found,
 * __run_delayed_extent_op() is run on it and the leaf is marked dirty.
 *
 * Returns:
 *   0        on success
 *   -ENOMEM  if the path cannot be allocated
 *   -EIO     if the search completes without an exact match (ret > 0)
 *   < 0      any error from btrfs_search_slot() or convert_extent_item_v0()
 */
1602static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1603 struct btrfs_root *root,
1604 struct btrfs_delayed_ref_node *node,
1605 struct btrfs_delayed_extent_op *extent_op)
1606{
1607 struct btrfs_key key;
1608 struct btrfs_path *path;
1609 struct btrfs_extent_item *ei;
1610 struct extent_buffer *leaf;
1611 u32 item_size;
1612 int ret;
1613 int err = 0;
1614
1615 path = btrfs_alloc_path();
1616 if (!path)
1617 return -ENOMEM;
1618
1619 key.objectid = node->bytenr;
1620 key.type = BTRFS_EXTENT_ITEM_KEY;
1621 key.offset = node->num_bytes;
1622
1623 path->reada = 1;
1624 path->leave_spinning = 1;
/*
 * NOTE(review): the trailing "0, 1" are presumably ins_len = 0 and cow = 1
 * (the item is modified in place below) -- confirm against the
 * btrfs_search_slot() prototype, which is outside this chunk.
 */
1625 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
1626 path, 0, 1);
1627 if (ret < 0) {
1628 err = ret;
1629 goto out;
1630 }
1631 if (ret > 0) {
1632 err = -EIO;
1633 goto out;
1634 }
1635
1636 leaf = path->nodes[0];
1637 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1638#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * an item smaller than struct btrfs_extent_item is passed to
 * convert_extent_item_v0(); leaf and item_size are re-read afterwards
 * because the conversion works on the same path
 */
1639 if (item_size < sizeof(*ei)) {
1640 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
1641 path, (u64)-1, 0);
1642 if (ret < 0) {
1643 err = ret;
1644 goto out;
1645 }
1646 leaf = path->nodes[0];
1647 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1648 }
1649#endif
1650 BUG_ON(item_size < sizeof(*ei));
1651 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1652 __run_delayed_extent_op(extent_op, leaf, ei);
1653
1654 btrfs_mark_buffer_dirty(leaf);
1655out:
1656 btrfs_free_path(path);
1657 return err;
1658}
1659
1660static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1661 struct btrfs_root *root,
1662 struct btrfs_delayed_ref_node *node,
1663 struct btrfs_delayed_extent_op *extent_op,
1664 int insert_reserved)
833{ 1665{
834 int ret = 0; 1666 int ret = 0;
835 struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); 1667 struct btrfs_delayed_tree_ref *ref;
1668 struct btrfs_key ins;
1669 u64 parent = 0;
1670 u64 ref_root = 0;
836 1671
837 BUG_ON(node->ref_mod == 0); 1672 ins.objectid = node->bytenr;
838 ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, 1673 ins.offset = node->num_bytes;
839 node->parent, ref->root, ref->generation, 1674 ins.type = BTRFS_EXTENT_ITEM_KEY;
840 ref->owner_objectid, ref->pin, node->ref_mod);
841 1675
1676 ref = btrfs_delayed_node_to_tree_ref(node);
1677 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1678 parent = ref->parent;
1679 else
1680 ref_root = ref->root;
1681
1682 BUG_ON(node->ref_mod != 1);
1683 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1684 BUG_ON(!extent_op || !extent_op->update_flags ||
1685 !extent_op->update_key);
1686 ret = alloc_reserved_tree_block(trans, root,
1687 parent, ref_root,
1688 extent_op->flags_to_set,
1689 &extent_op->key,
1690 ref->level, &ins);
1691 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1692 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1693 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1694 node->num_bytes, parent, ref_root,
1695 ref->level, 0, 1, extent_op);
1696 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1697 ret = __btrfs_free_extent(trans, root, node->bytenr,
1698 node->num_bytes, parent, ref_root,
1699 ref->level, 0, 1, extent_op);
1700 } else {
1701 BUG();
1702 }
842 return ret; 1703 return ret;
843} 1704}
844 1705
1706
845/* helper function to actually process a single delayed ref entry */ 1707/* helper function to actually process a single delayed ref entry */
846static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1708static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
847 struct btrfs_root *root, 1709 struct btrfs_root *root,
848 struct btrfs_delayed_ref_node *node, 1710 struct btrfs_delayed_ref_node *node,
849 int insert_reserved) 1711 struct btrfs_delayed_extent_op *extent_op,
1712 int insert_reserved)
850{ 1713{
851 int ret; 1714 int ret;
852 struct btrfs_delayed_ref *ref; 1715 if (btrfs_delayed_ref_is_head(node)) {
853
854 if (node->parent == (u64)-1) {
855 struct btrfs_delayed_ref_head *head; 1716 struct btrfs_delayed_ref_head *head;
856 /* 1717 /*
857 * we've hit the end of the chain and we were supposed 1718 * we've hit the end of the chain and we were supposed
@@ -859,44 +1720,35 @@ static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
859 * deleted before we ever needed to insert it, so all 1720 * deleted before we ever needed to insert it, so all
860 * we have to do is clean up the accounting 1721 * we have to do is clean up the accounting
861 */ 1722 */
1723 BUG_ON(extent_op);
1724 head = btrfs_delayed_node_to_head(node);
862 if (insert_reserved) { 1725 if (insert_reserved) {
1726 if (head->is_data) {
1727 ret = btrfs_del_csums(trans, root,
1728 node->bytenr,
1729 node->num_bytes);
1730 BUG_ON(ret);
1731 }
1732 btrfs_update_pinned_extents(root, node->bytenr,
1733 node->num_bytes, 1);
863 update_reserved_extents(root, node->bytenr, 1734 update_reserved_extents(root, node->bytenr,
864 node->num_bytes, 0); 1735 node->num_bytes, 0);
865 } 1736 }
866 head = btrfs_delayed_node_to_head(node);
867 mutex_unlock(&head->mutex); 1737 mutex_unlock(&head->mutex);
868 return 0; 1738 return 0;
869 } 1739 }
870 1740
871 ref = btrfs_delayed_node_to_ref(node); 1741 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
872 if (ref->action == BTRFS_ADD_DELAYED_REF) { 1742 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
873 if (insert_reserved) { 1743 ret = run_delayed_tree_ref(trans, root, node, extent_op,
874 struct btrfs_key ins; 1744 insert_reserved);
875 1745 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
876 ins.objectid = node->bytenr; 1746 node->type == BTRFS_SHARED_DATA_REF_KEY)
877 ins.offset = node->num_bytes; 1747 ret = run_delayed_data_ref(trans, root, node, extent_op,
878 ins.type = BTRFS_EXTENT_ITEM_KEY; 1748 insert_reserved);
879 1749 else
880 /* record the full extent allocation */ 1750 BUG();
881 ret = __btrfs_alloc_reserved_extent(trans, root, 1751 return ret;
882 node->parent, ref->root,
883 ref->generation, ref->owner_objectid,
884 &ins, node->ref_mod);
885 update_reserved_extents(root, node->bytenr,
886 node->num_bytes, 0);
887 } else {
888 /* just add one backref */
889 ret = add_extent_ref(trans, root, node->bytenr,
890 node->num_bytes,
891 node->parent, ref->root, ref->generation,
892 ref->owner_objectid, node->ref_mod);
893 }
894 BUG_ON(ret);
895 } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
896 WARN_ON(insert_reserved);
897 ret = drop_delayed_ref(trans, root, node);
898 }
899 return 0;
900} 1752}
901 1753
902static noinline struct btrfs_delayed_ref_node * 1754static noinline struct btrfs_delayed_ref_node *
@@ -919,7 +1771,7 @@ again:
919 rb_node); 1771 rb_node);
920 if (ref->bytenr != head->node.bytenr) 1772 if (ref->bytenr != head->node.bytenr)
921 break; 1773 break;
922 if (btrfs_delayed_node_to_ref(ref)->action == action) 1774 if (ref->action == action)
923 return ref; 1775 return ref;
924 node = rb_prev(node); 1776 node = rb_prev(node);
925 } 1777 }
@@ -937,6 +1789,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
937 struct btrfs_delayed_ref_root *delayed_refs; 1789 struct btrfs_delayed_ref_root *delayed_refs;
938 struct btrfs_delayed_ref_node *ref; 1790 struct btrfs_delayed_ref_node *ref;
939 struct btrfs_delayed_ref_head *locked_ref = NULL; 1791 struct btrfs_delayed_ref_head *locked_ref = NULL;
1792 struct btrfs_delayed_extent_op *extent_op;
940 int ret; 1793 int ret;
941 int count = 0; 1794 int count = 0;
942 int must_insert_reserved = 0; 1795 int must_insert_reserved = 0;
@@ -975,6 +1828,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
975 must_insert_reserved = locked_ref->must_insert_reserved; 1828 must_insert_reserved = locked_ref->must_insert_reserved;
976 locked_ref->must_insert_reserved = 0; 1829 locked_ref->must_insert_reserved = 0;
977 1830
1831 extent_op = locked_ref->extent_op;
1832 locked_ref->extent_op = NULL;
1833
978 /* 1834 /*
979 * locked_ref is the head node, so we have to go one 1835 * locked_ref is the head node, so we have to go one
980 * node back for any delayed ref updates 1836 * node back for any delayed ref updates
@@ -986,6 +1842,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
986 * so that any accounting fixes can happen 1842 * so that any accounting fixes can happen
987 */ 1843 */
988 ref = &locked_ref->node; 1844 ref = &locked_ref->node;
1845
1846 if (extent_op && must_insert_reserved) {
1847 kfree(extent_op);
1848 extent_op = NULL;
1849 }
1850
1851 if (extent_op) {
1852 spin_unlock(&delayed_refs->lock);
1853
1854 ret = run_delayed_extent_op(trans, root,
1855 ref, extent_op);
1856 BUG_ON(ret);
1857 kfree(extent_op);
1858
1859 cond_resched();
1860 spin_lock(&delayed_refs->lock);
1861 continue;
1862 }
1863
989 list_del_init(&locked_ref->cluster); 1864 list_del_init(&locked_ref->cluster);
990 locked_ref = NULL; 1865 locked_ref = NULL;
991 } 1866 }
@@ -993,14 +1868,17 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
993 ref->in_tree = 0; 1868 ref->in_tree = 0;
994 rb_erase(&ref->rb_node, &delayed_refs->root); 1869 rb_erase(&ref->rb_node, &delayed_refs->root);
995 delayed_refs->num_entries--; 1870 delayed_refs->num_entries--;
1871
996 spin_unlock(&delayed_refs->lock); 1872 spin_unlock(&delayed_refs->lock);
997 1873
998 ret = run_one_delayed_ref(trans, root, ref, 1874 ret = run_one_delayed_ref(trans, root, ref, extent_op,
999 must_insert_reserved); 1875 must_insert_reserved);
1000 BUG_ON(ret); 1876 BUG_ON(ret);
1001 btrfs_put_delayed_ref(ref);
1002 1877
1878 btrfs_put_delayed_ref(ref);
1879 kfree(extent_op);
1003 count++; 1880 count++;
1881
1004 cond_resched(); 1882 cond_resched();
1005 spin_lock(&delayed_refs->lock); 1883 spin_lock(&delayed_refs->lock);
1006 } 1884 }
@@ -1095,25 +1973,112 @@ out:
1095 return 0; 1973 return 0;
1096} 1974}
1097 1975
1098int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1976int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
1099 struct btrfs_root *root, u64 objectid, u64 bytenr) 1977 struct btrfs_root *root,
1978 u64 bytenr, u64 num_bytes, u64 flags,
1979 int is_data)
1980{
1981 struct btrfs_delayed_extent_op *extent_op;
1982 int ret;
1983
1984 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1985 if (!extent_op)
1986 return -ENOMEM;
1987
1988 extent_op->flags_to_set = flags;
1989 extent_op->update_flags = 1;
1990 extent_op->update_key = 0;
1991 extent_op->is_data = is_data ? 1 : 0;
1992
1993 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
1994 if (ret)
1995 kfree(extent_op);
1996 return ret;
1997}
1998
/*
 * Inspect the delayed refs queued in the running transaction for bytenr
 * and report whether they are compatible with the extent being referenced
 * only by (root, objectid, offset).
 *
 * Returns:
 *   -ENOENT  no delayed ref head exists for bytenr, or the node before the
 *            head in the rbtree is missing or belongs to another bytenr
 *   -EAGAIN  the head's mutex was held by someone else; the function has
 *            released path, waited for the mutex to be free and expects
 *            to be called again (btrfs_cross_ref_exist() loops on this)
 *   1        the node before the head is not a BTRFS_EXTENT_DATA_REF_KEY
 *            ref, or a second node for the same bytenr precedes it, or
 *            its root/objectid/offset differ from the ones passed in
 *   0        exactly one delayed data ref precedes the head and it matches
 *            root->root_key.objectid, objectid and offset
 */
1999static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2000 struct btrfs_root *root,
2001 struct btrfs_path *path,
2002 u64 objectid, u64 offset, u64 bytenr)
2003{
2004 struct btrfs_delayed_ref_head *head;
2005 struct btrfs_delayed_ref_node *ref;
2006 struct btrfs_delayed_data_ref *data_ref;
2007 struct btrfs_delayed_ref_root *delayed_refs;
2008 struct rb_node *node;
/* NOTE(review): this initialiser is dead, ret is overwritten just below */
2009 int ret = 0;
2010
2011 ret = -ENOENT;
2012 delayed_refs = &trans->transaction->delayed_refs;
2013 spin_lock(&delayed_refs->lock);
2014 head = btrfs_find_delayed_ref_head(trans, bytenr);
2015 if (!head)
2016 goto out;
2017
2018 if (!mutex_trylock(&head->mutex)) {
/*
 * the head is busy: take a reference on it so it stays valid once the
 * spinlock is dropped, release the path, then lock and immediately
 * unlock the mutex purely to wait for the current holder
 */
2019 atomic_inc(&head->node.refs);
2020 spin_unlock(&delayed_refs->lock);
2021
2022 btrfs_release_path(root->fs_info->extent_root, path);
2023
2024 mutex_lock(&head->mutex);
2025 mutex_unlock(&head->mutex);
2026 btrfs_put_delayed_ref(&head->node);
2027 return -EAGAIN;
2028 }
2029
/* step back from the head node to the ref queued just before it */
2030 node = rb_prev(&head->node.rb_node);
2031 if (!node)
2032 goto out_unlock;
2033
2034 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2035
2036 if (ref->bytenr != bytenr)
2037 goto out_unlock;
2038
/* from here on every early exit reports 1 */
2039 ret = 1;
2040 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2041 goto out_unlock;
2042
2043 data_ref = btrfs_delayed_node_to_data_ref(ref);
2044
/* a second node for the same bytenr means more than one delayed ref */
2045 node = rb_prev(node);
2046 if (node) {
2047 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2048 if (ref->bytenr == bytenr)
2049 goto out_unlock;
2050 }
2051
2052 if (data_ref->root != root->root_key.objectid ||
2053 data_ref->objectid != objectid || data_ref->offset != offset)
2054 goto out_unlock;
2055
2056 ret = 0;
2057out_unlock:
2058 mutex_unlock(&head->mutex);
2059out:
2060 spin_unlock(&delayed_refs->lock);
2061 return ret;
2062}
2063
2064static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2065 struct btrfs_root *root,
2066 struct btrfs_path *path,
2067 u64 objectid, u64 offset, u64 bytenr)
1100{ 2068{
1101 struct btrfs_root *extent_root = root->fs_info->extent_root; 2069 struct btrfs_root *extent_root = root->fs_info->extent_root;
1102 struct btrfs_path *path;
1103 struct extent_buffer *leaf; 2070 struct extent_buffer *leaf;
1104 struct btrfs_extent_ref *ref_item; 2071 struct btrfs_extent_data_ref *ref;
2072 struct btrfs_extent_inline_ref *iref;
2073 struct btrfs_extent_item *ei;
1105 struct btrfs_key key; 2074 struct btrfs_key key;
1106 struct btrfs_key found_key; 2075 u32 item_size;
1107 u64 ref_root;
1108 u64 last_snapshot;
1109 u32 nritems;
1110 int ret; 2076 int ret;
1111 2077
1112 key.objectid = bytenr; 2078 key.objectid = bytenr;
1113 key.offset = (u64)-1; 2079 key.offset = (u64)-1;
1114 key.type = BTRFS_EXTENT_ITEM_KEY; 2080 key.type = BTRFS_EXTENT_ITEM_KEY;
1115 2081
1116 path = btrfs_alloc_path();
1117 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2082 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1118 if (ret < 0) 2083 if (ret < 0)
1119 goto out; 2084 goto out;
@@ -1125,55 +2090,83 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1125 2090
1126 path->slots[0]--; 2091 path->slots[0]--;
1127 leaf = path->nodes[0]; 2092 leaf = path->nodes[0];
1128 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2093 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1129 2094
1130 if (found_key.objectid != bytenr || 2095 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
1131 found_key.type != BTRFS_EXTENT_ITEM_KEY)
1132 goto out; 2096 goto out;
1133 2097
1134 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 2098 ret = 1;
1135 while (1) { 2099 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1136 leaf = path->nodes[0]; 2100#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1137 nritems = btrfs_header_nritems(leaf); 2101 if (item_size < sizeof(*ei)) {
1138 if (path->slots[0] >= nritems) { 2102 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
1139 ret = btrfs_next_leaf(extent_root, path); 2103 goto out;
1140 if (ret < 0) 2104 }
1141 goto out; 2105#endif
1142 if (ret == 0) 2106 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1143 continue;
1144 break;
1145 }
1146 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1147 if (found_key.objectid != bytenr)
1148 break;
1149 2107
1150 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 2108 if (item_size != sizeof(*ei) +
1151 path->slots[0]++; 2109 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
1152 continue; 2110 goto out;
1153 }
1154 2111
1155 ref_item = btrfs_item_ptr(leaf, path->slots[0], 2112 if (btrfs_extent_generation(leaf, ei) <=
1156 struct btrfs_extent_ref); 2113 btrfs_root_last_snapshot(&root->root_item))
1157 ref_root = btrfs_ref_root(leaf, ref_item); 2114 goto out;
1158 if ((ref_root != root->root_key.objectid && 2115
1159 ref_root != BTRFS_TREE_LOG_OBJECTID) || 2116 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1160 objectid != btrfs_ref_objectid(leaf, ref_item)) { 2117 if (btrfs_extent_inline_ref_type(leaf, iref) !=
1161 ret = 1; 2118 BTRFS_EXTENT_DATA_REF_KEY)
1162 goto out; 2119 goto out;
1163 } 2120
1164 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) { 2121 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
1165 ret = 1; 2122 if (btrfs_extent_refs(leaf, ei) !=
2123 btrfs_extent_data_ref_count(leaf, ref) ||
2124 btrfs_extent_data_ref_root(leaf, ref) !=
2125 root->root_key.objectid ||
2126 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2127 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2128 goto out;
2129
2130 ret = 0;
2131out:
2132 return ret;
2133}
2134
2135int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2136 struct btrfs_root *root,
2137 u64 objectid, u64 offset, u64 bytenr)
2138{
2139 struct btrfs_path *path;
2140 int ret;
2141 int ret2;
2142
2143 path = btrfs_alloc_path();
2144 if (!path)
2145 return -ENOENT;
2146
2147 do {
2148 ret = check_committed_ref(trans, root, path, objectid,
2149 offset, bytenr);
2150 if (ret && ret != -ENOENT)
1166 goto out; 2151 goto out;
1167 }
1168 2152
1169 path->slots[0]++; 2153 ret2 = check_delayed_ref(trans, root, path, objectid,
2154 offset, bytenr);
2155 } while (ret2 == -EAGAIN);
2156
2157 if (ret2 && ret2 != -ENOENT) {
2158 ret = ret2;
2159 goto out;
1170 } 2160 }
1171 ret = 0; 2161
2162 if (ret != -ENOENT || ret2 != -ENOENT)
2163 ret = 0;
1172out: 2164out:
1173 btrfs_free_path(path); 2165 btrfs_free_path(path);
1174 return ret; 2166 return ret;
1175} 2167}
1176 2168
2169#if 0
1177int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2170int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1178 struct extent_buffer *buf, u32 nr_extents) 2171 struct extent_buffer *buf, u32 nr_extents)
1179{ 2172{
@@ -1291,62 +2284,44 @@ static int refsort_cmp(const void *a_void, const void *b_void)
1291 return 1; 2284 return 1;
1292 return 0; 2285 return 0;
1293} 2286}
2287#endif
1294 2288
1295 2289static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
1296noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1297 struct btrfs_root *root, 2290 struct btrfs_root *root,
1298 struct extent_buffer *orig_buf, 2291 struct extent_buffer *buf,
1299 struct extent_buffer *buf, u32 *nr_extents) 2292 int full_backref, int inc)
1300{ 2293{
1301 u64 bytenr; 2294 u64 bytenr;
2295 u64 num_bytes;
2296 u64 parent;
1302 u64 ref_root; 2297 u64 ref_root;
1303 u64 orig_root;
1304 u64 ref_generation;
1305 u64 orig_generation;
1306 struct refsort *sorted;
1307 u32 nritems; 2298 u32 nritems;
1308 u32 nr_file_extents = 0;
1309 struct btrfs_key key; 2299 struct btrfs_key key;
1310 struct btrfs_file_extent_item *fi; 2300 struct btrfs_file_extent_item *fi;
1311 int i; 2301 int i;
1312 int level; 2302 int level;
1313 int ret = 0; 2303 int ret = 0;
1314 int faili = 0;
1315 int refi = 0;
1316 int slot;
1317 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2304 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1318 u64, u64, u64, u64, u64, u64, u64, u64, u64); 2305 u64, u64, u64, u64, u64, u64);
1319 2306
1320 ref_root = btrfs_header_owner(buf); 2307 ref_root = btrfs_header_owner(buf);
1321 ref_generation = btrfs_header_generation(buf);
1322 orig_root = btrfs_header_owner(orig_buf);
1323 orig_generation = btrfs_header_generation(orig_buf);
1324
1325 nritems = btrfs_header_nritems(buf); 2308 nritems = btrfs_header_nritems(buf);
1326 level = btrfs_header_level(buf); 2309 level = btrfs_header_level(buf);
1327 2310
1328 sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); 2311 if (!root->ref_cows && level == 0)
1329 BUG_ON(!sorted); 2312 return 0;
1330 2313
1331 if (root->ref_cows) { 2314 if (inc)
1332 process_func = __btrfs_inc_extent_ref; 2315 process_func = btrfs_inc_extent_ref;
1333 } else { 2316 else
1334 if (level == 0 && 2317 process_func = btrfs_free_extent;
1335 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 2318
1336 goto out; 2319 if (full_backref)
1337 if (level != 0 && 2320 parent = buf->start;
1338 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 2321 else
1339 goto out; 2322 parent = 0;
1340 process_func = __btrfs_update_extent_ref;
1341 }
1342 2323
1343 /*
1344 * we make two passes through the items. In the first pass we
1345 * only record the byte number and slot. Then we sort based on
1346 * byte number and do the actual work based on the sorted results
1347 */
1348 for (i = 0; i < nritems; i++) { 2324 for (i = 0; i < nritems; i++) {
1349 cond_resched();
1350 if (level == 0) { 2325 if (level == 0) {
1351 btrfs_item_key_to_cpu(buf, &key, i); 2326 btrfs_item_key_to_cpu(buf, &key, i);
1352 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2327 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
@@ -1360,151 +2335,38 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1360 if (bytenr == 0) 2335 if (bytenr == 0)
1361 continue; 2336 continue;
1362 2337
1363 nr_file_extents++; 2338 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
1364 sorted[refi].bytenr = bytenr; 2339 key.offset -= btrfs_file_extent_offset(buf, fi);
1365 sorted[refi].slot = i; 2340 ret = process_func(trans, root, bytenr, num_bytes,
1366 refi++; 2341 parent, ref_root, key.objectid,
1367 } else { 2342 key.offset);
1368 bytenr = btrfs_node_blockptr(buf, i); 2343 if (ret)
1369 sorted[refi].bytenr = bytenr;
1370 sorted[refi].slot = i;
1371 refi++;
1372 }
1373 }
1374 /*
1375 * if refi == 0, we didn't actually put anything into the sorted
1376 * array and we're done
1377 */
1378 if (refi == 0)
1379 goto out;
1380
1381 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
1382
1383 for (i = 0; i < refi; i++) {
1384 cond_resched();
1385 slot = sorted[i].slot;
1386 bytenr = sorted[i].bytenr;
1387
1388 if (level == 0) {
1389 btrfs_item_key_to_cpu(buf, &key, slot);
1390 fi = btrfs_item_ptr(buf, slot,
1391 struct btrfs_file_extent_item);
1392
1393 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1394 if (bytenr == 0)
1395 continue;
1396
1397 ret = process_func(trans, root, bytenr,
1398 btrfs_file_extent_disk_num_bytes(buf, fi),
1399 orig_buf->start, buf->start,
1400 orig_root, ref_root,
1401 orig_generation, ref_generation,
1402 key.objectid);
1403
1404 if (ret) {
1405 faili = slot;
1406 WARN_ON(1);
1407 goto fail; 2344 goto fail;
1408 }
1409 } else { 2345 } else {
1410 ret = process_func(trans, root, bytenr, buf->len, 2346 bytenr = btrfs_node_blockptr(buf, i);
1411 orig_buf->start, buf->start, 2347 num_bytes = btrfs_level_size(root, level - 1);
1412 orig_root, ref_root, 2348 ret = process_func(trans, root, bytenr, num_bytes,
1413 orig_generation, ref_generation, 2349 parent, ref_root, level - 1, 0);
1414 level - 1); 2350 if (ret)
1415 if (ret) {
1416 faili = slot;
1417 WARN_ON(1);
1418 goto fail; 2351 goto fail;
1419 }
1420 } 2352 }
1421 } 2353 }
1422out:
1423 kfree(sorted);
1424 if (nr_extents) {
1425 if (level == 0)
1426 *nr_extents = nr_file_extents;
1427 else
1428 *nr_extents = nritems;
1429 }
1430 return 0; 2354 return 0;
1431fail: 2355fail:
1432 kfree(sorted); 2356 BUG();
1433 WARN_ON(1);
1434 return ret; 2357 return ret;
1435} 2358}
1436 2359
1437int btrfs_update_ref(struct btrfs_trans_handle *trans, 2360int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1438 struct btrfs_root *root, struct extent_buffer *orig_buf, 2361 struct extent_buffer *buf, int full_backref)
1439 struct extent_buffer *buf, int start_slot, int nr)
1440
1441{ 2362{
1442 u64 bytenr; 2363 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
1443 u64 ref_root; 2364}
1444 u64 orig_root;
1445 u64 ref_generation;
1446 u64 orig_generation;
1447 struct btrfs_key key;
1448 struct btrfs_file_extent_item *fi;
1449 int i;
1450 int ret;
1451 int slot;
1452 int level;
1453
1454 BUG_ON(start_slot < 0);
1455 BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1456
1457 ref_root = btrfs_header_owner(buf);
1458 ref_generation = btrfs_header_generation(buf);
1459 orig_root = btrfs_header_owner(orig_buf);
1460 orig_generation = btrfs_header_generation(orig_buf);
1461 level = btrfs_header_level(buf);
1462
1463 if (!root->ref_cows) {
1464 if (level == 0 &&
1465 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1466 return 0;
1467 if (level != 0 &&
1468 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1469 return 0;
1470 }
1471 2365
1472 for (i = 0, slot = start_slot; i < nr; i++, slot++) { 2366int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1473 cond_resched(); 2367 struct extent_buffer *buf, int full_backref)
1474 if (level == 0) { 2368{
1475 btrfs_item_key_to_cpu(buf, &key, slot); 2369 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
1476 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1477 continue;
1478 fi = btrfs_item_ptr(buf, slot,
1479 struct btrfs_file_extent_item);
1480 if (btrfs_file_extent_type(buf, fi) ==
1481 BTRFS_FILE_EXTENT_INLINE)
1482 continue;
1483 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1484 if (bytenr == 0)
1485 continue;
1486 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1487 btrfs_file_extent_disk_num_bytes(buf, fi),
1488 orig_buf->start, buf->start,
1489 orig_root, ref_root, orig_generation,
1490 ref_generation, key.objectid);
1491 if (ret)
1492 goto fail;
1493 } else {
1494 bytenr = btrfs_node_blockptr(buf, slot);
1495 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1496 buf->len, orig_buf->start,
1497 buf->start, orig_root, ref_root,
1498 orig_generation, ref_generation,
1499 level - 1);
1500 if (ret)
1501 goto fail;
1502 }
1503 }
1504 return 0;
1505fail:
1506 WARN_ON(1);
1507 return -1;
1508} 2370}
1509 2371
1510static int write_one_cache_group(struct btrfs_trans_handle *trans, 2372static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -2007,6 +2869,24 @@ static int update_block_group(struct btrfs_trans_handle *trans,
2007 u64 old_val; 2869 u64 old_val;
2008 u64 byte_in_group; 2870 u64 byte_in_group;
2009 2871
2872 /* block accounting for super block */
2873 spin_lock(&info->delalloc_lock);
2874 old_val = btrfs_super_bytes_used(&info->super_copy);
2875 if (alloc)
2876 old_val += num_bytes;
2877 else
2878 old_val -= num_bytes;
2879 btrfs_set_super_bytes_used(&info->super_copy, old_val);
2880
2881 /* block accounting for root item */
2882 old_val = btrfs_root_used(&root->root_item);
2883 if (alloc)
2884 old_val += num_bytes;
2885 else
2886 old_val -= num_bytes;
2887 btrfs_set_root_used(&root->root_item, old_val);
2888 spin_unlock(&info->delalloc_lock);
2889
2010 while (total) { 2890 while (total) {
2011 cache = btrfs_lookup_block_group(info, bytenr); 2891 cache = btrfs_lookup_block_group(info, bytenr);
2012 if (!cache) 2892 if (!cache)
@@ -2216,8 +3096,6 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
2216 u64 header_owner = btrfs_header_owner(buf); 3096 u64 header_owner = btrfs_header_owner(buf);
2217 u64 header_transid = btrfs_header_generation(buf); 3097 u64 header_transid = btrfs_header_generation(buf);
2218 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3098 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2219 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2220 header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2221 header_transid == trans->transid && 3099 header_transid == trans->transid &&
2222 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3100 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2223 *must_clean = buf; 3101 *must_clean = buf;
@@ -2235,63 +3113,77 @@ pinit:
2235 return 0; 3113 return 0;
2236} 3114}
2237 3115
2238/* 3116
2239 * remove an extent from the root, returns 0 on success 3117static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2240 */ 3118 struct btrfs_root *root,
2241static int __free_extent(struct btrfs_trans_handle *trans, 3119 u64 bytenr, u64 num_bytes, u64 parent,
2242 struct btrfs_root *root, 3120 u64 root_objectid, u64 owner_objectid,
2243 u64 bytenr, u64 num_bytes, u64 parent, 3121 u64 owner_offset, int refs_to_drop,
2244 u64 root_objectid, u64 ref_generation, 3122 struct btrfs_delayed_extent_op *extent_op)
2245 u64 owner_objectid, int pin, int mark_free,
2246 int refs_to_drop)
2247{ 3123{
2248 struct btrfs_path *path;
2249 struct btrfs_key key; 3124 struct btrfs_key key;
3125 struct btrfs_path *path;
2250 struct btrfs_fs_info *info = root->fs_info; 3126 struct btrfs_fs_info *info = root->fs_info;
2251 struct btrfs_root *extent_root = info->extent_root; 3127 struct btrfs_root *extent_root = info->extent_root;
2252 struct extent_buffer *leaf; 3128 struct extent_buffer *leaf;
3129 struct btrfs_extent_item *ei;
3130 struct btrfs_extent_inline_ref *iref;
2253 int ret; 3131 int ret;
3132 int is_data;
2254 int extent_slot = 0; 3133 int extent_slot = 0;
2255 int found_extent = 0; 3134 int found_extent = 0;
2256 int num_to_del = 1; 3135 int num_to_del = 1;
2257 struct btrfs_extent_item *ei; 3136 u32 item_size;
2258 u32 refs; 3137 u64 refs;
2259 3138
2260 key.objectid = bytenr;
2261 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
2262 key.offset = num_bytes;
2263 path = btrfs_alloc_path(); 3139 path = btrfs_alloc_path();
2264 if (!path) 3140 if (!path)
2265 return -ENOMEM; 3141 return -ENOMEM;
2266 3142
2267 path->reada = 1; 3143 path->reada = 1;
2268 path->leave_spinning = 1; 3144 path->leave_spinning = 1;
2269 ret = lookup_extent_backref(trans, extent_root, path, 3145
2270 bytenr, parent, root_objectid, 3146 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2271 ref_generation, owner_objectid, 1); 3147 BUG_ON(!is_data && refs_to_drop != 1);
3148
3149 ret = lookup_extent_backref(trans, extent_root, path, &iref,
3150 bytenr, num_bytes, parent,
3151 root_objectid, owner_objectid,
3152 owner_offset);
2272 if (ret == 0) { 3153 if (ret == 0) {
2273 struct btrfs_key found_key;
2274 extent_slot = path->slots[0]; 3154 extent_slot = path->slots[0];
2275 while (extent_slot > 0) { 3155 while (extent_slot >= 0) {
2276 extent_slot--; 3156 btrfs_item_key_to_cpu(path->nodes[0], &key,
2277 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2278 extent_slot); 3157 extent_slot);
2279 if (found_key.objectid != bytenr) 3158 if (key.objectid != bytenr)
2280 break; 3159 break;
2281 if (found_key.type == BTRFS_EXTENT_ITEM_KEY && 3160 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2282 found_key.offset == num_bytes) { 3161 key.offset == num_bytes) {
2283 found_extent = 1; 3162 found_extent = 1;
2284 break; 3163 break;
2285 } 3164 }
2286 if (path->slots[0] - extent_slot > 5) 3165 if (path->slots[0] - extent_slot > 5)
2287 break; 3166 break;
3167 extent_slot--;
2288 } 3168 }
3169#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3170 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
3171 if (found_extent && item_size < sizeof(*ei))
3172 found_extent = 0;
3173#endif
2289 if (!found_extent) { 3174 if (!found_extent) {
3175 BUG_ON(iref);
2290 ret = remove_extent_backref(trans, extent_root, path, 3176 ret = remove_extent_backref(trans, extent_root, path,
2291 refs_to_drop); 3177 NULL, refs_to_drop,
3178 is_data);
2292 BUG_ON(ret); 3179 BUG_ON(ret);
2293 btrfs_release_path(extent_root, path); 3180 btrfs_release_path(extent_root, path);
2294 path->leave_spinning = 1; 3181 path->leave_spinning = 1;
3182
3183 key.objectid = bytenr;
3184 key.type = BTRFS_EXTENT_ITEM_KEY;
3185 key.offset = num_bytes;
3186
2295 ret = btrfs_search_slot(trans, extent_root, 3187 ret = btrfs_search_slot(trans, extent_root,
2296 &key, path, -1, 1); 3188 &key, path, -1, 1);
2297 if (ret) { 3189 if (ret) {
@@ -2307,82 +3199,98 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2307 btrfs_print_leaf(extent_root, path->nodes[0]); 3199 btrfs_print_leaf(extent_root, path->nodes[0]);
2308 WARN_ON(1); 3200 WARN_ON(1);
2309 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3201 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2310 "parent %llu root %llu gen %llu owner %llu\n", 3202 "parent %llu root %llu owner %llu offset %llu\n",
2311 (unsigned long long)bytenr, 3203 (unsigned long long)bytenr,
2312 (unsigned long long)parent, 3204 (unsigned long long)parent,
2313 (unsigned long long)root_objectid, 3205 (unsigned long long)root_objectid,
2314 (unsigned long long)ref_generation, 3206 (unsigned long long)owner_objectid,
2315 (unsigned long long)owner_objectid); 3207 (unsigned long long)owner_offset);
2316 } 3208 }
2317 3209
2318 leaf = path->nodes[0]; 3210 leaf = path->nodes[0];
3211 item_size = btrfs_item_size_nr(leaf, extent_slot);
3212#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3213 if (item_size < sizeof(*ei)) {
3214 BUG_ON(found_extent || extent_slot != path->slots[0]);
3215 ret = convert_extent_item_v0(trans, extent_root, path,
3216 owner_objectid, 0);
3217 BUG_ON(ret < 0);
3218
3219 btrfs_release_path(extent_root, path);
3220 path->leave_spinning = 1;
3221
3222 key.objectid = bytenr;
3223 key.type = BTRFS_EXTENT_ITEM_KEY;
3224 key.offset = num_bytes;
3225
3226 ret = btrfs_search_slot(trans, extent_root, &key, path,
3227 -1, 1);
3228 if (ret) {
3229 printk(KERN_ERR "umm, got %d back from search"
3230 ", was looking for %llu\n", ret,
3231 (unsigned long long)bytenr);
3232 btrfs_print_leaf(extent_root, path->nodes[0]);
3233 }
3234 BUG_ON(ret);
3235 extent_slot = path->slots[0];
3236 leaf = path->nodes[0];
3237 item_size = btrfs_item_size_nr(leaf, extent_slot);
3238 }
3239#endif
3240 BUG_ON(item_size < sizeof(*ei));
2319 ei = btrfs_item_ptr(leaf, extent_slot, 3241 ei = btrfs_item_ptr(leaf, extent_slot,
2320 struct btrfs_extent_item); 3242 struct btrfs_extent_item);
2321 refs = btrfs_extent_refs(leaf, ei); 3243 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2322 3244 struct btrfs_tree_block_info *bi;
2323 /* 3245 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
2324 * we're not allowed to delete the extent item if there 3246 bi = (struct btrfs_tree_block_info *)(ei + 1);
2325 * are other delayed ref updates pending 3247 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
2326 */ 3248 }
2327 3249
3250 refs = btrfs_extent_refs(leaf, ei);
2328 BUG_ON(refs < refs_to_drop); 3251 BUG_ON(refs < refs_to_drop);
2329 refs -= refs_to_drop; 3252 refs -= refs_to_drop;
2330 btrfs_set_extent_refs(leaf, ei, refs);
2331 btrfs_mark_buffer_dirty(leaf);
2332 3253
2333 if (refs == 0 && found_extent && 3254 if (refs > 0) {
2334 path->slots[0] == extent_slot + 1) { 3255 if (extent_op)
2335 struct btrfs_extent_ref *ref; 3256 __run_delayed_extent_op(extent_op, leaf, ei);
2336 ref = btrfs_item_ptr(leaf, path->slots[0], 3257 /*
2337 struct btrfs_extent_ref); 3258 * In the case of inline back ref, reference count will
2338 BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop); 3259 * be updated by remove_extent_backref
2339 /* if the back ref and the extent are next to each other
2340 * they get deleted below in one shot
2341 */ 3260 */
2342 path->slots[0] = extent_slot; 3261 if (iref) {
2343 num_to_del = 2; 3262 BUG_ON(!found_extent);
2344 } else if (found_extent) { 3263 } else {
2345 /* otherwise delete the extent back ref */ 3264 btrfs_set_extent_refs(leaf, ei, refs);
2346 ret = remove_extent_backref(trans, extent_root, path, 3265 btrfs_mark_buffer_dirty(leaf);
2347 refs_to_drop); 3266 }
2348 BUG_ON(ret); 3267 if (found_extent) {
2349 /* if refs are 0, we need to setup the path for deletion */ 3268 ret = remove_extent_backref(trans, extent_root, path,
2350 if (refs == 0) { 3269 iref, refs_to_drop,
2351 btrfs_release_path(extent_root, path); 3270 is_data);
2352 path->leave_spinning = 1;
2353 ret = btrfs_search_slot(trans, extent_root, &key, path,
2354 -1, 1);
2355 BUG_ON(ret); 3271 BUG_ON(ret);
2356 } 3272 }
2357 } 3273 } else {
2358 3274 int mark_free = 0;
2359 if (refs == 0) {
2360 u64 super_used;
2361 u64 root_used;
2362 struct extent_buffer *must_clean = NULL; 3275 struct extent_buffer *must_clean = NULL;
2363 3276
2364 if (pin) { 3277 if (found_extent) {
2365 ret = pin_down_bytes(trans, root, path, 3278 BUG_ON(is_data && refs_to_drop !=
2366 bytenr, num_bytes, 3279 extent_data_ref_count(root, path, iref));
2367 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, 3280 if (iref) {
2368 &must_clean); 3281 BUG_ON(path->slots[0] != extent_slot);
2369 if (ret > 0) 3282 } else {
2370 mark_free = 1; 3283 BUG_ON(path->slots[0] != extent_slot + 1);
2371 BUG_ON(ret < 0); 3284 path->slots[0] = extent_slot;
3285 num_to_del = 2;
3286 }
2372 } 3287 }
2373 3288
2374 /* block accounting for super block */ 3289 ret = pin_down_bytes(trans, root, path, bytenr,
2375 spin_lock(&info->delalloc_lock); 3290 num_bytes, is_data, &must_clean);
2376 super_used = btrfs_super_bytes_used(&info->super_copy); 3291 if (ret > 0)
2377 btrfs_set_super_bytes_used(&info->super_copy, 3292 mark_free = 1;
2378 super_used - num_bytes); 3293 BUG_ON(ret < 0);
2379
2380 /* block accounting for root item */
2381 root_used = btrfs_root_used(&root->root_item);
2382 btrfs_set_root_used(&root->root_item,
2383 root_used - num_bytes);
2384 spin_unlock(&info->delalloc_lock);
2385
2386 /* 3294 /*
2387 * it is going to be very rare for someone to be waiting 3295 * it is going to be very rare for someone to be waiting
2388 * on the block we're freeing. del_items might need to 3296 * on the block we're freeing. del_items might need to
@@ -2403,7 +3311,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2403 free_extent_buffer(must_clean); 3311 free_extent_buffer(must_clean);
2404 } 3312 }
2405 3313
2406 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 3314 if (is_data) {
2407 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3315 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2408 BUG_ON(ret); 3316 BUG_ON(ret);
2409 } else { 3317 } else {
@@ -2421,34 +3329,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2421} 3329}
2422 3330
2423/* 3331/*
2424 * remove an extent from the root, returns 0 on success
2425 */
2426static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2427 struct btrfs_root *root,
2428 u64 bytenr, u64 num_bytes, u64 parent,
2429 u64 root_objectid, u64 ref_generation,
2430 u64 owner_objectid, int pin,
2431 int refs_to_drop)
2432{
2433 WARN_ON(num_bytes < root->sectorsize);
2434
2435 /*
2436 * if metadata always pin
2437 * if data pin when any transaction has committed this
2438 */
2439 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
2440 ref_generation != trans->transid)
2441 pin = 1;
2442
2443 if (ref_generation != trans->transid)
2444 pin = 1;
2445
2446 return __free_extent(trans, root, bytenr, num_bytes, parent,
2447 root_objectid, ref_generation,
2448 owner_objectid, pin, pin == 0, refs_to_drop);
2449}
2450
2451/*
2452 * when we free an extent, it is possible (and likely) that we free the last 3332 * when we free an extent, it is possible (and likely) that we free the last
2453 * delayed ref for that extent as well. This searches the delayed ref tree for 3333 * delayed ref for that extent as well. This searches the delayed ref tree for
2454 * a given extent, and if there are no other delayed refs to be processed, it 3334 * a given extent, and if there are no other delayed refs to be processed, it
@@ -2479,6 +3359,13 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2479 if (ref->bytenr == bytenr) 3359 if (ref->bytenr == bytenr)
2480 goto out; 3360 goto out;
2481 3361
3362 if (head->extent_op) {
3363 if (!head->must_insert_reserved)
3364 goto out;
3365 kfree(head->extent_op);
3366 head->extent_op = NULL;
3367 }
3368
2482 /* 3369 /*
2483 * waiting for the lock here would deadlock. If someone else has it 3370 * waiting for the lock here would deadlock. If someone else has it
2484 * locked they are already in the process of dropping it anyway 3371 * locked they are already in the process of dropping it anyway
@@ -2507,7 +3394,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2507 spin_unlock(&delayed_refs->lock); 3394 spin_unlock(&delayed_refs->lock);
2508 3395
2509 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 3396 ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
2510 &head->node, head->must_insert_reserved); 3397 &head->node, head->extent_op,
3398 head->must_insert_reserved);
2511 BUG_ON(ret); 3399 BUG_ON(ret);
2512 btrfs_put_delayed_ref(&head->node); 3400 btrfs_put_delayed_ref(&head->node);
2513 return 0; 3401 return 0;
@@ -2519,32 +3407,32 @@ out:
2519int btrfs_free_extent(struct btrfs_trans_handle *trans, 3407int btrfs_free_extent(struct btrfs_trans_handle *trans,
2520 struct btrfs_root *root, 3408 struct btrfs_root *root,
2521 u64 bytenr, u64 num_bytes, u64 parent, 3409 u64 bytenr, u64 num_bytes, u64 parent,
2522 u64 root_objectid, u64 ref_generation, 3410 u64 root_objectid, u64 owner, u64 offset)
2523 u64 owner_objectid, int pin)
2524{ 3411{
2525 int ret; 3412 int ret;
2526 3413
2527 /* 3414 /*
2528 * tree log blocks never actually go into the extent allocation 3415 * tree log blocks never actually go into the extent allocation
2529 * tree, just update pinning info and exit early. 3416 * tree, just update pinning info and exit early.
2530 *
2531 * data extents referenced by the tree log do need to have
2532 * their reference counts bumped.
2533 */ 3417 */
2534 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && 3418 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
2535 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3419 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
2536 /* unlocks the pinned mutex */ 3420 /* unlocks the pinned mutex */
2537 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3421 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2538 update_reserved_extents(root, bytenr, num_bytes, 0); 3422 update_reserved_extents(root, bytenr, num_bytes, 0);
2539 ret = 0; 3423 ret = 0;
2540 } else { 3424 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
2541 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, 3425 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
2542 root_objectid, ref_generation, 3426 parent, root_objectid, (int)owner,
2543 owner_objectid, 3427 BTRFS_DROP_DELAYED_REF, NULL);
2544 BTRFS_DROP_DELAYED_REF, 1);
2545 BUG_ON(ret); 3428 BUG_ON(ret);
2546 ret = check_ref_cleanup(trans, root, bytenr); 3429 ret = check_ref_cleanup(trans, root, bytenr);
2547 BUG_ON(ret); 3430 BUG_ON(ret);
3431 } else {
3432 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
3433 parent, root_objectid, owner,
3434 offset, BTRFS_DROP_DELAYED_REF, NULL);
3435 BUG_ON(ret);
2548 } 3436 }
2549 return ret; 3437 return ret;
2550} 3438}
@@ -2969,99 +3857,147 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2969 return ret; 3857 return ret;
2970} 3858}
2971 3859
2972static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3860static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
2973 struct btrfs_root *root, u64 parent, 3861 struct btrfs_root *root,
2974 u64 root_objectid, u64 ref_generation, 3862 u64 parent, u64 root_objectid,
2975 u64 owner, struct btrfs_key *ins, 3863 u64 flags, u64 owner, u64 offset,
2976 int ref_mod) 3864 struct btrfs_key *ins, int ref_mod)
2977{ 3865{
2978 int ret; 3866 int ret;
2979 u64 super_used; 3867 struct btrfs_fs_info *fs_info = root->fs_info;
2980 u64 root_used;
2981 u64 num_bytes = ins->offset;
2982 u32 sizes[2];
2983 struct btrfs_fs_info *info = root->fs_info;
2984 struct btrfs_root *extent_root = info->extent_root;
2985 struct btrfs_extent_item *extent_item; 3868 struct btrfs_extent_item *extent_item;
2986 struct btrfs_extent_ref *ref; 3869 struct btrfs_extent_inline_ref *iref;
2987 struct btrfs_path *path; 3870 struct btrfs_path *path;
2988 struct btrfs_key keys[2]; 3871 struct extent_buffer *leaf;
2989 3872 int type;
2990 if (parent == 0) 3873 u32 size;
2991 parent = ins->objectid;
2992
2993 /* block accounting for super block */
2994 spin_lock(&info->delalloc_lock);
2995 super_used = btrfs_super_bytes_used(&info->super_copy);
2996 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2997 3874
2998 /* block accounting for root item */ 3875 if (parent > 0)
2999 root_used = btrfs_root_used(&root->root_item); 3876 type = BTRFS_SHARED_DATA_REF_KEY;
3000 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 3877 else
3001 spin_unlock(&info->delalloc_lock); 3878 type = BTRFS_EXTENT_DATA_REF_KEY;
3002 3879
3003 memcpy(&keys[0], ins, sizeof(*ins)); 3880 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
3004 keys[1].objectid = ins->objectid;
3005 keys[1].type = BTRFS_EXTENT_REF_KEY;
3006 keys[1].offset = parent;
3007 sizes[0] = sizeof(*extent_item);
3008 sizes[1] = sizeof(*ref);
3009 3881
3010 path = btrfs_alloc_path(); 3882 path = btrfs_alloc_path();
3011 BUG_ON(!path); 3883 BUG_ON(!path);
3012 3884
3013 path->leave_spinning = 1; 3885 path->leave_spinning = 1;
3014 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 3886 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
3015 sizes, 2); 3887 ins, size);
3016 BUG_ON(ret); 3888 BUG_ON(ret);
3017 3889
3018 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3890 leaf = path->nodes[0];
3891 extent_item = btrfs_item_ptr(leaf, path->slots[0],
3019 struct btrfs_extent_item); 3892 struct btrfs_extent_item);
3020 btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod); 3893 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
3021 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 3894 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
3022 struct btrfs_extent_ref); 3895 btrfs_set_extent_flags(leaf, extent_item,
3023 3896 flags | BTRFS_EXTENT_FLAG_DATA);
3024 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 3897
3025 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 3898 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
3026 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 3899 btrfs_set_extent_inline_ref_type(leaf, iref, type);
3027 btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod); 3900 if (parent > 0) {
3901 struct btrfs_shared_data_ref *ref;
3902 ref = (struct btrfs_shared_data_ref *)(iref + 1);
3903 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
3904 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
3905 } else {
3906 struct btrfs_extent_data_ref *ref;
3907 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3908 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
3909 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
3910 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
3911 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
3912 }
3028 3913
3029 btrfs_mark_buffer_dirty(path->nodes[0]); 3914 btrfs_mark_buffer_dirty(path->nodes[0]);
3030
3031 trans->alloc_exclude_start = 0;
3032 trans->alloc_exclude_nr = 0;
3033 btrfs_free_path(path); 3915 btrfs_free_path(path);
3034 3916
3035 if (ret) 3917 ret = update_block_group(trans, root, ins->objectid, ins->offset,
3036 goto out; 3918 1, 0);
3037
3038 ret = update_block_group(trans, root, ins->objectid,
3039 ins->offset, 1, 0);
3040 if (ret) { 3919 if (ret) {
3041 printk(KERN_ERR "btrfs update block group failed for %llu " 3920 printk(KERN_ERR "btrfs update block group failed for %llu "
3042 "%llu\n", (unsigned long long)ins->objectid, 3921 "%llu\n", (unsigned long long)ins->objectid,
3043 (unsigned long long)ins->offset); 3922 (unsigned long long)ins->offset);
3044 BUG(); 3923 BUG();
3045 } 3924 }
3046out:
3047 return ret; 3925 return ret;
3048} 3926}
3049 3927
3050int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3928static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
3051 struct btrfs_root *root, u64 parent, 3929 struct btrfs_root *root,
3052 u64 root_objectid, u64 ref_generation, 3930 u64 parent, u64 root_objectid,
3053 u64 owner, struct btrfs_key *ins) 3931 u64 flags, struct btrfs_disk_key *key,
3932 int level, struct btrfs_key *ins)
3054{ 3933{
3055 int ret; 3934 int ret;
3935 struct btrfs_fs_info *fs_info = root->fs_info;
3936 struct btrfs_extent_item *extent_item;
3937 struct btrfs_tree_block_info *block_info;
3938 struct btrfs_extent_inline_ref *iref;
3939 struct btrfs_path *path;
3940 struct extent_buffer *leaf;
3941 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
3056 3942
3057 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 3943 path = btrfs_alloc_path();
3058 return 0; 3944 BUG_ON(!path);
3059 3945
3060 ret = btrfs_add_delayed_ref(trans, ins->objectid, 3946 path->leave_spinning = 1;
3061 ins->offset, parent, root_objectid, 3947 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
3062 ref_generation, owner, 3948 ins, size);
3063 BTRFS_ADD_DELAYED_EXTENT, 0);
3064 BUG_ON(ret); 3949 BUG_ON(ret);
3950
3951 leaf = path->nodes[0];
3952 extent_item = btrfs_item_ptr(leaf, path->slots[0],
3953 struct btrfs_extent_item);
3954 btrfs_set_extent_refs(leaf, extent_item, 1);
3955 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
3956 btrfs_set_extent_flags(leaf, extent_item,
3957 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
3958 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
3959
3960 btrfs_set_tree_block_key(leaf, block_info, key);
3961 btrfs_set_tree_block_level(leaf, block_info, level);
3962
3963 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
3964 if (parent > 0) {
3965 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
3966 btrfs_set_extent_inline_ref_type(leaf, iref,
3967 BTRFS_SHARED_BLOCK_REF_KEY);
3968 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
3969 } else {
3970 btrfs_set_extent_inline_ref_type(leaf, iref,
3971 BTRFS_TREE_BLOCK_REF_KEY);
3972 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
3973 }
3974
3975 btrfs_mark_buffer_dirty(leaf);
3976 btrfs_free_path(path);
3977
3978 ret = update_block_group(trans, root, ins->objectid, ins->offset,
3979 1, 0);
3980 if (ret) {
3981 printk(KERN_ERR "btrfs update block group failed for %llu "
3982 "%llu\n", (unsigned long long)ins->objectid,
3983 (unsigned long long)ins->offset);
3984 BUG();
3985 }
3986 return ret;
3987}
3988
3989int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
3990 struct btrfs_root *root,
3991 u64 root_objectid, u64 owner,
3992 u64 offset, struct btrfs_key *ins)
3993{
3994 int ret;
3995
3996 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
3997
3998 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
3999 0, root_objectid, owner, offset,
4000 BTRFS_ADD_DELAYED_EXTENT, NULL);
3065 return ret; 4001 return ret;
3066} 4002}
3067 4003
@@ -3070,10 +4006,10 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3070 * an extent has been allocated and makes sure to clear the free 4006 * an extent has been allocated and makes sure to clear the free
3071 * space cache bits as well 4007 * space cache bits as well
3072 */ 4008 */
3073int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, 4009int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
3074 struct btrfs_root *root, u64 parent, 4010 struct btrfs_root *root,
3075 u64 root_objectid, u64 ref_generation, 4011 u64 root_objectid, u64 owner, u64 offset,
3076 u64 owner, struct btrfs_key *ins) 4012 struct btrfs_key *ins)
3077{ 4013{
3078 int ret; 4014 int ret;
3079 struct btrfs_block_group_cache *block_group; 4015 struct btrfs_block_group_cache *block_group;
@@ -3087,8 +4023,8 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3087 ins->offset); 4023 ins->offset);
3088 BUG_ON(ret); 4024 BUG_ON(ret);
3089 btrfs_put_block_group(block_group); 4025 btrfs_put_block_group(block_group);
3090 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 4026 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
3091 ref_generation, owner, ins, 1); 4027 0, owner, offset, ins, 1);
3092 return ret; 4028 return ret;
3093} 4029}
3094 4030
@@ -3099,26 +4035,48 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3099 * 4035 *
3100 * returns 0 if everything worked, non-zero otherwise. 4036 * returns 0 if everything worked, non-zero otherwise.
3101 */ 4037 */
3102int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 4038static int alloc_tree_block(struct btrfs_trans_handle *trans,
3103 struct btrfs_root *root, 4039 struct btrfs_root *root,
3104 u64 num_bytes, u64 parent, u64 min_alloc_size, 4040 u64 num_bytes, u64 parent, u64 root_objectid,
3105 u64 root_objectid, u64 ref_generation, 4041 struct btrfs_disk_key *key, int level,
3106 u64 owner_objectid, u64 empty_size, u64 hint_byte, 4042 u64 empty_size, u64 hint_byte, u64 search_end,
3107 u64 search_end, struct btrfs_key *ins, u64 data) 4043 struct btrfs_key *ins)
3108{ 4044{
3109 int ret; 4045 int ret;
3110 ret = __btrfs_reserve_extent(trans, root, num_bytes, 4046 u64 flags = 0;
3111 min_alloc_size, empty_size, hint_byte, 4047
3112 search_end, ins, data); 4048 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4049 empty_size, hint_byte, search_end,
4050 ins, 0);
3113 BUG_ON(ret); 4051 BUG_ON(ret);
4052
4053 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4054 if (parent == 0)
4055 parent = ins->objectid;
4056 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4057 } else
4058 BUG_ON(parent > 0);
4059
4060 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3114 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4061 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3115 ret = btrfs_add_delayed_ref(trans, ins->objectid, 4062 struct btrfs_delayed_extent_op *extent_op;
3116 ins->offset, parent, root_objectid, 4063 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3117 ref_generation, owner_objectid, 4064 BUG_ON(!extent_op);
3118 BTRFS_ADD_DELAYED_EXTENT, 0); 4065 if (key)
4066 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4067 else
4068 memset(&extent_op->key, 0, sizeof(extent_op->key));
4069 extent_op->flags_to_set = flags;
4070 extent_op->update_key = 1;
4071 extent_op->update_flags = 1;
4072 extent_op->is_data = 0;
4073
4074 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
4075 ins->offset, parent, root_objectid,
4076 level, BTRFS_ADD_DELAYED_EXTENT,
4077 extent_op);
3119 BUG_ON(ret); 4078 BUG_ON(ret);
3120 } 4079 }
3121 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3122 return ret; 4080 return ret;
3123} 4081}
3124 4082
@@ -3157,21 +4115,17 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3157 * returns the tree buffer or NULL. 4115 * returns the tree buffer or NULL.
3158 */ 4116 */
3159struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4117struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3160 struct btrfs_root *root, 4118 struct btrfs_root *root, u32 blocksize,
3161 u32 blocksize, u64 parent, 4119 u64 parent, u64 root_objectid,
3162 u64 root_objectid, 4120 struct btrfs_disk_key *key, int level,
3163 u64 ref_generation, 4121 u64 hint, u64 empty_size)
3164 int level,
3165 u64 hint,
3166 u64 empty_size)
3167{ 4122{
3168 struct btrfs_key ins; 4123 struct btrfs_key ins;
3169 int ret; 4124 int ret;
3170 struct extent_buffer *buf; 4125 struct extent_buffer *buf;
3171 4126
3172 ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, 4127 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
3173 root_objectid, ref_generation, level, 4128 key, level, empty_size, hint, (u64)-1, &ins);
3174 empty_size, hint, (u64)-1, &ins, 0);
3175 if (ret) { 4129 if (ret) {
3176 BUG_ON(ret > 0); 4130 BUG_ON(ret > 0);
3177 return ERR_PTR(ret); 4131 return ERR_PTR(ret);
@@ -3185,32 +4139,19 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3185int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4139int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3186 struct btrfs_root *root, struct extent_buffer *leaf) 4140 struct btrfs_root *root, struct extent_buffer *leaf)
3187{ 4141{
3188 u64 leaf_owner; 4142 u64 disk_bytenr;
3189 u64 leaf_generation; 4143 u64 num_bytes;
3190 struct refsort *sorted;
3191 struct btrfs_key key; 4144 struct btrfs_key key;
3192 struct btrfs_file_extent_item *fi; 4145 struct btrfs_file_extent_item *fi;
4146 u32 nritems;
3193 int i; 4147 int i;
3194 int nritems;
3195 int ret; 4148 int ret;
3196 int refi = 0;
3197 int slot;
3198 4149
3199 BUG_ON(!btrfs_is_leaf(leaf)); 4150 BUG_ON(!btrfs_is_leaf(leaf));
3200 nritems = btrfs_header_nritems(leaf); 4151 nritems = btrfs_header_nritems(leaf);
3201 leaf_owner = btrfs_header_owner(leaf);
3202 leaf_generation = btrfs_header_generation(leaf);
3203 4152
3204 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
3205 /* we do this loop twice. The first time we build a list
3206 * of the extents we have a reference on, then we sort the list
3207 * by bytenr. The second time around we actually do the
3208 * extent freeing.
3209 */
3210 for (i = 0; i < nritems; i++) { 4153 for (i = 0; i < nritems; i++) {
3211 u64 disk_bytenr;
3212 cond_resched(); 4154 cond_resched();
3213
3214 btrfs_item_key_to_cpu(leaf, &key, i); 4155 btrfs_item_key_to_cpu(leaf, &key, i);
3215 4156
3216 /* only extents have references, skip everything else */ 4157 /* only extents have references, skip everything else */
@@ -3230,45 +4171,16 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3230 if (disk_bytenr == 0) 4171 if (disk_bytenr == 0)
3231 continue; 4172 continue;
3232 4173
3233 sorted[refi].bytenr = disk_bytenr; 4174 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
3234 sorted[refi].slot = i; 4175 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
3235 refi++; 4176 leaf->start, 0, key.objectid, 0);
3236 }
3237
3238 if (refi == 0)
3239 goto out;
3240
3241 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
3242
3243 for (i = 0; i < refi; i++) {
3244 u64 disk_bytenr;
3245
3246 disk_bytenr = sorted[i].bytenr;
3247 slot = sorted[i].slot;
3248
3249 cond_resched();
3250
3251 btrfs_item_key_to_cpu(leaf, &key, slot);
3252 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3253 continue;
3254
3255 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
3256
3257 ret = btrfs_free_extent(trans, root, disk_bytenr,
3258 btrfs_file_extent_disk_num_bytes(leaf, fi),
3259 leaf->start, leaf_owner, leaf_generation,
3260 key.objectid, 0);
3261 BUG_ON(ret); 4177 BUG_ON(ret);
3262
3263 atomic_inc(&root->fs_info->throttle_gen);
3264 wake_up(&root->fs_info->transaction_throttle);
3265 cond_resched();
3266 } 4178 }
3267out:
3268 kfree(sorted);
3269 return 0; 4179 return 0;
3270} 4180}
3271 4181
4182#if 0
4183
3272static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4184static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3273 struct btrfs_root *root, 4185 struct btrfs_root *root,
3274 struct btrfs_leaf_ref *ref) 4186 struct btrfs_leaf_ref *ref)
@@ -3311,13 +4223,14 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3311 return 0; 4223 return 0;
3312} 4224}
3313 4225
4226
3314static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, 4227static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3315 struct btrfs_root *root, u64 start, 4228 struct btrfs_root *root, u64 start,
3316 u64 len, u32 *refs) 4229 u64 len, u32 *refs)
3317{ 4230{
3318 int ret; 4231 int ret;
3319 4232
3320 ret = btrfs_lookup_extent_ref(trans, root, start, len, refs); 4233 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
3321 BUG_ON(ret); 4234 BUG_ON(ret);
3322 4235
3323#if 0 /* some debugging code in case we see problems here */ 4236#if 0 /* some debugging code in case we see problems here */
@@ -3352,6 +4265,7 @@ static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3352 return ret; 4265 return ret;
3353} 4266}
3354 4267
4268
3355/* 4269/*
3356 * this is used while deleting old snapshots, and it drops the refs 4270 * this is used while deleting old snapshots, and it drops the refs
3357 * on a whole subtree starting from a level 1 node. 4271 * on a whole subtree starting from a level 1 node.
@@ -3645,32 +4559,36 @@ out:
3645 cond_resched(); 4559 cond_resched();
3646 return 0; 4560 return 0;
3647} 4561}
4562#endif
3648 4563
3649/* 4564/*
3650 * helper function for drop_subtree, this function is similar to 4565 * helper function for drop_subtree, this function is similar to
3651 * walk_down_tree. The main difference is that it checks reference 4566 * walk_down_tree. The main difference is that it checks reference
3652 * counts while tree blocks are locked. 4567 * counts while tree blocks are locked.
3653 */ 4568 */
3654static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, 4569static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
3655 struct btrfs_root *root, 4570 struct btrfs_root *root,
3656 struct btrfs_path *path, int *level) 4571 struct btrfs_path *path, int *level)
3657{ 4572{
3658 struct extent_buffer *next; 4573 struct extent_buffer *next;
3659 struct extent_buffer *cur; 4574 struct extent_buffer *cur;
3660 struct extent_buffer *parent; 4575 struct extent_buffer *parent;
3661 u64 bytenr; 4576 u64 bytenr;
3662 u64 ptr_gen; 4577 u64 ptr_gen;
4578 u64 refs;
4579 u64 flags;
3663 u32 blocksize; 4580 u32 blocksize;
3664 u32 refs;
3665 int ret; 4581 int ret;
3666 4582
3667 cur = path->nodes[*level]; 4583 cur = path->nodes[*level];
3668 ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, 4584 ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
3669 &refs); 4585 &refs, &flags);
3670 BUG_ON(ret); 4586 BUG_ON(ret);
3671 if (refs > 1) 4587 if (refs > 1)
3672 goto out; 4588 goto out;
3673 4589
4590 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4591
3674 while (*level >= 0) { 4592 while (*level >= 0) {
3675 cur = path->nodes[*level]; 4593 cur = path->nodes[*level];
3676 if (*level == 0) { 4594 if (*level == 0) {
@@ -3692,16 +4610,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3692 btrfs_tree_lock(next); 4610 btrfs_tree_lock(next);
3693 btrfs_set_lock_blocking(next); 4611 btrfs_set_lock_blocking(next);
3694 4612
3695 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 4613 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
3696 &refs); 4614 &refs, &flags);
3697 BUG_ON(ret); 4615 BUG_ON(ret);
3698 if (refs > 1) { 4616 if (refs > 1) {
3699 parent = path->nodes[*level]; 4617 parent = path->nodes[*level];
3700 ret = btrfs_free_extent(trans, root, bytenr, 4618 ret = btrfs_free_extent(trans, root, bytenr,
3701 blocksize, parent->start, 4619 blocksize, parent->start,
3702 btrfs_header_owner(parent), 4620 btrfs_header_owner(parent),
3703 btrfs_header_generation(parent), 4621 *level - 1, 0);
3704 *level - 1, 1);
3705 BUG_ON(ret); 4622 BUG_ON(ret);
3706 path->slots[*level]++; 4623 path->slots[*level]++;
3707 btrfs_tree_unlock(next); 4624 btrfs_tree_unlock(next);
@@ -3709,6 +4626,8 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3709 continue; 4626 continue;
3710 } 4627 }
3711 4628
4629 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4630
3712 *level = btrfs_header_level(next); 4631 *level = btrfs_header_level(next);
3713 path->nodes[*level] = next; 4632 path->nodes[*level] = next;
3714 path->slots[*level] = 0; 4633 path->slots[*level] = 0;
@@ -3716,13 +4635,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3716 cond_resched(); 4635 cond_resched();
3717 } 4636 }
3718out: 4637out:
3719 parent = path->nodes[*level + 1]; 4638 if (path->nodes[*level] == root->node)
4639 parent = path->nodes[*level];
4640 else
4641 parent = path->nodes[*level + 1];
3720 bytenr = path->nodes[*level]->start; 4642 bytenr = path->nodes[*level]->start;
3721 blocksize = path->nodes[*level]->len; 4643 blocksize = path->nodes[*level]->len;
3722 4644
3723 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 4645 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
3724 parent->start, btrfs_header_owner(parent), 4646 btrfs_header_owner(parent), *level, 0);
3725 btrfs_header_generation(parent), *level, 1);
3726 BUG_ON(ret); 4647 BUG_ON(ret);
3727 4648
3728 if (path->locks[*level]) { 4649 if (path->locks[*level]) {
@@ -3746,8 +4667,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3746 struct btrfs_path *path, 4667 struct btrfs_path *path,
3747 int *level, int max_level) 4668 int *level, int max_level)
3748{ 4669{
3749 u64 root_owner;
3750 u64 root_gen;
3751 struct btrfs_root_item *root_item = &root->root_item; 4670 struct btrfs_root_item *root_item = &root->root_item;
3752 int i; 4671 int i;
3753 int slot; 4672 int slot;
@@ -3755,24 +4674,22 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3755 4674
3756 for (i = *level; i < max_level && path->nodes[i]; i++) { 4675 for (i = *level; i < max_level && path->nodes[i]; i++) {
3757 slot = path->slots[i]; 4676 slot = path->slots[i];
3758 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 4677 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
3759 struct extent_buffer *node;
3760 struct btrfs_disk_key disk_key;
3761
3762 /* 4678 /*
3763 * there is more work to do in this level. 4679 * there is more work to do in this level.
3764 * Update the drop_progress marker to reflect 4680 * Update the drop_progress marker to reflect
3765 * the work we've done so far, and then bump 4681 * the work we've done so far, and then bump
3766 * the slot number 4682 * the slot number
3767 */ 4683 */
3768 node = path->nodes[i];
3769 path->slots[i]++; 4684 path->slots[i]++;
3770 *level = i;
3771 WARN_ON(*level == 0); 4685 WARN_ON(*level == 0);
3772 btrfs_node_key(node, &disk_key, path->slots[i]); 4686 if (max_level == BTRFS_MAX_LEVEL) {
3773 memcpy(&root_item->drop_progress, 4687 btrfs_node_key(path->nodes[i],
3774 &disk_key, sizeof(disk_key)); 4688 &root_item->drop_progress,
3775 root_item->drop_level = i; 4689 path->slots[i]);
4690 root_item->drop_level = i;
4691 }
4692 *level = i;
3776 return 0; 4693 return 0;
3777 } else { 4694 } else {
3778 struct extent_buffer *parent; 4695 struct extent_buffer *parent;
@@ -3786,22 +4703,20 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3786 else 4703 else
3787 parent = path->nodes[*level + 1]; 4704 parent = path->nodes[*level + 1];
3788 4705
3789 root_owner = btrfs_header_owner(parent); 4706 clean_tree_block(trans, root, path->nodes[i]);
3790 root_gen = btrfs_header_generation(parent);
3791
3792 clean_tree_block(trans, root, path->nodes[*level]);
3793 ret = btrfs_free_extent(trans, root, 4707 ret = btrfs_free_extent(trans, root,
3794 path->nodes[*level]->start, 4708 path->nodes[i]->start,
3795 path->nodes[*level]->len, 4709 path->nodes[i]->len,
3796 parent->start, root_owner, 4710 parent->start,
3797 root_gen, *level, 1); 4711 btrfs_header_owner(parent),
4712 *level, 0);
3798 BUG_ON(ret); 4713 BUG_ON(ret);
3799 if (path->locks[*level]) { 4714 if (path->locks[*level]) {
3800 btrfs_tree_unlock(path->nodes[*level]); 4715 btrfs_tree_unlock(path->nodes[i]);
3801 path->locks[*level] = 0; 4716 path->locks[i] = 0;
3802 } 4717 }
3803 free_extent_buffer(path->nodes[*level]); 4718 free_extent_buffer(path->nodes[i]);
3804 path->nodes[*level] = NULL; 4719 path->nodes[i] = NULL;
3805 *level = i + 1; 4720 *level = i + 1;
3806 } 4721 }
3807 } 4722 }
@@ -3820,21 +4735,18 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3820 int wret; 4735 int wret;
3821 int level; 4736 int level;
3822 struct btrfs_path *path; 4737 struct btrfs_path *path;
3823 int i;
3824 int orig_level;
3825 int update_count; 4738 int update_count;
3826 struct btrfs_root_item *root_item = &root->root_item; 4739 struct btrfs_root_item *root_item = &root->root_item;
3827 4740
3828 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3829 path = btrfs_alloc_path(); 4741 path = btrfs_alloc_path();
3830 BUG_ON(!path); 4742 BUG_ON(!path);
3831 4743
3832 level = btrfs_header_level(root->node); 4744 level = btrfs_header_level(root->node);
3833 orig_level = level;
3834 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4745 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3835 path->nodes[level] = root->node; 4746 path->nodes[level] = btrfs_lock_root_node(root);
3836 extent_buffer_get(root->node); 4747 btrfs_set_lock_blocking(path->nodes[level]);
3837 path->slots[level] = 0; 4748 path->slots[level] = 0;
4749 path->locks[level] = 1;
3838 } else { 4750 } else {
3839 struct btrfs_key key; 4751 struct btrfs_key key;
3840 struct btrfs_disk_key found_key; 4752 struct btrfs_disk_key found_key;
@@ -3856,12 +4768,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3856 * unlock our path, this is safe because only this 4768 * unlock our path, this is safe because only this
3857 * function is allowed to delete this snapshot 4769 * function is allowed to delete this snapshot
3858 */ 4770 */
3859 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 4771 btrfs_unlock_up_safe(path, 0);
3860 if (path->nodes[i] && path->locks[i]) {
3861 path->locks[i] = 0;
3862 btrfs_tree_unlock(path->nodes[i]);
3863 }
3864 }
3865 } 4772 }
3866 while (1) { 4773 while (1) {
3867 unsigned long update; 4774 unsigned long update;
@@ -3882,8 +4789,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3882 ret = -EAGAIN; 4789 ret = -EAGAIN;
3883 break; 4790 break;
3884 } 4791 }
3885 atomic_inc(&root->fs_info->throttle_gen);
3886 wake_up(&root->fs_info->transaction_throttle);
3887 for (update_count = 0; update_count < 16; update_count++) { 4792 for (update_count = 0; update_count < 16; update_count++) {
3888 update = trans->delayed_ref_updates; 4793 update = trans->delayed_ref_updates;
3889 trans->delayed_ref_updates = 0; 4794 trans->delayed_ref_updates = 0;
@@ -3893,12 +4798,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3893 break; 4798 break;
3894 } 4799 }
3895 } 4800 }
3896 for (i = 0; i <= orig_level; i++) {
3897 if (path->nodes[i]) {
3898 free_extent_buffer(path->nodes[i]);
3899 path->nodes[i] = NULL;
3900 }
3901 }
3902out: 4801out:
3903 btrfs_free_path(path); 4802 btrfs_free_path(path);
3904 return ret; 4803 return ret;
@@ -3931,7 +4830,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3931 path->slots[level] = 0; 4830 path->slots[level] = 0;
3932 4831
3933 while (1) { 4832 while (1) {
3934 wret = walk_down_subtree(trans, root, path, &level); 4833 wret = walk_down_tree(trans, root, path, &level);
3935 if (wret < 0) 4834 if (wret < 0)
3936 ret = wret; 4835 ret = wret;
3937 if (wret != 0) 4836 if (wret != 0)
@@ -3948,6 +4847,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3948 return ret; 4847 return ret;
3949} 4848}
3950 4849
4850#if 0
3951static unsigned long calc_ra(unsigned long start, unsigned long last, 4851static unsigned long calc_ra(unsigned long start, unsigned long last,
3952 unsigned long nr) 4852 unsigned long nr)
3953{ 4853{
@@ -5429,6 +6329,7 @@ out:
5429 kfree(ref_path); 6329 kfree(ref_path);
5430 return ret; 6330 return ret;
5431} 6331}
6332#endif
5432 6333
5433static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6334static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
5434{ 6335{
@@ -5477,7 +6378,8 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root,
5477 u64 calc; 6378 u64 calc;
5478 6379
5479 spin_lock(&shrink_block_group->lock); 6380 spin_lock(&shrink_block_group->lock);
5480 if (btrfs_block_group_used(&shrink_block_group->item) > 0) { 6381 if (btrfs_block_group_used(&shrink_block_group->item) +
6382 shrink_block_group->reserved > 0) {
5481 spin_unlock(&shrink_block_group->lock); 6383 spin_unlock(&shrink_block_group->lock);
5482 6384
5483 trans = btrfs_start_transaction(root, 1); 6385 trans = btrfs_start_transaction(root, 1);
@@ -5502,6 +6404,17 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root,
5502 return 0; 6404 return 0;
5503} 6405}
5504 6406
6407
6408int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
6409 struct btrfs_block_group_cache *group)
6410
6411{
6412 __alloc_chunk_for_shrink(root, group, 1);
6413 set_block_group_readonly(group);
6414 return 0;
6415}
6416
6417#if 0
5505static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 6418static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
5506 struct btrfs_root *root, 6419 struct btrfs_root *root,
5507 u64 objectid, u64 size) 6420 u64 objectid, u64 size)
@@ -5781,6 +6694,7 @@ out:
5781 btrfs_free_path(path); 6694 btrfs_free_path(path);
5782 return ret; 6695 return ret;
5783} 6696}
6697#endif
5784 6698
5785static int find_first_block_group(struct btrfs_root *root, 6699static int find_first_block_group(struct btrfs_root *root,
5786 struct btrfs_path *path, struct btrfs_key *key) 6700 struct btrfs_path *path, struct btrfs_key *key)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1d51dc38bb49..0726a734ee38 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -291,16 +291,12 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
291{ 291{
292 u64 extent_end = 0; 292 u64 extent_end = 0;
293 u64 search_start = start; 293 u64 search_start = start;
294 u64 leaf_start;
295 u64 ram_bytes = 0; 294 u64 ram_bytes = 0;
296 u64 orig_parent = 0;
297 u64 disk_bytenr = 0; 295 u64 disk_bytenr = 0;
298 u64 orig_locked_end = locked_end; 296 u64 orig_locked_end = locked_end;
299 u8 compression; 297 u8 compression;
300 u8 encryption; 298 u8 encryption;
301 u16 other_encoding = 0; 299 u16 other_encoding = 0;
302 u64 root_gen;
303 u64 root_owner;
304 struct extent_buffer *leaf; 300 struct extent_buffer *leaf;
305 struct btrfs_file_extent_item *extent; 301 struct btrfs_file_extent_item *extent;
306 struct btrfs_path *path; 302 struct btrfs_path *path;
@@ -340,9 +336,6 @@ next_slot:
340 bookend = 0; 336 bookend = 0;
341 found_extent = 0; 337 found_extent = 0;
342 found_inline = 0; 338 found_inline = 0;
343 leaf_start = 0;
344 root_gen = 0;
345 root_owner = 0;
346 compression = 0; 339 compression = 0;
347 encryption = 0; 340 encryption = 0;
348 extent = NULL; 341 extent = NULL;
@@ -417,9 +410,6 @@ next_slot:
417 if (found_extent) { 410 if (found_extent) {
418 read_extent_buffer(leaf, &old, (unsigned long)extent, 411 read_extent_buffer(leaf, &old, (unsigned long)extent,
419 sizeof(old)); 412 sizeof(old));
420 root_gen = btrfs_header_generation(leaf);
421 root_owner = btrfs_header_owner(leaf);
422 leaf_start = leaf->start;
423 } 413 }
424 414
425 if (end < extent_end && end >= key.offset) { 415 if (end < extent_end && end >= key.offset) {
@@ -443,14 +433,14 @@ next_slot:
443 } 433 }
444 locked_end = extent_end; 434 locked_end = extent_end;
445 } 435 }
446 orig_parent = path->nodes[0]->start;
447 disk_bytenr = le64_to_cpu(old.disk_bytenr); 436 disk_bytenr = le64_to_cpu(old.disk_bytenr);
448 if (disk_bytenr != 0) { 437 if (disk_bytenr != 0) {
449 ret = btrfs_inc_extent_ref(trans, root, 438 ret = btrfs_inc_extent_ref(trans, root,
450 disk_bytenr, 439 disk_bytenr,
451 le64_to_cpu(old.disk_num_bytes), 440 le64_to_cpu(old.disk_num_bytes), 0,
452 orig_parent, root->root_key.objectid, 441 root->root_key.objectid,
453 trans->transid, inode->i_ino); 442 key.objectid, key.offset -
443 le64_to_cpu(old.offset));
454 BUG_ON(ret); 444 BUG_ON(ret);
455 } 445 }
456 } 446 }
@@ -568,17 +558,6 @@ next_slot:
568 btrfs_mark_buffer_dirty(path->nodes[0]); 558 btrfs_mark_buffer_dirty(path->nodes[0]);
569 btrfs_set_lock_blocking(path->nodes[0]); 559 btrfs_set_lock_blocking(path->nodes[0]);
570 560
571 if (disk_bytenr != 0) {
572 ret = btrfs_update_extent_ref(trans, root,
573 disk_bytenr,
574 le64_to_cpu(old.disk_num_bytes),
575 orig_parent,
576 leaf->start,
577 root->root_key.objectid,
578 trans->transid, ins.objectid);
579
580 BUG_ON(ret);
581 }
582 path->leave_spinning = 0; 561 path->leave_spinning = 0;
583 btrfs_release_path(root, path); 562 btrfs_release_path(root, path);
584 if (disk_bytenr != 0) 563 if (disk_bytenr != 0)
@@ -594,8 +573,9 @@ next_slot:
594 ret = btrfs_free_extent(trans, root, 573 ret = btrfs_free_extent(trans, root,
595 old_disk_bytenr, 574 old_disk_bytenr,
596 le64_to_cpu(old.disk_num_bytes), 575 le64_to_cpu(old.disk_num_bytes),
597 leaf_start, root_owner, 576 0, root->root_key.objectid,
598 root_gen, key.objectid, 0); 577 key.objectid, key.offset -
578 le64_to_cpu(old.offset));
599 BUG_ON(ret); 579 BUG_ON(ret);
600 *hint_byte = old_disk_bytenr; 580 *hint_byte = old_disk_bytenr;
601 } 581 }
@@ -664,12 +644,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
664 u64 bytenr; 644 u64 bytenr;
665 u64 num_bytes; 645 u64 num_bytes;
666 u64 extent_end; 646 u64 extent_end;
667 u64 extent_offset; 647 u64 orig_offset;
668 u64 other_start; 648 u64 other_start;
669 u64 other_end; 649 u64 other_end;
670 u64 split = start; 650 u64 split = start;
671 u64 locked_end = end; 651 u64 locked_end = end;
672 u64 orig_parent;
673 int extent_type; 652 int extent_type;
674 int split_end = 1; 653 int split_end = 1;
675 int ret; 654 int ret;
@@ -703,7 +682,7 @@ again:
703 682
704 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 683 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
705 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 684 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
706 extent_offset = btrfs_file_extent_offset(leaf, fi); 685 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
707 686
708 if (key.offset == start) 687 if (key.offset == start)
709 split = end; 688 split = end;
@@ -711,8 +690,6 @@ again:
711 if (key.offset == start && extent_end == end) { 690 if (key.offset == start && extent_end == end) {
712 int del_nr = 0; 691 int del_nr = 0;
713 int del_slot = 0; 692 int del_slot = 0;
714 u64 leaf_owner = btrfs_header_owner(leaf);
715 u64 leaf_gen = btrfs_header_generation(leaf);
716 other_start = end; 693 other_start = end;
717 other_end = 0; 694 other_end = 0;
718 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 695 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
@@ -721,8 +698,8 @@ again:
721 del_slot = path->slots[0] + 1; 698 del_slot = path->slots[0] + 1;
722 del_nr++; 699 del_nr++;
723 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 700 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
724 leaf->start, leaf_owner, 701 0, root->root_key.objectid,
725 leaf_gen, inode->i_ino, 0); 702 inode->i_ino, orig_offset);
726 BUG_ON(ret); 703 BUG_ON(ret);
727 } 704 }
728 other_start = 0; 705 other_start = 0;
@@ -733,8 +710,8 @@ again:
733 del_slot = path->slots[0]; 710 del_slot = path->slots[0];
734 del_nr++; 711 del_nr++;
735 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 712 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
736 leaf->start, leaf_owner, 713 0, root->root_key.objectid,
737 leaf_gen, inode->i_ino, 0); 714 inode->i_ino, orig_offset);
738 BUG_ON(ret); 715 BUG_ON(ret);
739 } 716 }
740 split_end = 0; 717 split_end = 0;
@@ -768,13 +745,12 @@ again:
768 locked_end = extent_end; 745 locked_end = extent_end;
769 } 746 }
770 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); 747 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
771 extent_offset += split - key.offset;
772 } else { 748 } else {
773 BUG_ON(key.offset != start); 749 BUG_ON(key.offset != start);
774 btrfs_set_file_extent_offset(leaf, fi, extent_offset +
775 split - key.offset);
776 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
777 key.offset = split; 750 key.offset = split;
751 btrfs_set_file_extent_offset(leaf, fi, key.offset -
752 orig_offset);
753 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
778 btrfs_set_item_key_safe(trans, root, path, &key); 754 btrfs_set_item_key_safe(trans, root, path, &key);
779 extent_end = split; 755 extent_end = split;
780 } 756 }
@@ -793,7 +769,8 @@ again:
793 struct btrfs_file_extent_item); 769 struct btrfs_file_extent_item);
794 key.offset = split; 770 key.offset = split;
795 btrfs_set_item_key_safe(trans, root, path, &key); 771 btrfs_set_item_key_safe(trans, root, path, &key);
796 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 772 btrfs_set_file_extent_offset(leaf, fi, key.offset -
773 orig_offset);
797 btrfs_set_file_extent_num_bytes(leaf, fi, 774 btrfs_set_file_extent_num_bytes(leaf, fi,
798 other_end - split); 775 other_end - split);
799 goto done; 776 goto done;
@@ -815,10 +792,9 @@ again:
815 792
816 btrfs_mark_buffer_dirty(leaf); 793 btrfs_mark_buffer_dirty(leaf);
817 794
818 orig_parent = leaf->start; 795 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
819 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 796 root->root_key.objectid,
820 orig_parent, root->root_key.objectid, 797 inode->i_ino, orig_offset);
821 trans->transid, inode->i_ino);
822 BUG_ON(ret); 798 BUG_ON(ret);
823 btrfs_release_path(root, path); 799 btrfs_release_path(root, path);
824 800
@@ -833,20 +809,12 @@ again:
833 btrfs_set_file_extent_type(leaf, fi, extent_type); 809 btrfs_set_file_extent_type(leaf, fi, extent_type);
834 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); 810 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
835 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); 811 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
836 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 812 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
837 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); 813 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
838 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); 814 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
839 btrfs_set_file_extent_compression(leaf, fi, 0); 815 btrfs_set_file_extent_compression(leaf, fi, 0);
840 btrfs_set_file_extent_encryption(leaf, fi, 0); 816 btrfs_set_file_extent_encryption(leaf, fi, 0);
841 btrfs_set_file_extent_other_encoding(leaf, fi, 0); 817 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
842
843 if (orig_parent != leaf->start) {
844 ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
845 orig_parent, leaf->start,
846 root->root_key.objectid,
847 trans->transid, inode->i_ino);
848 BUG_ON(ret);
849 }
850done: 818done:
851 btrfs_mark_buffer_dirty(leaf); 819 btrfs_mark_buffer_dirty(leaf);
852 820
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1c8b0190d031..917bf10597c6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -48,7 +48,6 @@
48#include "ordered-data.h" 48#include "ordered-data.h"
49#include "xattr.h" 49#include "xattr.h"
50#include "tree-log.h" 50#include "tree-log.h"
51#include "ref-cache.h"
52#include "compression.h" 51#include "compression.h"
53#include "locking.h" 52#include "locking.h"
54 53
@@ -944,6 +943,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
944 u64 cow_start; 943 u64 cow_start;
945 u64 cur_offset; 944 u64 cur_offset;
946 u64 extent_end; 945 u64 extent_end;
946 u64 extent_offset;
947 u64 disk_bytenr; 947 u64 disk_bytenr;
948 u64 num_bytes; 948 u64 num_bytes;
949 int extent_type; 949 int extent_type;
@@ -1005,6 +1005,7 @@ next_slot:
1005 if (extent_type == BTRFS_FILE_EXTENT_REG || 1005 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1006 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 1006 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1007 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1007 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1008 extent_offset = btrfs_file_extent_offset(leaf, fi);
1008 extent_end = found_key.offset + 1009 extent_end = found_key.offset +
1009 btrfs_file_extent_num_bytes(leaf, fi); 1010 btrfs_file_extent_num_bytes(leaf, fi);
1010 if (extent_end <= start) { 1011 if (extent_end <= start) {
@@ -1022,9 +1023,10 @@ next_slot:
1022 if (btrfs_extent_readonly(root, disk_bytenr)) 1023 if (btrfs_extent_readonly(root, disk_bytenr))
1023 goto out_check; 1024 goto out_check;
1024 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 1025 if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
1025 disk_bytenr)) 1026 found_key.offset -
1027 extent_offset, disk_bytenr))
1026 goto out_check; 1028 goto out_check;
1027 disk_bytenr += btrfs_file_extent_offset(leaf, fi); 1029 disk_bytenr += extent_offset;
1028 disk_bytenr += cur_offset - found_key.offset; 1030 disk_bytenr += cur_offset - found_key.offset;
1029 num_bytes = min(end + 1, extent_end) - cur_offset; 1031 num_bytes = min(end + 1, extent_end) - cur_offset;
1030 /* 1032 /*
@@ -1489,9 +1491,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1489 ins.objectid = disk_bytenr; 1491 ins.objectid = disk_bytenr;
1490 ins.offset = disk_num_bytes; 1492 ins.offset = disk_num_bytes;
1491 ins.type = BTRFS_EXTENT_ITEM_KEY; 1493 ins.type = BTRFS_EXTENT_ITEM_KEY;
1492 ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, 1494 ret = btrfs_alloc_reserved_file_extent(trans, root,
1493 root->root_key.objectid, 1495 root->root_key.objectid,
1494 trans->transid, inode->i_ino, &ins); 1496 inode->i_ino, file_pos, &ins);
1495 BUG_ON(ret); 1497 BUG_ON(ret);
1496 btrfs_free_path(path); 1498 btrfs_free_path(path);
1497 1499
@@ -1956,23 +1958,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
1956 * crossing root thing. we store the inode number in the 1958 * crossing root thing. we store the inode number in the
1957 * offset of the orphan item. 1959 * offset of the orphan item.
1958 */ 1960 */
1959 inode = btrfs_iget_locked(root->fs_info->sb, 1961 found_key.objectid = found_key.offset;
1960 found_key.offset, root); 1962 found_key.type = BTRFS_INODE_ITEM_KEY;
1961 if (!inode) 1963 found_key.offset = 0;
1964 inode = btrfs_iget(root->fs_info->sb, &found_key, root);
1965 if (IS_ERR(inode))
1962 break; 1966 break;
1963 1967
1964 if (inode->i_state & I_NEW) {
1965 BTRFS_I(inode)->root = root;
1966
1967 /* have to set the location manually */
1968 BTRFS_I(inode)->location.objectid = inode->i_ino;
1969 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
1970 BTRFS_I(inode)->location.offset = 0;
1971
1972 btrfs_read_locked_inode(inode);
1973 unlock_new_inode(inode);
1974 }
1975
1976 /* 1968 /*
1977 * add this inode to the orphan list so btrfs_orphan_del does 1969 * add this inode to the orphan list so btrfs_orphan_del does
1978 * the proper thing when we hit it 1970 * the proper thing when we hit it
@@ -2069,7 +2061,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
2069/* 2061/*
2070 * read an inode from the btree into the in-memory inode 2062 * read an inode from the btree into the in-memory inode
2071 */ 2063 */
2072void btrfs_read_locked_inode(struct inode *inode) 2064static void btrfs_read_locked_inode(struct inode *inode)
2073{ 2065{
2074 struct btrfs_path *path; 2066 struct btrfs_path *path;
2075 struct extent_buffer *leaf; 2067 struct extent_buffer *leaf;
@@ -2599,9 +2591,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2599 struct btrfs_file_extent_item *fi; 2591 struct btrfs_file_extent_item *fi;
2600 u64 extent_start = 0; 2592 u64 extent_start = 0;
2601 u64 extent_num_bytes = 0; 2593 u64 extent_num_bytes = 0;
2594 u64 extent_offset = 0;
2602 u64 item_end = 0; 2595 u64 item_end = 0;
2603 u64 root_gen = 0;
2604 u64 root_owner = 0;
2605 int found_extent; 2596 int found_extent;
2606 int del_item; 2597 int del_item;
2607 int pending_del_nr = 0; 2598 int pending_del_nr = 0;
@@ -2716,6 +2707,9 @@ search_again:
2716 extent_num_bytes = 2707 extent_num_bytes =
2717 btrfs_file_extent_disk_num_bytes(leaf, 2708 btrfs_file_extent_disk_num_bytes(leaf,
2718 fi); 2709 fi);
2710 extent_offset = found_key.offset -
2711 btrfs_file_extent_offset(leaf, fi);
2712
2719 /* FIXME blocksize != 4096 */ 2713 /* FIXME blocksize != 4096 */
2720 num_dec = btrfs_file_extent_num_bytes(leaf, fi); 2714 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
2721 if (extent_start != 0) { 2715 if (extent_start != 0) {
@@ -2723,8 +2717,6 @@ search_again:
2723 if (root->ref_cows) 2717 if (root->ref_cows)
2724 inode_sub_bytes(inode, num_dec); 2718 inode_sub_bytes(inode, num_dec);
2725 } 2719 }
2726 root_gen = btrfs_header_generation(leaf);
2727 root_owner = btrfs_header_owner(leaf);
2728 } 2720 }
2729 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 2721 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
2730 /* 2722 /*
@@ -2768,12 +2760,12 @@ delete:
2768 } else { 2760 } else {
2769 break; 2761 break;
2770 } 2762 }
2771 if (found_extent) { 2763 if (found_extent && root->ref_cows) {
2772 btrfs_set_path_blocking(path); 2764 btrfs_set_path_blocking(path);
2773 ret = btrfs_free_extent(trans, root, extent_start, 2765 ret = btrfs_free_extent(trans, root, extent_start,
2774 extent_num_bytes, 2766 extent_num_bytes, 0,
2775 leaf->start, root_owner, 2767 btrfs_header_owner(leaf),
2776 root_gen, inode->i_ino, 0); 2768 inode->i_ino, extent_offset);
2777 BUG_ON(ret); 2769 BUG_ON(ret);
2778 } 2770 }
2779next: 2771next:
@@ -3105,6 +3097,45 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3105 return 0; 3097 return 0;
3106} 3098}
3107 3099
3100static void inode_tree_add(struct inode *inode)
3101{
3102 struct btrfs_root *root = BTRFS_I(inode)->root;
3103 struct btrfs_inode *entry;
3104 struct rb_node **p = &root->inode_tree.rb_node;
3105 struct rb_node *parent = NULL;
3106
3107 spin_lock(&root->inode_lock);
3108 while (*p) {
3109 parent = *p;
3110 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3111
3112 if (inode->i_ino < entry->vfs_inode.i_ino)
3113 p = &(*p)->rb_left;
3114 else if (inode->i_ino > entry->vfs_inode.i_ino)
3115 p = &(*p)->rb_right;
3116 else {
3117 WARN_ON(!(entry->vfs_inode.i_state &
3118 (I_WILL_FREE | I_FREEING | I_CLEAR)));
3119 break;
3120 }
3121 }
3122 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
3123 rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3124 spin_unlock(&root->inode_lock);
3125}
3126
3127static void inode_tree_del(struct inode *inode)
3128{
3129 struct btrfs_root *root = BTRFS_I(inode)->root;
3130
3131 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
3132 spin_lock(&root->inode_lock);
3133 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3134 spin_unlock(&root->inode_lock);
3135 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3136 }
3137}
3138
3108static noinline void init_btrfs_i(struct inode *inode) 3139static noinline void init_btrfs_i(struct inode *inode)
3109{ 3140{
3110 struct btrfs_inode *bi = BTRFS_I(inode); 3141 struct btrfs_inode *bi = BTRFS_I(inode);
@@ -3130,6 +3161,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3130 inode->i_mapping, GFP_NOFS); 3161 inode->i_mapping, GFP_NOFS);
3131 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); 3162 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
3132 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3163 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3164 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3133 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3165 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3134 mutex_init(&BTRFS_I(inode)->extent_mutex); 3166 mutex_init(&BTRFS_I(inode)->extent_mutex);
3135 mutex_init(&BTRFS_I(inode)->log_mutex); 3167 mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -3152,26 +3184,9 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
3152 args->root == BTRFS_I(inode)->root; 3184 args->root == BTRFS_I(inode)->root;
3153} 3185}
3154 3186
3155struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, 3187static struct inode *btrfs_iget_locked(struct super_block *s,
3156 struct btrfs_root *root, int wait) 3188 u64 objectid,
3157{ 3189 struct btrfs_root *root)
3158 struct inode *inode;
3159 struct btrfs_iget_args args;
3160 args.ino = objectid;
3161 args.root = root;
3162
3163 if (wait) {
3164 inode = ilookup5(s, objectid, btrfs_find_actor,
3165 (void *)&args);
3166 } else {
3167 inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
3168 (void *)&args);
3169 }
3170 return inode;
3171}
3172
3173struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
3174 struct btrfs_root *root)
3175{ 3190{
3176 struct inode *inode; 3191 struct inode *inode;
3177 struct btrfs_iget_args args; 3192 struct btrfs_iget_args args;
@@ -3188,24 +3203,21 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
3188 * Returns in *is_new if the inode was read from disk 3203 * Returns in *is_new if the inode was read from disk
3189 */ 3204 */
3190struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3205struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3191 struct btrfs_root *root, int *is_new) 3206 struct btrfs_root *root)
3192{ 3207{
3193 struct inode *inode; 3208 struct inode *inode;
3194 3209
3195 inode = btrfs_iget_locked(s, location->objectid, root); 3210 inode = btrfs_iget_locked(s, location->objectid, root);
3196 if (!inode) 3211 if (!inode)
3197 return ERR_PTR(-EACCES); 3212 return ERR_PTR(-ENOMEM);
3198 3213
3199 if (inode->i_state & I_NEW) { 3214 if (inode->i_state & I_NEW) {
3200 BTRFS_I(inode)->root = root; 3215 BTRFS_I(inode)->root = root;
3201 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); 3216 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
3202 btrfs_read_locked_inode(inode); 3217 btrfs_read_locked_inode(inode);
3218
3219 inode_tree_add(inode);
3203 unlock_new_inode(inode); 3220 unlock_new_inode(inode);
3204 if (is_new)
3205 *is_new = 1;
3206 } else {
3207 if (is_new)
3208 *is_new = 0;
3209 } 3221 }
3210 3222
3211 return inode; 3223 return inode;
@@ -3218,7 +3230,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3218 struct btrfs_root *root = bi->root; 3230 struct btrfs_root *root = bi->root;
3219 struct btrfs_root *sub_root = root; 3231 struct btrfs_root *sub_root = root;
3220 struct btrfs_key location; 3232 struct btrfs_key location;
3221 int ret, new; 3233 int ret;
3222 3234
3223 if (dentry->d_name.len > BTRFS_NAME_LEN) 3235 if (dentry->d_name.len > BTRFS_NAME_LEN)
3224 return ERR_PTR(-ENAMETOOLONG); 3236 return ERR_PTR(-ENAMETOOLONG);
@@ -3236,7 +3248,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3236 return ERR_PTR(ret); 3248 return ERR_PTR(ret);
3237 if (ret > 0) 3249 if (ret > 0)
3238 return ERR_PTR(-ENOENT); 3250 return ERR_PTR(-ENOENT);
3239 inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); 3251 inode = btrfs_iget(dir->i_sb, &location, sub_root);
3240 if (IS_ERR(inode)) 3252 if (IS_ERR(inode))
3241 return ERR_CAST(inode); 3253 return ERR_CAST(inode);
3242 } 3254 }
@@ -3631,6 +3643,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3631 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); 3643 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
3632 3644
3633 insert_inode_hash(inode); 3645 insert_inode_hash(inode);
3646 inode_tree_add(inode);
3634 return inode; 3647 return inode;
3635fail: 3648fail:
3636 if (dir) 3649 if (dir)
@@ -4683,6 +4696,7 @@ void btrfs_destroy_inode(struct inode *inode)
4683 btrfs_put_ordered_extent(ordered); 4696 btrfs_put_ordered_extent(ordered);
4684 } 4697 }
4685 } 4698 }
4699 inode_tree_del(inode);
4686 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 4700 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
4687 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 4701 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
4688} 4702}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2624b53ea783..54dfd45cc591 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -82,22 +82,25 @@ static noinline int create_subvol(struct btrfs_root *root,
82 if (ret) 82 if (ret)
83 goto fail; 83 goto fail;
84 84
85 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 85 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
86 objectid, trans->transid, 0, 0, 0); 86 0, objectid, NULL, 0, 0, 0);
87 if (IS_ERR(leaf)) { 87 if (IS_ERR(leaf)) {
88 ret = PTR_ERR(leaf); 88 ret = PTR_ERR(leaf);
89 goto fail; 89 goto fail;
90 } 90 }
91 91
92 btrfs_set_header_nritems(leaf, 0); 92 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
93 btrfs_set_header_level(leaf, 0);
94 btrfs_set_header_bytenr(leaf, leaf->start); 93 btrfs_set_header_bytenr(leaf, leaf->start);
95 btrfs_set_header_generation(leaf, trans->transid); 94 btrfs_set_header_generation(leaf, trans->transid);
95 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
96 btrfs_set_header_owner(leaf, objectid); 96 btrfs_set_header_owner(leaf, objectid);
97 97
98 write_extent_buffer(leaf, root->fs_info->fsid, 98 write_extent_buffer(leaf, root->fs_info->fsid,
99 (unsigned long)btrfs_header_fsid(leaf), 99 (unsigned long)btrfs_header_fsid(leaf),
100 BTRFS_FSID_SIZE); 100 BTRFS_FSID_SIZE);
101 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
102 (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
103 BTRFS_UUID_SIZE);
101 btrfs_mark_buffer_dirty(leaf); 104 btrfs_mark_buffer_dirty(leaf);
102 105
103 inode_item = &root_item.inode; 106 inode_item = &root_item.inode;
@@ -125,7 +128,7 @@ static noinline int create_subvol(struct btrfs_root *root,
125 btrfs_set_root_dirid(&root_item, new_dirid); 128 btrfs_set_root_dirid(&root_item, new_dirid);
126 129
127 key.objectid = objectid; 130 key.objectid = objectid;
128 key.offset = 1; 131 key.offset = 0;
129 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 132 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
130 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 133 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
131 &root_item); 134 &root_item);
@@ -911,10 +914,10 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
911 if (disko) { 914 if (disko) {
912 inode_add_bytes(inode, datal); 915 inode_add_bytes(inode, datal);
913 ret = btrfs_inc_extent_ref(trans, root, 916 ret = btrfs_inc_extent_ref(trans, root,
914 disko, diskl, leaf->start, 917 disko, diskl, 0,
915 root->root_key.objectid, 918 root->root_key.objectid,
916 trans->transid, 919 inode->i_ino,
917 inode->i_ino); 920 new_key.offset - datao);
918 BUG_ON(ret); 921 BUG_ON(ret);
919 } 922 }
920 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 923 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 5f8f218c1005..6d6523da0a30 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -45,22 +45,132 @@ static void print_dev_item(struct extent_buffer *eb,
45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item), 45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); 46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
47} 47}
48static void print_extent_data_ref(struct extent_buffer *eb,
49 struct btrfs_extent_data_ref *ref)
50{
51 printk(KERN_INFO "\t\textent data backref root %llu "
52 "objectid %llu offset %llu count %u\n",
53 (unsigned long long)btrfs_extent_data_ref_root(eb, ref),
54 (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref),
55 (unsigned long long)btrfs_extent_data_ref_offset(eb, ref),
56 btrfs_extent_data_ref_count(eb, ref));
57}
58
59static void print_extent_item(struct extent_buffer *eb, int slot)
60{
61 struct btrfs_extent_item *ei;
62 struct btrfs_extent_inline_ref *iref;
63 struct btrfs_extent_data_ref *dref;
64 struct btrfs_shared_data_ref *sref;
65 struct btrfs_disk_key key;
66 unsigned long end;
67 unsigned long ptr;
68 int type;
69 u32 item_size = btrfs_item_size_nr(eb, slot);
70 u64 flags;
71 u64 offset;
72
73 if (item_size < sizeof(*ei)) {
74#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
75 struct btrfs_extent_item_v0 *ei0;
76 BUG_ON(item_size != sizeof(*ei0));
77 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
78 printk(KERN_INFO "\t\textent refs %u\n",
79 btrfs_extent_refs_v0(eb, ei0));
80 return;
81#else
82 BUG();
83#endif
84 }
85
86 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
87 flags = btrfs_extent_flags(eb, ei);
88
89 printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
90 (unsigned long long)btrfs_extent_refs(eb, ei),
91 (unsigned long long)btrfs_extent_generation(eb, ei),
92 (unsigned long long)flags);
93
94 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
95 struct btrfs_tree_block_info *info;
96 info = (struct btrfs_tree_block_info *)(ei + 1);
97 btrfs_tree_block_key(eb, info, &key);
98 printk(KERN_INFO "\t\ttree block key (%llu %x %llu) "
99 "level %d\n",
100 (unsigned long long)btrfs_disk_key_objectid(&key),
101 key.type,
102 (unsigned long long)btrfs_disk_key_offset(&key),
103 btrfs_tree_block_level(eb, info));
104 iref = (struct btrfs_extent_inline_ref *)(info + 1);
105 } else {
106 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
107 }
108
109 ptr = (unsigned long)iref;
110 end = (unsigned long)ei + item_size;
111 while (ptr < end) {
112 iref = (struct btrfs_extent_inline_ref *)ptr;
113 type = btrfs_extent_inline_ref_type(eb, iref);
114 offset = btrfs_extent_inline_ref_offset(eb, iref);
115 switch (type) {
116 case BTRFS_TREE_BLOCK_REF_KEY:
117 printk(KERN_INFO "\t\ttree block backref "
118 "root %llu\n", (unsigned long long)offset);
119 break;
120 case BTRFS_SHARED_BLOCK_REF_KEY:
121 printk(KERN_INFO "\t\tshared block backref "
122 "parent %llu\n", (unsigned long long)offset);
123 break;
124 case BTRFS_EXTENT_DATA_REF_KEY:
125 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
126 print_extent_data_ref(eb, dref);
127 break;
128 case BTRFS_SHARED_DATA_REF_KEY:
129 sref = (struct btrfs_shared_data_ref *)(iref + 1);
130 printk(KERN_INFO "\t\tshared data backref "
131 "parent %llu count %u\n",
132 (unsigned long long)offset,
133 btrfs_shared_data_ref_count(eb, sref));
134 break;
135 default:
136 BUG();
137 }
138 ptr += btrfs_extent_inline_ref_size(type);
139 }
140 WARN_ON(ptr > end);
141}
142
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Print an old-format (v0) extent back reference stored in @slot of @eb:
 * the referencing root, its generation, the owner objectid and the
 * reference count.
 *
 * The message is logged at KERN_INFO like every other line of the leaf
 * dump, so the whole dump shows up at one log level.
 */
static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
{
	struct btrfs_extent_ref_v0 *ref0;

	ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
	printk(KERN_INFO "\t\textent back ref root %llu gen %llu "
	       "owner %llu num_refs %lu\n",
	       (unsigned long long)btrfs_ref_root_v0(eb, ref0),
	       (unsigned long long)btrfs_ref_generation_v0(eb, ref0),
	       (unsigned long long)btrfs_ref_objectid_v0(eb, ref0),
	       (unsigned long)btrfs_ref_count_v0(eb, ref0));
}
#endif
157
48void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) 158void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
49{ 159{
50 int i; 160 int i;
161 u32 type;
51 u32 nr = btrfs_header_nritems(l); 162 u32 nr = btrfs_header_nritems(l);
52 struct btrfs_item *item; 163 struct btrfs_item *item;
53 struct btrfs_extent_item *ei;
54 struct btrfs_root_item *ri; 164 struct btrfs_root_item *ri;
55 struct btrfs_dir_item *di; 165 struct btrfs_dir_item *di;
56 struct btrfs_inode_item *ii; 166 struct btrfs_inode_item *ii;
57 struct btrfs_block_group_item *bi; 167 struct btrfs_block_group_item *bi;
58 struct btrfs_file_extent_item *fi; 168 struct btrfs_file_extent_item *fi;
169 struct btrfs_extent_data_ref *dref;
170 struct btrfs_shared_data_ref *sref;
171 struct btrfs_dev_extent *dev_extent;
59 struct btrfs_key key; 172 struct btrfs_key key;
60 struct btrfs_key found_key; 173 struct btrfs_key found_key;
61 struct btrfs_extent_ref *ref;
62 struct btrfs_dev_extent *dev_extent;
63 u32 type;
64 174
65 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", 175 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
66 (unsigned long long)btrfs_header_bytenr(l), nr, 176 (unsigned long long)btrfs_header_bytenr(l), nr,
@@ -100,20 +210,25 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
100 btrfs_disk_root_refs(l, ri)); 210 btrfs_disk_root_refs(l, ri));
101 break; 211 break;
102 case BTRFS_EXTENT_ITEM_KEY: 212 case BTRFS_EXTENT_ITEM_KEY:
103 ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); 213 print_extent_item(l, i);
104 printk(KERN_INFO "\t\textent data refs %u\n", 214 break;
105 btrfs_extent_refs(l, ei)); 215 case BTRFS_TREE_BLOCK_REF_KEY:
106 break; 216 printk(KERN_INFO "\t\ttree block backref\n");
107 case BTRFS_EXTENT_REF_KEY: 217 break;
108 ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); 218 case BTRFS_SHARED_BLOCK_REF_KEY:
109 printk(KERN_INFO "\t\textent back ref root %llu " 219 printk(KERN_INFO "\t\tshared block backref\n");
110 "gen %llu owner %llu num_refs %lu\n", 220 break;
111 (unsigned long long)btrfs_ref_root(l, ref), 221 case BTRFS_EXTENT_DATA_REF_KEY:
112 (unsigned long long)btrfs_ref_generation(l, ref), 222 dref = btrfs_item_ptr(l, i,
113 (unsigned long long)btrfs_ref_objectid(l, ref), 223 struct btrfs_extent_data_ref);
114 (unsigned long)btrfs_ref_num_refs(l, ref)); 224 print_extent_data_ref(l, dref);
225 break;
226 case BTRFS_SHARED_DATA_REF_KEY:
227 sref = btrfs_item_ptr(l, i,
228 struct btrfs_shared_data_ref);
229 printk(KERN_INFO "\t\tshared data backref count %u\n",
230 btrfs_shared_data_ref_count(l, sref));
115 break; 231 break;
116
117 case BTRFS_EXTENT_DATA_KEY: 232 case BTRFS_EXTENT_DATA_KEY:
118 fi = btrfs_item_ptr(l, i, 233 fi = btrfs_item_ptr(l, i,
119 struct btrfs_file_extent_item); 234 struct btrfs_file_extent_item);
@@ -139,6 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
139 (unsigned long long) 254 (unsigned long long)
140 btrfs_file_extent_ram_bytes(l, fi)); 255 btrfs_file_extent_ram_bytes(l, fi));
141 break; 256 break;
257 case BTRFS_EXTENT_REF_V0_KEY:
258#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
259 print_extent_ref_v0(l, i);
260#else
261 BUG();
262#endif
142 case BTRFS_BLOCK_GROUP_ITEM_KEY: 263 case BTRFS_BLOCK_GROUP_ITEM_KEY:
143 bi = btrfs_item_ptr(l, i, 264 bi = btrfs_item_ptr(l, i,
144 struct btrfs_block_group_item); 265 struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
new file mode 100644
index 000000000000..b23dc209ae10
--- /dev/null
+++ b/fs/btrfs/relocation.c
@@ -0,0 +1,3711 @@
1/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include "ctree.h"
25#include "disk-io.h"
26#include "transaction.h"
27#include "volumes.h"
28#include "locking.h"
29#include "btrfs_inode.h"
30#include "async-thread.h"
31
/*
 * Common header of the rb-tree entries keyed by bytenr.  backref_node,
 * mapping_node and tree_block all start with these two members in this
 * order, which lets tree_insert() and tree_search() walk any of them
 * through a struct tree_entry pointer.
 */
struct tree_entry {
	struct rb_node rb_node;
	u64 bytenr;
};
39
/*
 * Represents a tree block in the backref cache.
 *
 * rb_node and bytenr come first so the node can be searched as a
 * struct tree_entry.
 */
struct backref_node {
	struct rb_node rb_node;
	u64 bytenr;
	/* objectid of the tree block owner */
	u64 owner;
	/* list of edges to the upper level blocks that reference this block */
	struct list_head upper;
	/*
	 * list of edges to the child blocks in the cache; also links the
	 * node into backref_cache->pending[] when it has no cached child
	 */
	struct list_head lower;
	/* NULL if this node is not a tree root */
	struct btrfs_root *root;
	/* extent buffer obtained by COWing the block */
	struct extent_buffer *eb;
	/* level of the tree block */
	unsigned int level:8;
	/* 1 if the block is the root of an old snapshot */
	unsigned int old_root:1;
	/* 1 if there are no child blocks in the cache */
	unsigned int lowest:1;
	/* 1 if the extent buffer is locked */
	unsigned int locked:1;
	/* 1 if the block has been processed */
	unsigned int processed:1;
	/* 1 if the backrefs of this block have been checked */
	unsigned int checked:1;
};
69
/*
 * Represents a block pointer in the backref cache, i.e. the link between a
 * lower (child) node and an upper (parent) node.
 *
 * list[] and node[] are indexed by LOWER and UPPER.  list[LOWER] links the
 * edge into the 'upper' list of node[LOWER]; list[UPPER] presumably links
 * it into the 'lower' list of node[UPPER] -- confirm against the code that
 * creates edges.
 */
struct backref_edge {
	struct list_head list[2];
	struct backref_node *node[2];
	u64 blockptr;
};

/* indexes into backref_edge->list[] and backref_edge->node[] */
#define LOWER	0
#define UPPER	1
81
/*
 * Cache of backref nodes, looked up by the bytenr of the tree block.
 */
struct backref_cache {
	/* red-black tree of all backref nodes in the cache */
	struct rb_root rb_root;
	/*
	 * lists of backref nodes with no child block in the cache,
	 * one list per tree level
	 */
	struct list_head pending[BTRFS_MAX_LEVEL];
	spinlock_t lock;
};
89
/*
 * Maps the address of a tree root to a tree.
 *
 * rb_node and bytenr come first so the node can be searched as a
 * struct tree_entry.
 */
struct mapping_node {
	struct rb_node rb_node;
	u64 bytenr;
	/* the mapped object; find_reloc_root() reads it as a struct btrfs_root */
	void *data;
};
98
/*
 * Red-black tree of mapping_node entries.  find_reloc_root() searches
 * rb_root with 'lock' held.
 */
struct mapping_tree {
	struct rb_root rb_root;
	spinlock_t lock;
};
103
/*
 * Represents a tree block to process.
 *
 * rb_node and bytenr come first so the entry can be searched as a
 * struct tree_entry.
 */
struct tree_block {
	struct rb_node rb_node;
	u64 bytenr;
	struct btrfs_key key;
	/* level of the tree block */
	unsigned int level:8;
	/* presumably 1 once 'key' has been filled in -- confirm against users */
	unsigned int key_ready:1;
};
114
/* number of inode pointers held by one inode vector */
#define INODEVEC_SIZE 16

/*
 * Inode vector: a fixed-size batch of inode pointers that can be chained on
 * a list.  'nr' presumably counts the used slots of inode[] -- confirm
 * against users.
 */
struct inodevec {
	struct list_head list;
	struct inode *inode[INODEVEC_SIZE];
	int nr;
};
123
/*
 * State of the relocation of one block group.
 */
struct reloc_control {
	/* block group to relocate */
	struct btrfs_block_group_cache *block_group;
	/* extent tree */
	struct btrfs_root *extent_root;
	/* inode used for moving data */
	struct inode *data_inode;
	struct btrfs_workers workers;
	/* tree blocks that have been processed */
	struct extent_io_tree processed_blocks;
	/* maps the start of a tree root to the corresponding reloc tree */
	struct mapping_tree reloc_root_tree;
	/* list of reloc trees */
	struct list_head reloc_roots;
	u64 search_start;
	u64 extents_found;
	u64 extents_skipped;
	/* presumably one of the data relocation stages below -- confirm */
	int stage;
	int create_reloc_root;
	unsigned int found_file_extent:1;
	unsigned int found_old_snapshot:1;
};

/* stages of data relocation */
#define MOVE_DATA_EXTENTS	0
#define UPDATE_DATA_PTRS	1
150
/*
 * Work item used to merge a reloc tree into the corresponding fs tree in a
 * worker thread.
 */
struct async_merge {
	struct btrfs_work work;
	struct reloc_control *rc;
	struct btrfs_root *root;
	struct completion *done;
	atomic_t *num_pending;
};
161
162static void mapping_tree_init(struct mapping_tree *tree)
163{
164 tree->rb_root.rb_node = NULL;
165 spin_lock_init(&tree->lock);
166}
167
168static void backref_cache_init(struct backref_cache *cache)
169{
170 int i;
171 cache->rb_root.rb_node = NULL;
172 for (i = 0; i < BTRFS_MAX_LEVEL; i++)
173 INIT_LIST_HEAD(&cache->pending[i]);
174 spin_lock_init(&cache->lock);
175}
176
177static void backref_node_init(struct backref_node *node)
178{
179 memset(node, 0, sizeof(*node));
180 INIT_LIST_HEAD(&node->upper);
181 INIT_LIST_HEAD(&node->lower);
182 RB_CLEAR_NODE(&node->rb_node);
183}
184
185static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
186 struct rb_node *node)
187{
188 struct rb_node **p = &root->rb_node;
189 struct rb_node *parent = NULL;
190 struct tree_entry *entry;
191
192 while (*p) {
193 parent = *p;
194 entry = rb_entry(parent, struct tree_entry, rb_node);
195
196 if (bytenr < entry->bytenr)
197 p = &(*p)->rb_left;
198 else if (bytenr > entry->bytenr)
199 p = &(*p)->rb_right;
200 else
201 return parent;
202 }
203
204 rb_link_node(node, parent, p);
205 rb_insert_color(node, root);
206 return NULL;
207}
208
209static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
210{
211 struct rb_node *n = root->rb_node;
212 struct tree_entry *entry;
213
214 while (n) {
215 entry = rb_entry(n, struct tree_entry, rb_node);
216
217 if (bytenr < entry->bytenr)
218 n = n->rb_left;
219 else if (bytenr > entry->bytenr)
220 n = n->rb_right;
221 else
222 return n;
223 }
224 return NULL;
225}
226
227/*
228 * walk up backref nodes until reach node presents tree root
229 */
230static struct backref_node *walk_up_backref(struct backref_node *node,
231 struct backref_edge *edges[],
232 int *index)
233{
234 struct backref_edge *edge;
235 int idx = *index;
236
237 while (!list_empty(&node->upper)) {
238 edge = list_entry(node->upper.next,
239 struct backref_edge, list[LOWER]);
240 edges[idx++] = edge;
241 node = edge->node[UPPER];
242 }
243 *index = idx;
244 return node;
245}
246
247/*
248 * walk down backref nodes to find start of next reference path
249 */
250static struct backref_node *walk_down_backref(struct backref_edge *edges[],
251 int *index)
252{
253 struct backref_edge *edge;
254 struct backref_node *lower;
255 int idx = *index;
256
257 while (idx > 0) {
258 edge = edges[idx - 1];
259 lower = edge->node[LOWER];
260 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
261 idx--;
262 continue;
263 }
264 edge = list_entry(edge->list[LOWER].next,
265 struct backref_edge, list[LOWER]);
266 edges[idx - 1] = edge;
267 *index = idx;
268 return edge->node[UPPER];
269 }
270 *index = 0;
271 return NULL;
272}
273
274static void drop_node_buffer(struct backref_node *node)
275{
276 if (node->eb) {
277 if (node->locked) {
278 btrfs_tree_unlock(node->eb);
279 node->locked = 0;
280 }
281 free_extent_buffer(node->eb);
282 node->eb = NULL;
283 }
284}
285
286static void drop_backref_node(struct backref_cache *tree,
287 struct backref_node *node)
288{
289 BUG_ON(!node->lowest);
290 BUG_ON(!list_empty(&node->upper));
291
292 drop_node_buffer(node);
293 list_del(&node->lower);
294
295 rb_erase(&node->rb_node, &tree->rb_root);
296 kfree(node);
297}
298
299/*
300 * remove a backref node from the backref cache
301 */
302static void remove_backref_node(struct backref_cache *cache,
303 struct backref_node *node)
304{
305 struct backref_node *upper;
306 struct backref_edge *edge;
307
308 if (!node)
309 return;
310
311 BUG_ON(!node->lowest);
312 while (!list_empty(&node->upper)) {
313 edge = list_entry(node->upper.next, struct backref_edge,
314 list[LOWER]);
315 upper = edge->node[UPPER];
316 list_del(&edge->list[LOWER]);
317 list_del(&edge->list[UPPER]);
318 kfree(edge);
319 /*
320 * add the node to pending list if no other
321 * child block cached.
322 */
323 if (list_empty(&upper->lower)) {
324 list_add_tail(&upper->lower,
325 &cache->pending[upper->level]);
326 upper->lowest = 1;
327 }
328 }
329 drop_backref_node(cache, node);
330}
331
332/*
333 * find reloc tree by address of tree root
334 */
335static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
336 u64 bytenr)
337{
338 struct rb_node *rb_node;
339 struct mapping_node *node;
340 struct btrfs_root *root = NULL;
341
342 spin_lock(&rc->reloc_root_tree.lock);
343 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
344 if (rb_node) {
345 node = rb_entry(rb_node, struct mapping_node, rb_node);
346 root = (struct btrfs_root *)node->data;
347 }
348 spin_unlock(&rc->reloc_root_tree.lock);
349 return root;
350}
351
352static int is_cowonly_root(u64 root_objectid)
353{
354 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
355 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
356 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
357 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
358 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
359 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
360 return 1;
361 return 0;
362}
363
364static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
365 u64 root_objectid)
366{
367 struct btrfs_key key;
368
369 key.objectid = root_objectid;
370 key.type = BTRFS_ROOT_ITEM_KEY;
371 if (is_cowonly_root(root_objectid))
372 key.offset = 0;
373 else
374 key.offset = (u64)-1;
375
376 return btrfs_read_fs_root_no_name(fs_info, &key);
377}
378
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * look up the root named by a v0 extent ref.
 *
 * returns the root, or NULL when the root has ref_cows set and the
 * generation recorded in the ref differs from the generation of the
 * root item. a ref owned by the reloc tree objectid, or a failure to
 * read the root, triggers BUG_ON.
 */
static noinline_for_stack
struct btrfs_root *find_tree_root(struct reloc_control *rc,
				  struct extent_buffer *leaf,
				  struct btrfs_extent_ref_v0 *ref0)
{
	u64 owner = btrfs_ref_root_v0(leaf, ref0);
	u64 ref_gen = btrfs_ref_generation_v0(leaf, ref0);
	struct btrfs_root *root;

	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID);

	root = read_fs_root(rc->extent_root->fs_info, owner);
	BUG_ON(IS_ERR(root));

	/* the generation only matters for roots with ref_cows set */
	if (!root->ref_cows)
		return root;
	if (ref_gen == btrfs_root_generation(&root->root_item))
		return root;

	return NULL;
}
#endif
401
402static noinline_for_stack
403int find_inline_backref(struct extent_buffer *leaf, int slot,
404 unsigned long *ptr, unsigned long *end)
405{
406 struct btrfs_extent_item *ei;
407 struct btrfs_tree_block_info *bi;
408 u32 item_size;
409
410 item_size = btrfs_item_size_nr(leaf, slot);
411#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
412 if (item_size < sizeof(*ei)) {
413 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
414 return 1;
415 }
416#endif
417 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
418 WARN_ON(!(btrfs_extent_flags(leaf, ei) &
419 BTRFS_EXTENT_FLAG_TREE_BLOCK));
420
421 if (item_size <= sizeof(*ei) + sizeof(*bi)) {
422 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
423 return 1;
424 }
425
426 bi = (struct btrfs_tree_block_info *)(ei + 1);
427 *ptr = (unsigned long)(bi + 1);
428 *end = (unsigned long)ei + item_size;
429 return 0;
430}
431
432/*
433 * build backref tree for a given tree block. root of the backref tree
434 * corresponds the tree block, leaves of the backref tree correspond
435 * roots of b-trees that reference the tree block.
436 *
437 * the basic idea of this function is check backrefs of a given block
438 * to find upper level blocks that refernece the block, and then check
439 * bakcrefs of these upper level blocks recursively. the recursion stop
440 * when tree root is reached or backrefs for the block is cached.
441 *
442 * NOTE: if we find backrefs for a block are cached, we know backrefs
443 * for all upper level blocks that directly/indirectly reference the
444 * block are also cached.
445 */
446static struct backref_node *build_backref_tree(struct reloc_control *rc,
447 struct backref_cache *cache,
448 struct btrfs_key *node_key,
449 int level, u64 bytenr)
450{
451 struct btrfs_path *path1;
452 struct btrfs_path *path2;
453 struct extent_buffer *eb;
454 struct btrfs_root *root;
455 struct backref_node *cur;
456 struct backref_node *upper;
457 struct backref_node *lower;
458 struct backref_node *node = NULL;
459 struct backref_node *exist = NULL;
460 struct backref_edge *edge;
461 struct rb_node *rb_node;
462 struct btrfs_key key;
463 unsigned long end;
464 unsigned long ptr;
465 LIST_HEAD(list);
466 int ret;
467 int err = 0;
468
469 path1 = btrfs_alloc_path();
470 path2 = btrfs_alloc_path();
471 if (!path1 || !path2) {
472 err = -ENOMEM;
473 goto out;
474 }
475
476 node = kmalloc(sizeof(*node), GFP_NOFS);
477 if (!node) {
478 err = -ENOMEM;
479 goto out;
480 }
481
482 backref_node_init(node);
483 node->bytenr = bytenr;
484 node->owner = 0;
485 node->level = level;
486 node->lowest = 1;
487 cur = node;
488again:
489 end = 0;
490 ptr = 0;
491 key.objectid = cur->bytenr;
492 key.type = BTRFS_EXTENT_ITEM_KEY;
493 key.offset = (u64)-1;
494
495 path1->search_commit_root = 1;
496 path1->skip_locking = 1;
497 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
498 0, 0);
499 if (ret < 0) {
500 err = ret;
501 goto out;
502 }
503 BUG_ON(!ret || !path1->slots[0]);
504
505 path1->slots[0]--;
506
507 WARN_ON(cur->checked);
508 if (!list_empty(&cur->upper)) {
509 /*
510 * the backref was added previously when processsing
511 * backref of type BTRFS_TREE_BLOCK_REF_KEY
512 */
513 BUG_ON(!list_is_singular(&cur->upper));
514 edge = list_entry(cur->upper.next, struct backref_edge,
515 list[LOWER]);
516 BUG_ON(!list_empty(&edge->list[UPPER]));
517 exist = edge->node[UPPER];
518 /*
519 * add the upper level block to pending list if we need
520 * check its backrefs
521 */
522 if (!exist->checked)
523 list_add_tail(&edge->list[UPPER], &list);
524 } else {
525 exist = NULL;
526 }
527
528 while (1) {
529 cond_resched();
530 eb = path1->nodes[0];
531
532 if (ptr >= end) {
533 if (path1->slots[0] >= btrfs_header_nritems(eb)) {
534 ret = btrfs_next_leaf(rc->extent_root, path1);
535 if (ret < 0) {
536 err = ret;
537 goto out;
538 }
539 if (ret > 0)
540 break;
541 eb = path1->nodes[0];
542 }
543
544 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
545 if (key.objectid != cur->bytenr) {
546 WARN_ON(exist);
547 break;
548 }
549
550 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
551 ret = find_inline_backref(eb, path1->slots[0],
552 &ptr, &end);
553 if (ret)
554 goto next;
555 }
556 }
557
558 if (ptr < end) {
559 /* update key for inline back ref */
560 struct btrfs_extent_inline_ref *iref;
561 iref = (struct btrfs_extent_inline_ref *)ptr;
562 key.type = btrfs_extent_inline_ref_type(eb, iref);
563 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
564 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
565 key.type != BTRFS_SHARED_BLOCK_REF_KEY);
566 }
567
568 if (exist &&
569 ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
570 exist->owner == key.offset) ||
571 (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
572 exist->bytenr == key.offset))) {
573 exist = NULL;
574 goto next;
575 }
576
577#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
578 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
579 key.type == BTRFS_EXTENT_REF_V0_KEY) {
580 if (key.objectid == key.offset &&
581 key.type == BTRFS_EXTENT_REF_V0_KEY) {
582 struct btrfs_extent_ref_v0 *ref0;
583 ref0 = btrfs_item_ptr(eb, path1->slots[0],
584 struct btrfs_extent_ref_v0);
585 root = find_tree_root(rc, eb, ref0);
586 if (root)
587 cur->root = root;
588 else
589 cur->old_root = 1;
590 break;
591 }
592#else
593 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
594 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
595#endif
596 if (key.objectid == key.offset) {
597 /*
598 * only root blocks of reloc trees use
599 * backref of this type.
600 */
601 root = find_reloc_root(rc, cur->bytenr);
602 BUG_ON(!root);
603 cur->root = root;
604 break;
605 }
606
607 edge = kzalloc(sizeof(*edge), GFP_NOFS);
608 if (!edge) {
609 err = -ENOMEM;
610 goto out;
611 }
612 rb_node = tree_search(&cache->rb_root, key.offset);
613 if (!rb_node) {
614 upper = kmalloc(sizeof(*upper), GFP_NOFS);
615 if (!upper) {
616 kfree(edge);
617 err = -ENOMEM;
618 goto out;
619 }
620 backref_node_init(upper);
621 upper->bytenr = key.offset;
622 upper->owner = 0;
623 upper->level = cur->level + 1;
624 /*
625 * backrefs for the upper level block isn't
626 * cached, add the block to pending list
627 */
628 list_add_tail(&edge->list[UPPER], &list);
629 } else {
630 upper = rb_entry(rb_node, struct backref_node,
631 rb_node);
632 INIT_LIST_HEAD(&edge->list[UPPER]);
633 }
634 list_add(&edge->list[LOWER], &cur->upper);
635 edge->node[UPPER] = upper;
636 edge->node[LOWER] = cur;
637
638 goto next;
639 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
640 goto next;
641 }
642
643 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
644 root = read_fs_root(rc->extent_root->fs_info, key.offset);
645 if (IS_ERR(root)) {
646 err = PTR_ERR(root);
647 goto out;
648 }
649
650 if (btrfs_root_level(&root->root_item) == cur->level) {
651 /* tree root */
652 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
653 cur->bytenr);
654 cur->root = root;
655 break;
656 }
657
658 level = cur->level + 1;
659
660 /*
661 * searching the tree to find upper level blocks
662 * reference the block.
663 */
664 path2->search_commit_root = 1;
665 path2->skip_locking = 1;
666 path2->lowest_level = level;
667 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
668 path2->lowest_level = 0;
669 if (ret < 0) {
670 err = ret;
671 goto out;
672 }
673
674 eb = path2->nodes[level];
675 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
676 cur->bytenr);
677
678 lower = cur;
679 for (; level < BTRFS_MAX_LEVEL; level++) {
680 if (!path2->nodes[level]) {
681 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
682 lower->bytenr);
683 lower->root = root;
684 break;
685 }
686
687 edge = kzalloc(sizeof(*edge), GFP_NOFS);
688 if (!edge) {
689 err = -ENOMEM;
690 goto out;
691 }
692
693 eb = path2->nodes[level];
694 rb_node = tree_search(&cache->rb_root, eb->start);
695 if (!rb_node) {
696 upper = kmalloc(sizeof(*upper), GFP_NOFS);
697 if (!upper) {
698 kfree(edge);
699 err = -ENOMEM;
700 goto out;
701 }
702 backref_node_init(upper);
703 upper->bytenr = eb->start;
704 upper->owner = btrfs_header_owner(eb);
705 upper->level = lower->level + 1;
706
707 /*
708 * if we know the block isn't shared
709 * we can void checking its backrefs.
710 */
711 if (btrfs_block_can_be_shared(root, eb))
712 upper->checked = 0;
713 else
714 upper->checked = 1;
715
716 /*
717 * add the block to pending list if we
718 * need check its backrefs. only block
719 * at 'cur->level + 1' is added to the
720 * tail of pending list. this guarantees
721 * we check backrefs from lower level
722 * blocks to upper level blocks.
723 */
724 if (!upper->checked &&
725 level == cur->level + 1) {
726 list_add_tail(&edge->list[UPPER],
727 &list);
728 } else
729 INIT_LIST_HEAD(&edge->list[UPPER]);
730 } else {
731 upper = rb_entry(rb_node, struct backref_node,
732 rb_node);
733 BUG_ON(!upper->checked);
734 INIT_LIST_HEAD(&edge->list[UPPER]);
735 }
736 list_add_tail(&edge->list[LOWER], &lower->upper);
737 edge->node[UPPER] = upper;
738 edge->node[LOWER] = lower;
739
740 if (rb_node)
741 break;
742 lower = upper;
743 upper = NULL;
744 }
745 btrfs_release_path(root, path2);
746next:
747 if (ptr < end) {
748 ptr += btrfs_extent_inline_ref_size(key.type);
749 if (ptr >= end) {
750 WARN_ON(ptr > end);
751 ptr = 0;
752 end = 0;
753 }
754 }
755 if (ptr >= end)
756 path1->slots[0]++;
757 }
758 btrfs_release_path(rc->extent_root, path1);
759
760 cur->checked = 1;
761 WARN_ON(exist);
762
763 /* the pending list isn't empty, take the first block to process */
764 if (!list_empty(&list)) {
765 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
766 list_del_init(&edge->list[UPPER]);
767 cur = edge->node[UPPER];
768 goto again;
769 }
770
771 /*
772 * everything goes well, connect backref nodes and insert backref nodes
773 * into the cache.
774 */
775 BUG_ON(!node->checked);
776 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
777 BUG_ON(rb_node);
778
779 list_for_each_entry(edge, &node->upper, list[LOWER])
780 list_add_tail(&edge->list[UPPER], &list);
781
782 while (!list_empty(&list)) {
783 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
784 list_del_init(&edge->list[UPPER]);
785 upper = edge->node[UPPER];
786
787 if (!RB_EMPTY_NODE(&upper->rb_node)) {
788 if (upper->lowest) {
789 list_del_init(&upper->lower);
790 upper->lowest = 0;
791 }
792
793 list_add_tail(&edge->list[UPPER], &upper->lower);
794 continue;
795 }
796
797 BUG_ON(!upper->checked);
798 rb_node = tree_insert(&cache->rb_root, upper->bytenr,
799 &upper->rb_node);
800 BUG_ON(rb_node);
801
802 list_add_tail(&edge->list[UPPER], &upper->lower);
803
804 list_for_each_entry(edge, &upper->upper, list[LOWER])
805 list_add_tail(&edge->list[UPPER], &list);
806 }
807out:
808 btrfs_free_path(path1);
809 btrfs_free_path(path2);
810 if (err) {
811 INIT_LIST_HEAD(&list);
812 upper = node;
813 while (upper) {
814 if (RB_EMPTY_NODE(&upper->rb_node)) {
815 list_splice_tail(&upper->upper, &list);
816 kfree(upper);
817 }
818
819 if (list_empty(&list))
820 break;
821
822 edge = list_entry(list.next, struct backref_edge,
823 list[LOWER]);
824 upper = edge->node[UPPER];
825 kfree(edge);
826 }
827 return ERR_PTR(err);
828 }
829 return node;
830}
831
832/*
833 * helper to add 'address of tree root -> reloc tree' mapping
834 */
835static int __add_reloc_root(struct btrfs_root *root)
836{
837 struct rb_node *rb_node;
838 struct mapping_node *node;
839 struct reloc_control *rc = root->fs_info->reloc_ctl;
840
841 node = kmalloc(sizeof(*node), GFP_NOFS);
842 BUG_ON(!node);
843
844 node->bytenr = root->node->start;
845 node->data = root;
846
847 spin_lock(&rc->reloc_root_tree.lock);
848 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
849 node->bytenr, &node->rb_node);
850 spin_unlock(&rc->reloc_root_tree.lock);
851 BUG_ON(rb_node);
852
853 list_add_tail(&root->root_list, &rc->reloc_roots);
854 return 0;
855}
856
857/*
858 * helper to update/delete the 'address of tree root -> reloc tree'
859 * mapping
860 */
861static int __update_reloc_root(struct btrfs_root *root, int del)
862{
863 struct rb_node *rb_node;
864 struct mapping_node *node = NULL;
865 struct reloc_control *rc = root->fs_info->reloc_ctl;
866
867 spin_lock(&rc->reloc_root_tree.lock);
868 rb_node = tree_search(&rc->reloc_root_tree.rb_root,
869 root->commit_root->start);
870 if (rb_node) {
871 node = rb_entry(rb_node, struct mapping_node, rb_node);
872 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
873 }
874 spin_unlock(&rc->reloc_root_tree.lock);
875
876 BUG_ON((struct btrfs_root *)node->data != root);
877
878 if (!del) {
879 spin_lock(&rc->reloc_root_tree.lock);
880 node->bytenr = root->node->start;
881 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
882 node->bytenr, &node->rb_node);
883 spin_unlock(&rc->reloc_root_tree.lock);
884 BUG_ON(rb_node);
885 } else {
886 list_del_init(&root->root_list);
887 kfree(node);
888 }
889 return 0;
890}
891
892/*
893 * create reloc tree for a given fs tree. reloc tree is just a
894 * snapshot of the fs tree with special root objectid.
895 */
896int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
897 struct btrfs_root *root)
898{
899 struct btrfs_root *reloc_root;
900 struct extent_buffer *eb;
901 struct btrfs_root_item *root_item;
902 struct btrfs_key root_key;
903 int ret;
904
905 if (root->reloc_root) {
906 reloc_root = root->reloc_root;
907 reloc_root->last_trans = trans->transid;
908 return 0;
909 }
910
911 if (!root->fs_info->reloc_ctl ||
912 !root->fs_info->reloc_ctl->create_reloc_root ||
913 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
914 return 0;
915
916 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
917 BUG_ON(!root_item);
918
919 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
920 root_key.type = BTRFS_ROOT_ITEM_KEY;
921 root_key.offset = root->root_key.objectid;
922
923 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
924 BTRFS_TREE_RELOC_OBJECTID);
925 BUG_ON(ret);
926
927 btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
928 memcpy(root_item, &root->root_item, sizeof(*root_item));
929 btrfs_set_root_refs(root_item, 1);
930 btrfs_set_root_bytenr(root_item, eb->start);
931 btrfs_set_root_level(root_item, btrfs_header_level(eb));
932 btrfs_set_root_generation(root_item, trans->transid);
933 memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
934 root_item->drop_level = 0;
935
936 btrfs_tree_unlock(eb);
937 free_extent_buffer(eb);
938
939 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
940 &root_key, root_item);
941 BUG_ON(ret);
942 kfree(root_item);
943
944 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
945 &root_key);
946 BUG_ON(IS_ERR(reloc_root));
947 reloc_root->last_trans = trans->transid;
948
949 __add_reloc_root(reloc_root);
950 root->reloc_root = reloc_root;
951 return 0;
952}
953
954/*
955 * update root item of reloc tree
956 */
957int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
958 struct btrfs_root *root)
959{
960 struct btrfs_root *reloc_root;
961 struct btrfs_root_item *root_item;
962 int del = 0;
963 int ret;
964
965 if (!root->reloc_root)
966 return 0;
967
968 reloc_root = root->reloc_root;
969 root_item = &reloc_root->root_item;
970
971 if (btrfs_root_refs(root_item) == 0) {
972 root->reloc_root = NULL;
973 del = 1;
974 }
975
976 __update_reloc_root(reloc_root, del);
977
978 if (reloc_root->commit_root != reloc_root->node) {
979 btrfs_set_root_node(root_item, reloc_root->node);
980 free_extent_buffer(reloc_root->commit_root);
981 reloc_root->commit_root = btrfs_root_node(reloc_root);
982 }
983
984 ret = btrfs_update_root(trans, root->fs_info->tree_root,
985 &reloc_root->root_key, root_item);
986 BUG_ON(ret);
987 return 0;
988}
989
990/*
991 * helper to find first cached inode with inode number >= objectid
992 * in a subvolume
993 */
994static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
995{
996 struct rb_node *node;
997 struct rb_node *prev;
998 struct btrfs_inode *entry;
999 struct inode *inode;
1000
1001 spin_lock(&root->inode_lock);
1002again:
1003 node = root->inode_tree.rb_node;
1004 prev = NULL;
1005 while (node) {
1006 prev = node;
1007 entry = rb_entry(node, struct btrfs_inode, rb_node);
1008
1009 if (objectid < entry->vfs_inode.i_ino)
1010 node = node->rb_left;
1011 else if (objectid > entry->vfs_inode.i_ino)
1012 node = node->rb_right;
1013 else
1014 break;
1015 }
1016 if (!node) {
1017 while (prev) {
1018 entry = rb_entry(prev, struct btrfs_inode, rb_node);
1019 if (objectid <= entry->vfs_inode.i_ino) {
1020 node = prev;
1021 break;
1022 }
1023 prev = rb_next(prev);
1024 }
1025 }
1026 while (node) {
1027 entry = rb_entry(node, struct btrfs_inode, rb_node);
1028 inode = igrab(&entry->vfs_inode);
1029 if (inode) {
1030 spin_unlock(&root->inode_lock);
1031 return inode;
1032 }
1033
1034 objectid = entry->vfs_inode.i_ino + 1;
1035 if (cond_resched_lock(&root->inode_lock))
1036 goto again;
1037
1038 node = rb_next(node);
1039 }
1040 spin_unlock(&root->inode_lock);
1041 return NULL;
1042}
1043
1044static int in_block_group(u64 bytenr,
1045 struct btrfs_block_group_cache *block_group)
1046{
1047 if (bytenr >= block_group->key.objectid &&
1048 bytenr < block_group->key.objectid + block_group->key.offset)
1049 return 1;
1050 return 0;
1051}
1052
1053/*
1054 * get new location of data
1055 */
1056static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1057 u64 bytenr, u64 num_bytes)
1058{
1059 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1060 struct btrfs_path *path;
1061 struct btrfs_file_extent_item *fi;
1062 struct extent_buffer *leaf;
1063 int ret;
1064
1065 path = btrfs_alloc_path();
1066 if (!path)
1067 return -ENOMEM;
1068
1069 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1070 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
1071 bytenr, 0);
1072 if (ret < 0)
1073 goto out;
1074 if (ret > 0) {
1075 ret = -ENOENT;
1076 goto out;
1077 }
1078
1079 leaf = path->nodes[0];
1080 fi = btrfs_item_ptr(leaf, path->slots[0],
1081 struct btrfs_file_extent_item);
1082
1083 BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1084 btrfs_file_extent_compression(leaf, fi) ||
1085 btrfs_file_extent_encryption(leaf, fi) ||
1086 btrfs_file_extent_other_encoding(leaf, fi));
1087
1088 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1089 ret = 1;
1090 goto out;
1091 }
1092
1093 if (new_bytenr)
1094 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1095 ret = 0;
1096out:
1097 btrfs_free_path(path);
1098 return ret;
1099}
1100
1101/*
1102 * update file extent items in the tree leaf to point to
1103 * the new locations.
1104 */
1105static int replace_file_extents(struct btrfs_trans_handle *trans,
1106 struct reloc_control *rc,
1107 struct btrfs_root *root,
1108 struct extent_buffer *leaf,
1109 struct list_head *inode_list)
1110{
1111 struct btrfs_key key;
1112 struct btrfs_file_extent_item *fi;
1113 struct inode *inode = NULL;
1114 struct inodevec *ivec = NULL;
1115 u64 parent;
1116 u64 bytenr;
1117 u64 new_bytenr;
1118 u64 num_bytes;
1119 u64 end;
1120 u32 nritems;
1121 u32 i;
1122 int ret;
1123 int first = 1;
1124 int dirty = 0;
1125
1126 if (rc->stage != UPDATE_DATA_PTRS)
1127 return 0;
1128
1129 /* reloc trees always use full backref */
1130 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1131 parent = leaf->start;
1132 else
1133 parent = 0;
1134
1135 nritems = btrfs_header_nritems(leaf);
1136 for (i = 0; i < nritems; i++) {
1137 cond_resched();
1138 btrfs_item_key_to_cpu(leaf, &key, i);
1139 if (key.type != BTRFS_EXTENT_DATA_KEY)
1140 continue;
1141 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1142 if (btrfs_file_extent_type(leaf, fi) ==
1143 BTRFS_FILE_EXTENT_INLINE)
1144 continue;
1145 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1146 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1147 if (bytenr == 0)
1148 continue;
1149 if (!in_block_group(bytenr, rc->block_group))
1150 continue;
1151
1152 /*
1153 * if we are modifying block in fs tree, wait for readpage
1154 * to complete and drop the extent cache
1155 */
1156 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1157 if (!ivec || ivec->nr == INODEVEC_SIZE) {
1158 ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
1159 BUG_ON(!ivec);
1160 ivec->nr = 0;
1161 list_add_tail(&ivec->list, inode_list);
1162 }
1163 if (first) {
1164 inode = find_next_inode(root, key.objectid);
1165 if (inode)
1166 ivec->inode[ivec->nr++] = inode;
1167 first = 0;
1168 } else if (inode && inode->i_ino < key.objectid) {
1169 inode = find_next_inode(root, key.objectid);
1170 if (inode)
1171 ivec->inode[ivec->nr++] = inode;
1172 }
1173 if (inode && inode->i_ino == key.objectid) {
1174 end = key.offset +
1175 btrfs_file_extent_num_bytes(leaf, fi);
1176 WARN_ON(!IS_ALIGNED(key.offset,
1177 root->sectorsize));
1178 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1179 end--;
1180 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1181 key.offset, end,
1182 GFP_NOFS);
1183 if (!ret)
1184 continue;
1185
1186 btrfs_drop_extent_cache(inode, key.offset, end,
1187 1);
1188 unlock_extent(&BTRFS_I(inode)->io_tree,
1189 key.offset, end, GFP_NOFS);
1190 }
1191 }
1192
1193 ret = get_new_location(rc->data_inode, &new_bytenr,
1194 bytenr, num_bytes);
1195 if (ret > 0)
1196 continue;
1197 BUG_ON(ret < 0);
1198
1199 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1200 dirty = 1;
1201
1202 key.offset -= btrfs_file_extent_offset(leaf, fi);
1203 ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1204 num_bytes, parent,
1205 btrfs_header_owner(leaf),
1206 key.objectid, key.offset);
1207 BUG_ON(ret);
1208
1209 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1210 parent, btrfs_header_owner(leaf),
1211 key.objectid, key.offset);
1212 BUG_ON(ret);
1213 }
1214 if (dirty)
1215 btrfs_mark_buffer_dirty(leaf);
1216 return 0;
1217}
1218
1219static noinline_for_stack
1220int memcmp_node_keys(struct extent_buffer *eb, int slot,
1221 struct btrfs_path *path, int level)
1222{
1223 struct btrfs_disk_key key1;
1224 struct btrfs_disk_key key2;
1225 btrfs_node_key(eb, &key1, slot);
1226 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1227 return memcmp(&key1, &key2, sizeof(key1));
1228}
1229
1230/*
1231 * try to replace tree blocks in fs tree with the new blocks
1232 * in reloc tree. tree blocks haven't been modified since the
1233 * reloc tree was create can be replaced.
1234 *
1235 * if a block was replaced, level of the block + 1 is returned.
1236 * if no block got replaced, 0 is returned. if there are other
1237 * errors, a negative error number is returned.
1238 */
1239static int replace_path(struct btrfs_trans_handle *trans,
1240 struct btrfs_root *dest, struct btrfs_root *src,
1241 struct btrfs_path *path, struct btrfs_key *next_key,
1242 struct extent_buffer **leaf,
1243 int lowest_level, int max_level)
1244{
1245 struct extent_buffer *eb;
1246 struct extent_buffer *parent;
1247 struct btrfs_key key;
1248 u64 old_bytenr;
1249 u64 new_bytenr;
1250 u64 old_ptr_gen;
1251 u64 new_ptr_gen;
1252 u64 last_snapshot;
1253 u32 blocksize;
1254 int level;
1255 int ret;
1256 int slot;
1257
1258 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1259 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1260 BUG_ON(lowest_level > 1 && leaf);
1261
1262 last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1263
1264 slot = path->slots[lowest_level];
1265 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1266
1267 eb = btrfs_lock_root_node(dest);
1268 btrfs_set_lock_blocking(eb);
1269 level = btrfs_header_level(eb);
1270
1271 if (level < lowest_level) {
1272 btrfs_tree_unlock(eb);
1273 free_extent_buffer(eb);
1274 return 0;
1275 }
1276
1277 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1278 BUG_ON(ret);
1279 btrfs_set_lock_blocking(eb);
1280
1281 if (next_key) {
1282 next_key->objectid = (u64)-1;
1283 next_key->type = (u8)-1;
1284 next_key->offset = (u64)-1;
1285 }
1286
1287 parent = eb;
1288 while (1) {
1289 level = btrfs_header_level(parent);
1290 BUG_ON(level < lowest_level);
1291
1292 ret = btrfs_bin_search(parent, &key, level, &slot);
1293 if (ret && slot > 0)
1294 slot--;
1295
1296 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1297 btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1298
1299 old_bytenr = btrfs_node_blockptr(parent, slot);
1300 blocksize = btrfs_level_size(dest, level - 1);
1301 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1302
1303 if (level <= max_level) {
1304 eb = path->nodes[level];
1305 new_bytenr = btrfs_node_blockptr(eb,
1306 path->slots[level]);
1307 new_ptr_gen = btrfs_node_ptr_generation(eb,
1308 path->slots[level]);
1309 } else {
1310 new_bytenr = 0;
1311 new_ptr_gen = 0;
1312 }
1313
1314 if (new_bytenr > 0 && new_bytenr == old_bytenr) {
1315 WARN_ON(1);
1316 ret = level;
1317 break;
1318 }
1319
1320 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1321 memcmp_node_keys(parent, slot, path, level)) {
1322 if (level <= lowest_level && !leaf) {
1323 ret = 0;
1324 break;
1325 }
1326
1327 eb = read_tree_block(dest, old_bytenr, blocksize,
1328 old_ptr_gen);
1329 btrfs_tree_lock(eb);
1330 ret = btrfs_cow_block(trans, dest, eb, parent,
1331 slot, &eb);
1332 BUG_ON(ret);
1333 btrfs_set_lock_blocking(eb);
1334
1335 if (level <= lowest_level) {
1336 *leaf = eb;
1337 ret = 0;
1338 break;
1339 }
1340
1341 btrfs_tree_unlock(parent);
1342 free_extent_buffer(parent);
1343
1344 parent = eb;
1345 continue;
1346 }
1347
1348 btrfs_node_key_to_cpu(path->nodes[level], &key,
1349 path->slots[level]);
1350 btrfs_release_path(src, path);
1351
1352 path->lowest_level = level;
1353 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1354 path->lowest_level = 0;
1355 BUG_ON(ret);
1356
1357 /*
1358 * swap blocks in fs tree and reloc tree.
1359 */
1360 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1361 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1362 btrfs_mark_buffer_dirty(parent);
1363
1364 btrfs_set_node_blockptr(path->nodes[level],
1365 path->slots[level], old_bytenr);
1366 btrfs_set_node_ptr_generation(path->nodes[level],
1367 path->slots[level], old_ptr_gen);
1368 btrfs_mark_buffer_dirty(path->nodes[level]);
1369
1370 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
1371 path->nodes[level]->start,
1372 src->root_key.objectid, level - 1, 0);
1373 BUG_ON(ret);
1374 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
1375 0, dest->root_key.objectid, level - 1,
1376 0);
1377 BUG_ON(ret);
1378
1379 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1380 path->nodes[level]->start,
1381 src->root_key.objectid, level - 1, 0);
1382 BUG_ON(ret);
1383
1384 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1385 0, dest->root_key.objectid, level - 1,
1386 0);
1387 BUG_ON(ret);
1388
1389 btrfs_unlock_up_safe(path, 0);
1390
1391 ret = level;
1392 break;
1393 }
1394 btrfs_tree_unlock(parent);
1395 free_extent_buffer(parent);
1396 return ret;
1397}
1398
1399/*
1400 * helper to find next relocated block in reloc tree
1401 */
1402static noinline_for_stack
1403int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1404 int *level)
1405{
1406 struct extent_buffer *eb;
1407 int i;
1408 u64 last_snapshot;
1409 u32 nritems;
1410
1411 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1412
1413 for (i = 0; i < *level; i++) {
1414 free_extent_buffer(path->nodes[i]);
1415 path->nodes[i] = NULL;
1416 }
1417
1418 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1419 eb = path->nodes[i];
1420 nritems = btrfs_header_nritems(eb);
1421 while (path->slots[i] + 1 < nritems) {
1422 path->slots[i]++;
1423 if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1424 last_snapshot)
1425 continue;
1426
1427 *level = i;
1428 return 0;
1429 }
1430 free_extent_buffer(path->nodes[i]);
1431 path->nodes[i] = NULL;
1432 }
1433 return 1;
1434}
1435
1436/*
1437 * walk down reloc tree to find relocated block of lowest level
1438 */
1439static noinline_for_stack
1440int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1441 int *level)
1442{
1443 struct extent_buffer *eb = NULL;
1444 int i;
1445 u64 bytenr;
1446 u64 ptr_gen = 0;
1447 u64 last_snapshot;
1448 u32 blocksize;
1449 u32 nritems;
1450
1451 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1452
1453 for (i = *level; i > 0; i--) {
1454 eb = path->nodes[i];
1455 nritems = btrfs_header_nritems(eb);
1456 while (path->slots[i] < nritems) {
1457 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1458 if (ptr_gen > last_snapshot)
1459 break;
1460 path->slots[i]++;
1461 }
1462 if (path->slots[i] >= nritems) {
1463 if (i == *level)
1464 break;
1465 *level = i + 1;
1466 return 0;
1467 }
1468 if (i == 1) {
1469 *level = i;
1470 return 0;
1471 }
1472
1473 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1474 blocksize = btrfs_level_size(root, i - 1);
1475 eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1476 BUG_ON(btrfs_header_level(eb) != i - 1);
1477 path->nodes[i - 1] = eb;
1478 path->slots[i - 1] = 0;
1479 }
1480 return 1;
1481}
1482
1483/*
1484 * invalidate extent cache for file extents whose key in range of
1485 * [min_key, max_key)
1486 */
1487static int invalidate_extent_cache(struct btrfs_root *root,
1488 struct btrfs_key *min_key,
1489 struct btrfs_key *max_key)
1490{
1491 struct inode *inode = NULL;
1492 u64 objectid;
1493 u64 start, end;
1494
1495 objectid = min_key->objectid;
1496 while (1) {
1497 cond_resched();
1498 iput(inode);
1499
1500 if (objectid > max_key->objectid)
1501 break;
1502
1503 inode = find_next_inode(root, objectid);
1504 if (!inode)
1505 break;
1506
1507 if (inode->i_ino > max_key->objectid) {
1508 iput(inode);
1509 break;
1510 }
1511
1512 objectid = inode->i_ino + 1;
1513 if (!S_ISREG(inode->i_mode))
1514 continue;
1515
1516 if (unlikely(min_key->objectid == inode->i_ino)) {
1517 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1518 continue;
1519 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1520 start = 0;
1521 else {
1522 start = min_key->offset;
1523 WARN_ON(!IS_ALIGNED(start, root->sectorsize));
1524 }
1525 } else {
1526 start = 0;
1527 }
1528
1529 if (unlikely(max_key->objectid == inode->i_ino)) {
1530 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1531 continue;
1532 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1533 end = (u64)-1;
1534 } else {
1535 if (max_key->offset == 0)
1536 continue;
1537 end = max_key->offset;
1538 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1539 end--;
1540 }
1541 } else {
1542 end = (u64)-1;
1543 }
1544
1545 /* the lock_extent waits for readpage to complete */
1546 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1547 btrfs_drop_extent_cache(inode, start, end, 1);
1548 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1549 }
1550 return 0;
1551}
1552
1553static int find_next_key(struct btrfs_path *path, int level,
1554 struct btrfs_key *key)
1555
1556{
1557 while (level < BTRFS_MAX_LEVEL) {
1558 if (!path->nodes[level])
1559 break;
1560 if (path->slots[level] + 1 <
1561 btrfs_header_nritems(path->nodes[level])) {
1562 btrfs_node_key_to_cpu(path->nodes[level], key,
1563 path->slots[level] + 1);
1564 return 0;
1565 }
1566 level++;
1567 }
1568 return 1;
1569}
1570
1571/*
1572 * merge the relocated tree blocks in reloc tree with corresponding
1573 * fs tree.
1574 */
1575static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1576 struct btrfs_root *root)
1577{
1578 LIST_HEAD(inode_list);
1579 struct btrfs_key key;
1580 struct btrfs_key next_key;
1581 struct btrfs_trans_handle *trans;
1582 struct btrfs_root *reloc_root;
1583 struct btrfs_root_item *root_item;
1584 struct btrfs_path *path;
1585 struct extent_buffer *leaf = NULL;
1586 unsigned long nr;
1587 int level;
1588 int max_level;
1589 int replaced = 0;
1590 int ret;
1591 int err = 0;
1592
1593 path = btrfs_alloc_path();
1594 if (!path)
1595 return -ENOMEM;
1596
1597 reloc_root = root->reloc_root;
1598 root_item = &reloc_root->root_item;
1599
1600 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1601 level = btrfs_root_level(root_item);
1602 extent_buffer_get(reloc_root->node);
1603 path->nodes[level] = reloc_root->node;
1604 path->slots[level] = 0;
1605 } else {
1606 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1607
1608 level = root_item->drop_level;
1609 BUG_ON(level == 0);
1610 path->lowest_level = level;
1611 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
1612 if (ret < 0) {
1613 btrfs_free_path(path);
1614 return ret;
1615 }
1616
1617 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
1618 path->slots[level]);
1619 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
1620
1621 btrfs_unlock_up_safe(path, 0);
1622 }
1623
1624 if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
1625 trans = btrfs_start_transaction(root, 1);
1626
1627 leaf = path->nodes[0];
1628 btrfs_item_key_to_cpu(leaf, &key, 0);
1629 btrfs_release_path(reloc_root, path);
1630
1631 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1632 if (ret < 0) {
1633 err = ret;
1634 goto out;
1635 }
1636
1637 leaf = path->nodes[0];
1638 btrfs_unlock_up_safe(path, 1);
1639 ret = replace_file_extents(trans, rc, root, leaf,
1640 &inode_list);
1641 if (ret < 0)
1642 err = ret;
1643 goto out;
1644 }
1645
1646 memset(&next_key, 0, sizeof(next_key));
1647
1648 while (1) {
1649 leaf = NULL;
1650 replaced = 0;
1651 trans = btrfs_start_transaction(root, 1);
1652 max_level = level;
1653
1654 ret = walk_down_reloc_tree(reloc_root, path, &level);
1655 if (ret < 0) {
1656 err = ret;
1657 goto out;
1658 }
1659 if (ret > 0)
1660 break;
1661
1662 if (!find_next_key(path, level, &key) &&
1663 btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
1664 ret = 0;
1665 } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
1666 ret = replace_path(trans, root, reloc_root,
1667 path, &next_key, &leaf,
1668 level, max_level);
1669 } else {
1670 ret = replace_path(trans, root, reloc_root,
1671 path, &next_key, NULL,
1672 level, max_level);
1673 }
1674 if (ret < 0) {
1675 err = ret;
1676 goto out;
1677 }
1678
1679 if (ret > 0) {
1680 level = ret;
1681 btrfs_node_key_to_cpu(path->nodes[level], &key,
1682 path->slots[level]);
1683 replaced = 1;
1684 } else if (leaf) {
1685 /*
1686 * no block got replaced, try replacing file extents
1687 */
1688 btrfs_item_key_to_cpu(leaf, &key, 0);
1689 ret = replace_file_extents(trans, rc, root, leaf,
1690 &inode_list);
1691 btrfs_tree_unlock(leaf);
1692 free_extent_buffer(leaf);
1693 BUG_ON(ret < 0);
1694 }
1695
1696 ret = walk_up_reloc_tree(reloc_root, path, &level);
1697 if (ret > 0)
1698 break;
1699
1700 BUG_ON(level == 0);
1701 /*
1702 * save the merging progress in the drop_progress.
1703 * this is OK since root refs == 1 in this case.
1704 */
1705 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
1706 path->slots[level]);
1707 root_item->drop_level = level;
1708
1709 nr = trans->blocks_used;
1710 btrfs_end_transaction(trans, root);
1711
1712 btrfs_btree_balance_dirty(root, nr);
1713
1714 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1715 invalidate_extent_cache(root, &key, &next_key);
1716 }
1717
1718 /*
1719 * handle the case only one block in the fs tree need to be
1720 * relocated and the block is tree root.
1721 */
1722 leaf = btrfs_lock_root_node(root);
1723 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
1724 btrfs_tree_unlock(leaf);
1725 free_extent_buffer(leaf);
1726 if (ret < 0)
1727 err = ret;
1728out:
1729 btrfs_free_path(path);
1730
1731 if (err == 0) {
1732 memset(&root_item->drop_progress, 0,
1733 sizeof(root_item->drop_progress));
1734 root_item->drop_level = 0;
1735 btrfs_set_root_refs(root_item, 0);
1736 }
1737
1738 nr = trans->blocks_used;
1739 btrfs_end_transaction(trans, root);
1740
1741 btrfs_btree_balance_dirty(root, nr);
1742
1743 /*
1744 * put inodes while we aren't holding the tree locks
1745 */
1746 while (!list_empty(&inode_list)) {
1747 struct inodevec *ivec;
1748 ivec = list_entry(inode_list.next, struct inodevec, list);
1749 list_del(&ivec->list);
1750 while (ivec->nr > 0) {
1751 ivec->nr--;
1752 iput(ivec->inode[ivec->nr]);
1753 }
1754 kfree(ivec);
1755 }
1756
1757 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1758 invalidate_extent_cache(root, &key, &next_key);
1759
1760 return err;
1761}
1762
1763/*
1764 * callback for the work threads.
1765 * this function merges reloc tree with corresponding fs tree,
1766 * and then drops the reloc tree.
1767 */
1768static void merge_func(struct btrfs_work *work)
1769{
1770 struct btrfs_trans_handle *trans;
1771 struct btrfs_root *root;
1772 struct btrfs_root *reloc_root;
1773 struct async_merge *async;
1774
1775 async = container_of(work, struct async_merge, work);
1776 reloc_root = async->root;
1777
1778 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
1779 root = read_fs_root(reloc_root->fs_info,
1780 reloc_root->root_key.offset);
1781 BUG_ON(IS_ERR(root));
1782 BUG_ON(root->reloc_root != reloc_root);
1783
1784 merge_reloc_root(async->rc, root);
1785
1786 trans = btrfs_start_transaction(root, 1);
1787 btrfs_update_reloc_root(trans, root);
1788 btrfs_end_transaction(trans, root);
1789 }
1790
1791 btrfs_drop_dead_root(reloc_root);
1792
1793 if (atomic_dec_and_test(async->num_pending))
1794 complete(async->done);
1795
1796 kfree(async);
1797}
1798
1799static int merge_reloc_roots(struct reloc_control *rc)
1800{
1801 struct async_merge *async;
1802 struct btrfs_root *root;
1803 struct completion done;
1804 atomic_t num_pending;
1805
1806 init_completion(&done);
1807 atomic_set(&num_pending, 1);
1808
1809 while (!list_empty(&rc->reloc_roots)) {
1810 root = list_entry(rc->reloc_roots.next,
1811 struct btrfs_root, root_list);
1812 list_del_init(&root->root_list);
1813
1814 async = kmalloc(sizeof(*async), GFP_NOFS);
1815 BUG_ON(!async);
1816 async->work.func = merge_func;
1817 async->work.flags = 0;
1818 async->rc = rc;
1819 async->root = root;
1820 async->done = &done;
1821 async->num_pending = &num_pending;
1822 atomic_inc(&num_pending);
1823 btrfs_queue_worker(&rc->workers, &async->work);
1824 }
1825
1826 if (!atomic_dec_and_test(&num_pending))
1827 wait_for_completion(&done);
1828
1829 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
1830 return 0;
1831}
1832
1833static void free_block_list(struct rb_root *blocks)
1834{
1835 struct tree_block *block;
1836 struct rb_node *rb_node;
1837 while ((rb_node = rb_first(blocks))) {
1838 block = rb_entry(rb_node, struct tree_block, rb_node);
1839 rb_erase(rb_node, blocks);
1840 kfree(block);
1841 }
1842}
1843
1844static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
1845 struct btrfs_root *reloc_root)
1846{
1847 struct btrfs_root *root;
1848
1849 if (reloc_root->last_trans == trans->transid)
1850 return 0;
1851
1852 root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
1853 BUG_ON(IS_ERR(root));
1854 BUG_ON(root->reloc_root != reloc_root);
1855
1856 return btrfs_record_root_in_trans(trans, root);
1857}
1858
1859/*
1860 * select one tree from trees that references the block.
1861 * for blocks in refernce counted trees, we preper reloc tree.
1862 * if no reloc tree found and reloc_only is true, NULL is returned.
1863 */
1864static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
1865 struct backref_node *node,
1866 struct backref_edge *edges[],
1867 int *nr, int reloc_only)
1868{
1869 struct backref_node *next;
1870 struct btrfs_root *root;
1871 int index;
1872 int loop = 0;
1873again:
1874 index = 0;
1875 next = node;
1876 while (1) {
1877 cond_resched();
1878 next = walk_up_backref(next, edges, &index);
1879 root = next->root;
1880 if (!root) {
1881 BUG_ON(!node->old_root);
1882 goto skip;
1883 }
1884
1885 /* no other choice for non-refernce counted tree */
1886 if (!root->ref_cows) {
1887 BUG_ON(reloc_only);
1888 break;
1889 }
1890
1891 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1892 record_reloc_root_in_trans(trans, root);
1893 break;
1894 }
1895
1896 if (loop) {
1897 btrfs_record_root_in_trans(trans, root);
1898 break;
1899 }
1900
1901 if (reloc_only || next != node) {
1902 if (!root->reloc_root)
1903 btrfs_record_root_in_trans(trans, root);
1904 root = root->reloc_root;
1905 /*
1906 * if the reloc tree was created in current
1907 * transation, there is no node in backref tree
1908 * corresponds to the root of the reloc tree.
1909 */
1910 if (btrfs_root_last_snapshot(&root->root_item) ==
1911 trans->transid - 1)
1912 break;
1913 }
1914skip:
1915 root = NULL;
1916 next = walk_down_backref(edges, &index);
1917 if (!next || next->level <= node->level)
1918 break;
1919 }
1920
1921 if (!root && !loop && !reloc_only) {
1922 loop = 1;
1923 goto again;
1924 }
1925
1926 if (root)
1927 *nr = index;
1928 else
1929 *nr = 0;
1930
1931 return root;
1932}
1933
1934static noinline_for_stack
1935struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
1936 struct backref_node *node)
1937{
1938 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
1939 int nr;
1940 return __select_one_root(trans, node, edges, &nr, 0);
1941}
1942
1943static noinline_for_stack
1944struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
1945 struct backref_node *node,
1946 struct backref_edge *edges[], int *nr)
1947{
1948 return __select_one_root(trans, node, edges, nr, 1);
1949}
1950
1951static void grab_path_buffers(struct btrfs_path *path,
1952 struct backref_node *node,
1953 struct backref_edge *edges[], int nr)
1954{
1955 int i = 0;
1956 while (1) {
1957 drop_node_buffer(node);
1958 node->eb = path->nodes[node->level];
1959 BUG_ON(!node->eb);
1960 if (path->locks[node->level])
1961 node->locked = 1;
1962 path->nodes[node->level] = NULL;
1963 path->locks[node->level] = 0;
1964
1965 if (i >= nr)
1966 break;
1967
1968 edges[i]->blockptr = node->eb->start;
1969 node = edges[i]->node[UPPER];
1970 i++;
1971 }
1972}
1973
1974/*
1975 * relocate a block tree, and then update pointers in upper level
1976 * blocks that reference the block to point to the new location.
1977 *
1978 * if called by link_to_upper, the block has already been relocated.
1979 * in that case this function just updates pointers.
1980 */
1981static int do_relocation(struct btrfs_trans_handle *trans,
1982 struct backref_node *node,
1983 struct btrfs_key *key,
1984 struct btrfs_path *path, int lowest)
1985{
1986 struct backref_node *upper;
1987 struct backref_edge *edge;
1988 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
1989 struct btrfs_root *root;
1990 struct extent_buffer *eb;
1991 u32 blocksize;
1992 u64 bytenr;
1993 u64 generation;
1994 int nr;
1995 int slot;
1996 int ret;
1997 int err = 0;
1998
1999 BUG_ON(lowest && node->eb);
2000
2001 path->lowest_level = node->level + 1;
2002 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2003 cond_resched();
2004 if (node->eb && node->eb->start == edge->blockptr)
2005 continue;
2006
2007 upper = edge->node[UPPER];
2008 root = select_reloc_root(trans, upper, edges, &nr);
2009 if (!root)
2010 continue;
2011
2012 if (upper->eb && !upper->locked)
2013 drop_node_buffer(upper);
2014
2015 if (!upper->eb) {
2016 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2017 if (ret < 0) {
2018 err = ret;
2019 break;
2020 }
2021 BUG_ON(ret > 0);
2022
2023 slot = path->slots[upper->level];
2024
2025 btrfs_unlock_up_safe(path, upper->level + 1);
2026 grab_path_buffers(path, upper, edges, nr);
2027
2028 btrfs_release_path(NULL, path);
2029 } else {
2030 ret = btrfs_bin_search(upper->eb, key, upper->level,
2031 &slot);
2032 BUG_ON(ret);
2033 }
2034
2035 bytenr = btrfs_node_blockptr(upper->eb, slot);
2036 if (!lowest) {
2037 if (node->eb->start == bytenr) {
2038 btrfs_tree_unlock(upper->eb);
2039 upper->locked = 0;
2040 continue;
2041 }
2042 } else {
2043 BUG_ON(node->bytenr != bytenr);
2044 }
2045
2046 blocksize = btrfs_level_size(root, node->level);
2047 generation = btrfs_node_ptr_generation(upper->eb, slot);
2048 eb = read_tree_block(root, bytenr, blocksize, generation);
2049 btrfs_tree_lock(eb);
2050 btrfs_set_lock_blocking(eb);
2051
2052 if (!node->eb) {
2053 ret = btrfs_cow_block(trans, root, eb, upper->eb,
2054 slot, &eb);
2055 if (ret < 0) {
2056 err = ret;
2057 break;
2058 }
2059 btrfs_set_lock_blocking(eb);
2060 node->eb = eb;
2061 node->locked = 1;
2062 } else {
2063 btrfs_set_node_blockptr(upper->eb, slot,
2064 node->eb->start);
2065 btrfs_set_node_ptr_generation(upper->eb, slot,
2066 trans->transid);
2067 btrfs_mark_buffer_dirty(upper->eb);
2068
2069 ret = btrfs_inc_extent_ref(trans, root,
2070 node->eb->start, blocksize,
2071 upper->eb->start,
2072 btrfs_header_owner(upper->eb),
2073 node->level, 0);
2074 BUG_ON(ret);
2075
2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2077 BUG_ON(ret);
2078
2079 btrfs_tree_unlock(eb);
2080 free_extent_buffer(eb);
2081 }
2082 if (!lowest) {
2083 btrfs_tree_unlock(upper->eb);
2084 upper->locked = 0;
2085 }
2086 }
2087 path->lowest_level = 0;
2088 return err;
2089}
2090
2091static int link_to_upper(struct btrfs_trans_handle *trans,
2092 struct backref_node *node,
2093 struct btrfs_path *path)
2094{
2095 struct btrfs_key key;
2096 if (!node->eb || list_empty(&node->upper))
2097 return 0;
2098
2099 btrfs_node_key_to_cpu(node->eb, &key, 0);
2100 return do_relocation(trans, node, &key, path, 0);
2101}
2102
2103static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2104 struct backref_cache *cache,
2105 struct btrfs_path *path)
2106{
2107 struct backref_node *node;
2108 int level;
2109 int ret;
2110 int err = 0;
2111
2112 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2113 while (!list_empty(&cache->pending[level])) {
2114 node = list_entry(cache->pending[level].next,
2115 struct backref_node, lower);
2116 BUG_ON(node->level != level);
2117
2118 ret = link_to_upper(trans, node, path);
2119 if (ret < 0)
2120 err = ret;
2121 /*
2122 * this remove the node from the pending list and
2123 * may add some other nodes to the level + 1
2124 * pending list
2125 */
2126 remove_backref_node(cache, node);
2127 }
2128 }
2129 BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
2130 return err;
2131}
2132
2133static void mark_block_processed(struct reloc_control *rc,
2134 struct backref_node *node)
2135{
2136 u32 blocksize;
2137 if (node->level == 0 ||
2138 in_block_group(node->bytenr, rc->block_group)) {
2139 blocksize = btrfs_level_size(rc->extent_root, node->level);
2140 set_extent_bits(&rc->processed_blocks, node->bytenr,
2141 node->bytenr + blocksize - 1, EXTENT_DIRTY,
2142 GFP_NOFS);
2143 }
2144 node->processed = 1;
2145}
2146
2147/*
2148 * mark a block and all blocks directly/indirectly reference the block
2149 * as processed.
2150 */
2151static void update_processed_blocks(struct reloc_control *rc,
2152 struct backref_node *node)
2153{
2154 struct backref_node *next = node;
2155 struct backref_edge *edge;
2156 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2157 int index = 0;
2158
2159 while (next) {
2160 cond_resched();
2161 while (1) {
2162 if (next->processed)
2163 break;
2164
2165 mark_block_processed(rc, next);
2166
2167 if (list_empty(&next->upper))
2168 break;
2169
2170 edge = list_entry(next->upper.next,
2171 struct backref_edge, list[LOWER]);
2172 edges[index++] = edge;
2173 next = edge->node[UPPER];
2174 }
2175 next = walk_down_backref(edges, &index);
2176 }
2177}
2178
2179static int tree_block_processed(u64 bytenr, u32 blocksize,
2180 struct reloc_control *rc)
2181{
2182 if (test_range_bit(&rc->processed_blocks, bytenr,
2183 bytenr + blocksize - 1, EXTENT_DIRTY, 1))
2184 return 1;
2185 return 0;
2186}
2187
2188/*
2189 * check if there are any file extent pointers in the leaf point to
2190 * data require processing
2191 */
2192static int check_file_extents(struct reloc_control *rc,
2193 u64 bytenr, u32 blocksize, u64 ptr_gen)
2194{
2195 struct btrfs_key found_key;
2196 struct btrfs_file_extent_item *fi;
2197 struct extent_buffer *leaf;
2198 u32 nritems;
2199 int i;
2200 int ret = 0;
2201
2202 leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);
2203
2204 nritems = btrfs_header_nritems(leaf);
2205 for (i = 0; i < nritems; i++) {
2206 cond_resched();
2207 btrfs_item_key_to_cpu(leaf, &found_key, i);
2208 if (found_key.type != BTRFS_EXTENT_DATA_KEY)
2209 continue;
2210 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2211 if (btrfs_file_extent_type(leaf, fi) ==
2212 BTRFS_FILE_EXTENT_INLINE)
2213 continue;
2214 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2215 if (bytenr == 0)
2216 continue;
2217 if (in_block_group(bytenr, rc->block_group)) {
2218 ret = 1;
2219 break;
2220 }
2221 }
2222 free_extent_buffer(leaf);
2223 return ret;
2224}
2225
2226/*
2227 * scan child blocks of a given block to find blocks require processing
2228 */
2229static int add_child_blocks(struct btrfs_trans_handle *trans,
2230 struct reloc_control *rc,
2231 struct backref_node *node,
2232 struct rb_root *blocks)
2233{
2234 struct tree_block *block;
2235 struct rb_node *rb_node;
2236 u64 bytenr;
2237 u64 ptr_gen;
2238 u32 blocksize;
2239 u32 nritems;
2240 int i;
2241 int err = 0;
2242
2243 nritems = btrfs_header_nritems(node->eb);
2244 blocksize = btrfs_level_size(rc->extent_root, node->level - 1);
2245 for (i = 0; i < nritems; i++) {
2246 cond_resched();
2247 bytenr = btrfs_node_blockptr(node->eb, i);
2248 ptr_gen = btrfs_node_ptr_generation(node->eb, i);
2249 if (ptr_gen == trans->transid)
2250 continue;
2251 if (!in_block_group(bytenr, rc->block_group) &&
2252 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
2253 continue;
2254 if (tree_block_processed(bytenr, blocksize, rc))
2255 continue;
2256
2257 readahead_tree_block(rc->extent_root,
2258 bytenr, blocksize, ptr_gen);
2259 }
2260
2261 for (i = 0; i < nritems; i++) {
2262 cond_resched();
2263 bytenr = btrfs_node_blockptr(node->eb, i);
2264 ptr_gen = btrfs_node_ptr_generation(node->eb, i);
2265 if (ptr_gen == trans->transid)
2266 continue;
2267 if (!in_block_group(bytenr, rc->block_group) &&
2268 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
2269 continue;
2270 if (tree_block_processed(bytenr, blocksize, rc))
2271 continue;
2272 if (!in_block_group(bytenr, rc->block_group) &&
2273 !check_file_extents(rc, bytenr, blocksize, ptr_gen))
2274 continue;
2275
2276 block = kmalloc(sizeof(*block), GFP_NOFS);
2277 if (!block) {
2278 err = -ENOMEM;
2279 break;
2280 }
2281 block->bytenr = bytenr;
2282 btrfs_node_key_to_cpu(node->eb, &block->key, i);
2283 block->level = node->level - 1;
2284 block->key_ready = 1;
2285 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2286 BUG_ON(rb_node);
2287 }
2288 if (err)
2289 free_block_list(blocks);
2290 return err;
2291}
2292
2293/*
2294 * find adjacent blocks require processing
2295 */
2296static noinline_for_stack
2297int add_adjacent_blocks(struct btrfs_trans_handle *trans,
2298 struct reloc_control *rc,
2299 struct backref_cache *cache,
2300 struct rb_root *blocks, int level,
2301 struct backref_node **upper)
2302{
2303 struct backref_node *node;
2304 int ret = 0;
2305
2306 WARN_ON(!list_empty(&cache->pending[level]));
2307
2308 if (list_empty(&cache->pending[level + 1]))
2309 return 1;
2310
2311 node = list_entry(cache->pending[level + 1].next,
2312 struct backref_node, lower);
2313 if (node->eb)
2314 ret = add_child_blocks(trans, rc, node, blocks);
2315
2316 *upper = node;
2317 return ret;
2318}
2319
2320static int get_tree_block_key(struct reloc_control *rc,
2321 struct tree_block *block)
2322{
2323 struct extent_buffer *eb;
2324
2325 BUG_ON(block->key_ready);
2326 eb = read_tree_block(rc->extent_root, block->bytenr,
2327 block->key.objectid, block->key.offset);
2328 WARN_ON(btrfs_header_level(eb) != block->level);
2329 if (block->level == 0)
2330 btrfs_item_key_to_cpu(eb, &block->key, 0);
2331 else
2332 btrfs_node_key_to_cpu(eb, &block->key, 0);
2333 free_extent_buffer(eb);
2334 block->key_ready = 1;
2335 return 0;
2336}
2337
2338static int reada_tree_block(struct reloc_control *rc,
2339 struct tree_block *block)
2340{
2341 BUG_ON(block->key_ready);
2342 readahead_tree_block(rc->extent_root, block->bytenr,
2343 block->key.objectid, block->key.offset);
2344 return 0;
2345}
2346
2347/*
2348 * helper function to relocate a tree block
2349 */
2350static int relocate_tree_block(struct btrfs_trans_handle *trans,
2351 struct reloc_control *rc,
2352 struct backref_node *node,
2353 struct btrfs_key *key,
2354 struct btrfs_path *path)
2355{
2356 struct btrfs_root *root;
2357 int ret;
2358
2359 root = select_one_root(trans, node);
2360 if (unlikely(!root)) {
2361 rc->found_old_snapshot = 1;
2362 update_processed_blocks(rc, node);
2363 return 0;
2364 }
2365
2366 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2367 ret = do_relocation(trans, node, key, path, 1);
2368 if (ret < 0)
2369 goto out;
2370 if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
2371 ret = replace_file_extents(trans, rc, root,
2372 node->eb, NULL);
2373 if (ret < 0)
2374 goto out;
2375 }
2376 drop_node_buffer(node);
2377 } else if (!root->ref_cows) {
2378 path->lowest_level = node->level;
2379 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2380 btrfs_release_path(root, path);
2381 if (ret < 0)
2382 goto out;
2383 } else if (root != node->root) {
2384 WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
2385 }
2386
2387 update_processed_blocks(rc, node);
2388 ret = 0;
2389out:
2390 drop_node_buffer(node);
2391 return ret;
2392}
2393
2394/*
2395 * relocate a list of blocks
2396 */
2397static noinline_for_stack
2398int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2399 struct reloc_control *rc, struct rb_root *blocks)
2400{
2401 struct backref_cache *cache;
2402 struct backref_node *node;
2403 struct btrfs_path *path;
2404 struct tree_block *block;
2405 struct rb_node *rb_node;
2406 int level = -1;
2407 int ret;
2408 int err = 0;
2409
2410 path = btrfs_alloc_path();
2411 if (!path)
2412 return -ENOMEM;
2413
2414 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2415 if (!cache) {
2416 btrfs_free_path(path);
2417 return -ENOMEM;
2418 }
2419
2420 backref_cache_init(cache);
2421
2422 rb_node = rb_first(blocks);
2423 while (rb_node) {
2424 block = rb_entry(rb_node, struct tree_block, rb_node);
2425 if (level == -1)
2426 level = block->level;
2427 else
2428 BUG_ON(level != block->level);
2429 if (!block->key_ready)
2430 reada_tree_block(rc, block);
2431 rb_node = rb_next(rb_node);
2432 }
2433
2434 rb_node = rb_first(blocks);
2435 while (rb_node) {
2436 block = rb_entry(rb_node, struct tree_block, rb_node);
2437 if (!block->key_ready)
2438 get_tree_block_key(rc, block);
2439 rb_node = rb_next(rb_node);
2440 }
2441
2442 rb_node = rb_first(blocks);
2443 while (rb_node) {
2444 block = rb_entry(rb_node, struct tree_block, rb_node);
2445
2446 node = build_backref_tree(rc, cache, &block->key,
2447 block->level, block->bytenr);
2448 if (IS_ERR(node)) {
2449 err = PTR_ERR(node);
2450 goto out;
2451 }
2452
2453 ret = relocate_tree_block(trans, rc, node, &block->key,
2454 path);
2455 if (ret < 0) {
2456 err = ret;
2457 goto out;
2458 }
2459 remove_backref_node(cache, node);
2460 rb_node = rb_next(rb_node);
2461 }
2462
2463 if (level > 0)
2464 goto out;
2465
2466 free_block_list(blocks);
2467
2468 /*
2469 * now backrefs of some upper level tree blocks have been cached,
2470 * try relocating blocks referenced by these upper level blocks.
2471 */
2472 while (1) {
2473 struct backref_node *upper = NULL;
2474 if (trans->transaction->in_commit ||
2475 trans->transaction->delayed_refs.flushing)
2476 break;
2477
2478 ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
2479 &upper);
2480 if (ret < 0)
2481 err = ret;
2482 if (ret != 0)
2483 break;
2484
2485 rb_node = rb_first(blocks);
2486 while (rb_node) {
2487 block = rb_entry(rb_node, struct tree_block, rb_node);
2488 if (trans->transaction->in_commit ||
2489 trans->transaction->delayed_refs.flushing)
2490 goto out;
2491 BUG_ON(!block->key_ready);
2492 node = build_backref_tree(rc, cache, &block->key,
2493 level, block->bytenr);
2494 if (IS_ERR(node)) {
2495 err = PTR_ERR(node);
2496 goto out;
2497 }
2498
2499 ret = relocate_tree_block(trans, rc, node,
2500 &block->key, path);
2501 if (ret < 0) {
2502 err = ret;
2503 goto out;
2504 }
2505 remove_backref_node(cache, node);
2506 rb_node = rb_next(rb_node);
2507 }
2508 free_block_list(blocks);
2509
2510 if (upper) {
2511 ret = link_to_upper(trans, upper, path);
2512 if (ret < 0) {
2513 err = ret;
2514 break;
2515 }
2516 remove_backref_node(cache, upper);
2517 }
2518 }
2519out:
2520 free_block_list(blocks);
2521
2522 ret = finish_pending_nodes(trans, cache, path);
2523 if (ret < 0)
2524 err = ret;
2525
2526 kfree(cache);
2527 btrfs_free_path(path);
2528 return err;
2529}
2530
/*
 * Read the pages of @inode covering [start, start + len) into the page
 * cache and mark them dirty and delalloc.
 *
 * Runs under inode->i_mutex.  The page range is invalidated first; a
 * page that still has an ordered extent is released, the ordered extent
 * is started and waited for, and the page is retried.
 *
 * Returns 0 on success, -ENOMEM if the readahead state or a page cannot
 * be allocated, -EIO if a page is not uptodate after reading, or the
 * error from invalidate_inode_pages2_range().
 */
static noinline_for_stack
int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
{
	u64 page_start;
	u64 page_end;
	unsigned long i;
	unsigned long first_index;
	unsigned long last_index;
	unsigned int total_read = 0;
	unsigned int total_dirty = 0;
	struct page *page;
	struct file_ra_state *ra;
	struct btrfs_ordered_extent *ordered;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	int ret = 0;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	mutex_lock(&inode->i_mutex);
	first_index = start >> PAGE_CACHE_SHIFT;
	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;

	/* make sure the dirty trick played by the caller work */
	ret = invalidate_inode_pages2_range(inode->i_mapping,
					    first_index, last_index);
	if (ret)
		goto out_unlock;

	file_ra_state_init(ra, inode->i_mapping);

	for (i = first_index ; i <= last_index; i++) {
		/*
		 * kick readahead once per ra_pages pages.
		 * NOTE(review): this divides by ra->ra_pages; confirm it can
		 * never be 0 after file_ra_state_init().
		 */
		if (total_read % ra->ra_pages == 0) {
			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
				       min(last_index, ra->ra_pages + i - 1));
		}
		total_read++;
again:
		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
			BUG_ON(1);
		page = grab_cache_page(inode->i_mapping, i);
		if (!page) {
			ret = -ENOMEM;
			goto out_unlock;
		}
		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				ret = -EIO;
				goto out_unlock;
			}
		}
		wait_on_page_writeback(page);

		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;
		lock_extent(io_tree, page_start, page_end, GFP_NOFS);

		ordered = btrfs_lookup_ordered_extent(inode, page_start);
		if (ordered) {
			/* drop everything, wait for the ordered extent, retry */
			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
			unlock_page(page);
			page_cache_release(page);
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			goto again;
		}
		set_page_extent_mapped(page);

		/* flag the first page of the range with EXTENT_BOUNDARY */
		if (i == first_index)
			set_extent_bits(io_tree, page_start, page_end,
					EXTENT_BOUNDARY, GFP_NOFS);
		btrfs_set_extent_delalloc(inode, page_start, page_end);

		set_page_dirty(page);
		total_dirty++;

		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);
	}
out_unlock:
	mutex_unlock(&inode->i_mutex);
	kfree(ra);
	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
	return ret;
}
2622
2623static noinline_for_stack
2624int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
2625{
2626 struct btrfs_root *root = BTRFS_I(inode)->root;
2627 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2628 struct extent_map *em;
2629 u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
2630 u64 end = start + extent_key->offset - 1;
2631
2632 em = alloc_extent_map(GFP_NOFS);
2633 em->start = start;
2634 em->len = extent_key->offset;
2635 em->block_len = extent_key->offset;
2636 em->block_start = extent_key->objectid;
2637 em->bdev = root->fs_info->fs_devices->latest_bdev;
2638 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2639
2640 /* setup extent map to cheat btrfs_readpage */
2641 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2642 while (1) {
2643 int ret;
2644 spin_lock(&em_tree->lock);
2645 ret = add_extent_mapping(em_tree, em);
2646 spin_unlock(&em_tree->lock);
2647 if (ret != -EEXIST) {
2648 free_extent_map(em);
2649 break;
2650 }
2651 btrfs_drop_extent_cache(inode, start, end, 0);
2652 }
2653 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2654
2655 return relocate_inode_pages(inode, start, extent_key->offset);
2656}
2657
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
/*
 * Starting at the current position of @path, find the first
 * BTRFS_EXTENT_REF_V0_KEY item that belongs to the extent
 * @extent_key->objectid and store its ref objectid in *@ref_objectid.
 *
 * When the search has to move to the next leaf, *@path_change is set to
 * 1 (if @path_change is not NULL).
 *
 * Returns 0 on success, -ENOENT when an item of a different extent is
 * reached first, or a negative errno from btrfs_next_leaf().
 */
static int get_ref_objectid_v0(struct reloc_control *rc,
			       struct btrfs_path *path,
			       struct btrfs_key *extent_key,
			       u64 *ref_objectid, int *path_change)
{
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_key key;
	int slot = path->slots[0];
	int ret;

	for (;;) {
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret < 0)
				return ret;
			BUG_ON(ret > 0);
			leaf = path->nodes[0];
			slot = path->slots[0];
			if (path_change)
				*path_change = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid != extent_key->objectid)
			return -ENOENT;

		if (key.type == BTRFS_EXTENT_REF_V0_KEY)
			break;

		slot++;
	}

	ref0 = btrfs_item_ptr(leaf, slot, struct btrfs_extent_ref_v0);
	*ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
	return 0;
}
#endif
2699
2700/*
2701 * helper to add a tree block to the list.
2702 * the major work is getting the generation and level of the block
2703 */
2704static int add_tree_block(struct reloc_control *rc,
2705 struct btrfs_key *extent_key,
2706 struct btrfs_path *path,
2707 struct rb_root *blocks)
2708{
2709 struct extent_buffer *eb;
2710 struct btrfs_extent_item *ei;
2711 struct btrfs_tree_block_info *bi;
2712 struct tree_block *block;
2713 struct rb_node *rb_node;
2714 u32 item_size;
2715 int level = -1;
2716 int generation;
2717
2718 eb = path->nodes[0];
2719 item_size = btrfs_item_size_nr(eb, path->slots[0]);
2720
2721 if (item_size >= sizeof(*ei) + sizeof(*bi)) {
2722 ei = btrfs_item_ptr(eb, path->slots[0],
2723 struct btrfs_extent_item);
2724 bi = (struct btrfs_tree_block_info *)(ei + 1);
2725 generation = btrfs_extent_generation(eb, ei);
2726 level = btrfs_tree_block_level(eb, bi);
2727 } else {
2728#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2729 u64 ref_owner;
2730 int ret;
2731
2732 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2733 ret = get_ref_objectid_v0(rc, path, extent_key,
2734 &ref_owner, NULL);
2735 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
2736 level = (int)ref_owner;
2737 /* FIXME: get real generation */
2738 generation = 0;
2739#else
2740 BUG();
2741#endif
2742 }
2743
2744 btrfs_release_path(rc->extent_root, path);
2745
2746 BUG_ON(level == -1);
2747
2748 block = kmalloc(sizeof(*block), GFP_NOFS);
2749 if (!block)
2750 return -ENOMEM;
2751
2752 block->bytenr = extent_key->objectid;
2753 block->key.objectid = extent_key->offset;
2754 block->key.offset = generation;
2755 block->level = level;
2756 block->key_ready = 0;
2757
2758 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2759 BUG_ON(rb_node);
2760
2761 return 0;
2762}
2763
2764/*
2765 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
2766 */
2767static int __add_tree_block(struct reloc_control *rc,
2768 u64 bytenr, u32 blocksize,
2769 struct rb_root *blocks)
2770{
2771 struct btrfs_path *path;
2772 struct btrfs_key key;
2773 int ret;
2774
2775 if (tree_block_processed(bytenr, blocksize, rc))
2776 return 0;
2777
2778 if (tree_search(blocks, bytenr))
2779 return 0;
2780
2781 path = btrfs_alloc_path();
2782 if (!path)
2783 return -ENOMEM;
2784
2785 key.objectid = bytenr;
2786 key.type = BTRFS_EXTENT_ITEM_KEY;
2787 key.offset = blocksize;
2788
2789 path->search_commit_root = 1;
2790 path->skip_locking = 1;
2791 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
2792 if (ret < 0)
2793 goto out;
2794 BUG_ON(ret);
2795
2796 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2797 ret = add_tree_block(rc, &key, path, blocks);
2798out:
2799 btrfs_free_path(path);
2800 return ret;
2801}
2802
2803/*
2804 * helper to check if the block use full backrefs for pointers in it
2805 */
2806static int block_use_full_backref(struct reloc_control *rc,
2807 struct extent_buffer *eb)
2808{
2809 struct btrfs_path *path;
2810 struct btrfs_extent_item *ei;
2811 struct btrfs_key key;
2812 u64 flags;
2813 int ret;
2814
2815 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
2816 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
2817 return 1;
2818
2819 path = btrfs_alloc_path();
2820 BUG_ON(!path);
2821
2822 key.objectid = eb->start;
2823 key.type = BTRFS_EXTENT_ITEM_KEY;
2824 key.offset = eb->len;
2825
2826 path->search_commit_root = 1;
2827 path->skip_locking = 1;
2828 ret = btrfs_search_slot(NULL, rc->extent_root,
2829 &key, path, 0, 0);
2830 BUG_ON(ret);
2831
2832 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2833 struct btrfs_extent_item);
2834 flags = btrfs_extent_flags(path->nodes[0], ei);
2835 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2836 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2837 ret = 1;
2838 else
2839 ret = 0;
2840 btrfs_free_path(path);
2841 return ret;
2842}
2843
2844/*
2845 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
2846 * this function scans fs tree to find blocks reference the data extent
2847 */
2848static int find_data_references(struct reloc_control *rc,
2849 struct btrfs_key *extent_key,
2850 struct extent_buffer *leaf,
2851 struct btrfs_extent_data_ref *ref,
2852 struct rb_root *blocks)
2853{
2854 struct btrfs_path *path;
2855 struct tree_block *block;
2856 struct btrfs_root *root;
2857 struct btrfs_file_extent_item *fi;
2858 struct rb_node *rb_node;
2859 struct btrfs_key key;
2860 u64 ref_root;
2861 u64 ref_objectid;
2862 u64 ref_offset;
2863 u32 ref_count;
2864 u32 nritems;
2865 int err = 0;
2866 int added = 0;
2867 int counted;
2868 int ret;
2869
2870 path = btrfs_alloc_path();
2871 if (!path)
2872 return -ENOMEM;
2873
2874 ref_root = btrfs_extent_data_ref_root(leaf, ref);
2875 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
2876 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
2877 ref_count = btrfs_extent_data_ref_count(leaf, ref);
2878
2879 root = read_fs_root(rc->extent_root->fs_info, ref_root);
2880 if (IS_ERR(root)) {
2881 err = PTR_ERR(root);
2882 goto out;
2883 }
2884
2885 key.objectid = ref_objectid;
2886 key.offset = ref_offset;
2887 key.type = BTRFS_EXTENT_DATA_KEY;
2888
2889 path->search_commit_root = 1;
2890 path->skip_locking = 1;
2891 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2892 if (ret < 0) {
2893 err = ret;
2894 goto out;
2895 }
2896
2897 leaf = path->nodes[0];
2898 nritems = btrfs_header_nritems(leaf);
2899 /*
2900 * the references in tree blocks that use full backrefs
2901 * are not counted in
2902 */
2903 if (block_use_full_backref(rc, leaf))
2904 counted = 0;
2905 else
2906 counted = 1;
2907 rb_node = tree_search(blocks, leaf->start);
2908 if (rb_node) {
2909 if (counted)
2910 added = 1;
2911 else
2912 path->slots[0] = nritems;
2913 }
2914
2915 while (ref_count > 0) {
2916 while (path->slots[0] >= nritems) {
2917 ret = btrfs_next_leaf(root, path);
2918 if (ret < 0) {
2919 err = ret;
2920 goto out;
2921 }
2922 if (ret > 0) {
2923 WARN_ON(1);
2924 goto out;
2925 }
2926
2927 leaf = path->nodes[0];
2928 nritems = btrfs_header_nritems(leaf);
2929 added = 0;
2930
2931 if (block_use_full_backref(rc, leaf))
2932 counted = 0;
2933 else
2934 counted = 1;
2935 rb_node = tree_search(blocks, leaf->start);
2936 if (rb_node) {
2937 if (counted)
2938 added = 1;
2939 else
2940 path->slots[0] = nritems;
2941 }
2942 }
2943
2944 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2945 if (key.objectid != ref_objectid ||
2946 key.type != BTRFS_EXTENT_DATA_KEY) {
2947 WARN_ON(1);
2948 break;
2949 }
2950
2951 fi = btrfs_item_ptr(leaf, path->slots[0],
2952 struct btrfs_file_extent_item);
2953
2954 if (btrfs_file_extent_type(leaf, fi) ==
2955 BTRFS_FILE_EXTENT_INLINE)
2956 goto next;
2957
2958 if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
2959 extent_key->objectid)
2960 goto next;
2961
2962 key.offset -= btrfs_file_extent_offset(leaf, fi);
2963 if (key.offset != ref_offset)
2964 goto next;
2965
2966 if (counted)
2967 ref_count--;
2968 if (added)
2969 goto next;
2970
2971 if (!tree_block_processed(leaf->start, leaf->len, rc)) {
2972 block = kmalloc(sizeof(*block), GFP_NOFS);
2973 if (!block) {
2974 err = -ENOMEM;
2975 break;
2976 }
2977 block->bytenr = leaf->start;
2978 btrfs_item_key_to_cpu(leaf, &block->key, 0);
2979 block->level = 0;
2980 block->key_ready = 1;
2981 rb_node = tree_insert(blocks, block->bytenr,
2982 &block->rb_node);
2983 BUG_ON(rb_node);
2984 }
2985 if (counted)
2986 added = 1;
2987 else
2988 path->slots[0] = nritems;
2989next:
2990 path->slots[0]++;
2991
2992 }
2993out:
2994 btrfs_free_path(path);
2995 return err;
2996}
2997
2998/*
2999 * hepler to find all tree blocks that reference a given data extent
3000 */
3001static noinline_for_stack
3002int add_data_references(struct reloc_control *rc,
3003 struct btrfs_key *extent_key,
3004 struct btrfs_path *path,
3005 struct rb_root *blocks)
3006{
3007 struct btrfs_key key;
3008 struct extent_buffer *eb;
3009 struct btrfs_extent_data_ref *dref;
3010 struct btrfs_extent_inline_ref *iref;
3011 unsigned long ptr;
3012 unsigned long end;
3013 u32 blocksize;
3014 int ret;
3015 int err = 0;
3016
3017 ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
3018 extent_key->offset);
3019 BUG_ON(ret < 0);
3020 if (ret > 0) {
3021 /* the relocated data is fragmented */
3022 rc->extents_skipped++;
3023 btrfs_release_path(rc->extent_root, path);
3024 return 0;
3025 }
3026
3027 blocksize = btrfs_level_size(rc->extent_root, 0);
3028
3029 eb = path->nodes[0];
3030 ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3031 end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3032#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3033 if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3034 ptr = end;
3035 else
3036#endif
3037 ptr += sizeof(struct btrfs_extent_item);
3038
3039 while (ptr < end) {
3040 iref = (struct btrfs_extent_inline_ref *)ptr;
3041 key.type = btrfs_extent_inline_ref_type(eb, iref);
3042 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3043 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3044 ret = __add_tree_block(rc, key.offset, blocksize,
3045 blocks);
3046 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3047 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3048 ret = find_data_references(rc, extent_key,
3049 eb, dref, blocks);
3050 } else {
3051 BUG();
3052 }
3053 ptr += btrfs_extent_inline_ref_size(key.type);
3054 }
3055 WARN_ON(ptr > end);
3056
3057 while (1) {
3058 cond_resched();
3059 eb = path->nodes[0];
3060 if (path->slots[0] >= btrfs_header_nritems(eb)) {
3061 ret = btrfs_next_leaf(rc->extent_root, path);
3062 if (ret < 0) {
3063 err = ret;
3064 break;
3065 }
3066 if (ret > 0)
3067 break;
3068 eb = path->nodes[0];
3069 }
3070
3071 btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3072 if (key.objectid != extent_key->objectid)
3073 break;
3074
3075#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3076 if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3077 key.type == BTRFS_EXTENT_REF_V0_KEY) {
3078#else
3079 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
3080 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3081#endif
3082 ret = __add_tree_block(rc, key.offset, blocksize,
3083 blocks);
3084 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3085 dref = btrfs_item_ptr(eb, path->slots[0],
3086 struct btrfs_extent_data_ref);
3087 ret = find_data_references(rc, extent_key,
3088 eb, dref, blocks);
3089 } else {
3090 ret = 0;
3091 }
3092 if (ret) {
3093 err = ret;
3094 break;
3095 }
3096 path->slots[0]++;
3097 }
3098 btrfs_release_path(rc->extent_root, path);
3099 if (err)
3100 free_block_list(blocks);
3101 return err;
3102}
3103
3104/*
3105 * hepler to find next unprocessed extent
3106 */
3107static noinline_for_stack
3108int find_next_extent(struct btrfs_trans_handle *trans,
3109 struct reloc_control *rc, struct btrfs_path *path)
3110{
3111 struct btrfs_key key;
3112 struct extent_buffer *leaf;
3113 u64 start, end, last;
3114 int ret;
3115
3116 last = rc->block_group->key.objectid + rc->block_group->key.offset;
3117 while (1) {
3118 cond_resched();
3119 if (rc->search_start >= last) {
3120 ret = 1;
3121 break;
3122 }
3123
3124 key.objectid = rc->search_start;
3125 key.type = BTRFS_EXTENT_ITEM_KEY;
3126 key.offset = 0;
3127
3128 path->search_commit_root = 1;
3129 path->skip_locking = 1;
3130 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3131 0, 0);
3132 if (ret < 0)
3133 break;
3134next:
3135 leaf = path->nodes[0];
3136 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3137 ret = btrfs_next_leaf(rc->extent_root, path);
3138 if (ret != 0)
3139 break;
3140 leaf = path->nodes[0];
3141 }
3142
3143 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3144 if (key.objectid >= last) {
3145 ret = 1;
3146 break;
3147 }
3148
3149 if (key.type != BTRFS_EXTENT_ITEM_KEY ||
3150 key.objectid + key.offset <= rc->search_start) {
3151 path->slots[0]++;
3152 goto next;
3153 }
3154
3155 ret = find_first_extent_bit(&rc->processed_blocks,
3156 key.objectid, &start, &end,
3157 EXTENT_DIRTY);
3158
3159 if (ret == 0 && start <= key.objectid) {
3160 btrfs_release_path(rc->extent_root, path);
3161 rc->search_start = end + 1;
3162 } else {
3163 rc->search_start = key.objectid + key.offset;
3164 return 0;
3165 }
3166 }
3167 btrfs_release_path(rc->extent_root, path);
3168 return ret;
3169}
3170
3171static void set_reloc_control(struct reloc_control *rc)
3172{
3173 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3174 mutex_lock(&fs_info->trans_mutex);
3175 fs_info->reloc_ctl = rc;
3176 mutex_unlock(&fs_info->trans_mutex);
3177}
3178
3179static void unset_reloc_control(struct reloc_control *rc)
3180{
3181 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3182 mutex_lock(&fs_info->trans_mutex);
3183 fs_info->reloc_ctl = NULL;
3184 mutex_unlock(&fs_info->trans_mutex);
3185}
3186
3187static int check_extent_flags(u64 flags)
3188{
3189 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3190 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3191 return 1;
3192 if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3193 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3194 return 1;
3195 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3196 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
3197 return 1;
3198 return 0;
3199}
3200
3201static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3202{
3203 struct rb_root blocks = RB_ROOT;
3204 struct btrfs_key key;
3205 struct btrfs_trans_handle *trans = NULL;
3206 struct btrfs_path *path;
3207 struct btrfs_extent_item *ei;
3208 unsigned long nr;
3209 u64 flags;
3210 u32 item_size;
3211 int ret;
3212 int err = 0;
3213
3214 path = btrfs_alloc_path();
3215 if (!path)
3216 return -ENOMEM;
3217
3218 rc->search_start = rc->block_group->key.objectid;
3219 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3220 GFP_NOFS);
3221
3222 rc->create_reloc_root = 1;
3223 set_reloc_control(rc);
3224
3225 trans = btrfs_start_transaction(rc->extent_root, 1);
3226 btrfs_commit_transaction(trans, rc->extent_root);
3227
3228 while (1) {
3229 trans = btrfs_start_transaction(rc->extent_root, 1);
3230
3231 ret = find_next_extent(trans, rc, path);
3232 if (ret < 0)
3233 err = ret;
3234 if (ret != 0)
3235 break;
3236
3237 rc->extents_found++;
3238
3239 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3240 struct btrfs_extent_item);
3241 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3242 item_size = btrfs_item_size_nr(path->nodes[0],
3243 path->slots[0]);
3244 if (item_size >= sizeof(*ei)) {
3245 flags = btrfs_extent_flags(path->nodes[0], ei);
3246 ret = check_extent_flags(flags);
3247 BUG_ON(ret);
3248
3249 } else {
3250#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3251 u64 ref_owner;
3252 int path_change = 0;
3253
3254 BUG_ON(item_size !=
3255 sizeof(struct btrfs_extent_item_v0));
3256 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
3257 &path_change);
3258 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
3259 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
3260 else
3261 flags = BTRFS_EXTENT_FLAG_DATA;
3262
3263 if (path_change) {
3264 btrfs_release_path(rc->extent_root, path);
3265
3266 path->search_commit_root = 1;
3267 path->skip_locking = 1;
3268 ret = btrfs_search_slot(NULL, rc->extent_root,
3269 &key, path, 0, 0);
3270 if (ret < 0) {
3271 err = ret;
3272 break;
3273 }
3274 BUG_ON(ret > 0);
3275 }
3276#else
3277 BUG();
3278#endif
3279 }
3280
3281 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3282 ret = add_tree_block(rc, &key, path, &blocks);
3283 } else if (rc->stage == UPDATE_DATA_PTRS &&
3284 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3285 ret = add_data_references(rc, &key, path, &blocks);
3286 } else {
3287 btrfs_release_path(rc->extent_root, path);
3288 ret = 0;
3289 }
3290 if (ret < 0) {
3291 err = 0;
3292 break;
3293 }
3294
3295 if (!RB_EMPTY_ROOT(&blocks)) {
3296 ret = relocate_tree_blocks(trans, rc, &blocks);
3297 if (ret < 0) {
3298 err = ret;
3299 break;
3300 }
3301 }
3302
3303 nr = trans->blocks_used;
3304 btrfs_end_transaction_throttle(trans, rc->extent_root);
3305 trans = NULL;
3306 btrfs_btree_balance_dirty(rc->extent_root, nr);
3307
3308 if (rc->stage == MOVE_DATA_EXTENTS &&
3309 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3310 rc->found_file_extent = 1;
3311 ret = relocate_data_extent(rc->data_inode, &key);
3312 if (ret < 0) {
3313 err = ret;
3314 break;
3315 }
3316 }
3317 }
3318 btrfs_free_path(path);
3319
3320 if (trans) {
3321 nr = trans->blocks_used;
3322 btrfs_end_transaction(trans, rc->extent_root);
3323 btrfs_btree_balance_dirty(rc->extent_root, nr);
3324 }
3325
3326 rc->create_reloc_root = 0;
3327 smp_mb();
3328
3329 if (rc->extents_found > 0) {
3330 trans = btrfs_start_transaction(rc->extent_root, 1);
3331 btrfs_commit_transaction(trans, rc->extent_root);
3332 }
3333
3334 merge_reloc_roots(rc);
3335
3336 unset_reloc_control(rc);
3337
3338 /* get rid of pinned extents */
3339 trans = btrfs_start_transaction(rc->extent_root, 1);
3340 btrfs_commit_transaction(trans, rc->extent_root);
3341
3342 return err;
3343}
3344
3345static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3346 struct btrfs_root *root,
3347 u64 objectid, u64 size)
3348{
3349 struct btrfs_path *path;
3350 struct btrfs_inode_item *item;
3351 struct extent_buffer *leaf;
3352 int ret;
3353
3354 path = btrfs_alloc_path();
3355 if (!path)
3356 return -ENOMEM;
3357
3358 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3359 if (ret)
3360 goto out;
3361
3362 leaf = path->nodes[0];
3363 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3364 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3365 btrfs_set_inode_generation(leaf, item, 1);
3366 btrfs_set_inode_size(leaf, item, size);
3367 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3368 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
3369 btrfs_mark_buffer_dirty(leaf);
3370 btrfs_release_path(root, path);
3371out:
3372 btrfs_free_path(path);
3373 return ret;
3374}
3375
3376/*
3377 * helper to create inode for data relocation.
3378 * the inode is in data relocation tree and its link count is 0
3379 */
3380static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3381 struct btrfs_block_group_cache *group)
3382{
3383 struct inode *inode = NULL;
3384 struct btrfs_trans_handle *trans;
3385 struct btrfs_root *root;
3386 struct btrfs_key key;
3387 unsigned long nr;
3388 u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
3389 int err = 0;
3390
3391 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
3392 if (IS_ERR(root))
3393 return ERR_CAST(root);
3394
3395 trans = btrfs_start_transaction(root, 1);
3396 BUG_ON(!trans);
3397
3398 err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
3399 if (err)
3400 goto out;
3401
3402 err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
3403 BUG_ON(err);
3404
3405 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
3406 group->key.offset, 0, group->key.offset,
3407 0, 0, 0);
3408 BUG_ON(err);
3409
3410 key.objectid = objectid;
3411 key.type = BTRFS_INODE_ITEM_KEY;
3412 key.offset = 0;
3413 inode = btrfs_iget(root->fs_info->sb, &key, root);
3414 BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3415 BTRFS_I(inode)->index_cnt = group->key.objectid;
3416
3417 err = btrfs_orphan_add(trans, inode);
3418out:
3419 nr = trans->blocks_used;
3420 btrfs_end_transaction(trans, root);
3421
3422 btrfs_btree_balance_dirty(root, nr);
3423 if (err) {
3424 if (inode)
3425 iput(inode);
3426 inode = ERR_PTR(err);
3427 }
3428 return inode;
3429}
3430
3431/*
3432 * function to relocate all extents in a block group.
3433 */
3434int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3435{
3436 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3437 struct reloc_control *rc;
3438 int ret;
3439 int err = 0;
3440
3441 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3442 if (!rc)
3443 return -ENOMEM;
3444
3445 mapping_tree_init(&rc->reloc_root_tree);
3446 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
3447 INIT_LIST_HEAD(&rc->reloc_roots);
3448
3449 rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
3450 BUG_ON(!rc->block_group);
3451
3452 btrfs_init_workers(&rc->workers, "relocate",
3453 fs_info->thread_pool_size);
3454
3455 rc->extent_root = extent_root;
3456 btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
3457
3458 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3459 if (IS_ERR(rc->data_inode)) {
3460 err = PTR_ERR(rc->data_inode);
3461 rc->data_inode = NULL;
3462 goto out;
3463 }
3464
3465 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
3466 (unsigned long long)rc->block_group->key.objectid,
3467 (unsigned long long)rc->block_group->flags);
3468
3469 btrfs_start_delalloc_inodes(fs_info->tree_root);
3470 btrfs_wait_ordered_extents(fs_info->tree_root, 0);
3471
3472 while (1) {
3473 mutex_lock(&fs_info->cleaner_mutex);
3474 btrfs_clean_old_snapshots(fs_info->tree_root);
3475 mutex_unlock(&fs_info->cleaner_mutex);
3476
3477 rc->extents_found = 0;
3478 rc->extents_skipped = 0;
3479
3480 ret = relocate_block_group(rc);
3481 if (ret < 0) {
3482 err = ret;
3483 break;
3484 }
3485
3486 if (rc->extents_found == 0)
3487 break;
3488
3489 printk(KERN_INFO "btrfs: found %llu extents\n",
3490 (unsigned long long)rc->extents_found);
3491
3492 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3493 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
3494 invalidate_mapping_pages(rc->data_inode->i_mapping,
3495 0, -1);
3496 rc->stage = UPDATE_DATA_PTRS;
3497 } else if (rc->stage == UPDATE_DATA_PTRS &&
3498 rc->extents_skipped >= rc->extents_found) {
3499 iput(rc->data_inode);
3500 rc->data_inode = create_reloc_inode(fs_info,
3501 rc->block_group);
3502 if (IS_ERR(rc->data_inode)) {
3503 err = PTR_ERR(rc->data_inode);
3504 rc->data_inode = NULL;
3505 break;
3506 }
3507 rc->stage = MOVE_DATA_EXTENTS;
3508 rc->found_file_extent = 0;
3509 }
3510 }
3511
3512 filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
3513 rc->block_group->key.objectid,
3514 rc->block_group->key.objectid +
3515 rc->block_group->key.offset - 1);
3516
3517 WARN_ON(rc->block_group->pinned > 0);
3518 WARN_ON(rc->block_group->reserved > 0);
3519 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
3520out:
3521 iput(rc->data_inode);
3522 btrfs_stop_workers(&rc->workers);
3523 btrfs_put_block_group(rc->block_group);
3524 kfree(rc);
3525 return err;
3526}
3527
3528/*
3529 * recover relocation interrupted by system crash.
3530 *
3531 * this function resumes merging reloc trees with corresponding fs trees.
3532 * this is important for keeping the sharing of tree blocks
3533 */
3534int btrfs_recover_relocation(struct btrfs_root *root)
3535{
3536 LIST_HEAD(reloc_roots);
3537 struct btrfs_key key;
3538 struct btrfs_root *fs_root;
3539 struct btrfs_root *reloc_root;
3540 struct btrfs_path *path;
3541 struct extent_buffer *leaf;
3542 struct reloc_control *rc = NULL;
3543 struct btrfs_trans_handle *trans;
3544 int ret;
3545 int err = 0;
3546
3547 path = btrfs_alloc_path();
3548 if (!path)
3549 return -ENOMEM;
3550
3551 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
3552 key.type = BTRFS_ROOT_ITEM_KEY;
3553 key.offset = (u64)-1;
3554
3555 while (1) {
3556 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
3557 path, 0, 0);
3558 if (ret < 0) {
3559 err = ret;
3560 goto out;
3561 }
3562 if (ret > 0) {
3563 if (path->slots[0] == 0)
3564 break;
3565 path->slots[0]--;
3566 }
3567 leaf = path->nodes[0];
3568 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3569 btrfs_release_path(root->fs_info->tree_root, path);
3570
3571 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
3572 key.type != BTRFS_ROOT_ITEM_KEY)
3573 break;
3574
3575 reloc_root = btrfs_read_fs_root_no_radix(root, &key);
3576 if (IS_ERR(reloc_root)) {
3577 err = PTR_ERR(reloc_root);
3578 goto out;
3579 }
3580
3581 list_add(&reloc_root->root_list, &reloc_roots);
3582
3583 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
3584 fs_root = read_fs_root(root->fs_info,
3585 reloc_root->root_key.offset);
3586 if (IS_ERR(fs_root)) {
3587 err = PTR_ERR(fs_root);
3588 goto out;
3589 }
3590 }
3591
3592 if (key.offset == 0)
3593 break;
3594
3595 key.offset--;
3596 }
3597 btrfs_release_path(root->fs_info->tree_root, path);
3598
3599 if (list_empty(&reloc_roots))
3600 goto out;
3601
3602 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3603 if (!rc) {
3604 err = -ENOMEM;
3605 goto out;
3606 }
3607
3608 mapping_tree_init(&rc->reloc_root_tree);
3609 INIT_LIST_HEAD(&rc->reloc_roots);
3610 btrfs_init_workers(&rc->workers, "relocate",
3611 root->fs_info->thread_pool_size);
3612 rc->extent_root = root->fs_info->extent_root;
3613
3614 set_reloc_control(rc);
3615
3616 while (!list_empty(&reloc_roots)) {
3617 reloc_root = list_entry(reloc_roots.next,
3618 struct btrfs_root, root_list);
3619 list_del(&reloc_root->root_list);
3620
3621 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
3622 list_add_tail(&reloc_root->root_list,
3623 &rc->reloc_roots);
3624 continue;
3625 }
3626
3627 fs_root = read_fs_root(root->fs_info,
3628 reloc_root->root_key.offset);
3629 BUG_ON(IS_ERR(fs_root));
3630
3631 __add_reloc_root(reloc_root);
3632 fs_root->reloc_root = reloc_root;
3633 }
3634
3635 trans = btrfs_start_transaction(rc->extent_root, 1);
3636 btrfs_commit_transaction(trans, rc->extent_root);
3637
3638 merge_reloc_roots(rc);
3639
3640 unset_reloc_control(rc);
3641
3642 trans = btrfs_start_transaction(rc->extent_root, 1);
3643 btrfs_commit_transaction(trans, rc->extent_root);
3644out:
3645 if (rc) {
3646 btrfs_stop_workers(&rc->workers);
3647 kfree(rc);
3648 }
3649 while (!list_empty(&reloc_roots)) {
3650 reloc_root = list_entry(reloc_roots.next,
3651 struct btrfs_root, root_list);
3652 list_del(&reloc_root->root_list);
3653 free_extent_buffer(reloc_root->node);
3654 free_extent_buffer(reloc_root->commit_root);
3655 kfree(reloc_root);
3656 }
3657 btrfs_free_path(path);
3658
3659 if (err == 0) {
3660 /* cleanup orphan inode in data relocation tree */
3661 fs_root = read_fs_root(root->fs_info,
3662 BTRFS_DATA_RELOC_TREE_OBJECTID);
3663 if (IS_ERR(fs_root))
3664 err = PTR_ERR(fs_root);
3665 }
3666 return err;
3667}
3668
3669/*
3670 * helper to add ordered checksum for data relocation.
3671 *
3672 * cloning checksum properly handles the nodatasum extents.
3673 * it also saves CPU time to re-calculate the checksum.
3674 */
3675int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
3676{
3677 struct btrfs_ordered_sum *sums;
3678 struct btrfs_sector_sum *sector_sum;
3679 struct btrfs_ordered_extent *ordered;
3680 struct btrfs_root *root = BTRFS_I(inode)->root;
3681 size_t offset;
3682 int ret;
3683 u64 disk_bytenr;
3684 LIST_HEAD(list);
3685
3686 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
3687 BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
3688
3689 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
3690 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
3691 disk_bytenr + len - 1, &list);
3692
3693 while (!list_empty(&list)) {
3694 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
3695 list_del_init(&sums->list);
3696
3697 sector_sum = sums->sums;
3698 sums->bytenr = ordered->start;
3699
3700 offset = 0;
3701 while (offset < sums->len) {
3702 sector_sum->bytenr += ordered->start - disk_bytenr;
3703 sector_sum++;
3704 offset += root->sectorsize;
3705 }
3706
3707 btrfs_add_ordered_sum(inode, ordered, sums);
3708 }
3709 btrfs_put_ordered_extent(ordered);
3710 return 0;
3711}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b48650de4472..0ddc6d61c55a 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -111,6 +111,15 @@ out:
111 return ret; 111 return ret;
112} 112}
113 113
114int btrfs_set_root_node(struct btrfs_root_item *item,
115 struct extent_buffer *node)
116{
117 btrfs_set_root_bytenr(item, node->start);
118 btrfs_set_root_level(item, btrfs_header_level(node));
119 btrfs_set_root_generation(item, btrfs_header_generation(node));
120 return 0;
121}
122
114/* 123/*
115 * copy the data in 'item' into the btree 124 * copy the data in 'item' into the btree
116 */ 125 */
@@ -164,8 +173,7 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
164 * offset lower than the latest root. They need to be queued for deletion to 173 * offset lower than the latest root. They need to be queued for deletion to
165 * finish what was happening when we crashed. 174 * finish what was happening when we crashed.
166 */ 175 */
167int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, 176int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
168 struct btrfs_root *latest)
169{ 177{
170 struct btrfs_root *dead_root; 178 struct btrfs_root *dead_root;
171 struct btrfs_item *item; 179 struct btrfs_item *item;
@@ -227,10 +235,7 @@ again:
227 goto err; 235 goto err;
228 } 236 }
229 237
230 if (objectid == BTRFS_TREE_RELOC_OBJECTID) 238 ret = btrfs_add_dead_root(dead_root);
231 ret = btrfs_add_dead_reloc_root(dead_root);
232 else
233 ret = btrfs_add_dead_root(dead_root, latest);
234 if (ret) 239 if (ret)
235 goto err; 240 goto err;
236 goto again; 241 goto again;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2ff7cd2db25f..e9ef8c3307fe 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -52,7 +52,6 @@
52#include "export.h" 52#include "export.h"
53#include "compression.h" 53#include "compression.h"
54 54
55
56static struct super_operations btrfs_super_ops; 55static struct super_operations btrfs_super_ops;
57 56
58static void btrfs_put_super(struct super_block *sb) 57static void btrfs_put_super(struct super_block *sb)
@@ -322,7 +321,7 @@ static int btrfs_fill_super(struct super_block *sb,
322 struct dentry *root_dentry; 321 struct dentry *root_dentry;
323 struct btrfs_super_block *disk_super; 322 struct btrfs_super_block *disk_super;
324 struct btrfs_root *tree_root; 323 struct btrfs_root *tree_root;
325 struct btrfs_inode *bi; 324 struct btrfs_key key;
326 int err; 325 int err;
327 326
328 sb->s_maxbytes = MAX_LFS_FILESIZE; 327 sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -341,23 +340,15 @@ static int btrfs_fill_super(struct super_block *sb,
341 } 340 }
342 sb->s_fs_info = tree_root; 341 sb->s_fs_info = tree_root;
343 disk_super = &tree_root->fs_info->super_copy; 342 disk_super = &tree_root->fs_info->super_copy;
344 inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID,
345 tree_root->fs_info->fs_root);
346 bi = BTRFS_I(inode);
347 bi->location.objectid = inode->i_ino;
348 bi->location.offset = 0;
349 bi->root = tree_root->fs_info->fs_root;
350
351 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
352 343
353 if (!inode) { 344 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
354 err = -ENOMEM; 345 key.type = BTRFS_INODE_ITEM_KEY;
346 key.offset = 0;
347 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root);
348 if (IS_ERR(inode)) {
349 err = PTR_ERR(inode);
355 goto fail_close; 350 goto fail_close;
356 } 351 }
357 if (inode->i_state & I_NEW) {
358 btrfs_read_locked_inode(inode);
359 unlock_new_inode(inode);
360 }
361 352
362 root_dentry = d_alloc_root(inode); 353 root_dentry = d_alloc_root(inode);
363 if (!root_dentry) { 354 if (!root_dentry) {
@@ -584,7 +575,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
584 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) 575 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
585 return -EINVAL; 576 return -EINVAL;
586 577
587 ret = btrfs_cleanup_reloc_trees(root); 578 /* recover relocation */
579 ret = btrfs_recover_relocation(root);
588 WARN_ON(ret); 580 WARN_ON(ret);
589 581
590 ret = btrfs_cleanup_fs_roots(root->fs_info); 582 ret = btrfs_cleanup_fs_roots(root->fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 01b143605ec1..2e177d7f4bb9 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -25,7 +25,6 @@
25#include "disk-io.h" 25#include "disk-io.h"
26#include "transaction.h" 26#include "transaction.h"
27#include "locking.h" 27#include "locking.h"
28#include "ref-cache.h"
29#include "tree-log.h" 28#include "tree-log.h"
30 29
31#define BTRFS_ROOT_TRANS_TAG 0 30#define BTRFS_ROOT_TRANS_TAG 0
@@ -94,45 +93,37 @@ static noinline int join_transaction(struct btrfs_root *root)
94 * to make sure the old root from before we joined the transaction is deleted 93 * to make sure the old root from before we joined the transaction is deleted
95 * when the transaction commits 94 * when the transaction commits
96 */ 95 */
97noinline int btrfs_record_root_in_trans(struct btrfs_root *root) 96static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root)
98{ 98{
99 struct btrfs_dirty_root *dirty; 99 if (root->ref_cows && root->last_trans < trans->transid) {
100 u64 running_trans_id = root->fs_info->running_transaction->transid;
101 if (root->ref_cows && root->last_trans < running_trans_id) {
102 WARN_ON(root == root->fs_info->extent_root); 100 WARN_ON(root == root->fs_info->extent_root);
103 if (root->root_item.refs != 0) { 101 WARN_ON(root->root_item.refs == 0);
104 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 102 WARN_ON(root->commit_root != root->node);
105 (unsigned long)root->root_key.objectid, 103
106 BTRFS_ROOT_TRANS_TAG); 104 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
107 105 (unsigned long)root->root_key.objectid,
108 dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 106 BTRFS_ROOT_TRANS_TAG);
109 BUG_ON(!dirty); 107 root->last_trans = trans->transid;
110 dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); 108 btrfs_init_reloc_root(trans, root);
111 BUG_ON(!dirty->root); 109 }
112 dirty->latest_root = root; 110 return 0;
113 INIT_LIST_HEAD(&dirty->list); 111}
114
115 root->commit_root = btrfs_root_node(root);
116
117 memcpy(dirty->root, root, sizeof(*root));
118 spin_lock_init(&dirty->root->node_lock);
119 spin_lock_init(&dirty->root->list_lock);
120 mutex_init(&dirty->root->objectid_mutex);
121 mutex_init(&dirty->root->log_mutex);
122 INIT_LIST_HEAD(&dirty->root->dead_list);
123 dirty->root->node = root->commit_root;
124 dirty->root->commit_root = NULL;
125 112
126 spin_lock(&root->list_lock); 113int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
127 list_add(&dirty->root->dead_list, &root->dead_list); 114 struct btrfs_root *root)
128 spin_unlock(&root->list_lock); 115{
116 if (!root->ref_cows)
117 return 0;
129 118
130 root->dirty_root = dirty; 119 mutex_lock(&root->fs_info->trans_mutex);
131 } else { 120 if (root->last_trans == trans->transid) {
132 WARN_ON(1); 121 mutex_unlock(&root->fs_info->trans_mutex);
133 } 122 return 0;
134 root->last_trans = running_trans_id;
135 } 123 }
124
125 record_root_in_trans(trans, root);
126 mutex_unlock(&root->fs_info->trans_mutex);
136 return 0; 127 return 0;
137} 128}
138 129
@@ -181,7 +172,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181 ret = join_transaction(root); 172 ret = join_transaction(root);
182 BUG_ON(ret); 173 BUG_ON(ret);
183 174
184 btrfs_record_root_in_trans(root);
185 h->transid = root->fs_info->running_transaction->transid; 175 h->transid = root->fs_info->running_transaction->transid;
186 h->transaction = root->fs_info->running_transaction; 176 h->transaction = root->fs_info->running_transaction;
187 h->blocks_reserved = num_blocks; 177 h->blocks_reserved = num_blocks;
@@ -192,6 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
192 h->delayed_ref_updates = 0; 182 h->delayed_ref_updates = 0;
193 183
194 root->fs_info->running_transaction->use_count++; 184 root->fs_info->running_transaction->use_count++;
185 record_root_in_trans(h, root);
195 mutex_unlock(&root->fs_info->trans_mutex); 186 mutex_unlock(&root->fs_info->trans_mutex);
196 return h; 187 return h;
197} 188}
@@ -233,6 +224,7 @@ static noinline int wait_for_commit(struct btrfs_root *root,
233 return 0; 224 return 0;
234} 225}
235 226
227#if 0
236/* 228/*
237 * rate limit against the drop_snapshot code. This helps to slow down new 229 * rate limit against the drop_snapshot code. This helps to slow down new
238 * operations if the drop_snapshot code isn't able to keep up. 230 * operations if the drop_snapshot code isn't able to keep up.
@@ -273,6 +265,7 @@ harder:
273 goto harder; 265 goto harder;
274 } 266 }
275} 267}
268#endif
276 269
277void btrfs_throttle(struct btrfs_root *root) 270void btrfs_throttle(struct btrfs_root *root)
278{ 271{
@@ -280,7 +273,6 @@ void btrfs_throttle(struct btrfs_root *root)
280 if (!root->fs_info->open_ioctl_trans) 273 if (!root->fs_info->open_ioctl_trans)
281 wait_current_trans(root); 274 wait_current_trans(root);
282 mutex_unlock(&root->fs_info->trans_mutex); 275 mutex_unlock(&root->fs_info->trans_mutex);
283 throttle_on_drops(root);
284} 276}
285 277
286static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 278static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
@@ -323,9 +315,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
323 memset(trans, 0, sizeof(*trans)); 315 memset(trans, 0, sizeof(*trans));
324 kmem_cache_free(btrfs_trans_handle_cachep, trans); 316 kmem_cache_free(btrfs_trans_handle_cachep, trans);
325 317
326 if (throttle)
327 throttle_on_drops(root);
328
329 return 0; 318 return 0;
330} 319}
331 320
@@ -462,12 +451,8 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
462 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 451 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
463 if (old_root_bytenr == root->node->start) 452 if (old_root_bytenr == root->node->start)
464 break; 453 break;
465 btrfs_set_root_bytenr(&root->root_item,
466 root->node->start);
467 btrfs_set_root_level(&root->root_item,
468 btrfs_header_level(root->node));
469 btrfs_set_root_generation(&root->root_item, trans->transid);
470 454
455 btrfs_set_root_node(&root->root_item, root->node);
471 ret = btrfs_update_root(trans, tree_root, 456 ret = btrfs_update_root(trans, tree_root,
472 &root->root_key, 457 &root->root_key,
473 &root->root_item); 458 &root->root_item);
@@ -477,14 +462,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
477 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 462 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
478 BUG_ON(ret); 463 BUG_ON(ret);
479 } 464 }
465 free_extent_buffer(root->commit_root);
466 root->commit_root = btrfs_root_node(root);
480 return 0; 467 return 0;
481} 468}
482 469
483/* 470/*
484 * update all the cowonly tree roots on disk 471 * update all the cowonly tree roots on disk
485 */ 472 */
486int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 473static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
487 struct btrfs_root *root) 474 struct btrfs_root *root)
488{ 475{
489 struct btrfs_fs_info *fs_info = root->fs_info; 476 struct btrfs_fs_info *fs_info = root->fs_info;
490 struct list_head *next; 477 struct list_head *next;
@@ -520,118 +507,54 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
520 * a dirty root struct and adds it into the list of dead roots that need to 507 * a dirty root struct and adds it into the list of dead roots that need to
521 * be deleted 508 * be deleted
522 */ 509 */
523int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) 510int btrfs_add_dead_root(struct btrfs_root *root)
524{ 511{
525 struct btrfs_dirty_root *dirty;
526
527 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
528 if (!dirty)
529 return -ENOMEM;
530 dirty->root = root;
531 dirty->latest_root = latest;
532
533 mutex_lock(&root->fs_info->trans_mutex); 512 mutex_lock(&root->fs_info->trans_mutex);
534 list_add(&dirty->list, &latest->fs_info->dead_roots); 513 list_add(&root->root_list, &root->fs_info->dead_roots);
535 mutex_unlock(&root->fs_info->trans_mutex); 514 mutex_unlock(&root->fs_info->trans_mutex);
536 return 0; 515 return 0;
537} 516}
538 517
539/* 518/*
540 * at transaction commit time we need to schedule the old roots for 519 * update all the fs tree roots recorded in this transaction on disk
541 * deletion via btrfs_drop_snapshot. This runs through all the
542 * reference counted roots that were modified in the current
543 * transaction and puts them into the drop list
544 */ 520 */
545static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, 521static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
546 struct radix_tree_root *radix, 522 struct btrfs_root *root)
547 struct list_head *list)
548{ 523{
549 struct btrfs_dirty_root *dirty;
550 struct btrfs_root *gang[8]; 524 struct btrfs_root *gang[8];
551 struct btrfs_root *root; 525 struct btrfs_fs_info *fs_info = root->fs_info;
552 int i; 526 int i;
553 int ret; 527 int ret;
554 int err = 0; 528 int err = 0;
555 u32 refs;
556 529
557 while (1) { 530 while (1) {
558 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, 531 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
532 (void **)gang, 0,
559 ARRAY_SIZE(gang), 533 ARRAY_SIZE(gang),
560 BTRFS_ROOT_TRANS_TAG); 534 BTRFS_ROOT_TRANS_TAG);
561 if (ret == 0) 535 if (ret == 0)
562 break; 536 break;
563 for (i = 0; i < ret; i++) { 537 for (i = 0; i < ret; i++) {
564 root = gang[i]; 538 root = gang[i];
565 radix_tree_tag_clear(radix, 539 radix_tree_tag_clear(&fs_info->fs_roots_radix,
566 (unsigned long)root->root_key.objectid, 540 (unsigned long)root->root_key.objectid,
567 BTRFS_ROOT_TRANS_TAG); 541 BTRFS_ROOT_TRANS_TAG);
568
569 BUG_ON(!root->ref_tree);
570 dirty = root->dirty_root;
571 542
572 btrfs_free_log(trans, root); 543 btrfs_free_log(trans, root);
573 btrfs_free_reloc_root(trans, root); 544 btrfs_update_reloc_root(trans, root);
574
575 if (root->commit_root == root->node) {
576 WARN_ON(root->node->start !=
577 btrfs_root_bytenr(&root->root_item));
578
579 free_extent_buffer(root->commit_root);
580 root->commit_root = NULL;
581 root->dirty_root = NULL;
582
583 spin_lock(&root->list_lock);
584 list_del_init(&dirty->root->dead_list);
585 spin_unlock(&root->list_lock);
586 545
587 kfree(dirty->root); 546 if (root->commit_root == root->node)
588 kfree(dirty);
589
590 /* make sure to update the root on disk
591 * so we get any updates to the block used
592 * counts
593 */
594 err = btrfs_update_root(trans,
595 root->fs_info->tree_root,
596 &root->root_key,
597 &root->root_item);
598 continue; 547 continue;
599 }
600 548
601 memset(&root->root_item.drop_progress, 0, 549 free_extent_buffer(root->commit_root);
602 sizeof(struct btrfs_disk_key)); 550 root->commit_root = btrfs_root_node(root);
603 root->root_item.drop_level = 0; 551
604 root->commit_root = NULL; 552 btrfs_set_root_node(&root->root_item, root->node);
605 root->dirty_root = NULL; 553 err = btrfs_update_root(trans, fs_info->tree_root,
606 root->root_key.offset = root->fs_info->generation;
607 btrfs_set_root_bytenr(&root->root_item,
608 root->node->start);
609 btrfs_set_root_level(&root->root_item,
610 btrfs_header_level(root->node));
611 btrfs_set_root_generation(&root->root_item,
612 root->root_key.offset);
613
614 err = btrfs_insert_root(trans, root->fs_info->tree_root,
615 &root->root_key, 554 &root->root_key,
616 &root->root_item); 555 &root->root_item);
617 if (err) 556 if (err)
618 break; 557 break;
619
620 refs = btrfs_root_refs(&dirty->root->root_item);
621 btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
622 err = btrfs_update_root(trans, root->fs_info->tree_root,
623 &dirty->root->root_key,
624 &dirty->root->root_item);
625
626 BUG_ON(err);
627 if (refs == 1) {
628 list_add(&dirty->list, list);
629 } else {
630 WARN_ON(1);
631 free_extent_buffer(dirty->root->node);
632 kfree(dirty->root);
633 kfree(dirty);
634 }
635 } 558 }
636 } 559 }
637 return err; 560 return err;
@@ -688,12 +611,8 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
688 TASK_UNINTERRUPTIBLE); 611 TASK_UNINTERRUPTIBLE);
689 mutex_unlock(&info->trans_mutex); 612 mutex_unlock(&info->trans_mutex);
690 613
691 atomic_dec(&info->throttles);
692 wake_up(&info->transaction_throttle);
693
694 schedule(); 614 schedule();
695 615
696 atomic_inc(&info->throttles);
697 mutex_lock(&info->trans_mutex); 616 mutex_lock(&info->trans_mutex);
698 finish_wait(&info->transaction_wait, &wait); 617 finish_wait(&info->transaction_wait, &wait);
699 } 618 }
@@ -705,111 +624,61 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
705 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 624 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
706 * all of them 625 * all of them
707 */ 626 */
708static noinline int drop_dirty_roots(struct btrfs_root *tree_root, 627int btrfs_drop_dead_root(struct btrfs_root *root)
709 struct list_head *list)
710{ 628{
711 struct btrfs_dirty_root *dirty;
712 struct btrfs_trans_handle *trans; 629 struct btrfs_trans_handle *trans;
630 struct btrfs_root *tree_root = root->fs_info->tree_root;
713 unsigned long nr; 631 unsigned long nr;
714 u64 num_bytes; 632 int ret;
715 u64 bytes_used;
716 u64 max_useless;
717 int ret = 0;
718 int err;
719
720 while (!list_empty(list)) {
721 struct btrfs_root *root;
722
723 dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
724 list_del_init(&dirty->list);
725
726 num_bytes = btrfs_root_used(&dirty->root->root_item);
727 root = dirty->latest_root;
728 atomic_inc(&root->fs_info->throttles);
729
730 while (1) {
731 /*
732 * we don't want to jump in and create a bunch of
733 * delayed refs if the transaction is starting to close
734 */
735 wait_transaction_pre_flush(tree_root->fs_info);
736 trans = btrfs_start_transaction(tree_root, 1);
737
738 /*
739 * we've joined a transaction, make sure it isn't
740 * closing right now
741 */
742 if (trans->transaction->delayed_refs.flushing) {
743 btrfs_end_transaction(trans, tree_root);
744 continue;
745 }
746
747 mutex_lock(&root->fs_info->drop_mutex);
748 ret = btrfs_drop_snapshot(trans, dirty->root);
749 if (ret != -EAGAIN)
750 break;
751 mutex_unlock(&root->fs_info->drop_mutex);
752 633
753 err = btrfs_update_root(trans, 634 while (1) {
754 tree_root, 635 /*
755 &dirty->root->root_key, 636 * we don't want to jump in and create a bunch of
756 &dirty->root->root_item); 637 * delayed refs if the transaction is starting to close
757 if (err) 638 */
758 ret = err; 639 wait_transaction_pre_flush(tree_root->fs_info);
759 nr = trans->blocks_used; 640 trans = btrfs_start_transaction(tree_root, 1);
760 ret = btrfs_end_transaction(trans, tree_root);
761 BUG_ON(ret);
762 641
763 btrfs_btree_balance_dirty(tree_root, nr); 642 /*
764 cond_resched(); 643 * we've joined a transaction, make sure it isn't
644 * closing right now
645 */
646 if (trans->transaction->delayed_refs.flushing) {
647 btrfs_end_transaction(trans, tree_root);
648 continue;
765 } 649 }
766 BUG_ON(ret);
767 atomic_dec(&root->fs_info->throttles);
768 wake_up(&root->fs_info->transaction_throttle);
769 650
770 num_bytes -= btrfs_root_used(&dirty->root->root_item); 651 ret = btrfs_drop_snapshot(trans, root);
771 bytes_used = btrfs_root_used(&root->root_item); 652 if (ret != -EAGAIN)
772 if (num_bytes) { 653 break;
773 mutex_lock(&root->fs_info->trans_mutex);
774 btrfs_record_root_in_trans(root);
775 mutex_unlock(&root->fs_info->trans_mutex);
776 btrfs_set_root_used(&root->root_item,
777 bytes_used - num_bytes);
778 }
779 654
780 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); 655 ret = btrfs_update_root(trans, tree_root,
781 if (ret) { 656 &root->root_key,
782 BUG(); 657 &root->root_item);
658 if (ret)
783 break; 659 break;
784 }
785 mutex_unlock(&root->fs_info->drop_mutex);
786
787 spin_lock(&root->list_lock);
788 list_del_init(&dirty->root->dead_list);
789 if (!list_empty(&root->dead_list)) {
790 struct btrfs_root *oldest;
791 oldest = list_entry(root->dead_list.prev,
792 struct btrfs_root, dead_list);
793 max_useless = oldest->root_key.offset - 1;
794 } else {
795 max_useless = root->root_key.offset - 1;
796 }
797 spin_unlock(&root->list_lock);
798 660
799 nr = trans->blocks_used; 661 nr = trans->blocks_used;
800 ret = btrfs_end_transaction(trans, tree_root); 662 ret = btrfs_end_transaction(trans, tree_root);
801 BUG_ON(ret); 663 BUG_ON(ret);
802 664
803 ret = btrfs_remove_leaf_refs(root, max_useless, 0);
804 BUG_ON(ret);
805
806 free_extent_buffer(dirty->root->node);
807 kfree(dirty->root);
808 kfree(dirty);
809
810 btrfs_btree_balance_dirty(tree_root, nr); 665 btrfs_btree_balance_dirty(tree_root, nr);
811 cond_resched(); 666 cond_resched();
812 } 667 }
668 BUG_ON(ret);
669
670 ret = btrfs_del_root(trans, tree_root, &root->root_key);
671 BUG_ON(ret);
672
673 nr = trans->blocks_used;
674 ret = btrfs_end_transaction(trans, tree_root);
675 BUG_ON(ret);
676
677 free_extent_buffer(root->node);
678 free_extent_buffer(root->commit_root);
679 kfree(root);
680
681 btrfs_btree_balance_dirty(tree_root, nr);
813 return ret; 682 return ret;
814} 683}
815 684
@@ -839,24 +708,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
839 if (ret) 708 if (ret)
840 goto fail; 709 goto fail;
841 710
842 btrfs_record_root_in_trans(root); 711 record_root_in_trans(trans, root);
843 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 712 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
844 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 713 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
845 714
846 key.objectid = objectid; 715 key.objectid = objectid;
847 key.offset = trans->transid; 716 key.offset = 0;
848 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 717 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
849 718
850 old = btrfs_lock_root_node(root); 719 old = btrfs_lock_root_node(root);
851 btrfs_cow_block(trans, root, old, NULL, 0, &old); 720 btrfs_cow_block(trans, root, old, NULL, 0, &old);
721 btrfs_set_lock_blocking(old);
852 722
853 btrfs_copy_root(trans, root, old, &tmp, objectid); 723 btrfs_copy_root(trans, root, old, &tmp, objectid);
854 btrfs_tree_unlock(old); 724 btrfs_tree_unlock(old);
855 free_extent_buffer(old); 725 free_extent_buffer(old);
856 726
857 btrfs_set_root_bytenr(new_root_item, tmp->start); 727 btrfs_set_root_node(new_root_item, tmp);
858 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
859 btrfs_set_root_generation(new_root_item, trans->transid);
860 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 728 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
861 new_root_item); 729 new_root_item);
862 btrfs_tree_unlock(tmp); 730 btrfs_tree_unlock(tmp);
@@ -964,6 +832,24 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
964 return 0; 832 return 0;
965} 833}
966 834
835static void update_super_roots(struct btrfs_root *root)
836{
837 struct btrfs_root_item *root_item;
838 struct btrfs_super_block *super;
839
840 super = &root->fs_info->super_copy;
841
842 root_item = &root->fs_info->chunk_root->root_item;
843 super->chunk_root = root_item->bytenr;
844 super->chunk_root_generation = root_item->generation;
845 super->chunk_root_level = root_item->level;
846
847 root_item = &root->fs_info->tree_root->root_item;
848 super->root = root_item->bytenr;
849 super->generation = root_item->generation;
850 super->root_level = root_item->level;
851}
852
967int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 853int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
968 struct btrfs_root *root) 854 struct btrfs_root *root)
969{ 855{
@@ -971,8 +857,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
971 unsigned long timeout = 1; 857 unsigned long timeout = 1;
972 struct btrfs_transaction *cur_trans; 858 struct btrfs_transaction *cur_trans;
973 struct btrfs_transaction *prev_trans = NULL; 859 struct btrfs_transaction *prev_trans = NULL;
974 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
975 struct list_head dirty_fs_roots;
976 struct extent_io_tree *pinned_copy; 860 struct extent_io_tree *pinned_copy;
977 DEFINE_WAIT(wait); 861 DEFINE_WAIT(wait);
978 int ret; 862 int ret;
@@ -999,7 +883,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
999 BUG_ON(ret); 883 BUG_ON(ret);
1000 884
1001 mutex_lock(&root->fs_info->trans_mutex); 885 mutex_lock(&root->fs_info->trans_mutex);
1002 INIT_LIST_HEAD(&dirty_fs_roots);
1003 if (cur_trans->in_commit) { 886 if (cur_trans->in_commit) {
1004 cur_trans->use_count++; 887 cur_trans->use_count++;
1005 mutex_unlock(&root->fs_info->trans_mutex); 888 mutex_unlock(&root->fs_info->trans_mutex);
@@ -1105,41 +988,36 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1105 * with the tree-log code. 988 * with the tree-log code.
1106 */ 989 */
1107 mutex_lock(&root->fs_info->tree_log_mutex); 990 mutex_lock(&root->fs_info->tree_log_mutex);
1108 /*
1109 * keep tree reloc code from adding new reloc trees
1110 */
1111 mutex_lock(&root->fs_info->tree_reloc_mutex);
1112
1113 991
1114 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, 992 ret = commit_fs_roots(trans, root);
1115 &dirty_fs_roots);
1116 BUG_ON(ret); 993 BUG_ON(ret);
1117 994
1118 /* add_dirty_roots gets rid of all the tree log roots, it is now 995 /* commit_fs_roots gets rid of all the tree log roots, it is now
1119 * safe to free the root of tree log roots 996 * safe to free the root of tree log roots
1120 */ 997 */
1121 btrfs_free_log_root_tree(trans, root->fs_info); 998 btrfs_free_log_root_tree(trans, root->fs_info);
1122 999
1123 ret = btrfs_commit_tree_roots(trans, root); 1000 ret = commit_cowonly_roots(trans, root);
1124 BUG_ON(ret); 1001 BUG_ON(ret);
1125 1002
1126 cur_trans = root->fs_info->running_transaction; 1003 cur_trans = root->fs_info->running_transaction;
1127 spin_lock(&root->fs_info->new_trans_lock); 1004 spin_lock(&root->fs_info->new_trans_lock);
1128 root->fs_info->running_transaction = NULL; 1005 root->fs_info->running_transaction = NULL;
1129 spin_unlock(&root->fs_info->new_trans_lock); 1006 spin_unlock(&root->fs_info->new_trans_lock);
1130 btrfs_set_super_generation(&root->fs_info->super_copy, 1007
1131 cur_trans->transid); 1008 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1132 btrfs_set_super_root(&root->fs_info->super_copy, 1009 root->fs_info->tree_root->node);
1133 root->fs_info->tree_root->node->start); 1010 free_extent_buffer(root->fs_info->tree_root->commit_root);
1134 btrfs_set_super_root_level(&root->fs_info->super_copy, 1011 root->fs_info->tree_root->commit_root =
1135 btrfs_header_level(root->fs_info->tree_root->node)); 1012 btrfs_root_node(root->fs_info->tree_root);
1136 1013
1137 btrfs_set_super_chunk_root(&root->fs_info->super_copy, 1014 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1138 chunk_root->node->start); 1015 root->fs_info->chunk_root->node);
1139 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, 1016 free_extent_buffer(root->fs_info->chunk_root->commit_root);
1140 btrfs_header_level(chunk_root->node)); 1017 root->fs_info->chunk_root->commit_root =
1141 btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, 1018 btrfs_root_node(root->fs_info->chunk_root);
1142 btrfs_header_generation(chunk_root->node)); 1019
1020 update_super_roots(root);
1143 1021
1144 if (!root->fs_info->log_root_recovering) { 1022 if (!root->fs_info->log_root_recovering) {
1145 btrfs_set_super_log_root(&root->fs_info->super_copy, 0); 1023 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
@@ -1153,7 +1031,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1153 1031
1154 trans->transaction->blocked = 0; 1032 trans->transaction->blocked = 0;
1155 1033
1156 wake_up(&root->fs_info->transaction_throttle);
1157 wake_up(&root->fs_info->transaction_wait); 1034 wake_up(&root->fs_info->transaction_wait);
1158 1035
1159 mutex_unlock(&root->fs_info->trans_mutex); 1036 mutex_unlock(&root->fs_info->trans_mutex);
@@ -1170,9 +1047,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1170 btrfs_finish_extent_commit(trans, root, pinned_copy); 1047 btrfs_finish_extent_commit(trans, root, pinned_copy);
1171 kfree(pinned_copy); 1048 kfree(pinned_copy);
1172 1049
1173 btrfs_drop_dead_reloc_roots(root);
1174 mutex_unlock(&root->fs_info->tree_reloc_mutex);
1175
1176 /* do the directory inserts of any pending snapshot creations */ 1050 /* do the directory inserts of any pending snapshot creations */
1177 finish_pending_snapshots(trans, root->fs_info); 1051 finish_pending_snapshots(trans, root->fs_info);
1178 1052
@@ -1186,16 +1060,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1186 put_transaction(cur_trans); 1060 put_transaction(cur_trans);
1187 put_transaction(cur_trans); 1061 put_transaction(cur_trans);
1188 1062
1189 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
1190 if (root->fs_info->closing)
1191 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
1192
1193 mutex_unlock(&root->fs_info->trans_mutex); 1063 mutex_unlock(&root->fs_info->trans_mutex);
1194 1064
1195 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1065 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1196
1197 if (root->fs_info->closing)
1198 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
1199 return ret; 1066 return ret;
1200} 1067}
1201 1068
@@ -1204,16 +1071,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1204 */ 1071 */
1205int btrfs_clean_old_snapshots(struct btrfs_root *root) 1072int btrfs_clean_old_snapshots(struct btrfs_root *root)
1206{ 1073{
1207 struct list_head dirty_roots; 1074 LIST_HEAD(list);
1208 INIT_LIST_HEAD(&dirty_roots); 1075 struct btrfs_fs_info *fs_info = root->fs_info;
1209again: 1076
1210 mutex_lock(&root->fs_info->trans_mutex); 1077 mutex_lock(&fs_info->trans_mutex);
1211 list_splice_init(&root->fs_info->dead_roots, &dirty_roots); 1078 list_splice_init(&fs_info->dead_roots, &list);
1212 mutex_unlock(&root->fs_info->trans_mutex); 1079 mutex_unlock(&fs_info->trans_mutex);
1213 1080
1214 if (!list_empty(&dirty_roots)) { 1081 while (!list_empty(&list)) {
1215 drop_dirty_roots(root, &dirty_roots); 1082 root = list_entry(list.next, struct btrfs_root, root_list);
1216 goto again; 1083 list_del_init(&root->root_list);
1084 btrfs_drop_dead_root(root);
1217 } 1085 }
1218 return 0; 1086 return 0;
1219} 1087}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94f5bde2b58d..961c3ee5a2e1 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -62,12 +62,6 @@ struct btrfs_pending_snapshot {
62 struct list_head list; 62 struct list_head list;
63}; 63};
64 64
65struct btrfs_dirty_root {
66 struct list_head list;
67 struct btrfs_root *root;
68 struct btrfs_root *latest_root;
69};
70
71static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, 65static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
72 struct inode *inode) 66 struct inode *inode)
73{ 67{
@@ -100,7 +94,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
100int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 94int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
101 struct btrfs_root *root); 95 struct btrfs_root *root);
102 96
103int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest); 97int btrfs_add_dead_root(struct btrfs_root *root);
98int btrfs_drop_dead_root(struct btrfs_root *root);
104int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); 99int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
105int btrfs_clean_old_snapshots(struct btrfs_root *root); 100int btrfs_clean_old_snapshots(struct btrfs_root *root);
106int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 101int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -108,7 +103,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
108int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 103int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
109 struct btrfs_root *root); 104 struct btrfs_root *root);
110void btrfs_throttle(struct btrfs_root *root); 105void btrfs_throttle(struct btrfs_root *root);
111int btrfs_record_root_in_trans(struct btrfs_root *root); 106int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
107 struct btrfs_root *root);
112int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 108int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
113 struct extent_io_tree *dirty_pages); 109 struct extent_io_tree *dirty_pages);
114#endif 110#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index db5e212e8445..2b41fc08c34a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -430,18 +430,16 @@ no_copy:
430static noinline struct inode *read_one_inode(struct btrfs_root *root, 430static noinline struct inode *read_one_inode(struct btrfs_root *root,
431 u64 objectid) 431 u64 objectid)
432{ 432{
433 struct btrfs_key key;
433 struct inode *inode; 434 struct inode *inode;
434 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
435 if (inode->i_state & I_NEW) {
436 BTRFS_I(inode)->root = root;
437 BTRFS_I(inode)->location.objectid = objectid;
438 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
439 BTRFS_I(inode)->location.offset = 0;
440 btrfs_read_locked_inode(inode);
441 unlock_new_inode(inode);
442 435
443 } 436 key.objectid = objectid;
444 if (is_bad_inode(inode)) { 437 key.type = BTRFS_INODE_ITEM_KEY;
438 key.offset = 0;
439 inode = btrfs_iget(root->fs_info->sb, &key, root);
440 if (IS_ERR(inode)) {
441 inode = NULL;
442 } else if (is_bad_inode(inode)) {
445 iput(inode); 443 iput(inode);
446 inode = NULL; 444 inode = NULL;
447 } 445 }
@@ -541,6 +539,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
541 539
542 if (found_type == BTRFS_FILE_EXTENT_REG || 540 if (found_type == BTRFS_FILE_EXTENT_REG ||
543 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 541 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
542 u64 offset;
544 unsigned long dest_offset; 543 unsigned long dest_offset;
545 struct btrfs_key ins; 544 struct btrfs_key ins;
546 545
@@ -555,6 +554,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
555 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 554 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
556 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 555 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
557 ins.type = BTRFS_EXTENT_ITEM_KEY; 556 ins.type = BTRFS_EXTENT_ITEM_KEY;
557 offset = key->offset - btrfs_file_extent_offset(eb, item);
558 558
559 if (ins.objectid > 0) { 559 if (ins.objectid > 0) {
560 u64 csum_start; 560 u64 csum_start;
@@ -569,19 +569,16 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
569 if (ret == 0) { 569 if (ret == 0) {
570 ret = btrfs_inc_extent_ref(trans, root, 570 ret = btrfs_inc_extent_ref(trans, root,
571 ins.objectid, ins.offset, 571 ins.objectid, ins.offset,
572 path->nodes[0]->start, 572 0, root->root_key.objectid,
573 root->root_key.objectid, 573 key->objectid, offset);
574 trans->transid, key->objectid);
575 } else { 574 } else {
576 /* 575 /*
577 * insert the extent pointer in the extent 576 * insert the extent pointer in the extent
578 * allocation tree 577 * allocation tree
579 */ 578 */
580 ret = btrfs_alloc_logged_extent(trans, root, 579 ret = btrfs_alloc_logged_file_extent(trans,
581 path->nodes[0]->start, 580 root, root->root_key.objectid,
582 root->root_key.objectid, 581 key->objectid, offset, &ins);
583 trans->transid, key->objectid,
584 &ins);
585 BUG_ON(ret); 582 BUG_ON(ret);
586 } 583 }
587 btrfs_release_path(root, path); 584 btrfs_release_path(root, path);
@@ -1706,9 +1703,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1706 btrfs_wait_tree_block_writeback(next); 1703 btrfs_wait_tree_block_writeback(next);
1707 btrfs_tree_unlock(next); 1704 btrfs_tree_unlock(next);
1708 1705
1709 ret = btrfs_drop_leaf_ref(trans, root, next);
1710 BUG_ON(ret);
1711
1712 WARN_ON(root_owner != 1706 WARN_ON(root_owner !=
1713 BTRFS_TREE_LOG_OBJECTID); 1707 BTRFS_TREE_LOG_OBJECTID);
1714 ret = btrfs_free_reserved_extent(root, 1708 ret = btrfs_free_reserved_extent(root,
@@ -1753,10 +1747,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1753 btrfs_wait_tree_block_writeback(next); 1747 btrfs_wait_tree_block_writeback(next);
1754 btrfs_tree_unlock(next); 1748 btrfs_tree_unlock(next);
1755 1749
1756 if (*level == 0) {
1757 ret = btrfs_drop_leaf_ref(trans, root, next);
1758 BUG_ON(ret);
1759 }
1760 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 1750 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1761 ret = btrfs_free_reserved_extent(root, bytenr, blocksize); 1751 ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
1762 BUG_ON(ret); 1752 BUG_ON(ret);
@@ -1811,12 +1801,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1811 btrfs_wait_tree_block_writeback(next); 1801 btrfs_wait_tree_block_writeback(next);
1812 btrfs_tree_unlock(next); 1802 btrfs_tree_unlock(next);
1813 1803
1814 if (*level == 0) {
1815 ret = btrfs_drop_leaf_ref(trans, root,
1816 next);
1817 BUG_ON(ret);
1818 }
1819
1820 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 1804 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1821 ret = btrfs_free_reserved_extent(root, 1805 ret = btrfs_free_reserved_extent(root,
1822 path->nodes[*level]->start, 1806 path->nodes[*level]->start,
@@ -1884,11 +1868,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1884 btrfs_wait_tree_block_writeback(next); 1868 btrfs_wait_tree_block_writeback(next);
1885 btrfs_tree_unlock(next); 1869 btrfs_tree_unlock(next);
1886 1870
1887 if (orig_level == 0) {
1888 ret = btrfs_drop_leaf_ref(trans, log,
1889 next);
1890 BUG_ON(ret);
1891 }
1892 WARN_ON(log->root_key.objectid != 1871 WARN_ON(log->root_key.objectid !=
1893 BTRFS_TREE_LOG_OBJECTID); 1872 BTRFS_TREE_LOG_OBJECTID);
1894 ret = btrfs_free_reserved_extent(log, next->start, 1873 ret = btrfs_free_reserved_extent(log, next->start,
@@ -2027,9 +2006,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2027 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 2006 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
2028 BUG_ON(ret); 2007 BUG_ON(ret);
2029 2008
2030 btrfs_set_root_bytenr(&log->root_item, log->node->start); 2009 btrfs_set_root_node(&log->root_item, log->node);
2031 btrfs_set_root_generation(&log->root_item, trans->transid);
2032 btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
2033 2010
2034 root->log_batch = 0; 2011 root->log_batch = 0;
2035 root->log_transid++; 2012 root->log_transid++;
@@ -2581,7 +2558,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2581 ins_keys, ins_sizes, nr); 2558 ins_keys, ins_sizes, nr);
2582 BUG_ON(ret); 2559 BUG_ON(ret);
2583 2560
2584 for (i = 0; i < nr; i++) { 2561 for (i = 0; i < nr; i++, dst_path->slots[0]++) {
2585 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], 2562 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
2586 dst_path->slots[0]); 2563 dst_path->slots[0]);
2587 2564
@@ -2617,36 +2594,31 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2617 found_type = btrfs_file_extent_type(src, extent); 2594 found_type = btrfs_file_extent_type(src, extent);
2618 if (found_type == BTRFS_FILE_EXTENT_REG || 2595 if (found_type == BTRFS_FILE_EXTENT_REG ||
2619 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 2596 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
2620 u64 ds = btrfs_file_extent_disk_bytenr(src, 2597 u64 ds, dl, cs, cl;
2621 extent); 2598 ds = btrfs_file_extent_disk_bytenr(src,
2622 u64 dl = btrfs_file_extent_disk_num_bytes(src, 2599 extent);
2623 extent); 2600 /* ds == 0 is a hole */
2624 u64 cs = btrfs_file_extent_offset(src, extent); 2601 if (ds == 0)
2625 u64 cl = btrfs_file_extent_num_bytes(src, 2602 continue;
2626 extent);; 2603
2604 dl = btrfs_file_extent_disk_num_bytes(src,
2605 extent);
2606 cs = btrfs_file_extent_offset(src, extent);
2607 cl = btrfs_file_extent_num_bytes(src,
2608 extent);;
2627 if (btrfs_file_extent_compression(src, 2609 if (btrfs_file_extent_compression(src,
2628 extent)) { 2610 extent)) {
2629 cs = 0; 2611 cs = 0;
2630 cl = dl; 2612 cl = dl;
2631 } 2613 }
2632 /* ds == 0 is a hole */ 2614
2633 if (ds != 0) { 2615 ret = btrfs_lookup_csums_range(
2634 ret = btrfs_inc_extent_ref(trans, log, 2616 log->fs_info->csum_root,
2635 ds, dl, 2617 ds + cs, ds + cs + cl - 1,
2636 dst_path->nodes[0]->start, 2618 &ordered_sums);
2637 BTRFS_TREE_LOG_OBJECTID, 2619 BUG_ON(ret);
2638 trans->transid,
2639 ins_keys[i].objectid);
2640 BUG_ON(ret);
2641 ret = btrfs_lookup_csums_range(
2642 log->fs_info->csum_root,
2643 ds + cs, ds + cs + cl - 1,
2644 &ordered_sums);
2645 BUG_ON(ret);
2646 }
2647 } 2620 }
2648 } 2621 }
2649 dst_path->slots[0]++;
2650 } 2622 }
2651 2623
2652 btrfs_mark_buffer_dirty(dst_path->nodes[0]); 2624 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
@@ -3029,9 +3001,7 @@ again:
3029 BUG_ON(!wc.replay_dest); 3001 BUG_ON(!wc.replay_dest);
3030 3002
3031 wc.replay_dest->log_root = log; 3003 wc.replay_dest->log_root = log;
3032 mutex_lock(&fs_info->trans_mutex); 3004 btrfs_record_root_in_trans(trans, wc.replay_dest);
3033 btrfs_record_root_in_trans(wc.replay_dest);
3034 mutex_unlock(&fs_info->trans_mutex);
3035 ret = walk_log_tree(trans, log, &wc); 3005 ret = walk_log_tree(trans, log, &wc);
3036 BUG_ON(ret); 3006 BUG_ON(ret);
3037 3007
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6d35b0054ca..8bc6a8807482 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1671,8 +1671,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1671 int ret; 1671 int ret;
1672 int i; 1672 int i;
1673 1673
1674 printk(KERN_INFO "btrfs relocating chunk %llu\n",
1675 (unsigned long long)chunk_offset);
1676 root = root->fs_info->chunk_root; 1674 root = root->fs_info->chunk_root;
1677 extent_root = root->fs_info->extent_root; 1675 extent_root = root->fs_info->extent_root;
1678 em_tree = &root->fs_info->mapping_tree.map_tree; 1676 em_tree = &root->fs_info->mapping_tree.map_tree;