aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYan Zheng <zheng.yan@oracle.com>2009-06-10 10:45:14 -0400
committerChris Mason <chris.mason@oracle.com>2009-06-10 11:29:46 -0400
commit5d4f98a28c7d334091c1b7744f48a1acdd2a4ae0 (patch)
treec611d7d824cbcdb777dd2d8e33e2ed1c5df8a9c6
parent5c939df56c3ea018b58e5aa76181284c2053d699 (diff)
Btrfs: Mixed back reference (FORWARD ROLLING FORMAT CHANGE)
This commit introduces a new kind of back reference for btrfs metadata. Once a filesystem has been mounted with this commit, IT WILL NO LONGER BE MOUNTABLE BY OLDER KERNELS. When a tree block in subvolume tree is cow'd, the reference counts of all extents it points to are increased by one. At transaction commit time, the old root of the subvolume is recorded in a "dead root" data structure, and the btree it points to is later walked, dropping reference counts and freeing any blocks where the reference count goes to 0. The increments done during cow and decrements done after commit cancel out, and the walk is a very expensive way to go about freeing the blocks that are no longer referenced by the new btree root. This commit reduces the transaction overhead by avoiding the need for dead root records. When a non-shared tree block is cow'd, we free the old block at once, and the new block inherits old block's references. When a tree block with reference count > 1 is cow'd, we increase the reference counts of all extents the new block points to by one, and decrease the old block's reference count by one. This dead tree avoidance code removes the need to modify the reference counts of lower level extents when a non-shared tree block is cow'd. But we still need to update back ref for all pointers in the block. This is because the location of the block is recorded in the back ref item. We can solve this by introducing a new type of back ref. The new back ref provides information about pointer's key, level and in which tree the pointer lives. This information allow us to find the pointer by searching the tree. The shortcoming of the new back ref is that it only works for pointers in tree blocks referenced by their owner trees. This is mostly a problem for snapshots, where resolving one of these fuzzy back references would be O(number_of_snapshots) and quite slow. The solution used here is to use the fuzzy back references in the common case where a given tree block is only referenced by one root, and use the full back references when multiple roots have a reference on a given block. This commit adds per subvolume red-black tree to keep trace of cached inodes. The red-black tree helps the balancing code to find cached inodes whose inode numbers within a given range. This commit improves the balancing code by introducing several data structures to keep the state of balancing. The most important one is the back ref cache. It caches how the upper level tree blocks are referenced. This greatly reduce the overhead of checking back ref. The improved balancing code scales significantly better with a large number of snapshots. This is a very large commit and was written in a number of pieces. But, they depend heavily on the disk format change and were squashed together to make sure git bisect didn't end up in a bad state wrt space balancing or the format change. Signed-off-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/Makefile4
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/ctree.c685
-rw-r--r--fs/btrfs/ctree.h308
-rw-r--r--fs/btrfs/delayed-ref.c509
-rw-r--r--fs/btrfs/delayed-ref.h85
-rw-r--r--fs/btrfs/disk-io.c95
-rw-r--r--fs/btrfs/export.c4
-rw-r--r--fs/btrfs/extent-tree.c2614
-rw-r--r--fs/btrfs/file.c76
-rw-r--r--fs/btrfs/inode.c132
-rw-r--r--fs/btrfs/ioctl.c21
-rw-r--r--fs/btrfs/print-tree.c155
-rw-r--r--fs/btrfs/relocation.c3711
-rw-r--r--fs/btrfs/root-tree.c17
-rw-r--r--fs/btrfs/super.c26
-rw-r--r--fs/btrfs/transaction.c410
-rw-r--r--fs/btrfs/transaction.h12
-rw-r--r--fs/btrfs/tree-log.c102
-rw-r--r--fs/btrfs/volumes.c2
20 files changed, 6928 insertions, 2043 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 94212844a9bc..a35eb36b32fd 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
9 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ 9 export.o tree-log.o acl.o free-space-cache.o zlib.o \
10 compression.o delayed-ref.o 10 compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index b30986f00b9d..ecf5f7d8166f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -72,6 +72,9 @@ struct btrfs_inode {
72 */ 72 */
73 struct list_head ordered_operations; 73 struct list_head ordered_operations;
74 74
75 /* node for the red-black tree that links inodes in subvolume root */
76 struct rb_node rb_node;
77
75 /* the space_info for where this inode's data allocations are done */ 78 /* the space_info for where this inode's data allocations are done */
76 struct btrfs_space_info *space_info; 79 struct btrfs_space_info *space_info;
77 80
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index fedf8b9f03a2..2b960278a2f9 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
197 u32 nritems; 197 u32 nritems;
198 int ret = 0; 198 int ret = 0;
199 int level; 199 int level;
200 struct btrfs_root *new_root; 200 struct btrfs_disk_key disk_key;
201
202 new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
203 if (!new_root)
204 return -ENOMEM;
205
206 memcpy(new_root, root, sizeof(*new_root));
207 new_root->root_key.objectid = new_root_objectid;
208 201
209 WARN_ON(root->ref_cows && trans->transid != 202 WARN_ON(root->ref_cows && trans->transid !=
210 root->fs_info->running_transaction->transid); 203 root->fs_info->running_transaction->transid);
@@ -212,28 +205,37 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
212 205
213 level = btrfs_header_level(buf); 206 level = btrfs_header_level(buf);
214 nritems = btrfs_header_nritems(buf); 207 nritems = btrfs_header_nritems(buf);
208 if (level == 0)
209 btrfs_item_key(buf, &disk_key, 0);
210 else
211 btrfs_node_key(buf, &disk_key, 0);
215 212
216 cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, 213 cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
217 new_root_objectid, trans->transid, 214 new_root_objectid, &disk_key, level,
218 level, buf->start, 0); 215 buf->start, 0);
219 if (IS_ERR(cow)) { 216 if (IS_ERR(cow))
220 kfree(new_root);
221 return PTR_ERR(cow); 217 return PTR_ERR(cow);
222 }
223 218
224 copy_extent_buffer(cow, buf, 0, 0, cow->len); 219 copy_extent_buffer(cow, buf, 0, 0, cow->len);
225 btrfs_set_header_bytenr(cow, cow->start); 220 btrfs_set_header_bytenr(cow, cow->start);
226 btrfs_set_header_generation(cow, trans->transid); 221 btrfs_set_header_generation(cow, trans->transid);
227 btrfs_set_header_owner(cow, new_root_objectid); 222 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
228 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 223 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
224 BTRFS_HEADER_FLAG_RELOC);
225 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
226 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
227 else
228 btrfs_set_header_owner(cow, new_root_objectid);
229 229
230 write_extent_buffer(cow, root->fs_info->fsid, 230 write_extent_buffer(cow, root->fs_info->fsid,
231 (unsigned long)btrfs_header_fsid(cow), 231 (unsigned long)btrfs_header_fsid(cow),
232 BTRFS_FSID_SIZE); 232 BTRFS_FSID_SIZE);
233 233
234 WARN_ON(btrfs_header_generation(buf) > trans->transid); 234 WARN_ON(btrfs_header_generation(buf) > trans->transid);
235 ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); 235 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
236 kfree(new_root); 236 ret = btrfs_inc_ref(trans, root, cow, 1);
237 else
238 ret = btrfs_inc_ref(trans, root, cow, 0);
237 239
238 if (ret) 240 if (ret)
239 return ret; 241 return ret;
@@ -244,6 +246,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
244} 246}
245 247
246/* 248/*
249 * check if the tree block can be shared by multiple trees
250 */
251int btrfs_block_can_be_shared(struct btrfs_root *root,
252 struct extent_buffer *buf)
253{
254 /*
255 * Tree blocks not in refernece counted trees and tree roots
256 * are never shared. If a block was allocated after the last
257 * snapshot and the block was not allocated by tree relocation,
258 * we know the block is not shared.
259 */
260 if (root->ref_cows &&
261 buf != root->node && buf != root->commit_root &&
262 (btrfs_header_generation(buf) <=
263 btrfs_root_last_snapshot(&root->root_item) ||
264 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
265 return 1;
266#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
267 if (root->ref_cows &&
268 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
269 return 1;
270#endif
271 return 0;
272}
273
274static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
275 struct btrfs_root *root,
276 struct extent_buffer *buf,
277 struct extent_buffer *cow)
278{
279 u64 refs;
280 u64 owner;
281 u64 flags;
282 u64 new_flags = 0;
283 int ret;
284
285 /*
286 * Backrefs update rules:
287 *
288 * Always use full backrefs for extent pointers in tree block
289 * allocated by tree relocation.
290 *
291 * If a shared tree block is no longer referenced by its owner
292 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
293 * use full backrefs for extent pointers in tree block.
294 *
295 * If a tree block is been relocating
296 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
297 * use full backrefs for extent pointers in tree block.
298 * The reason for this is some operations (such as drop tree)
299 * are only allowed for blocks use full backrefs.
300 */
301
302 if (btrfs_block_can_be_shared(root, buf)) {
303 ret = btrfs_lookup_extent_info(trans, root, buf->start,
304 buf->len, &refs, &flags);
305 BUG_ON(ret);
306 BUG_ON(refs == 0);
307 } else {
308 refs = 1;
309 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
310 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
311 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
312 else
313 flags = 0;
314 }
315
316 owner = btrfs_header_owner(buf);
317 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
318 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
319
320 if (refs > 1) {
321 if ((owner == root->root_key.objectid ||
322 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
323 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
324 ret = btrfs_inc_ref(trans, root, buf, 1);
325 BUG_ON(ret);
326
327 if (root->root_key.objectid ==
328 BTRFS_TREE_RELOC_OBJECTID) {
329 ret = btrfs_dec_ref(trans, root, buf, 0);
330 BUG_ON(ret);
331 ret = btrfs_inc_ref(trans, root, cow, 1);
332 BUG_ON(ret);
333 }
334 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
335 } else {
336
337 if (root->root_key.objectid ==
338 BTRFS_TREE_RELOC_OBJECTID)
339 ret = btrfs_inc_ref(trans, root, cow, 1);
340 else
341 ret = btrfs_inc_ref(trans, root, cow, 0);
342 BUG_ON(ret);
343 }
344 if (new_flags != 0) {
345 ret = btrfs_set_disk_extent_flags(trans, root,
346 buf->start,
347 buf->len,
348 new_flags, 0);
349 BUG_ON(ret);
350 }
351 } else {
352 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
353 if (root->root_key.objectid ==
354 BTRFS_TREE_RELOC_OBJECTID)
355 ret = btrfs_inc_ref(trans, root, cow, 1);
356 else
357 ret = btrfs_inc_ref(trans, root, cow, 0);
358 BUG_ON(ret);
359 ret = btrfs_dec_ref(trans, root, buf, 1);
360 BUG_ON(ret);
361 }
362 clean_tree_block(trans, root, buf);
363 }
364 return 0;
365}
366
367/*
247 * does the dirty work in cow of a single block. The parent block (if 368 * does the dirty work in cow of a single block. The parent block (if
248 * supplied) is updated to point to the new cow copy. The new buffer is marked 369 * supplied) is updated to point to the new cow copy. The new buffer is marked
249 * dirty and returned locked. If you modify the block it needs to be marked 370 * dirty and returned locked. If you modify the block it needs to be marked
@@ -262,34 +383,39 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
262 struct extent_buffer **cow_ret, 383 struct extent_buffer **cow_ret,
263 u64 search_start, u64 empty_size) 384 u64 search_start, u64 empty_size)
264{ 385{
265 u64 parent_start; 386 struct btrfs_disk_key disk_key;
266 struct extent_buffer *cow; 387 struct extent_buffer *cow;
267 u32 nritems;
268 int ret = 0;
269 int level; 388 int level;
270 int unlock_orig = 0; 389 int unlock_orig = 0;
390 u64 parent_start;
271 391
272 if (*cow_ret == buf) 392 if (*cow_ret == buf)
273 unlock_orig = 1; 393 unlock_orig = 1;
274 394
275 btrfs_assert_tree_locked(buf); 395 btrfs_assert_tree_locked(buf);
276 396
277 if (parent)
278 parent_start = parent->start;
279 else
280 parent_start = 0;
281
282 WARN_ON(root->ref_cows && trans->transid != 397 WARN_ON(root->ref_cows && trans->transid !=
283 root->fs_info->running_transaction->transid); 398 root->fs_info->running_transaction->transid);
284 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 399 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
285 400
286 level = btrfs_header_level(buf); 401 level = btrfs_header_level(buf);
287 nritems = btrfs_header_nritems(buf);
288 402
289 cow = btrfs_alloc_free_block(trans, root, buf->len, 403 if (level == 0)
290 parent_start, root->root_key.objectid, 404 btrfs_item_key(buf, &disk_key, 0);
291 trans->transid, level, 405 else
292 search_start, empty_size); 406 btrfs_node_key(buf, &disk_key, 0);
407
408 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
409 if (parent)
410 parent_start = parent->start;
411 else
412 parent_start = 0;
413 } else
414 parent_start = 0;
415
416 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
417 root->root_key.objectid, &disk_key,
418 level, search_start, empty_size);
293 if (IS_ERR(cow)) 419 if (IS_ERR(cow))
294 return PTR_ERR(cow); 420 return PTR_ERR(cow);
295 421
@@ -298,83 +424,53 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
298 copy_extent_buffer(cow, buf, 0, 0, cow->len); 424 copy_extent_buffer(cow, buf, 0, 0, cow->len);
299 btrfs_set_header_bytenr(cow, cow->start); 425 btrfs_set_header_bytenr(cow, cow->start);
300 btrfs_set_header_generation(cow, trans->transid); 426 btrfs_set_header_generation(cow, trans->transid);
301 btrfs_set_header_owner(cow, root->root_key.objectid); 427 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
302 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 428 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
429 BTRFS_HEADER_FLAG_RELOC);
430 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
431 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
432 else
433 btrfs_set_header_owner(cow, root->root_key.objectid);
303 434
304 write_extent_buffer(cow, root->fs_info->fsid, 435 write_extent_buffer(cow, root->fs_info->fsid,
305 (unsigned long)btrfs_header_fsid(cow), 436 (unsigned long)btrfs_header_fsid(cow),
306 BTRFS_FSID_SIZE); 437 BTRFS_FSID_SIZE);
307 438
308 WARN_ON(btrfs_header_generation(buf) > trans->transid); 439 update_ref_for_cow(trans, root, buf, cow);
309 if (btrfs_header_generation(buf) != trans->transid) {
310 u32 nr_extents;
311 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
312 if (ret)
313 return ret;
314
315 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
316 WARN_ON(ret);
317 } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
318 /*
319 * There are only two places that can drop reference to
320 * tree blocks owned by living reloc trees, one is here,
321 * the other place is btrfs_drop_subtree. In both places,
322 * we check reference count while tree block is locked.
323 * Furthermore, if reference count is one, it won't get
324 * increased by someone else.
325 */
326 u32 refs;
327 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
328 buf->len, &refs);
329 BUG_ON(ret);
330 if (refs == 1) {
331 ret = btrfs_update_ref(trans, root, buf, cow,
332 0, nritems);
333 clean_tree_block(trans, root, buf);
334 } else {
335 ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
336 }
337 BUG_ON(ret);
338 } else {
339 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
340 if (ret)
341 return ret;
342 clean_tree_block(trans, root, buf);
343 }
344
345 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
346 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
347 WARN_ON(ret);
348 }
349 440
350 if (buf == root->node) { 441 if (buf == root->node) {
351 WARN_ON(parent && parent != buf); 442 WARN_ON(parent && parent != buf);
443 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
444 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
445 parent_start = buf->start;
446 else
447 parent_start = 0;
352 448
353 spin_lock(&root->node_lock); 449 spin_lock(&root->node_lock);
354 root->node = cow; 450 root->node = cow;
355 extent_buffer_get(cow); 451 extent_buffer_get(cow);
356 spin_unlock(&root->node_lock); 452 spin_unlock(&root->node_lock);
357 453
358 if (buf != root->commit_root) { 454 btrfs_free_extent(trans, root, buf->start, buf->len,
359 btrfs_free_extent(trans, root, buf->start, 455 parent_start, root->root_key.objectid,
360 buf->len, buf->start, 456 level, 0);
361 root->root_key.objectid,
362 btrfs_header_generation(buf),
363 level, 1);
364 }
365 free_extent_buffer(buf); 457 free_extent_buffer(buf);
366 add_root_to_dirty_list(root); 458 add_root_to_dirty_list(root);
367 } else { 459 } else {
460 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
461 parent_start = parent->start;
462 else
463 parent_start = 0;
464
465 WARN_ON(trans->transid != btrfs_header_generation(parent));
368 btrfs_set_node_blockptr(parent, parent_slot, 466 btrfs_set_node_blockptr(parent, parent_slot,
369 cow->start); 467 cow->start);
370 WARN_ON(trans->transid == 0);
371 btrfs_set_node_ptr_generation(parent, parent_slot, 468 btrfs_set_node_ptr_generation(parent, parent_slot,
372 trans->transid); 469 trans->transid);
373 btrfs_mark_buffer_dirty(parent); 470 btrfs_mark_buffer_dirty(parent);
374 WARN_ON(btrfs_header_generation(parent) != trans->transid);
375 btrfs_free_extent(trans, root, buf->start, buf->len, 471 btrfs_free_extent(trans, root, buf->start, buf->len,
376 parent_start, btrfs_header_owner(parent), 472 parent_start, root->root_key.objectid,
377 btrfs_header_generation(parent), level, 1); 473 level, 0);
378 } 474 }
379 if (unlock_orig) 475 if (unlock_orig)
380 btrfs_tree_unlock(buf); 476 btrfs_tree_unlock(buf);
@@ -384,6 +480,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
384 return 0; 480 return 0;
385} 481}
386 482
483static inline int should_cow_block(struct btrfs_trans_handle *trans,
484 struct btrfs_root *root,
485 struct extent_buffer *buf)
486{
487 if (btrfs_header_generation(buf) == trans->transid &&
488 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
489 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
490 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
491 return 0;
492 return 1;
493}
494
387/* 495/*
388 * cows a single block, see __btrfs_cow_block for the real work. 496 * cows a single block, see __btrfs_cow_block for the real work.
389 * This version of it has extra checks so that a block isn't cow'd more than 497 * This version of it has extra checks so that a block isn't cow'd more than
@@ -411,9 +519,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
411 WARN_ON(1); 519 WARN_ON(1);
412 } 520 }
413 521
414 if (btrfs_header_generation(buf) == trans->transid && 522 if (!should_cow_block(trans, root, buf)) {
415 btrfs_header_owner(buf) == root->root_key.objectid &&
416 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
417 *cow_ret = buf; 523 *cow_ret = buf;
418 return 0; 524 return 0;
419 } 525 }
@@ -469,7 +575,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
469/* 575/*
470 * same as comp_keys only with two btrfs_key's 576 * same as comp_keys only with two btrfs_key's
471 */ 577 */
472static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) 578int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
473{ 579{
474 if (k1->objectid > k2->objectid) 580 if (k1->objectid > k2->objectid)
475 return 1; 581 return 1;
@@ -845,6 +951,12 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
845 return -1; 951 return -1;
846} 952}
847 953
954int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
955 int level, int *slot)
956{
957 return bin_search(eb, key, level, slot);
958}
959
848/* given a node and slot number, this reads the blocks it points to. The 960/* given a node and slot number, this reads the blocks it points to. The
849 * extent buffer is returned with a reference taken (but unlocked). 961 * extent buffer is returned with a reference taken (but unlocked).
850 * NULL is returned on error. 962 * NULL is returned on error.
@@ -921,13 +1033,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
921 root->node = child; 1033 root->node = child;
922 spin_unlock(&root->node_lock); 1034 spin_unlock(&root->node_lock);
923 1035
924 ret = btrfs_update_extent_ref(trans, root, child->start,
925 child->len,
926 mid->start, child->start,
927 root->root_key.objectid,
928 trans->transid, level - 1);
929 BUG_ON(ret);
930
931 add_root_to_dirty_list(root); 1036 add_root_to_dirty_list(root);
932 btrfs_tree_unlock(child); 1037 btrfs_tree_unlock(child);
933 1038
@@ -938,9 +1043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
938 /* once for the path */ 1043 /* once for the path */
939 free_extent_buffer(mid); 1044 free_extent_buffer(mid);
940 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1045 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
941 mid->start, root->root_key.objectid, 1046 0, root->root_key.objectid, level, 1);
942 btrfs_header_generation(mid),
943 level, 1);
944 /* once for the root ptr */ 1047 /* once for the root ptr */
945 free_extent_buffer(mid); 1048 free_extent_buffer(mid);
946 return ret; 1049 return ret;
@@ -998,7 +1101,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
998 ret = wret; 1101 ret = wret;
999 if (btrfs_header_nritems(right) == 0) { 1102 if (btrfs_header_nritems(right) == 0) {
1000 u64 bytenr = right->start; 1103 u64 bytenr = right->start;
1001 u64 generation = btrfs_header_generation(parent);
1002 u32 blocksize = right->len; 1104 u32 blocksize = right->len;
1003 1105
1004 clean_tree_block(trans, root, right); 1106 clean_tree_block(trans, root, right);
@@ -1010,9 +1112,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1010 if (wret) 1112 if (wret)
1011 ret = wret; 1113 ret = wret;
1012 wret = btrfs_free_extent(trans, root, bytenr, 1114 wret = btrfs_free_extent(trans, root, bytenr,
1013 blocksize, parent->start, 1115 blocksize, 0,
1014 btrfs_header_owner(parent), 1116 root->root_key.objectid,
1015 generation, level, 1); 1117 level, 0);
1016 if (wret) 1118 if (wret)
1017 ret = wret; 1119 ret = wret;
1018 } else { 1120 } else {
@@ -1047,7 +1149,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1047 } 1149 }
1048 if (btrfs_header_nritems(mid) == 0) { 1150 if (btrfs_header_nritems(mid) == 0) {
1049 /* we've managed to empty the middle node, drop it */ 1151 /* we've managed to empty the middle node, drop it */
1050 u64 root_gen = btrfs_header_generation(parent);
1051 u64 bytenr = mid->start; 1152 u64 bytenr = mid->start;
1052 u32 blocksize = mid->len; 1153 u32 blocksize = mid->len;
1053 1154
@@ -1059,9 +1160,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1059 if (wret) 1160 if (wret)
1060 ret = wret; 1161 ret = wret;
1061 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1162 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1062 parent->start, 1163 0, root->root_key.objectid,
1063 btrfs_header_owner(parent), 1164 level, 0);
1064 root_gen, level, 1);
1065 if (wret) 1165 if (wret)
1066 ret = wret; 1166 ret = wret;
1067 } else { 1167 } else {
@@ -1437,7 +1537,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1437{ 1537{
1438 int i; 1538 int i;
1439 1539
1440 if (path->keep_locks || path->lowest_level) 1540 if (path->keep_locks)
1441 return; 1541 return;
1442 1542
1443 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1543 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1614,10 +1714,17 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1614 lowest_unlock = 2; 1714 lowest_unlock = 2;
1615 1715
1616again: 1716again:
1617 if (p->skip_locking) 1717 if (p->search_commit_root) {
1618 b = btrfs_root_node(root); 1718 b = root->commit_root;
1619 else 1719 extent_buffer_get(b);
1620 b = btrfs_lock_root_node(root); 1720 if (!p->skip_locking)
1721 btrfs_tree_lock(b);
1722 } else {
1723 if (p->skip_locking)
1724 b = btrfs_root_node(root);
1725 else
1726 b = btrfs_lock_root_node(root);
1727 }
1621 1728
1622 while (b) { 1729 while (b) {
1623 level = btrfs_header_level(b); 1730 level = btrfs_header_level(b);
@@ -1638,11 +1745,9 @@ again:
1638 * then we don't want to set the path blocking, 1745 * then we don't want to set the path blocking,
1639 * so we test it here 1746 * so we test it here
1640 */ 1747 */
1641 if (btrfs_header_generation(b) == trans->transid && 1748 if (!should_cow_block(trans, root, b))
1642 btrfs_header_owner(b) == root->root_key.objectid &&
1643 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1644 goto cow_done; 1749 goto cow_done;
1645 } 1750
1646 btrfs_set_path_blocking(p); 1751 btrfs_set_path_blocking(p);
1647 1752
1648 wret = btrfs_cow_block(trans, root, b, 1753 wret = btrfs_cow_block(trans, root, b,
@@ -1764,138 +1869,6 @@ done:
1764 return ret; 1869 return ret;
1765} 1870}
1766 1871
1767int btrfs_merge_path(struct btrfs_trans_handle *trans,
1768 struct btrfs_root *root,
1769 struct btrfs_key *node_keys,
1770 u64 *nodes, int lowest_level)
1771{
1772 struct extent_buffer *eb;
1773 struct extent_buffer *parent;
1774 struct btrfs_key key;
1775 u64 bytenr;
1776 u64 generation;
1777 u32 blocksize;
1778 int level;
1779 int slot;
1780 int key_match;
1781 int ret;
1782
1783 eb = btrfs_lock_root_node(root);
1784 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
1785 BUG_ON(ret);
1786
1787 btrfs_set_lock_blocking(eb);
1788
1789 parent = eb;
1790 while (1) {
1791 level = btrfs_header_level(parent);
1792 if (level == 0 || level <= lowest_level)
1793 break;
1794
1795 ret = bin_search(parent, &node_keys[lowest_level], level,
1796 &slot);
1797 if (ret && slot > 0)
1798 slot--;
1799
1800 bytenr = btrfs_node_blockptr(parent, slot);
1801 if (nodes[level - 1] == bytenr)
1802 break;
1803
1804 blocksize = btrfs_level_size(root, level - 1);
1805 generation = btrfs_node_ptr_generation(parent, slot);
1806 btrfs_node_key_to_cpu(eb, &key, slot);
1807 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1808
1809 if (generation == trans->transid) {
1810 eb = read_tree_block(root, bytenr, blocksize,
1811 generation);
1812 btrfs_tree_lock(eb);
1813 btrfs_set_lock_blocking(eb);
1814 }
1815
1816 /*
1817 * if node keys match and node pointer hasn't been modified
1818 * in the running transaction, we can merge the path. for
1819 * blocks owened by reloc trees, the node pointer check is
1820 * skipped, this is because these blocks are fully controlled
1821 * by the space balance code, no one else can modify them.
1822 */
1823 if (!nodes[level - 1] || !key_match ||
1824 (generation == trans->transid &&
1825 btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1826 if (level == 1 || level == lowest_level + 1) {
1827 if (generation == trans->transid) {
1828 btrfs_tree_unlock(eb);
1829 free_extent_buffer(eb);
1830 }
1831 break;
1832 }
1833
1834 if (generation != trans->transid) {
1835 eb = read_tree_block(root, bytenr, blocksize,
1836 generation);
1837 btrfs_tree_lock(eb);
1838 btrfs_set_lock_blocking(eb);
1839 }
1840
1841 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1842 &eb);
1843 BUG_ON(ret);
1844
1845 if (root->root_key.objectid ==
1846 BTRFS_TREE_RELOC_OBJECTID) {
1847 if (!nodes[level - 1]) {
1848 nodes[level - 1] = eb->start;
1849 memcpy(&node_keys[level - 1], &key,
1850 sizeof(node_keys[0]));
1851 } else {
1852 WARN_ON(1);
1853 }
1854 }
1855
1856 btrfs_tree_unlock(parent);
1857 free_extent_buffer(parent);
1858 parent = eb;
1859 continue;
1860 }
1861
1862 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1863 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1864 btrfs_mark_buffer_dirty(parent);
1865
1866 ret = btrfs_inc_extent_ref(trans, root,
1867 nodes[level - 1],
1868 blocksize, parent->start,
1869 btrfs_header_owner(parent),
1870 btrfs_header_generation(parent),
1871 level - 1);
1872 BUG_ON(ret);
1873
1874 /*
1875 * If the block was created in the running transaction,
1876 * it's possible this is the last reference to it, so we
1877 * should drop the subtree.
1878 */
1879 if (generation == trans->transid) {
1880 ret = btrfs_drop_subtree(trans, root, eb, parent);
1881 BUG_ON(ret);
1882 btrfs_tree_unlock(eb);
1883 free_extent_buffer(eb);
1884 } else {
1885 ret = btrfs_free_extent(trans, root, bytenr,
1886 blocksize, parent->start,
1887 btrfs_header_owner(parent),
1888 btrfs_header_generation(parent),
1889 level - 1, 1);
1890 BUG_ON(ret);
1891 }
1892 break;
1893 }
1894 btrfs_tree_unlock(parent);
1895 free_extent_buffer(parent);
1896 return 0;
1897}
1898
1899/* 1872/*
1900 * adjust the pointers going up the tree, starting at level 1873 * adjust the pointers going up the tree, starting at level
1901 * making sure the right key of each node is points to 'key'. 1874 * making sure the right key of each node is points to 'key'.
@@ -2021,9 +1994,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2021 btrfs_mark_buffer_dirty(src); 1994 btrfs_mark_buffer_dirty(src);
2022 btrfs_mark_buffer_dirty(dst); 1995 btrfs_mark_buffer_dirty(dst);
2023 1996
2024 ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
2025 BUG_ON(ret);
2026
2027 return ret; 1997 return ret;
2028} 1998}
2029 1999
@@ -2083,9 +2053,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
2083 btrfs_mark_buffer_dirty(src); 2053 btrfs_mark_buffer_dirty(src);
2084 btrfs_mark_buffer_dirty(dst); 2054 btrfs_mark_buffer_dirty(dst);
2085 2055
2086 ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
2087 BUG_ON(ret);
2088
2089 return ret; 2056 return ret;
2090} 2057}
2091 2058
@@ -2105,7 +2072,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2105 struct extent_buffer *c; 2072 struct extent_buffer *c;
2106 struct extent_buffer *old; 2073 struct extent_buffer *old;
2107 struct btrfs_disk_key lower_key; 2074 struct btrfs_disk_key lower_key;
2108 int ret;
2109 2075
2110 BUG_ON(path->nodes[level]); 2076 BUG_ON(path->nodes[level]);
2111 BUG_ON(path->nodes[level-1] != root->node); 2077 BUG_ON(path->nodes[level-1] != root->node);
@@ -2117,16 +2083,17 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2117 btrfs_node_key(lower, &lower_key, 0); 2083 btrfs_node_key(lower, &lower_key, 0);
2118 2084
2119 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2085 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2120 root->root_key.objectid, trans->transid, 2086 root->root_key.objectid, &lower_key,
2121 level, root->node->start, 0); 2087 level, root->node->start, 0);
2122 if (IS_ERR(c)) 2088 if (IS_ERR(c))
2123 return PTR_ERR(c); 2089 return PTR_ERR(c);
2124 2090
2125 memset_extent_buffer(c, 0, 0, root->nodesize); 2091 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
2126 btrfs_set_header_nritems(c, 1); 2092 btrfs_set_header_nritems(c, 1);
2127 btrfs_set_header_level(c, level); 2093 btrfs_set_header_level(c, level);
2128 btrfs_set_header_bytenr(c, c->start); 2094 btrfs_set_header_bytenr(c, c->start);
2129 btrfs_set_header_generation(c, trans->transid); 2095 btrfs_set_header_generation(c, trans->transid);
2096 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
2130 btrfs_set_header_owner(c, root->root_key.objectid); 2097 btrfs_set_header_owner(c, root->root_key.objectid);
2131 2098
2132 write_extent_buffer(c, root->fs_info->fsid, 2099 write_extent_buffer(c, root->fs_info->fsid,
@@ -2151,12 +2118,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2151 root->node = c; 2118 root->node = c;
2152 spin_unlock(&root->node_lock); 2119 spin_unlock(&root->node_lock);
2153 2120
2154 ret = btrfs_update_extent_ref(trans, root, lower->start,
2155 lower->len, lower->start, c->start,
2156 root->root_key.objectid,
2157 trans->transid, level - 1);
2158 BUG_ON(ret);
2159
2160 /* the super has an extra ref to root->node */ 2121 /* the super has an extra ref to root->node */
2161 free_extent_buffer(old); 2122 free_extent_buffer(old);
2162 2123
@@ -2244,20 +2205,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2244 } 2205 }
2245 2206
2246 c_nritems = btrfs_header_nritems(c); 2207 c_nritems = btrfs_header_nritems(c);
2208 mid = (c_nritems + 1) / 2;
2209 btrfs_node_key(c, &disk_key, mid);
2247 2210
2248 split = btrfs_alloc_free_block(trans, root, root->nodesize, 2211 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2249 path->nodes[level + 1]->start,
2250 root->root_key.objectid, 2212 root->root_key.objectid,
2251 trans->transid, level, c->start, 0); 2213 &disk_key, level, c->start, 0);
2252 if (IS_ERR(split)) 2214 if (IS_ERR(split))
2253 return PTR_ERR(split); 2215 return PTR_ERR(split);
2254 2216
2255 btrfs_set_header_flags(split, btrfs_header_flags(c)); 2217 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
2256 btrfs_set_header_level(split, btrfs_header_level(c)); 2218 btrfs_set_header_level(split, btrfs_header_level(c));
2257 btrfs_set_header_bytenr(split, split->start); 2219 btrfs_set_header_bytenr(split, split->start);
2258 btrfs_set_header_generation(split, trans->transid); 2220 btrfs_set_header_generation(split, trans->transid);
2221 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
2259 btrfs_set_header_owner(split, root->root_key.objectid); 2222 btrfs_set_header_owner(split, root->root_key.objectid);
2260 btrfs_set_header_flags(split, 0);
2261 write_extent_buffer(split, root->fs_info->fsid, 2223 write_extent_buffer(split, root->fs_info->fsid,
2262 (unsigned long)btrfs_header_fsid(split), 2224 (unsigned long)btrfs_header_fsid(split),
2263 BTRFS_FSID_SIZE); 2225 BTRFS_FSID_SIZE);
@@ -2265,7 +2227,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2265 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2227 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2266 BTRFS_UUID_SIZE); 2228 BTRFS_UUID_SIZE);
2267 2229
2268 mid = (c_nritems + 1) / 2;
2269 2230
2270 copy_extent_buffer(split, c, 2231 copy_extent_buffer(split, c,
2271 btrfs_node_key_ptr_offset(0), 2232 btrfs_node_key_ptr_offset(0),
@@ -2278,16 +2239,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2278 btrfs_mark_buffer_dirty(c); 2239 btrfs_mark_buffer_dirty(c);
2279 btrfs_mark_buffer_dirty(split); 2240 btrfs_mark_buffer_dirty(split);
2280 2241
2281 btrfs_node_key(split, &disk_key, 0);
2282 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2242 wret = insert_ptr(trans, root, path, &disk_key, split->start,
2283 path->slots[level + 1] + 1, 2243 path->slots[level + 1] + 1,
2284 level + 1); 2244 level + 1);
2285 if (wret) 2245 if (wret)
2286 ret = wret; 2246 ret = wret;
2287 2247
2288 ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2289 BUG_ON(ret);
2290
2291 if (path->slots[level] >= mid) { 2248 if (path->slots[level] >= mid) {
2292 path->slots[level] -= mid; 2249 path->slots[level] -= mid;
2293 btrfs_tree_unlock(c); 2250 btrfs_tree_unlock(c);
@@ -2360,7 +2317,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2360 u32 right_nritems; 2317 u32 right_nritems;
2361 u32 data_end; 2318 u32 data_end;
2362 u32 this_item_size; 2319 u32 this_item_size;
2363 int ret;
2364 2320
2365 if (empty) 2321 if (empty)
2366 nr = 0; 2322 nr = 0;
@@ -2473,9 +2429,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2473 btrfs_mark_buffer_dirty(left); 2429 btrfs_mark_buffer_dirty(left);
2474 btrfs_mark_buffer_dirty(right); 2430 btrfs_mark_buffer_dirty(right);
2475 2431
2476 ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2477 BUG_ON(ret);
2478
2479 btrfs_item_key(right, &disk_key, 0); 2432 btrfs_item_key(right, &disk_key, 0);
2480 btrfs_set_node_key(upper, &disk_key, slot + 1); 2433 btrfs_set_node_key(upper, &disk_key, slot + 1);
2481 btrfs_mark_buffer_dirty(upper); 2434 btrfs_mark_buffer_dirty(upper);
@@ -2720,10 +2673,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2720 if (right_nritems) 2673 if (right_nritems)
2721 btrfs_mark_buffer_dirty(right); 2674 btrfs_mark_buffer_dirty(right);
2722 2675
2723 ret = btrfs_update_ref(trans, root, right, left,
2724 old_left_nritems, push_items);
2725 BUG_ON(ret);
2726
2727 btrfs_item_key(right, &disk_key, 0); 2676 btrfs_item_key(right, &disk_key, 0);
2728 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2677 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2729 if (wret) 2678 if (wret)
@@ -2880,9 +2829,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
2880 btrfs_mark_buffer_dirty(l); 2829 btrfs_mark_buffer_dirty(l);
2881 BUG_ON(path->slots[0] != slot); 2830 BUG_ON(path->slots[0] != slot);
2882 2831
2883 ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2884 BUG_ON(ret);
2885
2886 if (mid <= slot) { 2832 if (mid <= slot) {
2887 btrfs_tree_unlock(path->nodes[0]); 2833 btrfs_tree_unlock(path->nodes[0]);
2888 free_extent_buffer(path->nodes[0]); 2834 free_extent_buffer(path->nodes[0]);
@@ -2911,6 +2857,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2911 struct btrfs_path *path, int data_size, 2857 struct btrfs_path *path, int data_size,
2912 int extend) 2858 int extend)
2913{ 2859{
2860 struct btrfs_disk_key disk_key;
2914 struct extent_buffer *l; 2861 struct extent_buffer *l;
2915 u32 nritems; 2862 u32 nritems;
2916 int mid; 2863 int mid;
@@ -2918,7 +2865,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2918 struct extent_buffer *right; 2865 struct extent_buffer *right;
2919 int ret = 0; 2866 int ret = 0;
2920 int wret; 2867 int wret;
2921 int double_split; 2868 int split;
2922 int num_doubles = 0; 2869 int num_doubles = 0;
2923 2870
2924 /* first try to make some room by pushing left and right */ 2871 /* first try to make some room by pushing left and right */
@@ -2945,16 +2892,53 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2945 return ret; 2892 return ret;
2946 } 2893 }
2947again: 2894again:
2948 double_split = 0; 2895 split = 1;
2949 l = path->nodes[0]; 2896 l = path->nodes[0];
2950 slot = path->slots[0]; 2897 slot = path->slots[0];
2951 nritems = btrfs_header_nritems(l); 2898 nritems = btrfs_header_nritems(l);
2952 mid = (nritems + 1) / 2; 2899 mid = (nritems + 1) / 2;
2953 2900
2954 right = btrfs_alloc_free_block(trans, root, root->leafsize, 2901 if (mid <= slot) {
2955 path->nodes[1]->start, 2902 if (nritems == 1 ||
2903 leaf_space_used(l, mid, nritems - mid) + data_size >
2904 BTRFS_LEAF_DATA_SIZE(root)) {
2905 if (slot >= nritems) {
2906 split = 0;
2907 } else {
2908 mid = slot;
2909 if (mid != nritems &&
2910 leaf_space_used(l, mid, nritems - mid) +
2911 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2912 split = 2;
2913 }
2914 }
2915 }
2916 } else {
2917 if (leaf_space_used(l, 0, mid) + data_size >
2918 BTRFS_LEAF_DATA_SIZE(root)) {
2919 if (!extend && data_size && slot == 0) {
2920 split = 0;
2921 } else if ((extend || !data_size) && slot == 0) {
2922 mid = 1;
2923 } else {
2924 mid = slot;
2925 if (mid != nritems &&
2926 leaf_space_used(l, mid, nritems - mid) +
2927 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2928 split = 2 ;
2929 }
2930 }
2931 }
2932 }
2933
2934 if (split == 0)
2935 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2936 else
2937 btrfs_item_key(l, &disk_key, mid);
2938
2939 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
2956 root->root_key.objectid, 2940 root->root_key.objectid,
2957 trans->transid, 0, l->start, 0); 2941 &disk_key, 0, l->start, 0);
2958 if (IS_ERR(right)) { 2942 if (IS_ERR(right)) {
2959 BUG_ON(1); 2943 BUG_ON(1);
2960 return PTR_ERR(right); 2944 return PTR_ERR(right);
@@ -2963,6 +2947,7 @@ again:
2963 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 2947 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2964 btrfs_set_header_bytenr(right, right->start); 2948 btrfs_set_header_bytenr(right, right->start);
2965 btrfs_set_header_generation(right, trans->transid); 2949 btrfs_set_header_generation(right, trans->transid);
2950 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
2966 btrfs_set_header_owner(right, root->root_key.objectid); 2951 btrfs_set_header_owner(right, root->root_key.objectid);
2967 btrfs_set_header_level(right, 0); 2952 btrfs_set_header_level(right, 0);
2968 write_extent_buffer(right, root->fs_info->fsid, 2953 write_extent_buffer(right, root->fs_info->fsid,
@@ -2973,79 +2958,47 @@ again:
2973 (unsigned long)btrfs_header_chunk_tree_uuid(right), 2958 (unsigned long)btrfs_header_chunk_tree_uuid(right),
2974 BTRFS_UUID_SIZE); 2959 BTRFS_UUID_SIZE);
2975 2960
2976 if (mid <= slot) { 2961 if (split == 0) {
2977 if (nritems == 1 || 2962 if (mid <= slot) {
2978 leaf_space_used(l, mid, nritems - mid) + data_size > 2963 btrfs_set_header_nritems(right, 0);
2979 BTRFS_LEAF_DATA_SIZE(root)) { 2964 wret = insert_ptr(trans, root, path,
2980 if (slot >= nritems) { 2965 &disk_key, right->start,
2981 struct btrfs_disk_key disk_key; 2966 path->slots[1] + 1, 1);
2982 2967 if (wret)
2983 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2968 ret = wret;
2984 btrfs_set_header_nritems(right, 0);
2985 wret = insert_ptr(trans, root, path,
2986 &disk_key, right->start,
2987 path->slots[1] + 1, 1);
2988 if (wret)
2989 ret = wret;
2990 2969
2991 btrfs_tree_unlock(path->nodes[0]); 2970 btrfs_tree_unlock(path->nodes[0]);
2992 free_extent_buffer(path->nodes[0]); 2971 free_extent_buffer(path->nodes[0]);
2993 path->nodes[0] = right; 2972 path->nodes[0] = right;
2994 path->slots[0] = 0; 2973 path->slots[0] = 0;
2995 path->slots[1] += 1; 2974 path->slots[1] += 1;
2996 btrfs_mark_buffer_dirty(right); 2975 } else {
2997 return ret; 2976 btrfs_set_header_nritems(right, 0);
2998 } 2977 wret = insert_ptr(trans, root, path,
2999 mid = slot; 2978 &disk_key,
3000 if (mid != nritems && 2979 right->start,
3001 leaf_space_used(l, mid, nritems - mid) + 2980 path->slots[1], 1);
3002 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2981 if (wret)
3003 double_split = 1; 2982 ret = wret;
3004 } 2983 btrfs_tree_unlock(path->nodes[0]);
3005 } 2984 free_extent_buffer(path->nodes[0]);
3006 } else { 2985 path->nodes[0] = right;
3007 if (leaf_space_used(l, 0, mid) + data_size > 2986 path->slots[0] = 0;
3008 BTRFS_LEAF_DATA_SIZE(root)) { 2987 if (path->slots[1] == 0) {
3009 if (!extend && data_size && slot == 0) { 2988 wret = fixup_low_keys(trans, root,
3010 struct btrfs_disk_key disk_key; 2989 path, &disk_key, 1);
3011
3012 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3013 btrfs_set_header_nritems(right, 0);
3014 wret = insert_ptr(trans, root, path,
3015 &disk_key,
3016 right->start,
3017 path->slots[1], 1);
3018 if (wret) 2990 if (wret)
3019 ret = wret; 2991 ret = wret;
3020 btrfs_tree_unlock(path->nodes[0]);
3021 free_extent_buffer(path->nodes[0]);
3022 path->nodes[0] = right;
3023 path->slots[0] = 0;
3024 if (path->slots[1] == 0) {
3025 wret = fixup_low_keys(trans, root,
3026 path, &disk_key, 1);
3027 if (wret)
3028 ret = wret;
3029 }
3030 btrfs_mark_buffer_dirty(right);
3031 return ret;
3032 } else if ((extend || !data_size) && slot == 0) {
3033 mid = 1;
3034 } else {
3035 mid = slot;
3036 if (mid != nritems &&
3037 leaf_space_used(l, mid, nritems - mid) +
3038 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3039 double_split = 1;
3040 }
3041 } 2992 }
3042 } 2993 }
2994 btrfs_mark_buffer_dirty(right);
2995 return ret;
3043 } 2996 }
3044 2997
3045 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); 2998 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
3046 BUG_ON(ret); 2999 BUG_ON(ret);
3047 3000
3048 if (double_split) { 3001 if (split == 2) {
3049 BUG_ON(num_doubles != 0); 3002 BUG_ON(num_doubles != 0);
3050 num_doubles++; 3003 num_doubles++;
3051 goto again; 3004 goto again;
@@ -3447,7 +3400,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3447 /* figure out how many keys we can insert in here */ 3400 /* figure out how many keys we can insert in here */
3448 total_data = data_size[0]; 3401 total_data = data_size[0];
3449 for (i = 1; i < nr; i++) { 3402 for (i = 1; i < nr; i++) {
3450 if (comp_cpu_keys(&found_key, cpu_key + i) <= 0) 3403 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3451 break; 3404 break;
3452 total_data += data_size[i]; 3405 total_data += data_size[i];
3453 } 3406 }
@@ -3745,9 +3698,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3745 3698
3746/* 3699/*
3747 * a helper function to delete the leaf pointed to by path->slots[1] and 3700 * a helper function to delete the leaf pointed to by path->slots[1] and
3748 * path->nodes[1]. bytenr is the node block pointer, but since the callers 3701 * path->nodes[1].
3749 * already know it, it is faster to have them pass it down than to
3750 * read it out of the node again.
3751 * 3702 *
3752 * This deletes the pointer in path->nodes[1] and frees the leaf 3703 * This deletes the pointer in path->nodes[1] and frees the leaf
3753 * block extent. zero is returned if it all worked out, < 0 otherwise. 3704 * block extent. zero is returned if it all worked out, < 0 otherwise.
@@ -3755,15 +3706,14 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3755 * The path must have already been setup for deleting the leaf, including 3706 * The path must have already been setup for deleting the leaf, including
3756 * all the proper balancing. path->nodes[1] must be locked. 3707 * all the proper balancing. path->nodes[1] must be locked.
3757 */ 3708 */
3758noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3709static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3759 struct btrfs_root *root, 3710 struct btrfs_root *root,
3760 struct btrfs_path *path, u64 bytenr) 3711 struct btrfs_path *path,
3712 struct extent_buffer *leaf)
3761{ 3713{
3762 int ret; 3714 int ret;
3763 u64 root_gen = btrfs_header_generation(path->nodes[1]);
3764 u64 parent_start = path->nodes[1]->start;
3765 u64 parent_owner = btrfs_header_owner(path->nodes[1]);
3766 3715
3716 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
3767 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3717 ret = del_ptr(trans, root, path, 1, path->slots[1]);
3768 if (ret) 3718 if (ret)
3769 return ret; 3719 return ret;
@@ -3774,10 +3724,8 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3774 */ 3724 */
3775 btrfs_unlock_up_safe(path, 0); 3725 btrfs_unlock_up_safe(path, 0);
3776 3726
3777 ret = btrfs_free_extent(trans, root, bytenr, 3727 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
3778 btrfs_level_size(root, 0), 3728 0, root->root_key.objectid, 0, 0);
3779 parent_start, parent_owner,
3780 root_gen, 0, 1);
3781 return ret; 3729 return ret;
3782} 3730}
3783/* 3731/*
@@ -3845,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3845 if (leaf == root->node) { 3793 if (leaf == root->node) {
3846 btrfs_set_header_level(leaf, 0); 3794 btrfs_set_header_level(leaf, 0);
3847 } else { 3795 } else {
3848 ret = btrfs_del_leaf(trans, root, path, leaf->start); 3796 ret = btrfs_del_leaf(trans, root, path, leaf);
3849 BUG_ON(ret); 3797 BUG_ON(ret);
3850 } 3798 }
3851 } else { 3799 } else {
@@ -3884,8 +3832,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3884 3832
3885 if (btrfs_header_nritems(leaf) == 0) { 3833 if (btrfs_header_nritems(leaf) == 0) {
3886 path->slots[1] = slot; 3834 path->slots[1] = slot;
3887 ret = btrfs_del_leaf(trans, root, path, 3835 ret = btrfs_del_leaf(trans, root, path, leaf);
3888 leaf->start);
3889 BUG_ON(ret); 3836 BUG_ON(ret);
3890 free_extent_buffer(leaf); 3837 free_extent_buffer(leaf);
3891 } else { 3838 } else {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4414a5d9983a..ce3ab4e13064 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,8 @@ struct btrfs_ordered_sum;
45 45
46#define BTRFS_MAX_LEVEL 8 46#define BTRFS_MAX_LEVEL 8
47 47
48#define BTRFS_COMPAT_EXTENT_TREE_V0
49
48/* 50/*
49 * files bigger than this get some pre-flushing when they are added 51 * files bigger than this get some pre-flushing when they are added
50 * to the ordered operations list. That way we limit the total 52 * to the ordered operations list. That way we limit the total
@@ -267,7 +269,18 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
267} 269}
268 270
269#define BTRFS_FSID_SIZE 16 271#define BTRFS_FSID_SIZE 16
270#define BTRFS_HEADER_FLAG_WRITTEN (1 << 0) 272#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
273#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
274#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
275#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
276
277#define BTRFS_BACKREF_REV_MAX 256
278#define BTRFS_BACKREF_REV_SHIFT 56
279#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
280 BTRFS_BACKREF_REV_SHIFT)
281
282#define BTRFS_OLD_BACKREF_REV 0
283#define BTRFS_MIXED_BACKREF_REV 1
271 284
272/* 285/*
273 * every tree block (leaf or node) starts with this header. 286 * every tree block (leaf or node) starts with this header.
@@ -296,7 +309,6 @@ struct btrfs_header {
296 sizeof(struct btrfs_item) - \ 309 sizeof(struct btrfs_item) - \
297 sizeof(struct btrfs_file_extent_item)) 310 sizeof(struct btrfs_file_extent_item))
298 311
299#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
300 312
301/* 313/*
302 * this is a very generous portion of the super block, giving us 314 * this is a very generous portion of the super block, giving us
@@ -355,9 +367,12 @@ struct btrfs_super_block {
355 * Compat flags that we support. If any incompat flags are set other than the 367 * Compat flags that we support. If any incompat flags are set other than the
356 * ones specified below then we will fail to mount 368 * ones specified below then we will fail to mount
357 */ 369 */
358#define BTRFS_FEATURE_COMPAT_SUPP 0x0 370#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
359#define BTRFS_FEATURE_COMPAT_RO_SUPP 0x0 371
360#define BTRFS_FEATURE_INCOMPAT_SUPP 0x0 372#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
373#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
374#define BTRFS_FEATURE_INCOMPAT_SUPP \
375 BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF
361 376
362/* 377/*
363 * A leaf is full of items. offset and size tell us where to find 378 * A leaf is full of items. offset and size tell us where to find
@@ -421,23 +436,65 @@ struct btrfs_path {
421 unsigned int keep_locks:1; 436 unsigned int keep_locks:1;
422 unsigned int skip_locking:1; 437 unsigned int skip_locking:1;
423 unsigned int leave_spinning:1; 438 unsigned int leave_spinning:1;
439 unsigned int search_commit_root:1;
424}; 440};
425 441
426/* 442/*
427 * items in the extent btree are used to record the objectid of the 443 * items in the extent btree are used to record the objectid of the
428 * owner of the block and the number of references 444 * owner of the block and the number of references
429 */ 445 */
446
430struct btrfs_extent_item { 447struct btrfs_extent_item {
448 __le64 refs;
449 __le64 generation;
450 __le64 flags;
451} __attribute__ ((__packed__));
452
453struct btrfs_extent_item_v0 {
431 __le32 refs; 454 __le32 refs;
432} __attribute__ ((__packed__)); 455} __attribute__ ((__packed__));
433 456
434struct btrfs_extent_ref { 457#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r) >> 4) - \
458 sizeof(struct btrfs_item))
459
460#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
461#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
462
463/* following flags only apply to tree blocks */
464
465/* use full backrefs for extent pointers in the block */
466#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
467
468struct btrfs_tree_block_info {
469 struct btrfs_disk_key key;
470 u8 level;
471} __attribute__ ((__packed__));
472
473struct btrfs_extent_data_ref {
474 __le64 root;
475 __le64 objectid;
476 __le64 offset;
477 __le32 count;
478} __attribute__ ((__packed__));
479
480struct btrfs_shared_data_ref {
481 __le32 count;
482} __attribute__ ((__packed__));
483
484struct btrfs_extent_inline_ref {
485 u8 type;
486 u64 offset;
487} __attribute__ ((__packed__));
488
489/* old style backrefs item */
490struct btrfs_extent_ref_v0 {
435 __le64 root; 491 __le64 root;
436 __le64 generation; 492 __le64 generation;
437 __le64 objectid; 493 __le64 objectid;
438 __le32 num_refs; 494 __le32 count;
439} __attribute__ ((__packed__)); 495} __attribute__ ((__packed__));
440 496
497
441/* dev extents record free space on individual devices. The owner 498/* dev extents record free space on individual devices. The owner
442 * field points back to the chunk allocation mapping tree that allocated 499 * field points back to the chunk allocation mapping tree that allocated
443 * the extent. The chunk tree uuid field is a way to double check the owner 500 * the extent. The chunk tree uuid field is a way to double check the owner
@@ -695,12 +752,7 @@ struct btrfs_block_group_cache {
695 struct list_head cluster_list; 752 struct list_head cluster_list;
696}; 753};
697 754
698struct btrfs_leaf_ref_tree { 755struct reloc_control;
699 struct rb_root root;
700 struct list_head list;
701 spinlock_t lock;
702};
703
704struct btrfs_device; 756struct btrfs_device;
705struct btrfs_fs_devices; 757struct btrfs_fs_devices;
706struct btrfs_fs_info { 758struct btrfs_fs_info {
@@ -831,18 +883,11 @@ struct btrfs_fs_info {
831 struct task_struct *cleaner_kthread; 883 struct task_struct *cleaner_kthread;
832 int thread_pool_size; 884 int thread_pool_size;
833 885
834 /* tree relocation relocated fields */
835 struct list_head dead_reloc_roots;
836 struct btrfs_leaf_ref_tree reloc_ref_tree;
837 struct btrfs_leaf_ref_tree shared_ref_tree;
838
839 struct kobject super_kobj; 886 struct kobject super_kobj;
840 struct completion kobj_unregister; 887 struct completion kobj_unregister;
841 int do_barriers; 888 int do_barriers;
842 int closing; 889 int closing;
843 int log_root_recovering; 890 int log_root_recovering;
844 atomic_t throttles;
845 atomic_t throttle_gen;
846 891
847 u64 total_pinned; 892 u64 total_pinned;
848 893
@@ -861,6 +906,8 @@ struct btrfs_fs_info {
861 */ 906 */
862 struct list_head space_info; 907 struct list_head space_info;
863 908
909 struct reloc_control *reloc_ctl;
910
864 spinlock_t delalloc_lock; 911 spinlock_t delalloc_lock;
865 spinlock_t new_trans_lock; 912 spinlock_t new_trans_lock;
866 u64 delalloc_bytes; 913 u64 delalloc_bytes;
@@ -891,7 +938,6 @@ struct btrfs_fs_info {
891 * in ram representation of the tree. extent_root is used for all allocations 938 * in ram representation of the tree. extent_root is used for all allocations
892 * and for the extent tree extent_root root. 939 * and for the extent tree extent_root root.
893 */ 940 */
894struct btrfs_dirty_root;
895struct btrfs_root { 941struct btrfs_root {
896 struct extent_buffer *node; 942 struct extent_buffer *node;
897 943
@@ -899,9 +945,6 @@ struct btrfs_root {
899 spinlock_t node_lock; 945 spinlock_t node_lock;
900 946
901 struct extent_buffer *commit_root; 947 struct extent_buffer *commit_root;
902 struct btrfs_leaf_ref_tree *ref_tree;
903 struct btrfs_leaf_ref_tree ref_tree_struct;
904 struct btrfs_dirty_root *dirty_root;
905 struct btrfs_root *log_root; 948 struct btrfs_root *log_root;
906 struct btrfs_root *reloc_root; 949 struct btrfs_root *reloc_root;
907 950
@@ -952,10 +995,15 @@ struct btrfs_root {
952 /* the dirty list is only used by non-reference counted roots */ 995 /* the dirty list is only used by non-reference counted roots */
953 struct list_head dirty_list; 996 struct list_head dirty_list;
954 997
998 struct list_head root_list;
999
955 spinlock_t list_lock; 1000 spinlock_t list_lock;
956 struct list_head dead_list;
957 struct list_head orphan_list; 1001 struct list_head orphan_list;
958 1002
1003 spinlock_t inode_lock;
1004 /* red-black tree that keeps track of in-memory inodes */
1005 struct rb_root inode_tree;
1006
959 /* 1007 /*
960 * right now this just gets used so that a root has its own devid 1008 * right now this just gets used so that a root has its own devid
961 * for stat. It may be used for more later 1009 * for stat. It may be used for more later
@@ -1017,7 +1065,16 @@ struct btrfs_root {
1017 * are used, and how many references there are to each block 1065 * are used, and how many references there are to each block
1018 */ 1066 */
1019#define BTRFS_EXTENT_ITEM_KEY 168 1067#define BTRFS_EXTENT_ITEM_KEY 168
1020#define BTRFS_EXTENT_REF_KEY 180 1068
1069#define BTRFS_TREE_BLOCK_REF_KEY 176
1070
1071#define BTRFS_EXTENT_DATA_REF_KEY 178
1072
1073#define BTRFS_EXTENT_REF_V0_KEY 180
1074
1075#define BTRFS_SHARED_BLOCK_REF_KEY 182
1076
1077#define BTRFS_SHARED_DATA_REF_KEY 184
1021 1078
1022/* 1079/*
1023 * block groups give us hints into the extent allocation trees. Which 1080 * block groups give us hints into the extent allocation trees. Which
@@ -1317,24 +1374,67 @@ static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
1317 return (u8 *)((unsigned long)dev + ptr); 1374 return (u8 *)((unsigned long)dev + ptr);
1318} 1375}
1319 1376
1320/* struct btrfs_extent_ref */ 1377BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
1321BTRFS_SETGET_FUNCS(ref_root, struct btrfs_extent_ref, root, 64); 1378BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
1322BTRFS_SETGET_FUNCS(ref_generation, struct btrfs_extent_ref, generation, 64); 1379 generation, 64);
1323BTRFS_SETGET_FUNCS(ref_objectid, struct btrfs_extent_ref, objectid, 64); 1380BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
1324BTRFS_SETGET_FUNCS(ref_num_refs, struct btrfs_extent_ref, num_refs, 32);
1325 1381
1326BTRFS_SETGET_STACK_FUNCS(stack_ref_root, struct btrfs_extent_ref, root, 64); 1382BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
1327BTRFS_SETGET_STACK_FUNCS(stack_ref_generation, struct btrfs_extent_ref, 1383
1328 generation, 64); 1384
1329BTRFS_SETGET_STACK_FUNCS(stack_ref_objectid, struct btrfs_extent_ref, 1385BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
1330 objectid, 64); 1386
1331BTRFS_SETGET_STACK_FUNCS(stack_ref_num_refs, struct btrfs_extent_ref, 1387static inline void btrfs_tree_block_key(struct extent_buffer *eb,
1332 num_refs, 32); 1388 struct btrfs_tree_block_info *item,
1389 struct btrfs_disk_key *key)
1390{
1391 read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
1392}
1393
1394static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
1395 struct btrfs_tree_block_info *item,
1396 struct btrfs_disk_key *key)
1397{
1398 write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
1399}
1333 1400
1334/* struct btrfs_extent_item */ 1401BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
1335BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); 1402 root, 64);
1336BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, 1403BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
1337 refs, 32); 1404 objectid, 64);
1405BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
1406 offset, 64);
1407BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
1408 count, 32);
1409
1410BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
1411 count, 32);
1412
1413BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
1414 type, 8);
1415BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
1416 offset, 64);
1417
1418static inline u32 btrfs_extent_inline_ref_size(int type)
1419{
1420 if (type == BTRFS_TREE_BLOCK_REF_KEY ||
1421 type == BTRFS_SHARED_BLOCK_REF_KEY)
1422 return sizeof(struct btrfs_extent_inline_ref);
1423 if (type == BTRFS_SHARED_DATA_REF_KEY)
1424 return sizeof(struct btrfs_shared_data_ref) +
1425 sizeof(struct btrfs_extent_inline_ref);
1426 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1427 return sizeof(struct btrfs_extent_data_ref) +
1428 offsetof(struct btrfs_extent_inline_ref, offset);
1429 BUG();
1430 return 0;
1431}
1432
1433BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
1434BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
1435 generation, 64);
1436BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
1437BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
1338 1438
1339/* struct btrfs_node */ 1439/* struct btrfs_node */
1340BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); 1440BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
@@ -1558,6 +1658,21 @@ static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
1558 return (flags & flag) == flag; 1658 return (flags & flag) == flag;
1559} 1659}
1560 1660
1661static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
1662{
1663 u64 flags = btrfs_header_flags(eb);
1664 return flags >> BTRFS_BACKREF_REV_SHIFT;
1665}
1666
1667static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
1668 int rev)
1669{
1670 u64 flags = btrfs_header_flags(eb);
1671 flags &= ~BTRFS_BACKREF_REV_MASK;
1672 flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT;
1673 btrfs_set_header_flags(eb, flags);
1674}
1675
1561static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) 1676static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
1562{ 1677{
1563 unsigned long ptr = offsetof(struct btrfs_header, fsid); 1678 unsigned long ptr = offsetof(struct btrfs_header, fsid);
@@ -1790,39 +1905,32 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
1790int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 1905int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1791 struct btrfs_root *root, struct extent_buffer *leaf); 1906 struct btrfs_root *root, struct extent_buffer *leaf);
1792int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1907int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1793 struct btrfs_root *root, u64 objectid, u64 bytenr); 1908 struct btrfs_root *root,
1909 u64 objectid, u64 offset, u64 bytenr);
1794int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); 1910int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
1795struct btrfs_block_group_cache *btrfs_lookup_block_group( 1911struct btrfs_block_group_cache *btrfs_lookup_block_group(
1796 struct btrfs_fs_info *info, 1912 struct btrfs_fs_info *info,
1797 u64 bytenr); 1913 u64 bytenr);
1914void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1798u64 btrfs_find_block_group(struct btrfs_root *root, 1915u64 btrfs_find_block_group(struct btrfs_root *root,
1799 u64 search_start, u64 search_hint, int owner); 1916 u64 search_start, u64 search_hint, int owner);
1800struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1917struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1801 struct btrfs_root *root, 1918 struct btrfs_root *root, u32 blocksize,
1802 u32 blocksize, u64 parent, 1919 u64 parent, u64 root_objectid,
1803 u64 root_objectid, 1920 struct btrfs_disk_key *key, int level,
1804 u64 ref_generation, 1921 u64 hint, u64 empty_size);
1805 int level,
1806 u64 hint,
1807 u64 empty_size);
1808struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 1922struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
1809 struct btrfs_root *root, 1923 struct btrfs_root *root,
1810 u64 bytenr, u32 blocksize, 1924 u64 bytenr, u32 blocksize,
1811 int level); 1925 int level);
1812int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1926int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
1813 struct btrfs_root *root, 1927 struct btrfs_root *root,
1814 u64 num_bytes, u64 parent, u64 min_bytes, 1928 u64 root_objectid, u64 owner,
1815 u64 root_objectid, u64 ref_generation, 1929 u64 offset, struct btrfs_key *ins);
1816 u64 owner, u64 empty_size, u64 hint_byte, 1930int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
1817 u64 search_end, struct btrfs_key *ins, u64 data); 1931 struct btrfs_root *root,
1818int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 1932 u64 root_objectid, u64 owner, u64 offset,
1819 struct btrfs_root *root, u64 parent, 1933 struct btrfs_key *ins);
1820 u64 root_objectid, u64 ref_generation,
1821 u64 owner, struct btrfs_key *ins);
1822int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
1823 struct btrfs_root *root, u64 parent,
1824 u64 root_objectid, u64 ref_generation,
1825 u64 owner, struct btrfs_key *ins);
1826int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 1934int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1827 struct btrfs_root *root, 1935 struct btrfs_root *root,
1828 u64 num_bytes, u64 min_alloc_size, 1936 u64 num_bytes, u64 min_alloc_size,
@@ -1830,18 +1938,18 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1830 u64 search_end, struct btrfs_key *ins, 1938 u64 search_end, struct btrfs_key *ins,
1831 u64 data); 1939 u64 data);
1832int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1940int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1833 struct extent_buffer *orig_buf, struct extent_buffer *buf, 1941 struct extent_buffer *buf, int full_backref);
1834 u32 *nr_extents); 1942int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1835int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1943 struct extent_buffer *buf, int full_backref);
1836 struct extent_buffer *buf, u32 nr_extents); 1944int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
1837int btrfs_update_ref(struct btrfs_trans_handle *trans, 1945 struct btrfs_root *root,
1838 struct btrfs_root *root, struct extent_buffer *orig_buf, 1946 u64 bytenr, u64 num_bytes, u64 flags,
1839 struct extent_buffer *buf, int start_slot, int nr); 1947 int is_data);
1840int btrfs_free_extent(struct btrfs_trans_handle *trans, 1948int btrfs_free_extent(struct btrfs_trans_handle *trans,
1841 struct btrfs_root *root, 1949 struct btrfs_root *root,
1842 u64 bytenr, u64 num_bytes, u64 parent, 1950 u64 bytenr, u64 num_bytes, u64 parent,
1843 u64 root_objectid, u64 ref_generation, 1951 u64 root_objectid, u64 owner, u64 offset);
1844 u64 owner_objectid, int pin); 1952
1845int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 1953int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
1846int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 1954int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1847 struct btrfs_root *root, 1955 struct btrfs_root *root,
@@ -1849,13 +1957,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1849int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1957int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1850 struct btrfs_root *root, 1958 struct btrfs_root *root,
1851 u64 bytenr, u64 num_bytes, u64 parent, 1959 u64 bytenr, u64 num_bytes, u64 parent,
1852 u64 root_objectid, u64 ref_generation, 1960 u64 root_objectid, u64 owner, u64 offset);
1853 u64 owner_objectid); 1961
1854int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1855 struct btrfs_root *root, u64 bytenr, u64 num_bytes,
1856 u64 orig_parent, u64 parent,
1857 u64 root_objectid, u64 ref_generation,
1858 u64 owner_objectid);
1859int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 1962int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1860 struct btrfs_root *root); 1963 struct btrfs_root *root);
1861int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 1964int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
@@ -1867,16 +1970,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
1867 u64 size); 1970 u64 size);
1868int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 1971int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
1869 struct btrfs_root *root, u64 group_start); 1972 struct btrfs_root *root, u64 group_start);
1870int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); 1973int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
1871int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 1974 struct btrfs_block_group_cache *group);
1872 struct btrfs_root *root); 1975
1873int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
1874int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
1875 struct btrfs_root *root,
1876 struct extent_buffer *buf, u64 orig_start);
1877int btrfs_add_dead_reloc_root(struct btrfs_root *root);
1878int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
1879int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
1880u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); 1976u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
1881void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); 1977void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
1882void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 1978void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
@@ -1891,13 +1987,12 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
1891void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 1987void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1892 u64 bytes); 1988 u64 bytes);
1893/* ctree.c */ 1989/* ctree.c */
1990int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1991 int level, int *slot);
1992int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
1894int btrfs_previous_item(struct btrfs_root *root, 1993int btrfs_previous_item(struct btrfs_root *root,
1895 struct btrfs_path *path, u64 min_objectid, 1994 struct btrfs_path *path, u64 min_objectid,
1896 int type); 1995 int type);
1897int btrfs_merge_path(struct btrfs_trans_handle *trans,
1898 struct btrfs_root *root,
1899 struct btrfs_key *node_keys,
1900 u64 *nodes, int lowest_level);
1901int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 1996int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1902 struct btrfs_root *root, struct btrfs_path *path, 1997 struct btrfs_root *root, struct btrfs_path *path,
1903 struct btrfs_key *new_key); 1998 struct btrfs_key *new_key);
@@ -1918,6 +2013,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
1918 struct btrfs_root *root, 2013 struct btrfs_root *root,
1919 struct extent_buffer *buf, 2014 struct extent_buffer *buf,
1920 struct extent_buffer **cow_ret, u64 new_root_objectid); 2015 struct extent_buffer **cow_ret, u64 new_root_objectid);
2016int btrfs_block_can_be_shared(struct btrfs_root *root,
2017 struct extent_buffer *buf);
1921int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root 2018int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1922 *root, struct btrfs_path *path, u32 data_size); 2019 *root, struct btrfs_path *path, u32 data_size);
1923int btrfs_truncate_item(struct btrfs_trans_handle *trans, 2020int btrfs_truncate_item(struct btrfs_trans_handle *trans,
@@ -1944,9 +2041,6 @@ void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
1944 2041
1945int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2042int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1946 struct btrfs_path *path, int slot, int nr); 2043 struct btrfs_path *path, int slot, int nr);
1947int btrfs_del_leaf(struct btrfs_trans_handle *trans,
1948 struct btrfs_root *root,
1949 struct btrfs_path *path, u64 bytenr);
1950static inline int btrfs_del_item(struct btrfs_trans_handle *trans, 2044static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
1951 struct btrfs_root *root, 2045 struct btrfs_root *root,
1952 struct btrfs_path *path) 2046 struct btrfs_path *path)
@@ -2005,8 +2099,9 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
2005 btrfs_root_item *item, struct btrfs_key *key); 2099 btrfs_root_item *item, struct btrfs_key *key);
2006int btrfs_search_root(struct btrfs_root *root, u64 search_start, 2100int btrfs_search_root(struct btrfs_root *root, u64 search_start,
2007 u64 *found_objectid); 2101 u64 *found_objectid);
2008int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, 2102int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
2009 struct btrfs_root *latest_root); 2103int btrfs_set_root_node(struct btrfs_root_item *item,
2104 struct extent_buffer *node);
2010/* dir-item.c */ 2105/* dir-item.c */
2011int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, 2106int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
2012 struct btrfs_root *root, const char *name, 2107 struct btrfs_root *root, const char *name,
@@ -2139,7 +2234,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2139int btrfs_readpage(struct file *file, struct page *page); 2234int btrfs_readpage(struct file *file, struct page *page);
2140void btrfs_delete_inode(struct inode *inode); 2235void btrfs_delete_inode(struct inode *inode);
2141void btrfs_put_inode(struct inode *inode); 2236void btrfs_put_inode(struct inode *inode);
2142void btrfs_read_locked_inode(struct inode *inode);
2143int btrfs_write_inode(struct inode *inode, int wait); 2237int btrfs_write_inode(struct inode *inode, int wait);
2144void btrfs_dirty_inode(struct inode *inode); 2238void btrfs_dirty_inode(struct inode *inode);
2145struct inode *btrfs_alloc_inode(struct super_block *sb); 2239struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2147,12 +2241,8 @@ void btrfs_destroy_inode(struct inode *inode);
2147int btrfs_init_cachep(void); 2241int btrfs_init_cachep(void);
2148void btrfs_destroy_cachep(void); 2242void btrfs_destroy_cachep(void);
2149long btrfs_ioctl_trans_end(struct file *file); 2243long btrfs_ioctl_trans_end(struct file *file);
2150struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
2151 struct btrfs_root *root, int wait);
2152struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
2153 struct btrfs_root *root);
2154struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 2244struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2155 struct btrfs_root *root, int *is_new); 2245 struct btrfs_root *root);
2156int btrfs_commit_write(struct file *file, struct page *page, 2246int btrfs_commit_write(struct file *file, struct page *page,
2157 unsigned from, unsigned to); 2247 unsigned from, unsigned to);
2158struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 2248struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2209,4 +2299,12 @@ int btrfs_check_acl(struct inode *inode, int mask);
2209int btrfs_init_acl(struct inode *inode, struct inode *dir); 2299int btrfs_init_acl(struct inode *inode, struct inode *dir);
2210int btrfs_acl_chmod(struct inode *inode); 2300int btrfs_acl_chmod(struct inode *inode);
2211 2301
2302/* relocation.c */
2303int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
2304int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
2305 struct btrfs_root *root);
2306int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
2307 struct btrfs_root *root);
2308int btrfs_recover_relocation(struct btrfs_root *root);
2309int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
2212#endif 2310#endif
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index d6c01c096a40..84e6781413b1 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -29,27 +29,87 @@
29 * add extents in the middle of btrfs_search_slot, and it allows 29 * add extents in the middle of btrfs_search_slot, and it allows
30 * us to buffer up frequently modified backrefs in an rb tree instead 30 * us to buffer up frequently modified backrefs in an rb tree instead
31 * of hammering updates on the extent allocation tree. 31 * of hammering updates on the extent allocation tree.
32 *
33 * Right now this code is only used for reference counted trees, but
34 * the long term goal is to get rid of the similar code for delayed
35 * extent tree modifications.
36 */ 32 */
37 33
38/* 34/*
39 * entries in the rb tree are ordered by the byte number of the extent 35 * compare two delayed tree backrefs with same bytenr and type
40 * and by the byte number of the parent block. 36 */
37static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
38 struct btrfs_delayed_tree_ref *ref1)
39{
40 if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
41 if (ref1->root < ref2->root)
42 return -1;
43 if (ref1->root > ref2->root)
44 return 1;
45 } else {
46 if (ref1->parent < ref2->parent)
47 return -1;
48 if (ref1->parent > ref2->parent)
49 return 1;
50 }
51 return 0;
52}
53
54/*
55 * compare two delayed data backrefs with same bytenr and type
41 */ 56 */
42static int comp_entry(struct btrfs_delayed_ref_node *ref, 57static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
43 u64 bytenr, u64 parent) 58 struct btrfs_delayed_data_ref *ref1)
44{ 59{
45 if (bytenr < ref->bytenr) 60 if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
61 if (ref1->root < ref2->root)
62 return -1;
63 if (ref1->root > ref2->root)
64 return 1;
65 if (ref1->objectid < ref2->objectid)
66 return -1;
67 if (ref1->objectid > ref2->objectid)
68 return 1;
69 if (ref1->offset < ref2->offset)
70 return -1;
71 if (ref1->offset > ref2->offset)
72 return 1;
73 } else {
74 if (ref1->parent < ref2->parent)
75 return -1;
76 if (ref1->parent > ref2->parent)
77 return 1;
78 }
79 return 0;
80}
81
82/*
83 * entries in the rb tree are ordered by the byte number of the extent,
84 * type of the delayed backrefs and content of delayed backrefs.
85 */
86static int comp_entry(struct btrfs_delayed_ref_node *ref2,
87 struct btrfs_delayed_ref_node *ref1)
88{
89 if (ref1->bytenr < ref2->bytenr)
46 return -1; 90 return -1;
47 if (bytenr > ref->bytenr) 91 if (ref1->bytenr > ref2->bytenr)
48 return 1; 92 return 1;
49 if (parent < ref->parent) 93 if (ref1->is_head && ref2->is_head)
94 return 0;
95 if (ref2->is_head)
50 return -1; 96 return -1;
51 if (parent > ref->parent) 97 if (ref1->is_head)
52 return 1; 98 return 1;
99 if (ref1->type < ref2->type)
100 return -1;
101 if (ref1->type > ref2->type)
102 return 1;
103 if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
104 ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
105 return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
106 btrfs_delayed_node_to_tree_ref(ref1));
107 } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
108 ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
109 return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
110 btrfs_delayed_node_to_data_ref(ref1));
111 }
112 BUG();
53 return 0; 113 return 0;
54} 114}
55 115
@@ -59,20 +119,21 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref,
59 * inserted. 119 * inserted.
60 */ 120 */
61static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, 121static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
62 u64 bytenr, u64 parent,
63 struct rb_node *node) 122 struct rb_node *node)
64{ 123{
65 struct rb_node **p = &root->rb_node; 124 struct rb_node **p = &root->rb_node;
66 struct rb_node *parent_node = NULL; 125 struct rb_node *parent_node = NULL;
67 struct btrfs_delayed_ref_node *entry; 126 struct btrfs_delayed_ref_node *entry;
127 struct btrfs_delayed_ref_node *ins;
68 int cmp; 128 int cmp;
69 129
130 ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
70 while (*p) { 131 while (*p) {
71 parent_node = *p; 132 parent_node = *p;
72 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, 133 entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
73 rb_node); 134 rb_node);
74 135
75 cmp = comp_entry(entry, bytenr, parent); 136 cmp = comp_entry(entry, ins);
76 if (cmp < 0) 137 if (cmp < 0)
77 p = &(*p)->rb_left; 138 p = &(*p)->rb_left;
78 else if (cmp > 0) 139 else if (cmp > 0)
@@ -81,18 +142,17 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
81 return entry; 142 return entry;
82 } 143 }
83 144
84 entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
85 rb_link_node(node, parent_node, p); 145 rb_link_node(node, parent_node, p);
86 rb_insert_color(node, root); 146 rb_insert_color(node, root);
87 return NULL; 147 return NULL;
88} 148}
89 149
90/* 150/*
91 * find an entry based on (bytenr,parent). This returns the delayed 151 * find an head entry based on bytenr. This returns the delayed ref
92 * ref if it was able to find one, or NULL if nothing was in that spot 152 * head if it was able to find one, or NULL if nothing was in that spot
93 */ 153 */
94static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, 154static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
95 u64 bytenr, u64 parent, 155 u64 bytenr,
96 struct btrfs_delayed_ref_node **last) 156 struct btrfs_delayed_ref_node **last)
97{ 157{
98 struct rb_node *n = root->rb_node; 158 struct rb_node *n = root->rb_node;
@@ -105,7 +165,15 @@ static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root,
105 if (last) 165 if (last)
106 *last = entry; 166 *last = entry;
107 167
108 cmp = comp_entry(entry, bytenr, parent); 168 if (bytenr < entry->bytenr)
169 cmp = -1;
170 else if (bytenr > entry->bytenr)
171 cmp = 1;
172 else if (!btrfs_delayed_ref_is_head(entry))
173 cmp = 1;
174 else
175 cmp = 0;
176
109 if (cmp < 0) 177 if (cmp < 0)
110 n = n->rb_left; 178 n = n->rb_left;
111 else if (cmp > 0) 179 else if (cmp > 0)
@@ -154,7 +222,7 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
154 node = rb_first(&delayed_refs->root); 222 node = rb_first(&delayed_refs->root);
155 } else { 223 } else {
156 ref = NULL; 224 ref = NULL;
157 tree_search(&delayed_refs->root, start, (u64)-1, &ref); 225 find_ref_head(&delayed_refs->root, start, &ref);
158 if (ref) { 226 if (ref) {
159 struct btrfs_delayed_ref_node *tmp; 227 struct btrfs_delayed_ref_node *tmp;
160 228
@@ -234,7 +302,7 @@ int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr)
234 delayed_refs = &trans->transaction->delayed_refs; 302 delayed_refs = &trans->transaction->delayed_refs;
235 spin_lock(&delayed_refs->lock); 303 spin_lock(&delayed_refs->lock);
236 304
237 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 305 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
238 if (ref) { 306 if (ref) {
239 prev_node = rb_prev(&ref->rb_node); 307 prev_node = rb_prev(&ref->rb_node);
240 if (!prev_node) 308 if (!prev_node)
@@ -250,25 +318,28 @@ out:
250} 318}
251 319
252/* 320/*
253 * helper function to lookup reference count 321 * helper function to lookup reference count and flags of extent.
254 * 322 *
255 * the head node for delayed ref is used to store the sum of all the 323 * the head node for delayed ref is used to store the sum of all the
256 * reference count modifications queued up in the rbtree. This way you 324 * reference count modifications queued up in the rbtree. the head
257 * can check to see what the reference count would be if all of the 325 * node may also store the extent flags to set. This way you can check
258 * delayed refs are processed. 326 * to see what the reference count and extent flags would be if all of
327 * the delayed refs are not processed.
259 */ 328 */
260int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 329int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
261 struct btrfs_root *root, u64 bytenr, 330 struct btrfs_root *root, u64 bytenr,
262 u64 num_bytes, u32 *refs) 331 u64 num_bytes, u64 *refs, u64 *flags)
263{ 332{
264 struct btrfs_delayed_ref_node *ref; 333 struct btrfs_delayed_ref_node *ref;
265 struct btrfs_delayed_ref_head *head; 334 struct btrfs_delayed_ref_head *head;
266 struct btrfs_delayed_ref_root *delayed_refs; 335 struct btrfs_delayed_ref_root *delayed_refs;
267 struct btrfs_path *path; 336 struct btrfs_path *path;
268 struct extent_buffer *leaf;
269 struct btrfs_extent_item *ei; 337 struct btrfs_extent_item *ei;
338 struct extent_buffer *leaf;
270 struct btrfs_key key; 339 struct btrfs_key key;
271 u32 num_refs; 340 u32 item_size;
341 u64 num_refs;
342 u64 extent_flags;
272 int ret; 343 int ret;
273 344
274 path = btrfs_alloc_path(); 345 path = btrfs_alloc_path();
@@ -287,37 +358,60 @@ again:
287 358
288 if (ret == 0) { 359 if (ret == 0) {
289 leaf = path->nodes[0]; 360 leaf = path->nodes[0];
290 ei = btrfs_item_ptr(leaf, path->slots[0], 361 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
291 struct btrfs_extent_item); 362 if (item_size >= sizeof(*ei)) {
292 num_refs = btrfs_extent_refs(leaf, ei); 363 ei = btrfs_item_ptr(leaf, path->slots[0],
364 struct btrfs_extent_item);
365 num_refs = btrfs_extent_refs(leaf, ei);
366 extent_flags = btrfs_extent_flags(leaf, ei);
367 } else {
368#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
369 struct btrfs_extent_item_v0 *ei0;
370 BUG_ON(item_size != sizeof(*ei0));
371 ei0 = btrfs_item_ptr(leaf, path->slots[0],
372 struct btrfs_extent_item_v0);
373 num_refs = btrfs_extent_refs_v0(leaf, ei0);
374 /* FIXME: this isn't correct for data */
375 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
376#else
377 BUG();
378#endif
379 }
380 BUG_ON(num_refs == 0);
293 } else { 381 } else {
294 num_refs = 0; 382 num_refs = 0;
383 extent_flags = 0;
295 ret = 0; 384 ret = 0;
296 } 385 }
297 386
298 spin_lock(&delayed_refs->lock); 387 spin_lock(&delayed_refs->lock);
299 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 388 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
300 if (ref) { 389 if (ref) {
301 head = btrfs_delayed_node_to_head(ref); 390 head = btrfs_delayed_node_to_head(ref);
302 if (mutex_trylock(&head->mutex)) { 391 if (!mutex_trylock(&head->mutex)) {
303 num_refs += ref->ref_mod; 392 atomic_inc(&ref->refs);
304 mutex_unlock(&head->mutex); 393 spin_unlock(&delayed_refs->lock);
305 *refs = num_refs;
306 goto out;
307 }
308 394
309 atomic_inc(&ref->refs); 395 btrfs_release_path(root->fs_info->extent_root, path);
310 spin_unlock(&delayed_refs->lock);
311 396
312 btrfs_release_path(root->fs_info->extent_root, path); 397 mutex_lock(&head->mutex);
398 mutex_unlock(&head->mutex);
399 btrfs_put_delayed_ref(ref);
400 goto again;
401 }
402 if (head->extent_op && head->extent_op->update_flags)
403 extent_flags |= head->extent_op->flags_to_set;
404 else
405 BUG_ON(num_refs == 0);
313 406
314 mutex_lock(&head->mutex); 407 num_refs += ref->ref_mod;
315 mutex_unlock(&head->mutex); 408 mutex_unlock(&head->mutex);
316 btrfs_put_delayed_ref(ref);
317 goto again;
318 } else {
319 *refs = num_refs;
320 } 409 }
410 WARN_ON(num_refs == 0);
411 if (refs)
412 *refs = num_refs;
413 if (flags)
414 *flags = extent_flags;
321out: 415out:
322 spin_unlock(&delayed_refs->lock); 416 spin_unlock(&delayed_refs->lock);
323 btrfs_free_path(path); 417 btrfs_free_path(path);
@@ -338,16 +432,7 @@ update_existing_ref(struct btrfs_trans_handle *trans,
338 struct btrfs_delayed_ref_node *existing, 432 struct btrfs_delayed_ref_node *existing,
339 struct btrfs_delayed_ref_node *update) 433 struct btrfs_delayed_ref_node *update)
340{ 434{
341 struct btrfs_delayed_ref *existing_ref; 435 if (update->action != existing->action) {
342 struct btrfs_delayed_ref *ref;
343
344 existing_ref = btrfs_delayed_node_to_ref(existing);
345 ref = btrfs_delayed_node_to_ref(update);
346
347 if (ref->pin)
348 existing_ref->pin = 1;
349
350 if (ref->action != existing_ref->action) {
351 /* 436 /*
352 * this is effectively undoing either an add or a 437 * this is effectively undoing either an add or a
353 * drop. We decrement the ref_mod, and if it goes 438 * drop. We decrement the ref_mod, and if it goes
@@ -363,20 +448,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
363 delayed_refs->num_entries--; 448 delayed_refs->num_entries--;
364 if (trans->delayed_ref_updates) 449 if (trans->delayed_ref_updates)
365 trans->delayed_ref_updates--; 450 trans->delayed_ref_updates--;
451 } else {
452 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
453 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
366 } 454 }
367 } else { 455 } else {
368 if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { 456 WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
369 /* if we're adding refs, make sure all the 457 existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
370 * details match up. The extent could
371 * have been totally freed and reallocated
372 * by a different owner before the delayed
373 * ref entries were removed.
374 */
375 existing_ref->owner_objectid = ref->owner_objectid;
376 existing_ref->generation = ref->generation;
377 existing_ref->root = ref->root;
378 existing->num_bytes = update->num_bytes;
379 }
380 /* 458 /*
381 * the action on the existing ref matches 459 * the action on the existing ref matches
382 * the action on the ref we're trying to add. 460 * the action on the ref we're trying to add.
@@ -401,6 +479,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
401 479
402 existing_ref = btrfs_delayed_node_to_head(existing); 480 existing_ref = btrfs_delayed_node_to_head(existing);
403 ref = btrfs_delayed_node_to_head(update); 481 ref = btrfs_delayed_node_to_head(update);
482 BUG_ON(existing_ref->is_data != ref->is_data);
404 483
405 if (ref->must_insert_reserved) { 484 if (ref->must_insert_reserved) {
406 /* if the extent was freed and then 485 /* if the extent was freed and then
@@ -420,6 +499,24 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
420 499
421 } 500 }
422 501
502 if (ref->extent_op) {
503 if (!existing_ref->extent_op) {
504 existing_ref->extent_op = ref->extent_op;
505 } else {
506 if (ref->extent_op->update_key) {
507 memcpy(&existing_ref->extent_op->key,
508 &ref->extent_op->key,
509 sizeof(ref->extent_op->key));
510 existing_ref->extent_op->update_key = 1;
511 }
512 if (ref->extent_op->update_flags) {
513 existing_ref->extent_op->flags_to_set |=
514 ref->extent_op->flags_to_set;
515 existing_ref->extent_op->update_flags = 1;
516 }
517 kfree(ref->extent_op);
518 }
519 }
423 /* 520 /*
424 * update the reference mod on the head to reflect this new operation 521 * update the reference mod on the head to reflect this new operation
425 */ 522 */
@@ -427,19 +524,16 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
427} 524}
428 525
429/* 526/*
430 * helper function to actually insert a delayed ref into the rbtree. 527 * helper function to actually insert a head node into the rbtree.
431 * this does all the dirty work in terms of maintaining the correct 528 * this does all the dirty work in terms of maintaining the correct
432 * overall modification count in the head node and properly dealing 529 * overall modification count.
433 * with updating existing nodes as new modifications are queued.
434 */ 530 */
435static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 531static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
436 struct btrfs_delayed_ref_node *ref, 532 struct btrfs_delayed_ref_node *ref,
437 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 533 u64 bytenr, u64 num_bytes,
438 u64 ref_generation, u64 owner_objectid, int action, 534 int action, int is_data)
439 int pin)
440{ 535{
441 struct btrfs_delayed_ref_node *existing; 536 struct btrfs_delayed_ref_node *existing;
442 struct btrfs_delayed_ref *full_ref;
443 struct btrfs_delayed_ref_head *head_ref = NULL; 537 struct btrfs_delayed_ref_head *head_ref = NULL;
444 struct btrfs_delayed_ref_root *delayed_refs; 538 struct btrfs_delayed_ref_root *delayed_refs;
445 int count_mod = 1; 539 int count_mod = 1;
@@ -449,12 +543,10 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
449 * the head node stores the sum of all the mods, so dropping a ref 543 * the head node stores the sum of all the mods, so dropping a ref
450 * should drop the sum in the head node by one. 544 * should drop the sum in the head node by one.
451 */ 545 */
452 if (parent == (u64)-1) { 546 if (action == BTRFS_UPDATE_DELAYED_HEAD)
453 if (action == BTRFS_DROP_DELAYED_REF) 547 count_mod = 0;
454 count_mod = -1; 548 else if (action == BTRFS_DROP_DELAYED_REF)
455 else if (action == BTRFS_UPDATE_DELAYED_HEAD) 549 count_mod = -1;
456 count_mod = 0;
457 }
458 550
459 /* 551 /*
460 * BTRFS_ADD_DELAYED_EXTENT means that we need to update 552 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
@@ -467,57 +559,148 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
467 * Once we record must_insert_reserved, switch the action to 559 * Once we record must_insert_reserved, switch the action to
468 * BTRFS_ADD_DELAYED_REF because other special casing is not required. 560 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
469 */ 561 */
470 if (action == BTRFS_ADD_DELAYED_EXTENT) { 562 if (action == BTRFS_ADD_DELAYED_EXTENT)
471 must_insert_reserved = 1; 563 must_insert_reserved = 1;
472 action = BTRFS_ADD_DELAYED_REF; 564 else
473 } else {
474 must_insert_reserved = 0; 565 must_insert_reserved = 0;
475 }
476
477 566
478 delayed_refs = &trans->transaction->delayed_refs; 567 delayed_refs = &trans->transaction->delayed_refs;
479 568
480 /* first set the basic ref node struct up */ 569 /* first set the basic ref node struct up */
481 atomic_set(&ref->refs, 1); 570 atomic_set(&ref->refs, 1);
482 ref->bytenr = bytenr; 571 ref->bytenr = bytenr;
483 ref->parent = parent; 572 ref->num_bytes = num_bytes;
484 ref->ref_mod = count_mod; 573 ref->ref_mod = count_mod;
574 ref->type = 0;
575 ref->action = 0;
576 ref->is_head = 1;
485 ref->in_tree = 1; 577 ref->in_tree = 1;
578
579 head_ref = btrfs_delayed_node_to_head(ref);
580 head_ref->must_insert_reserved = must_insert_reserved;
581 head_ref->is_data = is_data;
582
583 INIT_LIST_HEAD(&head_ref->cluster);
584 mutex_init(&head_ref->mutex);
585
586 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
587
588 if (existing) {
589 update_existing_head_ref(existing, ref);
590 /*
591 * we've updated the existing ref, free the newly
592 * allocated ref
593 */
594 kfree(ref);
595 } else {
596 delayed_refs->num_heads++;
597 delayed_refs->num_heads_ready++;
598 delayed_refs->num_entries++;
599 trans->delayed_ref_updates++;
600 }
601 return 0;
602}
603
604/*
605 * helper to insert a delayed tree ref into the rbtree.
606 */
607static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
608 struct btrfs_delayed_ref_node *ref,
609 u64 bytenr, u64 num_bytes, u64 parent,
610 u64 ref_root, int level, int action)
611{
612 struct btrfs_delayed_ref_node *existing;
613 struct btrfs_delayed_tree_ref *full_ref;
614 struct btrfs_delayed_ref_root *delayed_refs;
615
616 if (action == BTRFS_ADD_DELAYED_EXTENT)
617 action = BTRFS_ADD_DELAYED_REF;
618
619 delayed_refs = &trans->transaction->delayed_refs;
620
621 /* first set the basic ref node struct up */
622 atomic_set(&ref->refs, 1);
623 ref->bytenr = bytenr;
486 ref->num_bytes = num_bytes; 624 ref->num_bytes = num_bytes;
625 ref->ref_mod = 1;
626 ref->action = action;
627 ref->is_head = 0;
628 ref->in_tree = 1;
487 629
488 if (btrfs_delayed_ref_is_head(ref)) { 630 full_ref = btrfs_delayed_node_to_tree_ref(ref);
489 head_ref = btrfs_delayed_node_to_head(ref); 631 if (parent) {
490 head_ref->must_insert_reserved = must_insert_reserved; 632 full_ref->parent = parent;
491 INIT_LIST_HEAD(&head_ref->cluster); 633 ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
492 mutex_init(&head_ref->mutex);
493 } else { 634 } else {
494 full_ref = btrfs_delayed_node_to_ref(ref);
495 full_ref->root = ref_root; 635 full_ref->root = ref_root;
496 full_ref->generation = ref_generation; 636 ref->type = BTRFS_TREE_BLOCK_REF_KEY;
497 full_ref->owner_objectid = owner_objectid;
498 full_ref->pin = pin;
499 full_ref->action = action;
500 } 637 }
638 full_ref->level = level;
501 639
502 existing = tree_insert(&delayed_refs->root, bytenr, 640 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
503 parent, &ref->rb_node);
504 641
505 if (existing) { 642 if (existing) {
506 if (btrfs_delayed_ref_is_head(ref)) 643 update_existing_ref(trans, delayed_refs, existing, ref);
507 update_existing_head_ref(existing, ref); 644 /*
508 else 645 * we've updated the existing ref, free the newly
509 update_existing_ref(trans, delayed_refs, existing, ref); 646 * allocated ref
647 */
648 kfree(ref);
649 } else {
650 delayed_refs->num_entries++;
651 trans->delayed_ref_updates++;
652 }
653 return 0;
654}
655
656/*
657 * helper to insert a delayed data ref into the rbtree.
658 */
659static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
660 struct btrfs_delayed_ref_node *ref,
661 u64 bytenr, u64 num_bytes, u64 parent,
662 u64 ref_root, u64 owner, u64 offset,
663 int action)
664{
665 struct btrfs_delayed_ref_node *existing;
666 struct btrfs_delayed_data_ref *full_ref;
667 struct btrfs_delayed_ref_root *delayed_refs;
668
669 if (action == BTRFS_ADD_DELAYED_EXTENT)
670 action = BTRFS_ADD_DELAYED_REF;
671
672 delayed_refs = &trans->transaction->delayed_refs;
673
674 /* first set the basic ref node struct up */
675 atomic_set(&ref->refs, 1);
676 ref->bytenr = bytenr;
677 ref->num_bytes = num_bytes;
678 ref->ref_mod = 1;
679 ref->action = action;
680 ref->is_head = 0;
681 ref->in_tree = 1;
682
683 full_ref = btrfs_delayed_node_to_data_ref(ref);
684 if (parent) {
685 full_ref->parent = parent;
686 ref->type = BTRFS_SHARED_DATA_REF_KEY;
687 } else {
688 full_ref->root = ref_root;
689 ref->type = BTRFS_EXTENT_DATA_REF_KEY;
690 }
691 full_ref->objectid = owner;
692 full_ref->offset = offset;
510 693
694 existing = tree_insert(&delayed_refs->root, &ref->rb_node);
695
696 if (existing) {
697 update_existing_ref(trans, delayed_refs, existing, ref);
511 /* 698 /*
512 * we've updated the existing ref, free the newly 699 * we've updated the existing ref, free the newly
513 * allocated ref 700 * allocated ref
514 */ 701 */
515 kfree(ref); 702 kfree(ref);
516 } else { 703 } else {
517 if (btrfs_delayed_ref_is_head(ref)) {
518 delayed_refs->num_heads++;
519 delayed_refs->num_heads_ready++;
520 }
521 delayed_refs->num_entries++; 704 delayed_refs->num_entries++;
522 trans->delayed_ref_updates++; 705 trans->delayed_ref_updates++;
523 } 706 }
@@ -525,37 +708,78 @@ static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
525} 708}
526 709
527/* 710/*
528 * add a delayed ref to the tree. This does all of the accounting required 711 * add a delayed tree ref. This does all of the accounting required
529 * to make sure the delayed ref is eventually processed before this 712 * to make sure the delayed ref is eventually processed before this
530 * transaction commits. 713 * transaction commits.
531 */ 714 */
532int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 715int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
533 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 716 u64 bytenr, u64 num_bytes, u64 parent,
534 u64 ref_generation, u64 owner_objectid, int action, 717 u64 ref_root, int level, int action,
535 int pin) 718 struct btrfs_delayed_extent_op *extent_op)
536{ 719{
537 struct btrfs_delayed_ref *ref; 720 struct btrfs_delayed_tree_ref *ref;
538 struct btrfs_delayed_ref_head *head_ref; 721 struct btrfs_delayed_ref_head *head_ref;
539 struct btrfs_delayed_ref_root *delayed_refs; 722 struct btrfs_delayed_ref_root *delayed_refs;
540 int ret; 723 int ret;
541 724
725 BUG_ON(extent_op && extent_op->is_data);
542 ref = kmalloc(sizeof(*ref), GFP_NOFS); 726 ref = kmalloc(sizeof(*ref), GFP_NOFS);
543 if (!ref) 727 if (!ref)
544 return -ENOMEM; 728 return -ENOMEM;
545 729
730 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
731 if (!head_ref) {
732 kfree(ref);
733 return -ENOMEM;
734 }
735
736 head_ref->extent_op = extent_op;
737
738 delayed_refs = &trans->transaction->delayed_refs;
739 spin_lock(&delayed_refs->lock);
740
546 /* 741 /*
547 * the parent = 0 case comes from cases where we don't actually 742 * insert both the head node and the new ref without dropping
548 * know the parent yet. It will get updated later via a add/drop 743 * the spin lock
549 * pair.
550 */ 744 */
551 if (parent == 0) 745 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
552 parent = bytenr; 746 action, 0);
747 BUG_ON(ret);
748
749 ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
750 parent, ref_root, level, action);
751 BUG_ON(ret);
752 spin_unlock(&delayed_refs->lock);
753 return 0;
754}
755
756/*
757 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
758 */
759int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
760 u64 bytenr, u64 num_bytes,
761 u64 parent, u64 ref_root,
762 u64 owner, u64 offset, int action,
763 struct btrfs_delayed_extent_op *extent_op)
764{
765 struct btrfs_delayed_data_ref *ref;
766 struct btrfs_delayed_ref_head *head_ref;
767 struct btrfs_delayed_ref_root *delayed_refs;
768 int ret;
769
770 BUG_ON(extent_op && !extent_op->is_data);
771 ref = kmalloc(sizeof(*ref), GFP_NOFS);
772 if (!ref)
773 return -ENOMEM;
553 774
554 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); 775 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
555 if (!head_ref) { 776 if (!head_ref) {
556 kfree(ref); 777 kfree(ref);
557 return -ENOMEM; 778 return -ENOMEM;
558 } 779 }
780
781 head_ref->extent_op = extent_op;
782
559 delayed_refs = &trans->transaction->delayed_refs; 783 delayed_refs = &trans->transaction->delayed_refs;
560 spin_lock(&delayed_refs->lock); 784 spin_lock(&delayed_refs->lock);
561 785
@@ -563,14 +787,39 @@ int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans,
563 * insert both the head node and the new ref without dropping 787 * insert both the head node and the new ref without dropping
564 * the spin lock 788 * the spin lock
565 */ 789 */
566 ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, 790 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
567 (u64)-1, 0, 0, 0, action, pin); 791 action, 1);
568 BUG_ON(ret); 792 BUG_ON(ret);
569 793
570 ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, 794 ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
571 parent, ref_root, ref_generation, 795 parent, ref_root, owner, offset, action);
572 owner_objectid, action, pin); 796 BUG_ON(ret);
797 spin_unlock(&delayed_refs->lock);
798 return 0;
799}
800
801int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
802 u64 bytenr, u64 num_bytes,
803 struct btrfs_delayed_extent_op *extent_op)
804{
805 struct btrfs_delayed_ref_head *head_ref;
806 struct btrfs_delayed_ref_root *delayed_refs;
807 int ret;
808
809 head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
810 if (!head_ref)
811 return -ENOMEM;
812
813 head_ref->extent_op = extent_op;
814
815 delayed_refs = &trans->transaction->delayed_refs;
816 spin_lock(&delayed_refs->lock);
817
818 ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
819 num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
820 extent_op->is_data);
573 BUG_ON(ret); 821 BUG_ON(ret);
822
574 spin_unlock(&delayed_refs->lock); 823 spin_unlock(&delayed_refs->lock);
575 return 0; 824 return 0;
576} 825}
@@ -587,7 +836,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
587 struct btrfs_delayed_ref_root *delayed_refs; 836 struct btrfs_delayed_ref_root *delayed_refs;
588 837
589 delayed_refs = &trans->transaction->delayed_refs; 838 delayed_refs = &trans->transaction->delayed_refs;
590 ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); 839 ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
591 if (ref) 840 if (ref)
592 return btrfs_delayed_node_to_head(ref); 841 return btrfs_delayed_node_to_head(ref);
593 return NULL; 842 return NULL;
@@ -603,6 +852,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
603 * 852 *
604 * It is the same as doing a ref add and delete in two separate calls. 853 * It is the same as doing a ref add and delete in two separate calls.
605 */ 854 */
855#if 0
606int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, 856int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
607 u64 bytenr, u64 num_bytes, u64 orig_parent, 857 u64 bytenr, u64 num_bytes, u64 orig_parent,
608 u64 parent, u64 orig_ref_root, u64 ref_root, 858 u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -666,3 +916,4 @@ int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
666 spin_unlock(&delayed_refs->lock); 916 spin_unlock(&delayed_refs->lock);
667 return 0; 917 return 0;
668} 918}
919#endif
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 3bec2ff0b15c..f6fc67ddad36 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -30,9 +30,6 @@ struct btrfs_delayed_ref_node {
30 /* the starting bytenr of the extent */ 30 /* the starting bytenr of the extent */
31 u64 bytenr; 31 u64 bytenr;
32 32
33 /* the parent our backref will point to */
34 u64 parent;
35
36 /* the size of the extent */ 33 /* the size of the extent */
37 u64 num_bytes; 34 u64 num_bytes;
38 35
@@ -50,10 +47,21 @@ struct btrfs_delayed_ref_node {
50 */ 47 */
51 int ref_mod; 48 int ref_mod;
52 49
50 unsigned int action:8;
51 unsigned int type:8;
53 /* is this node still in the rbtree? */ 52 /* is this node still in the rbtree? */
53 unsigned int is_head:1;
54 unsigned int in_tree:1; 54 unsigned int in_tree:1;
55}; 55};
56 56
57struct btrfs_delayed_extent_op {
58 struct btrfs_disk_key key;
59 u64 flags_to_set;
60 unsigned int update_key:1;
61 unsigned int update_flags:1;
62 unsigned int is_data:1;
63};
64
57/* 65/*
58 * the head refs are used to hold a lock on a given extent, which allows us 66 * the head refs are used to hold a lock on a given extent, which allows us
59 * to make sure that only one process is running the delayed refs 67 * to make sure that only one process is running the delayed refs
@@ -71,6 +79,7 @@ struct btrfs_delayed_ref_head {
71 79
72 struct list_head cluster; 80 struct list_head cluster;
73 81
82 struct btrfs_delayed_extent_op *extent_op;
74 /* 83 /*
75 * when a new extent is allocated, it is just reserved in memory 84 * when a new extent is allocated, it is just reserved in memory
76 * The actual extent isn't inserted into the extent allocation tree 85 * The actual extent isn't inserted into the extent allocation tree
@@ -84,27 +93,26 @@ struct btrfs_delayed_ref_head {
84 * the free has happened. 93 * the free has happened.
85 */ 94 */
86 unsigned int must_insert_reserved:1; 95 unsigned int must_insert_reserved:1;
96 unsigned int is_data:1;
87}; 97};
88 98
89struct btrfs_delayed_ref { 99struct btrfs_delayed_tree_ref {
90 struct btrfs_delayed_ref_node node; 100 struct btrfs_delayed_ref_node node;
101 union {
102 u64 root;
103 u64 parent;
104 };
105 int level;
106};
91 107
92 /* the root objectid our ref will point to */ 108struct btrfs_delayed_data_ref {
93 u64 root; 109 struct btrfs_delayed_ref_node node;
94 110 union {
95 /* the generation for the backref */ 111 u64 root;
96 u64 generation; 112 u64 parent;
97 113 };
98 /* owner_objectid of the backref */ 114 u64 objectid;
99 u64 owner_objectid; 115 u64 offset;
100
101 /* operation done by this entry in the rbtree */
102 u8 action;
103
104 /* if pin == 1, when the extent is freed it will be pinned until
105 * transaction commit
106 */
107 unsigned int pin:1;
108}; 116};
109 117
110struct btrfs_delayed_ref_root { 118struct btrfs_delayed_ref_root {
@@ -143,17 +151,25 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
143 } 151 }
144} 152}
145 153
146int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, 154int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
147 u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, 155 u64 bytenr, u64 num_bytes, u64 parent,
148 u64 ref_generation, u64 owner_objectid, int action, 156 u64 ref_root, int level, int action,
149 int pin); 157 struct btrfs_delayed_extent_op *extent_op);
158int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
159 u64 bytenr, u64 num_bytes,
160 u64 parent, u64 ref_root,
161 u64 owner, u64 offset, int action,
162 struct btrfs_delayed_extent_op *extent_op);
163int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
164 u64 bytenr, u64 num_bytes,
165 struct btrfs_delayed_extent_op *extent_op);
150 166
151struct btrfs_delayed_ref_head * 167struct btrfs_delayed_ref_head *
152btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); 168btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
153int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); 169int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr);
154int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 170int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
155 struct btrfs_root *root, u64 bytenr, 171 struct btrfs_root *root, u64 bytenr,
156 u64 num_bytes, u32 *refs); 172 u64 num_bytes, u64 *refs, u64 *flags);
157int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, 173int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans,
158 u64 bytenr, u64 num_bytes, u64 orig_parent, 174 u64 bytenr, u64 num_bytes, u64 orig_parent,
159 u64 parent, u64 orig_ref_root, u64 ref_root, 175 u64 parent, u64 orig_ref_root, u64 ref_root,
@@ -169,18 +185,24 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
169 */ 185 */
170static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) 186static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node)
171{ 187{
172 return node->parent == (u64)-1; 188 return node->is_head;
173} 189}
174 190
175/* 191/*
176 * helper functions to cast a node into its container 192 * helper functions to cast a node into its container
177 */ 193 */
178static inline struct btrfs_delayed_ref * 194static inline struct btrfs_delayed_tree_ref *
179btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) 195btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
180{ 196{
181 WARN_ON(btrfs_delayed_ref_is_head(node)); 197 WARN_ON(btrfs_delayed_ref_is_head(node));
182 return container_of(node, struct btrfs_delayed_ref, node); 198 return container_of(node, struct btrfs_delayed_tree_ref, node);
199}
183 200
201static inline struct btrfs_delayed_data_ref *
202btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
203{
204 WARN_ON(btrfs_delayed_ref_is_head(node));
205 return container_of(node, struct btrfs_delayed_data_ref, node);
184} 206}
185 207
186static inline struct btrfs_delayed_ref_head * 208static inline struct btrfs_delayed_ref_head *
@@ -188,6 +210,5 @@ btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node)
188{ 210{
189 WARN_ON(!btrfs_delayed_ref_is_head(node)); 211 WARN_ON(!btrfs_delayed_ref_is_head(node));
190 return container_of(node, struct btrfs_delayed_ref_head, node); 212 return container_of(node, struct btrfs_delayed_ref_head, node);
191
192} 213}
193#endif 214#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4b0ea0b80c23..7f5c6e3e9992 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -36,7 +36,6 @@
36#include "print-tree.h" 36#include "print-tree.h"
37#include "async-thread.h" 37#include "async-thread.h"
38#include "locking.h" 38#include "locking.h"
39#include "ref-cache.h"
40#include "tree-log.h" 39#include "tree-log.h"
41#include "free-space-cache.h" 40#include "free-space-cache.h"
42 41
@@ -884,7 +883,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
884{ 883{
885 root->node = NULL; 884 root->node = NULL;
886 root->commit_root = NULL; 885 root->commit_root = NULL;
887 root->ref_tree = NULL;
888 root->sectorsize = sectorsize; 886 root->sectorsize = sectorsize;
889 root->nodesize = nodesize; 887 root->nodesize = nodesize;
890 root->leafsize = leafsize; 888 root->leafsize = leafsize;
@@ -899,12 +897,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
899 root->last_inode_alloc = 0; 897 root->last_inode_alloc = 0;
900 root->name = NULL; 898 root->name = NULL;
901 root->in_sysfs = 0; 899 root->in_sysfs = 0;
900 root->inode_tree.rb_node = NULL;
902 901
903 INIT_LIST_HEAD(&root->dirty_list); 902 INIT_LIST_HEAD(&root->dirty_list);
904 INIT_LIST_HEAD(&root->orphan_list); 903 INIT_LIST_HEAD(&root->orphan_list);
905 INIT_LIST_HEAD(&root->dead_list); 904 INIT_LIST_HEAD(&root->root_list);
906 spin_lock_init(&root->node_lock); 905 spin_lock_init(&root->node_lock);
907 spin_lock_init(&root->list_lock); 906 spin_lock_init(&root->list_lock);
907 spin_lock_init(&root->inode_lock);
908 mutex_init(&root->objectid_mutex); 908 mutex_init(&root->objectid_mutex);
909 mutex_init(&root->log_mutex); 909 mutex_init(&root->log_mutex);
910 init_waitqueue_head(&root->log_writer_wait); 910 init_waitqueue_head(&root->log_writer_wait);
@@ -918,9 +918,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
918 extent_io_tree_init(&root->dirty_log_pages, 918 extent_io_tree_init(&root->dirty_log_pages,
919 fs_info->btree_inode->i_mapping, GFP_NOFS); 919 fs_info->btree_inode->i_mapping, GFP_NOFS);
920 920
921 btrfs_leaf_ref_tree_init(&root->ref_tree_struct);
922 root->ref_tree = &root->ref_tree_struct;
923
924 memset(&root->root_key, 0, sizeof(root->root_key)); 921 memset(&root->root_key, 0, sizeof(root->root_key));
925 memset(&root->root_item, 0, sizeof(root->root_item)); 922 memset(&root->root_item, 0, sizeof(root->root_item));
926 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); 923 memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -959,6 +956,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
959 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 956 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
960 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 957 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
961 blocksize, generation); 958 blocksize, generation);
959 root->commit_root = btrfs_root_node(root);
962 BUG_ON(!root->node); 960 BUG_ON(!root->node);
963 return 0; 961 return 0;
964} 962}
@@ -1025,20 +1023,19 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
1025 */ 1023 */
1026 root->ref_cows = 0; 1024 root->ref_cows = 0;
1027 1025
1028 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 1026 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
1029 0, BTRFS_TREE_LOG_OBJECTID, 1027 BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
1030 trans->transid, 0, 0, 0);
1031 if (IS_ERR(leaf)) { 1028 if (IS_ERR(leaf)) {
1032 kfree(root); 1029 kfree(root);
1033 return ERR_CAST(leaf); 1030 return ERR_CAST(leaf);
1034 } 1031 }
1035 1032
1033 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
1034 btrfs_set_header_bytenr(leaf, leaf->start);
1035 btrfs_set_header_generation(leaf, trans->transid);
1036 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
1037 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
1036 root->node = leaf; 1038 root->node = leaf;
1037 btrfs_set_header_nritems(root->node, 0);
1038 btrfs_set_header_level(root->node, 0);
1039 btrfs_set_header_bytenr(root->node, root->node->start);
1040 btrfs_set_header_generation(root->node, trans->transid);
1041 btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID);
1042 1039
1043 write_extent_buffer(root->node, root->fs_info->fsid, 1040 write_extent_buffer(root->node, root->fs_info->fsid,
1044 (unsigned long)btrfs_header_fsid(root->node), 1041 (unsigned long)btrfs_header_fsid(root->node),
@@ -1081,8 +1078,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1081 inode_item->nbytes = cpu_to_le64(root->leafsize); 1078 inode_item->nbytes = cpu_to_le64(root->leafsize);
1082 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 1079 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
1083 1080
1084 btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); 1081 btrfs_set_root_node(&log_root->root_item, log_root->node);
1085 btrfs_set_root_generation(&log_root->root_item, trans->transid);
1086 1082
1087 WARN_ON(root->log_root); 1083 WARN_ON(root->log_root);
1088 root->log_root = log_root; 1084 root->log_root = log_root;
@@ -1144,6 +1140,7 @@ out:
1144 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1140 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1145 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1141 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1146 blocksize, generation); 1142 blocksize, generation);
1143 root->commit_root = btrfs_root_node(root);
1147 BUG_ON(!root->node); 1144 BUG_ON(!root->node);
1148insert: 1145insert:
1149 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { 1146 if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -1210,7 +1207,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
1210 } 1207 }
1211 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 1208 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
1212 ret = btrfs_find_dead_roots(fs_info->tree_root, 1209 ret = btrfs_find_dead_roots(fs_info->tree_root,
1213 root->root_key.objectid, root); 1210 root->root_key.objectid);
1214 BUG_ON(ret); 1211 BUG_ON(ret);
1215 btrfs_orphan_cleanup(root); 1212 btrfs_orphan_cleanup(root);
1216 } 1213 }
@@ -1569,8 +1566,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1569 atomic_set(&fs_info->async_delalloc_pages, 0); 1566 atomic_set(&fs_info->async_delalloc_pages, 0);
1570 atomic_set(&fs_info->async_submit_draining, 0); 1567 atomic_set(&fs_info->async_submit_draining, 0);
1571 atomic_set(&fs_info->nr_async_bios, 0); 1568 atomic_set(&fs_info->nr_async_bios, 0);
1572 atomic_set(&fs_info->throttles, 0);
1573 atomic_set(&fs_info->throttle_gen, 0);
1574 fs_info->sb = sb; 1569 fs_info->sb = sb;
1575 fs_info->max_extent = (u64)-1; 1570 fs_info->max_extent = (u64)-1;
1576 fs_info->max_inline = 8192 * 1024; 1571 fs_info->max_inline = 8192 * 1024;
@@ -1598,6 +1593,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1598 fs_info->btree_inode->i_mapping->a_ops = &btree_aops; 1593 fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
1599 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; 1594 fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
1600 1595
1596 RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
1601 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, 1597 extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
1602 fs_info->btree_inode->i_mapping, 1598 fs_info->btree_inode->i_mapping,
1603 GFP_NOFS); 1599 GFP_NOFS);
@@ -1613,10 +1609,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1613 fs_info->btree_inode->i_mapping, GFP_NOFS); 1609 fs_info->btree_inode->i_mapping, GFP_NOFS);
1614 fs_info->do_barriers = 1; 1610 fs_info->do_barriers = 1;
1615 1611
1616 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
1617 btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
1618 btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);
1619
1620 BTRFS_I(fs_info->btree_inode)->root = tree_root; 1612 BTRFS_I(fs_info->btree_inode)->root = tree_root;
1621 memset(&BTRFS_I(fs_info->btree_inode)->location, 0, 1613 memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
1622 sizeof(struct btrfs_key)); 1614 sizeof(struct btrfs_key));
@@ -1674,6 +1666,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1674 goto fail_iput; 1666 goto fail_iput;
1675 } 1667 }
1676 1668
1669 features = btrfs_super_incompat_flags(disk_super);
1670 if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
1671 features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
1672 btrfs_set_super_incompat_flags(disk_super, features);
1673 }
1674
1677 features = btrfs_super_compat_ro_flags(disk_super) & 1675 features = btrfs_super_compat_ro_flags(disk_super) &
1678 ~BTRFS_FEATURE_COMPAT_RO_SUPP; 1676 ~BTRFS_FEATURE_COMPAT_RO_SUPP;
1679 if (!(sb->s_flags & MS_RDONLY) && features) { 1677 if (!(sb->s_flags & MS_RDONLY) && features) {
@@ -1771,7 +1769,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1771 if (ret) { 1769 if (ret) {
1772 printk(KERN_WARNING "btrfs: failed to read the system " 1770 printk(KERN_WARNING "btrfs: failed to read the system "
1773 "array on %s\n", sb->s_id); 1771 "array on %s\n", sb->s_id);
1774 goto fail_sys_array; 1772 goto fail_sb_buffer;
1775 } 1773 }
1776 1774
1777 blocksize = btrfs_level_size(tree_root, 1775 blocksize = btrfs_level_size(tree_root,
@@ -1785,6 +1783,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1785 btrfs_super_chunk_root(disk_super), 1783 btrfs_super_chunk_root(disk_super),
1786 blocksize, generation); 1784 blocksize, generation);
1787 BUG_ON(!chunk_root->node); 1785 BUG_ON(!chunk_root->node);
1786 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
1787 chunk_root->commit_root = btrfs_root_node(chunk_root);
1788 1788
1789 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, 1789 read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
1790 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), 1790 (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
@@ -1810,7 +1810,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1810 blocksize, generation); 1810 blocksize, generation);
1811 if (!tree_root->node) 1811 if (!tree_root->node)
1812 goto fail_chunk_root; 1812 goto fail_chunk_root;
1813 1813 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
1814 tree_root->commit_root = btrfs_root_node(tree_root);
1814 1815
1815 ret = find_and_setup_root(tree_root, fs_info, 1816 ret = find_and_setup_root(tree_root, fs_info,
1816 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 1817 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
@@ -1820,14 +1821,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1820 1821
1821 ret = find_and_setup_root(tree_root, fs_info, 1822 ret = find_and_setup_root(tree_root, fs_info,
1822 BTRFS_DEV_TREE_OBJECTID, dev_root); 1823 BTRFS_DEV_TREE_OBJECTID, dev_root);
1823 dev_root->track_dirty = 1;
1824 if (ret) 1824 if (ret)
1825 goto fail_extent_root; 1825 goto fail_extent_root;
1826 dev_root->track_dirty = 1;
1826 1827
1827 ret = find_and_setup_root(tree_root, fs_info, 1828 ret = find_and_setup_root(tree_root, fs_info,
1828 BTRFS_CSUM_TREE_OBJECTID, csum_root); 1829 BTRFS_CSUM_TREE_OBJECTID, csum_root);
1829 if (ret) 1830 if (ret)
1830 goto fail_extent_root; 1831 goto fail_dev_root;
1831 1832
1832 csum_root->track_dirty = 1; 1833 csum_root->track_dirty = 1;
1833 1834
@@ -1881,7 +1882,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1881 } 1882 }
1882 1883
1883 if (!(sb->s_flags & MS_RDONLY)) { 1884 if (!(sb->s_flags & MS_RDONLY)) {
1884 ret = btrfs_cleanup_reloc_trees(tree_root); 1885 ret = btrfs_recover_relocation(tree_root);
1885 BUG_ON(ret); 1886 BUG_ON(ret);
1886 } 1887 }
1887 1888
@@ -1908,14 +1909,19 @@ fail_cleaner:
1908 1909
1909fail_csum_root: 1910fail_csum_root:
1910 free_extent_buffer(csum_root->node); 1911 free_extent_buffer(csum_root->node);
1912 free_extent_buffer(csum_root->commit_root);
1913fail_dev_root:
1914 free_extent_buffer(dev_root->node);
1915 free_extent_buffer(dev_root->commit_root);
1911fail_extent_root: 1916fail_extent_root:
1912 free_extent_buffer(extent_root->node); 1917 free_extent_buffer(extent_root->node);
1918 free_extent_buffer(extent_root->commit_root);
1913fail_tree_root: 1919fail_tree_root:
1914 free_extent_buffer(tree_root->node); 1920 free_extent_buffer(tree_root->node);
1921 free_extent_buffer(tree_root->commit_root);
1915fail_chunk_root: 1922fail_chunk_root:
1916 free_extent_buffer(chunk_root->node); 1923 free_extent_buffer(chunk_root->node);
1917fail_sys_array: 1924 free_extent_buffer(chunk_root->commit_root);
1918 free_extent_buffer(dev_root->node);
1919fail_sb_buffer: 1925fail_sb_buffer:
1920 btrfs_stop_workers(&fs_info->fixup_workers); 1926 btrfs_stop_workers(&fs_info->fixup_workers);
1921 btrfs_stop_workers(&fs_info->delalloc_workers); 1927 btrfs_stop_workers(&fs_info->delalloc_workers);
@@ -2173,6 +2179,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
2173 2179
2174int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2180int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2175{ 2181{
2182 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2176 radix_tree_delete(&fs_info->fs_roots_radix, 2183 radix_tree_delete(&fs_info->fs_roots_radix,
2177 (unsigned long)root->root_key.objectid); 2184 (unsigned long)root->root_key.objectid);
2178 if (root->anon_super.s_dev) { 2185 if (root->anon_super.s_dev) {
@@ -2219,10 +2226,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
2219 ARRAY_SIZE(gang)); 2226 ARRAY_SIZE(gang));
2220 if (!ret) 2227 if (!ret)
2221 break; 2228 break;
2229
2230 root_objectid = gang[ret - 1]->root_key.objectid + 1;
2222 for (i = 0; i < ret; i++) { 2231 for (i = 0; i < ret; i++) {
2223 root_objectid = gang[i]->root_key.objectid; 2232 root_objectid = gang[i]->root_key.objectid;
2224 ret = btrfs_find_dead_roots(fs_info->tree_root, 2233 ret = btrfs_find_dead_roots(fs_info->tree_root,
2225 root_objectid, gang[i]); 2234 root_objectid);
2226 BUG_ON(ret); 2235 BUG_ON(ret);
2227 btrfs_orphan_cleanup(gang[i]); 2236 btrfs_orphan_cleanup(gang[i]);
2228 } 2237 }
@@ -2278,20 +2287,16 @@ int close_ctree(struct btrfs_root *root)
2278 (unsigned long long)fs_info->total_ref_cache_size); 2287 (unsigned long long)fs_info->total_ref_cache_size);
2279 } 2288 }
2280 2289
2281 if (fs_info->extent_root->node) 2290 free_extent_buffer(fs_info->extent_root->node);
2282 free_extent_buffer(fs_info->extent_root->node); 2291 free_extent_buffer(fs_info->extent_root->commit_root);
2283 2292 free_extent_buffer(fs_info->tree_root->node);
2284 if (fs_info->tree_root->node) 2293 free_extent_buffer(fs_info->tree_root->commit_root);
2285 free_extent_buffer(fs_info->tree_root->node); 2294 free_extent_buffer(root->fs_info->chunk_root->node);
2286 2295 free_extent_buffer(root->fs_info->chunk_root->commit_root);
2287 if (root->fs_info->chunk_root->node) 2296 free_extent_buffer(root->fs_info->dev_root->node);
2288 free_extent_buffer(root->fs_info->chunk_root->node); 2297 free_extent_buffer(root->fs_info->dev_root->commit_root);
2289 2298 free_extent_buffer(root->fs_info->csum_root->node);
2290 if (root->fs_info->dev_root->node) 2299 free_extent_buffer(root->fs_info->csum_root->commit_root);
2291 free_extent_buffer(root->fs_info->dev_root->node);
2292
2293 if (root->fs_info->csum_root->node)
2294 free_extent_buffer(root->fs_info->csum_root->node);
2295 2300
2296 btrfs_free_block_groups(root->fs_info); 2301 btrfs_free_block_groups(root->fs_info);
2297 2302
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 85315d2c90de..9596b40caa4e 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -78,7 +78,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
78 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 78 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
79 key.offset = 0; 79 key.offset = 0;
80 80
81 inode = btrfs_iget(sb, &key, root, NULL); 81 inode = btrfs_iget(sb, &key, root);
82 if (IS_ERR(inode)) 82 if (IS_ERR(inode))
83 return (void *)inode; 83 return (void *)inode;
84 84
@@ -192,7 +192,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
192 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 192 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
193 key.offset = 0; 193 key.offset = 0;
194 194
195 return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); 195 return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
196} 196}
197 197
198const struct export_operations btrfs_export_ops = { 198const struct export_operations btrfs_export_ops = {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 35af93355063..a42419c276e2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -30,43 +30,33 @@
30#include "transaction.h" 30#include "transaction.h"
31#include "volumes.h" 31#include "volumes.h"
32#include "locking.h" 32#include "locking.h"
33#include "ref-cache.h"
34#include "free-space-cache.h" 33#include "free-space-cache.h"
35 34
36#define PENDING_EXTENT_INSERT 0
37#define PENDING_EXTENT_DELETE 1
38#define PENDING_BACKREF_UPDATE 2
39
40struct pending_extent_op {
41 int type;
42 u64 bytenr;
43 u64 num_bytes;
44 u64 parent;
45 u64 orig_parent;
46 u64 generation;
47 u64 orig_generation;
48 int level;
49 struct list_head list;
50 int del;
51};
52
53static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
54 struct btrfs_root *root, u64 parent,
55 u64 root_objectid, u64 ref_generation,
56 u64 owner, struct btrfs_key *ins,
57 int ref_mod);
58static int update_reserved_extents(struct btrfs_root *root, 35static int update_reserved_extents(struct btrfs_root *root,
59 u64 bytenr, u64 num, int reserve); 36 u64 bytenr, u64 num, int reserve);
60static int update_block_group(struct btrfs_trans_handle *trans, 37static int update_block_group(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, 38 struct btrfs_root *root,
62 u64 bytenr, u64 num_bytes, int alloc, 39 u64 bytenr, u64 num_bytes, int alloc,
63 int mark_free); 40 int mark_free);
64static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, 41static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
65 struct btrfs_root *root, 42 struct btrfs_root *root,
66 u64 bytenr, u64 num_bytes, u64 parent, 43 u64 bytenr, u64 num_bytes, u64 parent,
67 u64 root_objectid, u64 ref_generation, 44 u64 root_objectid, u64 owner_objectid,
68 u64 owner_objectid, int pin, 45 u64 owner_offset, int refs_to_drop,
69 int ref_to_drop); 46 struct btrfs_delayed_extent_op *extra_op);
47static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
48 struct extent_buffer *leaf,
49 struct btrfs_extent_item *ei);
50static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
51 struct btrfs_root *root,
52 u64 parent, u64 root_objectid,
53 u64 flags, u64 owner, u64 offset,
54 struct btrfs_key *ins, int ref_mod);
55static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
56 struct btrfs_root *root,
57 u64 parent, u64 root_objectid,
58 u64 flags, struct btrfs_disk_key *key,
59 int level, struct btrfs_key *ins);
70 60
71static int do_chunk_alloc(struct btrfs_trans_handle *trans, 61static int do_chunk_alloc(struct btrfs_trans_handle *trans,
72 struct btrfs_root *extent_root, u64 alloc_bytes, 62 struct btrfs_root *extent_root, u64 alloc_bytes,
@@ -453,196 +443,973 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
453 * maintenance. This is actually the same as #2, but with a slightly 443 * maintenance. This is actually the same as #2, but with a slightly
454 * different use case. 444 * different use case.
455 * 445 *
446 * There are two kinds of back refs. The implicit back refs is optimized
447 * for pointers in non-shared tree blocks. For a given pointer in a block,
448 * back refs of this kind provide information about the block's owner tree
449 * and the pointer's key. These information allow us to find the block by
450 * b-tree searching. The full back refs is for pointers in tree blocks not
451 * referenced by their owner trees. The location of tree block is recorded
452 * in the back refs. Actually the full back refs is generic, and can be
453 * used in all cases the implicit back refs is used. The major shortcoming
454 * of the full back refs is its overhead. Every time a tree block gets
455 * COWed, we have to update back refs entry for all pointers in it.
456 *
457 * For a newly allocated tree block, we use implicit back refs for
458 * pointers in it. This means most tree related operations only involve
459 * implicit back refs. For a tree block created in old transaction, the
460 * only way to drop a reference to it is COW it. So we can detect the
461 * event that tree block loses its owner tree's reference and do the
462 * back refs conversion.
463 *
464 * When a tree block is COW'd through a tree, there are four cases:
465 *
466 * The reference count of the block is one and the tree is the block's
467 * owner tree. Nothing to do in this case.
468 *
469 * The reference count of the block is one and the tree is not the
470 * block's owner tree. In this case, full back refs is used for pointers
471 * in the block. Remove these full back refs, add implicit back refs for
472 * every pointers in the new block.
473 *
474 * The reference count of the block is greater than one and the tree is
475 * the block's owner tree. In this case, implicit back refs is used for
476 * pointers in the block. Add full back refs for every pointers in the
477 * block, increase lower level extents' reference counts. The original
478 * implicit back refs are entailed to the new block.
479 *
480 * The reference count of the block is greater than one and the tree is
481 * not the block's owner tree. Add implicit back refs for every pointer in
482 * the new block, increase lower level extents' reference count.
483 *
484 * Back Reference Key composing:
485 *
486 * The key objectid corresponds to the first byte in the extent,
487 * The key type is used to differentiate between types of back refs.
488 * There are different meanings of the key offset for different types
489 * of back refs.
490 *
456 * File extents can be referenced by: 491 * File extents can be referenced by:
457 * 492 *
458 * - multiple snapshots, subvolumes, or different generations in one subvol 493 * - multiple snapshots, subvolumes, or different generations in one subvol
459 * - different files inside a single subvolume 494 * - different files inside a single subvolume
460 * - different offsets inside a file (bookend extents in file.c) 495 * - different offsets inside a file (bookend extents in file.c)
461 * 496 *
462 * The extent ref structure has fields for: 497 * The extent ref structure for the implicit back refs has fields for:
463 * 498 *
464 * - Objectid of the subvolume root 499 * - Objectid of the subvolume root
465 * - Generation number of the tree holding the reference
466 * - objectid of the file holding the reference 500 * - objectid of the file holding the reference
467 * - number of references holding by parent node (alway 1 for tree blocks) 501 * - original offset in the file
468 * 502 * - how many bookend extents
469 * Btree leaf may hold multiple references to a file extent. In most cases,
470 * these references are from same file and the corresponding offsets inside
471 * the file are close together.
472 *
473 * When a file extent is allocated the fields are filled in:
474 * (root_key.objectid, trans->transid, inode objectid, 1)
475 * 503 *
476 * When a leaf is cow'd new references are added for every file extent found 504 * The key offset for the implicit back refs is hash of the first
477 * in the leaf. It looks similar to the create case, but trans->transid will 505 * three fields.
478 * be different when the block is cow'd.
479 * 506 *
480 * (root_key.objectid, trans->transid, inode objectid, 507 * The extent ref structure for the full back refs has field for:
481 * number of references in the leaf)
482 * 508 *
483 * When a file extent is removed either during snapshot deletion or 509 * - number of pointers in the tree leaf
484 * file truncation, we find the corresponding back reference and check
485 * the following fields:
486 * 510 *
487 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), 511 * The key offset for the implicit back refs is the first byte of
488 * inode objectid) 512 * the tree leaf
489 * 513 *
490 * Btree extents can be referenced by: 514 * When a file extent is allocated, The implicit back refs is used.
491 * 515 * the fields are filled in:
492 * - Different subvolumes
493 * - Different generations of the same subvolume
494 *
495 * When a tree block is created, back references are inserted:
496 * 516 *
497 * (root->root_key.objectid, trans->transid, level, 1) 517 * (root_key.objectid, inode objectid, offset in file, 1)
498 * 518 *
499 * When a tree block is cow'd, new back references are added for all the 519 * When a file extent is removed file truncation, we find the
500 * blocks it points to. If the tree block isn't in reference counted root, 520 * corresponding implicit back refs and check the following fields:
501 * the old back references are removed. These new back references are of
502 * the form (trans->transid will have increased since creation):
503 * 521 *
504 * (root->root_key.objectid, trans->transid, level, 1) 522 * (btrfs_header_owner(leaf), inode objectid, offset in file)
505 * 523 *
506 * When a backref is in deleting, the following fields are checked: 524 * Btree extents can be referenced by:
507 * 525 *
508 * if backref was for a tree root: 526 * - Different subvolumes
509 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
510 * else
511 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
512 * 527 *
513 * Back Reference Key composing: 528 * Both the implicit back refs and the full back refs for tree blocks
529 * only consist of key. The key offset for the implicit back refs is
530 * objectid of block's owner tree. The key offset for the full back refs
531 * is the first byte of parent block.
514 * 532 *
515 * The key objectid corresponds to the first byte in the extent, the key 533 * When implicit back refs is used, information about the lowest key and
516 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first 534 * level of the tree block are required. These information are stored in
517 * byte of parent extent. If a extent is tree root, the key offset is set 535 * tree block info structure.
518 * to the key objectid.
519 */ 536 */
520 537
521static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans, 538#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
522 struct btrfs_root *root, 539static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
523 struct btrfs_path *path, 540 struct btrfs_root *root,
524 u64 bytenr, u64 parent, 541 struct btrfs_path *path,
525 u64 ref_root, u64 ref_generation, 542 u64 owner, u32 extra_size)
526 u64 owner_objectid, int del)
527{ 543{
544 struct btrfs_extent_item *item;
545 struct btrfs_extent_item_v0 *ei0;
546 struct btrfs_extent_ref_v0 *ref0;
547 struct btrfs_tree_block_info *bi;
548 struct extent_buffer *leaf;
528 struct btrfs_key key; 549 struct btrfs_key key;
529 struct btrfs_extent_ref *ref; 550 struct btrfs_key found_key;
551 u32 new_size = sizeof(*item);
552 u64 refs;
553 int ret;
554
555 leaf = path->nodes[0];
556 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
557
558 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
559 ei0 = btrfs_item_ptr(leaf, path->slots[0],
560 struct btrfs_extent_item_v0);
561 refs = btrfs_extent_refs_v0(leaf, ei0);
562
563 if (owner == (u64)-1) {
564 while (1) {
565 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
566 ret = btrfs_next_leaf(root, path);
567 if (ret < 0)
568 return ret;
569 BUG_ON(ret > 0);
570 leaf = path->nodes[0];
571 }
572 btrfs_item_key_to_cpu(leaf, &found_key,
573 path->slots[0]);
574 BUG_ON(key.objectid != found_key.objectid);
575 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
576 path->slots[0]++;
577 continue;
578 }
579 ref0 = btrfs_item_ptr(leaf, path->slots[0],
580 struct btrfs_extent_ref_v0);
581 owner = btrfs_ref_objectid_v0(leaf, ref0);
582 break;
583 }
584 }
585 btrfs_release_path(root, path);
586
587 if (owner < BTRFS_FIRST_FREE_OBJECTID)
588 new_size += sizeof(*bi);
589
590 new_size -= sizeof(*ei0);
591 ret = btrfs_search_slot(trans, root, &key, path,
592 new_size + extra_size, 1);
593 if (ret < 0)
594 return ret;
595 BUG_ON(ret);
596
597 ret = btrfs_extend_item(trans, root, path, new_size);
598 BUG_ON(ret);
599
600 leaf = path->nodes[0];
601 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
602 btrfs_set_extent_refs(leaf, item, refs);
603 /* FIXME: get real generation */
604 btrfs_set_extent_generation(leaf, item, 0);
605 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
606 btrfs_set_extent_flags(leaf, item,
607 BTRFS_EXTENT_FLAG_TREE_BLOCK |
608 BTRFS_BLOCK_FLAG_FULL_BACKREF);
609 bi = (struct btrfs_tree_block_info *)(item + 1);
610 /* FIXME: get first key of the block */
611 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
612 btrfs_set_tree_block_level(leaf, bi, (int)owner);
613 } else {
614 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
615 }
616 btrfs_mark_buffer_dirty(leaf);
617 return 0;
618}
619#endif
620
621static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
622{
623 u32 high_crc = ~(u32)0;
624 u32 low_crc = ~(u32)0;
625 __le64 lenum;
626
627 lenum = cpu_to_le64(root_objectid);
628 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
629 lenum = cpu_to_le64(owner);
630 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
631 lenum = cpu_to_le64(offset);
632 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
633
634 return ((u64)high_crc << 31) ^ (u64)low_crc;
635}
636
637static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
638 struct btrfs_extent_data_ref *ref)
639{
640 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
641 btrfs_extent_data_ref_objectid(leaf, ref),
642 btrfs_extent_data_ref_offset(leaf, ref));
643}
644
645static int match_extent_data_ref(struct extent_buffer *leaf,
646 struct btrfs_extent_data_ref *ref,
647 u64 root_objectid, u64 owner, u64 offset)
648{
649 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
650 btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
651 btrfs_extent_data_ref_offset(leaf, ref) != offset)
652 return 0;
653 return 1;
654}
655
656static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
657 struct btrfs_root *root,
658 struct btrfs_path *path,
659 u64 bytenr, u64 parent,
660 u64 root_objectid,
661 u64 owner, u64 offset)
662{
663 struct btrfs_key key;
664 struct btrfs_extent_data_ref *ref;
530 struct extent_buffer *leaf; 665 struct extent_buffer *leaf;
531 u64 ref_objectid; 666 u32 nritems;
532 int ret; 667 int ret;
668 int recow;
669 int err = -ENOENT;
533 670
534 key.objectid = bytenr; 671 key.objectid = bytenr;
535 key.type = BTRFS_EXTENT_REF_KEY; 672 if (parent) {
536 key.offset = parent; 673 key.type = BTRFS_SHARED_DATA_REF_KEY;
674 key.offset = parent;
675 } else {
676 key.type = BTRFS_EXTENT_DATA_REF_KEY;
677 key.offset = hash_extent_data_ref(root_objectid,
678 owner, offset);
679 }
680again:
681 recow = 0;
682 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
683 if (ret < 0) {
684 err = ret;
685 goto fail;
686 }
537 687
538 ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1); 688 if (parent) {
539 if (ret < 0) 689 if (!ret)
540 goto out; 690 return 0;
541 if (ret > 0) { 691#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
542 ret = -ENOENT; 692 key.type = BTRFS_EXTENT_REF_V0_KEY;
543 goto out; 693 btrfs_release_path(root, path);
694 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
695 if (ret < 0) {
696 err = ret;
697 goto fail;
698 }
699 if (!ret)
700 return 0;
701#endif
702 goto fail;
544 } 703 }
545 704
546 leaf = path->nodes[0]; 705 leaf = path->nodes[0];
547 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 706 nritems = btrfs_header_nritems(leaf);
548 ref_objectid = btrfs_ref_objectid(leaf, ref); 707 while (1) {
549 if (btrfs_ref_root(leaf, ref) != ref_root || 708 if (path->slots[0] >= nritems) {
550 btrfs_ref_generation(leaf, ref) != ref_generation || 709 ret = btrfs_next_leaf(root, path);
551 (ref_objectid != owner_objectid && 710 if (ret < 0)
552 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { 711 err = ret;
553 ret = -EIO; 712 if (ret)
554 WARN_ON(1); 713 goto fail;
555 goto out; 714
715 leaf = path->nodes[0];
716 nritems = btrfs_header_nritems(leaf);
717 recow = 1;
718 }
719
720 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
721 if (key.objectid != bytenr ||
722 key.type != BTRFS_EXTENT_DATA_REF_KEY)
723 goto fail;
724
725 ref = btrfs_item_ptr(leaf, path->slots[0],
726 struct btrfs_extent_data_ref);
727
728 if (match_extent_data_ref(leaf, ref, root_objectid,
729 owner, offset)) {
730 if (recow) {
731 btrfs_release_path(root, path);
732 goto again;
733 }
734 err = 0;
735 break;
736 }
737 path->slots[0]++;
556 } 738 }
557 ret = 0; 739fail:
558out: 740 return err;
559 return ret;
560} 741}
561 742
562static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, 743static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
563 struct btrfs_root *root, 744 struct btrfs_root *root,
564 struct btrfs_path *path, 745 struct btrfs_path *path,
565 u64 bytenr, u64 parent, 746 u64 bytenr, u64 parent,
566 u64 ref_root, u64 ref_generation, 747 u64 root_objectid, u64 owner,
567 u64 owner_objectid, 748 u64 offset, int refs_to_add)
568 int refs_to_add)
569{ 749{
570 struct btrfs_key key; 750 struct btrfs_key key;
571 struct extent_buffer *leaf; 751 struct extent_buffer *leaf;
572 struct btrfs_extent_ref *ref; 752 u32 size;
573 u32 num_refs; 753 u32 num_refs;
574 int ret; 754 int ret;
575 755
576 key.objectid = bytenr; 756 key.objectid = bytenr;
577 key.type = BTRFS_EXTENT_REF_KEY; 757 if (parent) {
578 key.offset = parent; 758 key.type = BTRFS_SHARED_DATA_REF_KEY;
759 key.offset = parent;
760 size = sizeof(struct btrfs_shared_data_ref);
761 } else {
762 key.type = BTRFS_EXTENT_DATA_REF_KEY;
763 key.offset = hash_extent_data_ref(root_objectid,
764 owner, offset);
765 size = sizeof(struct btrfs_extent_data_ref);
766 }
579 767
580 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); 768 ret = btrfs_insert_empty_item(trans, root, path, &key, size);
581 if (ret == 0) { 769 if (ret && ret != -EEXIST)
582 leaf = path->nodes[0]; 770 goto fail;
583 ref = btrfs_item_ptr(leaf, path->slots[0], 771
584 struct btrfs_extent_ref); 772 leaf = path->nodes[0];
585 btrfs_set_ref_root(leaf, ref, ref_root); 773 if (parent) {
586 btrfs_set_ref_generation(leaf, ref, ref_generation); 774 struct btrfs_shared_data_ref *ref;
587 btrfs_set_ref_objectid(leaf, ref, owner_objectid);
588 btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
589 } else if (ret == -EEXIST) {
590 u64 existing_owner;
591
592 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
593 leaf = path->nodes[0];
594 ref = btrfs_item_ptr(leaf, path->slots[0], 775 ref = btrfs_item_ptr(leaf, path->slots[0],
595 struct btrfs_extent_ref); 776 struct btrfs_shared_data_ref);
596 if (btrfs_ref_root(leaf, ref) != ref_root || 777 if (ret == 0) {
597 btrfs_ref_generation(leaf, ref) != ref_generation) { 778 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
598 ret = -EIO; 779 } else {
599 WARN_ON(1); 780 num_refs = btrfs_shared_data_ref_count(leaf, ref);
600 goto out; 781 num_refs += refs_to_add;
782 btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
601 } 783 }
784 } else {
785 struct btrfs_extent_data_ref *ref;
786 while (ret == -EEXIST) {
787 ref = btrfs_item_ptr(leaf, path->slots[0],
788 struct btrfs_extent_data_ref);
789 if (match_extent_data_ref(leaf, ref, root_objectid,
790 owner, offset))
791 break;
792 btrfs_release_path(root, path);
793 key.offset++;
794 ret = btrfs_insert_empty_item(trans, root, path, &key,
795 size);
796 if (ret && ret != -EEXIST)
797 goto fail;
602 798
603 num_refs = btrfs_ref_num_refs(leaf, ref); 799 leaf = path->nodes[0];
604 BUG_ON(num_refs == 0); 800 }
605 btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add); 801 ref = btrfs_item_ptr(leaf, path->slots[0],
606 802 struct btrfs_extent_data_ref);
607 existing_owner = btrfs_ref_objectid(leaf, ref); 803 if (ret == 0) {
608 if (existing_owner != owner_objectid && 804 btrfs_set_extent_data_ref_root(leaf, ref,
609 existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { 805 root_objectid);
610 btrfs_set_ref_objectid(leaf, ref, 806 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
611 BTRFS_MULTIPLE_OBJECTIDS); 807 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
808 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
809 } else {
810 num_refs = btrfs_extent_data_ref_count(leaf, ref);
811 num_refs += refs_to_add;
812 btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
612 } 813 }
613 ret = 0;
614 } else {
615 goto out;
616 } 814 }
617 btrfs_unlock_up_safe(path, 1); 815 btrfs_mark_buffer_dirty(leaf);
618 btrfs_mark_buffer_dirty(path->nodes[0]); 816 ret = 0;
619out: 817fail:
620 btrfs_release_path(root, path); 818 btrfs_release_path(root, path);
621 return ret; 819 return ret;
622} 820}
623 821
624static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, 822static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
625 struct btrfs_root *root, 823 struct btrfs_root *root,
626 struct btrfs_path *path, 824 struct btrfs_path *path,
627 int refs_to_drop) 825 int refs_to_drop)
628{ 826{
827 struct btrfs_key key;
828 struct btrfs_extent_data_ref *ref1 = NULL;
829 struct btrfs_shared_data_ref *ref2 = NULL;
629 struct extent_buffer *leaf; 830 struct extent_buffer *leaf;
630 struct btrfs_extent_ref *ref; 831 u32 num_refs = 0;
631 u32 num_refs;
632 int ret = 0; 832 int ret = 0;
633 833
634 leaf = path->nodes[0]; 834 leaf = path->nodes[0];
635 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 835 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
636 num_refs = btrfs_ref_num_refs(leaf, ref); 836
837 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
838 ref1 = btrfs_item_ptr(leaf, path->slots[0],
839 struct btrfs_extent_data_ref);
840 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
841 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
842 ref2 = btrfs_item_ptr(leaf, path->slots[0],
843 struct btrfs_shared_data_ref);
844 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
845#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
846 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
847 struct btrfs_extent_ref_v0 *ref0;
848 ref0 = btrfs_item_ptr(leaf, path->slots[0],
849 struct btrfs_extent_ref_v0);
850 num_refs = btrfs_ref_count_v0(leaf, ref0);
851#endif
852 } else {
853 BUG();
854 }
855
637 BUG_ON(num_refs < refs_to_drop); 856 BUG_ON(num_refs < refs_to_drop);
638 num_refs -= refs_to_drop; 857 num_refs -= refs_to_drop;
858
639 if (num_refs == 0) { 859 if (num_refs == 0) {
640 ret = btrfs_del_item(trans, root, path); 860 ret = btrfs_del_item(trans, root, path);
641 } else { 861 } else {
642 btrfs_set_ref_num_refs(leaf, ref, num_refs); 862 if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
863 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
864 else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
865 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
866#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
867 else {
868 struct btrfs_extent_ref_v0 *ref0;
869 ref0 = btrfs_item_ptr(leaf, path->slots[0],
870 struct btrfs_extent_ref_v0);
871 btrfs_set_ref_count_v0(leaf, ref0, num_refs);
872 }
873#endif
643 btrfs_mark_buffer_dirty(leaf); 874 btrfs_mark_buffer_dirty(leaf);
644 } 875 }
876 return ret;
877}
878
879static noinline u32 extent_data_ref_count(struct btrfs_root *root,
880 struct btrfs_path *path,
881 struct btrfs_extent_inline_ref *iref)
882{
883 struct btrfs_key key;
884 struct extent_buffer *leaf;
885 struct btrfs_extent_data_ref *ref1;
886 struct btrfs_shared_data_ref *ref2;
887 u32 num_refs = 0;
888
889 leaf = path->nodes[0];
890 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
891 if (iref) {
892 if (btrfs_extent_inline_ref_type(leaf, iref) ==
893 BTRFS_EXTENT_DATA_REF_KEY) {
894 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
895 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
896 } else {
897 ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
898 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
899 }
900 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
901 ref1 = btrfs_item_ptr(leaf, path->slots[0],
902 struct btrfs_extent_data_ref);
903 num_refs = btrfs_extent_data_ref_count(leaf, ref1);
904 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
905 ref2 = btrfs_item_ptr(leaf, path->slots[0],
906 struct btrfs_shared_data_ref);
907 num_refs = btrfs_shared_data_ref_count(leaf, ref2);
908#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
909 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
910 struct btrfs_extent_ref_v0 *ref0;
911 ref0 = btrfs_item_ptr(leaf, path->slots[0],
912 struct btrfs_extent_ref_v0);
913 num_refs = btrfs_ref_count_v0(leaf, ref0);
914#endif
915 } else {
916 WARN_ON(1);
917 }
918 return num_refs;
919}
920
921static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
922 struct btrfs_root *root,
923 struct btrfs_path *path,
924 u64 bytenr, u64 parent,
925 u64 root_objectid)
926{
927 struct btrfs_key key;
928 int ret;
929
930 key.objectid = bytenr;
931 if (parent) {
932 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
933 key.offset = parent;
934 } else {
935 key.type = BTRFS_TREE_BLOCK_REF_KEY;
936 key.offset = root_objectid;
937 }
938
939 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
940 if (ret > 0)
941 ret = -ENOENT;
942#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
943 if (ret == -ENOENT && parent) {
944 btrfs_release_path(root, path);
945 key.type = BTRFS_EXTENT_REF_V0_KEY;
946 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
947 if (ret > 0)
948 ret = -ENOENT;
949 }
950#endif
951 return ret;
952}
953
954static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
955 struct btrfs_root *root,
956 struct btrfs_path *path,
957 u64 bytenr, u64 parent,
958 u64 root_objectid)
959{
960 struct btrfs_key key;
961 int ret;
962
963 key.objectid = bytenr;
964 if (parent) {
965 key.type = BTRFS_SHARED_BLOCK_REF_KEY;
966 key.offset = parent;
967 } else {
968 key.type = BTRFS_TREE_BLOCK_REF_KEY;
969 key.offset = root_objectid;
970 }
971
972 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
973 btrfs_release_path(root, path);
974 return ret;
975}
976
977static inline int extent_ref_type(u64 parent, u64 owner)
978{
979 int type;
980 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
981 if (parent > 0)
982 type = BTRFS_SHARED_BLOCK_REF_KEY;
983 else
984 type = BTRFS_TREE_BLOCK_REF_KEY;
985 } else {
986 if (parent > 0)
987 type = BTRFS_SHARED_DATA_REF_KEY;
988 else
989 type = BTRFS_EXTENT_DATA_REF_KEY;
990 }
991 return type;
992}
993
994static int find_next_key(struct btrfs_path *path, struct btrfs_key *key)
995
996{
997 int level;
998 BUG_ON(!path->keep_locks);
999 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
1000 if (!path->nodes[level])
1001 break;
1002 btrfs_assert_tree_locked(path->nodes[level]);
1003 if (path->slots[level] + 1 >=
1004 btrfs_header_nritems(path->nodes[level]))
1005 continue;
1006 if (level == 0)
1007 btrfs_item_key_to_cpu(path->nodes[level], key,
1008 path->slots[level] + 1);
1009 else
1010 btrfs_node_key_to_cpu(path->nodes[level], key,
1011 path->slots[level] + 1);
1012 return 0;
1013 }
1014 return 1;
1015}
1016
1017/*
1018 * look for inline back ref. if back ref is found, *ref_ret is set
1019 * to the address of inline back ref, and 0 is returned.
1020 *
1021 * if back ref isn't found, *ref_ret is set to the address where it
1022 * should be inserted, and -ENOENT is returned.
1023 *
1024 * if insert is true and there are too many inline back refs, the path
1025 * points to the extent item, and -EAGAIN is returned.
1026 *
1027 * NOTE: inline back refs are ordered in the same way that back ref
1028 * items in the tree are ordered.
1029 */
1030static noinline_for_stack
1031int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1032 struct btrfs_root *root,
1033 struct btrfs_path *path,
1034 struct btrfs_extent_inline_ref **ref_ret,
1035 u64 bytenr, u64 num_bytes,
1036 u64 parent, u64 root_objectid,
1037 u64 owner, u64 offset, int insert)
1038{
1039 struct btrfs_key key;
1040 struct extent_buffer *leaf;
1041 struct btrfs_extent_item *ei;
1042 struct btrfs_extent_inline_ref *iref;
1043 u64 flags;
1044 u64 item_size;
1045 unsigned long ptr;
1046 unsigned long end;
1047 int extra_size;
1048 int type;
1049 int want;
1050 int ret;
1051 int err = 0;
1052
1053 key.objectid = bytenr;
1054 key.type = BTRFS_EXTENT_ITEM_KEY;
1055 key.offset = num_bytes;
1056
1057 want = extent_ref_type(parent, owner);
1058 if (insert) {
1059 extra_size = btrfs_extent_inline_ref_size(want);
1060 if (owner >= BTRFS_FIRST_FREE_OBJECTID)
1061 path->keep_locks = 1;
1062 } else
1063 extra_size = -1;
1064 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1065 if (ret < 0) {
1066 err = ret;
1067 goto out;
1068 }
1069 BUG_ON(ret);
1070
1071 leaf = path->nodes[0];
1072 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1073#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1074 if (item_size < sizeof(*ei)) {
1075 if (!insert) {
1076 err = -ENOENT;
1077 goto out;
1078 }
1079 ret = convert_extent_item_v0(trans, root, path, owner,
1080 extra_size);
1081 if (ret < 0) {
1082 err = ret;
1083 goto out;
1084 }
1085 leaf = path->nodes[0];
1086 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1087 }
1088#endif
1089 BUG_ON(item_size < sizeof(*ei));
1090
1091 if (owner < BTRFS_FIRST_FREE_OBJECTID && insert &&
1092 item_size + extra_size >= BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1093 err = -EAGAIN;
1094 goto out;
1095 }
1096
1097 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1098 flags = btrfs_extent_flags(leaf, ei);
1099
1100 ptr = (unsigned long)(ei + 1);
1101 end = (unsigned long)ei + item_size;
1102
1103 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1104 ptr += sizeof(struct btrfs_tree_block_info);
1105 BUG_ON(ptr > end);
1106 } else {
1107 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
1108 }
1109
1110 err = -ENOENT;
1111 while (1) {
1112 if (ptr >= end) {
1113 WARN_ON(ptr > end);
1114 break;
1115 }
1116 iref = (struct btrfs_extent_inline_ref *)ptr;
1117 type = btrfs_extent_inline_ref_type(leaf, iref);
1118 if (want < type)
1119 break;
1120 if (want > type) {
1121 ptr += btrfs_extent_inline_ref_size(type);
1122 continue;
1123 }
1124
1125 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1126 struct btrfs_extent_data_ref *dref;
1127 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1128 if (match_extent_data_ref(leaf, dref, root_objectid,
1129 owner, offset)) {
1130 err = 0;
1131 break;
1132 }
1133 if (hash_extent_data_ref_item(leaf, dref) <
1134 hash_extent_data_ref(root_objectid, owner, offset))
1135 break;
1136 } else {
1137 u64 ref_offset;
1138 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1139 if (parent > 0) {
1140 if (parent == ref_offset) {
1141 err = 0;
1142 break;
1143 }
1144 if (ref_offset < parent)
1145 break;
1146 } else {
1147 if (root_objectid == ref_offset) {
1148 err = 0;
1149 break;
1150 }
1151 if (ref_offset < root_objectid)
1152 break;
1153 }
1154 }
1155 ptr += btrfs_extent_inline_ref_size(type);
1156 }
1157 if (err == -ENOENT && insert) {
1158 if (item_size + extra_size >=
1159 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1160 err = -EAGAIN;
1161 goto out;
1162 }
1163 /*
1164 * To add new inline back ref, we have to make sure
1165 * there is no corresponding back ref item.
1166 * For simplicity, we just do not add new inline back
1167 * ref if there is any kind of item for this block
1168 */
1169 if (owner >= BTRFS_FIRST_FREE_OBJECTID &&
1170 find_next_key(path, &key) == 0 && key.objectid == bytenr) {
1171 err = -EAGAIN;
1172 goto out;
1173 }
1174 }
1175 *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1176out:
1177 if (insert && owner >= BTRFS_FIRST_FREE_OBJECTID) {
1178 path->keep_locks = 0;
1179 btrfs_unlock_up_safe(path, 1);
1180 }
1181 return err;
1182}
1183
1184/*
1185 * helper to add new inline back ref
1186 */
1187static noinline_for_stack
1188int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1189 struct btrfs_root *root,
1190 struct btrfs_path *path,
1191 struct btrfs_extent_inline_ref *iref,
1192 u64 parent, u64 root_objectid,
1193 u64 owner, u64 offset, int refs_to_add,
1194 struct btrfs_delayed_extent_op *extent_op)
1195{
1196 struct extent_buffer *leaf;
1197 struct btrfs_extent_item *ei;
1198 unsigned long ptr;
1199 unsigned long end;
1200 unsigned long item_offset;
1201 u64 refs;
1202 int size;
1203 int type;
1204 int ret;
1205
1206 leaf = path->nodes[0];
1207 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1208 item_offset = (unsigned long)iref - (unsigned long)ei;
1209
1210 type = extent_ref_type(parent, owner);
1211 size = btrfs_extent_inline_ref_size(type);
1212
1213 ret = btrfs_extend_item(trans, root, path, size);
1214 BUG_ON(ret);
1215
1216 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1217 refs = btrfs_extent_refs(leaf, ei);
1218 refs += refs_to_add;
1219 btrfs_set_extent_refs(leaf, ei, refs);
1220 if (extent_op)
1221 __run_delayed_extent_op(extent_op, leaf, ei);
1222
1223 ptr = (unsigned long)ei + item_offset;
1224 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1225 if (ptr < end - size)
1226 memmove_extent_buffer(leaf, ptr + size, ptr,
1227 end - size - ptr);
1228
1229 iref = (struct btrfs_extent_inline_ref *)ptr;
1230 btrfs_set_extent_inline_ref_type(leaf, iref, type);
1231 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1232 struct btrfs_extent_data_ref *dref;
1233 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1234 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1235 btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1236 btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1237 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1238 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1239 struct btrfs_shared_data_ref *sref;
1240 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1241 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1242 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1243 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1244 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1245 } else {
1246 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1247 }
1248 btrfs_mark_buffer_dirty(leaf);
1249 return 0;
1250}
1251
1252static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1253 struct btrfs_root *root,
1254 struct btrfs_path *path,
1255 struct btrfs_extent_inline_ref **ref_ret,
1256 u64 bytenr, u64 num_bytes, u64 parent,
1257 u64 root_objectid, u64 owner, u64 offset)
1258{
1259 int ret;
1260
1261 ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1262 bytenr, num_bytes, parent,
1263 root_objectid, owner, offset, 0);
1264 if (ret != -ENOENT)
1265 return ret;
1266
645 btrfs_release_path(root, path); 1267 btrfs_release_path(root, path);
1268 *ref_ret = NULL;
1269
1270 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1271 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1272 root_objectid);
1273 } else {
1274 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1275 root_objectid, owner, offset);
1276 }
1277 return ret;
1278}
1279
1280/*
1281 * helper to update/remove inline back ref
1282 */
1283static noinline_for_stack
1284int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1285 struct btrfs_root *root,
1286 struct btrfs_path *path,
1287 struct btrfs_extent_inline_ref *iref,
1288 int refs_to_mod,
1289 struct btrfs_delayed_extent_op *extent_op)
1290{
1291 struct extent_buffer *leaf;
1292 struct btrfs_extent_item *ei;
1293 struct btrfs_extent_data_ref *dref = NULL;
1294 struct btrfs_shared_data_ref *sref = NULL;
1295 unsigned long ptr;
1296 unsigned long end;
1297 u32 item_size;
1298 int size;
1299 int type;
1300 int ret;
1301 u64 refs;
1302
1303 leaf = path->nodes[0];
1304 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1305 refs = btrfs_extent_refs(leaf, ei);
1306 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1307 refs += refs_to_mod;
1308 btrfs_set_extent_refs(leaf, ei, refs);
1309 if (extent_op)
1310 __run_delayed_extent_op(extent_op, leaf, ei);
1311
1312 type = btrfs_extent_inline_ref_type(leaf, iref);
1313
1314 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1315 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1316 refs = btrfs_extent_data_ref_count(leaf, dref);
1317 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1318 sref = (struct btrfs_shared_data_ref *)(iref + 1);
1319 refs = btrfs_shared_data_ref_count(leaf, sref);
1320 } else {
1321 refs = 1;
1322 BUG_ON(refs_to_mod != -1);
1323 }
1324
1325 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1326 refs += refs_to_mod;
1327
1328 if (refs > 0) {
1329 if (type == BTRFS_EXTENT_DATA_REF_KEY)
1330 btrfs_set_extent_data_ref_count(leaf, dref, refs);
1331 else
1332 btrfs_set_shared_data_ref_count(leaf, sref, refs);
1333 } else {
1334 size = btrfs_extent_inline_ref_size(type);
1335 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1336 ptr = (unsigned long)iref;
1337 end = (unsigned long)ei + item_size;
1338 if (ptr + size < end)
1339 memmove_extent_buffer(leaf, ptr, ptr + size,
1340 end - ptr - size);
1341 item_size -= size;
1342 ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1343 BUG_ON(ret);
1344 }
1345 btrfs_mark_buffer_dirty(leaf);
1346 return 0;
1347}
1348
1349static noinline_for_stack
1350int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1351 struct btrfs_root *root,
1352 struct btrfs_path *path,
1353 u64 bytenr, u64 num_bytes, u64 parent,
1354 u64 root_objectid, u64 owner,
1355 u64 offset, int refs_to_add,
1356 struct btrfs_delayed_extent_op *extent_op)
1357{
1358 struct btrfs_extent_inline_ref *iref;
1359 int ret;
1360
1361 ret = lookup_inline_extent_backref(trans, root, path, &iref,
1362 bytenr, num_bytes, parent,
1363 root_objectid, owner, offset, 1);
1364 if (ret == 0) {
1365 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1366 ret = update_inline_extent_backref(trans, root, path, iref,
1367 refs_to_add, extent_op);
1368 } else if (ret == -ENOENT) {
1369 ret = setup_inline_extent_backref(trans, root, path, iref,
1370 parent, root_objectid,
1371 owner, offset, refs_to_add,
1372 extent_op);
1373 }
1374 return ret;
1375}
1376
1377static int insert_extent_backref(struct btrfs_trans_handle *trans,
1378 struct btrfs_root *root,
1379 struct btrfs_path *path,
1380 u64 bytenr, u64 parent, u64 root_objectid,
1381 u64 owner, u64 offset, int refs_to_add)
1382{
1383 int ret;
1384 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1385 BUG_ON(refs_to_add != 1);
1386 ret = insert_tree_block_ref(trans, root, path, bytenr,
1387 parent, root_objectid);
1388 } else {
1389 ret = insert_extent_data_ref(trans, root, path, bytenr,
1390 parent, root_objectid,
1391 owner, offset, refs_to_add);
1392 }
1393 return ret;
1394}
1395
1396static int remove_extent_backref(struct btrfs_trans_handle *trans,
1397 struct btrfs_root *root,
1398 struct btrfs_path *path,
1399 struct btrfs_extent_inline_ref *iref,
1400 int refs_to_drop, int is_data)
1401{
1402 int ret;
1403
1404 BUG_ON(!is_data && refs_to_drop != 1);
1405 if (iref) {
1406 ret = update_inline_extent_backref(trans, root, path, iref,
1407 -refs_to_drop, NULL);
1408 } else if (is_data) {
1409 ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1410 } else {
1411 ret = btrfs_del_item(trans, root, path);
1412 }
646 return ret; 1413 return ret;
647} 1414}
648 1415
@@ -686,71 +1453,40 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
686#endif 1453#endif
687} 1454}
688 1455
689static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1456int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
690 struct btrfs_root *root, u64 bytenr, 1457 struct btrfs_root *root,
691 u64 num_bytes, 1458 u64 bytenr, u64 num_bytes, u64 parent,
692 u64 orig_parent, u64 parent, 1459 u64 root_objectid, u64 owner, u64 offset)
693 u64 orig_root, u64 ref_root,
694 u64 orig_generation, u64 ref_generation,
695 u64 owner_objectid)
696{ 1460{
697 int ret; 1461 int ret;
698 int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; 1462 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1463 root_objectid == BTRFS_TREE_LOG_OBJECTID);
699 1464
700 ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, 1465 if (owner < BTRFS_FIRST_FREE_OBJECTID) {
701 orig_parent, parent, orig_root, 1466 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
702 ref_root, orig_generation, 1467 parent, root_objectid, (int)owner,
703 ref_generation, owner_objectid, pin); 1468 BTRFS_ADD_DELAYED_REF, NULL);
704 BUG_ON(ret); 1469 } else {
1470 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
1471 parent, root_objectid, owner, offset,
1472 BTRFS_ADD_DELAYED_REF, NULL);
1473 }
705 return ret; 1474 return ret;
706} 1475}
707 1476
708int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
709 struct btrfs_root *root, u64 bytenr,
710 u64 num_bytes, u64 orig_parent, u64 parent,
711 u64 ref_root, u64 ref_generation,
712 u64 owner_objectid)
713{
714 int ret;
715 if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
716 owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
717 return 0;
718
719 ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
720 orig_parent, parent, ref_root,
721 ref_root, ref_generation,
722 ref_generation, owner_objectid);
723 return ret;
724}
725static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1477static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
726 struct btrfs_root *root, u64 bytenr, 1478 struct btrfs_root *root,
727 u64 num_bytes, 1479 u64 bytenr, u64 num_bytes,
728 u64 orig_parent, u64 parent, 1480 u64 parent, u64 root_objectid,
729 u64 orig_root, u64 ref_root, 1481 u64 owner, u64 offset, int refs_to_add,
730 u64 orig_generation, u64 ref_generation, 1482 struct btrfs_delayed_extent_op *extent_op)
731 u64 owner_objectid)
732{
733 int ret;
734
735 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
736 ref_generation, owner_objectid,
737 BTRFS_ADD_DELAYED_REF, 0);
738 BUG_ON(ret);
739 return ret;
740}
741
742static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
743 struct btrfs_root *root, u64 bytenr,
744 u64 num_bytes, u64 parent, u64 ref_root,
745 u64 ref_generation, u64 owner_objectid,
746 int refs_to_add)
747{ 1483{
748 struct btrfs_path *path; 1484 struct btrfs_path *path;
749 int ret; 1485 struct extent_buffer *leaf;
750 struct btrfs_key key;
751 struct extent_buffer *l;
752 struct btrfs_extent_item *item; 1486 struct btrfs_extent_item *item;
753 u32 refs; 1487 u64 refs;
1488 int ret;
1489 int err = 0;
754 1490
755 path = btrfs_alloc_path(); 1491 path = btrfs_alloc_path();
756 if (!path) 1492 if (!path)
@@ -758,43 +1494,27 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
758 1494
759 path->reada = 1; 1495 path->reada = 1;
760 path->leave_spinning = 1; 1496 path->leave_spinning = 1;
761 key.objectid = bytenr; 1497 /* this will setup the path even if it fails to insert the back ref */
762 key.type = BTRFS_EXTENT_ITEM_KEY; 1498 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
763 key.offset = num_bytes; 1499 path, bytenr, num_bytes, parent,
764 1500 root_objectid, owner, offset,
765 /* first find the extent item and update its reference count */ 1501 refs_to_add, extent_op);
766 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1502 if (ret == 0)
767 path, 0, 1); 1503 goto out;
768 if (ret < 0) {
769 btrfs_set_path_blocking(path);
770 return ret;
771 }
772
773 if (ret > 0) {
774 WARN_ON(1);
775 btrfs_free_path(path);
776 return -EIO;
777 }
778 l = path->nodes[0];
779 1504
780 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 1505 if (ret != -EAGAIN) {
781 if (key.objectid != bytenr) { 1506 err = ret;
782 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); 1507 goto out;
783 printk(KERN_ERR "btrfs wanted %llu found %llu\n",
784 (unsigned long long)bytenr,
785 (unsigned long long)key.objectid);
786 BUG();
787 } 1508 }
788 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
789 1509
790 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 1510 leaf = path->nodes[0];
791 1511 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
792 refs = btrfs_extent_refs(l, item); 1512 refs = btrfs_extent_refs(leaf, item);
793 btrfs_set_extent_refs(l, item, refs + refs_to_add); 1513 btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
794 btrfs_unlock_up_safe(path, 1); 1514 if (extent_op)
795 1515 __run_delayed_extent_op(extent_op, leaf, item);
796 btrfs_mark_buffer_dirty(path->nodes[0]);
797 1516
1517 btrfs_mark_buffer_dirty(leaf);
798 btrfs_release_path(root->fs_info->extent_root, path); 1518 btrfs_release_path(root->fs_info->extent_root, path);
799 1519
800 path->reada = 1; 1520 path->reada = 1;
@@ -802,56 +1522,197 @@ static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
802 1522
803 /* now insert the actual backref */ 1523 /* now insert the actual backref */
804 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1524 ret = insert_extent_backref(trans, root->fs_info->extent_root,
805 path, bytenr, parent, 1525 path, bytenr, parent, root_objectid,
806 ref_root, ref_generation, 1526 owner, offset, refs_to_add);
807 owner_objectid, refs_to_add);
808 BUG_ON(ret); 1527 BUG_ON(ret);
1528out:
809 btrfs_free_path(path); 1529 btrfs_free_path(path);
810 return 0; 1530 return err;
811} 1531}
812 1532
813int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1533static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
814 struct btrfs_root *root, 1534 struct btrfs_root *root,
815 u64 bytenr, u64 num_bytes, u64 parent, 1535 struct btrfs_delayed_ref_node *node,
816 u64 ref_root, u64 ref_generation, 1536 struct btrfs_delayed_extent_op *extent_op,
817 u64 owner_objectid) 1537 int insert_reserved)
818{ 1538{
819 int ret; 1539 int ret = 0;
820 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1540 struct btrfs_delayed_data_ref *ref;
821 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 1541 struct btrfs_key ins;
822 return 0; 1542 u64 parent = 0;
1543 u64 ref_root = 0;
1544 u64 flags = 0;
1545
1546 ins.objectid = node->bytenr;
1547 ins.offset = node->num_bytes;
1548 ins.type = BTRFS_EXTENT_ITEM_KEY;
1549
1550 ref = btrfs_delayed_node_to_data_ref(node);
1551 if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1552 parent = ref->parent;
1553 else
1554 ref_root = ref->root;
823 1555
824 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent, 1556 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
825 0, ref_root, 0, ref_generation, 1557 if (extent_op) {
826 owner_objectid); 1558 BUG_ON(extent_op->update_key);
1559 flags |= extent_op->flags_to_set;
1560 }
1561 ret = alloc_reserved_file_extent(trans, root,
1562 parent, ref_root, flags,
1563 ref->objectid, ref->offset,
1564 &ins, node->ref_mod);
1565 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1566 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1567 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1568 node->num_bytes, parent,
1569 ref_root, ref->objectid,
1570 ref->offset, node->ref_mod,
1571 extent_op);
1572 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1573 ret = __btrfs_free_extent(trans, root, node->bytenr,
1574 node->num_bytes, parent,
1575 ref_root, ref->objectid,
1576 ref->offset, node->ref_mod,
1577 extent_op);
1578 } else {
1579 BUG();
1580 }
827 return ret; 1581 return ret;
828} 1582}
829 1583
830static int drop_delayed_ref(struct btrfs_trans_handle *trans, 1584static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
831 struct btrfs_root *root, 1585 struct extent_buffer *leaf,
832 struct btrfs_delayed_ref_node *node) 1586 struct btrfs_extent_item *ei)
1587{
1588 u64 flags = btrfs_extent_flags(leaf, ei);
1589 if (extent_op->update_flags) {
1590 flags |= extent_op->flags_to_set;
1591 btrfs_set_extent_flags(leaf, ei, flags);
1592 }
1593
1594 if (extent_op->update_key) {
1595 struct btrfs_tree_block_info *bi;
1596 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1597 bi = (struct btrfs_tree_block_info *)(ei + 1);
1598 btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1599 }
1600}
1601
1602static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1603 struct btrfs_root *root,
1604 struct btrfs_delayed_ref_node *node,
1605 struct btrfs_delayed_extent_op *extent_op)
1606{
1607 struct btrfs_key key;
1608 struct btrfs_path *path;
1609 struct btrfs_extent_item *ei;
1610 struct extent_buffer *leaf;
1611 u32 item_size;
1612 int ret;
1613 int err = 0;
1614
1615 path = btrfs_alloc_path();
1616 if (!path)
1617 return -ENOMEM;
1618
1619 key.objectid = node->bytenr;
1620 key.type = BTRFS_EXTENT_ITEM_KEY;
1621 key.offset = node->num_bytes;
1622
1623 path->reada = 1;
1624 path->leave_spinning = 1;
1625 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
1626 path, 0, 1);
1627 if (ret < 0) {
1628 err = ret;
1629 goto out;
1630 }
1631 if (ret > 0) {
1632 err = -EIO;
1633 goto out;
1634 }
1635
1636 leaf = path->nodes[0];
1637 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1638#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1639 if (item_size < sizeof(*ei)) {
1640 ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
1641 path, (u64)-1, 0);
1642 if (ret < 0) {
1643 err = ret;
1644 goto out;
1645 }
1646 leaf = path->nodes[0];
1647 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1648 }
1649#endif
1650 BUG_ON(item_size < sizeof(*ei));
1651 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1652 __run_delayed_extent_op(extent_op, leaf, ei);
1653
1654 btrfs_mark_buffer_dirty(leaf);
1655out:
1656 btrfs_free_path(path);
1657 return err;
1658}
1659
1660static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
1661 struct btrfs_root *root,
1662 struct btrfs_delayed_ref_node *node,
1663 struct btrfs_delayed_extent_op *extent_op,
1664 int insert_reserved)
833{ 1665{
834 int ret = 0; 1666 int ret = 0;
835 struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); 1667 struct btrfs_delayed_tree_ref *ref;
1668 struct btrfs_key ins;
1669 u64 parent = 0;
1670 u64 ref_root = 0;
836 1671
837 BUG_ON(node->ref_mod == 0); 1672 ins.objectid = node->bytenr;
838 ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, 1673 ins.offset = node->num_bytes;
839 node->parent, ref->root, ref->generation, 1674 ins.type = BTRFS_EXTENT_ITEM_KEY;
840 ref->owner_objectid, ref->pin, node->ref_mod);
841 1675
1676 ref = btrfs_delayed_node_to_tree_ref(node);
1677 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1678 parent = ref->parent;
1679 else
1680 ref_root = ref->root;
1681
1682 BUG_ON(node->ref_mod != 1);
1683 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1684 BUG_ON(!extent_op || !extent_op->update_flags ||
1685 !extent_op->update_key);
1686 ret = alloc_reserved_tree_block(trans, root,
1687 parent, ref_root,
1688 extent_op->flags_to_set,
1689 &extent_op->key,
1690 ref->level, &ins);
1691 update_reserved_extents(root, ins.objectid, ins.offset, 0);
1692 } else if (node->action == BTRFS_ADD_DELAYED_REF) {
1693 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1694 node->num_bytes, parent, ref_root,
1695 ref->level, 0, 1, extent_op);
1696 } else if (node->action == BTRFS_DROP_DELAYED_REF) {
1697 ret = __btrfs_free_extent(trans, root, node->bytenr,
1698 node->num_bytes, parent, ref_root,
1699 ref->level, 0, 1, extent_op);
1700 } else {
1701 BUG();
1702 }
842 return ret; 1703 return ret;
843} 1704}
844 1705
1706
845/* helper function to actually process a single delayed ref entry */ 1707/* helper function to actually process a single delayed ref entry */
846static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1708static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
847 struct btrfs_root *root, 1709 struct btrfs_root *root,
848 struct btrfs_delayed_ref_node *node, 1710 struct btrfs_delayed_ref_node *node,
849 int insert_reserved) 1711 struct btrfs_delayed_extent_op *extent_op,
1712 int insert_reserved)
850{ 1713{
851 int ret; 1714 int ret;
852 struct btrfs_delayed_ref *ref; 1715 if (btrfs_delayed_ref_is_head(node)) {
853
854 if (node->parent == (u64)-1) {
855 struct btrfs_delayed_ref_head *head; 1716 struct btrfs_delayed_ref_head *head;
856 /* 1717 /*
857 * we've hit the end of the chain and we were supposed 1718 * we've hit the end of the chain and we were supposed
@@ -859,44 +1720,35 @@ static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
859 * deleted before we ever needed to insert it, so all 1720 * deleted before we ever needed to insert it, so all
860 * we have to do is clean up the accounting 1721 * we have to do is clean up the accounting
861 */ 1722 */
1723 BUG_ON(extent_op);
1724 head = btrfs_delayed_node_to_head(node);
862 if (insert_reserved) { 1725 if (insert_reserved) {
1726 if (head->is_data) {
1727 ret = btrfs_del_csums(trans, root,
1728 node->bytenr,
1729 node->num_bytes);
1730 BUG_ON(ret);
1731 }
1732 btrfs_update_pinned_extents(root, node->bytenr,
1733 node->num_bytes, 1);
863 update_reserved_extents(root, node->bytenr, 1734 update_reserved_extents(root, node->bytenr,
864 node->num_bytes, 0); 1735 node->num_bytes, 0);
865 } 1736 }
866 head = btrfs_delayed_node_to_head(node);
867 mutex_unlock(&head->mutex); 1737 mutex_unlock(&head->mutex);
868 return 0; 1738 return 0;
869 } 1739 }
870 1740
871 ref = btrfs_delayed_node_to_ref(node); 1741 if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
872 if (ref->action == BTRFS_ADD_DELAYED_REF) { 1742 node->type == BTRFS_SHARED_BLOCK_REF_KEY)
873 if (insert_reserved) { 1743 ret = run_delayed_tree_ref(trans, root, node, extent_op,
874 struct btrfs_key ins; 1744 insert_reserved);
875 1745 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
876 ins.objectid = node->bytenr; 1746 node->type == BTRFS_SHARED_DATA_REF_KEY)
877 ins.offset = node->num_bytes; 1747 ret = run_delayed_data_ref(trans, root, node, extent_op,
878 ins.type = BTRFS_EXTENT_ITEM_KEY; 1748 insert_reserved);
879 1749 else
880 /* record the full extent allocation */ 1750 BUG();
881 ret = __btrfs_alloc_reserved_extent(trans, root, 1751 return ret;
882 node->parent, ref->root,
883 ref->generation, ref->owner_objectid,
884 &ins, node->ref_mod);
885 update_reserved_extents(root, node->bytenr,
886 node->num_bytes, 0);
887 } else {
888 /* just add one backref */
889 ret = add_extent_ref(trans, root, node->bytenr,
890 node->num_bytes,
891 node->parent, ref->root, ref->generation,
892 ref->owner_objectid, node->ref_mod);
893 }
894 BUG_ON(ret);
895 } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
896 WARN_ON(insert_reserved);
897 ret = drop_delayed_ref(trans, root, node);
898 }
899 return 0;
900} 1752}
901 1753
902static noinline struct btrfs_delayed_ref_node * 1754static noinline struct btrfs_delayed_ref_node *
@@ -919,7 +1771,7 @@ again:
919 rb_node); 1771 rb_node);
920 if (ref->bytenr != head->node.bytenr) 1772 if (ref->bytenr != head->node.bytenr)
921 break; 1773 break;
922 if (btrfs_delayed_node_to_ref(ref)->action == action) 1774 if (ref->action == action)
923 return ref; 1775 return ref;
924 node = rb_prev(node); 1776 node = rb_prev(node);
925 } 1777 }
@@ -937,6 +1789,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
937 struct btrfs_delayed_ref_root *delayed_refs; 1789 struct btrfs_delayed_ref_root *delayed_refs;
938 struct btrfs_delayed_ref_node *ref; 1790 struct btrfs_delayed_ref_node *ref;
939 struct btrfs_delayed_ref_head *locked_ref = NULL; 1791 struct btrfs_delayed_ref_head *locked_ref = NULL;
1792 struct btrfs_delayed_extent_op *extent_op;
940 int ret; 1793 int ret;
941 int count = 0; 1794 int count = 0;
942 int must_insert_reserved = 0; 1795 int must_insert_reserved = 0;
@@ -975,6 +1828,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
975 must_insert_reserved = locked_ref->must_insert_reserved; 1828 must_insert_reserved = locked_ref->must_insert_reserved;
976 locked_ref->must_insert_reserved = 0; 1829 locked_ref->must_insert_reserved = 0;
977 1830
1831 extent_op = locked_ref->extent_op;
1832 locked_ref->extent_op = NULL;
1833
978 /* 1834 /*
979 * locked_ref is the head node, so we have to go one 1835 * locked_ref is the head node, so we have to go one
980 * node back for any delayed ref updates 1836 * node back for any delayed ref updates
@@ -986,6 +1842,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
986 * so that any accounting fixes can happen 1842 * so that any accounting fixes can happen
987 */ 1843 */
988 ref = &locked_ref->node; 1844 ref = &locked_ref->node;
1845
1846 if (extent_op && must_insert_reserved) {
1847 kfree(extent_op);
1848 extent_op = NULL;
1849 }
1850
1851 if (extent_op) {
1852 spin_unlock(&delayed_refs->lock);
1853
1854 ret = run_delayed_extent_op(trans, root,
1855 ref, extent_op);
1856 BUG_ON(ret);
1857 kfree(extent_op);
1858
1859 cond_resched();
1860 spin_lock(&delayed_refs->lock);
1861 continue;
1862 }
1863
989 list_del_init(&locked_ref->cluster); 1864 list_del_init(&locked_ref->cluster);
990 locked_ref = NULL; 1865 locked_ref = NULL;
991 } 1866 }
@@ -993,14 +1868,17 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
993 ref->in_tree = 0; 1868 ref->in_tree = 0;
994 rb_erase(&ref->rb_node, &delayed_refs->root); 1869 rb_erase(&ref->rb_node, &delayed_refs->root);
995 delayed_refs->num_entries--; 1870 delayed_refs->num_entries--;
1871
996 spin_unlock(&delayed_refs->lock); 1872 spin_unlock(&delayed_refs->lock);
997 1873
998 ret = run_one_delayed_ref(trans, root, ref, 1874 ret = run_one_delayed_ref(trans, root, ref, extent_op,
999 must_insert_reserved); 1875 must_insert_reserved);
1000 BUG_ON(ret); 1876 BUG_ON(ret);
1001 btrfs_put_delayed_ref(ref);
1002 1877
1878 btrfs_put_delayed_ref(ref);
1879 kfree(extent_op);
1003 count++; 1880 count++;
1881
1004 cond_resched(); 1882 cond_resched();
1005 spin_lock(&delayed_refs->lock); 1883 spin_lock(&delayed_refs->lock);
1006 } 1884 }
@@ -1095,25 +1973,112 @@ out:
1095 return 0; 1973 return 0;
1096} 1974}
1097 1975
1098int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1976int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
1099 struct btrfs_root *root, u64 objectid, u64 bytenr) 1977 struct btrfs_root *root,
1978 u64 bytenr, u64 num_bytes, u64 flags,
1979 int is_data)
1980{
1981 struct btrfs_delayed_extent_op *extent_op;
1982 int ret;
1983
1984 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
1985 if (!extent_op)
1986 return -ENOMEM;
1987
1988 extent_op->flags_to_set = flags;
1989 extent_op->update_flags = 1;
1990 extent_op->update_key = 0;
1991 extent_op->is_data = is_data ? 1 : 0;
1992
1993 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
1994 if (ret)
1995 kfree(extent_op);
1996 return ret;
1997}
1998
1999static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2000 struct btrfs_root *root,
2001 struct btrfs_path *path,
2002 u64 objectid, u64 offset, u64 bytenr)
2003{
2004 struct btrfs_delayed_ref_head *head;
2005 struct btrfs_delayed_ref_node *ref;
2006 struct btrfs_delayed_data_ref *data_ref;
2007 struct btrfs_delayed_ref_root *delayed_refs;
2008 struct rb_node *node;
2009 int ret = 0;
2010
2011 ret = -ENOENT;
2012 delayed_refs = &trans->transaction->delayed_refs;
2013 spin_lock(&delayed_refs->lock);
2014 head = btrfs_find_delayed_ref_head(trans, bytenr);
2015 if (!head)
2016 goto out;
2017
2018 if (!mutex_trylock(&head->mutex)) {
2019 atomic_inc(&head->node.refs);
2020 spin_unlock(&delayed_refs->lock);
2021
2022 btrfs_release_path(root->fs_info->extent_root, path);
2023
2024 mutex_lock(&head->mutex);
2025 mutex_unlock(&head->mutex);
2026 btrfs_put_delayed_ref(&head->node);
2027 return -EAGAIN;
2028 }
2029
2030 node = rb_prev(&head->node.rb_node);
2031 if (!node)
2032 goto out_unlock;
2033
2034 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2035
2036 if (ref->bytenr != bytenr)
2037 goto out_unlock;
2038
2039 ret = 1;
2040 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2041 goto out_unlock;
2042
2043 data_ref = btrfs_delayed_node_to_data_ref(ref);
2044
2045 node = rb_prev(node);
2046 if (node) {
2047 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2048 if (ref->bytenr == bytenr)
2049 goto out_unlock;
2050 }
2051
2052 if (data_ref->root != root->root_key.objectid ||
2053 data_ref->objectid != objectid || data_ref->offset != offset)
2054 goto out_unlock;
2055
2056 ret = 0;
2057out_unlock:
2058 mutex_unlock(&head->mutex);
2059out:
2060 spin_unlock(&delayed_refs->lock);
2061 return ret;
2062}
2063
2064static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2065 struct btrfs_root *root,
2066 struct btrfs_path *path,
2067 u64 objectid, u64 offset, u64 bytenr)
1100{ 2068{
1101 struct btrfs_root *extent_root = root->fs_info->extent_root; 2069 struct btrfs_root *extent_root = root->fs_info->extent_root;
1102 struct btrfs_path *path;
1103 struct extent_buffer *leaf; 2070 struct extent_buffer *leaf;
1104 struct btrfs_extent_ref *ref_item; 2071 struct btrfs_extent_data_ref *ref;
2072 struct btrfs_extent_inline_ref *iref;
2073 struct btrfs_extent_item *ei;
1105 struct btrfs_key key; 2074 struct btrfs_key key;
1106 struct btrfs_key found_key; 2075 u32 item_size;
1107 u64 ref_root;
1108 u64 last_snapshot;
1109 u32 nritems;
1110 int ret; 2076 int ret;
1111 2077
1112 key.objectid = bytenr; 2078 key.objectid = bytenr;
1113 key.offset = (u64)-1; 2079 key.offset = (u64)-1;
1114 key.type = BTRFS_EXTENT_ITEM_KEY; 2080 key.type = BTRFS_EXTENT_ITEM_KEY;
1115 2081
1116 path = btrfs_alloc_path();
1117 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2082 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
1118 if (ret < 0) 2083 if (ret < 0)
1119 goto out; 2084 goto out;
@@ -1125,55 +2090,83 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1125 2090
1126 path->slots[0]--; 2091 path->slots[0]--;
1127 leaf = path->nodes[0]; 2092 leaf = path->nodes[0];
1128 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2093 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1129 2094
1130 if (found_key.objectid != bytenr || 2095 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
1131 found_key.type != BTRFS_EXTENT_ITEM_KEY)
1132 goto out; 2096 goto out;
1133 2097
1134 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 2098 ret = 1;
1135 while (1) { 2099 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1136 leaf = path->nodes[0]; 2100#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1137 nritems = btrfs_header_nritems(leaf); 2101 if (item_size < sizeof(*ei)) {
1138 if (path->slots[0] >= nritems) { 2102 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
1139 ret = btrfs_next_leaf(extent_root, path); 2103 goto out;
1140 if (ret < 0) 2104 }
1141 goto out; 2105#endif
1142 if (ret == 0) 2106 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1143 continue;
1144 break;
1145 }
1146 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1147 if (found_key.objectid != bytenr)
1148 break;
1149 2107
1150 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 2108 if (item_size != sizeof(*ei) +
1151 path->slots[0]++; 2109 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
1152 continue; 2110 goto out;
1153 }
1154 2111
1155 ref_item = btrfs_item_ptr(leaf, path->slots[0], 2112 if (btrfs_extent_generation(leaf, ei) <=
1156 struct btrfs_extent_ref); 2113 btrfs_root_last_snapshot(&root->root_item))
1157 ref_root = btrfs_ref_root(leaf, ref_item); 2114 goto out;
1158 if ((ref_root != root->root_key.objectid && 2115
1159 ref_root != BTRFS_TREE_LOG_OBJECTID) || 2116 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
1160 objectid != btrfs_ref_objectid(leaf, ref_item)) { 2117 if (btrfs_extent_inline_ref_type(leaf, iref) !=
1161 ret = 1; 2118 BTRFS_EXTENT_DATA_REF_KEY)
1162 goto out; 2119 goto out;
1163 } 2120
1164 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) { 2121 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
1165 ret = 1; 2122 if (btrfs_extent_refs(leaf, ei) !=
2123 btrfs_extent_data_ref_count(leaf, ref) ||
2124 btrfs_extent_data_ref_root(leaf, ref) !=
2125 root->root_key.objectid ||
2126 btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2127 btrfs_extent_data_ref_offset(leaf, ref) != offset)
2128 goto out;
2129
2130 ret = 0;
2131out:
2132 return ret;
2133}
2134
2135int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2136 struct btrfs_root *root,
2137 u64 objectid, u64 offset, u64 bytenr)
2138{
2139 struct btrfs_path *path;
2140 int ret;
2141 int ret2;
2142
2143 path = btrfs_alloc_path();
2144 if (!path)
2145 return -ENOENT;
2146
2147 do {
2148 ret = check_committed_ref(trans, root, path, objectid,
2149 offset, bytenr);
2150 if (ret && ret != -ENOENT)
1166 goto out; 2151 goto out;
1167 }
1168 2152
1169 path->slots[0]++; 2153 ret2 = check_delayed_ref(trans, root, path, objectid,
2154 offset, bytenr);
2155 } while (ret2 == -EAGAIN);
2156
2157 if (ret2 && ret2 != -ENOENT) {
2158 ret = ret2;
2159 goto out;
1170 } 2160 }
1171 ret = 0; 2161
2162 if (ret != -ENOENT || ret2 != -ENOENT)
2163 ret = 0;
1172out: 2164out:
1173 btrfs_free_path(path); 2165 btrfs_free_path(path);
1174 return ret; 2166 return ret;
1175} 2167}
1176 2168
2169#if 0
1177int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2170int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1178 struct extent_buffer *buf, u32 nr_extents) 2171 struct extent_buffer *buf, u32 nr_extents)
1179{ 2172{
@@ -1291,62 +2284,44 @@ static int refsort_cmp(const void *a_void, const void *b_void)
1291 return 1; 2284 return 1;
1292 return 0; 2285 return 0;
1293} 2286}
2287#endif
1294 2288
1295 2289static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
1296noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1297 struct btrfs_root *root, 2290 struct btrfs_root *root,
1298 struct extent_buffer *orig_buf, 2291 struct extent_buffer *buf,
1299 struct extent_buffer *buf, u32 *nr_extents) 2292 int full_backref, int inc)
1300{ 2293{
1301 u64 bytenr; 2294 u64 bytenr;
2295 u64 num_bytes;
2296 u64 parent;
1302 u64 ref_root; 2297 u64 ref_root;
1303 u64 orig_root;
1304 u64 ref_generation;
1305 u64 orig_generation;
1306 struct refsort *sorted;
1307 u32 nritems; 2298 u32 nritems;
1308 u32 nr_file_extents = 0;
1309 struct btrfs_key key; 2299 struct btrfs_key key;
1310 struct btrfs_file_extent_item *fi; 2300 struct btrfs_file_extent_item *fi;
1311 int i; 2301 int i;
1312 int level; 2302 int level;
1313 int ret = 0; 2303 int ret = 0;
1314 int faili = 0;
1315 int refi = 0;
1316 int slot;
1317 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2304 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1318 u64, u64, u64, u64, u64, u64, u64, u64, u64); 2305 u64, u64, u64, u64, u64, u64);
1319 2306
1320 ref_root = btrfs_header_owner(buf); 2307 ref_root = btrfs_header_owner(buf);
1321 ref_generation = btrfs_header_generation(buf);
1322 orig_root = btrfs_header_owner(orig_buf);
1323 orig_generation = btrfs_header_generation(orig_buf);
1324
1325 nritems = btrfs_header_nritems(buf); 2308 nritems = btrfs_header_nritems(buf);
1326 level = btrfs_header_level(buf); 2309 level = btrfs_header_level(buf);
1327 2310
1328 sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); 2311 if (!root->ref_cows && level == 0)
1329 BUG_ON(!sorted); 2312 return 0;
1330 2313
1331 if (root->ref_cows) { 2314 if (inc)
1332 process_func = __btrfs_inc_extent_ref; 2315 process_func = btrfs_inc_extent_ref;
1333 } else { 2316 else
1334 if (level == 0 && 2317 process_func = btrfs_free_extent;
1335 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 2318
1336 goto out; 2319 if (full_backref)
1337 if (level != 0 && 2320 parent = buf->start;
1338 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 2321 else
1339 goto out; 2322 parent = 0;
1340 process_func = __btrfs_update_extent_ref;
1341 }
1342 2323
1343 /*
1344 * we make two passes through the items. In the first pass we
1345 * only record the byte number and slot. Then we sort based on
1346 * byte number and do the actual work based on the sorted results
1347 */
1348 for (i = 0; i < nritems; i++) { 2324 for (i = 0; i < nritems; i++) {
1349 cond_resched();
1350 if (level == 0) { 2325 if (level == 0) {
1351 btrfs_item_key_to_cpu(buf, &key, i); 2326 btrfs_item_key_to_cpu(buf, &key, i);
1352 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2327 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
@@ -1360,151 +2335,38 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1360 if (bytenr == 0) 2335 if (bytenr == 0)
1361 continue; 2336 continue;
1362 2337
1363 nr_file_extents++; 2338 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
1364 sorted[refi].bytenr = bytenr; 2339 key.offset -= btrfs_file_extent_offset(buf, fi);
1365 sorted[refi].slot = i; 2340 ret = process_func(trans, root, bytenr, num_bytes,
1366 refi++; 2341 parent, ref_root, key.objectid,
1367 } else { 2342 key.offset);
1368 bytenr = btrfs_node_blockptr(buf, i); 2343 if (ret)
1369 sorted[refi].bytenr = bytenr;
1370 sorted[refi].slot = i;
1371 refi++;
1372 }
1373 }
1374 /*
1375 * if refi == 0, we didn't actually put anything into the sorted
1376 * array and we're done
1377 */
1378 if (refi == 0)
1379 goto out;
1380
1381 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
1382
1383 for (i = 0; i < refi; i++) {
1384 cond_resched();
1385 slot = sorted[i].slot;
1386 bytenr = sorted[i].bytenr;
1387
1388 if (level == 0) {
1389 btrfs_item_key_to_cpu(buf, &key, slot);
1390 fi = btrfs_item_ptr(buf, slot,
1391 struct btrfs_file_extent_item);
1392
1393 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1394 if (bytenr == 0)
1395 continue;
1396
1397 ret = process_func(trans, root, bytenr,
1398 btrfs_file_extent_disk_num_bytes(buf, fi),
1399 orig_buf->start, buf->start,
1400 orig_root, ref_root,
1401 orig_generation, ref_generation,
1402 key.objectid);
1403
1404 if (ret) {
1405 faili = slot;
1406 WARN_ON(1);
1407 goto fail; 2344 goto fail;
1408 }
1409 } else { 2345 } else {
1410 ret = process_func(trans, root, bytenr, buf->len, 2346 bytenr = btrfs_node_blockptr(buf, i);
1411 orig_buf->start, buf->start, 2347 num_bytes = btrfs_level_size(root, level - 1);
1412 orig_root, ref_root, 2348 ret = process_func(trans, root, bytenr, num_bytes,
1413 orig_generation, ref_generation, 2349 parent, ref_root, level - 1, 0);
1414 level - 1); 2350 if (ret)
1415 if (ret) {
1416 faili = slot;
1417 WARN_ON(1);
1418 goto fail; 2351 goto fail;
1419 }
1420 } 2352 }
1421 } 2353 }
1422out:
1423 kfree(sorted);
1424 if (nr_extents) {
1425 if (level == 0)
1426 *nr_extents = nr_file_extents;
1427 else
1428 *nr_extents = nritems;
1429 }
1430 return 0; 2354 return 0;
1431fail: 2355fail:
1432 kfree(sorted); 2356 BUG();
1433 WARN_ON(1);
1434 return ret; 2357 return ret;
1435} 2358}
1436 2359
1437int btrfs_update_ref(struct btrfs_trans_handle *trans, 2360int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1438 struct btrfs_root *root, struct extent_buffer *orig_buf, 2361 struct extent_buffer *buf, int full_backref)
1439 struct extent_buffer *buf, int start_slot, int nr)
1440
1441{ 2362{
1442 u64 bytenr; 2363 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
1443 u64 ref_root; 2364}
1444 u64 orig_root;
1445 u64 ref_generation;
1446 u64 orig_generation;
1447 struct btrfs_key key;
1448 struct btrfs_file_extent_item *fi;
1449 int i;
1450 int ret;
1451 int slot;
1452 int level;
1453
1454 BUG_ON(start_slot < 0);
1455 BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1456
1457 ref_root = btrfs_header_owner(buf);
1458 ref_generation = btrfs_header_generation(buf);
1459 orig_root = btrfs_header_owner(orig_buf);
1460 orig_generation = btrfs_header_generation(orig_buf);
1461 level = btrfs_header_level(buf);
1462
1463 if (!root->ref_cows) {
1464 if (level == 0 &&
1465 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1466 return 0;
1467 if (level != 0 &&
1468 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1469 return 0;
1470 }
1471 2365
1472 for (i = 0, slot = start_slot; i < nr; i++, slot++) { 2366int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1473 cond_resched(); 2367 struct extent_buffer *buf, int full_backref)
1474 if (level == 0) { 2368{
1475 btrfs_item_key_to_cpu(buf, &key, slot); 2369 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
1476 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1477 continue;
1478 fi = btrfs_item_ptr(buf, slot,
1479 struct btrfs_file_extent_item);
1480 if (btrfs_file_extent_type(buf, fi) ==
1481 BTRFS_FILE_EXTENT_INLINE)
1482 continue;
1483 bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1484 if (bytenr == 0)
1485 continue;
1486 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1487 btrfs_file_extent_disk_num_bytes(buf, fi),
1488 orig_buf->start, buf->start,
1489 orig_root, ref_root, orig_generation,
1490 ref_generation, key.objectid);
1491 if (ret)
1492 goto fail;
1493 } else {
1494 bytenr = btrfs_node_blockptr(buf, slot);
1495 ret = __btrfs_update_extent_ref(trans, root, bytenr,
1496 buf->len, orig_buf->start,
1497 buf->start, orig_root, ref_root,
1498 orig_generation, ref_generation,
1499 level - 1);
1500 if (ret)
1501 goto fail;
1502 }
1503 }
1504 return 0;
1505fail:
1506 WARN_ON(1);
1507 return -1;
1508} 2370}
1509 2371
1510static int write_one_cache_group(struct btrfs_trans_handle *trans, 2372static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -2007,6 +2869,24 @@ static int update_block_group(struct btrfs_trans_handle *trans,
2007 u64 old_val; 2869 u64 old_val;
2008 u64 byte_in_group; 2870 u64 byte_in_group;
2009 2871
2872 /* block accounting for super block */
2873 spin_lock(&info->delalloc_lock);
2874 old_val = btrfs_super_bytes_used(&info->super_copy);
2875 if (alloc)
2876 old_val += num_bytes;
2877 else
2878 old_val -= num_bytes;
2879 btrfs_set_super_bytes_used(&info->super_copy, old_val);
2880
2881 /* block accounting for root item */
2882 old_val = btrfs_root_used(&root->root_item);
2883 if (alloc)
2884 old_val += num_bytes;
2885 else
2886 old_val -= num_bytes;
2887 btrfs_set_root_used(&root->root_item, old_val);
2888 spin_unlock(&info->delalloc_lock);
2889
2010 while (total) { 2890 while (total) {
2011 cache = btrfs_lookup_block_group(info, bytenr); 2891 cache = btrfs_lookup_block_group(info, bytenr);
2012 if (!cache) 2892 if (!cache)
@@ -2216,8 +3096,6 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
2216 u64 header_owner = btrfs_header_owner(buf); 3096 u64 header_owner = btrfs_header_owner(buf);
2217 u64 header_transid = btrfs_header_generation(buf); 3097 u64 header_transid = btrfs_header_generation(buf);
2218 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3098 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2219 header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2220 header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2221 header_transid == trans->transid && 3099 header_transid == trans->transid &&
2222 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3100 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2223 *must_clean = buf; 3101 *must_clean = buf;
@@ -2235,63 +3113,77 @@ pinit:
2235 return 0; 3113 return 0;
2236} 3114}
2237 3115
2238/* 3116
2239 * remove an extent from the root, returns 0 on success 3117static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2240 */ 3118 struct btrfs_root *root,
2241static int __free_extent(struct btrfs_trans_handle *trans, 3119 u64 bytenr, u64 num_bytes, u64 parent,
2242 struct btrfs_root *root, 3120 u64 root_objectid, u64 owner_objectid,
2243 u64 bytenr, u64 num_bytes, u64 parent, 3121 u64 owner_offset, int refs_to_drop,
2244 u64 root_objectid, u64 ref_generation, 3122 struct btrfs_delayed_extent_op *extent_op)
2245 u64 owner_objectid, int pin, int mark_free,
2246 int refs_to_drop)
2247{ 3123{
2248 struct btrfs_path *path;
2249 struct btrfs_key key; 3124 struct btrfs_key key;
3125 struct btrfs_path *path;
2250 struct btrfs_fs_info *info = root->fs_info; 3126 struct btrfs_fs_info *info = root->fs_info;
2251 struct btrfs_root *extent_root = info->extent_root; 3127 struct btrfs_root *extent_root = info->extent_root;
2252 struct extent_buffer *leaf; 3128 struct extent_buffer *leaf;
3129 struct btrfs_extent_item *ei;
3130 struct btrfs_extent_inline_ref *iref;
2253 int ret; 3131 int ret;
3132 int is_data;
2254 int extent_slot = 0; 3133 int extent_slot = 0;
2255 int found_extent = 0; 3134 int found_extent = 0;
2256 int num_to_del = 1; 3135 int num_to_del = 1;
2257 struct btrfs_extent_item *ei; 3136 u32 item_size;
2258 u32 refs; 3137 u64 refs;
2259 3138
2260 key.objectid = bytenr;
2261 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
2262 key.offset = num_bytes;
2263 path = btrfs_alloc_path(); 3139 path = btrfs_alloc_path();
2264 if (!path) 3140 if (!path)
2265 return -ENOMEM; 3141 return -ENOMEM;
2266 3142
2267 path->reada = 1; 3143 path->reada = 1;
2268 path->leave_spinning = 1; 3144 path->leave_spinning = 1;
2269 ret = lookup_extent_backref(trans, extent_root, path, 3145
2270 bytenr, parent, root_objectid, 3146 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
2271 ref_generation, owner_objectid, 1); 3147 BUG_ON(!is_data && refs_to_drop != 1);
3148
3149 ret = lookup_extent_backref(trans, extent_root, path, &iref,
3150 bytenr, num_bytes, parent,
3151 root_objectid, owner_objectid,
3152 owner_offset);
2272 if (ret == 0) { 3153 if (ret == 0) {
2273 struct btrfs_key found_key;
2274 extent_slot = path->slots[0]; 3154 extent_slot = path->slots[0];
2275 while (extent_slot > 0) { 3155 while (extent_slot >= 0) {
2276 extent_slot--; 3156 btrfs_item_key_to_cpu(path->nodes[0], &key,
2277 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2278 extent_slot); 3157 extent_slot);
2279 if (found_key.objectid != bytenr) 3158 if (key.objectid != bytenr)
2280 break; 3159 break;
2281 if (found_key.type == BTRFS_EXTENT_ITEM_KEY && 3160 if (key.type == BTRFS_EXTENT_ITEM_KEY &&
2282 found_key.offset == num_bytes) { 3161 key.offset == num_bytes) {
2283 found_extent = 1; 3162 found_extent = 1;
2284 break; 3163 break;
2285 } 3164 }
2286 if (path->slots[0] - extent_slot > 5) 3165 if (path->slots[0] - extent_slot > 5)
2287 break; 3166 break;
3167 extent_slot--;
2288 } 3168 }
3169#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3170 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
3171 if (found_extent && item_size < sizeof(*ei))
3172 found_extent = 0;
3173#endif
2289 if (!found_extent) { 3174 if (!found_extent) {
3175 BUG_ON(iref);
2290 ret = remove_extent_backref(trans, extent_root, path, 3176 ret = remove_extent_backref(trans, extent_root, path,
2291 refs_to_drop); 3177 NULL, refs_to_drop,
3178 is_data);
2292 BUG_ON(ret); 3179 BUG_ON(ret);
2293 btrfs_release_path(extent_root, path); 3180 btrfs_release_path(extent_root, path);
2294 path->leave_spinning = 1; 3181 path->leave_spinning = 1;
3182
3183 key.objectid = bytenr;
3184 key.type = BTRFS_EXTENT_ITEM_KEY;
3185 key.offset = num_bytes;
3186
2295 ret = btrfs_search_slot(trans, extent_root, 3187 ret = btrfs_search_slot(trans, extent_root,
2296 &key, path, -1, 1); 3188 &key, path, -1, 1);
2297 if (ret) { 3189 if (ret) {
@@ -2307,82 +3199,98 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2307 btrfs_print_leaf(extent_root, path->nodes[0]); 3199 btrfs_print_leaf(extent_root, path->nodes[0]);
2308 WARN_ON(1); 3200 WARN_ON(1);
2309 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3201 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2310 "parent %llu root %llu gen %llu owner %llu\n", 3202 "parent %llu root %llu owner %llu offset %llu\n",
2311 (unsigned long long)bytenr, 3203 (unsigned long long)bytenr,
2312 (unsigned long long)parent, 3204 (unsigned long long)parent,
2313 (unsigned long long)root_objectid, 3205 (unsigned long long)root_objectid,
2314 (unsigned long long)ref_generation, 3206 (unsigned long long)owner_objectid,
2315 (unsigned long long)owner_objectid); 3207 (unsigned long long)owner_offset);
2316 } 3208 }
2317 3209
2318 leaf = path->nodes[0]; 3210 leaf = path->nodes[0];
3211 item_size = btrfs_item_size_nr(leaf, extent_slot);
3212#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3213 if (item_size < sizeof(*ei)) {
3214 BUG_ON(found_extent || extent_slot != path->slots[0]);
3215 ret = convert_extent_item_v0(trans, extent_root, path,
3216 owner_objectid, 0);
3217 BUG_ON(ret < 0);
3218
3219 btrfs_release_path(extent_root, path);
3220 path->leave_spinning = 1;
3221
3222 key.objectid = bytenr;
3223 key.type = BTRFS_EXTENT_ITEM_KEY;
3224 key.offset = num_bytes;
3225
3226 ret = btrfs_search_slot(trans, extent_root, &key, path,
3227 -1, 1);
3228 if (ret) {
3229 printk(KERN_ERR "umm, got %d back from search"
3230 ", was looking for %llu\n", ret,
3231 (unsigned long long)bytenr);
3232 btrfs_print_leaf(extent_root, path->nodes[0]);
3233 }
3234 BUG_ON(ret);
3235 extent_slot = path->slots[0];
3236 leaf = path->nodes[0];
3237 item_size = btrfs_item_size_nr(leaf, extent_slot);
3238 }
3239#endif
3240 BUG_ON(item_size < sizeof(*ei));
2319 ei = btrfs_item_ptr(leaf, extent_slot, 3241 ei = btrfs_item_ptr(leaf, extent_slot,
2320 struct btrfs_extent_item); 3242 struct btrfs_extent_item);
2321 refs = btrfs_extent_refs(leaf, ei); 3243 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2322 3244 struct btrfs_tree_block_info *bi;
2323 /* 3245 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
2324 * we're not allowed to delete the extent item if there 3246 bi = (struct btrfs_tree_block_info *)(ei + 1);
2325 * are other delayed ref updates pending 3247 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
2326 */ 3248 }
2327 3249
3250 refs = btrfs_extent_refs(leaf, ei);
2328 BUG_ON(refs < refs_to_drop); 3251 BUG_ON(refs < refs_to_drop);
2329 refs -= refs_to_drop; 3252 refs -= refs_to_drop;
2330 btrfs_set_extent_refs(leaf, ei, refs);
2331 btrfs_mark_buffer_dirty(leaf);
2332 3253
2333 if (refs == 0 && found_extent && 3254 if (refs > 0) {
2334 path->slots[0] == extent_slot + 1) { 3255 if (extent_op)
2335 struct btrfs_extent_ref *ref; 3256 __run_delayed_extent_op(extent_op, leaf, ei);
2336 ref = btrfs_item_ptr(leaf, path->slots[0], 3257 /*
2337 struct btrfs_extent_ref); 3258 * In the case of inline back ref, reference count will
2338 BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop); 3259 * be updated by remove_extent_backref
2339 /* if the back ref and the extent are next to each other
2340 * they get deleted below in one shot
2341 */ 3260 */
2342 path->slots[0] = extent_slot; 3261 if (iref) {
2343 num_to_del = 2; 3262 BUG_ON(!found_extent);
2344 } else if (found_extent) { 3263 } else {
2345 /* otherwise delete the extent back ref */ 3264 btrfs_set_extent_refs(leaf, ei, refs);
2346 ret = remove_extent_backref(trans, extent_root, path, 3265 btrfs_mark_buffer_dirty(leaf);
2347 refs_to_drop); 3266 }
2348 BUG_ON(ret); 3267 if (found_extent) {
2349 /* if refs are 0, we need to setup the path for deletion */ 3268 ret = remove_extent_backref(trans, extent_root, path,
2350 if (refs == 0) { 3269 iref, refs_to_drop,
2351 btrfs_release_path(extent_root, path); 3270 is_data);
2352 path->leave_spinning = 1;
2353 ret = btrfs_search_slot(trans, extent_root, &key, path,
2354 -1, 1);
2355 BUG_ON(ret); 3271 BUG_ON(ret);
2356 } 3272 }
2357 } 3273 } else {
2358 3274 int mark_free = 0;
2359 if (refs == 0) {
2360 u64 super_used;
2361 u64 root_used;
2362 struct extent_buffer *must_clean = NULL; 3275 struct extent_buffer *must_clean = NULL;
2363 3276
2364 if (pin) { 3277 if (found_extent) {
2365 ret = pin_down_bytes(trans, root, path, 3278 BUG_ON(is_data && refs_to_drop !=
2366 bytenr, num_bytes, 3279 extent_data_ref_count(root, path, iref));
2367 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, 3280 if (iref) {
2368 &must_clean); 3281 BUG_ON(path->slots[0] != extent_slot);
2369 if (ret > 0) 3282 } else {
2370 mark_free = 1; 3283 BUG_ON(path->slots[0] != extent_slot + 1);
2371 BUG_ON(ret < 0); 3284 path->slots[0] = extent_slot;
3285 num_to_del = 2;
3286 }
2372 } 3287 }
2373 3288
2374 /* block accounting for super block */ 3289 ret = pin_down_bytes(trans, root, path, bytenr,
2375 spin_lock(&info->delalloc_lock); 3290 num_bytes, is_data, &must_clean);
2376 super_used = btrfs_super_bytes_used(&info->super_copy); 3291 if (ret > 0)
2377 btrfs_set_super_bytes_used(&info->super_copy, 3292 mark_free = 1;
2378 super_used - num_bytes); 3293 BUG_ON(ret < 0);
2379
2380 /* block accounting for root item */
2381 root_used = btrfs_root_used(&root->root_item);
2382 btrfs_set_root_used(&root->root_item,
2383 root_used - num_bytes);
2384 spin_unlock(&info->delalloc_lock);
2385
2386 /* 3294 /*
2387 * it is going to be very rare for someone to be waiting 3295 * it is going to be very rare for someone to be waiting
2388 * on the block we're freeing. del_items might need to 3296 * on the block we're freeing. del_items might need to
@@ -2403,7 +3311,7 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2403 free_extent_buffer(must_clean); 3311 free_extent_buffer(must_clean);
2404 } 3312 }
2405 3313
2406 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 3314 if (is_data) {
2407 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3315 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2408 BUG_ON(ret); 3316 BUG_ON(ret);
2409 } else { 3317 } else {
@@ -2421,34 +3329,6 @@ static int __free_extent(struct btrfs_trans_handle *trans,
2421} 3329}
2422 3330
2423/* 3331/*
2424 * remove an extent from the root, returns 0 on success
2425 */
2426static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2427 struct btrfs_root *root,
2428 u64 bytenr, u64 num_bytes, u64 parent,
2429 u64 root_objectid, u64 ref_generation,
2430 u64 owner_objectid, int pin,
2431 int refs_to_drop)
2432{
2433 WARN_ON(num_bytes < root->sectorsize);
2434
2435 /*
2436 * if metadata always pin
2437 * if data pin when any transaction has committed this
2438 */
2439 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
2440 ref_generation != trans->transid)
2441 pin = 1;
2442
2443 if (ref_generation != trans->transid)
2444 pin = 1;
2445
2446 return __free_extent(trans, root, bytenr, num_bytes, parent,
2447 root_objectid, ref_generation,
2448 owner_objectid, pin, pin == 0, refs_to_drop);
2449}
2450
2451/*
2452 * when we free an extent, it is possible (and likely) that we free the last 3332 * when we free an extent, it is possible (and likely) that we free the last
2453 * delayed ref for that extent as well. This searches the delayed ref tree for 3333 * delayed ref for that extent as well. This searches the delayed ref tree for
2454 * a given extent, and if there are no other delayed refs to be processed, it 3334 * a given extent, and if there are no other delayed refs to be processed, it
@@ -2479,6 +3359,13 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2479 if (ref->bytenr == bytenr) 3359 if (ref->bytenr == bytenr)
2480 goto out; 3360 goto out;
2481 3361
3362 if (head->extent_op) {
3363 if (!head->must_insert_reserved)
3364 goto out;
3365 kfree(head->extent_op);
3366 head->extent_op = NULL;
3367 }
3368
2482 /* 3369 /*
2483 * waiting for the lock here would deadlock. If someone else has it 3370 * waiting for the lock here would deadlock. If someone else has it
2484 * locked they are already in the process of dropping it anyway 3371 * locked they are already in the process of dropping it anyway
@@ -2507,7 +3394,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2507 spin_unlock(&delayed_refs->lock); 3394 spin_unlock(&delayed_refs->lock);
2508 3395
2509 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 3396 ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
2510 &head->node, head->must_insert_reserved); 3397 &head->node, head->extent_op,
3398 head->must_insert_reserved);
2511 BUG_ON(ret); 3399 BUG_ON(ret);
2512 btrfs_put_delayed_ref(&head->node); 3400 btrfs_put_delayed_ref(&head->node);
2513 return 0; 3401 return 0;
@@ -2519,32 +3407,32 @@ out:
2519int btrfs_free_extent(struct btrfs_trans_handle *trans, 3407int btrfs_free_extent(struct btrfs_trans_handle *trans,
2520 struct btrfs_root *root, 3408 struct btrfs_root *root,
2521 u64 bytenr, u64 num_bytes, u64 parent, 3409 u64 bytenr, u64 num_bytes, u64 parent,
2522 u64 root_objectid, u64 ref_generation, 3410 u64 root_objectid, u64 owner, u64 offset)
2523 u64 owner_objectid, int pin)
2524{ 3411{
2525 int ret; 3412 int ret;
2526 3413
2527 /* 3414 /*
2528 * tree log blocks never actually go into the extent allocation 3415 * tree log blocks never actually go into the extent allocation
2529 * tree, just update pinning info and exit early. 3416 * tree, just update pinning info and exit early.
2530 *
2531 * data extents referenced by the tree log do need to have
2532 * their reference counts bumped.
2533 */ 3417 */
2534 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && 3418 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
2535 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3419 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
2536 /* unlocks the pinned mutex */ 3420 /* unlocks the pinned mutex */
2537 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3421 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2538 update_reserved_extents(root, bytenr, num_bytes, 0); 3422 update_reserved_extents(root, bytenr, num_bytes, 0);
2539 ret = 0; 3423 ret = 0;
2540 } else { 3424 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
2541 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, 3425 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
2542 root_objectid, ref_generation, 3426 parent, root_objectid, (int)owner,
2543 owner_objectid, 3427 BTRFS_DROP_DELAYED_REF, NULL);
2544 BTRFS_DROP_DELAYED_REF, 1);
2545 BUG_ON(ret); 3428 BUG_ON(ret);
2546 ret = check_ref_cleanup(trans, root, bytenr); 3429 ret = check_ref_cleanup(trans, root, bytenr);
2547 BUG_ON(ret); 3430 BUG_ON(ret);
3431 } else {
3432 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
3433 parent, root_objectid, owner,
3434 offset, BTRFS_DROP_DELAYED_REF, NULL);
3435 BUG_ON(ret);
2548 } 3436 }
2549 return ret; 3437 return ret;
2550} 3438}
@@ -2969,99 +3857,147 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2969 return ret; 3857 return ret;
2970} 3858}
2971 3859
2972static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3860static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
2973 struct btrfs_root *root, u64 parent, 3861 struct btrfs_root *root,
2974 u64 root_objectid, u64 ref_generation, 3862 u64 parent, u64 root_objectid,
2975 u64 owner, struct btrfs_key *ins, 3863 u64 flags, u64 owner, u64 offset,
2976 int ref_mod) 3864 struct btrfs_key *ins, int ref_mod)
2977{ 3865{
2978 int ret; 3866 int ret;
2979 u64 super_used; 3867 struct btrfs_fs_info *fs_info = root->fs_info;
2980 u64 root_used;
2981 u64 num_bytes = ins->offset;
2982 u32 sizes[2];
2983 struct btrfs_fs_info *info = root->fs_info;
2984 struct btrfs_root *extent_root = info->extent_root;
2985 struct btrfs_extent_item *extent_item; 3868 struct btrfs_extent_item *extent_item;
2986 struct btrfs_extent_ref *ref; 3869 struct btrfs_extent_inline_ref *iref;
2987 struct btrfs_path *path; 3870 struct btrfs_path *path;
2988 struct btrfs_key keys[2]; 3871 struct extent_buffer *leaf;
2989 3872 int type;
2990 if (parent == 0) 3873 u32 size;
2991 parent = ins->objectid;
2992
2993 /* block accounting for super block */
2994 spin_lock(&info->delalloc_lock);
2995 super_used = btrfs_super_bytes_used(&info->super_copy);
2996 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2997 3874
2998 /* block accounting for root item */ 3875 if (parent > 0)
2999 root_used = btrfs_root_used(&root->root_item); 3876 type = BTRFS_SHARED_DATA_REF_KEY;
3000 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 3877 else
3001 spin_unlock(&info->delalloc_lock); 3878 type = BTRFS_EXTENT_DATA_REF_KEY;
3002 3879
3003 memcpy(&keys[0], ins, sizeof(*ins)); 3880 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
3004 keys[1].objectid = ins->objectid;
3005 keys[1].type = BTRFS_EXTENT_REF_KEY;
3006 keys[1].offset = parent;
3007 sizes[0] = sizeof(*extent_item);
3008 sizes[1] = sizeof(*ref);
3009 3881
3010 path = btrfs_alloc_path(); 3882 path = btrfs_alloc_path();
3011 BUG_ON(!path); 3883 BUG_ON(!path);
3012 3884
3013 path->leave_spinning = 1; 3885 path->leave_spinning = 1;
3014 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 3886 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
3015 sizes, 2); 3887 ins, size);
3016 BUG_ON(ret); 3888 BUG_ON(ret);
3017 3889
3018 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3890 leaf = path->nodes[0];
3891 extent_item = btrfs_item_ptr(leaf, path->slots[0],
3019 struct btrfs_extent_item); 3892 struct btrfs_extent_item);
3020 btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod); 3893 btrfs_set_extent_refs(leaf, extent_item, ref_mod);
3021 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 3894 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
3022 struct btrfs_extent_ref); 3895 btrfs_set_extent_flags(leaf, extent_item,
3023 3896 flags | BTRFS_EXTENT_FLAG_DATA);
3024 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 3897
3025 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 3898 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
3026 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 3899 btrfs_set_extent_inline_ref_type(leaf, iref, type);
3027 btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod); 3900 if (parent > 0) {
3901 struct btrfs_shared_data_ref *ref;
3902 ref = (struct btrfs_shared_data_ref *)(iref + 1);
3903 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
3904 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
3905 } else {
3906 struct btrfs_extent_data_ref *ref;
3907 ref = (struct btrfs_extent_data_ref *)(&iref->offset);
3908 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
3909 btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
3910 btrfs_set_extent_data_ref_offset(leaf, ref, offset);
3911 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
3912 }
3028 3913
3029 btrfs_mark_buffer_dirty(path->nodes[0]); 3914 btrfs_mark_buffer_dirty(path->nodes[0]);
3030
3031 trans->alloc_exclude_start = 0;
3032 trans->alloc_exclude_nr = 0;
3033 btrfs_free_path(path); 3915 btrfs_free_path(path);
3034 3916
3035 if (ret) 3917 ret = update_block_group(trans, root, ins->objectid, ins->offset,
3036 goto out; 3918 1, 0);
3037
3038 ret = update_block_group(trans, root, ins->objectid,
3039 ins->offset, 1, 0);
3040 if (ret) { 3919 if (ret) {
3041 printk(KERN_ERR "btrfs update block group failed for %llu " 3920 printk(KERN_ERR "btrfs update block group failed for %llu "
3042 "%llu\n", (unsigned long long)ins->objectid, 3921 "%llu\n", (unsigned long long)ins->objectid,
3043 (unsigned long long)ins->offset); 3922 (unsigned long long)ins->offset);
3044 BUG(); 3923 BUG();
3045 } 3924 }
3046out:
3047 return ret; 3925 return ret;
3048} 3926}
3049 3927
3050int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3928static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
3051 struct btrfs_root *root, u64 parent, 3929 struct btrfs_root *root,
3052 u64 root_objectid, u64 ref_generation, 3930 u64 parent, u64 root_objectid,
3053 u64 owner, struct btrfs_key *ins) 3931 u64 flags, struct btrfs_disk_key *key,
3932 int level, struct btrfs_key *ins)
3054{ 3933{
3055 int ret; 3934 int ret;
3935 struct btrfs_fs_info *fs_info = root->fs_info;
3936 struct btrfs_extent_item *extent_item;
3937 struct btrfs_tree_block_info *block_info;
3938 struct btrfs_extent_inline_ref *iref;
3939 struct btrfs_path *path;
3940 struct extent_buffer *leaf;
3941 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
3056 3942
3057 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 3943 path = btrfs_alloc_path();
3058 return 0; 3944 BUG_ON(!path);
3059 3945
3060 ret = btrfs_add_delayed_ref(trans, ins->objectid, 3946 path->leave_spinning = 1;
3061 ins->offset, parent, root_objectid, 3947 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
3062 ref_generation, owner, 3948 ins, size);
3063 BTRFS_ADD_DELAYED_EXTENT, 0);
3064 BUG_ON(ret); 3949 BUG_ON(ret);
3950
3951 leaf = path->nodes[0];
3952 extent_item = btrfs_item_ptr(leaf, path->slots[0],
3953 struct btrfs_extent_item);
3954 btrfs_set_extent_refs(leaf, extent_item, 1);
3955 btrfs_set_extent_generation(leaf, extent_item, trans->transid);
3956 btrfs_set_extent_flags(leaf, extent_item,
3957 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
3958 block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
3959
3960 btrfs_set_tree_block_key(leaf, block_info, key);
3961 btrfs_set_tree_block_level(leaf, block_info, level);
3962
3963 iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
3964 if (parent > 0) {
3965 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
3966 btrfs_set_extent_inline_ref_type(leaf, iref,
3967 BTRFS_SHARED_BLOCK_REF_KEY);
3968 btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
3969 } else {
3970 btrfs_set_extent_inline_ref_type(leaf, iref,
3971 BTRFS_TREE_BLOCK_REF_KEY);
3972 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
3973 }
3974
3975 btrfs_mark_buffer_dirty(leaf);
3976 btrfs_free_path(path);
3977
3978 ret = update_block_group(trans, root, ins->objectid, ins->offset,
3979 1, 0);
3980 if (ret) {
3981 printk(KERN_ERR "btrfs update block group failed for %llu "
3982 "%llu\n", (unsigned long long)ins->objectid,
3983 (unsigned long long)ins->offset);
3984 BUG();
3985 }
3986 return ret;
3987}
3988
3989int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
3990 struct btrfs_root *root,
3991 u64 root_objectid, u64 owner,
3992 u64 offset, struct btrfs_key *ins)
3993{
3994 int ret;
3995
3996 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
3997
3998 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
3999 0, root_objectid, owner, offset,
4000 BTRFS_ADD_DELAYED_EXTENT, NULL);
3065 return ret; 4001 return ret;
3066} 4002}
3067 4003
@@ -3070,10 +4006,10 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
3070 * an extent has been allocated and makes sure to clear the free 4006 * an extent has been allocated and makes sure to clear the free
3071 * space cache bits as well 4007 * space cache bits as well
3072 */ 4008 */
3073int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, 4009int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
3074 struct btrfs_root *root, u64 parent, 4010 struct btrfs_root *root,
3075 u64 root_objectid, u64 ref_generation, 4011 u64 root_objectid, u64 owner, u64 offset,
3076 u64 owner, struct btrfs_key *ins) 4012 struct btrfs_key *ins)
3077{ 4013{
3078 int ret; 4014 int ret;
3079 struct btrfs_block_group_cache *block_group; 4015 struct btrfs_block_group_cache *block_group;
@@ -3087,8 +4023,8 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3087 ins->offset); 4023 ins->offset);
3088 BUG_ON(ret); 4024 BUG_ON(ret);
3089 btrfs_put_block_group(block_group); 4025 btrfs_put_block_group(block_group);
3090 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 4026 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
3091 ref_generation, owner, ins, 1); 4027 0, owner, offset, ins, 1);
3092 return ret; 4028 return ret;
3093} 4029}
3094 4030
@@ -3099,26 +4035,48 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
3099 * 4035 *
3100 * returns 0 if everything worked, non-zero otherwise. 4036 * returns 0 if everything worked, non-zero otherwise.
3101 */ 4037 */
3102int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 4038static int alloc_tree_block(struct btrfs_trans_handle *trans,
3103 struct btrfs_root *root, 4039 struct btrfs_root *root,
3104 u64 num_bytes, u64 parent, u64 min_alloc_size, 4040 u64 num_bytes, u64 parent, u64 root_objectid,
3105 u64 root_objectid, u64 ref_generation, 4041 struct btrfs_disk_key *key, int level,
3106 u64 owner_objectid, u64 empty_size, u64 hint_byte, 4042 u64 empty_size, u64 hint_byte, u64 search_end,
3107 u64 search_end, struct btrfs_key *ins, u64 data) 4043 struct btrfs_key *ins)
3108{ 4044{
3109 int ret; 4045 int ret;
3110 ret = __btrfs_reserve_extent(trans, root, num_bytes, 4046 u64 flags = 0;
3111 min_alloc_size, empty_size, hint_byte, 4047
3112 search_end, ins, data); 4048 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4049 empty_size, hint_byte, search_end,
4050 ins, 0);
3113 BUG_ON(ret); 4051 BUG_ON(ret);
4052
4053 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4054 if (parent == 0)
4055 parent = ins->objectid;
4056 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4057 } else
4058 BUG_ON(parent > 0);
4059
4060 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3114 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4061 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
3115 ret = btrfs_add_delayed_ref(trans, ins->objectid, 4062 struct btrfs_delayed_extent_op *extent_op;
3116 ins->offset, parent, root_objectid, 4063 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
3117 ref_generation, owner_objectid, 4064 BUG_ON(!extent_op);
3118 BTRFS_ADD_DELAYED_EXTENT, 0); 4065 if (key)
4066 memcpy(&extent_op->key, key, sizeof(extent_op->key));
4067 else
4068 memset(&extent_op->key, 0, sizeof(extent_op->key));
4069 extent_op->flags_to_set = flags;
4070 extent_op->update_key = 1;
4071 extent_op->update_flags = 1;
4072 extent_op->is_data = 0;
4073
4074 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid,
4075 ins->offset, parent, root_objectid,
4076 level, BTRFS_ADD_DELAYED_EXTENT,
4077 extent_op);
3119 BUG_ON(ret); 4078 BUG_ON(ret);
3120 } 4079 }
3121 update_reserved_extents(root, ins->objectid, ins->offset, 1);
3122 return ret; 4080 return ret;
3123} 4081}
3124 4082
@@ -3157,21 +4115,17 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
3157 * returns the tree buffer or NULL. 4115 * returns the tree buffer or NULL.
3158 */ 4116 */
3159struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4117struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3160 struct btrfs_root *root, 4118 struct btrfs_root *root, u32 blocksize,
3161 u32 blocksize, u64 parent, 4119 u64 parent, u64 root_objectid,
3162 u64 root_objectid, 4120 struct btrfs_disk_key *key, int level,
3163 u64 ref_generation, 4121 u64 hint, u64 empty_size)
3164 int level,
3165 u64 hint,
3166 u64 empty_size)
3167{ 4122{
3168 struct btrfs_key ins; 4123 struct btrfs_key ins;
3169 int ret; 4124 int ret;
3170 struct extent_buffer *buf; 4125 struct extent_buffer *buf;
3171 4126
3172 ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, 4127 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid,
3173 root_objectid, ref_generation, level, 4128 key, level, empty_size, hint, (u64)-1, &ins);
3174 empty_size, hint, (u64)-1, &ins, 0);
3175 if (ret) { 4129 if (ret) {
3176 BUG_ON(ret > 0); 4130 BUG_ON(ret > 0);
3177 return ERR_PTR(ret); 4131 return ERR_PTR(ret);
@@ -3185,32 +4139,19 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3185int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4139int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3186 struct btrfs_root *root, struct extent_buffer *leaf) 4140 struct btrfs_root *root, struct extent_buffer *leaf)
3187{ 4141{
3188 u64 leaf_owner; 4142 u64 disk_bytenr;
3189 u64 leaf_generation; 4143 u64 num_bytes;
3190 struct refsort *sorted;
3191 struct btrfs_key key; 4144 struct btrfs_key key;
3192 struct btrfs_file_extent_item *fi; 4145 struct btrfs_file_extent_item *fi;
4146 u32 nritems;
3193 int i; 4147 int i;
3194 int nritems;
3195 int ret; 4148 int ret;
3196 int refi = 0;
3197 int slot;
3198 4149
3199 BUG_ON(!btrfs_is_leaf(leaf)); 4150 BUG_ON(!btrfs_is_leaf(leaf));
3200 nritems = btrfs_header_nritems(leaf); 4151 nritems = btrfs_header_nritems(leaf);
3201 leaf_owner = btrfs_header_owner(leaf);
3202 leaf_generation = btrfs_header_generation(leaf);
3203 4152
3204 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
3205 /* we do this loop twice. The first time we build a list
3206 * of the extents we have a reference on, then we sort the list
3207 * by bytenr. The second time around we actually do the
3208 * extent freeing.
3209 */
3210 for (i = 0; i < nritems; i++) { 4153 for (i = 0; i < nritems; i++) {
3211 u64 disk_bytenr;
3212 cond_resched(); 4154 cond_resched();
3213
3214 btrfs_item_key_to_cpu(leaf, &key, i); 4155 btrfs_item_key_to_cpu(leaf, &key, i);
3215 4156
3216 /* only extents have references, skip everything else */ 4157 /* only extents have references, skip everything else */
@@ -3230,45 +4171,16 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3230 if (disk_bytenr == 0) 4171 if (disk_bytenr == 0)
3231 continue; 4172 continue;
3232 4173
3233 sorted[refi].bytenr = disk_bytenr; 4174 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
3234 sorted[refi].slot = i; 4175 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
3235 refi++; 4176 leaf->start, 0, key.objectid, 0);
3236 }
3237
3238 if (refi == 0)
3239 goto out;
3240
3241 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
3242
3243 for (i = 0; i < refi; i++) {
3244 u64 disk_bytenr;
3245
3246 disk_bytenr = sorted[i].bytenr;
3247 slot = sorted[i].slot;
3248
3249 cond_resched();
3250
3251 btrfs_item_key_to_cpu(leaf, &key, slot);
3252 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3253 continue;
3254
3255 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
3256
3257 ret = btrfs_free_extent(trans, root, disk_bytenr,
3258 btrfs_file_extent_disk_num_bytes(leaf, fi),
3259 leaf->start, leaf_owner, leaf_generation,
3260 key.objectid, 0);
3261 BUG_ON(ret); 4177 BUG_ON(ret);
3262
3263 atomic_inc(&root->fs_info->throttle_gen);
3264 wake_up(&root->fs_info->transaction_throttle);
3265 cond_resched();
3266 } 4178 }
3267out:
3268 kfree(sorted);
3269 return 0; 4179 return 0;
3270} 4180}
3271 4181
4182#if 0
4183
3272static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4184static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3273 struct btrfs_root *root, 4185 struct btrfs_root *root,
3274 struct btrfs_leaf_ref *ref) 4186 struct btrfs_leaf_ref *ref)
@@ -3311,13 +4223,14 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3311 return 0; 4223 return 0;
3312} 4224}
3313 4225
4226
3314static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, 4227static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3315 struct btrfs_root *root, u64 start, 4228 struct btrfs_root *root, u64 start,
3316 u64 len, u32 *refs) 4229 u64 len, u32 *refs)
3317{ 4230{
3318 int ret; 4231 int ret;
3319 4232
3320 ret = btrfs_lookup_extent_ref(trans, root, start, len, refs); 4233 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
3321 BUG_ON(ret); 4234 BUG_ON(ret);
3322 4235
3323#if 0 /* some debugging code in case we see problems here */ 4236#if 0 /* some debugging code in case we see problems here */
@@ -3352,6 +4265,7 @@ static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3352 return ret; 4265 return ret;
3353} 4266}
3354 4267
4268
3355/* 4269/*
3356 * this is used while deleting old snapshots, and it drops the refs 4270 * this is used while deleting old snapshots, and it drops the refs
3357 * on a whole subtree starting from a level 1 node. 4271 * on a whole subtree starting from a level 1 node.
@@ -3645,32 +4559,36 @@ out:
3645 cond_resched(); 4559 cond_resched();
3646 return 0; 4560 return 0;
3647} 4561}
4562#endif
3648 4563
3649/* 4564/*
3650 * helper function for drop_subtree, this function is similar to 4565 * helper function for drop_subtree, this function is similar to
3651 * walk_down_tree. The main difference is that it checks reference 4566 * walk_down_tree. The main difference is that it checks reference
3652 * counts while tree blocks are locked. 4567 * counts while tree blocks are locked.
3653 */ 4568 */
3654static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, 4569static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
3655 struct btrfs_root *root, 4570 struct btrfs_root *root,
3656 struct btrfs_path *path, int *level) 4571 struct btrfs_path *path, int *level)
3657{ 4572{
3658 struct extent_buffer *next; 4573 struct extent_buffer *next;
3659 struct extent_buffer *cur; 4574 struct extent_buffer *cur;
3660 struct extent_buffer *parent; 4575 struct extent_buffer *parent;
3661 u64 bytenr; 4576 u64 bytenr;
3662 u64 ptr_gen; 4577 u64 ptr_gen;
4578 u64 refs;
4579 u64 flags;
3663 u32 blocksize; 4580 u32 blocksize;
3664 u32 refs;
3665 int ret; 4581 int ret;
3666 4582
3667 cur = path->nodes[*level]; 4583 cur = path->nodes[*level];
3668 ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, 4584 ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len,
3669 &refs); 4585 &refs, &flags);
3670 BUG_ON(ret); 4586 BUG_ON(ret);
3671 if (refs > 1) 4587 if (refs > 1)
3672 goto out; 4588 goto out;
3673 4589
4590 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4591
3674 while (*level >= 0) { 4592 while (*level >= 0) {
3675 cur = path->nodes[*level]; 4593 cur = path->nodes[*level];
3676 if (*level == 0) { 4594 if (*level == 0) {
@@ -3692,16 +4610,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3692 btrfs_tree_lock(next); 4610 btrfs_tree_lock(next);
3693 btrfs_set_lock_blocking(next); 4611 btrfs_set_lock_blocking(next);
3694 4612
3695 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 4613 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
3696 &refs); 4614 &refs, &flags);
3697 BUG_ON(ret); 4615 BUG_ON(ret);
3698 if (refs > 1) { 4616 if (refs > 1) {
3699 parent = path->nodes[*level]; 4617 parent = path->nodes[*level];
3700 ret = btrfs_free_extent(trans, root, bytenr, 4618 ret = btrfs_free_extent(trans, root, bytenr,
3701 blocksize, parent->start, 4619 blocksize, parent->start,
3702 btrfs_header_owner(parent), 4620 btrfs_header_owner(parent),
3703 btrfs_header_generation(parent), 4621 *level - 1, 0);
3704 *level - 1, 1);
3705 BUG_ON(ret); 4622 BUG_ON(ret);
3706 path->slots[*level]++; 4623 path->slots[*level]++;
3707 btrfs_tree_unlock(next); 4624 btrfs_tree_unlock(next);
@@ -3709,6 +4626,8 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3709 continue; 4626 continue;
3710 } 4627 }
3711 4628
4629 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
4630
3712 *level = btrfs_header_level(next); 4631 *level = btrfs_header_level(next);
3713 path->nodes[*level] = next; 4632 path->nodes[*level] = next;
3714 path->slots[*level] = 0; 4633 path->slots[*level] = 0;
@@ -3716,13 +4635,15 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3716 cond_resched(); 4635 cond_resched();
3717 } 4636 }
3718out: 4637out:
3719 parent = path->nodes[*level + 1]; 4638 if (path->nodes[*level] == root->node)
4639 parent = path->nodes[*level];
4640 else
4641 parent = path->nodes[*level + 1];
3720 bytenr = path->nodes[*level]->start; 4642 bytenr = path->nodes[*level]->start;
3721 blocksize = path->nodes[*level]->len; 4643 blocksize = path->nodes[*level]->len;
3722 4644
3723 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 4645 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start,
3724 parent->start, btrfs_header_owner(parent), 4646 btrfs_header_owner(parent), *level, 0);
3725 btrfs_header_generation(parent), *level, 1);
3726 BUG_ON(ret); 4647 BUG_ON(ret);
3727 4648
3728 if (path->locks[*level]) { 4649 if (path->locks[*level]) {
@@ -3746,8 +4667,6 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3746 struct btrfs_path *path, 4667 struct btrfs_path *path,
3747 int *level, int max_level) 4668 int *level, int max_level)
3748{ 4669{
3749 u64 root_owner;
3750 u64 root_gen;
3751 struct btrfs_root_item *root_item = &root->root_item; 4670 struct btrfs_root_item *root_item = &root->root_item;
3752 int i; 4671 int i;
3753 int slot; 4672 int slot;
@@ -3755,24 +4674,22 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3755 4674
3756 for (i = *level; i < max_level && path->nodes[i]; i++) { 4675 for (i = *level; i < max_level && path->nodes[i]; i++) {
3757 slot = path->slots[i]; 4676 slot = path->slots[i];
3758 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 4677 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
3759 struct extent_buffer *node;
3760 struct btrfs_disk_key disk_key;
3761
3762 /* 4678 /*
3763 * there is more work to do in this level. 4679 * there is more work to do in this level.
3764 * Update the drop_progress marker to reflect 4680 * Update the drop_progress marker to reflect
3765 * the work we've done so far, and then bump 4681 * the work we've done so far, and then bump
3766 * the slot number 4682 * the slot number
3767 */ 4683 */
3768 node = path->nodes[i];
3769 path->slots[i]++; 4684 path->slots[i]++;
3770 *level = i;
3771 WARN_ON(*level == 0); 4685 WARN_ON(*level == 0);
3772 btrfs_node_key(node, &disk_key, path->slots[i]); 4686 if (max_level == BTRFS_MAX_LEVEL) {
3773 memcpy(&root_item->drop_progress, 4687 btrfs_node_key(path->nodes[i],
3774 &disk_key, sizeof(disk_key)); 4688 &root_item->drop_progress,
3775 root_item->drop_level = i; 4689 path->slots[i]);
4690 root_item->drop_level = i;
4691 }
4692 *level = i;
3776 return 0; 4693 return 0;
3777 } else { 4694 } else {
3778 struct extent_buffer *parent; 4695 struct extent_buffer *parent;
@@ -3786,22 +4703,20 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3786 else 4703 else
3787 parent = path->nodes[*level + 1]; 4704 parent = path->nodes[*level + 1];
3788 4705
3789 root_owner = btrfs_header_owner(parent); 4706 clean_tree_block(trans, root, path->nodes[i]);
3790 root_gen = btrfs_header_generation(parent);
3791
3792 clean_tree_block(trans, root, path->nodes[*level]);
3793 ret = btrfs_free_extent(trans, root, 4707 ret = btrfs_free_extent(trans, root,
3794 path->nodes[*level]->start, 4708 path->nodes[i]->start,
3795 path->nodes[*level]->len, 4709 path->nodes[i]->len,
3796 parent->start, root_owner, 4710 parent->start,
3797 root_gen, *level, 1); 4711 btrfs_header_owner(parent),
4712 *level, 0);
3798 BUG_ON(ret); 4713 BUG_ON(ret);
3799 if (path->locks[*level]) { 4714 if (path->locks[*level]) {
3800 btrfs_tree_unlock(path->nodes[*level]); 4715 btrfs_tree_unlock(path->nodes[i]);
3801 path->locks[*level] = 0; 4716 path->locks[i] = 0;
3802 } 4717 }
3803 free_extent_buffer(path->nodes[*level]); 4718 free_extent_buffer(path->nodes[i]);
3804 path->nodes[*level] = NULL; 4719 path->nodes[i] = NULL;
3805 *level = i + 1; 4720 *level = i + 1;
3806 } 4721 }
3807 } 4722 }
@@ -3820,21 +4735,18 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3820 int wret; 4735 int wret;
3821 int level; 4736 int level;
3822 struct btrfs_path *path; 4737 struct btrfs_path *path;
3823 int i;
3824 int orig_level;
3825 int update_count; 4738 int update_count;
3826 struct btrfs_root_item *root_item = &root->root_item; 4739 struct btrfs_root_item *root_item = &root->root_item;
3827 4740
3828 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3829 path = btrfs_alloc_path(); 4741 path = btrfs_alloc_path();
3830 BUG_ON(!path); 4742 BUG_ON(!path);
3831 4743
3832 level = btrfs_header_level(root->node); 4744 level = btrfs_header_level(root->node);
3833 orig_level = level;
3834 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4745 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3835 path->nodes[level] = root->node; 4746 path->nodes[level] = btrfs_lock_root_node(root);
3836 extent_buffer_get(root->node); 4747 btrfs_set_lock_blocking(path->nodes[level]);
3837 path->slots[level] = 0; 4748 path->slots[level] = 0;
4749 path->locks[level] = 1;
3838 } else { 4750 } else {
3839 struct btrfs_key key; 4751 struct btrfs_key key;
3840 struct btrfs_disk_key found_key; 4752 struct btrfs_disk_key found_key;
@@ -3856,12 +4768,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3856 * unlock our path, this is safe because only this 4768 * unlock our path, this is safe because only this
3857 * function is allowed to delete this snapshot 4769 * function is allowed to delete this snapshot
3858 */ 4770 */
3859 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 4771 btrfs_unlock_up_safe(path, 0);
3860 if (path->nodes[i] && path->locks[i]) {
3861 path->locks[i] = 0;
3862 btrfs_tree_unlock(path->nodes[i]);
3863 }
3864 }
3865 } 4772 }
3866 while (1) { 4773 while (1) {
3867 unsigned long update; 4774 unsigned long update;
@@ -3882,8 +4789,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3882 ret = -EAGAIN; 4789 ret = -EAGAIN;
3883 break; 4790 break;
3884 } 4791 }
3885 atomic_inc(&root->fs_info->throttle_gen);
3886 wake_up(&root->fs_info->transaction_throttle);
3887 for (update_count = 0; update_count < 16; update_count++) { 4792 for (update_count = 0; update_count < 16; update_count++) {
3888 update = trans->delayed_ref_updates; 4793 update = trans->delayed_ref_updates;
3889 trans->delayed_ref_updates = 0; 4794 trans->delayed_ref_updates = 0;
@@ -3893,12 +4798,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3893 break; 4798 break;
3894 } 4799 }
3895 } 4800 }
3896 for (i = 0; i <= orig_level; i++) {
3897 if (path->nodes[i]) {
3898 free_extent_buffer(path->nodes[i]);
3899 path->nodes[i] = NULL;
3900 }
3901 }
3902out: 4801out:
3903 btrfs_free_path(path); 4802 btrfs_free_path(path);
3904 return ret; 4803 return ret;
@@ -3931,7 +4830,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3931 path->slots[level] = 0; 4830 path->slots[level] = 0;
3932 4831
3933 while (1) { 4832 while (1) {
3934 wret = walk_down_subtree(trans, root, path, &level); 4833 wret = walk_down_tree(trans, root, path, &level);
3935 if (wret < 0) 4834 if (wret < 0)
3936 ret = wret; 4835 ret = wret;
3937 if (wret != 0) 4836 if (wret != 0)
@@ -3948,6 +4847,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3948 return ret; 4847 return ret;
3949} 4848}
3950 4849
4850#if 0
3951static unsigned long calc_ra(unsigned long start, unsigned long last, 4851static unsigned long calc_ra(unsigned long start, unsigned long last,
3952 unsigned long nr) 4852 unsigned long nr)
3953{ 4853{
@@ -5429,6 +6329,7 @@ out:
5429 kfree(ref_path); 6329 kfree(ref_path);
5430 return ret; 6330 return ret;
5431} 6331}
6332#endif
5432 6333
5433static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6334static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
5434{ 6335{
@@ -5477,7 +6378,8 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root,
5477 u64 calc; 6378 u64 calc;
5478 6379
5479 spin_lock(&shrink_block_group->lock); 6380 spin_lock(&shrink_block_group->lock);
5480 if (btrfs_block_group_used(&shrink_block_group->item) > 0) { 6381 if (btrfs_block_group_used(&shrink_block_group->item) +
6382 shrink_block_group->reserved > 0) {
5481 spin_unlock(&shrink_block_group->lock); 6383 spin_unlock(&shrink_block_group->lock);
5482 6384
5483 trans = btrfs_start_transaction(root, 1); 6385 trans = btrfs_start_transaction(root, 1);
@@ -5502,6 +6404,17 @@ static int __alloc_chunk_for_shrink(struct btrfs_root *root,
5502 return 0; 6404 return 0;
5503} 6405}
5504 6406
6407
6408int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
6409 struct btrfs_block_group_cache *group)
6410
6411{
6412 __alloc_chunk_for_shrink(root, group, 1);
6413 set_block_group_readonly(group);
6414 return 0;
6415}
6416
6417#if 0
5505static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 6418static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
5506 struct btrfs_root *root, 6419 struct btrfs_root *root,
5507 u64 objectid, u64 size) 6420 u64 objectid, u64 size)
@@ -5781,6 +6694,7 @@ out:
5781 btrfs_free_path(path); 6694 btrfs_free_path(path);
5782 return ret; 6695 return ret;
5783} 6696}
6697#endif
5784 6698
5785static int find_first_block_group(struct btrfs_root *root, 6699static int find_first_block_group(struct btrfs_root *root,
5786 struct btrfs_path *path, struct btrfs_key *key) 6700 struct btrfs_path *path, struct btrfs_key *key)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1d51dc38bb49..0726a734ee38 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -291,16 +291,12 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
291{ 291{
292 u64 extent_end = 0; 292 u64 extent_end = 0;
293 u64 search_start = start; 293 u64 search_start = start;
294 u64 leaf_start;
295 u64 ram_bytes = 0; 294 u64 ram_bytes = 0;
296 u64 orig_parent = 0;
297 u64 disk_bytenr = 0; 295 u64 disk_bytenr = 0;
298 u64 orig_locked_end = locked_end; 296 u64 orig_locked_end = locked_end;
299 u8 compression; 297 u8 compression;
300 u8 encryption; 298 u8 encryption;
301 u16 other_encoding = 0; 299 u16 other_encoding = 0;
302 u64 root_gen;
303 u64 root_owner;
304 struct extent_buffer *leaf; 300 struct extent_buffer *leaf;
305 struct btrfs_file_extent_item *extent; 301 struct btrfs_file_extent_item *extent;
306 struct btrfs_path *path; 302 struct btrfs_path *path;
@@ -340,9 +336,6 @@ next_slot:
340 bookend = 0; 336 bookend = 0;
341 found_extent = 0; 337 found_extent = 0;
342 found_inline = 0; 338 found_inline = 0;
343 leaf_start = 0;
344 root_gen = 0;
345 root_owner = 0;
346 compression = 0; 339 compression = 0;
347 encryption = 0; 340 encryption = 0;
348 extent = NULL; 341 extent = NULL;
@@ -417,9 +410,6 @@ next_slot:
417 if (found_extent) { 410 if (found_extent) {
418 read_extent_buffer(leaf, &old, (unsigned long)extent, 411 read_extent_buffer(leaf, &old, (unsigned long)extent,
419 sizeof(old)); 412 sizeof(old));
420 root_gen = btrfs_header_generation(leaf);
421 root_owner = btrfs_header_owner(leaf);
422 leaf_start = leaf->start;
423 } 413 }
424 414
425 if (end < extent_end && end >= key.offset) { 415 if (end < extent_end && end >= key.offset) {
@@ -443,14 +433,14 @@ next_slot:
443 } 433 }
444 locked_end = extent_end; 434 locked_end = extent_end;
445 } 435 }
446 orig_parent = path->nodes[0]->start;
447 disk_bytenr = le64_to_cpu(old.disk_bytenr); 436 disk_bytenr = le64_to_cpu(old.disk_bytenr);
448 if (disk_bytenr != 0) { 437 if (disk_bytenr != 0) {
449 ret = btrfs_inc_extent_ref(trans, root, 438 ret = btrfs_inc_extent_ref(trans, root,
450 disk_bytenr, 439 disk_bytenr,
451 le64_to_cpu(old.disk_num_bytes), 440 le64_to_cpu(old.disk_num_bytes), 0,
452 orig_parent, root->root_key.objectid, 441 root->root_key.objectid,
453 trans->transid, inode->i_ino); 442 key.objectid, key.offset -
443 le64_to_cpu(old.offset));
454 BUG_ON(ret); 444 BUG_ON(ret);
455 } 445 }
456 } 446 }
@@ -568,17 +558,6 @@ next_slot:
568 btrfs_mark_buffer_dirty(path->nodes[0]); 558 btrfs_mark_buffer_dirty(path->nodes[0]);
569 btrfs_set_lock_blocking(path->nodes[0]); 559 btrfs_set_lock_blocking(path->nodes[0]);
570 560
571 if (disk_bytenr != 0) {
572 ret = btrfs_update_extent_ref(trans, root,
573 disk_bytenr,
574 le64_to_cpu(old.disk_num_bytes),
575 orig_parent,
576 leaf->start,
577 root->root_key.objectid,
578 trans->transid, ins.objectid);
579
580 BUG_ON(ret);
581 }
582 path->leave_spinning = 0; 561 path->leave_spinning = 0;
583 btrfs_release_path(root, path); 562 btrfs_release_path(root, path);
584 if (disk_bytenr != 0) 563 if (disk_bytenr != 0)
@@ -594,8 +573,9 @@ next_slot:
594 ret = btrfs_free_extent(trans, root, 573 ret = btrfs_free_extent(trans, root,
595 old_disk_bytenr, 574 old_disk_bytenr,
596 le64_to_cpu(old.disk_num_bytes), 575 le64_to_cpu(old.disk_num_bytes),
597 leaf_start, root_owner, 576 0, root->root_key.objectid,
598 root_gen, key.objectid, 0); 577 key.objectid, key.offset -
578 le64_to_cpu(old.offset));
599 BUG_ON(ret); 579 BUG_ON(ret);
600 *hint_byte = old_disk_bytenr; 580 *hint_byte = old_disk_bytenr;
601 } 581 }
@@ -664,12 +644,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
664 u64 bytenr; 644 u64 bytenr;
665 u64 num_bytes; 645 u64 num_bytes;
666 u64 extent_end; 646 u64 extent_end;
667 u64 extent_offset; 647 u64 orig_offset;
668 u64 other_start; 648 u64 other_start;
669 u64 other_end; 649 u64 other_end;
670 u64 split = start; 650 u64 split = start;
671 u64 locked_end = end; 651 u64 locked_end = end;
672 u64 orig_parent;
673 int extent_type; 652 int extent_type;
674 int split_end = 1; 653 int split_end = 1;
675 int ret; 654 int ret;
@@ -703,7 +682,7 @@ again:
703 682
704 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 683 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
705 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 684 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
706 extent_offset = btrfs_file_extent_offset(leaf, fi); 685 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
707 686
708 if (key.offset == start) 687 if (key.offset == start)
709 split = end; 688 split = end;
@@ -711,8 +690,6 @@ again:
711 if (key.offset == start && extent_end == end) { 690 if (key.offset == start && extent_end == end) {
712 int del_nr = 0; 691 int del_nr = 0;
713 int del_slot = 0; 692 int del_slot = 0;
714 u64 leaf_owner = btrfs_header_owner(leaf);
715 u64 leaf_gen = btrfs_header_generation(leaf);
716 other_start = end; 693 other_start = end;
717 other_end = 0; 694 other_end = 0;
718 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 695 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
@@ -721,8 +698,8 @@ again:
721 del_slot = path->slots[0] + 1; 698 del_slot = path->slots[0] + 1;
722 del_nr++; 699 del_nr++;
723 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 700 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
724 leaf->start, leaf_owner, 701 0, root->root_key.objectid,
725 leaf_gen, inode->i_ino, 0); 702 inode->i_ino, orig_offset);
726 BUG_ON(ret); 703 BUG_ON(ret);
727 } 704 }
728 other_start = 0; 705 other_start = 0;
@@ -733,8 +710,8 @@ again:
733 del_slot = path->slots[0]; 710 del_slot = path->slots[0];
734 del_nr++; 711 del_nr++;
735 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 712 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
736 leaf->start, leaf_owner, 713 0, root->root_key.objectid,
737 leaf_gen, inode->i_ino, 0); 714 inode->i_ino, orig_offset);
738 BUG_ON(ret); 715 BUG_ON(ret);
739 } 716 }
740 split_end = 0; 717 split_end = 0;
@@ -768,13 +745,12 @@ again:
768 locked_end = extent_end; 745 locked_end = extent_end;
769 } 746 }
770 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset); 747 btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
771 extent_offset += split - key.offset;
772 } else { 748 } else {
773 BUG_ON(key.offset != start); 749 BUG_ON(key.offset != start);
774 btrfs_set_file_extent_offset(leaf, fi, extent_offset +
775 split - key.offset);
776 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
777 key.offset = split; 750 key.offset = split;
751 btrfs_set_file_extent_offset(leaf, fi, key.offset -
752 orig_offset);
753 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
778 btrfs_set_item_key_safe(trans, root, path, &key); 754 btrfs_set_item_key_safe(trans, root, path, &key);
779 extent_end = split; 755 extent_end = split;
780 } 756 }
@@ -793,7 +769,8 @@ again:
793 struct btrfs_file_extent_item); 769 struct btrfs_file_extent_item);
794 key.offset = split; 770 key.offset = split;
795 btrfs_set_item_key_safe(trans, root, path, &key); 771 btrfs_set_item_key_safe(trans, root, path, &key);
796 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 772 btrfs_set_file_extent_offset(leaf, fi, key.offset -
773 orig_offset);
797 btrfs_set_file_extent_num_bytes(leaf, fi, 774 btrfs_set_file_extent_num_bytes(leaf, fi,
798 other_end - split); 775 other_end - split);
799 goto done; 776 goto done;
@@ -815,10 +792,9 @@ again:
815 792
816 btrfs_mark_buffer_dirty(leaf); 793 btrfs_mark_buffer_dirty(leaf);
817 794
818 orig_parent = leaf->start; 795 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
819 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 796 root->root_key.objectid,
820 orig_parent, root->root_key.objectid, 797 inode->i_ino, orig_offset);
821 trans->transid, inode->i_ino);
822 BUG_ON(ret); 798 BUG_ON(ret);
823 btrfs_release_path(root, path); 799 btrfs_release_path(root, path);
824 800
@@ -833,20 +809,12 @@ again:
833 btrfs_set_file_extent_type(leaf, fi, extent_type); 809 btrfs_set_file_extent_type(leaf, fi, extent_type);
834 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr); 810 btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
835 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes); 811 btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
836 btrfs_set_file_extent_offset(leaf, fi, extent_offset); 812 btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
837 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset); 813 btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
838 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes); 814 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
839 btrfs_set_file_extent_compression(leaf, fi, 0); 815 btrfs_set_file_extent_compression(leaf, fi, 0);
840 btrfs_set_file_extent_encryption(leaf, fi, 0); 816 btrfs_set_file_extent_encryption(leaf, fi, 0);
841 btrfs_set_file_extent_other_encoding(leaf, fi, 0); 817 btrfs_set_file_extent_other_encoding(leaf, fi, 0);
842
843 if (orig_parent != leaf->start) {
844 ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
845 orig_parent, leaf->start,
846 root->root_key.objectid,
847 trans->transid, inode->i_ino);
848 BUG_ON(ret);
849 }
850done: 818done:
851 btrfs_mark_buffer_dirty(leaf); 819 btrfs_mark_buffer_dirty(leaf);
852 820
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1c8b0190d031..917bf10597c6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -48,7 +48,6 @@
48#include "ordered-data.h" 48#include "ordered-data.h"
49#include "xattr.h" 49#include "xattr.h"
50#include "tree-log.h" 50#include "tree-log.h"
51#include "ref-cache.h"
52#include "compression.h" 51#include "compression.h"
53#include "locking.h" 52#include "locking.h"
54 53
@@ -944,6 +943,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
944 u64 cow_start; 943 u64 cow_start;
945 u64 cur_offset; 944 u64 cur_offset;
946 u64 extent_end; 945 u64 extent_end;
946 u64 extent_offset;
947 u64 disk_bytenr; 947 u64 disk_bytenr;
948 u64 num_bytes; 948 u64 num_bytes;
949 int extent_type; 949 int extent_type;
@@ -1005,6 +1005,7 @@ next_slot:
1005 if (extent_type == BTRFS_FILE_EXTENT_REG || 1005 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1006 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { 1006 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1007 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1007 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1008 extent_offset = btrfs_file_extent_offset(leaf, fi);
1008 extent_end = found_key.offset + 1009 extent_end = found_key.offset +
1009 btrfs_file_extent_num_bytes(leaf, fi); 1010 btrfs_file_extent_num_bytes(leaf, fi);
1010 if (extent_end <= start) { 1011 if (extent_end <= start) {
@@ -1022,9 +1023,10 @@ next_slot:
1022 if (btrfs_extent_readonly(root, disk_bytenr)) 1023 if (btrfs_extent_readonly(root, disk_bytenr))
1023 goto out_check; 1024 goto out_check;
1024 if (btrfs_cross_ref_exist(trans, root, inode->i_ino, 1025 if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
1025 disk_bytenr)) 1026 found_key.offset -
1027 extent_offset, disk_bytenr))
1026 goto out_check; 1028 goto out_check;
1027 disk_bytenr += btrfs_file_extent_offset(leaf, fi); 1029 disk_bytenr += extent_offset;
1028 disk_bytenr += cur_offset - found_key.offset; 1030 disk_bytenr += cur_offset - found_key.offset;
1029 num_bytes = min(end + 1, extent_end) - cur_offset; 1031 num_bytes = min(end + 1, extent_end) - cur_offset;
1030 /* 1032 /*
@@ -1489,9 +1491,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1489 ins.objectid = disk_bytenr; 1491 ins.objectid = disk_bytenr;
1490 ins.offset = disk_num_bytes; 1492 ins.offset = disk_num_bytes;
1491 ins.type = BTRFS_EXTENT_ITEM_KEY; 1493 ins.type = BTRFS_EXTENT_ITEM_KEY;
1492 ret = btrfs_alloc_reserved_extent(trans, root, leaf->start, 1494 ret = btrfs_alloc_reserved_file_extent(trans, root,
1493 root->root_key.objectid, 1495 root->root_key.objectid,
1494 trans->transid, inode->i_ino, &ins); 1496 inode->i_ino, file_pos, &ins);
1495 BUG_ON(ret); 1497 BUG_ON(ret);
1496 btrfs_free_path(path); 1498 btrfs_free_path(path);
1497 1499
@@ -1956,23 +1958,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
1956 * crossing root thing. we store the inode number in the 1958 * crossing root thing. we store the inode number in the
1957 * offset of the orphan item. 1959 * offset of the orphan item.
1958 */ 1960 */
1959 inode = btrfs_iget_locked(root->fs_info->sb, 1961 found_key.objectid = found_key.offset;
1960 found_key.offset, root); 1962 found_key.type = BTRFS_INODE_ITEM_KEY;
1961 if (!inode) 1963 found_key.offset = 0;
1964 inode = btrfs_iget(root->fs_info->sb, &found_key, root);
1965 if (IS_ERR(inode))
1962 break; 1966 break;
1963 1967
1964 if (inode->i_state & I_NEW) {
1965 BTRFS_I(inode)->root = root;
1966
1967 /* have to set the location manually */
1968 BTRFS_I(inode)->location.objectid = inode->i_ino;
1969 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
1970 BTRFS_I(inode)->location.offset = 0;
1971
1972 btrfs_read_locked_inode(inode);
1973 unlock_new_inode(inode);
1974 }
1975
1976 /* 1968 /*
1977 * add this inode to the orphan list so btrfs_orphan_del does 1969 * add this inode to the orphan list so btrfs_orphan_del does
1978 * the proper thing when we hit it 1970 * the proper thing when we hit it
@@ -2069,7 +2061,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
2069/* 2061/*
2070 * read an inode from the btree into the in-memory inode 2062 * read an inode from the btree into the in-memory inode
2071 */ 2063 */
2072void btrfs_read_locked_inode(struct inode *inode) 2064static void btrfs_read_locked_inode(struct inode *inode)
2073{ 2065{
2074 struct btrfs_path *path; 2066 struct btrfs_path *path;
2075 struct extent_buffer *leaf; 2067 struct extent_buffer *leaf;
@@ -2599,9 +2591,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2599 struct btrfs_file_extent_item *fi; 2591 struct btrfs_file_extent_item *fi;
2600 u64 extent_start = 0; 2592 u64 extent_start = 0;
2601 u64 extent_num_bytes = 0; 2593 u64 extent_num_bytes = 0;
2594 u64 extent_offset = 0;
2602 u64 item_end = 0; 2595 u64 item_end = 0;
2603 u64 root_gen = 0;
2604 u64 root_owner = 0;
2605 int found_extent; 2596 int found_extent;
2606 int del_item; 2597 int del_item;
2607 int pending_del_nr = 0; 2598 int pending_del_nr = 0;
@@ -2716,6 +2707,9 @@ search_again:
2716 extent_num_bytes = 2707 extent_num_bytes =
2717 btrfs_file_extent_disk_num_bytes(leaf, 2708 btrfs_file_extent_disk_num_bytes(leaf,
2718 fi); 2709 fi);
2710 extent_offset = found_key.offset -
2711 btrfs_file_extent_offset(leaf, fi);
2712
2719 /* FIXME blocksize != 4096 */ 2713 /* FIXME blocksize != 4096 */
2720 num_dec = btrfs_file_extent_num_bytes(leaf, fi); 2714 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
2721 if (extent_start != 0) { 2715 if (extent_start != 0) {
@@ -2723,8 +2717,6 @@ search_again:
2723 if (root->ref_cows) 2717 if (root->ref_cows)
2724 inode_sub_bytes(inode, num_dec); 2718 inode_sub_bytes(inode, num_dec);
2725 } 2719 }
2726 root_gen = btrfs_header_generation(leaf);
2727 root_owner = btrfs_header_owner(leaf);
2728 } 2720 }
2729 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 2721 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
2730 /* 2722 /*
@@ -2768,12 +2760,12 @@ delete:
2768 } else { 2760 } else {
2769 break; 2761 break;
2770 } 2762 }
2771 if (found_extent) { 2763 if (found_extent && root->ref_cows) {
2772 btrfs_set_path_blocking(path); 2764 btrfs_set_path_blocking(path);
2773 ret = btrfs_free_extent(trans, root, extent_start, 2765 ret = btrfs_free_extent(trans, root, extent_start,
2774 extent_num_bytes, 2766 extent_num_bytes, 0,
2775 leaf->start, root_owner, 2767 btrfs_header_owner(leaf),
2776 root_gen, inode->i_ino, 0); 2768 inode->i_ino, extent_offset);
2777 BUG_ON(ret); 2769 BUG_ON(ret);
2778 } 2770 }
2779next: 2771next:
@@ -3105,6 +3097,45 @@ static int fixup_tree_root_location(struct btrfs_root *root,
3105 return 0; 3097 return 0;
3106} 3098}
3107 3099
3100static void inode_tree_add(struct inode *inode)
3101{
3102 struct btrfs_root *root = BTRFS_I(inode)->root;
3103 struct btrfs_inode *entry;
3104 struct rb_node **p = &root->inode_tree.rb_node;
3105 struct rb_node *parent = NULL;
3106
3107 spin_lock(&root->inode_lock);
3108 while (*p) {
3109 parent = *p;
3110 entry = rb_entry(parent, struct btrfs_inode, rb_node);
3111
3112 if (inode->i_ino < entry->vfs_inode.i_ino)
3113 p = &(*p)->rb_left;
3114 else if (inode->i_ino > entry->vfs_inode.i_ino)
3115 p = &(*p)->rb_right;
3116 else {
3117 WARN_ON(!(entry->vfs_inode.i_state &
3118 (I_WILL_FREE | I_FREEING | I_CLEAR)));
3119 break;
3120 }
3121 }
3122 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
3123 rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3124 spin_unlock(&root->inode_lock);
3125}
3126
3127static void inode_tree_del(struct inode *inode)
3128{
3129 struct btrfs_root *root = BTRFS_I(inode)->root;
3130
3131 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
3132 spin_lock(&root->inode_lock);
3133 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
3134 spin_unlock(&root->inode_lock);
3135 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3136 }
3137}
3138
3108static noinline void init_btrfs_i(struct inode *inode) 3139static noinline void init_btrfs_i(struct inode *inode)
3109{ 3140{
3110 struct btrfs_inode *bi = BTRFS_I(inode); 3141 struct btrfs_inode *bi = BTRFS_I(inode);
@@ -3130,6 +3161,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3130 inode->i_mapping, GFP_NOFS); 3161 inode->i_mapping, GFP_NOFS);
3131 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); 3162 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
3132 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); 3163 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3164 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3133 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3165 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3134 mutex_init(&BTRFS_I(inode)->extent_mutex); 3166 mutex_init(&BTRFS_I(inode)->extent_mutex);
3135 mutex_init(&BTRFS_I(inode)->log_mutex); 3167 mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -3152,26 +3184,9 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
3152 args->root == BTRFS_I(inode)->root; 3184 args->root == BTRFS_I(inode)->root;
3153} 3185}
3154 3186
3155struct inode *btrfs_ilookup(struct super_block *s, u64 objectid, 3187static struct inode *btrfs_iget_locked(struct super_block *s,
3156 struct btrfs_root *root, int wait) 3188 u64 objectid,
3157{ 3189 struct btrfs_root *root)
3158 struct inode *inode;
3159 struct btrfs_iget_args args;
3160 args.ino = objectid;
3161 args.root = root;
3162
3163 if (wait) {
3164 inode = ilookup5(s, objectid, btrfs_find_actor,
3165 (void *)&args);
3166 } else {
3167 inode = ilookup5_nowait(s, objectid, btrfs_find_actor,
3168 (void *)&args);
3169 }
3170 return inode;
3171}
3172
3173struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
3174 struct btrfs_root *root)
3175{ 3190{
3176 struct inode *inode; 3191 struct inode *inode;
3177 struct btrfs_iget_args args; 3192 struct btrfs_iget_args args;
@@ -3188,24 +3203,21 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
3188 * Returns in *is_new if the inode was read from disk 3203 * Returns in *is_new if the inode was read from disk
3189 */ 3204 */
3190struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3205struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3191 struct btrfs_root *root, int *is_new) 3206 struct btrfs_root *root)
3192{ 3207{
3193 struct inode *inode; 3208 struct inode *inode;
3194 3209
3195 inode = btrfs_iget_locked(s, location->objectid, root); 3210 inode = btrfs_iget_locked(s, location->objectid, root);
3196 if (!inode) 3211 if (!inode)
3197 return ERR_PTR(-EACCES); 3212 return ERR_PTR(-ENOMEM);
3198 3213
3199 if (inode->i_state & I_NEW) { 3214 if (inode->i_state & I_NEW) {
3200 BTRFS_I(inode)->root = root; 3215 BTRFS_I(inode)->root = root;
3201 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); 3216 memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
3202 btrfs_read_locked_inode(inode); 3217 btrfs_read_locked_inode(inode);
3218
3219 inode_tree_add(inode);
3203 unlock_new_inode(inode); 3220 unlock_new_inode(inode);
3204 if (is_new)
3205 *is_new = 1;
3206 } else {
3207 if (is_new)
3208 *is_new = 0;
3209 } 3221 }
3210 3222
3211 return inode; 3223 return inode;
@@ -3218,7 +3230,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3218 struct btrfs_root *root = bi->root; 3230 struct btrfs_root *root = bi->root;
3219 struct btrfs_root *sub_root = root; 3231 struct btrfs_root *sub_root = root;
3220 struct btrfs_key location; 3232 struct btrfs_key location;
3221 int ret, new; 3233 int ret;
3222 3234
3223 if (dentry->d_name.len > BTRFS_NAME_LEN) 3235 if (dentry->d_name.len > BTRFS_NAME_LEN)
3224 return ERR_PTR(-ENAMETOOLONG); 3236 return ERR_PTR(-ENAMETOOLONG);
@@ -3236,7 +3248,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3236 return ERR_PTR(ret); 3248 return ERR_PTR(ret);
3237 if (ret > 0) 3249 if (ret > 0)
3238 return ERR_PTR(-ENOENT); 3250 return ERR_PTR(-ENOENT);
3239 inode = btrfs_iget(dir->i_sb, &location, sub_root, &new); 3251 inode = btrfs_iget(dir->i_sb, &location, sub_root);
3240 if (IS_ERR(inode)) 3252 if (IS_ERR(inode))
3241 return ERR_CAST(inode); 3253 return ERR_CAST(inode);
3242 } 3254 }
@@ -3631,6 +3643,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3631 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); 3643 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
3632 3644
3633 insert_inode_hash(inode); 3645 insert_inode_hash(inode);
3646 inode_tree_add(inode);
3634 return inode; 3647 return inode;
3635fail: 3648fail:
3636 if (dir) 3649 if (dir)
@@ -4683,6 +4696,7 @@ void btrfs_destroy_inode(struct inode *inode)
4683 btrfs_put_ordered_extent(ordered); 4696 btrfs_put_ordered_extent(ordered);
4684 } 4697 }
4685 } 4698 }
4699 inode_tree_del(inode);
4686 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 4700 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
4687 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 4701 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
4688} 4702}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2624b53ea783..54dfd45cc591 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -82,22 +82,25 @@ static noinline int create_subvol(struct btrfs_root *root,
82 if (ret) 82 if (ret)
83 goto fail; 83 goto fail;
84 84
85 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 85 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
86 objectid, trans->transid, 0, 0, 0); 86 0, objectid, NULL, 0, 0, 0);
87 if (IS_ERR(leaf)) { 87 if (IS_ERR(leaf)) {
88 ret = PTR_ERR(leaf); 88 ret = PTR_ERR(leaf);
89 goto fail; 89 goto fail;
90 } 90 }
91 91
92 btrfs_set_header_nritems(leaf, 0); 92 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
93 btrfs_set_header_level(leaf, 0);
94 btrfs_set_header_bytenr(leaf, leaf->start); 93 btrfs_set_header_bytenr(leaf, leaf->start);
95 btrfs_set_header_generation(leaf, trans->transid); 94 btrfs_set_header_generation(leaf, trans->transid);
95 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
96 btrfs_set_header_owner(leaf, objectid); 96 btrfs_set_header_owner(leaf, objectid);
97 97
98 write_extent_buffer(leaf, root->fs_info->fsid, 98 write_extent_buffer(leaf, root->fs_info->fsid,
99 (unsigned long)btrfs_header_fsid(leaf), 99 (unsigned long)btrfs_header_fsid(leaf),
100 BTRFS_FSID_SIZE); 100 BTRFS_FSID_SIZE);
101 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
102 (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
103 BTRFS_UUID_SIZE);
101 btrfs_mark_buffer_dirty(leaf); 104 btrfs_mark_buffer_dirty(leaf);
102 105
103 inode_item = &root_item.inode; 106 inode_item = &root_item.inode;
@@ -125,7 +128,7 @@ static noinline int create_subvol(struct btrfs_root *root,
125 btrfs_set_root_dirid(&root_item, new_dirid); 128 btrfs_set_root_dirid(&root_item, new_dirid);
126 129
127 key.objectid = objectid; 130 key.objectid = objectid;
128 key.offset = 1; 131 key.offset = 0;
129 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 132 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
130 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 133 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
131 &root_item); 134 &root_item);
@@ -911,10 +914,10 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
911 if (disko) { 914 if (disko) {
912 inode_add_bytes(inode, datal); 915 inode_add_bytes(inode, datal);
913 ret = btrfs_inc_extent_ref(trans, root, 916 ret = btrfs_inc_extent_ref(trans, root,
914 disko, diskl, leaf->start, 917 disko, diskl, 0,
915 root->root_key.objectid, 918 root->root_key.objectid,
916 trans->transid, 919 inode->i_ino,
917 inode->i_ino); 920 new_key.offset - datao);
918 BUG_ON(ret); 921 BUG_ON(ret);
919 } 922 }
920 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 923 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 5f8f218c1005..6d6523da0a30 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -45,22 +45,132 @@ static void print_dev_item(struct extent_buffer *eb,
45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item), 45 (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item)); 46 (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
47} 47}
48static void print_extent_data_ref(struct extent_buffer *eb,
49 struct btrfs_extent_data_ref *ref)
50{
51 printk(KERN_INFO "\t\textent data backref root %llu "
52 "objectid %llu offset %llu count %u\n",
53 (unsigned long long)btrfs_extent_data_ref_root(eb, ref),
54 (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref),
55 (unsigned long long)btrfs_extent_data_ref_offset(eb, ref),
56 btrfs_extent_data_ref_count(eb, ref));
57}
58
59static void print_extent_item(struct extent_buffer *eb, int slot)
60{
61 struct btrfs_extent_item *ei;
62 struct btrfs_extent_inline_ref *iref;
63 struct btrfs_extent_data_ref *dref;
64 struct btrfs_shared_data_ref *sref;
65 struct btrfs_disk_key key;
66 unsigned long end;
67 unsigned long ptr;
68 int type;
69 u32 item_size = btrfs_item_size_nr(eb, slot);
70 u64 flags;
71 u64 offset;
72
73 if (item_size < sizeof(*ei)) {
74#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
75 struct btrfs_extent_item_v0 *ei0;
76 BUG_ON(item_size != sizeof(*ei0));
77 ei0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_item_v0);
78 printk(KERN_INFO "\t\textent refs %u\n",
79 btrfs_extent_refs_v0(eb, ei0));
80 return;
81#else
82 BUG();
83#endif
84 }
85
86 ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
87 flags = btrfs_extent_flags(eb, ei);
88
89 printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
90 (unsigned long long)btrfs_extent_refs(eb, ei),
91 (unsigned long long)btrfs_extent_generation(eb, ei),
92 (unsigned long long)flags);
93
94 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
95 struct btrfs_tree_block_info *info;
96 info = (struct btrfs_tree_block_info *)(ei + 1);
97 btrfs_tree_block_key(eb, info, &key);
98 printk(KERN_INFO "\t\ttree block key (%llu %x %llu) "
99 "level %d\n",
100 (unsigned long long)btrfs_disk_key_objectid(&key),
101 key.type,
102 (unsigned long long)btrfs_disk_key_offset(&key),
103 btrfs_tree_block_level(eb, info));
104 iref = (struct btrfs_extent_inline_ref *)(info + 1);
105 } else {
106 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
107 }
108
109 ptr = (unsigned long)iref;
110 end = (unsigned long)ei + item_size;
111 while (ptr < end) {
112 iref = (struct btrfs_extent_inline_ref *)ptr;
113 type = btrfs_extent_inline_ref_type(eb, iref);
114 offset = btrfs_extent_inline_ref_offset(eb, iref);
115 switch (type) {
116 case BTRFS_TREE_BLOCK_REF_KEY:
117 printk(KERN_INFO "\t\ttree block backref "
118 "root %llu\n", (unsigned long long)offset);
119 break;
120 case BTRFS_SHARED_BLOCK_REF_KEY:
121 printk(KERN_INFO "\t\tshared block backref "
122 "parent %llu\n", (unsigned long long)offset);
123 break;
124 case BTRFS_EXTENT_DATA_REF_KEY:
125 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
126 print_extent_data_ref(eb, dref);
127 break;
128 case BTRFS_SHARED_DATA_REF_KEY:
129 sref = (struct btrfs_shared_data_ref *)(iref + 1);
130 printk(KERN_INFO "\t\tshared data backref "
131 "parent %llu count %u\n",
132 (unsigned long long)offset,
133 btrfs_shared_data_ref_count(eb, sref));
134 break;
135 default:
136 BUG();
137 }
138 ptr += btrfs_extent_inline_ref_size(type);
139 }
140 WARN_ON(ptr > end);
141}
142
143#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
144static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
145{
146 struct btrfs_extent_ref_v0 *ref0;
147
148 ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
149 printk("\t\textent back ref root %llu gen %llu "
150 "owner %llu num_refs %lu\n",
151 (unsigned long long)btrfs_ref_root_v0(eb, ref0),
152 (unsigned long long)btrfs_ref_generation_v0(eb, ref0),
153 (unsigned long long)btrfs_ref_objectid_v0(eb, ref0),
154 (unsigned long)btrfs_ref_count_v0(eb, ref0));
155}
156#endif
157
48void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) 158void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
49{ 159{
50 int i; 160 int i;
161 u32 type;
51 u32 nr = btrfs_header_nritems(l); 162 u32 nr = btrfs_header_nritems(l);
52 struct btrfs_item *item; 163 struct btrfs_item *item;
53 struct btrfs_extent_item *ei;
54 struct btrfs_root_item *ri; 164 struct btrfs_root_item *ri;
55 struct btrfs_dir_item *di; 165 struct btrfs_dir_item *di;
56 struct btrfs_inode_item *ii; 166 struct btrfs_inode_item *ii;
57 struct btrfs_block_group_item *bi; 167 struct btrfs_block_group_item *bi;
58 struct btrfs_file_extent_item *fi; 168 struct btrfs_file_extent_item *fi;
169 struct btrfs_extent_data_ref *dref;
170 struct btrfs_shared_data_ref *sref;
171 struct btrfs_dev_extent *dev_extent;
59 struct btrfs_key key; 172 struct btrfs_key key;
60 struct btrfs_key found_key; 173 struct btrfs_key found_key;
61 struct btrfs_extent_ref *ref;
62 struct btrfs_dev_extent *dev_extent;
63 u32 type;
64 174
65 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n", 175 printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
66 (unsigned long long)btrfs_header_bytenr(l), nr, 176 (unsigned long long)btrfs_header_bytenr(l), nr,
@@ -100,20 +210,25 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
100 btrfs_disk_root_refs(l, ri)); 210 btrfs_disk_root_refs(l, ri));
101 break; 211 break;
102 case BTRFS_EXTENT_ITEM_KEY: 212 case BTRFS_EXTENT_ITEM_KEY:
103 ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); 213 print_extent_item(l, i);
104 printk(KERN_INFO "\t\textent data refs %u\n", 214 break;
105 btrfs_extent_refs(l, ei)); 215 case BTRFS_TREE_BLOCK_REF_KEY:
106 break; 216 printk(KERN_INFO "\t\ttree block backref\n");
107 case BTRFS_EXTENT_REF_KEY: 217 break;
108 ref = btrfs_item_ptr(l, i, struct btrfs_extent_ref); 218 case BTRFS_SHARED_BLOCK_REF_KEY:
109 printk(KERN_INFO "\t\textent back ref root %llu " 219 printk(KERN_INFO "\t\tshared block backref\n");
110 "gen %llu owner %llu num_refs %lu\n", 220 break;
111 (unsigned long long)btrfs_ref_root(l, ref), 221 case BTRFS_EXTENT_DATA_REF_KEY:
112 (unsigned long long)btrfs_ref_generation(l, ref), 222 dref = btrfs_item_ptr(l, i,
113 (unsigned long long)btrfs_ref_objectid(l, ref), 223 struct btrfs_extent_data_ref);
114 (unsigned long)btrfs_ref_num_refs(l, ref)); 224 print_extent_data_ref(l, dref);
225 break;
226 case BTRFS_SHARED_DATA_REF_KEY:
227 sref = btrfs_item_ptr(l, i,
228 struct btrfs_shared_data_ref);
229 printk(KERN_INFO "\t\tshared data backref count %u\n",
230 btrfs_shared_data_ref_count(l, sref));
115 break; 231 break;
116
117 case BTRFS_EXTENT_DATA_KEY: 232 case BTRFS_EXTENT_DATA_KEY:
118 fi = btrfs_item_ptr(l, i, 233 fi = btrfs_item_ptr(l, i,
119 struct btrfs_file_extent_item); 234 struct btrfs_file_extent_item);
@@ -139,6 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
139 (unsigned long long) 254 (unsigned long long)
140 btrfs_file_extent_ram_bytes(l, fi)); 255 btrfs_file_extent_ram_bytes(l, fi));
141 break; 256 break;
257 case BTRFS_EXTENT_REF_V0_KEY:
258#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
259 print_extent_ref_v0(l, i);
260#else
261 BUG();
262#endif
142 case BTRFS_BLOCK_GROUP_ITEM_KEY: 263 case BTRFS_BLOCK_GROUP_ITEM_KEY:
143 bi = btrfs_item_ptr(l, i, 264 bi = btrfs_item_ptr(l, i,
144 struct btrfs_block_group_item); 265 struct btrfs_block_group_item);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
new file mode 100644
index 000000000000..b23dc209ae10
--- /dev/null
+++ b/fs/btrfs/relocation.c
@@ -0,0 +1,3711 @@
1/*
2 * Copyright (C) 2009 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include "ctree.h"
25#include "disk-io.h"
26#include "transaction.h"
27#include "volumes.h"
28#include "locking.h"
29#include "btrfs_inode.h"
30#include "async-thread.h"
31
32/*
33 * backref_node, mapping_node and tree_block start with this
34 */
35struct tree_entry {
36 struct rb_node rb_node;
37 u64 bytenr;
38};
39
40/*
41 * present a tree block in the backref cache
42 */
43struct backref_node {
44 struct rb_node rb_node;
45 u64 bytenr;
46 /* objectid tree block owner */
47 u64 owner;
48 /* list of upper level blocks reference this block */
49 struct list_head upper;
50 /* list of child blocks in the cache */
51 struct list_head lower;
52 /* NULL if this node is not tree root */
53 struct btrfs_root *root;
54 /* extent buffer got by COW the block */
55 struct extent_buffer *eb;
56 /* level of tree block */
57 unsigned int level:8;
58 /* 1 if the block is root of old snapshot */
59 unsigned int old_root:1;
60 /* 1 if no child blocks in the cache */
61 unsigned int lowest:1;
62 /* is the extent buffer locked */
63 unsigned int locked:1;
64 /* has the block been processed */
65 unsigned int processed:1;
66 /* have backrefs of this block been checked */
67 unsigned int checked:1;
68};
69
70/*
71 * present a block pointer in the backref cache
72 */
73struct backref_edge {
74 struct list_head list[2];
75 struct backref_node *node[2];
76 u64 blockptr;
77};
78
79#define LOWER 0
80#define UPPER 1
81
82struct backref_cache {
83 /* red black tree of all backref nodes in the cache */
84 struct rb_root rb_root;
85 /* list of backref nodes with no child block in the cache */
86 struct list_head pending[BTRFS_MAX_LEVEL];
87 spinlock_t lock;
88};
89
90/*
91 * map address of tree root to tree
92 */
93struct mapping_node {
94 struct rb_node rb_node;
95 u64 bytenr;
96 void *data;
97};
98
99struct mapping_tree {
100 struct rb_root rb_root;
101 spinlock_t lock;
102};
103
104/*
105 * present a tree block to process
106 */
107struct tree_block {
108 struct rb_node rb_node;
109 u64 bytenr;
110 struct btrfs_key key;
111 unsigned int level:8;
112 unsigned int key_ready:1;
113};
114
115/* inode vector */
116#define INODEVEC_SIZE 16
117
118struct inodevec {
119 struct list_head list;
120 struct inode *inode[INODEVEC_SIZE];
121 int nr;
122};
123
124struct reloc_control {
125 /* block group to relocate */
126 struct btrfs_block_group_cache *block_group;
127 /* extent tree */
128 struct btrfs_root *extent_root;
129 /* inode for moving data */
130 struct inode *data_inode;
131 struct btrfs_workers workers;
132 /* tree blocks have been processed */
133 struct extent_io_tree processed_blocks;
134 /* map start of tree root to corresponding reloc tree */
135 struct mapping_tree reloc_root_tree;
136 /* list of reloc trees */
137 struct list_head reloc_roots;
138 u64 search_start;
139 u64 extents_found;
140 u64 extents_skipped;
141 int stage;
142 int create_reloc_root;
143 unsigned int found_file_extent:1;
144 unsigned int found_old_snapshot:1;
145};
146
147/* stages of data relocation */
148#define MOVE_DATA_EXTENTS 0
149#define UPDATE_DATA_PTRS 1
150
151/*
152 * merge reloc tree to corresponding fs tree in worker threads
153 */
154struct async_merge {
155 struct btrfs_work work;
156 struct reloc_control *rc;
157 struct btrfs_root *root;
158 struct completion *done;
159 atomic_t *num_pending;
160};
161
162static void mapping_tree_init(struct mapping_tree *tree)
163{
164 tree->rb_root.rb_node = NULL;
165 spin_lock_init(&tree->lock);
166}
167
168static void backref_cache_init(struct backref_cache *cache)
169{
170 int i;
171 cache->rb_root.rb_node = NULL;
172 for (i = 0; i < BTRFS_MAX_LEVEL; i++)
173 INIT_LIST_HEAD(&cache->pending[i]);
174 spin_lock_init(&cache->lock);
175}
176
177static void backref_node_init(struct backref_node *node)
178{
179 memset(node, 0, sizeof(*node));
180 INIT_LIST_HEAD(&node->upper);
181 INIT_LIST_HEAD(&node->lower);
182 RB_CLEAR_NODE(&node->rb_node);
183}
184
185static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
186 struct rb_node *node)
187{
188 struct rb_node **p = &root->rb_node;
189 struct rb_node *parent = NULL;
190 struct tree_entry *entry;
191
192 while (*p) {
193 parent = *p;
194 entry = rb_entry(parent, struct tree_entry, rb_node);
195
196 if (bytenr < entry->bytenr)
197 p = &(*p)->rb_left;
198 else if (bytenr > entry->bytenr)
199 p = &(*p)->rb_right;
200 else
201 return parent;
202 }
203
204 rb_link_node(node, parent, p);
205 rb_insert_color(node, root);
206 return NULL;
207}
208
209static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
210{
211 struct rb_node *n = root->rb_node;
212 struct tree_entry *entry;
213
214 while (n) {
215 entry = rb_entry(n, struct tree_entry, rb_node);
216
217 if (bytenr < entry->bytenr)
218 n = n->rb_left;
219 else if (bytenr > entry->bytenr)
220 n = n->rb_right;
221 else
222 return n;
223 }
224 return NULL;
225}
226
227/*
228 * walk up backref nodes until reach node presents tree root
229 */
230static struct backref_node *walk_up_backref(struct backref_node *node,
231 struct backref_edge *edges[],
232 int *index)
233{
234 struct backref_edge *edge;
235 int idx = *index;
236
237 while (!list_empty(&node->upper)) {
238 edge = list_entry(node->upper.next,
239 struct backref_edge, list[LOWER]);
240 edges[idx++] = edge;
241 node = edge->node[UPPER];
242 }
243 *index = idx;
244 return node;
245}
246
247/*
248 * walk down backref nodes to find start of next reference path
249 */
250static struct backref_node *walk_down_backref(struct backref_edge *edges[],
251 int *index)
252{
253 struct backref_edge *edge;
254 struct backref_node *lower;
255 int idx = *index;
256
257 while (idx > 0) {
258 edge = edges[idx - 1];
259 lower = edge->node[LOWER];
260 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
261 idx--;
262 continue;
263 }
264 edge = list_entry(edge->list[LOWER].next,
265 struct backref_edge, list[LOWER]);
266 edges[idx - 1] = edge;
267 *index = idx;
268 return edge->node[UPPER];
269 }
270 *index = 0;
271 return NULL;
272}
273
274static void drop_node_buffer(struct backref_node *node)
275{
276 if (node->eb) {
277 if (node->locked) {
278 btrfs_tree_unlock(node->eb);
279 node->locked = 0;
280 }
281 free_extent_buffer(node->eb);
282 node->eb = NULL;
283 }
284}
285
286static void drop_backref_node(struct backref_cache *tree,
287 struct backref_node *node)
288{
289 BUG_ON(!node->lowest);
290 BUG_ON(!list_empty(&node->upper));
291
292 drop_node_buffer(node);
293 list_del(&node->lower);
294
295 rb_erase(&node->rb_node, &tree->rb_root);
296 kfree(node);
297}
298
299/*
300 * remove a backref node from the backref cache
301 */
302static void remove_backref_node(struct backref_cache *cache,
303 struct backref_node *node)
304{
305 struct backref_node *upper;
306 struct backref_edge *edge;
307
308 if (!node)
309 return;
310
311 BUG_ON(!node->lowest);
312 while (!list_empty(&node->upper)) {
313 edge = list_entry(node->upper.next, struct backref_edge,
314 list[LOWER]);
315 upper = edge->node[UPPER];
316 list_del(&edge->list[LOWER]);
317 list_del(&edge->list[UPPER]);
318 kfree(edge);
319 /*
320 * add the node to pending list if no other
321 * child block cached.
322 */
323 if (list_empty(&upper->lower)) {
324 list_add_tail(&upper->lower,
325 &cache->pending[upper->level]);
326 upper->lowest = 1;
327 }
328 }
329 drop_backref_node(cache, node);
330}
331
332/*
333 * find reloc tree by address of tree root
334 */
335static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
336 u64 bytenr)
337{
338 struct rb_node *rb_node;
339 struct mapping_node *node;
340 struct btrfs_root *root = NULL;
341
342 spin_lock(&rc->reloc_root_tree.lock);
343 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
344 if (rb_node) {
345 node = rb_entry(rb_node, struct mapping_node, rb_node);
346 root = (struct btrfs_root *)node->data;
347 }
348 spin_unlock(&rc->reloc_root_tree.lock);
349 return root;
350}
351
352static int is_cowonly_root(u64 root_objectid)
353{
354 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
355 root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
356 root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
357 root_objectid == BTRFS_DEV_TREE_OBJECTID ||
358 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
359 root_objectid == BTRFS_CSUM_TREE_OBJECTID)
360 return 1;
361 return 0;
362}
363
364static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
365 u64 root_objectid)
366{
367 struct btrfs_key key;
368
369 key.objectid = root_objectid;
370 key.type = BTRFS_ROOT_ITEM_KEY;
371 if (is_cowonly_root(root_objectid))
372 key.offset = 0;
373 else
374 key.offset = (u64)-1;
375
376 return btrfs_read_fs_root_no_name(fs_info, &key);
377}
378
379#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
380static noinline_for_stack
381struct btrfs_root *find_tree_root(struct reloc_control *rc,
382 struct extent_buffer *leaf,
383 struct btrfs_extent_ref_v0 *ref0)
384{
385 struct btrfs_root *root;
386 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
387 u64 generation = btrfs_ref_generation_v0(leaf, ref0);
388
389 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
390
391 root = read_fs_root(rc->extent_root->fs_info, root_objectid);
392 BUG_ON(IS_ERR(root));
393
394 if (root->ref_cows &&
395 generation != btrfs_root_generation(&root->root_item))
396 return NULL;
397
398 return root;
399}
400#endif
401
402static noinline_for_stack
403int find_inline_backref(struct extent_buffer *leaf, int slot,
404 unsigned long *ptr, unsigned long *end)
405{
406 struct btrfs_extent_item *ei;
407 struct btrfs_tree_block_info *bi;
408 u32 item_size;
409
410 item_size = btrfs_item_size_nr(leaf, slot);
411#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
412 if (item_size < sizeof(*ei)) {
413 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
414 return 1;
415 }
416#endif
417 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
418 WARN_ON(!(btrfs_extent_flags(leaf, ei) &
419 BTRFS_EXTENT_FLAG_TREE_BLOCK));
420
421 if (item_size <= sizeof(*ei) + sizeof(*bi)) {
422 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
423 return 1;
424 }
425
426 bi = (struct btrfs_tree_block_info *)(ei + 1);
427 *ptr = (unsigned long)(bi + 1);
428 *end = (unsigned long)ei + item_size;
429 return 0;
430}
431
432/*
433 * build backref tree for a given tree block. root of the backref tree
434 * corresponds the tree block, leaves of the backref tree correspond
435 * roots of b-trees that reference the tree block.
436 *
437 * the basic idea of this function is check backrefs of a given block
438 * to find upper level blocks that refernece the block, and then check
439 * bakcrefs of these upper level blocks recursively. the recursion stop
440 * when tree root is reached or backrefs for the block is cached.
441 *
442 * NOTE: if we find backrefs for a block are cached, we know backrefs
443 * for all upper level blocks that directly/indirectly reference the
444 * block are also cached.
445 */
446static struct backref_node *build_backref_tree(struct reloc_control *rc,
447 struct backref_cache *cache,
448 struct btrfs_key *node_key,
449 int level, u64 bytenr)
450{
451 struct btrfs_path *path1;
452 struct btrfs_path *path2;
453 struct extent_buffer *eb;
454 struct btrfs_root *root;
455 struct backref_node *cur;
456 struct backref_node *upper;
457 struct backref_node *lower;
458 struct backref_node *node = NULL;
459 struct backref_node *exist = NULL;
460 struct backref_edge *edge;
461 struct rb_node *rb_node;
462 struct btrfs_key key;
463 unsigned long end;
464 unsigned long ptr;
465 LIST_HEAD(list);
466 int ret;
467 int err = 0;
468
469 path1 = btrfs_alloc_path();
470 path2 = btrfs_alloc_path();
471 if (!path1 || !path2) {
472 err = -ENOMEM;
473 goto out;
474 }
475
476 node = kmalloc(sizeof(*node), GFP_NOFS);
477 if (!node) {
478 err = -ENOMEM;
479 goto out;
480 }
481
482 backref_node_init(node);
483 node->bytenr = bytenr;
484 node->owner = 0;
485 node->level = level;
486 node->lowest = 1;
487 cur = node;
488again:
489 end = 0;
490 ptr = 0;
491 key.objectid = cur->bytenr;
492 key.type = BTRFS_EXTENT_ITEM_KEY;
493 key.offset = (u64)-1;
494
495 path1->search_commit_root = 1;
496 path1->skip_locking = 1;
497 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
498 0, 0);
499 if (ret < 0) {
500 err = ret;
501 goto out;
502 }
503 BUG_ON(!ret || !path1->slots[0]);
504
505 path1->slots[0]--;
506
507 WARN_ON(cur->checked);
508 if (!list_empty(&cur->upper)) {
509 /*
510 * the backref was added previously when processsing
511 * backref of type BTRFS_TREE_BLOCK_REF_KEY
512 */
513 BUG_ON(!list_is_singular(&cur->upper));
514 edge = list_entry(cur->upper.next, struct backref_edge,
515 list[LOWER]);
516 BUG_ON(!list_empty(&edge->list[UPPER]));
517 exist = edge->node[UPPER];
518 /*
519 * add the upper level block to pending list if we need
520 * check its backrefs
521 */
522 if (!exist->checked)
523 list_add_tail(&edge->list[UPPER], &list);
524 } else {
525 exist = NULL;
526 }
527
528 while (1) {
529 cond_resched();
530 eb = path1->nodes[0];
531
532 if (ptr >= end) {
533 if (path1->slots[0] >= btrfs_header_nritems(eb)) {
534 ret = btrfs_next_leaf(rc->extent_root, path1);
535 if (ret < 0) {
536 err = ret;
537 goto out;
538 }
539 if (ret > 0)
540 break;
541 eb = path1->nodes[0];
542 }
543
544 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
545 if (key.objectid != cur->bytenr) {
546 WARN_ON(exist);
547 break;
548 }
549
550 if (key.type == BTRFS_EXTENT_ITEM_KEY) {
551 ret = find_inline_backref(eb, path1->slots[0],
552 &ptr, &end);
553 if (ret)
554 goto next;
555 }
556 }
557
558 if (ptr < end) {
559 /* update key for inline back ref */
560 struct btrfs_extent_inline_ref *iref;
561 iref = (struct btrfs_extent_inline_ref *)ptr;
562 key.type = btrfs_extent_inline_ref_type(eb, iref);
563 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
564 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
565 key.type != BTRFS_SHARED_BLOCK_REF_KEY);
566 }
567
568 if (exist &&
569 ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
570 exist->owner == key.offset) ||
571 (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
572 exist->bytenr == key.offset))) {
573 exist = NULL;
574 goto next;
575 }
576
577#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
578 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
579 key.type == BTRFS_EXTENT_REF_V0_KEY) {
580 if (key.objectid == key.offset &&
581 key.type == BTRFS_EXTENT_REF_V0_KEY) {
582 struct btrfs_extent_ref_v0 *ref0;
583 ref0 = btrfs_item_ptr(eb, path1->slots[0],
584 struct btrfs_extent_ref_v0);
585 root = find_tree_root(rc, eb, ref0);
586 if (root)
587 cur->root = root;
588 else
589 cur->old_root = 1;
590 break;
591 }
592#else
593 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
594 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
595#endif
596 if (key.objectid == key.offset) {
597 /*
598 * only root blocks of reloc trees use
599 * backref of this type.
600 */
601 root = find_reloc_root(rc, cur->bytenr);
602 BUG_ON(!root);
603 cur->root = root;
604 break;
605 }
606
607 edge = kzalloc(sizeof(*edge), GFP_NOFS);
608 if (!edge) {
609 err = -ENOMEM;
610 goto out;
611 }
612 rb_node = tree_search(&cache->rb_root, key.offset);
613 if (!rb_node) {
614 upper = kmalloc(sizeof(*upper), GFP_NOFS);
615 if (!upper) {
616 kfree(edge);
617 err = -ENOMEM;
618 goto out;
619 }
620 backref_node_init(upper);
621 upper->bytenr = key.offset;
622 upper->owner = 0;
623 upper->level = cur->level + 1;
624 /*
625 * backrefs for the upper level block isn't
626 * cached, add the block to pending list
627 */
628 list_add_tail(&edge->list[UPPER], &list);
629 } else {
630 upper = rb_entry(rb_node, struct backref_node,
631 rb_node);
632 INIT_LIST_HEAD(&edge->list[UPPER]);
633 }
634 list_add(&edge->list[LOWER], &cur->upper);
635 edge->node[UPPER] = upper;
636 edge->node[LOWER] = cur;
637
638 goto next;
639 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
640 goto next;
641 }
642
643 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
644 root = read_fs_root(rc->extent_root->fs_info, key.offset);
645 if (IS_ERR(root)) {
646 err = PTR_ERR(root);
647 goto out;
648 }
649
650 if (btrfs_root_level(&root->root_item) == cur->level) {
651 /* tree root */
652 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
653 cur->bytenr);
654 cur->root = root;
655 break;
656 }
657
658 level = cur->level + 1;
659
660 /*
661 * searching the tree to find upper level blocks
662 * reference the block.
663 */
664 path2->search_commit_root = 1;
665 path2->skip_locking = 1;
666 path2->lowest_level = level;
667 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
668 path2->lowest_level = 0;
669 if (ret < 0) {
670 err = ret;
671 goto out;
672 }
673
674 eb = path2->nodes[level];
675 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
676 cur->bytenr);
677
678 lower = cur;
679 for (; level < BTRFS_MAX_LEVEL; level++) {
680 if (!path2->nodes[level]) {
681 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
682 lower->bytenr);
683 lower->root = root;
684 break;
685 }
686
687 edge = kzalloc(sizeof(*edge), GFP_NOFS);
688 if (!edge) {
689 err = -ENOMEM;
690 goto out;
691 }
692
693 eb = path2->nodes[level];
694 rb_node = tree_search(&cache->rb_root, eb->start);
695 if (!rb_node) {
696 upper = kmalloc(sizeof(*upper), GFP_NOFS);
697 if (!upper) {
698 kfree(edge);
699 err = -ENOMEM;
700 goto out;
701 }
702 backref_node_init(upper);
703 upper->bytenr = eb->start;
704 upper->owner = btrfs_header_owner(eb);
705 upper->level = lower->level + 1;
706
707 /*
708 * if we know the block isn't shared
709 * we can void checking its backrefs.
710 */
711 if (btrfs_block_can_be_shared(root, eb))
712 upper->checked = 0;
713 else
714 upper->checked = 1;
715
716 /*
717 * add the block to pending list if we
718 * need check its backrefs. only block
719 * at 'cur->level + 1' is added to the
720 * tail of pending list. this guarantees
721 * we check backrefs from lower level
722 * blocks to upper level blocks.
723 */
724 if (!upper->checked &&
725 level == cur->level + 1) {
726 list_add_tail(&edge->list[UPPER],
727 &list);
728 } else
729 INIT_LIST_HEAD(&edge->list[UPPER]);
730 } else {
731 upper = rb_entry(rb_node, struct backref_node,
732 rb_node);
733 BUG_ON(!upper->checked);
734 INIT_LIST_HEAD(&edge->list[UPPER]);
735 }
736 list_add_tail(&edge->list[LOWER], &lower->upper);
737 edge->node[UPPER] = upper;
738 edge->node[LOWER] = lower;
739
740 if (rb_node)
741 break;
742 lower = upper;
743 upper = NULL;
744 }
745 btrfs_release_path(root, path2);
746next:
747 if (ptr < end) {
748 ptr += btrfs_extent_inline_ref_size(key.type);
749 if (ptr >= end) {
750 WARN_ON(ptr > end);
751 ptr = 0;
752 end = 0;
753 }
754 }
755 if (ptr >= end)
756 path1->slots[0]++;
757 }
758 btrfs_release_path(rc->extent_root, path1);
759
760 cur->checked = 1;
761 WARN_ON(exist);
762
763 /* the pending list isn't empty, take the first block to process */
764 if (!list_empty(&list)) {
765 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
766 list_del_init(&edge->list[UPPER]);
767 cur = edge->node[UPPER];
768 goto again;
769 }
770
771 /*
772 * everything goes well, connect backref nodes and insert backref nodes
773 * into the cache.
774 */
775 BUG_ON(!node->checked);
776 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
777 BUG_ON(rb_node);
778
779 list_for_each_entry(edge, &node->upper, list[LOWER])
780 list_add_tail(&edge->list[UPPER], &list);
781
782 while (!list_empty(&list)) {
783 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
784 list_del_init(&edge->list[UPPER]);
785 upper = edge->node[UPPER];
786
787 if (!RB_EMPTY_NODE(&upper->rb_node)) {
788 if (upper->lowest) {
789 list_del_init(&upper->lower);
790 upper->lowest = 0;
791 }
792
793 list_add_tail(&edge->list[UPPER], &upper->lower);
794 continue;
795 }
796
797 BUG_ON(!upper->checked);
798 rb_node = tree_insert(&cache->rb_root, upper->bytenr,
799 &upper->rb_node);
800 BUG_ON(rb_node);
801
802 list_add_tail(&edge->list[UPPER], &upper->lower);
803
804 list_for_each_entry(edge, &upper->upper, list[LOWER])
805 list_add_tail(&edge->list[UPPER], &list);
806 }
807out:
808 btrfs_free_path(path1);
809 btrfs_free_path(path2);
810 if (err) {
811 INIT_LIST_HEAD(&list);
812 upper = node;
813 while (upper) {
814 if (RB_EMPTY_NODE(&upper->rb_node)) {
815 list_splice_tail(&upper->upper, &list);
816 kfree(upper);
817 }
818
819 if (list_empty(&list))
820 break;
821
822 edge = list_entry(list.next, struct backref_edge,
823 list[LOWER]);
824 upper = edge->node[UPPER];
825 kfree(edge);
826 }
827 return ERR_PTR(err);
828 }
829 return node;
830}
831
832/*
833 * helper to add 'address of tree root -> reloc tree' mapping
834 */
835static int __add_reloc_root(struct btrfs_root *root)
836{
837 struct rb_node *rb_node;
838 struct mapping_node *node;
839 struct reloc_control *rc = root->fs_info->reloc_ctl;
840
841 node = kmalloc(sizeof(*node), GFP_NOFS);
842 BUG_ON(!node);
843
844 node->bytenr = root->node->start;
845 node->data = root;
846
847 spin_lock(&rc->reloc_root_tree.lock);
848 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
849 node->bytenr, &node->rb_node);
850 spin_unlock(&rc->reloc_root_tree.lock);
851 BUG_ON(rb_node);
852
853 list_add_tail(&root->root_list, &rc->reloc_roots);
854 return 0;
855}
856
857/*
858 * helper to update/delete the 'address of tree root -> reloc tree'
859 * mapping
860 */
861static int __update_reloc_root(struct btrfs_root *root, int del)
862{
863 struct rb_node *rb_node;
864 struct mapping_node *node = NULL;
865 struct reloc_control *rc = root->fs_info->reloc_ctl;
866
867 spin_lock(&rc->reloc_root_tree.lock);
868 rb_node = tree_search(&rc->reloc_root_tree.rb_root,
869 root->commit_root->start);
870 if (rb_node) {
871 node = rb_entry(rb_node, struct mapping_node, rb_node);
872 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
873 }
874 spin_unlock(&rc->reloc_root_tree.lock);
875
876 BUG_ON((struct btrfs_root *)node->data != root);
877
878 if (!del) {
879 spin_lock(&rc->reloc_root_tree.lock);
880 node->bytenr = root->node->start;
881 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
882 node->bytenr, &node->rb_node);
883 spin_unlock(&rc->reloc_root_tree.lock);
884 BUG_ON(rb_node);
885 } else {
886 list_del_init(&root->root_list);
887 kfree(node);
888 }
889 return 0;
890}
891
892/*
893 * create reloc tree for a given fs tree. reloc tree is just a
894 * snapshot of the fs tree with special root objectid.
895 */
896int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
897 struct btrfs_root *root)
898{
899 struct btrfs_root *reloc_root;
900 struct extent_buffer *eb;
901 struct btrfs_root_item *root_item;
902 struct btrfs_key root_key;
903 int ret;
904
905 if (root->reloc_root) {
906 reloc_root = root->reloc_root;
907 reloc_root->last_trans = trans->transid;
908 return 0;
909 }
910
911 if (!root->fs_info->reloc_ctl ||
912 !root->fs_info->reloc_ctl->create_reloc_root ||
913 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
914 return 0;
915
916 root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
917 BUG_ON(!root_item);
918
919 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
920 root_key.type = BTRFS_ROOT_ITEM_KEY;
921 root_key.offset = root->root_key.objectid;
922
923 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
924 BTRFS_TREE_RELOC_OBJECTID);
925 BUG_ON(ret);
926
927 btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
928 memcpy(root_item, &root->root_item, sizeof(*root_item));
929 btrfs_set_root_refs(root_item, 1);
930 btrfs_set_root_bytenr(root_item, eb->start);
931 btrfs_set_root_level(root_item, btrfs_header_level(eb));
932 btrfs_set_root_generation(root_item, trans->transid);
933 memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
934 root_item->drop_level = 0;
935
936 btrfs_tree_unlock(eb);
937 free_extent_buffer(eb);
938
939 ret = btrfs_insert_root(trans, root->fs_info->tree_root,
940 &root_key, root_item);
941 BUG_ON(ret);
942 kfree(root_item);
943
944 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
945 &root_key);
946 BUG_ON(IS_ERR(reloc_root));
947 reloc_root->last_trans = trans->transid;
948
949 __add_reloc_root(reloc_root);
950 root->reloc_root = reloc_root;
951 return 0;
952}
953
954/*
955 * update root item of reloc tree
956 */
957int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
958 struct btrfs_root *root)
959{
960 struct btrfs_root *reloc_root;
961 struct btrfs_root_item *root_item;
962 int del = 0;
963 int ret;
964
965 if (!root->reloc_root)
966 return 0;
967
968 reloc_root = root->reloc_root;
969 root_item = &reloc_root->root_item;
970
971 if (btrfs_root_refs(root_item) == 0) {
972 root->reloc_root = NULL;
973 del = 1;
974 }
975
976 __update_reloc_root(reloc_root, del);
977
978 if (reloc_root->commit_root != reloc_root->node) {
979 btrfs_set_root_node(root_item, reloc_root->node);
980 free_extent_buffer(reloc_root->commit_root);
981 reloc_root->commit_root = btrfs_root_node(reloc_root);
982 }
983
984 ret = btrfs_update_root(trans, root->fs_info->tree_root,
985 &reloc_root->root_key, root_item);
986 BUG_ON(ret);
987 return 0;
988}
989
990/*
991 * helper to find first cached inode with inode number >= objectid
992 * in a subvolume
993 */
994static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
995{
996 struct rb_node *node;
997 struct rb_node *prev;
998 struct btrfs_inode *entry;
999 struct inode *inode;
1000
1001 spin_lock(&root->inode_lock);
1002again:
1003 node = root->inode_tree.rb_node;
1004 prev = NULL;
1005 while (node) {
1006 prev = node;
1007 entry = rb_entry(node, struct btrfs_inode, rb_node);
1008
1009 if (objectid < entry->vfs_inode.i_ino)
1010 node = node->rb_left;
1011 else if (objectid > entry->vfs_inode.i_ino)
1012 node = node->rb_right;
1013 else
1014 break;
1015 }
1016 if (!node) {
1017 while (prev) {
1018 entry = rb_entry(prev, struct btrfs_inode, rb_node);
1019 if (objectid <= entry->vfs_inode.i_ino) {
1020 node = prev;
1021 break;
1022 }
1023 prev = rb_next(prev);
1024 }
1025 }
1026 while (node) {
1027 entry = rb_entry(node, struct btrfs_inode, rb_node);
1028 inode = igrab(&entry->vfs_inode);
1029 if (inode) {
1030 spin_unlock(&root->inode_lock);
1031 return inode;
1032 }
1033
1034 objectid = entry->vfs_inode.i_ino + 1;
1035 if (cond_resched_lock(&root->inode_lock))
1036 goto again;
1037
1038 node = rb_next(node);
1039 }
1040 spin_unlock(&root->inode_lock);
1041 return NULL;
1042}
1043
1044static int in_block_group(u64 bytenr,
1045 struct btrfs_block_group_cache *block_group)
1046{
1047 if (bytenr >= block_group->key.objectid &&
1048 bytenr < block_group->key.objectid + block_group->key.offset)
1049 return 1;
1050 return 0;
1051}
1052
1053/*
1054 * get new location of data
1055 */
1056static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1057 u64 bytenr, u64 num_bytes)
1058{
1059 struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1060 struct btrfs_path *path;
1061 struct btrfs_file_extent_item *fi;
1062 struct extent_buffer *leaf;
1063 int ret;
1064
1065 path = btrfs_alloc_path();
1066 if (!path)
1067 return -ENOMEM;
1068
1069 bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1070 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
1071 bytenr, 0);
1072 if (ret < 0)
1073 goto out;
1074 if (ret > 0) {
1075 ret = -ENOENT;
1076 goto out;
1077 }
1078
1079 leaf = path->nodes[0];
1080 fi = btrfs_item_ptr(leaf, path->slots[0],
1081 struct btrfs_file_extent_item);
1082
1083 BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1084 btrfs_file_extent_compression(leaf, fi) ||
1085 btrfs_file_extent_encryption(leaf, fi) ||
1086 btrfs_file_extent_other_encoding(leaf, fi));
1087
1088 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1089 ret = 1;
1090 goto out;
1091 }
1092
1093 if (new_bytenr)
1094 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1095 ret = 0;
1096out:
1097 btrfs_free_path(path);
1098 return ret;
1099}
1100
1101/*
1102 * update file extent items in the tree leaf to point to
1103 * the new locations.
1104 */
1105static int replace_file_extents(struct btrfs_trans_handle *trans,
1106 struct reloc_control *rc,
1107 struct btrfs_root *root,
1108 struct extent_buffer *leaf,
1109 struct list_head *inode_list)
1110{
1111 struct btrfs_key key;
1112 struct btrfs_file_extent_item *fi;
1113 struct inode *inode = NULL;
1114 struct inodevec *ivec = NULL;
1115 u64 parent;
1116 u64 bytenr;
1117 u64 new_bytenr;
1118 u64 num_bytes;
1119 u64 end;
1120 u32 nritems;
1121 u32 i;
1122 int ret;
1123 int first = 1;
1124 int dirty = 0;
1125
1126 if (rc->stage != UPDATE_DATA_PTRS)
1127 return 0;
1128
1129 /* reloc trees always use full backref */
1130 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1131 parent = leaf->start;
1132 else
1133 parent = 0;
1134
1135 nritems = btrfs_header_nritems(leaf);
1136 for (i = 0; i < nritems; i++) {
1137 cond_resched();
1138 btrfs_item_key_to_cpu(leaf, &key, i);
1139 if (key.type != BTRFS_EXTENT_DATA_KEY)
1140 continue;
1141 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1142 if (btrfs_file_extent_type(leaf, fi) ==
1143 BTRFS_FILE_EXTENT_INLINE)
1144 continue;
1145 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1146 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1147 if (bytenr == 0)
1148 continue;
1149 if (!in_block_group(bytenr, rc->block_group))
1150 continue;
1151
1152 /*
1153 * if we are modifying block in fs tree, wait for readpage
1154 * to complete and drop the extent cache
1155 */
1156 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1157 if (!ivec || ivec->nr == INODEVEC_SIZE) {
1158 ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
1159 BUG_ON(!ivec);
1160 ivec->nr = 0;
1161 list_add_tail(&ivec->list, inode_list);
1162 }
1163 if (first) {
1164 inode = find_next_inode(root, key.objectid);
1165 if (inode)
1166 ivec->inode[ivec->nr++] = inode;
1167 first = 0;
1168 } else if (inode && inode->i_ino < key.objectid) {
1169 inode = find_next_inode(root, key.objectid);
1170 if (inode)
1171 ivec->inode[ivec->nr++] = inode;
1172 }
1173 if (inode && inode->i_ino == key.objectid) {
1174 end = key.offset +
1175 btrfs_file_extent_num_bytes(leaf, fi);
1176 WARN_ON(!IS_ALIGNED(key.offset,
1177 root->sectorsize));
1178 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1179 end--;
1180 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1181 key.offset, end,
1182 GFP_NOFS);
1183 if (!ret)
1184 continue;
1185
1186 btrfs_drop_extent_cache(inode, key.offset, end,
1187 1);
1188 unlock_extent(&BTRFS_I(inode)->io_tree,
1189 key.offset, end, GFP_NOFS);
1190 }
1191 }
1192
1193 ret = get_new_location(rc->data_inode, &new_bytenr,
1194 bytenr, num_bytes);
1195 if (ret > 0)
1196 continue;
1197 BUG_ON(ret < 0);
1198
1199 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1200 dirty = 1;
1201
1202 key.offset -= btrfs_file_extent_offset(leaf, fi);
1203 ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1204 num_bytes, parent,
1205 btrfs_header_owner(leaf),
1206 key.objectid, key.offset);
1207 BUG_ON(ret);
1208
1209 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1210 parent, btrfs_header_owner(leaf),
1211 key.objectid, key.offset);
1212 BUG_ON(ret);
1213 }
1214 if (dirty)
1215 btrfs_mark_buffer_dirty(leaf);
1216 return 0;
1217}
1218
1219static noinline_for_stack
1220int memcmp_node_keys(struct extent_buffer *eb, int slot,
1221 struct btrfs_path *path, int level)
1222{
1223 struct btrfs_disk_key key1;
1224 struct btrfs_disk_key key2;
1225 btrfs_node_key(eb, &key1, slot);
1226 btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1227 return memcmp(&key1, &key2, sizeof(key1));
1228}
1229
1230/*
1231 * try to replace tree blocks in fs tree with the new blocks
1232 * in reloc tree. tree blocks haven't been modified since the
1233 * reloc tree was create can be replaced.
1234 *
1235 * if a block was replaced, level of the block + 1 is returned.
1236 * if no block got replaced, 0 is returned. if there are other
1237 * errors, a negative error number is returned.
1238 */
1239static int replace_path(struct btrfs_trans_handle *trans,
1240 struct btrfs_root *dest, struct btrfs_root *src,
1241 struct btrfs_path *path, struct btrfs_key *next_key,
1242 struct extent_buffer **leaf,
1243 int lowest_level, int max_level)
1244{
1245 struct extent_buffer *eb;
1246 struct extent_buffer *parent;
1247 struct btrfs_key key;
1248 u64 old_bytenr;
1249 u64 new_bytenr;
1250 u64 old_ptr_gen;
1251 u64 new_ptr_gen;
1252 u64 last_snapshot;
1253 u32 blocksize;
1254 int level;
1255 int ret;
1256 int slot;
1257
1258 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1259 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1260 BUG_ON(lowest_level > 1 && leaf);
1261
1262 last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1263
1264 slot = path->slots[lowest_level];
1265 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1266
1267 eb = btrfs_lock_root_node(dest);
1268 btrfs_set_lock_blocking(eb);
1269 level = btrfs_header_level(eb);
1270
1271 if (level < lowest_level) {
1272 btrfs_tree_unlock(eb);
1273 free_extent_buffer(eb);
1274 return 0;
1275 }
1276
1277 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1278 BUG_ON(ret);
1279 btrfs_set_lock_blocking(eb);
1280
1281 if (next_key) {
1282 next_key->objectid = (u64)-1;
1283 next_key->type = (u8)-1;
1284 next_key->offset = (u64)-1;
1285 }
1286
1287 parent = eb;
1288 while (1) {
1289 level = btrfs_header_level(parent);
1290 BUG_ON(level < lowest_level);
1291
1292 ret = btrfs_bin_search(parent, &key, level, &slot);
1293 if (ret && slot > 0)
1294 slot--;
1295
1296 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1297 btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1298
1299 old_bytenr = btrfs_node_blockptr(parent, slot);
1300 blocksize = btrfs_level_size(dest, level - 1);
1301 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1302
1303 if (level <= max_level) {
1304 eb = path->nodes[level];
1305 new_bytenr = btrfs_node_blockptr(eb,
1306 path->slots[level]);
1307 new_ptr_gen = btrfs_node_ptr_generation(eb,
1308 path->slots[level]);
1309 } else {
1310 new_bytenr = 0;
1311 new_ptr_gen = 0;
1312 }
1313
1314 if (new_bytenr > 0 && new_bytenr == old_bytenr) {
1315 WARN_ON(1);
1316 ret = level;
1317 break;
1318 }
1319
1320 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1321 memcmp_node_keys(parent, slot, path, level)) {
1322 if (level <= lowest_level && !leaf) {
1323 ret = 0;
1324 break;
1325 }
1326
1327 eb = read_tree_block(dest, old_bytenr, blocksize,
1328 old_ptr_gen);
1329 btrfs_tree_lock(eb);
1330 ret = btrfs_cow_block(trans, dest, eb, parent,
1331 slot, &eb);
1332 BUG_ON(ret);
1333 btrfs_set_lock_blocking(eb);
1334
1335 if (level <= lowest_level) {
1336 *leaf = eb;
1337 ret = 0;
1338 break;
1339 }
1340
1341 btrfs_tree_unlock(parent);
1342 free_extent_buffer(parent);
1343
1344 parent = eb;
1345 continue;
1346 }
1347
1348 btrfs_node_key_to_cpu(path->nodes[level], &key,
1349 path->slots[level]);
1350 btrfs_release_path(src, path);
1351
1352 path->lowest_level = level;
1353 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1354 path->lowest_level = 0;
1355 BUG_ON(ret);
1356
1357 /*
1358 * swap blocks in fs tree and reloc tree.
1359 */
1360 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1361 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1362 btrfs_mark_buffer_dirty(parent);
1363
1364 btrfs_set_node_blockptr(path->nodes[level],
1365 path->slots[level], old_bytenr);
1366 btrfs_set_node_ptr_generation(path->nodes[level],
1367 path->slots[level], old_ptr_gen);
1368 btrfs_mark_buffer_dirty(path->nodes[level]);
1369
1370 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
1371 path->nodes[level]->start,
1372 src->root_key.objectid, level - 1, 0);
1373 BUG_ON(ret);
1374 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
1375 0, dest->root_key.objectid, level - 1,
1376 0);
1377 BUG_ON(ret);
1378
1379 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1380 path->nodes[level]->start,
1381 src->root_key.objectid, level - 1, 0);
1382 BUG_ON(ret);
1383
1384 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1385 0, dest->root_key.objectid, level - 1,
1386 0);
1387 BUG_ON(ret);
1388
1389 btrfs_unlock_up_safe(path, 0);
1390
1391 ret = level;
1392 break;
1393 }
1394 btrfs_tree_unlock(parent);
1395 free_extent_buffer(parent);
1396 return ret;
1397}
1398
1399/*
1400 * helper to find next relocated block in reloc tree
1401 */
1402static noinline_for_stack
1403int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1404 int *level)
1405{
1406 struct extent_buffer *eb;
1407 int i;
1408 u64 last_snapshot;
1409 u32 nritems;
1410
1411 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1412
1413 for (i = 0; i < *level; i++) {
1414 free_extent_buffer(path->nodes[i]);
1415 path->nodes[i] = NULL;
1416 }
1417
1418 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1419 eb = path->nodes[i];
1420 nritems = btrfs_header_nritems(eb);
1421 while (path->slots[i] + 1 < nritems) {
1422 path->slots[i]++;
1423 if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1424 last_snapshot)
1425 continue;
1426
1427 *level = i;
1428 return 0;
1429 }
1430 free_extent_buffer(path->nodes[i]);
1431 path->nodes[i] = NULL;
1432 }
1433 return 1;
1434}
1435
1436/*
1437 * walk down reloc tree to find relocated block of lowest level
1438 */
1439static noinline_for_stack
1440int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1441 int *level)
1442{
1443 struct extent_buffer *eb = NULL;
1444 int i;
1445 u64 bytenr;
1446 u64 ptr_gen = 0;
1447 u64 last_snapshot;
1448 u32 blocksize;
1449 u32 nritems;
1450
1451 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1452
1453 for (i = *level; i > 0; i--) {
1454 eb = path->nodes[i];
1455 nritems = btrfs_header_nritems(eb);
1456 while (path->slots[i] < nritems) {
1457 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1458 if (ptr_gen > last_snapshot)
1459 break;
1460 path->slots[i]++;
1461 }
1462 if (path->slots[i] >= nritems) {
1463 if (i == *level)
1464 break;
1465 *level = i + 1;
1466 return 0;
1467 }
1468 if (i == 1) {
1469 *level = i;
1470 return 0;
1471 }
1472
1473 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1474 blocksize = btrfs_level_size(root, i - 1);
1475 eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1476 BUG_ON(btrfs_header_level(eb) != i - 1);
1477 path->nodes[i - 1] = eb;
1478 path->slots[i - 1] = 0;
1479 }
1480 return 1;
1481}
1482
1483/*
1484 * invalidate extent cache for file extents whose key in range of
1485 * [min_key, max_key)
1486 */
1487static int invalidate_extent_cache(struct btrfs_root *root,
1488 struct btrfs_key *min_key,
1489 struct btrfs_key *max_key)
1490{
1491 struct inode *inode = NULL;
1492 u64 objectid;
1493 u64 start, end;
1494
1495 objectid = min_key->objectid;
1496 while (1) {
1497 cond_resched();
1498 iput(inode);
1499
1500 if (objectid > max_key->objectid)
1501 break;
1502
1503 inode = find_next_inode(root, objectid);
1504 if (!inode)
1505 break;
1506
1507 if (inode->i_ino > max_key->objectid) {
1508 iput(inode);
1509 break;
1510 }
1511
1512 objectid = inode->i_ino + 1;
1513 if (!S_ISREG(inode->i_mode))
1514 continue;
1515
1516 if (unlikely(min_key->objectid == inode->i_ino)) {
1517 if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1518 continue;
1519 if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1520 start = 0;
1521 else {
1522 start = min_key->offset;
1523 WARN_ON(!IS_ALIGNED(start, root->sectorsize));
1524 }
1525 } else {
1526 start = 0;
1527 }
1528
1529 if (unlikely(max_key->objectid == inode->i_ino)) {
1530 if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1531 continue;
1532 if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1533 end = (u64)-1;
1534 } else {
1535 if (max_key->offset == 0)
1536 continue;
1537 end = max_key->offset;
1538 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1539 end--;
1540 }
1541 } else {
1542 end = (u64)-1;
1543 }
1544
1545 /* the lock_extent waits for readpage to complete */
1546 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1547 btrfs_drop_extent_cache(inode, start, end, 1);
1548 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1549 }
1550 return 0;
1551}
1552
1553static int find_next_key(struct btrfs_path *path, int level,
1554 struct btrfs_key *key)
1555
1556{
1557 while (level < BTRFS_MAX_LEVEL) {
1558 if (!path->nodes[level])
1559 break;
1560 if (path->slots[level] + 1 <
1561 btrfs_header_nritems(path->nodes[level])) {
1562 btrfs_node_key_to_cpu(path->nodes[level], key,
1563 path->slots[level] + 1);
1564 return 0;
1565 }
1566 level++;
1567 }
1568 return 1;
1569}
1570
1571/*
1572 * merge the relocated tree blocks in reloc tree with corresponding
1573 * fs tree.
1574 */
1575static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1576 struct btrfs_root *root)
1577{
1578 LIST_HEAD(inode_list);
1579 struct btrfs_key key;
1580 struct btrfs_key next_key;
1581 struct btrfs_trans_handle *trans;
1582 struct btrfs_root *reloc_root;
1583 struct btrfs_root_item *root_item;
1584 struct btrfs_path *path;
1585 struct extent_buffer *leaf = NULL;
1586 unsigned long nr;
1587 int level;
1588 int max_level;
1589 int replaced = 0;
1590 int ret;
1591 int err = 0;
1592
1593 path = btrfs_alloc_path();
1594 if (!path)
1595 return -ENOMEM;
1596
1597 reloc_root = root->reloc_root;
1598 root_item = &reloc_root->root_item;
1599
1600 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1601 level = btrfs_root_level(root_item);
1602 extent_buffer_get(reloc_root->node);
1603 path->nodes[level] = reloc_root->node;
1604 path->slots[level] = 0;
1605 } else {
1606 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1607
1608 level = root_item->drop_level;
1609 BUG_ON(level == 0);
1610 path->lowest_level = level;
1611 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
1612 if (ret < 0) {
1613 btrfs_free_path(path);
1614 return ret;
1615 }
1616
1617 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
1618 path->slots[level]);
1619 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
1620
1621 btrfs_unlock_up_safe(path, 0);
1622 }
1623
1624 if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
1625 trans = btrfs_start_transaction(root, 1);
1626
1627 leaf = path->nodes[0];
1628 btrfs_item_key_to_cpu(leaf, &key, 0);
1629 btrfs_release_path(reloc_root, path);
1630
1631 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1632 if (ret < 0) {
1633 err = ret;
1634 goto out;
1635 }
1636
1637 leaf = path->nodes[0];
1638 btrfs_unlock_up_safe(path, 1);
1639 ret = replace_file_extents(trans, rc, root, leaf,
1640 &inode_list);
1641 if (ret < 0)
1642 err = ret;
1643 goto out;
1644 }
1645
1646 memset(&next_key, 0, sizeof(next_key));
1647
1648 while (1) {
1649 leaf = NULL;
1650 replaced = 0;
1651 trans = btrfs_start_transaction(root, 1);
1652 max_level = level;
1653
1654 ret = walk_down_reloc_tree(reloc_root, path, &level);
1655 if (ret < 0) {
1656 err = ret;
1657 goto out;
1658 }
1659 if (ret > 0)
1660 break;
1661
1662 if (!find_next_key(path, level, &key) &&
1663 btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
1664 ret = 0;
1665 } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
1666 ret = replace_path(trans, root, reloc_root,
1667 path, &next_key, &leaf,
1668 level, max_level);
1669 } else {
1670 ret = replace_path(trans, root, reloc_root,
1671 path, &next_key, NULL,
1672 level, max_level);
1673 }
1674 if (ret < 0) {
1675 err = ret;
1676 goto out;
1677 }
1678
1679 if (ret > 0) {
1680 level = ret;
1681 btrfs_node_key_to_cpu(path->nodes[level], &key,
1682 path->slots[level]);
1683 replaced = 1;
1684 } else if (leaf) {
1685 /*
1686 * no block got replaced, try replacing file extents
1687 */
1688 btrfs_item_key_to_cpu(leaf, &key, 0);
1689 ret = replace_file_extents(trans, rc, root, leaf,
1690 &inode_list);
1691 btrfs_tree_unlock(leaf);
1692 free_extent_buffer(leaf);
1693 BUG_ON(ret < 0);
1694 }
1695
1696 ret = walk_up_reloc_tree(reloc_root, path, &level);
1697 if (ret > 0)
1698 break;
1699
1700 BUG_ON(level == 0);
1701 /*
1702 * save the merging progress in the drop_progress.
1703 * this is OK since root refs == 1 in this case.
1704 */
1705 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
1706 path->slots[level]);
1707 root_item->drop_level = level;
1708
1709 nr = trans->blocks_used;
1710 btrfs_end_transaction(trans, root);
1711
1712 btrfs_btree_balance_dirty(root, nr);
1713
1714 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1715 invalidate_extent_cache(root, &key, &next_key);
1716 }
1717
1718 /*
1719 * handle the case only one block in the fs tree need to be
1720 * relocated and the block is tree root.
1721 */
1722 leaf = btrfs_lock_root_node(root);
1723 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
1724 btrfs_tree_unlock(leaf);
1725 free_extent_buffer(leaf);
1726 if (ret < 0)
1727 err = ret;
1728out:
1729 btrfs_free_path(path);
1730
1731 if (err == 0) {
1732 memset(&root_item->drop_progress, 0,
1733 sizeof(root_item->drop_progress));
1734 root_item->drop_level = 0;
1735 btrfs_set_root_refs(root_item, 0);
1736 }
1737
1738 nr = trans->blocks_used;
1739 btrfs_end_transaction(trans, root);
1740
1741 btrfs_btree_balance_dirty(root, nr);
1742
1743 /*
1744 * put inodes while we aren't holding the tree locks
1745 */
1746 while (!list_empty(&inode_list)) {
1747 struct inodevec *ivec;
1748 ivec = list_entry(inode_list.next, struct inodevec, list);
1749 list_del(&ivec->list);
1750 while (ivec->nr > 0) {
1751 ivec->nr--;
1752 iput(ivec->inode[ivec->nr]);
1753 }
1754 kfree(ivec);
1755 }
1756
1757 if (replaced && rc->stage == UPDATE_DATA_PTRS)
1758 invalidate_extent_cache(root, &key, &next_key);
1759
1760 return err;
1761}
1762
1763/*
1764 * callback for the work threads.
1765 * this function merges reloc tree with corresponding fs tree,
1766 * and then drops the reloc tree.
1767 */
1768static void merge_func(struct btrfs_work *work)
1769{
1770 struct btrfs_trans_handle *trans;
1771 struct btrfs_root *root;
1772 struct btrfs_root *reloc_root;
1773 struct async_merge *async;
1774
1775 async = container_of(work, struct async_merge, work);
1776 reloc_root = async->root;
1777
1778 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
1779 root = read_fs_root(reloc_root->fs_info,
1780 reloc_root->root_key.offset);
1781 BUG_ON(IS_ERR(root));
1782 BUG_ON(root->reloc_root != reloc_root);
1783
1784 merge_reloc_root(async->rc, root);
1785
1786 trans = btrfs_start_transaction(root, 1);
1787 btrfs_update_reloc_root(trans, root);
1788 btrfs_end_transaction(trans, root);
1789 }
1790
1791 btrfs_drop_dead_root(reloc_root);
1792
1793 if (atomic_dec_and_test(async->num_pending))
1794 complete(async->done);
1795
1796 kfree(async);
1797}
1798
1799static int merge_reloc_roots(struct reloc_control *rc)
1800{
1801 struct async_merge *async;
1802 struct btrfs_root *root;
1803 struct completion done;
1804 atomic_t num_pending;
1805
1806 init_completion(&done);
1807 atomic_set(&num_pending, 1);
1808
1809 while (!list_empty(&rc->reloc_roots)) {
1810 root = list_entry(rc->reloc_roots.next,
1811 struct btrfs_root, root_list);
1812 list_del_init(&root->root_list);
1813
1814 async = kmalloc(sizeof(*async), GFP_NOFS);
1815 BUG_ON(!async);
1816 async->work.func = merge_func;
1817 async->work.flags = 0;
1818 async->rc = rc;
1819 async->root = root;
1820 async->done = &done;
1821 async->num_pending = &num_pending;
1822 atomic_inc(&num_pending);
1823 btrfs_queue_worker(&rc->workers, &async->work);
1824 }
1825
1826 if (!atomic_dec_and_test(&num_pending))
1827 wait_for_completion(&done);
1828
1829 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
1830 return 0;
1831}
1832
1833static void free_block_list(struct rb_root *blocks)
1834{
1835 struct tree_block *block;
1836 struct rb_node *rb_node;
1837 while ((rb_node = rb_first(blocks))) {
1838 block = rb_entry(rb_node, struct tree_block, rb_node);
1839 rb_erase(rb_node, blocks);
1840 kfree(block);
1841 }
1842}
1843
1844static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
1845 struct btrfs_root *reloc_root)
1846{
1847 struct btrfs_root *root;
1848
1849 if (reloc_root->last_trans == trans->transid)
1850 return 0;
1851
1852 root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
1853 BUG_ON(IS_ERR(root));
1854 BUG_ON(root->reloc_root != reloc_root);
1855
1856 return btrfs_record_root_in_trans(trans, root);
1857}
1858
1859/*
1860 * select one tree from trees that references the block.
1861 * for blocks in refernce counted trees, we preper reloc tree.
1862 * if no reloc tree found and reloc_only is true, NULL is returned.
1863 */
1864static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
1865 struct backref_node *node,
1866 struct backref_edge *edges[],
1867 int *nr, int reloc_only)
1868{
1869 struct backref_node *next;
1870 struct btrfs_root *root;
1871 int index;
1872 int loop = 0;
1873again:
1874 index = 0;
1875 next = node;
1876 while (1) {
1877 cond_resched();
1878 next = walk_up_backref(next, edges, &index);
1879 root = next->root;
1880 if (!root) {
1881 BUG_ON(!node->old_root);
1882 goto skip;
1883 }
1884
1885 /* no other choice for non-refernce counted tree */
1886 if (!root->ref_cows) {
1887 BUG_ON(reloc_only);
1888 break;
1889 }
1890
1891 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
1892 record_reloc_root_in_trans(trans, root);
1893 break;
1894 }
1895
1896 if (loop) {
1897 btrfs_record_root_in_trans(trans, root);
1898 break;
1899 }
1900
1901 if (reloc_only || next != node) {
1902 if (!root->reloc_root)
1903 btrfs_record_root_in_trans(trans, root);
1904 root = root->reloc_root;
1905 /*
1906 * if the reloc tree was created in current
1907 * transation, there is no node in backref tree
1908 * corresponds to the root of the reloc tree.
1909 */
1910 if (btrfs_root_last_snapshot(&root->root_item) ==
1911 trans->transid - 1)
1912 break;
1913 }
1914skip:
1915 root = NULL;
1916 next = walk_down_backref(edges, &index);
1917 if (!next || next->level <= node->level)
1918 break;
1919 }
1920
1921 if (!root && !loop && !reloc_only) {
1922 loop = 1;
1923 goto again;
1924 }
1925
1926 if (root)
1927 *nr = index;
1928 else
1929 *nr = 0;
1930
1931 return root;
1932}
1933
1934static noinline_for_stack
1935struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
1936 struct backref_node *node)
1937{
1938 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
1939 int nr;
1940 return __select_one_root(trans, node, edges, &nr, 0);
1941}
1942
1943static noinline_for_stack
1944struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
1945 struct backref_node *node,
1946 struct backref_edge *edges[], int *nr)
1947{
1948 return __select_one_root(trans, node, edges, nr, 1);
1949}
1950
1951static void grab_path_buffers(struct btrfs_path *path,
1952 struct backref_node *node,
1953 struct backref_edge *edges[], int nr)
1954{
1955 int i = 0;
1956 while (1) {
1957 drop_node_buffer(node);
1958 node->eb = path->nodes[node->level];
1959 BUG_ON(!node->eb);
1960 if (path->locks[node->level])
1961 node->locked = 1;
1962 path->nodes[node->level] = NULL;
1963 path->locks[node->level] = 0;
1964
1965 if (i >= nr)
1966 break;
1967
1968 edges[i]->blockptr = node->eb->start;
1969 node = edges[i]->node[UPPER];
1970 i++;
1971 }
1972}
1973
1974/*
1975 * relocate a block tree, and then update pointers in upper level
1976 * blocks that reference the block to point to the new location.
1977 *
1978 * if called by link_to_upper, the block has already been relocated.
1979 * in that case this function just updates pointers.
1980 */
1981static int do_relocation(struct btrfs_trans_handle *trans,
1982 struct backref_node *node,
1983 struct btrfs_key *key,
1984 struct btrfs_path *path, int lowest)
1985{
1986 struct backref_node *upper;
1987 struct backref_edge *edge;
1988 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
1989 struct btrfs_root *root;
1990 struct extent_buffer *eb;
1991 u32 blocksize;
1992 u64 bytenr;
1993 u64 generation;
1994 int nr;
1995 int slot;
1996 int ret;
1997 int err = 0;
1998
1999 BUG_ON(lowest && node->eb);
2000
2001 path->lowest_level = node->level + 1;
2002 list_for_each_entry(edge, &node->upper, list[LOWER]) {
2003 cond_resched();
2004 if (node->eb && node->eb->start == edge->blockptr)
2005 continue;
2006
2007 upper = edge->node[UPPER];
2008 root = select_reloc_root(trans, upper, edges, &nr);
2009 if (!root)
2010 continue;
2011
2012 if (upper->eb && !upper->locked)
2013 drop_node_buffer(upper);
2014
2015 if (!upper->eb) {
2016 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2017 if (ret < 0) {
2018 err = ret;
2019 break;
2020 }
2021 BUG_ON(ret > 0);
2022
2023 slot = path->slots[upper->level];
2024
2025 btrfs_unlock_up_safe(path, upper->level + 1);
2026 grab_path_buffers(path, upper, edges, nr);
2027
2028 btrfs_release_path(NULL, path);
2029 } else {
2030 ret = btrfs_bin_search(upper->eb, key, upper->level,
2031 &slot);
2032 BUG_ON(ret);
2033 }
2034
2035 bytenr = btrfs_node_blockptr(upper->eb, slot);
2036 if (!lowest) {
2037 if (node->eb->start == bytenr) {
2038 btrfs_tree_unlock(upper->eb);
2039 upper->locked = 0;
2040 continue;
2041 }
2042 } else {
2043 BUG_ON(node->bytenr != bytenr);
2044 }
2045
2046 blocksize = btrfs_level_size(root, node->level);
2047 generation = btrfs_node_ptr_generation(upper->eb, slot);
2048 eb = read_tree_block(root, bytenr, blocksize, generation);
2049 btrfs_tree_lock(eb);
2050 btrfs_set_lock_blocking(eb);
2051
2052 if (!node->eb) {
2053 ret = btrfs_cow_block(trans, root, eb, upper->eb,
2054 slot, &eb);
2055 if (ret < 0) {
2056 err = ret;
2057 break;
2058 }
2059 btrfs_set_lock_blocking(eb);
2060 node->eb = eb;
2061 node->locked = 1;
2062 } else {
2063 btrfs_set_node_blockptr(upper->eb, slot,
2064 node->eb->start);
2065 btrfs_set_node_ptr_generation(upper->eb, slot,
2066 trans->transid);
2067 btrfs_mark_buffer_dirty(upper->eb);
2068
2069 ret = btrfs_inc_extent_ref(trans, root,
2070 node->eb->start, blocksize,
2071 upper->eb->start,
2072 btrfs_header_owner(upper->eb),
2073 node->level, 0);
2074 BUG_ON(ret);
2075
2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2077 BUG_ON(ret);
2078
2079 btrfs_tree_unlock(eb);
2080 free_extent_buffer(eb);
2081 }
2082 if (!lowest) {
2083 btrfs_tree_unlock(upper->eb);
2084 upper->locked = 0;
2085 }
2086 }
2087 path->lowest_level = 0;
2088 return err;
2089}
2090
2091static int link_to_upper(struct btrfs_trans_handle *trans,
2092 struct backref_node *node,
2093 struct btrfs_path *path)
2094{
2095 struct btrfs_key key;
2096 if (!node->eb || list_empty(&node->upper))
2097 return 0;
2098
2099 btrfs_node_key_to_cpu(node->eb, &key, 0);
2100 return do_relocation(trans, node, &key, path, 0);
2101}
2102
2103static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2104 struct backref_cache *cache,
2105 struct btrfs_path *path)
2106{
2107 struct backref_node *node;
2108 int level;
2109 int ret;
2110 int err = 0;
2111
2112 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2113 while (!list_empty(&cache->pending[level])) {
2114 node = list_entry(cache->pending[level].next,
2115 struct backref_node, lower);
2116 BUG_ON(node->level != level);
2117
2118 ret = link_to_upper(trans, node, path);
2119 if (ret < 0)
2120 err = ret;
2121 /*
2122 * this remove the node from the pending list and
2123 * may add some other nodes to the level + 1
2124 * pending list
2125 */
2126 remove_backref_node(cache, node);
2127 }
2128 }
2129 BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
2130 return err;
2131}
2132
2133static void mark_block_processed(struct reloc_control *rc,
2134 struct backref_node *node)
2135{
2136 u32 blocksize;
2137 if (node->level == 0 ||
2138 in_block_group(node->bytenr, rc->block_group)) {
2139 blocksize = btrfs_level_size(rc->extent_root, node->level);
2140 set_extent_bits(&rc->processed_blocks, node->bytenr,
2141 node->bytenr + blocksize - 1, EXTENT_DIRTY,
2142 GFP_NOFS);
2143 }
2144 node->processed = 1;
2145}
2146
2147/*
2148 * mark a block and all blocks directly/indirectly reference the block
2149 * as processed.
2150 */
2151static void update_processed_blocks(struct reloc_control *rc,
2152 struct backref_node *node)
2153{
2154 struct backref_node *next = node;
2155 struct backref_edge *edge;
2156 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2157 int index = 0;
2158
2159 while (next) {
2160 cond_resched();
2161 while (1) {
2162 if (next->processed)
2163 break;
2164
2165 mark_block_processed(rc, next);
2166
2167 if (list_empty(&next->upper))
2168 break;
2169
2170 edge = list_entry(next->upper.next,
2171 struct backref_edge, list[LOWER]);
2172 edges[index++] = edge;
2173 next = edge->node[UPPER];
2174 }
2175 next = walk_down_backref(edges, &index);
2176 }
2177}
2178
2179static int tree_block_processed(u64 bytenr, u32 blocksize,
2180 struct reloc_control *rc)
2181{
2182 if (test_range_bit(&rc->processed_blocks, bytenr,
2183 bytenr + blocksize - 1, EXTENT_DIRTY, 1))
2184 return 1;
2185 return 0;
2186}
2187
2188/*
2189 * check if there are any file extent pointers in the leaf point to
2190 * data require processing
2191 */
2192static int check_file_extents(struct reloc_control *rc,
2193 u64 bytenr, u32 blocksize, u64 ptr_gen)
2194{
2195 struct btrfs_key found_key;
2196 struct btrfs_file_extent_item *fi;
2197 struct extent_buffer *leaf;
2198 u32 nritems;
2199 int i;
2200 int ret = 0;
2201
2202 leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);
2203
2204 nritems = btrfs_header_nritems(leaf);
2205 for (i = 0; i < nritems; i++) {
2206 cond_resched();
2207 btrfs_item_key_to_cpu(leaf, &found_key, i);
2208 if (found_key.type != BTRFS_EXTENT_DATA_KEY)
2209 continue;
2210 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2211 if (btrfs_file_extent_type(leaf, fi) ==
2212 BTRFS_FILE_EXTENT_INLINE)
2213 continue;
2214 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2215 if (bytenr == 0)
2216 continue;
2217 if (in_block_group(bytenr, rc->block_group)) {
2218 ret = 1;
2219 break;
2220 }
2221 }
2222 free_extent_buffer(leaf);
2223 return ret;
2224}
2225
2226/*
2227 * scan child blocks of a given block to find blocks require processing
2228 */
2229static int add_child_blocks(struct btrfs_trans_handle *trans,
2230 struct reloc_control *rc,
2231 struct backref_node *node,
2232 struct rb_root *blocks)
2233{
2234 struct tree_block *block;
2235 struct rb_node *rb_node;
2236 u64 bytenr;
2237 u64 ptr_gen;
2238 u32 blocksize;
2239 u32 nritems;
2240 int i;
2241 int err = 0;
2242
2243 nritems = btrfs_header_nritems(node->eb);
2244 blocksize = btrfs_level_size(rc->extent_root, node->level - 1);
2245 for (i = 0; i < nritems; i++) {
2246 cond_resched();
2247 bytenr = btrfs_node_blockptr(node->eb, i);
2248 ptr_gen = btrfs_node_ptr_generation(node->eb, i);
2249 if (ptr_gen == trans->transid)
2250 continue;
2251 if (!in_block_group(bytenr, rc->block_group) &&
2252 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
2253 continue;
2254 if (tree_block_processed(bytenr, blocksize, rc))
2255 continue;
2256
2257 readahead_tree_block(rc->extent_root,
2258 bytenr, blocksize, ptr_gen);
2259 }
2260
2261 for (i = 0; i < nritems; i++) {
2262 cond_resched();
2263 bytenr = btrfs_node_blockptr(node->eb, i);
2264 ptr_gen = btrfs_node_ptr_generation(node->eb, i);
2265 if (ptr_gen == trans->transid)
2266 continue;
2267 if (!in_block_group(bytenr, rc->block_group) &&
2268 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
2269 continue;
2270 if (tree_block_processed(bytenr, blocksize, rc))
2271 continue;
2272 if (!in_block_group(bytenr, rc->block_group) &&
2273 !check_file_extents(rc, bytenr, blocksize, ptr_gen))
2274 continue;
2275
2276 block = kmalloc(sizeof(*block), GFP_NOFS);
2277 if (!block) {
2278 err = -ENOMEM;
2279 break;
2280 }
2281 block->bytenr = bytenr;
2282 btrfs_node_key_to_cpu(node->eb, &block->key, i);
2283 block->level = node->level - 1;
2284 block->key_ready = 1;
2285 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2286 BUG_ON(rb_node);
2287 }
2288 if (err)
2289 free_block_list(blocks);
2290 return err;
2291}
2292
2293/*
2294 * find adjacent blocks require processing
2295 */
2296static noinline_for_stack
2297int add_adjacent_blocks(struct btrfs_trans_handle *trans,
2298 struct reloc_control *rc,
2299 struct backref_cache *cache,
2300 struct rb_root *blocks, int level,
2301 struct backref_node **upper)
2302{
2303 struct backref_node *node;
2304 int ret = 0;
2305
2306 WARN_ON(!list_empty(&cache->pending[level]));
2307
2308 if (list_empty(&cache->pending[level + 1]))
2309 return 1;
2310
2311 node = list_entry(cache->pending[level + 1].next,
2312 struct backref_node, lower);
2313 if (node->eb)
2314 ret = add_child_blocks(trans, rc, node, blocks);
2315
2316 *upper = node;
2317 return ret;
2318}
2319
2320static int get_tree_block_key(struct reloc_control *rc,
2321 struct tree_block *block)
2322{
2323 struct extent_buffer *eb;
2324
2325 BUG_ON(block->key_ready);
2326 eb = read_tree_block(rc->extent_root, block->bytenr,
2327 block->key.objectid, block->key.offset);
2328 WARN_ON(btrfs_header_level(eb) != block->level);
2329 if (block->level == 0)
2330 btrfs_item_key_to_cpu(eb, &block->key, 0);
2331 else
2332 btrfs_node_key_to_cpu(eb, &block->key, 0);
2333 free_extent_buffer(eb);
2334 block->key_ready = 1;
2335 return 0;
2336}
2337
2338static int reada_tree_block(struct reloc_control *rc,
2339 struct tree_block *block)
2340{
2341 BUG_ON(block->key_ready);
2342 readahead_tree_block(rc->extent_root, block->bytenr,
2343 block->key.objectid, block->key.offset);
2344 return 0;
2345}
2346
2347/*
2348 * helper function to relocate a tree block
2349 */
2350static int relocate_tree_block(struct btrfs_trans_handle *trans,
2351 struct reloc_control *rc,
2352 struct backref_node *node,
2353 struct btrfs_key *key,
2354 struct btrfs_path *path)
2355{
2356 struct btrfs_root *root;
2357 int ret;
2358
2359 root = select_one_root(trans, node);
2360 if (unlikely(!root)) {
2361 rc->found_old_snapshot = 1;
2362 update_processed_blocks(rc, node);
2363 return 0;
2364 }
2365
2366 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2367 ret = do_relocation(trans, node, key, path, 1);
2368 if (ret < 0)
2369 goto out;
2370 if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
2371 ret = replace_file_extents(trans, rc, root,
2372 node->eb, NULL);
2373 if (ret < 0)
2374 goto out;
2375 }
2376 drop_node_buffer(node);
2377 } else if (!root->ref_cows) {
2378 path->lowest_level = node->level;
2379 ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2380 btrfs_release_path(root, path);
2381 if (ret < 0)
2382 goto out;
2383 } else if (root != node->root) {
2384 WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
2385 }
2386
2387 update_processed_blocks(rc, node);
2388 ret = 0;
2389out:
2390 drop_node_buffer(node);
2391 return ret;
2392}
2393
2394/*
2395 * relocate a list of blocks
2396 */
2397static noinline_for_stack
2398int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2399 struct reloc_control *rc, struct rb_root *blocks)
2400{
2401 struct backref_cache *cache;
2402 struct backref_node *node;
2403 struct btrfs_path *path;
2404 struct tree_block *block;
2405 struct rb_node *rb_node;
2406 int level = -1;
2407 int ret;
2408 int err = 0;
2409
2410 path = btrfs_alloc_path();
2411 if (!path)
2412 return -ENOMEM;
2413
2414 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2415 if (!cache) {
2416 btrfs_free_path(path);
2417 return -ENOMEM;
2418 }
2419
2420 backref_cache_init(cache);
2421
2422 rb_node = rb_first(blocks);
2423 while (rb_node) {
2424 block = rb_entry(rb_node, struct tree_block, rb_node);
2425 if (level == -1)
2426 level = block->level;
2427 else
2428 BUG_ON(level != block->level);
2429 if (!block->key_ready)
2430 reada_tree_block(rc, block);
2431 rb_node = rb_next(rb_node);
2432 }
2433
2434 rb_node = rb_first(blocks);
2435 while (rb_node) {
2436 block = rb_entry(rb_node, struct tree_block, rb_node);
2437 if (!block->key_ready)
2438 get_tree_block_key(rc, block);
2439 rb_node = rb_next(rb_node);
2440 }
2441
2442 rb_node = rb_first(blocks);
2443 while (rb_node) {
2444 block = rb_entry(rb_node, struct tree_block, rb_node);
2445
2446 node = build_backref_tree(rc, cache, &block->key,
2447 block->level, block->bytenr);
2448 if (IS_ERR(node)) {
2449 err = PTR_ERR(node);
2450 goto out;
2451 }
2452
2453 ret = relocate_tree_block(trans, rc, node, &block->key,
2454 path);
2455 if (ret < 0) {
2456 err = ret;
2457 goto out;
2458 }
2459 remove_backref_node(cache, node);
2460 rb_node = rb_next(rb_node);
2461 }
2462
2463 if (level > 0)
2464 goto out;
2465
2466 free_block_list(blocks);
2467
2468 /*
2469 * now backrefs of some upper level tree blocks have been cached,
2470 * try relocating blocks referenced by these upper level blocks.
2471 */
2472 while (1) {
2473 struct backref_node *upper = NULL;
2474 if (trans->transaction->in_commit ||
2475 trans->transaction->delayed_refs.flushing)
2476 break;
2477
2478 ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
2479 &upper);
2480 if (ret < 0)
2481 err = ret;
2482 if (ret != 0)
2483 break;
2484
2485 rb_node = rb_first(blocks);
2486 while (rb_node) {
2487 block = rb_entry(rb_node, struct tree_block, rb_node);
2488 if (trans->transaction->in_commit ||
2489 trans->transaction->delayed_refs.flushing)
2490 goto out;
2491 BUG_ON(!block->key_ready);
2492 node = build_backref_tree(rc, cache, &block->key,
2493 level, block->bytenr);
2494 if (IS_ERR(node)) {
2495 err = PTR_ERR(node);
2496 goto out;
2497 }
2498
2499 ret = relocate_tree_block(trans, rc, node,
2500 &block->key, path);
2501 if (ret < 0) {
2502 err = ret;
2503 goto out;
2504 }
2505 remove_backref_node(cache, node);
2506 rb_node = rb_next(rb_node);
2507 }
2508 free_block_list(blocks);
2509
2510 if (upper) {
2511 ret = link_to_upper(trans, upper, path);
2512 if (ret < 0) {
2513 err = ret;
2514 break;
2515 }
2516 remove_backref_node(cache, upper);
2517 }
2518 }
2519out:
2520 free_block_list(blocks);
2521
2522 ret = finish_pending_nodes(trans, cache, path);
2523 if (ret < 0)
2524 err = ret;
2525
2526 kfree(cache);
2527 btrfs_free_path(path);
2528 return err;
2529}
2530
2531static noinline_for_stack
2532int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
2533{
2534 u64 page_start;
2535 u64 page_end;
2536 unsigned long i;
2537 unsigned long first_index;
2538 unsigned long last_index;
2539 unsigned int total_read = 0;
2540 unsigned int total_dirty = 0;
2541 struct page *page;
2542 struct file_ra_state *ra;
2543 struct btrfs_ordered_extent *ordered;
2544 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2545 int ret = 0;
2546
2547 ra = kzalloc(sizeof(*ra), GFP_NOFS);
2548 if (!ra)
2549 return -ENOMEM;
2550
2551 mutex_lock(&inode->i_mutex);
2552 first_index = start >> PAGE_CACHE_SHIFT;
2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2554
2555 /* make sure the dirty trick played by the caller work */
2556 ret = invalidate_inode_pages2_range(inode->i_mapping,
2557 first_index, last_index);
2558 if (ret)
2559 goto out_unlock;
2560
2561 file_ra_state_init(ra, inode->i_mapping);
2562
2563 for (i = first_index ; i <= last_index; i++) {
2564 if (total_read % ra->ra_pages == 0) {
2565 btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2566 min(last_index, ra->ra_pages + i - 1));
2567 }
2568 total_read++;
2569again:
2570 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
2571 BUG_ON(1);
2572 page = grab_cache_page(inode->i_mapping, i);
2573 if (!page) {
2574 ret = -ENOMEM;
2575 goto out_unlock;
2576 }
2577 if (!PageUptodate(page)) {
2578 btrfs_readpage(NULL, page);
2579 lock_page(page);
2580 if (!PageUptodate(page)) {
2581 unlock_page(page);
2582 page_cache_release(page);
2583 ret = -EIO;
2584 goto out_unlock;
2585 }
2586 }
2587 wait_on_page_writeback(page);
2588
2589 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2590 page_end = page_start + PAGE_CACHE_SIZE - 1;
2591 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2592
2593 ordered = btrfs_lookup_ordered_extent(inode, page_start);
2594 if (ordered) {
2595 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2596 unlock_page(page);
2597 page_cache_release(page);
2598 btrfs_start_ordered_extent(inode, ordered, 1);
2599 btrfs_put_ordered_extent(ordered);
2600 goto again;
2601 }
2602 set_page_extent_mapped(page);
2603
2604 if (i == first_index)
2605 set_extent_bits(io_tree, page_start, page_end,
2606 EXTENT_BOUNDARY, GFP_NOFS);
2607 btrfs_set_extent_delalloc(inode, page_start, page_end);
2608
2609 set_page_dirty(page);
2610 total_dirty++;
2611
2612 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2613 unlock_page(page);
2614 page_cache_release(page);
2615 }
2616out_unlock:
2617 mutex_unlock(&inode->i_mutex);
2618 kfree(ra);
2619 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
2620 return ret;
2621}
2622
2623static noinline_for_stack
2624int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
2625{
2626 struct btrfs_root *root = BTRFS_I(inode)->root;
2627 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2628 struct extent_map *em;
2629 u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
2630 u64 end = start + extent_key->offset - 1;
2631
2632 em = alloc_extent_map(GFP_NOFS);
2633 em->start = start;
2634 em->len = extent_key->offset;
2635 em->block_len = extent_key->offset;
2636 em->block_start = extent_key->objectid;
2637 em->bdev = root->fs_info->fs_devices->latest_bdev;
2638 set_bit(EXTENT_FLAG_PINNED, &em->flags);
2639
2640 /* setup extent map to cheat btrfs_readpage */
2641 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2642 while (1) {
2643 int ret;
2644 spin_lock(&em_tree->lock);
2645 ret = add_extent_mapping(em_tree, em);
2646 spin_unlock(&em_tree->lock);
2647 if (ret != -EEXIST) {
2648 free_extent_map(em);
2649 break;
2650 }
2651 btrfs_drop_extent_cache(inode, start, end, 0);
2652 }
2653 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2654
2655 return relocate_inode_pages(inode, start, extent_key->offset);
2656}
2657
2658#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2659static int get_ref_objectid_v0(struct reloc_control *rc,
2660 struct btrfs_path *path,
2661 struct btrfs_key *extent_key,
2662 u64 *ref_objectid, int *path_change)
2663{
2664 struct btrfs_key key;
2665 struct extent_buffer *leaf;
2666 struct btrfs_extent_ref_v0 *ref0;
2667 int ret;
2668 int slot;
2669
2670 leaf = path->nodes[0];
2671 slot = path->slots[0];
2672 while (1) {
2673 if (slot >= btrfs_header_nritems(leaf)) {
2674 ret = btrfs_next_leaf(rc->extent_root, path);
2675 if (ret < 0)
2676 return ret;
2677 BUG_ON(ret > 0);
2678 leaf = path->nodes[0];
2679 slot = path->slots[0];
2680 if (path_change)
2681 *path_change = 1;
2682 }
2683 btrfs_item_key_to_cpu(leaf, &key, slot);
2684 if (key.objectid != extent_key->objectid)
2685 return -ENOENT;
2686
2687 if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
2688 slot++;
2689 continue;
2690 }
2691 ref0 = btrfs_item_ptr(leaf, slot,
2692 struct btrfs_extent_ref_v0);
2693 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
2694 break;
2695 }
2696 return 0;
2697}
2698#endif
2699
2700/*
2701 * helper to add a tree block to the list.
2702 * the major work is getting the generation and level of the block
2703 */
2704static int add_tree_block(struct reloc_control *rc,
2705 struct btrfs_key *extent_key,
2706 struct btrfs_path *path,
2707 struct rb_root *blocks)
2708{
2709 struct extent_buffer *eb;
2710 struct btrfs_extent_item *ei;
2711 struct btrfs_tree_block_info *bi;
2712 struct tree_block *block;
2713 struct rb_node *rb_node;
2714 u32 item_size;
2715 int level = -1;
2716 int generation;
2717
2718 eb = path->nodes[0];
2719 item_size = btrfs_item_size_nr(eb, path->slots[0]);
2720
2721 if (item_size >= sizeof(*ei) + sizeof(*bi)) {
2722 ei = btrfs_item_ptr(eb, path->slots[0],
2723 struct btrfs_extent_item);
2724 bi = (struct btrfs_tree_block_info *)(ei + 1);
2725 generation = btrfs_extent_generation(eb, ei);
2726 level = btrfs_tree_block_level(eb, bi);
2727 } else {
2728#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2729 u64 ref_owner;
2730 int ret;
2731
2732 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2733 ret = get_ref_objectid_v0(rc, path, extent_key,
2734 &ref_owner, NULL);
2735 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
2736 level = (int)ref_owner;
2737 /* FIXME: get real generation */
2738 generation = 0;
2739#else
2740 BUG();
2741#endif
2742 }
2743
2744 btrfs_release_path(rc->extent_root, path);
2745
2746 BUG_ON(level == -1);
2747
2748 block = kmalloc(sizeof(*block), GFP_NOFS);
2749 if (!block)
2750 return -ENOMEM;
2751
2752 block->bytenr = extent_key->objectid;
2753 block->key.objectid = extent_key->offset;
2754 block->key.offset = generation;
2755 block->level = level;
2756 block->key_ready = 0;
2757
2758 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
2759 BUG_ON(rb_node);
2760
2761 return 0;
2762}
2763
2764/*
2765 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
2766 */
2767static int __add_tree_block(struct reloc_control *rc,
2768 u64 bytenr, u32 blocksize,
2769 struct rb_root *blocks)
2770{
2771 struct btrfs_path *path;
2772 struct btrfs_key key;
2773 int ret;
2774
2775 if (tree_block_processed(bytenr, blocksize, rc))
2776 return 0;
2777
2778 if (tree_search(blocks, bytenr))
2779 return 0;
2780
2781 path = btrfs_alloc_path();
2782 if (!path)
2783 return -ENOMEM;
2784
2785 key.objectid = bytenr;
2786 key.type = BTRFS_EXTENT_ITEM_KEY;
2787 key.offset = blocksize;
2788
2789 path->search_commit_root = 1;
2790 path->skip_locking = 1;
2791 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
2792 if (ret < 0)
2793 goto out;
2794 BUG_ON(ret);
2795
2796 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
2797 ret = add_tree_block(rc, &key, path, blocks);
2798out:
2799 btrfs_free_path(path);
2800 return ret;
2801}
2802
2803/*
2804 * helper to check if the block use full backrefs for pointers in it
2805 */
2806static int block_use_full_backref(struct reloc_control *rc,
2807 struct extent_buffer *eb)
2808{
2809 struct btrfs_path *path;
2810 struct btrfs_extent_item *ei;
2811 struct btrfs_key key;
2812 u64 flags;
2813 int ret;
2814
2815 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
2816 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
2817 return 1;
2818
2819 path = btrfs_alloc_path();
2820 BUG_ON(!path);
2821
2822 key.objectid = eb->start;
2823 key.type = BTRFS_EXTENT_ITEM_KEY;
2824 key.offset = eb->len;
2825
2826 path->search_commit_root = 1;
2827 path->skip_locking = 1;
2828 ret = btrfs_search_slot(NULL, rc->extent_root,
2829 &key, path, 0, 0);
2830 BUG_ON(ret);
2831
2832 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2833 struct btrfs_extent_item);
2834 flags = btrfs_extent_flags(path->nodes[0], ei);
2835 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
2836 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
2837 ret = 1;
2838 else
2839 ret = 0;
2840 btrfs_free_path(path);
2841 return ret;
2842}
2843
2844/*
2845 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
2846 * this function scans fs tree to find blocks reference the data extent
2847 */
2848static int find_data_references(struct reloc_control *rc,
2849 struct btrfs_key *extent_key,
2850 struct extent_buffer *leaf,
2851 struct btrfs_extent_data_ref *ref,
2852 struct rb_root *blocks)
2853{
2854 struct btrfs_path *path;
2855 struct tree_block *block;
2856 struct btrfs_root *root;
2857 struct btrfs_file_extent_item *fi;
2858 struct rb_node *rb_node;
2859 struct btrfs_key key;
2860 u64 ref_root;
2861 u64 ref_objectid;
2862 u64 ref_offset;
2863 u32 ref_count;
2864 u32 nritems;
2865 int err = 0;
2866 int added = 0;
2867 int counted;
2868 int ret;
2869
2870 path = btrfs_alloc_path();
2871 if (!path)
2872 return -ENOMEM;
2873
2874 ref_root = btrfs_extent_data_ref_root(leaf, ref);
2875 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
2876 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
2877 ref_count = btrfs_extent_data_ref_count(leaf, ref);
2878
2879 root = read_fs_root(rc->extent_root->fs_info, ref_root);
2880 if (IS_ERR(root)) {
2881 err = PTR_ERR(root);
2882 goto out;
2883 }
2884
2885 key.objectid = ref_objectid;
2886 key.offset = ref_offset;
2887 key.type = BTRFS_EXTENT_DATA_KEY;
2888
2889 path->search_commit_root = 1;
2890 path->skip_locking = 1;
2891 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2892 if (ret < 0) {
2893 err = ret;
2894 goto out;
2895 }
2896
2897 leaf = path->nodes[0];
2898 nritems = btrfs_header_nritems(leaf);
2899 /*
2900 * the references in tree blocks that use full backrefs
2901 * are not counted in
2902 */
2903 if (block_use_full_backref(rc, leaf))
2904 counted = 0;
2905 else
2906 counted = 1;
2907 rb_node = tree_search(blocks, leaf->start);
2908 if (rb_node) {
2909 if (counted)
2910 added = 1;
2911 else
2912 path->slots[0] = nritems;
2913 }
2914
2915 while (ref_count > 0) {
2916 while (path->slots[0] >= nritems) {
2917 ret = btrfs_next_leaf(root, path);
2918 if (ret < 0) {
2919 err = ret;
2920 goto out;
2921 }
2922 if (ret > 0) {
2923 WARN_ON(1);
2924 goto out;
2925 }
2926
2927 leaf = path->nodes[0];
2928 nritems = btrfs_header_nritems(leaf);
2929 added = 0;
2930
2931 if (block_use_full_backref(rc, leaf))
2932 counted = 0;
2933 else
2934 counted = 1;
2935 rb_node = tree_search(blocks, leaf->start);
2936 if (rb_node) {
2937 if (counted)
2938 added = 1;
2939 else
2940 path->slots[0] = nritems;
2941 }
2942 }
2943
2944 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2945 if (key.objectid != ref_objectid ||
2946 key.type != BTRFS_EXTENT_DATA_KEY) {
2947 WARN_ON(1);
2948 break;
2949 }
2950
2951 fi = btrfs_item_ptr(leaf, path->slots[0],
2952 struct btrfs_file_extent_item);
2953
2954 if (btrfs_file_extent_type(leaf, fi) ==
2955 BTRFS_FILE_EXTENT_INLINE)
2956 goto next;
2957
2958 if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
2959 extent_key->objectid)
2960 goto next;
2961
2962 key.offset -= btrfs_file_extent_offset(leaf, fi);
2963 if (key.offset != ref_offset)
2964 goto next;
2965
2966 if (counted)
2967 ref_count--;
2968 if (added)
2969 goto next;
2970
2971 if (!tree_block_processed(leaf->start, leaf->len, rc)) {
2972 block = kmalloc(sizeof(*block), GFP_NOFS);
2973 if (!block) {
2974 err = -ENOMEM;
2975 break;
2976 }
2977 block->bytenr = leaf->start;
2978 btrfs_item_key_to_cpu(leaf, &block->key, 0);
2979 block->level = 0;
2980 block->key_ready = 1;
2981 rb_node = tree_insert(blocks, block->bytenr,
2982 &block->rb_node);
2983 BUG_ON(rb_node);
2984 }
2985 if (counted)
2986 added = 1;
2987 else
2988 path->slots[0] = nritems;
2989next:
2990 path->slots[0]++;
2991
2992 }
2993out:
2994 btrfs_free_path(path);
2995 return err;
2996}
2997
2998/*
2999 * hepler to find all tree blocks that reference a given data extent
3000 */
3001static noinline_for_stack
3002int add_data_references(struct reloc_control *rc,
3003 struct btrfs_key *extent_key,
3004 struct btrfs_path *path,
3005 struct rb_root *blocks)
3006{
3007 struct btrfs_key key;
3008 struct extent_buffer *eb;
3009 struct btrfs_extent_data_ref *dref;
3010 struct btrfs_extent_inline_ref *iref;
3011 unsigned long ptr;
3012 unsigned long end;
3013 u32 blocksize;
3014 int ret;
3015 int err = 0;
3016
3017 ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
3018 extent_key->offset);
3019 BUG_ON(ret < 0);
3020 if (ret > 0) {
3021 /* the relocated data is fragmented */
3022 rc->extents_skipped++;
3023 btrfs_release_path(rc->extent_root, path);
3024 return 0;
3025 }
3026
3027 blocksize = btrfs_level_size(rc->extent_root, 0);
3028
3029 eb = path->nodes[0];
3030 ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3031 end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3032#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3033 if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3034 ptr = end;
3035 else
3036#endif
3037 ptr += sizeof(struct btrfs_extent_item);
3038
3039 while (ptr < end) {
3040 iref = (struct btrfs_extent_inline_ref *)ptr;
3041 key.type = btrfs_extent_inline_ref_type(eb, iref);
3042 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3043 key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3044 ret = __add_tree_block(rc, key.offset, blocksize,
3045 blocks);
3046 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3047 dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3048 ret = find_data_references(rc, extent_key,
3049 eb, dref, blocks);
3050 } else {
3051 BUG();
3052 }
3053 ptr += btrfs_extent_inline_ref_size(key.type);
3054 }
3055 WARN_ON(ptr > end);
3056
3057 while (1) {
3058 cond_resched();
3059 eb = path->nodes[0];
3060 if (path->slots[0] >= btrfs_header_nritems(eb)) {
3061 ret = btrfs_next_leaf(rc->extent_root, path);
3062 if (ret < 0) {
3063 err = ret;
3064 break;
3065 }
3066 if (ret > 0)
3067 break;
3068 eb = path->nodes[0];
3069 }
3070
3071 btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3072 if (key.objectid != extent_key->objectid)
3073 break;
3074
3075#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3076 if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3077 key.type == BTRFS_EXTENT_REF_V0_KEY) {
3078#else
3079 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
3080 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3081#endif
3082 ret = __add_tree_block(rc, key.offset, blocksize,
3083 blocks);
3084 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3085 dref = btrfs_item_ptr(eb, path->slots[0],
3086 struct btrfs_extent_data_ref);
3087 ret = find_data_references(rc, extent_key,
3088 eb, dref, blocks);
3089 } else {
3090 ret = 0;
3091 }
3092 if (ret) {
3093 err = ret;
3094 break;
3095 }
3096 path->slots[0]++;
3097 }
3098 btrfs_release_path(rc->extent_root, path);
3099 if (err)
3100 free_block_list(blocks);
3101 return err;
3102}
3103
3104/*
3105 * hepler to find next unprocessed extent
3106 */
3107static noinline_for_stack
3108int find_next_extent(struct btrfs_trans_handle *trans,
3109 struct reloc_control *rc, struct btrfs_path *path)
3110{
3111 struct btrfs_key key;
3112 struct extent_buffer *leaf;
3113 u64 start, end, last;
3114 int ret;
3115
3116 last = rc->block_group->key.objectid + rc->block_group->key.offset;
3117 while (1) {
3118 cond_resched();
3119 if (rc->search_start >= last) {
3120 ret = 1;
3121 break;
3122 }
3123
3124 key.objectid = rc->search_start;
3125 key.type = BTRFS_EXTENT_ITEM_KEY;
3126 key.offset = 0;
3127
3128 path->search_commit_root = 1;
3129 path->skip_locking = 1;
3130 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3131 0, 0);
3132 if (ret < 0)
3133 break;
3134next:
3135 leaf = path->nodes[0];
3136 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3137 ret = btrfs_next_leaf(rc->extent_root, path);
3138 if (ret != 0)
3139 break;
3140 leaf = path->nodes[0];
3141 }
3142
3143 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3144 if (key.objectid >= last) {
3145 ret = 1;
3146 break;
3147 }
3148
3149 if (key.type != BTRFS_EXTENT_ITEM_KEY ||
3150 key.objectid + key.offset <= rc->search_start) {
3151 path->slots[0]++;
3152 goto next;
3153 }
3154
3155 ret = find_first_extent_bit(&rc->processed_blocks,
3156 key.objectid, &start, &end,
3157 EXTENT_DIRTY);
3158
3159 if (ret == 0 && start <= key.objectid) {
3160 btrfs_release_path(rc->extent_root, path);
3161 rc->search_start = end + 1;
3162 } else {
3163 rc->search_start = key.objectid + key.offset;
3164 return 0;
3165 }
3166 }
3167 btrfs_release_path(rc->extent_root, path);
3168 return ret;
3169}
3170
3171static void set_reloc_control(struct reloc_control *rc)
3172{
3173 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3174 mutex_lock(&fs_info->trans_mutex);
3175 fs_info->reloc_ctl = rc;
3176 mutex_unlock(&fs_info->trans_mutex);
3177}
3178
3179static void unset_reloc_control(struct reloc_control *rc)
3180{
3181 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3182 mutex_lock(&fs_info->trans_mutex);
3183 fs_info->reloc_ctl = NULL;
3184 mutex_unlock(&fs_info->trans_mutex);
3185}
3186
3187static int check_extent_flags(u64 flags)
3188{
3189 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3190 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3191 return 1;
3192 if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3193 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3194 return 1;
3195 if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3196 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
3197 return 1;
3198 return 0;
3199}
3200
3201static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3202{
3203 struct rb_root blocks = RB_ROOT;
3204 struct btrfs_key key;
3205 struct btrfs_trans_handle *trans = NULL;
3206 struct btrfs_path *path;
3207 struct btrfs_extent_item *ei;
3208 unsigned long nr;
3209 u64 flags;
3210 u32 item_size;
3211 int ret;
3212 int err = 0;
3213
3214 path = btrfs_alloc_path();
3215 if (!path)
3216 return -ENOMEM;
3217
3218 rc->search_start = rc->block_group->key.objectid;
3219 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3220 GFP_NOFS);
3221
3222 rc->create_reloc_root = 1;
3223 set_reloc_control(rc);
3224
3225 trans = btrfs_start_transaction(rc->extent_root, 1);
3226 btrfs_commit_transaction(trans, rc->extent_root);
3227
3228 while (1) {
3229 trans = btrfs_start_transaction(rc->extent_root, 1);
3230
3231 ret = find_next_extent(trans, rc, path);
3232 if (ret < 0)
3233 err = ret;
3234 if (ret != 0)
3235 break;
3236
3237 rc->extents_found++;
3238
3239 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3240 struct btrfs_extent_item);
3241 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3242 item_size = btrfs_item_size_nr(path->nodes[0],
3243 path->slots[0]);
3244 if (item_size >= sizeof(*ei)) {
3245 flags = btrfs_extent_flags(path->nodes[0], ei);
3246 ret = check_extent_flags(flags);
3247 BUG_ON(ret);
3248
3249 } else {
3250#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3251 u64 ref_owner;
3252 int path_change = 0;
3253
3254 BUG_ON(item_size !=
3255 sizeof(struct btrfs_extent_item_v0));
3256 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
3257 &path_change);
3258 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
3259 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
3260 else
3261 flags = BTRFS_EXTENT_FLAG_DATA;
3262
3263 if (path_change) {
3264 btrfs_release_path(rc->extent_root, path);
3265
3266 path->search_commit_root = 1;
3267 path->skip_locking = 1;
3268 ret = btrfs_search_slot(NULL, rc->extent_root,
3269 &key, path, 0, 0);
3270 if (ret < 0) {
3271 err = ret;
3272 break;
3273 }
3274 BUG_ON(ret > 0);
3275 }
3276#else
3277 BUG();
3278#endif
3279 }
3280
3281 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3282 ret = add_tree_block(rc, &key, path, &blocks);
3283 } else if (rc->stage == UPDATE_DATA_PTRS &&
3284 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3285 ret = add_data_references(rc, &key, path, &blocks);
3286 } else {
3287 btrfs_release_path(rc->extent_root, path);
3288 ret = 0;
3289 }
3290 if (ret < 0) {
3291 err = 0;
3292 break;
3293 }
3294
3295 if (!RB_EMPTY_ROOT(&blocks)) {
3296 ret = relocate_tree_blocks(trans, rc, &blocks);
3297 if (ret < 0) {
3298 err = ret;
3299 break;
3300 }
3301 }
3302
3303 nr = trans->blocks_used;
3304 btrfs_end_transaction_throttle(trans, rc->extent_root);
3305 trans = NULL;
3306 btrfs_btree_balance_dirty(rc->extent_root, nr);
3307
3308 if (rc->stage == MOVE_DATA_EXTENTS &&
3309 (flags & BTRFS_EXTENT_FLAG_DATA)) {
3310 rc->found_file_extent = 1;
3311 ret = relocate_data_extent(rc->data_inode, &key);
3312 if (ret < 0) {
3313 err = ret;
3314 break;
3315 }
3316 }
3317 }
3318 btrfs_free_path(path);
3319
3320 if (trans) {
3321 nr = trans->blocks_used;
3322 btrfs_end_transaction(trans, rc->extent_root);
3323 btrfs_btree_balance_dirty(rc->extent_root, nr);
3324 }
3325
3326 rc->create_reloc_root = 0;
3327 smp_mb();
3328
3329 if (rc->extents_found > 0) {
3330 trans = btrfs_start_transaction(rc->extent_root, 1);
3331 btrfs_commit_transaction(trans, rc->extent_root);
3332 }
3333
3334 merge_reloc_roots(rc);
3335
3336 unset_reloc_control(rc);
3337
3338 /* get rid of pinned extents */
3339 trans = btrfs_start_transaction(rc->extent_root, 1);
3340 btrfs_commit_transaction(trans, rc->extent_root);
3341
3342 return err;
3343}
3344
3345static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3346 struct btrfs_root *root,
3347 u64 objectid, u64 size)
3348{
3349 struct btrfs_path *path;
3350 struct btrfs_inode_item *item;
3351 struct extent_buffer *leaf;
3352 int ret;
3353
3354 path = btrfs_alloc_path();
3355 if (!path)
3356 return -ENOMEM;
3357
3358 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3359 if (ret)
3360 goto out;
3361
3362 leaf = path->nodes[0];
3363 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3364 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3365 btrfs_set_inode_generation(leaf, item, 1);
3366 btrfs_set_inode_size(leaf, item, size);
3367 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3368 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
3369 btrfs_mark_buffer_dirty(leaf);
3370 btrfs_release_path(root, path);
3371out:
3372 btrfs_free_path(path);
3373 return ret;
3374}
3375
3376/*
3377 * helper to create inode for data relocation.
3378 * the inode is in data relocation tree and its link count is 0
3379 */
3380static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3381 struct btrfs_block_group_cache *group)
3382{
3383 struct inode *inode = NULL;
3384 struct btrfs_trans_handle *trans;
3385 struct btrfs_root *root;
3386 struct btrfs_key key;
3387 unsigned long nr;
3388 u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
3389 int err = 0;
3390
3391 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
3392 if (IS_ERR(root))
3393 return ERR_CAST(root);
3394
3395 trans = btrfs_start_transaction(root, 1);
3396 BUG_ON(!trans);
3397
3398 err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
3399 if (err)
3400 goto out;
3401
3402 err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
3403 BUG_ON(err);
3404
3405 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
3406 group->key.offset, 0, group->key.offset,
3407 0, 0, 0);
3408 BUG_ON(err);
3409
3410 key.objectid = objectid;
3411 key.type = BTRFS_INODE_ITEM_KEY;
3412 key.offset = 0;
3413 inode = btrfs_iget(root->fs_info->sb, &key, root);
3414 BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3415 BTRFS_I(inode)->index_cnt = group->key.objectid;
3416
3417 err = btrfs_orphan_add(trans, inode);
3418out:
3419 nr = trans->blocks_used;
3420 btrfs_end_transaction(trans, root);
3421
3422 btrfs_btree_balance_dirty(root, nr);
3423 if (err) {
3424 if (inode)
3425 iput(inode);
3426 inode = ERR_PTR(err);
3427 }
3428 return inode;
3429}
3430
3431/*
3432 * function to relocate all extents in a block group.
3433 */
3434int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3435{
3436 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3437 struct reloc_control *rc;
3438 int ret;
3439 int err = 0;
3440
3441 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3442 if (!rc)
3443 return -ENOMEM;
3444
3445 mapping_tree_init(&rc->reloc_root_tree);
3446 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
3447 INIT_LIST_HEAD(&rc->reloc_roots);
3448
3449 rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
3450 BUG_ON(!rc->block_group);
3451
3452 btrfs_init_workers(&rc->workers, "relocate",
3453 fs_info->thread_pool_size);
3454
3455 rc->extent_root = extent_root;
3456 btrfs_prepare_block_group_relocation(extent_root, rc->block_group);
3457
3458 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3459 if (IS_ERR(rc->data_inode)) {
3460 err = PTR_ERR(rc->data_inode);
3461 rc->data_inode = NULL;
3462 goto out;
3463 }
3464
3465 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
3466 (unsigned long long)rc->block_group->key.objectid,
3467 (unsigned long long)rc->block_group->flags);
3468
3469 btrfs_start_delalloc_inodes(fs_info->tree_root);
3470 btrfs_wait_ordered_extents(fs_info->tree_root, 0);
3471
3472 while (1) {
3473 mutex_lock(&fs_info->cleaner_mutex);
3474 btrfs_clean_old_snapshots(fs_info->tree_root);
3475 mutex_unlock(&fs_info->cleaner_mutex);
3476
3477 rc->extents_found = 0;
3478 rc->extents_skipped = 0;
3479
3480 ret = relocate_block_group(rc);
3481 if (ret < 0) {
3482 err = ret;
3483 break;
3484 }
3485
3486 if (rc->extents_found == 0)
3487 break;
3488
3489 printk(KERN_INFO "btrfs: found %llu extents\n",
3490 (unsigned long long)rc->extents_found);
3491
3492 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3493 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
3494 invalidate_mapping_pages(rc->data_inode->i_mapping,
3495 0, -1);
3496 rc->stage = UPDATE_DATA_PTRS;
3497 } else if (rc->stage == UPDATE_DATA_PTRS &&
3498 rc->extents_skipped >= rc->extents_found) {
3499 iput(rc->data_inode);
3500 rc->data_inode = create_reloc_inode(fs_info,
3501 rc->block_group);
3502 if (IS_ERR(rc->data_inode)) {
3503 err = PTR_ERR(rc->data_inode);
3504 rc->data_inode = NULL;
3505 break;
3506 }
3507 rc->stage = MOVE_DATA_EXTENTS;
3508 rc->found_file_extent = 0;
3509 }
3510 }
3511
3512 filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
3513 rc->block_group->key.objectid,
3514 rc->block_group->key.objectid +
3515 rc->block_group->key.offset - 1);
3516
3517 WARN_ON(rc->block_group->pinned > 0);
3518 WARN_ON(rc->block_group->reserved > 0);
3519 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
3520out:
3521 iput(rc->data_inode);
3522 btrfs_stop_workers(&rc->workers);
3523 btrfs_put_block_group(rc->block_group);
3524 kfree(rc);
3525 return err;
3526}
3527
3528/*
3529 * recover relocation interrupted by system crash.
3530 *
3531 * this function resumes merging reloc trees with corresponding fs trees.
3532 * this is important for keeping the sharing of tree blocks
3533 */
3534int btrfs_recover_relocation(struct btrfs_root *root)
3535{
3536 LIST_HEAD(reloc_roots);
3537 struct btrfs_key key;
3538 struct btrfs_root *fs_root;
3539 struct btrfs_root *reloc_root;
3540 struct btrfs_path *path;
3541 struct extent_buffer *leaf;
3542 struct reloc_control *rc = NULL;
3543 struct btrfs_trans_handle *trans;
3544 int ret;
3545 int err = 0;
3546
3547 path = btrfs_alloc_path();
3548 if (!path)
3549 return -ENOMEM;
3550
3551 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
3552 key.type = BTRFS_ROOT_ITEM_KEY;
3553 key.offset = (u64)-1;
3554
3555 while (1) {
3556 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
3557 path, 0, 0);
3558 if (ret < 0) {
3559 err = ret;
3560 goto out;
3561 }
3562 if (ret > 0) {
3563 if (path->slots[0] == 0)
3564 break;
3565 path->slots[0]--;
3566 }
3567 leaf = path->nodes[0];
3568 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3569 btrfs_release_path(root->fs_info->tree_root, path);
3570
3571 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
3572 key.type != BTRFS_ROOT_ITEM_KEY)
3573 break;
3574
3575 reloc_root = btrfs_read_fs_root_no_radix(root, &key);
3576 if (IS_ERR(reloc_root)) {
3577 err = PTR_ERR(reloc_root);
3578 goto out;
3579 }
3580
3581 list_add(&reloc_root->root_list, &reloc_roots);
3582
3583 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
3584 fs_root = read_fs_root(root->fs_info,
3585 reloc_root->root_key.offset);
3586 if (IS_ERR(fs_root)) {
3587 err = PTR_ERR(fs_root);
3588 goto out;
3589 }
3590 }
3591
3592 if (key.offset == 0)
3593 break;
3594
3595 key.offset--;
3596 }
3597 btrfs_release_path(root->fs_info->tree_root, path);
3598
3599 if (list_empty(&reloc_roots))
3600 goto out;
3601
3602 rc = kzalloc(sizeof(*rc), GFP_NOFS);
3603 if (!rc) {
3604 err = -ENOMEM;
3605 goto out;
3606 }
3607
3608 mapping_tree_init(&rc->reloc_root_tree);
3609 INIT_LIST_HEAD(&rc->reloc_roots);
3610 btrfs_init_workers(&rc->workers, "relocate",
3611 root->fs_info->thread_pool_size);
3612 rc->extent_root = root->fs_info->extent_root;
3613
3614 set_reloc_control(rc);
3615
3616 while (!list_empty(&reloc_roots)) {
3617 reloc_root = list_entry(reloc_roots.next,
3618 struct btrfs_root, root_list);
3619 list_del(&reloc_root->root_list);
3620
3621 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
3622 list_add_tail(&reloc_root->root_list,
3623 &rc->reloc_roots);
3624 continue;
3625 }
3626
3627 fs_root = read_fs_root(root->fs_info,
3628 reloc_root->root_key.offset);
3629 BUG_ON(IS_ERR(fs_root));
3630
3631 __add_reloc_root(reloc_root);
3632 fs_root->reloc_root = reloc_root;
3633 }
3634
3635 trans = btrfs_start_transaction(rc->extent_root, 1);
3636 btrfs_commit_transaction(trans, rc->extent_root);
3637
3638 merge_reloc_roots(rc);
3639
3640 unset_reloc_control(rc);
3641
3642 trans = btrfs_start_transaction(rc->extent_root, 1);
3643 btrfs_commit_transaction(trans, rc->extent_root);
3644out:
3645 if (rc) {
3646 btrfs_stop_workers(&rc->workers);
3647 kfree(rc);
3648 }
3649 while (!list_empty(&reloc_roots)) {
3650 reloc_root = list_entry(reloc_roots.next,
3651 struct btrfs_root, root_list);
3652 list_del(&reloc_root->root_list);
3653 free_extent_buffer(reloc_root->node);
3654 free_extent_buffer(reloc_root->commit_root);
3655 kfree(reloc_root);
3656 }
3657 btrfs_free_path(path);
3658
3659 if (err == 0) {
3660 /* cleanup orphan inode in data relocation tree */
3661 fs_root = read_fs_root(root->fs_info,
3662 BTRFS_DATA_RELOC_TREE_OBJECTID);
3663 if (IS_ERR(fs_root))
3664 err = PTR_ERR(fs_root);
3665 }
3666 return err;
3667}
3668
3669/*
3670 * helper to add ordered checksum for data relocation.
3671 *
3672 * cloning checksum properly handles the nodatasum extents.
3673 * it also saves CPU time to re-calculate the checksum.
3674 */
3675int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
3676{
3677 struct btrfs_ordered_sum *sums;
3678 struct btrfs_sector_sum *sector_sum;
3679 struct btrfs_ordered_extent *ordered;
3680 struct btrfs_root *root = BTRFS_I(inode)->root;
3681 size_t offset;
3682 int ret;
3683 u64 disk_bytenr;
3684 LIST_HEAD(list);
3685
3686 ordered = btrfs_lookup_ordered_extent(inode, file_pos);
3687 BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
3688
3689 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
3690 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
3691 disk_bytenr + len - 1, &list);
3692
3693 while (!list_empty(&list)) {
3694 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
3695 list_del_init(&sums->list);
3696
3697 sector_sum = sums->sums;
3698 sums->bytenr = ordered->start;
3699
3700 offset = 0;
3701 while (offset < sums->len) {
3702 sector_sum->bytenr += ordered->start - disk_bytenr;
3703 sector_sum++;
3704 offset += root->sectorsize;
3705 }
3706
3707 btrfs_add_ordered_sum(inode, ordered, sums);
3708 }
3709 btrfs_put_ordered_extent(ordered);
3710 return 0;
3711}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b48650de4472..0ddc6d61c55a 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -111,6 +111,15 @@ out:
111 return ret; 111 return ret;
112} 112}
113 113
114int btrfs_set_root_node(struct btrfs_root_item *item,
115 struct extent_buffer *node)
116{
117 btrfs_set_root_bytenr(item, node->start);
118 btrfs_set_root_level(item, btrfs_header_level(node));
119 btrfs_set_root_generation(item, btrfs_header_generation(node));
120 return 0;
121}
122
114/* 123/*
115 * copy the data in 'item' into the btree 124 * copy the data in 'item' into the btree
116 */ 125 */
@@ -164,8 +173,7 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
164 * offset lower than the latest root. They need to be queued for deletion to 173 * offset lower than the latest root. They need to be queued for deletion to
165 * finish what was happening when we crashed. 174 * finish what was happening when we crashed.
166 */ 175 */
167int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, 176int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
168 struct btrfs_root *latest)
169{ 177{
170 struct btrfs_root *dead_root; 178 struct btrfs_root *dead_root;
171 struct btrfs_item *item; 179 struct btrfs_item *item;
@@ -227,10 +235,7 @@ again:
227 goto err; 235 goto err;
228 } 236 }
229 237
230 if (objectid == BTRFS_TREE_RELOC_OBJECTID) 238 ret = btrfs_add_dead_root(dead_root);
231 ret = btrfs_add_dead_reloc_root(dead_root);
232 else
233 ret = btrfs_add_dead_root(dead_root, latest);
234 if (ret) 239 if (ret)
235 goto err; 240 goto err;
236 goto again; 241 goto again;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2ff7cd2db25f..e9ef8c3307fe 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -52,7 +52,6 @@
52#include "export.h" 52#include "export.h"
53#include "compression.h" 53#include "compression.h"
54 54
55
56static struct super_operations btrfs_super_ops; 55static struct super_operations btrfs_super_ops;
57 56
58static void btrfs_put_super(struct super_block *sb) 57static void btrfs_put_super(struct super_block *sb)
@@ -322,7 +321,7 @@ static int btrfs_fill_super(struct super_block *sb,
322 struct dentry *root_dentry; 321 struct dentry *root_dentry;
323 struct btrfs_super_block *disk_super; 322 struct btrfs_super_block *disk_super;
324 struct btrfs_root *tree_root; 323 struct btrfs_root *tree_root;
325 struct btrfs_inode *bi; 324 struct btrfs_key key;
326 int err; 325 int err;
327 326
328 sb->s_maxbytes = MAX_LFS_FILESIZE; 327 sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -341,23 +340,15 @@ static int btrfs_fill_super(struct super_block *sb,
341 } 340 }
342 sb->s_fs_info = tree_root; 341 sb->s_fs_info = tree_root;
343 disk_super = &tree_root->fs_info->super_copy; 342 disk_super = &tree_root->fs_info->super_copy;
344 inode = btrfs_iget_locked(sb, BTRFS_FIRST_FREE_OBJECTID,
345 tree_root->fs_info->fs_root);
346 bi = BTRFS_I(inode);
347 bi->location.objectid = inode->i_ino;
348 bi->location.offset = 0;
349 bi->root = tree_root->fs_info->fs_root;
350
351 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
352 343
353 if (!inode) { 344 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
354 err = -ENOMEM; 345 key.type = BTRFS_INODE_ITEM_KEY;
346 key.offset = 0;
347 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root);
348 if (IS_ERR(inode)) {
349 err = PTR_ERR(inode);
355 goto fail_close; 350 goto fail_close;
356 } 351 }
357 if (inode->i_state & I_NEW) {
358 btrfs_read_locked_inode(inode);
359 unlock_new_inode(inode);
360 }
361 352
362 root_dentry = d_alloc_root(inode); 353 root_dentry = d_alloc_root(inode);
363 if (!root_dentry) { 354 if (!root_dentry) {
@@ -584,7 +575,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
584 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) 575 if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
585 return -EINVAL; 576 return -EINVAL;
586 577
587 ret = btrfs_cleanup_reloc_trees(root); 578 /* recover relocation */
579 ret = btrfs_recover_relocation(root);
588 WARN_ON(ret); 580 WARN_ON(ret);
589 581
590 ret = btrfs_cleanup_fs_roots(root->fs_info); 582 ret = btrfs_cleanup_fs_roots(root->fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 01b143605ec1..2e177d7f4bb9 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -25,7 +25,6 @@
25#include "disk-io.h" 25#include "disk-io.h"
26#include "transaction.h" 26#include "transaction.h"
27#include "locking.h" 27#include "locking.h"
28#include "ref-cache.h"
29#include "tree-log.h" 28#include "tree-log.h"
30 29
31#define BTRFS_ROOT_TRANS_TAG 0 30#define BTRFS_ROOT_TRANS_TAG 0
@@ -94,45 +93,37 @@ static noinline int join_transaction(struct btrfs_root *root)
94 * to make sure the old root from before we joined the transaction is deleted 93 * to make sure the old root from before we joined the transaction is deleted
95 * when the transaction commits 94 * when the transaction commits
96 */ 95 */
97noinline int btrfs_record_root_in_trans(struct btrfs_root *root) 96static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root)
98{ 98{
99 struct btrfs_dirty_root *dirty; 99 if (root->ref_cows && root->last_trans < trans->transid) {
100 u64 running_trans_id = root->fs_info->running_transaction->transid;
101 if (root->ref_cows && root->last_trans < running_trans_id) {
102 WARN_ON(root == root->fs_info->extent_root); 100 WARN_ON(root == root->fs_info->extent_root);
103 if (root->root_item.refs != 0) { 101 WARN_ON(root->root_item.refs == 0);
104 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 102 WARN_ON(root->commit_root != root->node);
105 (unsigned long)root->root_key.objectid, 103
106 BTRFS_ROOT_TRANS_TAG); 104 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
107 105 (unsigned long)root->root_key.objectid,
108 dirty = kmalloc(sizeof(*dirty), GFP_NOFS); 106 BTRFS_ROOT_TRANS_TAG);
109 BUG_ON(!dirty); 107 root->last_trans = trans->transid;
110 dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS); 108 btrfs_init_reloc_root(trans, root);
111 BUG_ON(!dirty->root); 109 }
112 dirty->latest_root = root; 110 return 0;
113 INIT_LIST_HEAD(&dirty->list); 111}
114
115 root->commit_root = btrfs_root_node(root);
116
117 memcpy(dirty->root, root, sizeof(*root));
118 spin_lock_init(&dirty->root->node_lock);
119 spin_lock_init(&dirty->root->list_lock);
120 mutex_init(&dirty->root->objectid_mutex);
121 mutex_init(&dirty->root->log_mutex);
122 INIT_LIST_HEAD(&dirty->root->dead_list);
123 dirty->root->node = root->commit_root;
124 dirty->root->commit_root = NULL;
125 112
126 spin_lock(&root->list_lock); 113int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
127 list_add(&dirty->root->dead_list, &root->dead_list); 114 struct btrfs_root *root)
128 spin_unlock(&root->list_lock); 115{
116 if (!root->ref_cows)
117 return 0;
129 118
130 root->dirty_root = dirty; 119 mutex_lock(&root->fs_info->trans_mutex);
131 } else { 120 if (root->last_trans == trans->transid) {
132 WARN_ON(1); 121 mutex_unlock(&root->fs_info->trans_mutex);
133 } 122 return 0;
134 root->last_trans = running_trans_id;
135 } 123 }
124
125 record_root_in_trans(trans, root);
126 mutex_unlock(&root->fs_info->trans_mutex);
136 return 0; 127 return 0;
137} 128}
138 129
@@ -181,7 +172,6 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181 ret = join_transaction(root); 172 ret = join_transaction(root);
182 BUG_ON(ret); 173 BUG_ON(ret);
183 174
184 btrfs_record_root_in_trans(root);
185 h->transid = root->fs_info->running_transaction->transid; 175 h->transid = root->fs_info->running_transaction->transid;
186 h->transaction = root->fs_info->running_transaction; 176 h->transaction = root->fs_info->running_transaction;
187 h->blocks_reserved = num_blocks; 177 h->blocks_reserved = num_blocks;
@@ -192,6 +182,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
192 h->delayed_ref_updates = 0; 182 h->delayed_ref_updates = 0;
193 183
194 root->fs_info->running_transaction->use_count++; 184 root->fs_info->running_transaction->use_count++;
185 record_root_in_trans(h, root);
195 mutex_unlock(&root->fs_info->trans_mutex); 186 mutex_unlock(&root->fs_info->trans_mutex);
196 return h; 187 return h;
197} 188}
@@ -233,6 +224,7 @@ static noinline int wait_for_commit(struct btrfs_root *root,
233 return 0; 224 return 0;
234} 225}
235 226
227#if 0
236/* 228/*
237 * rate limit against the drop_snapshot code. This helps to slow down new 229 * rate limit against the drop_snapshot code. This helps to slow down new
238 * operations if the drop_snapshot code isn't able to keep up. 230 * operations if the drop_snapshot code isn't able to keep up.
@@ -273,6 +265,7 @@ harder:
273 goto harder; 265 goto harder;
274 } 266 }
275} 267}
268#endif
276 269
277void btrfs_throttle(struct btrfs_root *root) 270void btrfs_throttle(struct btrfs_root *root)
278{ 271{
@@ -280,7 +273,6 @@ void btrfs_throttle(struct btrfs_root *root)
280 if (!root->fs_info->open_ioctl_trans) 273 if (!root->fs_info->open_ioctl_trans)
281 wait_current_trans(root); 274 wait_current_trans(root);
282 mutex_unlock(&root->fs_info->trans_mutex); 275 mutex_unlock(&root->fs_info->trans_mutex);
283 throttle_on_drops(root);
284} 276}
285 277
286static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 278static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
@@ -323,9 +315,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
323 memset(trans, 0, sizeof(*trans)); 315 memset(trans, 0, sizeof(*trans));
324 kmem_cache_free(btrfs_trans_handle_cachep, trans); 316 kmem_cache_free(btrfs_trans_handle_cachep, trans);
325 317
326 if (throttle)
327 throttle_on_drops(root);
328
329 return 0; 318 return 0;
330} 319}
331 320
@@ -462,12 +451,8 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
462 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 451 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
463 if (old_root_bytenr == root->node->start) 452 if (old_root_bytenr == root->node->start)
464 break; 453 break;
465 btrfs_set_root_bytenr(&root->root_item,
466 root->node->start);
467 btrfs_set_root_level(&root->root_item,
468 btrfs_header_level(root->node));
469 btrfs_set_root_generation(&root->root_item, trans->transid);
470 454
455 btrfs_set_root_node(&root->root_item, root->node);
471 ret = btrfs_update_root(trans, tree_root, 456 ret = btrfs_update_root(trans, tree_root,
472 &root->root_key, 457 &root->root_key,
473 &root->root_item); 458 &root->root_item);
@@ -477,14 +462,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
477 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 462 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
478 BUG_ON(ret); 463 BUG_ON(ret);
479 } 464 }
465 free_extent_buffer(root->commit_root);
466 root->commit_root = btrfs_root_node(root);
480 return 0; 467 return 0;
481} 468}
482 469
483/* 470/*
484 * update all the cowonly tree roots on disk 471 * update all the cowonly tree roots on disk
485 */ 472 */
486int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 473static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
487 struct btrfs_root *root) 474 struct btrfs_root *root)
488{ 475{
489 struct btrfs_fs_info *fs_info = root->fs_info; 476 struct btrfs_fs_info *fs_info = root->fs_info;
490 struct list_head *next; 477 struct list_head *next;
@@ -520,118 +507,54 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
520 * a dirty root struct and adds it into the list of dead roots that need to 507 * a dirty root struct and adds it into the list of dead roots that need to
521 * be deleted 508 * be deleted
522 */ 509 */
523int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) 510int btrfs_add_dead_root(struct btrfs_root *root)
524{ 511{
525 struct btrfs_dirty_root *dirty;
526
527 dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
528 if (!dirty)
529 return -ENOMEM;
530 dirty->root = root;
531 dirty->latest_root = latest;
532
533 mutex_lock(&root->fs_info->trans_mutex); 512 mutex_lock(&root->fs_info->trans_mutex);
534 list_add(&dirty->list, &latest->fs_info->dead_roots); 513 list_add(&root->root_list, &root->fs_info->dead_roots);
535 mutex_unlock(&root->fs_info->trans_mutex); 514 mutex_unlock(&root->fs_info->trans_mutex);
536 return 0; 515 return 0;
537} 516}
538 517
539/* 518/*
540 * at transaction commit time we need to schedule the old roots for 519 * update all the cowonly tree roots on disk
541 * deletion via btrfs_drop_snapshot. This runs through all the
542 * reference counted roots that were modified in the current
543 * transaction and puts them into the drop list
544 */ 520 */
545static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, 521static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
546 struct radix_tree_root *radix, 522 struct btrfs_root *root)
547 struct list_head *list)
548{ 523{
549 struct btrfs_dirty_root *dirty;
550 struct btrfs_root *gang[8]; 524 struct btrfs_root *gang[8];
551 struct btrfs_root *root; 525 struct btrfs_fs_info *fs_info = root->fs_info;
552 int i; 526 int i;
553 int ret; 527 int ret;
554 int err = 0; 528 int err = 0;
555 u32 refs;
556 529
557 while (1) { 530 while (1) {
558 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, 531 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
532 (void **)gang, 0,
559 ARRAY_SIZE(gang), 533 ARRAY_SIZE(gang),
560 BTRFS_ROOT_TRANS_TAG); 534 BTRFS_ROOT_TRANS_TAG);
561 if (ret == 0) 535 if (ret == 0)
562 break; 536 break;
563 for (i = 0; i < ret; i++) { 537 for (i = 0; i < ret; i++) {
564 root = gang[i]; 538 root = gang[i];
565 radix_tree_tag_clear(radix, 539 radix_tree_tag_clear(&fs_info->fs_roots_radix,
566 (unsigned long)root->root_key.objectid, 540 (unsigned long)root->root_key.objectid,
567 BTRFS_ROOT_TRANS_TAG); 541 BTRFS_ROOT_TRANS_TAG);
568
569 BUG_ON(!root->ref_tree);
570 dirty = root->dirty_root;
571 542
572 btrfs_free_log(trans, root); 543 btrfs_free_log(trans, root);
573 btrfs_free_reloc_root(trans, root); 544 btrfs_update_reloc_root(trans, root);
574
575 if (root->commit_root == root->node) {
576 WARN_ON(root->node->start !=
577 btrfs_root_bytenr(&root->root_item));
578
579 free_extent_buffer(root->commit_root);
580 root->commit_root = NULL;
581 root->dirty_root = NULL;
582
583 spin_lock(&root->list_lock);
584 list_del_init(&dirty->root->dead_list);
585 spin_unlock(&root->list_lock);
586 545
587 kfree(dirty->root); 546 if (root->commit_root == root->node)
588 kfree(dirty);
589
590 /* make sure to update the root on disk
591 * so we get any updates to the block used
592 * counts
593 */
594 err = btrfs_update_root(trans,
595 root->fs_info->tree_root,
596 &root->root_key,
597 &root->root_item);
598 continue; 547 continue;
599 }
600 548
601 memset(&root->root_item.drop_progress, 0, 549 free_extent_buffer(root->commit_root);
602 sizeof(struct btrfs_disk_key)); 550 root->commit_root = btrfs_root_node(root);
603 root->root_item.drop_level = 0; 551
604 root->commit_root = NULL; 552 btrfs_set_root_node(&root->root_item, root->node);
605 root->dirty_root = NULL; 553 err = btrfs_update_root(trans, fs_info->tree_root,
606 root->root_key.offset = root->fs_info->generation;
607 btrfs_set_root_bytenr(&root->root_item,
608 root->node->start);
609 btrfs_set_root_level(&root->root_item,
610 btrfs_header_level(root->node));
611 btrfs_set_root_generation(&root->root_item,
612 root->root_key.offset);
613
614 err = btrfs_insert_root(trans, root->fs_info->tree_root,
615 &root->root_key, 554 &root->root_key,
616 &root->root_item); 555 &root->root_item);
617 if (err) 556 if (err)
618 break; 557 break;
619
620 refs = btrfs_root_refs(&dirty->root->root_item);
621 btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
622 err = btrfs_update_root(trans, root->fs_info->tree_root,
623 &dirty->root->root_key,
624 &dirty->root->root_item);
625
626 BUG_ON(err);
627 if (refs == 1) {
628 list_add(&dirty->list, list);
629 } else {
630 WARN_ON(1);
631 free_extent_buffer(dirty->root->node);
632 kfree(dirty->root);
633 kfree(dirty);
634 }
635 } 558 }
636 } 559 }
637 return err; 560 return err;
@@ -688,12 +611,8 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
688 TASK_UNINTERRUPTIBLE); 611 TASK_UNINTERRUPTIBLE);
689 mutex_unlock(&info->trans_mutex); 612 mutex_unlock(&info->trans_mutex);
690 613
691 atomic_dec(&info->throttles);
692 wake_up(&info->transaction_throttle);
693
694 schedule(); 614 schedule();
695 615
696 atomic_inc(&info->throttles);
697 mutex_lock(&info->trans_mutex); 616 mutex_lock(&info->trans_mutex);
698 finish_wait(&info->transaction_wait, &wait); 617 finish_wait(&info->transaction_wait, &wait);
699 } 618 }
@@ -705,111 +624,61 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
705 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on 624 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
706 * all of them 625 * all of them
707 */ 626 */
708static noinline int drop_dirty_roots(struct btrfs_root *tree_root, 627int btrfs_drop_dead_root(struct btrfs_root *root)
709 struct list_head *list)
710{ 628{
711 struct btrfs_dirty_root *dirty;
712 struct btrfs_trans_handle *trans; 629 struct btrfs_trans_handle *trans;
630 struct btrfs_root *tree_root = root->fs_info->tree_root;
713 unsigned long nr; 631 unsigned long nr;
714 u64 num_bytes; 632 int ret;
715 u64 bytes_used;
716 u64 max_useless;
717 int ret = 0;
718 int err;
719
720 while (!list_empty(list)) {
721 struct btrfs_root *root;
722
723 dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
724 list_del_init(&dirty->list);
725
726 num_bytes = btrfs_root_used(&dirty->root->root_item);
727 root = dirty->latest_root;
728 atomic_inc(&root->fs_info->throttles);
729
730 while (1) {
731 /*
732 * we don't want to jump in and create a bunch of
733 * delayed refs if the transaction is starting to close
734 */
735 wait_transaction_pre_flush(tree_root->fs_info);
736 trans = btrfs_start_transaction(tree_root, 1);
737
738 /*
739 * we've joined a transaction, make sure it isn't
740 * closing right now
741 */
742 if (trans->transaction->delayed_refs.flushing) {
743 btrfs_end_transaction(trans, tree_root);
744 continue;
745 }
746
747 mutex_lock(&root->fs_info->drop_mutex);
748 ret = btrfs_drop_snapshot(trans, dirty->root);
749 if (ret != -EAGAIN)
750 break;
751 mutex_unlock(&root->fs_info->drop_mutex);
752 633
753 err = btrfs_update_root(trans, 634 while (1) {
754 tree_root, 635 /*
755 &dirty->root->root_key, 636 * we don't want to jump in and create a bunch of
756 &dirty->root->root_item); 637 * delayed refs if the transaction is starting to close
757 if (err) 638 */
758 ret = err; 639 wait_transaction_pre_flush(tree_root->fs_info);
759 nr = trans->blocks_used; 640 trans = btrfs_start_transaction(tree_root, 1);
760 ret = btrfs_end_transaction(trans, tree_root);
761 BUG_ON(ret);
762 641
763 btrfs_btree_balance_dirty(tree_root, nr); 642 /*
764 cond_resched(); 643 * we've joined a transaction, make sure it isn't
644 * closing right now
645 */
646 if (trans->transaction->delayed_refs.flushing) {
647 btrfs_end_transaction(trans, tree_root);
648 continue;
765 } 649 }
766 BUG_ON(ret);
767 atomic_dec(&root->fs_info->throttles);
768 wake_up(&root->fs_info->transaction_throttle);
769 650
770 num_bytes -= btrfs_root_used(&dirty->root->root_item); 651 ret = btrfs_drop_snapshot(trans, root);
771 bytes_used = btrfs_root_used(&root->root_item); 652 if (ret != -EAGAIN)
772 if (num_bytes) { 653 break;
773 mutex_lock(&root->fs_info->trans_mutex);
774 btrfs_record_root_in_trans(root);
775 mutex_unlock(&root->fs_info->trans_mutex);
776 btrfs_set_root_used(&root->root_item,
777 bytes_used - num_bytes);
778 }
779 654
780 ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); 655 ret = btrfs_update_root(trans, tree_root,
781 if (ret) { 656 &root->root_key,
782 BUG(); 657 &root->root_item);
658 if (ret)
783 break; 659 break;
784 }
785 mutex_unlock(&root->fs_info->drop_mutex);
786
787 spin_lock(&root->list_lock);
788 list_del_init(&dirty->root->dead_list);
789 if (!list_empty(&root->dead_list)) {
790 struct btrfs_root *oldest;
791 oldest = list_entry(root->dead_list.prev,
792 struct btrfs_root, dead_list);
793 max_useless = oldest->root_key.offset - 1;
794 } else {
795 max_useless = root->root_key.offset - 1;
796 }
797 spin_unlock(&root->list_lock);
798 660
799 nr = trans->blocks_used; 661 nr = trans->blocks_used;
800 ret = btrfs_end_transaction(trans, tree_root); 662 ret = btrfs_end_transaction(trans, tree_root);
801 BUG_ON(ret); 663 BUG_ON(ret);
802 664
803 ret = btrfs_remove_leaf_refs(root, max_useless, 0);
804 BUG_ON(ret);
805
806 free_extent_buffer(dirty->root->node);
807 kfree(dirty->root);
808 kfree(dirty);
809
810 btrfs_btree_balance_dirty(tree_root, nr); 665 btrfs_btree_balance_dirty(tree_root, nr);
811 cond_resched(); 666 cond_resched();
812 } 667 }
668 BUG_ON(ret);
669
670 ret = btrfs_del_root(trans, tree_root, &root->root_key);
671 BUG_ON(ret);
672
673 nr = trans->blocks_used;
674 ret = btrfs_end_transaction(trans, tree_root);
675 BUG_ON(ret);
676
677 free_extent_buffer(root->node);
678 free_extent_buffer(root->commit_root);
679 kfree(root);
680
681 btrfs_btree_balance_dirty(tree_root, nr);
813 return ret; 682 return ret;
814} 683}
815 684
@@ -839,24 +708,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
839 if (ret) 708 if (ret)
840 goto fail; 709 goto fail;
841 710
842 btrfs_record_root_in_trans(root); 711 record_root_in_trans(trans, root);
843 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 712 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
844 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 713 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
845 714
846 key.objectid = objectid; 715 key.objectid = objectid;
847 key.offset = trans->transid; 716 key.offset = 0;
848 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 717 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
849 718
850 old = btrfs_lock_root_node(root); 719 old = btrfs_lock_root_node(root);
851 btrfs_cow_block(trans, root, old, NULL, 0, &old); 720 btrfs_cow_block(trans, root, old, NULL, 0, &old);
721 btrfs_set_lock_blocking(old);
852 722
853 btrfs_copy_root(trans, root, old, &tmp, objectid); 723 btrfs_copy_root(trans, root, old, &tmp, objectid);
854 btrfs_tree_unlock(old); 724 btrfs_tree_unlock(old);
855 free_extent_buffer(old); 725 free_extent_buffer(old);
856 726
857 btrfs_set_root_bytenr(new_root_item, tmp->start); 727 btrfs_set_root_node(new_root_item, tmp);
858 btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
859 btrfs_set_root_generation(new_root_item, trans->transid);
860 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 728 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
861 new_root_item); 729 new_root_item);
862 btrfs_tree_unlock(tmp); 730 btrfs_tree_unlock(tmp);
@@ -964,6 +832,24 @@ static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
964 return 0; 832 return 0;
965} 833}
966 834
835static void update_super_roots(struct btrfs_root *root)
836{
837 struct btrfs_root_item *root_item;
838 struct btrfs_super_block *super;
839
840 super = &root->fs_info->super_copy;
841
842 root_item = &root->fs_info->chunk_root->root_item;
843 super->chunk_root = root_item->bytenr;
844 super->chunk_root_generation = root_item->generation;
845 super->chunk_root_level = root_item->level;
846
847 root_item = &root->fs_info->tree_root->root_item;
848 super->root = root_item->bytenr;
849 super->generation = root_item->generation;
850 super->root_level = root_item->level;
851}
852
967int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 853int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
968 struct btrfs_root *root) 854 struct btrfs_root *root)
969{ 855{
@@ -971,8 +857,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
971 unsigned long timeout = 1; 857 unsigned long timeout = 1;
972 struct btrfs_transaction *cur_trans; 858 struct btrfs_transaction *cur_trans;
973 struct btrfs_transaction *prev_trans = NULL; 859 struct btrfs_transaction *prev_trans = NULL;
974 struct btrfs_root *chunk_root = root->fs_info->chunk_root;
975 struct list_head dirty_fs_roots;
976 struct extent_io_tree *pinned_copy; 860 struct extent_io_tree *pinned_copy;
977 DEFINE_WAIT(wait); 861 DEFINE_WAIT(wait);
978 int ret; 862 int ret;
@@ -999,7 +883,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
999 BUG_ON(ret); 883 BUG_ON(ret);
1000 884
1001 mutex_lock(&root->fs_info->trans_mutex); 885 mutex_lock(&root->fs_info->trans_mutex);
1002 INIT_LIST_HEAD(&dirty_fs_roots);
1003 if (cur_trans->in_commit) { 886 if (cur_trans->in_commit) {
1004 cur_trans->use_count++; 887 cur_trans->use_count++;
1005 mutex_unlock(&root->fs_info->trans_mutex); 888 mutex_unlock(&root->fs_info->trans_mutex);
@@ -1105,41 +988,36 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1105 * with the tree-log code. 988 * with the tree-log code.
1106 */ 989 */
1107 mutex_lock(&root->fs_info->tree_log_mutex); 990 mutex_lock(&root->fs_info->tree_log_mutex);
1108 /*
1109 * keep tree reloc code from adding new reloc trees
1110 */
1111 mutex_lock(&root->fs_info->tree_reloc_mutex);
1112
1113 991
1114 ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, 992 ret = commit_fs_roots(trans, root);
1115 &dirty_fs_roots);
1116 BUG_ON(ret); 993 BUG_ON(ret);
1117 994
1118 /* add_dirty_roots gets rid of all the tree log roots, it is now 995 /* commit_fs_roots gets rid of all the tree log roots, it is now
1119 * safe to free the root of tree log roots 996 * safe to free the root of tree log roots
1120 */ 997 */
1121 btrfs_free_log_root_tree(trans, root->fs_info); 998 btrfs_free_log_root_tree(trans, root->fs_info);
1122 999
1123 ret = btrfs_commit_tree_roots(trans, root); 1000 ret = commit_cowonly_roots(trans, root);
1124 BUG_ON(ret); 1001 BUG_ON(ret);
1125 1002
1126 cur_trans = root->fs_info->running_transaction; 1003 cur_trans = root->fs_info->running_transaction;
1127 spin_lock(&root->fs_info->new_trans_lock); 1004 spin_lock(&root->fs_info->new_trans_lock);
1128 root->fs_info->running_transaction = NULL; 1005 root->fs_info->running_transaction = NULL;
1129 spin_unlock(&root->fs_info->new_trans_lock); 1006 spin_unlock(&root->fs_info->new_trans_lock);
1130 btrfs_set_super_generation(&root->fs_info->super_copy, 1007
1131 cur_trans->transid); 1008 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1132 btrfs_set_super_root(&root->fs_info->super_copy, 1009 root->fs_info->tree_root->node);
1133 root->fs_info->tree_root->node->start); 1010 free_extent_buffer(root->fs_info->tree_root->commit_root);
1134 btrfs_set_super_root_level(&root->fs_info->super_copy, 1011 root->fs_info->tree_root->commit_root =
1135 btrfs_header_level(root->fs_info->tree_root->node)); 1012 btrfs_root_node(root->fs_info->tree_root);
1136 1013
1137 btrfs_set_super_chunk_root(&root->fs_info->super_copy, 1014 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1138 chunk_root->node->start); 1015 root->fs_info->chunk_root->node);
1139 btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, 1016 free_extent_buffer(root->fs_info->chunk_root->commit_root);
1140 btrfs_header_level(chunk_root->node)); 1017 root->fs_info->chunk_root->commit_root =
1141 btrfs_set_super_chunk_root_generation(&root->fs_info->super_copy, 1018 btrfs_root_node(root->fs_info->chunk_root);
1142 btrfs_header_generation(chunk_root->node)); 1019
1020 update_super_roots(root);
1143 1021
1144 if (!root->fs_info->log_root_recovering) { 1022 if (!root->fs_info->log_root_recovering) {
1145 btrfs_set_super_log_root(&root->fs_info->super_copy, 0); 1023 btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
@@ -1153,7 +1031,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1153 1031
1154 trans->transaction->blocked = 0; 1032 trans->transaction->blocked = 0;
1155 1033
1156 wake_up(&root->fs_info->transaction_throttle);
1157 wake_up(&root->fs_info->transaction_wait); 1034 wake_up(&root->fs_info->transaction_wait);
1158 1035
1159 mutex_unlock(&root->fs_info->trans_mutex); 1036 mutex_unlock(&root->fs_info->trans_mutex);
@@ -1170,9 +1047,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1170 btrfs_finish_extent_commit(trans, root, pinned_copy); 1047 btrfs_finish_extent_commit(trans, root, pinned_copy);
1171 kfree(pinned_copy); 1048 kfree(pinned_copy);
1172 1049
1173 btrfs_drop_dead_reloc_roots(root);
1174 mutex_unlock(&root->fs_info->tree_reloc_mutex);
1175
1176 /* do the directory inserts of any pending snapshot creations */ 1050 /* do the directory inserts of any pending snapshot creations */
1177 finish_pending_snapshots(trans, root->fs_info); 1051 finish_pending_snapshots(trans, root->fs_info);
1178 1052
@@ -1186,16 +1060,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1186 put_transaction(cur_trans); 1060 put_transaction(cur_trans);
1187 put_transaction(cur_trans); 1061 put_transaction(cur_trans);
1188 1062
1189 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
1190 if (root->fs_info->closing)
1191 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
1192
1193 mutex_unlock(&root->fs_info->trans_mutex); 1063 mutex_unlock(&root->fs_info->trans_mutex);
1194 1064
1195 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1065 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1196
1197 if (root->fs_info->closing)
1198 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
1199 return ret; 1066 return ret;
1200} 1067}
1201 1068
@@ -1204,16 +1071,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1204 */ 1071 */
1205int btrfs_clean_old_snapshots(struct btrfs_root *root) 1072int btrfs_clean_old_snapshots(struct btrfs_root *root)
1206{ 1073{
1207 struct list_head dirty_roots; 1074 LIST_HEAD(list);
1208 INIT_LIST_HEAD(&dirty_roots); 1075 struct btrfs_fs_info *fs_info = root->fs_info;
1209again: 1076
1210 mutex_lock(&root->fs_info->trans_mutex); 1077 mutex_lock(&fs_info->trans_mutex);
1211 list_splice_init(&root->fs_info->dead_roots, &dirty_roots); 1078 list_splice_init(&fs_info->dead_roots, &list);
1212 mutex_unlock(&root->fs_info->trans_mutex); 1079 mutex_unlock(&fs_info->trans_mutex);
1213 1080
1214 if (!list_empty(&dirty_roots)) { 1081 while (!list_empty(&list)) {
1215 drop_dirty_roots(root, &dirty_roots); 1082 root = list_entry(list.next, struct btrfs_root, root_list);
1216 goto again; 1083 list_del_init(&root->root_list);
1084 btrfs_drop_dead_root(root);
1217 } 1085 }
1218 return 0; 1086 return 0;
1219} 1087}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 94f5bde2b58d..961c3ee5a2e1 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -62,12 +62,6 @@ struct btrfs_pending_snapshot {
62 struct list_head list; 62 struct list_head list;
63}; 63};
64 64
65struct btrfs_dirty_root {
66 struct list_head list;
67 struct btrfs_root *root;
68 struct btrfs_root *latest_root;
69};
70
71static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, 65static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
72 struct inode *inode) 66 struct inode *inode)
73{ 67{
@@ -100,7 +94,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
100int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 94int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
101 struct btrfs_root *root); 95 struct btrfs_root *root);
102 96
103int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest); 97int btrfs_add_dead_root(struct btrfs_root *root);
98int btrfs_drop_dead_root(struct btrfs_root *root);
104int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); 99int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
105int btrfs_clean_old_snapshots(struct btrfs_root *root); 100int btrfs_clean_old_snapshots(struct btrfs_root *root);
106int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 101int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -108,7 +103,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
108int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 103int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
109 struct btrfs_root *root); 104 struct btrfs_root *root);
110void btrfs_throttle(struct btrfs_root *root); 105void btrfs_throttle(struct btrfs_root *root);
111int btrfs_record_root_in_trans(struct btrfs_root *root); 106int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
107 struct btrfs_root *root);
112int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 108int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
113 struct extent_io_tree *dirty_pages); 109 struct extent_io_tree *dirty_pages);
114#endif 110#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index db5e212e8445..2b41fc08c34a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -430,18 +430,16 @@ no_copy:
430static noinline struct inode *read_one_inode(struct btrfs_root *root, 430static noinline struct inode *read_one_inode(struct btrfs_root *root,
431 u64 objectid) 431 u64 objectid)
432{ 432{
433 struct btrfs_key key;
433 struct inode *inode; 434 struct inode *inode;
434 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
435 if (inode->i_state & I_NEW) {
436 BTRFS_I(inode)->root = root;
437 BTRFS_I(inode)->location.objectid = objectid;
438 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
439 BTRFS_I(inode)->location.offset = 0;
440 btrfs_read_locked_inode(inode);
441 unlock_new_inode(inode);
442 435
443 } 436 key.objectid = objectid;
444 if (is_bad_inode(inode)) { 437 key.type = BTRFS_INODE_ITEM_KEY;
438 key.offset = 0;
439 inode = btrfs_iget(root->fs_info->sb, &key, root);
440 if (IS_ERR(inode)) {
441 inode = NULL;
442 } else if (is_bad_inode(inode)) {
445 iput(inode); 443 iput(inode);
446 inode = NULL; 444 inode = NULL;
447 } 445 }
@@ -541,6 +539,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
541 539
542 if (found_type == BTRFS_FILE_EXTENT_REG || 540 if (found_type == BTRFS_FILE_EXTENT_REG ||
543 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 541 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
542 u64 offset;
544 unsigned long dest_offset; 543 unsigned long dest_offset;
545 struct btrfs_key ins; 544 struct btrfs_key ins;
546 545
@@ -555,6 +554,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
555 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); 554 ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
556 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); 555 ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
557 ins.type = BTRFS_EXTENT_ITEM_KEY; 556 ins.type = BTRFS_EXTENT_ITEM_KEY;
557 offset = key->offset - btrfs_file_extent_offset(eb, item);
558 558
559 if (ins.objectid > 0) { 559 if (ins.objectid > 0) {
560 u64 csum_start; 560 u64 csum_start;
@@ -569,19 +569,16 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
569 if (ret == 0) { 569 if (ret == 0) {
570 ret = btrfs_inc_extent_ref(trans, root, 570 ret = btrfs_inc_extent_ref(trans, root,
571 ins.objectid, ins.offset, 571 ins.objectid, ins.offset,
572 path->nodes[0]->start, 572 0, root->root_key.objectid,
573 root->root_key.objectid, 573 key->objectid, offset);
574 trans->transid, key->objectid);
575 } else { 574 } else {
576 /* 575 /*
577 * insert the extent pointer in the extent 576 * insert the extent pointer in the extent
578 * allocation tree 577 * allocation tree
579 */ 578 */
580 ret = btrfs_alloc_logged_extent(trans, root, 579 ret = btrfs_alloc_logged_file_extent(trans,
581 path->nodes[0]->start, 580 root, root->root_key.objectid,
582 root->root_key.objectid, 581 key->objectid, offset, &ins);
583 trans->transid, key->objectid,
584 &ins);
585 BUG_ON(ret); 582 BUG_ON(ret);
586 } 583 }
587 btrfs_release_path(root, path); 584 btrfs_release_path(root, path);
@@ -1706,9 +1703,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1706 btrfs_wait_tree_block_writeback(next); 1703 btrfs_wait_tree_block_writeback(next);
1707 btrfs_tree_unlock(next); 1704 btrfs_tree_unlock(next);
1708 1705
1709 ret = btrfs_drop_leaf_ref(trans, root, next);
1710 BUG_ON(ret);
1711
1712 WARN_ON(root_owner != 1706 WARN_ON(root_owner !=
1713 BTRFS_TREE_LOG_OBJECTID); 1707 BTRFS_TREE_LOG_OBJECTID);
1714 ret = btrfs_free_reserved_extent(root, 1708 ret = btrfs_free_reserved_extent(root,
@@ -1753,10 +1747,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1753 btrfs_wait_tree_block_writeback(next); 1747 btrfs_wait_tree_block_writeback(next);
1754 btrfs_tree_unlock(next); 1748 btrfs_tree_unlock(next);
1755 1749
1756 if (*level == 0) {
1757 ret = btrfs_drop_leaf_ref(trans, root, next);
1758 BUG_ON(ret);
1759 }
1760 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 1750 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1761 ret = btrfs_free_reserved_extent(root, bytenr, blocksize); 1751 ret = btrfs_free_reserved_extent(root, bytenr, blocksize);
1762 BUG_ON(ret); 1752 BUG_ON(ret);
@@ -1811,12 +1801,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1811 btrfs_wait_tree_block_writeback(next); 1801 btrfs_wait_tree_block_writeback(next);
1812 btrfs_tree_unlock(next); 1802 btrfs_tree_unlock(next);
1813 1803
1814 if (*level == 0) {
1815 ret = btrfs_drop_leaf_ref(trans, root,
1816 next);
1817 BUG_ON(ret);
1818 }
1819
1820 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 1804 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1821 ret = btrfs_free_reserved_extent(root, 1805 ret = btrfs_free_reserved_extent(root,
1822 path->nodes[*level]->start, 1806 path->nodes[*level]->start,
@@ -1884,11 +1868,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1884 btrfs_wait_tree_block_writeback(next); 1868 btrfs_wait_tree_block_writeback(next);
1885 btrfs_tree_unlock(next); 1869 btrfs_tree_unlock(next);
1886 1870
1887 if (orig_level == 0) {
1888 ret = btrfs_drop_leaf_ref(trans, log,
1889 next);
1890 BUG_ON(ret);
1891 }
1892 WARN_ON(log->root_key.objectid != 1871 WARN_ON(log->root_key.objectid !=
1893 BTRFS_TREE_LOG_OBJECTID); 1872 BTRFS_TREE_LOG_OBJECTID);
1894 ret = btrfs_free_reserved_extent(log, next->start, 1873 ret = btrfs_free_reserved_extent(log, next->start,
@@ -2027,9 +2006,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2027 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 2006 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
2028 BUG_ON(ret); 2007 BUG_ON(ret);
2029 2008
2030 btrfs_set_root_bytenr(&log->root_item, log->node->start); 2009 btrfs_set_root_node(&log->root_item, log->node);
2031 btrfs_set_root_generation(&log->root_item, trans->transid);
2032 btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
2033 2010
2034 root->log_batch = 0; 2011 root->log_batch = 0;
2035 root->log_transid++; 2012 root->log_transid++;
@@ -2581,7 +2558,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2581 ins_keys, ins_sizes, nr); 2558 ins_keys, ins_sizes, nr);
2582 BUG_ON(ret); 2559 BUG_ON(ret);
2583 2560
2584 for (i = 0; i < nr; i++) { 2561 for (i = 0; i < nr; i++, dst_path->slots[0]++) {
2585 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], 2562 dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0],
2586 dst_path->slots[0]); 2563 dst_path->slots[0]);
2587 2564
@@ -2617,36 +2594,31 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2617 found_type = btrfs_file_extent_type(src, extent); 2594 found_type = btrfs_file_extent_type(src, extent);
2618 if (found_type == BTRFS_FILE_EXTENT_REG || 2595 if (found_type == BTRFS_FILE_EXTENT_REG ||
2619 found_type == BTRFS_FILE_EXTENT_PREALLOC) { 2596 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
2620 u64 ds = btrfs_file_extent_disk_bytenr(src, 2597 u64 ds, dl, cs, cl;
2621 extent); 2598 ds = btrfs_file_extent_disk_bytenr(src,
2622 u64 dl = btrfs_file_extent_disk_num_bytes(src, 2599 extent);
2623 extent); 2600 /* ds == 0 is a hole */
2624 u64 cs = btrfs_file_extent_offset(src, extent); 2601 if (ds == 0)
2625 u64 cl = btrfs_file_extent_num_bytes(src, 2602 continue;
2626 extent);; 2603
2604 dl = btrfs_file_extent_disk_num_bytes(src,
2605 extent);
2606 cs = btrfs_file_extent_offset(src, extent);
2607 cl = btrfs_file_extent_num_bytes(src,
2608 extent);;
2627 if (btrfs_file_extent_compression(src, 2609 if (btrfs_file_extent_compression(src,
2628 extent)) { 2610 extent)) {
2629 cs = 0; 2611 cs = 0;
2630 cl = dl; 2612 cl = dl;
2631 } 2613 }
2632 /* ds == 0 is a hole */ 2614
2633 if (ds != 0) { 2615 ret = btrfs_lookup_csums_range(
2634 ret = btrfs_inc_extent_ref(trans, log, 2616 log->fs_info->csum_root,
2635 ds, dl, 2617 ds + cs, ds + cs + cl - 1,
2636 dst_path->nodes[0]->start, 2618 &ordered_sums);
2637 BTRFS_TREE_LOG_OBJECTID, 2619 BUG_ON(ret);
2638 trans->transid,
2639 ins_keys[i].objectid);
2640 BUG_ON(ret);
2641 ret = btrfs_lookup_csums_range(
2642 log->fs_info->csum_root,
2643 ds + cs, ds + cs + cl - 1,
2644 &ordered_sums);
2645 BUG_ON(ret);
2646 }
2647 } 2620 }
2648 } 2621 }
2649 dst_path->slots[0]++;
2650 } 2622 }
2651 2623
2652 btrfs_mark_buffer_dirty(dst_path->nodes[0]); 2624 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
@@ -3029,9 +3001,7 @@ again:
3029 BUG_ON(!wc.replay_dest); 3001 BUG_ON(!wc.replay_dest);
3030 3002
3031 wc.replay_dest->log_root = log; 3003 wc.replay_dest->log_root = log;
3032 mutex_lock(&fs_info->trans_mutex); 3004 btrfs_record_root_in_trans(trans, wc.replay_dest);
3033 btrfs_record_root_in_trans(wc.replay_dest);
3034 mutex_unlock(&fs_info->trans_mutex);
3035 ret = walk_log_tree(trans, log, &wc); 3005 ret = walk_log_tree(trans, log, &wc);
3036 BUG_ON(ret); 3006 BUG_ON(ret);
3037 3007
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6d35b0054ca..8bc6a8807482 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1671,8 +1671,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1671 int ret; 1671 int ret;
1672 int i; 1672 int i;
1673 1673
1674 printk(KERN_INFO "btrfs relocating chunk %llu\n",
1675 (unsigned long long)chunk_offset);
1676 root = root->fs_info->chunk_root; 1674 root = root->fs_info->chunk_root;
1677 extent_root = root->fs_info->extent_root; 1675 extent_root = root->fs_info->extent_root;
1678 em_tree = &root->fs_info->mapping_tree.map_tree; 1676 em_tree = &root->fs_info->mapping_tree.map_tree;