aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorYan Zheng <zheng.yan@oracle.com>2009-06-10 10:45:14 -0400
committerChris Mason <chris.mason@oracle.com>2009-06-10 11:29:46 -0400
commit5d4f98a28c7d334091c1b7744f48a1acdd2a4ae0 (patch)
treec611d7d824cbcdb777dd2d8e33e2ed1c5df8a9c6 /fs/btrfs/ctree.c
parent5c939df56c3ea018b58e5aa76181284c2053d699 (diff)
Btrfs: Mixed back reference (FORWARD ROLLING FORMAT CHANGE)
This commit introduces a new kind of back reference for btrfs metadata. Once a filesystem has been mounted with this commit, IT WILL NO LONGER BE MOUNTABLE BY OLDER KERNELS. When a tree block in subvolume tree is cow'd, the reference counts of all extents it points to are increased by one. At transaction commit time, the old root of the subvolume is recorded in a "dead root" data structure, and the btree it points to is later walked, dropping reference counts and freeing any blocks where the reference count goes to 0. The increments done during cow and decrements done after commit cancel out, and the walk is a very expensive way to go about freeing the blocks that are no longer referenced by the new btree root. This commit reduces the transaction overhead by avoiding the need for dead root records. When a non-shared tree block is cow'd, we free the old block at once, and the new block inherits old block's references. When a tree block with reference count > 1 is cow'd, we increase the reference counts of all extents the new block points to by one, and decrease the old block's reference count by one. This dead tree avoidance code removes the need to modify the reference counts of lower level extents when a non-shared tree block is cow'd. But we still need to update back ref for all pointers in the block. This is because the location of the block is recorded in the back ref item. We can solve this by introducing a new type of back ref. The new back ref provides information about pointer's key, level and in which tree the pointer lives. This information allow us to find the pointer by searching the tree. The shortcoming of the new back ref is that it only works for pointers in tree blocks referenced by their owner trees. This is mostly a problem for snapshots, where resolving one of these fuzzy back references would be O(number_of_snapshots) and quite slow. The solution used here is to use the fuzzy back references in the common case where a given tree block is only referenced by one root, and use the full back references when multiple roots have a reference on a given block. This commit adds per subvolume red-black tree to keep trace of cached inodes. The red-black tree helps the balancing code to find cached inodes whose inode numbers within a given range. This commit improves the balancing code by introducing several data structures to keep the state of balancing. The most important one is the back ref cache. It caches how the upper level tree blocks are referenced. This greatly reduce the overhead of checking back ref. The improved balancing code scales significantly better with a large number of snapshots. This is a very large commit and was written in a number of pieces. But, they depend heavily on the disk format change and were squashed together to make sure git bisect didn't end up in a bad state wrt space balancing or the format change. Signed-off-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c685
1 files changed, 316 insertions, 369 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index fedf8b9f03a2..2b960278a2f9 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
197 u32 nritems; 197 u32 nritems;
198 int ret = 0; 198 int ret = 0;
199 int level; 199 int level;
200 struct btrfs_root *new_root; 200 struct btrfs_disk_key disk_key;
201
202 new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
203 if (!new_root)
204 return -ENOMEM;
205
206 memcpy(new_root, root, sizeof(*new_root));
207 new_root->root_key.objectid = new_root_objectid;
208 201
209 WARN_ON(root->ref_cows && trans->transid != 202 WARN_ON(root->ref_cows && trans->transid !=
210 root->fs_info->running_transaction->transid); 203 root->fs_info->running_transaction->transid);
@@ -212,28 +205,37 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
212 205
213 level = btrfs_header_level(buf); 206 level = btrfs_header_level(buf);
214 nritems = btrfs_header_nritems(buf); 207 nritems = btrfs_header_nritems(buf);
208 if (level == 0)
209 btrfs_item_key(buf, &disk_key, 0);
210 else
211 btrfs_node_key(buf, &disk_key, 0);
215 212
216 cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, 213 cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
217 new_root_objectid, trans->transid, 214 new_root_objectid, &disk_key, level,
218 level, buf->start, 0); 215 buf->start, 0);
219 if (IS_ERR(cow)) { 216 if (IS_ERR(cow))
220 kfree(new_root);
221 return PTR_ERR(cow); 217 return PTR_ERR(cow);
222 }
223 218
224 copy_extent_buffer(cow, buf, 0, 0, cow->len); 219 copy_extent_buffer(cow, buf, 0, 0, cow->len);
225 btrfs_set_header_bytenr(cow, cow->start); 220 btrfs_set_header_bytenr(cow, cow->start);
226 btrfs_set_header_generation(cow, trans->transid); 221 btrfs_set_header_generation(cow, trans->transid);
227 btrfs_set_header_owner(cow, new_root_objectid); 222 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
228 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 223 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
224 BTRFS_HEADER_FLAG_RELOC);
225 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
226 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
227 else
228 btrfs_set_header_owner(cow, new_root_objectid);
229 229
230 write_extent_buffer(cow, root->fs_info->fsid, 230 write_extent_buffer(cow, root->fs_info->fsid,
231 (unsigned long)btrfs_header_fsid(cow), 231 (unsigned long)btrfs_header_fsid(cow),
232 BTRFS_FSID_SIZE); 232 BTRFS_FSID_SIZE);
233 233
234 WARN_ON(btrfs_header_generation(buf) > trans->transid); 234 WARN_ON(btrfs_header_generation(buf) > trans->transid);
235 ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); 235 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
236 kfree(new_root); 236 ret = btrfs_inc_ref(trans, root, cow, 1);
237 else
238 ret = btrfs_inc_ref(trans, root, cow, 0);
237 239
238 if (ret) 240 if (ret)
239 return ret; 241 return ret;
@@ -244,6 +246,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
244} 246}
245 247
246/* 248/*
249 * check if the tree block can be shared by multiple trees
250 */
251int btrfs_block_can_be_shared(struct btrfs_root *root,
252 struct extent_buffer *buf)
253{
254 /*
255 * Tree blocks not in refernece counted trees and tree roots
256 * are never shared. If a block was allocated after the last
257 * snapshot and the block was not allocated by tree relocation,
258 * we know the block is not shared.
259 */
260 if (root->ref_cows &&
261 buf != root->node && buf != root->commit_root &&
262 (btrfs_header_generation(buf) <=
263 btrfs_root_last_snapshot(&root->root_item) ||
264 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
265 return 1;
266#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
267 if (root->ref_cows &&
268 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
269 return 1;
270#endif
271 return 0;
272}
273
274static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
275 struct btrfs_root *root,
276 struct extent_buffer *buf,
277 struct extent_buffer *cow)
278{
279 u64 refs;
280 u64 owner;
281 u64 flags;
282 u64 new_flags = 0;
283 int ret;
284
285 /*
286 * Backrefs update rules:
287 *
288 * Always use full backrefs for extent pointers in tree block
289 * allocated by tree relocation.
290 *
291 * If a shared tree block is no longer referenced by its owner
292 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
293 * use full backrefs for extent pointers in tree block.
294 *
295 * If a tree block is been relocating
296 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
297 * use full backrefs for extent pointers in tree block.
298 * The reason for this is some operations (such as drop tree)
299 * are only allowed for blocks use full backrefs.
300 */
301
302 if (btrfs_block_can_be_shared(root, buf)) {
303 ret = btrfs_lookup_extent_info(trans, root, buf->start,
304 buf->len, &refs, &flags);
305 BUG_ON(ret);
306 BUG_ON(refs == 0);
307 } else {
308 refs = 1;
309 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
310 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
311 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
312 else
313 flags = 0;
314 }
315
316 owner = btrfs_header_owner(buf);
317 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
318 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
319
320 if (refs > 1) {
321 if ((owner == root->root_key.objectid ||
322 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
323 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
324 ret = btrfs_inc_ref(trans, root, buf, 1);
325 BUG_ON(ret);
326
327 if (root->root_key.objectid ==
328 BTRFS_TREE_RELOC_OBJECTID) {
329 ret = btrfs_dec_ref(trans, root, buf, 0);
330 BUG_ON(ret);
331 ret = btrfs_inc_ref(trans, root, cow, 1);
332 BUG_ON(ret);
333 }
334 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
335 } else {
336
337 if (root->root_key.objectid ==
338 BTRFS_TREE_RELOC_OBJECTID)
339 ret = btrfs_inc_ref(trans, root, cow, 1);
340 else
341 ret = btrfs_inc_ref(trans, root, cow, 0);
342 BUG_ON(ret);
343 }
344 if (new_flags != 0) {
345 ret = btrfs_set_disk_extent_flags(trans, root,
346 buf->start,
347 buf->len,
348 new_flags, 0);
349 BUG_ON(ret);
350 }
351 } else {
352 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
353 if (root->root_key.objectid ==
354 BTRFS_TREE_RELOC_OBJECTID)
355 ret = btrfs_inc_ref(trans, root, cow, 1);
356 else
357 ret = btrfs_inc_ref(trans, root, cow, 0);
358 BUG_ON(ret);
359 ret = btrfs_dec_ref(trans, root, buf, 1);
360 BUG_ON(ret);
361 }
362 clean_tree_block(trans, root, buf);
363 }
364 return 0;
365}
366
367/*
247 * does the dirty work in cow of a single block. The parent block (if 368 * does the dirty work in cow of a single block. The parent block (if
248 * supplied) is updated to point to the new cow copy. The new buffer is marked 369 * supplied) is updated to point to the new cow copy. The new buffer is marked
249 * dirty and returned locked. If you modify the block it needs to be marked 370 * dirty and returned locked. If you modify the block it needs to be marked
@@ -262,34 +383,39 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
262 struct extent_buffer **cow_ret, 383 struct extent_buffer **cow_ret,
263 u64 search_start, u64 empty_size) 384 u64 search_start, u64 empty_size)
264{ 385{
265 u64 parent_start; 386 struct btrfs_disk_key disk_key;
266 struct extent_buffer *cow; 387 struct extent_buffer *cow;
267 u32 nritems;
268 int ret = 0;
269 int level; 388 int level;
270 int unlock_orig = 0; 389 int unlock_orig = 0;
390 u64 parent_start;
271 391
272 if (*cow_ret == buf) 392 if (*cow_ret == buf)
273 unlock_orig = 1; 393 unlock_orig = 1;
274 394
275 btrfs_assert_tree_locked(buf); 395 btrfs_assert_tree_locked(buf);
276 396
277 if (parent)
278 parent_start = parent->start;
279 else
280 parent_start = 0;
281
282 WARN_ON(root->ref_cows && trans->transid != 397 WARN_ON(root->ref_cows && trans->transid !=
283 root->fs_info->running_transaction->transid); 398 root->fs_info->running_transaction->transid);
284 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 399 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
285 400
286 level = btrfs_header_level(buf); 401 level = btrfs_header_level(buf);
287 nritems = btrfs_header_nritems(buf);
288 402
289 cow = btrfs_alloc_free_block(trans, root, buf->len, 403 if (level == 0)
290 parent_start, root->root_key.objectid, 404 btrfs_item_key(buf, &disk_key, 0);
291 trans->transid, level, 405 else
292 search_start, empty_size); 406 btrfs_node_key(buf, &disk_key, 0);
407
408 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
409 if (parent)
410 parent_start = parent->start;
411 else
412 parent_start = 0;
413 } else
414 parent_start = 0;
415
416 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
417 root->root_key.objectid, &disk_key,
418 level, search_start, empty_size);
293 if (IS_ERR(cow)) 419 if (IS_ERR(cow))
294 return PTR_ERR(cow); 420 return PTR_ERR(cow);
295 421
@@ -298,83 +424,53 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
298 copy_extent_buffer(cow, buf, 0, 0, cow->len); 424 copy_extent_buffer(cow, buf, 0, 0, cow->len);
299 btrfs_set_header_bytenr(cow, cow->start); 425 btrfs_set_header_bytenr(cow, cow->start);
300 btrfs_set_header_generation(cow, trans->transid); 426 btrfs_set_header_generation(cow, trans->transid);
301 btrfs_set_header_owner(cow, root->root_key.objectid); 427 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
302 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 428 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
429 BTRFS_HEADER_FLAG_RELOC);
430 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
431 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
432 else
433 btrfs_set_header_owner(cow, root->root_key.objectid);
303 434
304 write_extent_buffer(cow, root->fs_info->fsid, 435 write_extent_buffer(cow, root->fs_info->fsid,
305 (unsigned long)btrfs_header_fsid(cow), 436 (unsigned long)btrfs_header_fsid(cow),
306 BTRFS_FSID_SIZE); 437 BTRFS_FSID_SIZE);
307 438
308 WARN_ON(btrfs_header_generation(buf) > trans->transid); 439 update_ref_for_cow(trans, root, buf, cow);
309 if (btrfs_header_generation(buf) != trans->transid) {
310 u32 nr_extents;
311 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
312 if (ret)
313 return ret;
314
315 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
316 WARN_ON(ret);
317 } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
318 /*
319 * There are only two places that can drop reference to
320 * tree blocks owned by living reloc trees, one is here,
321 * the other place is btrfs_drop_subtree. In both places,
322 * we check reference count while tree block is locked.
323 * Furthermore, if reference count is one, it won't get
324 * increased by someone else.
325 */
326 u32 refs;
327 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
328 buf->len, &refs);
329 BUG_ON(ret);
330 if (refs == 1) {
331 ret = btrfs_update_ref(trans, root, buf, cow,
332 0, nritems);
333 clean_tree_block(trans, root, buf);
334 } else {
335 ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
336 }
337 BUG_ON(ret);
338 } else {
339 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
340 if (ret)
341 return ret;
342 clean_tree_block(trans, root, buf);
343 }
344
345 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
346 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
347 WARN_ON(ret);
348 }
349 440
350 if (buf == root->node) { 441 if (buf == root->node) {
351 WARN_ON(parent && parent != buf); 442 WARN_ON(parent && parent != buf);
443 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
444 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
445 parent_start = buf->start;
446 else
447 parent_start = 0;
352 448
353 spin_lock(&root->node_lock); 449 spin_lock(&root->node_lock);
354 root->node = cow; 450 root->node = cow;
355 extent_buffer_get(cow); 451 extent_buffer_get(cow);
356 spin_unlock(&root->node_lock); 452 spin_unlock(&root->node_lock);
357 453
358 if (buf != root->commit_root) { 454 btrfs_free_extent(trans, root, buf->start, buf->len,
359 btrfs_free_extent(trans, root, buf->start, 455 parent_start, root->root_key.objectid,
360 buf->len, buf->start, 456 level, 0);
361 root->root_key.objectid,
362 btrfs_header_generation(buf),
363 level, 1);
364 }
365 free_extent_buffer(buf); 457 free_extent_buffer(buf);
366 add_root_to_dirty_list(root); 458 add_root_to_dirty_list(root);
367 } else { 459 } else {
460 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
461 parent_start = parent->start;
462 else
463 parent_start = 0;
464
465 WARN_ON(trans->transid != btrfs_header_generation(parent));
368 btrfs_set_node_blockptr(parent, parent_slot, 466 btrfs_set_node_blockptr(parent, parent_slot,
369 cow->start); 467 cow->start);
370 WARN_ON(trans->transid == 0);
371 btrfs_set_node_ptr_generation(parent, parent_slot, 468 btrfs_set_node_ptr_generation(parent, parent_slot,
372 trans->transid); 469 trans->transid);
373 btrfs_mark_buffer_dirty(parent); 470 btrfs_mark_buffer_dirty(parent);
374 WARN_ON(btrfs_header_generation(parent) != trans->transid);
375 btrfs_free_extent(trans, root, buf->start, buf->len, 471 btrfs_free_extent(trans, root, buf->start, buf->len,
376 parent_start, btrfs_header_owner(parent), 472 parent_start, root->root_key.objectid,
377 btrfs_header_generation(parent), level, 1); 473 level, 0);
378 } 474 }
379 if (unlock_orig) 475 if (unlock_orig)
380 btrfs_tree_unlock(buf); 476 btrfs_tree_unlock(buf);
@@ -384,6 +480,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
384 return 0; 480 return 0;
385} 481}
386 482
483static inline int should_cow_block(struct btrfs_trans_handle *trans,
484 struct btrfs_root *root,
485 struct extent_buffer *buf)
486{
487 if (btrfs_header_generation(buf) == trans->transid &&
488 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
489 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
490 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
491 return 0;
492 return 1;
493}
494
387/* 495/*
388 * cows a single block, see __btrfs_cow_block for the real work. 496 * cows a single block, see __btrfs_cow_block for the real work.
389 * This version of it has extra checks so that a block isn't cow'd more than 497 * This version of it has extra checks so that a block isn't cow'd more than
@@ -411,9 +519,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
411 WARN_ON(1); 519 WARN_ON(1);
412 } 520 }
413 521
414 if (btrfs_header_generation(buf) == trans->transid && 522 if (!should_cow_block(trans, root, buf)) {
415 btrfs_header_owner(buf) == root->root_key.objectid &&
416 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
417 *cow_ret = buf; 523 *cow_ret = buf;
418 return 0; 524 return 0;
419 } 525 }
@@ -469,7 +575,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
469/* 575/*
470 * same as comp_keys only with two btrfs_key's 576 * same as comp_keys only with two btrfs_key's
471 */ 577 */
472static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) 578int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
473{ 579{
474 if (k1->objectid > k2->objectid) 580 if (k1->objectid > k2->objectid)
475 return 1; 581 return 1;
@@ -845,6 +951,12 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
845 return -1; 951 return -1;
846} 952}
847 953
954int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
955 int level, int *slot)
956{
957 return bin_search(eb, key, level, slot);
958}
959
848/* given a node and slot number, this reads the blocks it points to. The 960/* given a node and slot number, this reads the blocks it points to. The
849 * extent buffer is returned with a reference taken (but unlocked). 961 * extent buffer is returned with a reference taken (but unlocked).
850 * NULL is returned on error. 962 * NULL is returned on error.
@@ -921,13 +1033,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
921 root->node = child; 1033 root->node = child;
922 spin_unlock(&root->node_lock); 1034 spin_unlock(&root->node_lock);
923 1035
924 ret = btrfs_update_extent_ref(trans, root, child->start,
925 child->len,
926 mid->start, child->start,
927 root->root_key.objectid,
928 trans->transid, level - 1);
929 BUG_ON(ret);
930
931 add_root_to_dirty_list(root); 1036 add_root_to_dirty_list(root);
932 btrfs_tree_unlock(child); 1037 btrfs_tree_unlock(child);
933 1038
@@ -938,9 +1043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
938 /* once for the path */ 1043 /* once for the path */
939 free_extent_buffer(mid); 1044 free_extent_buffer(mid);
940 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1045 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
941 mid->start, root->root_key.objectid, 1046 0, root->root_key.objectid, level, 1);
942 btrfs_header_generation(mid),
943 level, 1);
944 /* once for the root ptr */ 1047 /* once for the root ptr */
945 free_extent_buffer(mid); 1048 free_extent_buffer(mid);
946 return ret; 1049 return ret;
@@ -998,7 +1101,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
998 ret = wret; 1101 ret = wret;
999 if (btrfs_header_nritems(right) == 0) { 1102 if (btrfs_header_nritems(right) == 0) {
1000 u64 bytenr = right->start; 1103 u64 bytenr = right->start;
1001 u64 generation = btrfs_header_generation(parent);
1002 u32 blocksize = right->len; 1104 u32 blocksize = right->len;
1003 1105
1004 clean_tree_block(trans, root, right); 1106 clean_tree_block(trans, root, right);
@@ -1010,9 +1112,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1010 if (wret) 1112 if (wret)
1011 ret = wret; 1113 ret = wret;
1012 wret = btrfs_free_extent(trans, root, bytenr, 1114 wret = btrfs_free_extent(trans, root, bytenr,
1013 blocksize, parent->start, 1115 blocksize, 0,
1014 btrfs_header_owner(parent), 1116 root->root_key.objectid,
1015 generation, level, 1); 1117 level, 0);
1016 if (wret) 1118 if (wret)
1017 ret = wret; 1119 ret = wret;
1018 } else { 1120 } else {
@@ -1047,7 +1149,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1047 } 1149 }
1048 if (btrfs_header_nritems(mid) == 0) { 1150 if (btrfs_header_nritems(mid) == 0) {
1049 /* we've managed to empty the middle node, drop it */ 1151 /* we've managed to empty the middle node, drop it */
1050 u64 root_gen = btrfs_header_generation(parent);
1051 u64 bytenr = mid->start; 1152 u64 bytenr = mid->start;
1052 u32 blocksize = mid->len; 1153 u32 blocksize = mid->len;
1053 1154
@@ -1059,9 +1160,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1059 if (wret) 1160 if (wret)
1060 ret = wret; 1161 ret = wret;
1061 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1162 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1062 parent->start, 1163 0, root->root_key.objectid,
1063 btrfs_header_owner(parent), 1164 level, 0);
1064 root_gen, level, 1);
1065 if (wret) 1165 if (wret)
1066 ret = wret; 1166 ret = wret;
1067 } else { 1167 } else {
@@ -1437,7 +1537,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1437{ 1537{
1438 int i; 1538 int i;
1439 1539
1440 if (path->keep_locks || path->lowest_level) 1540 if (path->keep_locks)
1441 return; 1541 return;
1442 1542
1443 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1543 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1614,10 +1714,17 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1614 lowest_unlock = 2; 1714 lowest_unlock = 2;
1615 1715
1616again: 1716again:
1617 if (p->skip_locking) 1717 if (p->search_commit_root) {
1618 b = btrfs_root_node(root); 1718 b = root->commit_root;
1619 else 1719 extent_buffer_get(b);
1620 b = btrfs_lock_root_node(root); 1720 if (!p->skip_locking)
1721 btrfs_tree_lock(b);
1722 } else {
1723 if (p->skip_locking)
1724 b = btrfs_root_node(root);
1725 else
1726 b = btrfs_lock_root_node(root);
1727 }
1621 1728
1622 while (b) { 1729 while (b) {
1623 level = btrfs_header_level(b); 1730 level = btrfs_header_level(b);
@@ -1638,11 +1745,9 @@ again:
1638 * then we don't want to set the path blocking, 1745 * then we don't want to set the path blocking,
1639 * so we test it here 1746 * so we test it here
1640 */ 1747 */
1641 if (btrfs_header_generation(b) == trans->transid && 1748 if (!should_cow_block(trans, root, b))
1642 btrfs_header_owner(b) == root->root_key.objectid &&
1643 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1644 goto cow_done; 1749 goto cow_done;
1645 } 1750
1646 btrfs_set_path_blocking(p); 1751 btrfs_set_path_blocking(p);
1647 1752
1648 wret = btrfs_cow_block(trans, root, b, 1753 wret = btrfs_cow_block(trans, root, b,
@@ -1764,138 +1869,6 @@ done:
1764 return ret; 1869 return ret;
1765} 1870}
1766 1871
1767int btrfs_merge_path(struct btrfs_trans_handle *trans,
1768 struct btrfs_root *root,
1769 struct btrfs_key *node_keys,
1770 u64 *nodes, int lowest_level)
1771{
1772 struct extent_buffer *eb;
1773 struct extent_buffer *parent;
1774 struct btrfs_key key;
1775 u64 bytenr;
1776 u64 generation;
1777 u32 blocksize;
1778 int level;
1779 int slot;
1780 int key_match;
1781 int ret;
1782
1783 eb = btrfs_lock_root_node(root);
1784 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
1785 BUG_ON(ret);
1786
1787 btrfs_set_lock_blocking(eb);
1788
1789 parent = eb;
1790 while (1) {
1791 level = btrfs_header_level(parent);
1792 if (level == 0 || level <= lowest_level)
1793 break;
1794
1795 ret = bin_search(parent, &node_keys[lowest_level], level,
1796 &slot);
1797 if (ret && slot > 0)
1798 slot--;
1799
1800 bytenr = btrfs_node_blockptr(parent, slot);
1801 if (nodes[level - 1] == bytenr)
1802 break;
1803
1804 blocksize = btrfs_level_size(root, level - 1);
1805 generation = btrfs_node_ptr_generation(parent, slot);
1806 btrfs_node_key_to_cpu(eb, &key, slot);
1807 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1808
1809 if (generation == trans->transid) {
1810 eb = read_tree_block(root, bytenr, blocksize,
1811 generation);
1812 btrfs_tree_lock(eb);
1813 btrfs_set_lock_blocking(eb);
1814 }
1815
1816 /*
1817 * if node keys match and node pointer hasn't been modified
1818 * in the running transaction, we can merge the path. for
1819 * blocks owened by reloc trees, the node pointer check is
1820 * skipped, this is because these blocks are fully controlled
1821 * by the space balance code, no one else can modify them.
1822 */
1823 if (!nodes[level - 1] || !key_match ||
1824 (generation == trans->transid &&
1825 btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1826 if (level == 1 || level == lowest_level + 1) {
1827 if (generation == trans->transid) {
1828 btrfs_tree_unlock(eb);
1829 free_extent_buffer(eb);
1830 }
1831 break;
1832 }
1833
1834 if (generation != trans->transid) {
1835 eb = read_tree_block(root, bytenr, blocksize,
1836 generation);
1837 btrfs_tree_lock(eb);
1838 btrfs_set_lock_blocking(eb);
1839 }
1840
1841 ret = btrfs_cow_block(trans, root, eb, parent, slot,
1842 &eb);
1843 BUG_ON(ret);
1844
1845 if (root->root_key.objectid ==
1846 BTRFS_TREE_RELOC_OBJECTID) {
1847 if (!nodes[level - 1]) {
1848 nodes[level - 1] = eb->start;
1849 memcpy(&node_keys[level - 1], &key,
1850 sizeof(node_keys[0]));
1851 } else {
1852 WARN_ON(1);
1853 }
1854 }
1855
1856 btrfs_tree_unlock(parent);
1857 free_extent_buffer(parent);
1858 parent = eb;
1859 continue;
1860 }
1861
1862 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1863 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1864 btrfs_mark_buffer_dirty(parent);
1865
1866 ret = btrfs_inc_extent_ref(trans, root,
1867 nodes[level - 1],
1868 blocksize, parent->start,
1869 btrfs_header_owner(parent),
1870 btrfs_header_generation(parent),
1871 level - 1);
1872 BUG_ON(ret);
1873
1874 /*
1875 * If the block was created in the running transaction,
1876 * it's possible this is the last reference to it, so we
1877 * should drop the subtree.
1878 */
1879 if (generation == trans->transid) {
1880 ret = btrfs_drop_subtree(trans, root, eb, parent);
1881 BUG_ON(ret);
1882 btrfs_tree_unlock(eb);
1883 free_extent_buffer(eb);
1884 } else {
1885 ret = btrfs_free_extent(trans, root, bytenr,
1886 blocksize, parent->start,
1887 btrfs_header_owner(parent),
1888 btrfs_header_generation(parent),
1889 level - 1, 1);
1890 BUG_ON(ret);
1891 }
1892 break;
1893 }
1894 btrfs_tree_unlock(parent);
1895 free_extent_buffer(parent);
1896 return 0;
1897}
1898
1899/* 1872/*
1900 * adjust the pointers going up the tree, starting at level 1873 * adjust the pointers going up the tree, starting at level
1901 * making sure the right key of each node is points to 'key'. 1874 * making sure the right key of each node is points to 'key'.
@@ -2021,9 +1994,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2021 btrfs_mark_buffer_dirty(src); 1994 btrfs_mark_buffer_dirty(src);
2022 btrfs_mark_buffer_dirty(dst); 1995 btrfs_mark_buffer_dirty(dst);
2023 1996
2024 ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
2025 BUG_ON(ret);
2026
2027 return ret; 1997 return ret;
2028} 1998}
2029 1999
@@ -2083,9 +2053,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
2083 btrfs_mark_buffer_dirty(src); 2053 btrfs_mark_buffer_dirty(src);
2084 btrfs_mark_buffer_dirty(dst); 2054 btrfs_mark_buffer_dirty(dst);
2085 2055
2086 ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
2087 BUG_ON(ret);
2088
2089 return ret; 2056 return ret;
2090} 2057}
2091 2058
@@ -2105,7 +2072,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2105 struct extent_buffer *c; 2072 struct extent_buffer *c;
2106 struct extent_buffer *old; 2073 struct extent_buffer *old;
2107 struct btrfs_disk_key lower_key; 2074 struct btrfs_disk_key lower_key;
2108 int ret;
2109 2075
2110 BUG_ON(path->nodes[level]); 2076 BUG_ON(path->nodes[level]);
2111 BUG_ON(path->nodes[level-1] != root->node); 2077 BUG_ON(path->nodes[level-1] != root->node);
@@ -2117,16 +2083,17 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2117 btrfs_node_key(lower, &lower_key, 0); 2083 btrfs_node_key(lower, &lower_key, 0);
2118 2084
2119 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2085 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2120 root->root_key.objectid, trans->transid, 2086 root->root_key.objectid, &lower_key,
2121 level, root->node->start, 0); 2087 level, root->node->start, 0);
2122 if (IS_ERR(c)) 2088 if (IS_ERR(c))
2123 return PTR_ERR(c); 2089 return PTR_ERR(c);
2124 2090
2125 memset_extent_buffer(c, 0, 0, root->nodesize); 2091 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
2126 btrfs_set_header_nritems(c, 1); 2092 btrfs_set_header_nritems(c, 1);
2127 btrfs_set_header_level(c, level); 2093 btrfs_set_header_level(c, level);
2128 btrfs_set_header_bytenr(c, c->start); 2094 btrfs_set_header_bytenr(c, c->start);
2129 btrfs_set_header_generation(c, trans->transid); 2095 btrfs_set_header_generation(c, trans->transid);
2096 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
2130 btrfs_set_header_owner(c, root->root_key.objectid); 2097 btrfs_set_header_owner(c, root->root_key.objectid);
2131 2098
2132 write_extent_buffer(c, root->fs_info->fsid, 2099 write_extent_buffer(c, root->fs_info->fsid,
@@ -2151,12 +2118,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2151 root->node = c; 2118 root->node = c;
2152 spin_unlock(&root->node_lock); 2119 spin_unlock(&root->node_lock);
2153 2120
2154 ret = btrfs_update_extent_ref(trans, root, lower->start,
2155 lower->len, lower->start, c->start,
2156 root->root_key.objectid,
2157 trans->transid, level - 1);
2158 BUG_ON(ret);
2159
2160 /* the super has an extra ref to root->node */ 2121 /* the super has an extra ref to root->node */
2161 free_extent_buffer(old); 2122 free_extent_buffer(old);
2162 2123
@@ -2244,20 +2205,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2244 } 2205 }
2245 2206
2246 c_nritems = btrfs_header_nritems(c); 2207 c_nritems = btrfs_header_nritems(c);
2208 mid = (c_nritems + 1) / 2;
2209 btrfs_node_key(c, &disk_key, mid);
2247 2210
2248 split = btrfs_alloc_free_block(trans, root, root->nodesize, 2211 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2249 path->nodes[level + 1]->start,
2250 root->root_key.objectid, 2212 root->root_key.objectid,
2251 trans->transid, level, c->start, 0); 2213 &disk_key, level, c->start, 0);
2252 if (IS_ERR(split)) 2214 if (IS_ERR(split))
2253 return PTR_ERR(split); 2215 return PTR_ERR(split);
2254 2216
2255 btrfs_set_header_flags(split, btrfs_header_flags(c)); 2217 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
2256 btrfs_set_header_level(split, btrfs_header_level(c)); 2218 btrfs_set_header_level(split, btrfs_header_level(c));
2257 btrfs_set_header_bytenr(split, split->start); 2219 btrfs_set_header_bytenr(split, split->start);
2258 btrfs_set_header_generation(split, trans->transid); 2220 btrfs_set_header_generation(split, trans->transid);
2221 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
2259 btrfs_set_header_owner(split, root->root_key.objectid); 2222 btrfs_set_header_owner(split, root->root_key.objectid);
2260 btrfs_set_header_flags(split, 0);
2261 write_extent_buffer(split, root->fs_info->fsid, 2223 write_extent_buffer(split, root->fs_info->fsid,
2262 (unsigned long)btrfs_header_fsid(split), 2224 (unsigned long)btrfs_header_fsid(split),
2263 BTRFS_FSID_SIZE); 2225 BTRFS_FSID_SIZE);
@@ -2265,7 +2227,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2265 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2227 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2266 BTRFS_UUID_SIZE); 2228 BTRFS_UUID_SIZE);
2267 2229
2268 mid = (c_nritems + 1) / 2;
2269 2230
2270 copy_extent_buffer(split, c, 2231 copy_extent_buffer(split, c,
2271 btrfs_node_key_ptr_offset(0), 2232 btrfs_node_key_ptr_offset(0),
@@ -2278,16 +2239,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2278 btrfs_mark_buffer_dirty(c); 2239 btrfs_mark_buffer_dirty(c);
2279 btrfs_mark_buffer_dirty(split); 2240 btrfs_mark_buffer_dirty(split);
2280 2241
2281 btrfs_node_key(split, &disk_key, 0);
2282 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2242 wret = insert_ptr(trans, root, path, &disk_key, split->start,
2283 path->slots[level + 1] + 1, 2243 path->slots[level + 1] + 1,
2284 level + 1); 2244 level + 1);
2285 if (wret) 2245 if (wret)
2286 ret = wret; 2246 ret = wret;
2287 2247
2288 ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2289 BUG_ON(ret);
2290
2291 if (path->slots[level] >= mid) { 2248 if (path->slots[level] >= mid) {
2292 path->slots[level] -= mid; 2249 path->slots[level] -= mid;
2293 btrfs_tree_unlock(c); 2250 btrfs_tree_unlock(c);
@@ -2360,7 +2317,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2360 u32 right_nritems; 2317 u32 right_nritems;
2361 u32 data_end; 2318 u32 data_end;
2362 u32 this_item_size; 2319 u32 this_item_size;
2363 int ret;
2364 2320
2365 if (empty) 2321 if (empty)
2366 nr = 0; 2322 nr = 0;
@@ -2473,9 +2429,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
2473 btrfs_mark_buffer_dirty(left); 2429 btrfs_mark_buffer_dirty(left);
2474 btrfs_mark_buffer_dirty(right); 2430 btrfs_mark_buffer_dirty(right);
2475 2431
2476 ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2477 BUG_ON(ret);
2478
2479 btrfs_item_key(right, &disk_key, 0); 2432 btrfs_item_key(right, &disk_key, 0);
2480 btrfs_set_node_key(upper, &disk_key, slot + 1); 2433 btrfs_set_node_key(upper, &disk_key, slot + 1);
2481 btrfs_mark_buffer_dirty(upper); 2434 btrfs_mark_buffer_dirty(upper);
@@ -2720,10 +2673,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2720 if (right_nritems) 2673 if (right_nritems)
2721 btrfs_mark_buffer_dirty(right); 2674 btrfs_mark_buffer_dirty(right);
2722 2675
2723 ret = btrfs_update_ref(trans, root, right, left,
2724 old_left_nritems, push_items);
2725 BUG_ON(ret);
2726
2727 btrfs_item_key(right, &disk_key, 0); 2676 btrfs_item_key(right, &disk_key, 0);
2728 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2677 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2729 if (wret) 2678 if (wret)
@@ -2880,9 +2829,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
2880 btrfs_mark_buffer_dirty(l); 2829 btrfs_mark_buffer_dirty(l);
2881 BUG_ON(path->slots[0] != slot); 2830 BUG_ON(path->slots[0] != slot);
2882 2831
2883 ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2884 BUG_ON(ret);
2885
2886 if (mid <= slot) { 2832 if (mid <= slot) {
2887 btrfs_tree_unlock(path->nodes[0]); 2833 btrfs_tree_unlock(path->nodes[0]);
2888 free_extent_buffer(path->nodes[0]); 2834 free_extent_buffer(path->nodes[0]);
@@ -2911,6 +2857,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2911 struct btrfs_path *path, int data_size, 2857 struct btrfs_path *path, int data_size,
2912 int extend) 2858 int extend)
2913{ 2859{
2860 struct btrfs_disk_key disk_key;
2914 struct extent_buffer *l; 2861 struct extent_buffer *l;
2915 u32 nritems; 2862 u32 nritems;
2916 int mid; 2863 int mid;
@@ -2918,7 +2865,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2918 struct extent_buffer *right; 2865 struct extent_buffer *right;
2919 int ret = 0; 2866 int ret = 0;
2920 int wret; 2867 int wret;
2921 int double_split; 2868 int split;
2922 int num_doubles = 0; 2869 int num_doubles = 0;
2923 2870
2924 /* first try to make some room by pushing left and right */ 2871 /* first try to make some room by pushing left and right */
@@ -2945,16 +2892,53 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
2945 return ret; 2892 return ret;
2946 } 2893 }
2947again: 2894again:
2948 double_split = 0; 2895 split = 1;
2949 l = path->nodes[0]; 2896 l = path->nodes[0];
2950 slot = path->slots[0]; 2897 slot = path->slots[0];
2951 nritems = btrfs_header_nritems(l); 2898 nritems = btrfs_header_nritems(l);
2952 mid = (nritems + 1) / 2; 2899 mid = (nritems + 1) / 2;
2953 2900
2954 right = btrfs_alloc_free_block(trans, root, root->leafsize, 2901 if (mid <= slot) {
2955 path->nodes[1]->start, 2902 if (nritems == 1 ||
2903 leaf_space_used(l, mid, nritems - mid) + data_size >
2904 BTRFS_LEAF_DATA_SIZE(root)) {
2905 if (slot >= nritems) {
2906 split = 0;
2907 } else {
2908 mid = slot;
2909 if (mid != nritems &&
2910 leaf_space_used(l, mid, nritems - mid) +
2911 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2912 split = 2;
2913 }
2914 }
2915 }
2916 } else {
2917 if (leaf_space_used(l, 0, mid) + data_size >
2918 BTRFS_LEAF_DATA_SIZE(root)) {
2919 if (!extend && data_size && slot == 0) {
2920 split = 0;
2921 } else if ((extend || !data_size) && slot == 0) {
2922 mid = 1;
2923 } else {
2924 mid = slot;
2925 if (mid != nritems &&
2926 leaf_space_used(l, mid, nritems - mid) +
2927 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2928 split = 2 ;
2929 }
2930 }
2931 }
2932 }
2933
2934 if (split == 0)
2935 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2936 else
2937 btrfs_item_key(l, &disk_key, mid);
2938
2939 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
2956 root->root_key.objectid, 2940 root->root_key.objectid,
2957 trans->transid, 0, l->start, 0); 2941 &disk_key, 0, l->start, 0);
2958 if (IS_ERR(right)) { 2942 if (IS_ERR(right)) {
2959 BUG_ON(1); 2943 BUG_ON(1);
2960 return PTR_ERR(right); 2944 return PTR_ERR(right);
@@ -2963,6 +2947,7 @@ again:
2963 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 2947 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2964 btrfs_set_header_bytenr(right, right->start); 2948 btrfs_set_header_bytenr(right, right->start);
2965 btrfs_set_header_generation(right, trans->transid); 2949 btrfs_set_header_generation(right, trans->transid);
2950 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
2966 btrfs_set_header_owner(right, root->root_key.objectid); 2951 btrfs_set_header_owner(right, root->root_key.objectid);
2967 btrfs_set_header_level(right, 0); 2952 btrfs_set_header_level(right, 0);
2968 write_extent_buffer(right, root->fs_info->fsid, 2953 write_extent_buffer(right, root->fs_info->fsid,
@@ -2973,79 +2958,47 @@ again:
2973 (unsigned long)btrfs_header_chunk_tree_uuid(right), 2958 (unsigned long)btrfs_header_chunk_tree_uuid(right),
2974 BTRFS_UUID_SIZE); 2959 BTRFS_UUID_SIZE);
2975 2960
2976 if (mid <= slot) { 2961 if (split == 0) {
2977 if (nritems == 1 || 2962 if (mid <= slot) {
2978 leaf_space_used(l, mid, nritems - mid) + data_size > 2963 btrfs_set_header_nritems(right, 0);
2979 BTRFS_LEAF_DATA_SIZE(root)) { 2964 wret = insert_ptr(trans, root, path,
2980 if (slot >= nritems) { 2965 &disk_key, right->start,
2981 struct btrfs_disk_key disk_key; 2966 path->slots[1] + 1, 1);
2982 2967 if (wret)
2983 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2968 ret = wret;
2984 btrfs_set_header_nritems(right, 0);
2985 wret = insert_ptr(trans, root, path,
2986 &disk_key, right->start,
2987 path->slots[1] + 1, 1);
2988 if (wret)
2989 ret = wret;
2990 2969
2991 btrfs_tree_unlock(path->nodes[0]); 2970 btrfs_tree_unlock(path->nodes[0]);
2992 free_extent_buffer(path->nodes[0]); 2971 free_extent_buffer(path->nodes[0]);
2993 path->nodes[0] = right; 2972 path->nodes[0] = right;
2994 path->slots[0] = 0; 2973 path->slots[0] = 0;
2995 path->slots[1] += 1; 2974 path->slots[1] += 1;
2996 btrfs_mark_buffer_dirty(right); 2975 } else {
2997 return ret; 2976 btrfs_set_header_nritems(right, 0);
2998 } 2977 wret = insert_ptr(trans, root, path,
2999 mid = slot; 2978 &disk_key,
3000 if (mid != nritems && 2979 right->start,
3001 leaf_space_used(l, mid, nritems - mid) + 2980 path->slots[1], 1);
3002 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2981 if (wret)
3003 double_split = 1; 2982 ret = wret;
3004 } 2983 btrfs_tree_unlock(path->nodes[0]);
3005 } 2984 free_extent_buffer(path->nodes[0]);
3006 } else { 2985 path->nodes[0] = right;
3007 if (leaf_space_used(l, 0, mid) + data_size > 2986 path->slots[0] = 0;
3008 BTRFS_LEAF_DATA_SIZE(root)) { 2987 if (path->slots[1] == 0) {
3009 if (!extend && data_size && slot == 0) { 2988 wret = fixup_low_keys(trans, root,
3010 struct btrfs_disk_key disk_key; 2989 path, &disk_key, 1);
3011
3012 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3013 btrfs_set_header_nritems(right, 0);
3014 wret = insert_ptr(trans, root, path,
3015 &disk_key,
3016 right->start,
3017 path->slots[1], 1);
3018 if (wret) 2990 if (wret)
3019 ret = wret; 2991 ret = wret;
3020 btrfs_tree_unlock(path->nodes[0]);
3021 free_extent_buffer(path->nodes[0]);
3022 path->nodes[0] = right;
3023 path->slots[0] = 0;
3024 if (path->slots[1] == 0) {
3025 wret = fixup_low_keys(trans, root,
3026 path, &disk_key, 1);
3027 if (wret)
3028 ret = wret;
3029 }
3030 btrfs_mark_buffer_dirty(right);
3031 return ret;
3032 } else if ((extend || !data_size) && slot == 0) {
3033 mid = 1;
3034 } else {
3035 mid = slot;
3036 if (mid != nritems &&
3037 leaf_space_used(l, mid, nritems - mid) +
3038 data_size > BTRFS_LEAF_DATA_SIZE(root)) {
3039 double_split = 1;
3040 }
3041 } 2992 }
3042 } 2993 }
2994 btrfs_mark_buffer_dirty(right);
2995 return ret;
3043 } 2996 }
3044 2997
3045 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); 2998 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
3046 BUG_ON(ret); 2999 BUG_ON(ret);
3047 3000
3048 if (double_split) { 3001 if (split == 2) {
3049 BUG_ON(num_doubles != 0); 3002 BUG_ON(num_doubles != 0);
3050 num_doubles++; 3003 num_doubles++;
3051 goto again; 3004 goto again;
@@ -3447,7 +3400,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3447 /* figure out how many keys we can insert in here */ 3400 /* figure out how many keys we can insert in here */
3448 total_data = data_size[0]; 3401 total_data = data_size[0];
3449 for (i = 1; i < nr; i++) { 3402 for (i = 1; i < nr; i++) {
3450 if (comp_cpu_keys(&found_key, cpu_key + i) <= 0) 3403 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3451 break; 3404 break;
3452 total_data += data_size[i]; 3405 total_data += data_size[i];
3453 } 3406 }
@@ -3745,9 +3698,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3745 3698
3746/* 3699/*
3747 * a helper function to delete the leaf pointed to by path->slots[1] and 3700 * a helper function to delete the leaf pointed to by path->slots[1] and
3748 * path->nodes[1]. bytenr is the node block pointer, but since the callers 3701 * path->nodes[1].
3749 * already know it, it is faster to have them pass it down than to
3750 * read it out of the node again.
3751 * 3702 *
3752 * This deletes the pointer in path->nodes[1] and frees the leaf 3703 * This deletes the pointer in path->nodes[1] and frees the leaf
3753 * block extent. zero is returned if it all worked out, < 0 otherwise. 3704 * block extent. zero is returned if it all worked out, < 0 otherwise.
@@ -3755,15 +3706,14 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3755 * The path must have already been setup for deleting the leaf, including 3706 * The path must have already been setup for deleting the leaf, including
3756 * all the proper balancing. path->nodes[1] must be locked. 3707 * all the proper balancing. path->nodes[1] must be locked.
3757 */ 3708 */
3758noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3709static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3759 struct btrfs_root *root, 3710 struct btrfs_root *root,
3760 struct btrfs_path *path, u64 bytenr) 3711 struct btrfs_path *path,
3712 struct extent_buffer *leaf)
3761{ 3713{
3762 int ret; 3714 int ret;
3763 u64 root_gen = btrfs_header_generation(path->nodes[1]);
3764 u64 parent_start = path->nodes[1]->start;
3765 u64 parent_owner = btrfs_header_owner(path->nodes[1]);
3766 3715
3716 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
3767 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3717 ret = del_ptr(trans, root, path, 1, path->slots[1]);
3768 if (ret) 3718 if (ret)
3769 return ret; 3719 return ret;
@@ -3774,10 +3724,8 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3774 */ 3724 */
3775 btrfs_unlock_up_safe(path, 0); 3725 btrfs_unlock_up_safe(path, 0);
3776 3726
3777 ret = btrfs_free_extent(trans, root, bytenr, 3727 ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
3778 btrfs_level_size(root, 0), 3728 0, root->root_key.objectid, 0, 0);
3779 parent_start, parent_owner,
3780 root_gen, 0, 1);
3781 return ret; 3729 return ret;
3782} 3730}
3783/* 3731/*
@@ -3845,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3845 if (leaf == root->node) { 3793 if (leaf == root->node) {
3846 btrfs_set_header_level(leaf, 0); 3794 btrfs_set_header_level(leaf, 0);
3847 } else { 3795 } else {
3848 ret = btrfs_del_leaf(trans, root, path, leaf->start); 3796 ret = btrfs_del_leaf(trans, root, path, leaf);
3849 BUG_ON(ret); 3797 BUG_ON(ret);
3850 } 3798 }
3851 } else { 3799 } else {
@@ -3884,8 +3832,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3884 3832
3885 if (btrfs_header_nritems(leaf) == 0) { 3833 if (btrfs_header_nritems(leaf) == 0) {
3886 path->slots[1] = slot; 3834 path->slots[1] = slot;
3887 ret = btrfs_del_leaf(trans, root, path, 3835 ret = btrfs_del_leaf(trans, root, path, leaf);
3888 leaf->start);
3889 BUG_ON(ret); 3836 BUG_ON(ret);
3890 free_extent_buffer(leaf); 3837 free_extent_buffer(leaf);
3891 } else { 3838 } else {