aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2012-05-31 16:50:28 -0400
committerChris Mason <chris.mason@oracle.com>2012-05-31 16:49:53 -0400
commit1e20932a23578bb1ec59107843574e259b96193f (patch)
tree844ae54293c4414fc4c232a36d0e4d4939dc35aa /fs/btrfs/ctree.c
parentcfc442b69696b593cb442f09997dcb4cb5748171 (diff)
parentc31931088fd6cf953bd0868a2647b6c3928e6c96 (diff)
Merge branch 'for-chris' of git://git.jan-o-sch.net/btrfs-unstable into for-linus
Conflicts: fs/btrfs/ulist.h Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c849
1 files changed, 823 insertions, 26 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 99fcad631a21..d7a96cfdc50a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/rbtree.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "disk-io.h" 23#include "disk-io.h"
23#include "transaction.h" 24#include "transaction.h"
@@ -37,7 +38,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
37 struct extent_buffer *dst_buf, 38 struct extent_buffer *dst_buf,
38 struct extent_buffer *src_buf); 39 struct extent_buffer *src_buf);
39static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 40static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
40 struct btrfs_path *path, int level, int slot); 41 struct btrfs_path *path, int level, int slot,
42 int tree_mod_log);
43static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
44 struct extent_buffer *eb);
45struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr,
46 u32 blocksize, u64 parent_transid,
47 u64 time_seq);
48struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root,
49 u64 bytenr, u32 blocksize,
50 u64 time_seq);
41 51
42struct btrfs_path *btrfs_alloc_path(void) 52struct btrfs_path *btrfs_alloc_path(void)
43{ 53{
@@ -255,7 +265,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
255 265
256 cow = btrfs_alloc_free_block(trans, root, buf->len, 0, 266 cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
257 new_root_objectid, &disk_key, level, 267 new_root_objectid, &disk_key, level,
258 buf->start, 0, 1); 268 buf->start, 0);
259 if (IS_ERR(cow)) 269 if (IS_ERR(cow))
260 return PTR_ERR(cow); 270 return PTR_ERR(cow);
261 271
@@ -288,6 +298,434 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
288 return 0; 298 return 0;
289} 299}
290 300
301enum mod_log_op {
302 MOD_LOG_KEY_REPLACE,
303 MOD_LOG_KEY_ADD,
304 MOD_LOG_KEY_REMOVE,
305 MOD_LOG_KEY_REMOVE_WHILE_FREEING,
306 MOD_LOG_KEY_REMOVE_WHILE_MOVING,
307 MOD_LOG_MOVE_KEYS,
308 MOD_LOG_ROOT_REPLACE,
309};
310
311struct tree_mod_move {
312 int dst_slot;
313 int nr_items;
314};
315
316struct tree_mod_root {
317 u64 logical;
318 u8 level;
319};
320
321struct tree_mod_elem {
322 struct rb_node node;
323 u64 index; /* shifted logical */
324 struct seq_list elem;
325 enum mod_log_op op;
326
327 /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
328 int slot;
329
330 /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
331 u64 generation;
332
333 /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
334 struct btrfs_disk_key key;
335 u64 blockptr;
336
337 /* this is used for op == MOD_LOG_MOVE_KEYS */
338 struct tree_mod_move move;
339
340 /* this is used for op == MOD_LOG_ROOT_REPLACE */
341 struct tree_mod_root old_root;
342};
343
344static inline void
345__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
346{
347 elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
348 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
349}
350
351void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
352 struct seq_list *elem)
353{
354 elem->flags = 1;
355 spin_lock(&fs_info->tree_mod_seq_lock);
356 __get_tree_mod_seq(fs_info, elem);
357 spin_unlock(&fs_info->tree_mod_seq_lock);
358}
359
360void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
361 struct seq_list *elem)
362{
363 struct rb_root *tm_root;
364 struct rb_node *node;
365 struct rb_node *next;
366 struct seq_list *cur_elem;
367 struct tree_mod_elem *tm;
368 u64 min_seq = (u64)-1;
369 u64 seq_putting = elem->seq;
370
371 if (!seq_putting)
372 return;
373
374 BUG_ON(!(elem->flags & 1));
375 spin_lock(&fs_info->tree_mod_seq_lock);
376 list_del(&elem->list);
377
378 list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
379 if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
380 if (seq_putting > cur_elem->seq) {
381 /*
382 * blocker with lower sequence number exists, we
383 * cannot remove anything from the log
384 */
385 goto out;
386 }
387 min_seq = cur_elem->seq;
388 }
389 }
390
391 /*
392 * anything that's lower than the lowest existing (read: blocked)
393 * sequence number can be removed from the tree.
394 */
395 write_lock(&fs_info->tree_mod_log_lock);
396 tm_root = &fs_info->tree_mod_log;
397 for (node = rb_first(tm_root); node; node = next) {
398 next = rb_next(node);
399 tm = container_of(node, struct tree_mod_elem, node);
400 if (tm->elem.seq > min_seq)
401 continue;
402 rb_erase(node, tm_root);
403 list_del(&tm->elem.list);
404 kfree(tm);
405 }
406 write_unlock(&fs_info->tree_mod_log_lock);
407out:
408 spin_unlock(&fs_info->tree_mod_seq_lock);
409}
410
411/*
412 * key order of the log:
413 * index -> sequence
414 *
415 * the index is the shifted logical of the *new* root node for root replace
416 * operations, or the shifted logical of the affected block for all other
417 * operations.
418 */
419static noinline int
420__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
421{
422 struct rb_root *tm_root;
423 struct rb_node **new;
424 struct rb_node *parent = NULL;
425 struct tree_mod_elem *cur;
426 int ret = 0;
427
428 BUG_ON(!tm || !tm->elem.seq);
429
430 write_lock(&fs_info->tree_mod_log_lock);
431 tm_root = &fs_info->tree_mod_log;
432 new = &tm_root->rb_node;
433 while (*new) {
434 cur = container_of(*new, struct tree_mod_elem, node);
435 parent = *new;
436 if (cur->index < tm->index)
437 new = &((*new)->rb_left);
438 else if (cur->index > tm->index)
439 new = &((*new)->rb_right);
440 else if (cur->elem.seq < tm->elem.seq)
441 new = &((*new)->rb_left);
442 else if (cur->elem.seq > tm->elem.seq)
443 new = &((*new)->rb_right);
444 else {
445 kfree(tm);
446 ret = -EEXIST;
447 goto unlock;
448 }
449 }
450
451 rb_link_node(&tm->node, parent, new);
452 rb_insert_color(&tm->node, tm_root);
453unlock:
454 write_unlock(&fs_info->tree_mod_log_lock);
455 return ret;
456}
457
458static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
459 struct extent_buffer *eb) {
460 smp_mb();
461 if (list_empty(&(fs_info)->tree_mod_seq_list))
462 return 1;
463 if (!eb)
464 return 0;
465 if (btrfs_header_level(eb) == 0)
466 return 1;
467 return 0;
468}
469
470static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
471 struct tree_mod_elem **tm_ret)
472{
473 struct tree_mod_elem *tm;
474 int seq;
475
476 if (tree_mod_dont_log(fs_info, NULL))
477 return 0;
478
479 tm = *tm_ret = kzalloc(sizeof(*tm), flags);
480 if (!tm)
481 return -ENOMEM;
482
483 tm->elem.flags = 0;
484 spin_lock(&fs_info->tree_mod_seq_lock);
485 if (list_empty(&fs_info->tree_mod_seq_list)) {
486 /*
487 * someone emptied the list while we were waiting for the lock.
488 * we must not add to the list, because no blocker exists. items
489 * are removed from the list only when the existing blocker is
490 * removed from the list.
491 */
492 kfree(tm);
493 seq = 0;
494 } else {
495 __get_tree_mod_seq(fs_info, &tm->elem);
496 seq = tm->elem.seq;
497 }
498 spin_unlock(&fs_info->tree_mod_seq_lock);
499
500 return seq;
501}
502
503static noinline int
504tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
505 struct extent_buffer *eb, int slot,
506 enum mod_log_op op, gfp_t flags)
507{
508 struct tree_mod_elem *tm;
509 int ret;
510
511 ret = tree_mod_alloc(fs_info, flags, &tm);
512 if (ret <= 0)
513 return ret;
514
515 tm->index = eb->start >> PAGE_CACHE_SHIFT;
516 if (op != MOD_LOG_KEY_ADD) {
517 btrfs_node_key(eb, &tm->key, slot);
518 tm->blockptr = btrfs_node_blockptr(eb, slot);
519 }
520 tm->op = op;
521 tm->slot = slot;
522 tm->generation = btrfs_node_ptr_generation(eb, slot);
523
524 return __tree_mod_log_insert(fs_info, tm);
525}
526
527static noinline int
528tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
529 int slot, enum mod_log_op op)
530{
531 return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
532}
533
534static noinline int
535tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
536 struct extent_buffer *eb, int dst_slot, int src_slot,
537 int nr_items, gfp_t flags)
538{
539 struct tree_mod_elem *tm;
540 int ret;
541 int i;
542
543 if (tree_mod_dont_log(fs_info, eb))
544 return 0;
545
546 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
547 ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
548 MOD_LOG_KEY_REMOVE_WHILE_MOVING);
549 BUG_ON(ret < 0);
550 }
551
552 ret = tree_mod_alloc(fs_info, flags, &tm);
553 if (ret <= 0)
554 return ret;
555
556 tm->index = eb->start >> PAGE_CACHE_SHIFT;
557 tm->slot = src_slot;
558 tm->move.dst_slot = dst_slot;
559 tm->move.nr_items = nr_items;
560 tm->op = MOD_LOG_MOVE_KEYS;
561
562 return __tree_mod_log_insert(fs_info, tm);
563}
564
565static noinline int
566tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
567 struct extent_buffer *old_root,
568 struct extent_buffer *new_root, gfp_t flags)
569{
570 struct tree_mod_elem *tm;
571 int ret;
572
573 ret = tree_mod_alloc(fs_info, flags, &tm);
574 if (ret <= 0)
575 return ret;
576
577 tm->index = new_root->start >> PAGE_CACHE_SHIFT;
578 tm->old_root.logical = old_root->start;
579 tm->old_root.level = btrfs_header_level(old_root);
580 tm->generation = btrfs_header_generation(old_root);
581 tm->op = MOD_LOG_ROOT_REPLACE;
582
583 return __tree_mod_log_insert(fs_info, tm);
584}
585
586static struct tree_mod_elem *
587__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
588 int smallest)
589{
590 struct rb_root *tm_root;
591 struct rb_node *node;
592 struct tree_mod_elem *cur = NULL;
593 struct tree_mod_elem *found = NULL;
594 u64 index = start >> PAGE_CACHE_SHIFT;
595
596 read_lock(&fs_info->tree_mod_log_lock);
597 tm_root = &fs_info->tree_mod_log;
598 node = tm_root->rb_node;
599 while (node) {
600 cur = container_of(node, struct tree_mod_elem, node);
601 if (cur->index < index) {
602 node = node->rb_left;
603 } else if (cur->index > index) {
604 node = node->rb_right;
605 } else if (cur->elem.seq < min_seq) {
606 node = node->rb_left;
607 } else if (!smallest) {
608 /* we want the node with the highest seq */
609 if (found)
610 BUG_ON(found->elem.seq > cur->elem.seq);
611 found = cur;
612 node = node->rb_left;
613 } else if (cur->elem.seq > min_seq) {
614 /* we want the node with the smallest seq */
615 if (found)
616 BUG_ON(found->elem.seq < cur->elem.seq);
617 found = cur;
618 node = node->rb_right;
619 } else {
620 found = cur;
621 break;
622 }
623 }
624 read_unlock(&fs_info->tree_mod_log_lock);
625
626 return found;
627}
628
629/*
630 * this returns the element from the log with the smallest time sequence
631 * value that's in the log (the oldest log item). any element with a time
632 * sequence lower than min_seq will be ignored.
633 */
634static struct tree_mod_elem *
635tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
636 u64 min_seq)
637{
638 return __tree_mod_log_search(fs_info, start, min_seq, 1);
639}
640
641/*
642 * this returns the element from the log with the largest time sequence
643 * value that's in the log (the most recent log item). any element with
644 * a time sequence lower than min_seq will be ignored.
645 */
646static struct tree_mod_elem *
647tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
648{
649 return __tree_mod_log_search(fs_info, start, min_seq, 0);
650}
651
652static inline void
653tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
654 struct extent_buffer *src, unsigned long dst_offset,
655 unsigned long src_offset, int nr_items)
656{
657 int ret;
658 int i;
659
660 if (tree_mod_dont_log(fs_info, NULL))
661 return;
662
663 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
664 return;
665
666 /* speed this up by single seq for all operations? */
667 for (i = 0; i < nr_items; i++) {
668 ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
669 MOD_LOG_KEY_REMOVE);
670 BUG_ON(ret < 0);
671 ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
672 MOD_LOG_KEY_ADD);
673 BUG_ON(ret < 0);
674 }
675}
676
677static inline void
678tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
679 int dst_offset, int src_offset, int nr_items)
680{
681 int ret;
682 ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
683 nr_items, GFP_NOFS);
684 BUG_ON(ret < 0);
685}
686
687static inline void
688tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
689 struct extent_buffer *eb,
690 struct btrfs_disk_key *disk_key, int slot, int atomic)
691{
692 int ret;
693
694 ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
695 MOD_LOG_KEY_REPLACE,
696 atomic ? GFP_ATOMIC : GFP_NOFS);
697 BUG_ON(ret < 0);
698}
699
700static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
701 struct extent_buffer *eb)
702{
703 int i;
704 int ret;
705 u32 nritems;
706
707 if (tree_mod_dont_log(fs_info, eb))
708 return;
709
710 nritems = btrfs_header_nritems(eb);
711 for (i = nritems - 1; i >= 0; i--) {
712 ret = tree_mod_log_insert_key(fs_info, eb, i,
713 MOD_LOG_KEY_REMOVE_WHILE_FREEING);
714 BUG_ON(ret < 0);
715 }
716}
717
718static inline void
719tree_mod_log_set_root_pointer(struct btrfs_root *root,
720 struct extent_buffer *new_root_node)
721{
722 int ret;
723 tree_mod_log_free_eb(root->fs_info, root->node);
724 ret = tree_mod_log_insert_root(root->fs_info, root->node,
725 new_root_node, GFP_NOFS);
726 BUG_ON(ret < 0);
727}
728
291/* 729/*
292 * check if the tree block can be shared by multiple trees 730 * check if the tree block can be shared by multiple trees
293 */ 731 */
@@ -409,6 +847,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
409 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 847 ret = btrfs_dec_ref(trans, root, buf, 1, 1);
410 BUG_ON(ret); /* -ENOMEM */ 848 BUG_ON(ret); /* -ENOMEM */
411 } 849 }
850 /*
851 * don't log freeing in case we're freeing the root node, this
852 * is done by tree_mod_log_set_root_pointer later
853 */
854 if (buf != root->node && btrfs_header_level(buf) != 0)
855 tree_mod_log_free_eb(root->fs_info, buf);
412 clean_tree_block(trans, root, buf); 856 clean_tree_block(trans, root, buf);
413 *last_ref = 1; 857 *last_ref = 1;
414 } 858 }
@@ -467,7 +911,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
467 911
468 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, 912 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
469 root->root_key.objectid, &disk_key, 913 root->root_key.objectid, &disk_key,
470 level, search_start, empty_size, 1); 914 level, search_start, empty_size);
471 if (IS_ERR(cow)) 915 if (IS_ERR(cow))
472 return PTR_ERR(cow); 916 return PTR_ERR(cow);
473 917
@@ -506,10 +950,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
506 parent_start = 0; 950 parent_start = 0;
507 951
508 extent_buffer_get(cow); 952 extent_buffer_get(cow);
953 tree_mod_log_set_root_pointer(root, cow);
509 rcu_assign_pointer(root->node, cow); 954 rcu_assign_pointer(root->node, cow);
510 955
511 btrfs_free_tree_block(trans, root, buf, parent_start, 956 btrfs_free_tree_block(trans, root, buf, parent_start,
512 last_ref, 1); 957 last_ref);
513 free_extent_buffer(buf); 958 free_extent_buffer(buf);
514 add_root_to_dirty_list(root); 959 add_root_to_dirty_list(root);
515 } else { 960 } else {
@@ -519,13 +964,15 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
519 parent_start = 0; 964 parent_start = 0;
520 965
521 WARN_ON(trans->transid != btrfs_header_generation(parent)); 966 WARN_ON(trans->transid != btrfs_header_generation(parent));
967 tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
968 MOD_LOG_KEY_REPLACE);
522 btrfs_set_node_blockptr(parent, parent_slot, 969 btrfs_set_node_blockptr(parent, parent_slot,
523 cow->start); 970 cow->start);
524 btrfs_set_node_ptr_generation(parent, parent_slot, 971 btrfs_set_node_ptr_generation(parent, parent_slot,
525 trans->transid); 972 trans->transid);
526 btrfs_mark_buffer_dirty(parent); 973 btrfs_mark_buffer_dirty(parent);
527 btrfs_free_tree_block(trans, root, buf, parent_start, 974 btrfs_free_tree_block(trans, root, buf, parent_start,
528 last_ref, 1); 975 last_ref);
529 } 976 }
530 if (unlock_orig) 977 if (unlock_orig)
531 btrfs_tree_unlock(buf); 978 btrfs_tree_unlock(buf);
@@ -535,6 +982,210 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
535 return 0; 982 return 0;
536} 983}
537 984
985/*
986 * returns the logical address of the oldest predecessor of the given root.
987 * entries older than time_seq are ignored.
988 */
989static struct tree_mod_elem *
990__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
991 struct btrfs_root *root, u64 time_seq)
992{
993 struct tree_mod_elem *tm;
994 struct tree_mod_elem *found = NULL;
995 u64 root_logical = root->node->start;
996 int looped = 0;
997
998 if (!time_seq)
999 return 0;
1000
1001 /*
1002 * the very last operation that's logged for a root is the replacement
1003 * operation (if it is replaced at all). this has the index of the *new*
1004 * root, making it the very first operation that's logged for this root.
1005 */
1006 while (1) {
1007 tm = tree_mod_log_search_oldest(fs_info, root_logical,
1008 time_seq);
1009 if (!looped && !tm)
1010 return 0;
1011 /*
1012 * we must have key remove operations in the log before the
1013 * replace operation.
1014 */
1015 BUG_ON(!tm);
1016
1017 if (tm->op != MOD_LOG_ROOT_REPLACE)
1018 break;
1019
1020 found = tm;
1021 root_logical = tm->old_root.logical;
1022 BUG_ON(root_logical == root->node->start);
1023 looped = 1;
1024 }
1025
1026 return found;
1027}
1028
1029/*
1030 * tm is a pointer to the first operation to rewind within eb. then, all
1031 * previous operations will be rewinded (until we reach something older than
1032 * time_seq).
1033 */
1034static void
1035__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
1036 struct tree_mod_elem *first_tm)
1037{
1038 u32 n;
1039 struct rb_node *next;
1040 struct tree_mod_elem *tm = first_tm;
1041 unsigned long o_dst;
1042 unsigned long o_src;
1043 unsigned long p_size = sizeof(struct btrfs_key_ptr);
1044
1045 n = btrfs_header_nritems(eb);
1046 while (tm && tm->elem.seq >= time_seq) {
1047 /*
1048 * all the operations are recorded with the operator used for
1049 * the modification. as we're going backwards, we do the
1050 * opposite of each operation here.
1051 */
1052 switch (tm->op) {
1053 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
1054 BUG_ON(tm->slot < n);
1055 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
1056 case MOD_LOG_KEY_REMOVE:
1057 btrfs_set_node_key(eb, &tm->key, tm->slot);
1058 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1059 btrfs_set_node_ptr_generation(eb, tm->slot,
1060 tm->generation);
1061 n++;
1062 break;
1063 case MOD_LOG_KEY_REPLACE:
1064 BUG_ON(tm->slot >= n);
1065 btrfs_set_node_key(eb, &tm->key, tm->slot);
1066 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1067 btrfs_set_node_ptr_generation(eb, tm->slot,
1068 tm->generation);
1069 break;
1070 case MOD_LOG_KEY_ADD:
1071 if (tm->slot != n - 1) {
1072 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1073 o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
1074 memmove_extent_buffer(eb, o_dst, o_src, p_size);
1075 }
1076 n--;
1077 break;
1078 case MOD_LOG_MOVE_KEYS:
1079 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1080 o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
1081 memmove_extent_buffer(eb, o_dst, o_src,
1082 tm->move.nr_items * p_size);
1083 break;
1084 case MOD_LOG_ROOT_REPLACE:
1085 /*
1086 * this operation is special. for roots, this must be
1087 * handled explicitly before rewinding.
1088 * for non-roots, this operation may exist if the node
1089 * was a root: root A -> child B; then A gets empty and
1090 * B is promoted to the new root. in the mod log, we'll
1091 * have a root-replace operation for B, a tree block
1092 * that is no root. we simply ignore that operation.
1093 */
1094 break;
1095 }
1096 next = rb_next(&tm->node);
1097 if (!next)
1098 break;
1099 tm = container_of(next, struct tree_mod_elem, node);
1100 if (tm->index != first_tm->index)
1101 break;
1102 }
1103 btrfs_set_header_nritems(eb, n);
1104}
1105
1106static struct extent_buffer *
1107tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1108 u64 time_seq)
1109{
1110 struct extent_buffer *eb_rewin;
1111 struct tree_mod_elem *tm;
1112
1113 if (!time_seq)
1114 return eb;
1115
1116 if (btrfs_header_level(eb) == 0)
1117 return eb;
1118
1119 tm = tree_mod_log_search(fs_info, eb->start, time_seq);
1120 if (!tm)
1121 return eb;
1122
1123 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1124 BUG_ON(tm->slot != 0);
1125 eb_rewin = alloc_dummy_extent_buffer(eb->start,
1126 fs_info->tree_root->nodesize);
1127 BUG_ON(!eb_rewin);
1128 btrfs_set_header_bytenr(eb_rewin, eb->start);
1129 btrfs_set_header_backref_rev(eb_rewin,
1130 btrfs_header_backref_rev(eb));
1131 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
1132 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
1133 } else {
1134 eb_rewin = btrfs_clone_extent_buffer(eb);
1135 BUG_ON(!eb_rewin);
1136 }
1137
1138 extent_buffer_get(eb_rewin);
1139 free_extent_buffer(eb);
1140
1141 __tree_mod_log_rewind(eb_rewin, time_seq, tm);
1142
1143 return eb_rewin;
1144}
1145
1146static inline struct extent_buffer *
1147get_old_root(struct btrfs_root *root, u64 time_seq)
1148{
1149 struct tree_mod_elem *tm;
1150 struct extent_buffer *eb;
1151 struct tree_mod_root *old_root;
1152 u64 old_generation;
1153
1154 tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
1155 if (!tm)
1156 return root->node;
1157
1158 old_root = &tm->old_root;
1159 old_generation = tm->generation;
1160
1161 tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq);
1162 /*
1163 * there was an item in the log when __tree_mod_log_oldest_root
1164 * returned. this one must not go away, because the time_seq passed to
1165 * us must be blocking its removal.
1166 */
1167 BUG_ON(!tm);
1168
1169 if (old_root->logical == root->node->start) {
1170 /* there are logged operations for the current root */
1171 eb = btrfs_clone_extent_buffer(root->node);
1172 } else {
1173 /* there's a root replace operation for the current root */
1174 eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
1175 root->nodesize);
1176 btrfs_set_header_bytenr(eb, eb->start);
1177 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
1178 btrfs_set_header_owner(eb, root->root_key.objectid);
1179 }
1180 if (!eb)
1181 return NULL;
1182 btrfs_set_header_level(eb, old_root->level);
1183 btrfs_set_header_generation(eb, old_generation);
1184 __tree_mod_log_rewind(eb, time_seq, tm);
1185
1186 return eb;
1187}
1188
538static inline int should_cow_block(struct btrfs_trans_handle *trans, 1189static inline int should_cow_block(struct btrfs_trans_handle *trans,
539 struct btrfs_root *root, 1190 struct btrfs_root *root,
540 struct extent_buffer *buf) 1191 struct extent_buffer *buf)
@@ -976,6 +1627,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
976 goto enospc; 1627 goto enospc;
977 } 1628 }
978 1629
1630 tree_mod_log_set_root_pointer(root, child);
979 rcu_assign_pointer(root->node, child); 1631 rcu_assign_pointer(root->node, child);
980 1632
981 add_root_to_dirty_list(root); 1633 add_root_to_dirty_list(root);
@@ -989,7 +1641,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
989 free_extent_buffer(mid); 1641 free_extent_buffer(mid);
990 1642
991 root_sub_used(root, mid->len); 1643 root_sub_used(root, mid->len);
992 btrfs_free_tree_block(trans, root, mid, 0, 1, 0); 1644 btrfs_free_tree_block(trans, root, mid, 0, 1);
993 /* once for the root ptr */ 1645 /* once for the root ptr */
994 free_extent_buffer_stale(mid); 1646 free_extent_buffer_stale(mid);
995 return 0; 1647 return 0;
@@ -1042,14 +1694,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1042 if (btrfs_header_nritems(right) == 0) { 1694 if (btrfs_header_nritems(right) == 0) {
1043 clean_tree_block(trans, root, right); 1695 clean_tree_block(trans, root, right);
1044 btrfs_tree_unlock(right); 1696 btrfs_tree_unlock(right);
1045 del_ptr(trans, root, path, level + 1, pslot + 1); 1697 del_ptr(trans, root, path, level + 1, pslot + 1, 1);
1046 root_sub_used(root, right->len); 1698 root_sub_used(root, right->len);
1047 btrfs_free_tree_block(trans, root, right, 0, 1, 0); 1699 btrfs_free_tree_block(trans, root, right, 0, 1);
1048 free_extent_buffer_stale(right); 1700 free_extent_buffer_stale(right);
1049 right = NULL; 1701 right = NULL;
1050 } else { 1702 } else {
1051 struct btrfs_disk_key right_key; 1703 struct btrfs_disk_key right_key;
1052 btrfs_node_key(right, &right_key, 0); 1704 btrfs_node_key(right, &right_key, 0);
1705 tree_mod_log_set_node_key(root->fs_info, parent,
1706 &right_key, pslot + 1, 0);
1053 btrfs_set_node_key(parent, &right_key, pslot + 1); 1707 btrfs_set_node_key(parent, &right_key, pslot + 1);
1054 btrfs_mark_buffer_dirty(parent); 1708 btrfs_mark_buffer_dirty(parent);
1055 } 1709 }
@@ -1084,15 +1738,17 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1084 if (btrfs_header_nritems(mid) == 0) { 1738 if (btrfs_header_nritems(mid) == 0) {
1085 clean_tree_block(trans, root, mid); 1739 clean_tree_block(trans, root, mid);
1086 btrfs_tree_unlock(mid); 1740 btrfs_tree_unlock(mid);
1087 del_ptr(trans, root, path, level + 1, pslot); 1741 del_ptr(trans, root, path, level + 1, pslot, 1);
1088 root_sub_used(root, mid->len); 1742 root_sub_used(root, mid->len);
1089 btrfs_free_tree_block(trans, root, mid, 0, 1, 0); 1743 btrfs_free_tree_block(trans, root, mid, 0, 1);
1090 free_extent_buffer_stale(mid); 1744 free_extent_buffer_stale(mid);
1091 mid = NULL; 1745 mid = NULL;
1092 } else { 1746 } else {
1093 /* update the parent key to reflect our changes */ 1747 /* update the parent key to reflect our changes */
1094 struct btrfs_disk_key mid_key; 1748 struct btrfs_disk_key mid_key;
1095 btrfs_node_key(mid, &mid_key, 0); 1749 btrfs_node_key(mid, &mid_key, 0);
1750 tree_mod_log_set_node_key(root->fs_info, parent, &mid_key,
1751 pslot, 0);
1096 btrfs_set_node_key(parent, &mid_key, pslot); 1752 btrfs_set_node_key(parent, &mid_key, pslot);
1097 btrfs_mark_buffer_dirty(parent); 1753 btrfs_mark_buffer_dirty(parent);
1098 } 1754 }
@@ -1190,6 +1846,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1190 struct btrfs_disk_key disk_key; 1846 struct btrfs_disk_key disk_key;
1191 orig_slot += left_nr; 1847 orig_slot += left_nr;
1192 btrfs_node_key(mid, &disk_key, 0); 1848 btrfs_node_key(mid, &disk_key, 0);
1849 tree_mod_log_set_node_key(root->fs_info, parent,
1850 &disk_key, pslot, 0);
1193 btrfs_set_node_key(parent, &disk_key, pslot); 1851 btrfs_set_node_key(parent, &disk_key, pslot);
1194 btrfs_mark_buffer_dirty(parent); 1852 btrfs_mark_buffer_dirty(parent);
1195 if (btrfs_header_nritems(left) > orig_slot) { 1853 if (btrfs_header_nritems(left) > orig_slot) {
@@ -1241,6 +1899,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1241 struct btrfs_disk_key disk_key; 1899 struct btrfs_disk_key disk_key;
1242 1900
1243 btrfs_node_key(right, &disk_key, 0); 1901 btrfs_node_key(right, &disk_key, 0);
1902 tree_mod_log_set_node_key(root->fs_info, parent,
1903 &disk_key, pslot + 1, 0);
1244 btrfs_set_node_key(parent, &disk_key, pslot + 1); 1904 btrfs_set_node_key(parent, &disk_key, pslot + 1);
1245 btrfs_mark_buffer_dirty(parent); 1905 btrfs_mark_buffer_dirty(parent);
1246 1906
@@ -1498,7 +2158,7 @@ static int
1498read_block_for_search(struct btrfs_trans_handle *trans, 2158read_block_for_search(struct btrfs_trans_handle *trans,
1499 struct btrfs_root *root, struct btrfs_path *p, 2159 struct btrfs_root *root, struct btrfs_path *p,
1500 struct extent_buffer **eb_ret, int level, int slot, 2160 struct extent_buffer **eb_ret, int level, int slot,
1501 struct btrfs_key *key) 2161 struct btrfs_key *key, u64 time_seq)
1502{ 2162{
1503 u64 blocknr; 2163 u64 blocknr;
1504 u64 gen; 2164 u64 gen;
@@ -1852,7 +2512,7 @@ cow_done:
1852 } 2512 }
1853 2513
1854 err = read_block_for_search(trans, root, p, 2514 err = read_block_for_search(trans, root, p,
1855 &b, level, slot, key); 2515 &b, level, slot, key, 0);
1856 if (err == -EAGAIN) 2516 if (err == -EAGAIN)
1857 goto again; 2517 goto again;
1858 if (err) { 2518 if (err) {
@@ -1924,6 +2584,115 @@ done:
1924} 2584}
1925 2585
1926/* 2586/*
2587 * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
2588 * current state of the tree together with the operations recorded in the tree
2589 * modification log to search for the key in a previous version of this tree, as
2590 * denoted by the time_seq parameter.
2591 *
2592 * Naturally, there is no support for insert, delete or cow operations.
2593 *
2594 * The resulting path and return value will be set up as if we called
2595 * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
2596 */
2597int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
2598 struct btrfs_path *p, u64 time_seq)
2599{
2600 struct extent_buffer *b;
2601 int slot;
2602 int ret;
2603 int err;
2604 int level;
2605 int lowest_unlock = 1;
2606 u8 lowest_level = 0;
2607
2608 lowest_level = p->lowest_level;
2609 WARN_ON(p->nodes[0] != NULL);
2610
2611 if (p->search_commit_root) {
2612 BUG_ON(time_seq);
2613 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2614 }
2615
2616again:
2617 b = get_old_root(root, time_seq);
2618 extent_buffer_get(b);
2619 level = btrfs_header_level(b);
2620 btrfs_tree_read_lock(b);
2621 p->locks[level] = BTRFS_READ_LOCK;
2622
2623 while (b) {
2624 level = btrfs_header_level(b);
2625 p->nodes[level] = b;
2626 btrfs_clear_path_blocking(p, NULL, 0);
2627
2628 /*
2629 * we have a lock on b and as long as we aren't changing
2630 * the tree, there is no way to for the items in b to change.
2631 * It is safe to drop the lock on our parent before we
2632 * go through the expensive btree search on b.
2633 */
2634 btrfs_unlock_up_safe(p, level + 1);
2635
2636 ret = bin_search(b, key, level, &slot);
2637
2638 if (level != 0) {
2639 int dec = 0;
2640 if (ret && slot > 0) {
2641 dec = 1;
2642 slot -= 1;
2643 }
2644 p->slots[level] = slot;
2645 unlock_up(p, level, lowest_unlock, 0, NULL);
2646
2647 if (level == lowest_level) {
2648 if (dec)
2649 p->slots[level]++;
2650 goto done;
2651 }
2652
2653 err = read_block_for_search(NULL, root, p, &b, level,
2654 slot, key, time_seq);
2655 if (err == -EAGAIN)
2656 goto again;
2657 if (err) {
2658 ret = err;
2659 goto done;
2660 }
2661
2662 level = btrfs_header_level(b);
2663 err = btrfs_try_tree_read_lock(b);
2664 if (!err) {
2665 btrfs_set_path_blocking(p);
2666 btrfs_tree_read_lock(b);
2667 btrfs_clear_path_blocking(p, b,
2668 BTRFS_READ_LOCK);
2669 }
2670 p->locks[level] = BTRFS_READ_LOCK;
2671 p->nodes[level] = b;
2672 b = tree_mod_log_rewind(root->fs_info, b, time_seq);
2673 if (b != p->nodes[level]) {
2674 btrfs_tree_unlock_rw(p->nodes[level],
2675 p->locks[level]);
2676 p->locks[level] = 0;
2677 p->nodes[level] = b;
2678 }
2679 } else {
2680 p->slots[level] = slot;
2681 unlock_up(p, level, lowest_unlock, 0, NULL);
2682 goto done;
2683 }
2684 }
2685 ret = 1;
2686done:
2687 if (!p->leave_spinning)
2688 btrfs_set_path_blocking(p);
2689 if (ret < 0)
2690 btrfs_release_path(p);
2691
2692 return ret;
2693}
2694
2695/*
1927 * adjust the pointers going up the tree, starting at level 2696 * adjust the pointers going up the tree, starting at level
1928 * making sure the right key of each node is points to 'key'. 2697 * making sure the right key of each node is points to 'key'.
1929 * This is used after shifting pointers to the left, so it stops 2698 * This is used after shifting pointers to the left, so it stops
@@ -1943,6 +2712,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
1943 if (!path->nodes[i]) 2712 if (!path->nodes[i])
1944 break; 2713 break;
1945 t = path->nodes[i]; 2714 t = path->nodes[i];
2715 tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1);
1946 btrfs_set_node_key(t, key, tslot); 2716 btrfs_set_node_key(t, key, tslot);
1947 btrfs_mark_buffer_dirty(path->nodes[i]); 2717 btrfs_mark_buffer_dirty(path->nodes[i]);
1948 if (tslot != 0) 2718 if (tslot != 0)
@@ -2025,12 +2795,16 @@ static int push_node_left(struct btrfs_trans_handle *trans,
2025 } else 2795 } else
2026 push_items = min(src_nritems - 8, push_items); 2796 push_items = min(src_nritems - 8, push_items);
2027 2797
2798 tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
2799 push_items);
2028 copy_extent_buffer(dst, src, 2800 copy_extent_buffer(dst, src,
2029 btrfs_node_key_ptr_offset(dst_nritems), 2801 btrfs_node_key_ptr_offset(dst_nritems),
2030 btrfs_node_key_ptr_offset(0), 2802 btrfs_node_key_ptr_offset(0),
2031 push_items * sizeof(struct btrfs_key_ptr)); 2803 push_items * sizeof(struct btrfs_key_ptr));
2032 2804
2033 if (push_items < src_nritems) { 2805 if (push_items < src_nritems) {
2806 tree_mod_log_eb_move(root->fs_info, src, 0, push_items,
2807 src_nritems - push_items);
2034 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), 2808 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2035 btrfs_node_key_ptr_offset(push_items), 2809 btrfs_node_key_ptr_offset(push_items),
2036 (src_nritems - push_items) * 2810 (src_nritems - push_items) *
@@ -2084,11 +2858,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
2084 if (max_push < push_items) 2858 if (max_push < push_items)
2085 push_items = max_push; 2859 push_items = max_push;
2086 2860
2861 tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems);
2087 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), 2862 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2088 btrfs_node_key_ptr_offset(0), 2863 btrfs_node_key_ptr_offset(0),
2089 (dst_nritems) * 2864 (dst_nritems) *
2090 sizeof(struct btrfs_key_ptr)); 2865 sizeof(struct btrfs_key_ptr));
2091 2866
2867 tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
2868 src_nritems - push_items, push_items);
2092 copy_extent_buffer(dst, src, 2869 copy_extent_buffer(dst, src,
2093 btrfs_node_key_ptr_offset(0), 2870 btrfs_node_key_ptr_offset(0),
2094 btrfs_node_key_ptr_offset(src_nritems - push_items), 2871 btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -2131,7 +2908,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2131 2908
2132 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2909 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2133 root->root_key.objectid, &lower_key, 2910 root->root_key.objectid, &lower_key,
2134 level, root->node->start, 0, 0); 2911 level, root->node->start, 0);
2135 if (IS_ERR(c)) 2912 if (IS_ERR(c))
2136 return PTR_ERR(c); 2913 return PTR_ERR(c);
2137 2914
@@ -2163,6 +2940,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2163 btrfs_mark_buffer_dirty(c); 2940 btrfs_mark_buffer_dirty(c);
2164 2941
2165 old = root->node; 2942 old = root->node;
2943 tree_mod_log_set_root_pointer(root, c);
2166 rcu_assign_pointer(root->node, c); 2944 rcu_assign_pointer(root->node, c);
2167 2945
2168 /* the super has an extra ref to root->node */ 2946 /* the super has an extra ref to root->node */
@@ -2186,10 +2964,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2186static void insert_ptr(struct btrfs_trans_handle *trans, 2964static void insert_ptr(struct btrfs_trans_handle *trans,
2187 struct btrfs_root *root, struct btrfs_path *path, 2965 struct btrfs_root *root, struct btrfs_path *path,
2188 struct btrfs_disk_key *key, u64 bytenr, 2966 struct btrfs_disk_key *key, u64 bytenr,
2189 int slot, int level) 2967 int slot, int level, int tree_mod_log)
2190{ 2968{
2191 struct extent_buffer *lower; 2969 struct extent_buffer *lower;
2192 int nritems; 2970 int nritems;
2971 int ret;
2193 2972
2194 BUG_ON(!path->nodes[level]); 2973 BUG_ON(!path->nodes[level]);
2195 btrfs_assert_tree_locked(path->nodes[level]); 2974 btrfs_assert_tree_locked(path->nodes[level]);
@@ -2198,11 +2977,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
2198 BUG_ON(slot > nritems); 2977 BUG_ON(slot > nritems);
2199 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); 2978 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
2200 if (slot != nritems) { 2979 if (slot != nritems) {
2980 if (tree_mod_log && level)
2981 tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
2982 slot, nritems - slot);
2201 memmove_extent_buffer(lower, 2983 memmove_extent_buffer(lower,
2202 btrfs_node_key_ptr_offset(slot + 1), 2984 btrfs_node_key_ptr_offset(slot + 1),
2203 btrfs_node_key_ptr_offset(slot), 2985 btrfs_node_key_ptr_offset(slot),
2204 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 2986 (nritems - slot) * sizeof(struct btrfs_key_ptr));
2205 } 2987 }
2988 if (tree_mod_log && level) {
2989 ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
2990 MOD_LOG_KEY_ADD);
2991 BUG_ON(ret < 0);
2992 }
2206 btrfs_set_node_key(lower, key, slot); 2993 btrfs_set_node_key(lower, key, slot);
2207 btrfs_set_node_blockptr(lower, slot, bytenr); 2994 btrfs_set_node_blockptr(lower, slot, bytenr);
2208 WARN_ON(trans->transid == 0); 2995 WARN_ON(trans->transid == 0);
@@ -2254,7 +3041,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2254 3041
2255 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 3042 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2256 root->root_key.objectid, 3043 root->root_key.objectid,
2257 &disk_key, level, c->start, 0, 0); 3044 &disk_key, level, c->start, 0);
2258 if (IS_ERR(split)) 3045 if (IS_ERR(split))
2259 return PTR_ERR(split); 3046 return PTR_ERR(split);
2260 3047
@@ -2273,7 +3060,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2273 (unsigned long)btrfs_header_chunk_tree_uuid(split), 3060 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2274 BTRFS_UUID_SIZE); 3061 BTRFS_UUID_SIZE);
2275 3062
2276 3063 tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
2277 copy_extent_buffer(split, c, 3064 copy_extent_buffer(split, c,
2278 btrfs_node_key_ptr_offset(0), 3065 btrfs_node_key_ptr_offset(0),
2279 btrfs_node_key_ptr_offset(mid), 3066 btrfs_node_key_ptr_offset(mid),
@@ -2286,7 +3073,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2286 btrfs_mark_buffer_dirty(split); 3073 btrfs_mark_buffer_dirty(split);
2287 3074
2288 insert_ptr(trans, root, path, &disk_key, split->start, 3075 insert_ptr(trans, root, path, &disk_key, split->start,
2289 path->slots[level + 1] + 1, level + 1); 3076 path->slots[level + 1] + 1, level + 1, 1);
2290 3077
2291 if (path->slots[level] >= mid) { 3078 if (path->slots[level] >= mid) {
2292 path->slots[level] -= mid; 3079 path->slots[level] -= mid;
@@ -2823,7 +3610,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
2823 btrfs_set_header_nritems(l, mid); 3610 btrfs_set_header_nritems(l, mid);
2824 btrfs_item_key(right, &disk_key, 0); 3611 btrfs_item_key(right, &disk_key, 0);
2825 insert_ptr(trans, root, path, &disk_key, right->start, 3612 insert_ptr(trans, root, path, &disk_key, right->start,
2826 path->slots[1] + 1, 1); 3613 path->slots[1] + 1, 1, 0);
2827 3614
2828 btrfs_mark_buffer_dirty(right); 3615 btrfs_mark_buffer_dirty(right);
2829 btrfs_mark_buffer_dirty(l); 3616 btrfs_mark_buffer_dirty(l);
@@ -3006,7 +3793,7 @@ again:
3006 3793
3007 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 3794 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
3008 root->root_key.objectid, 3795 root->root_key.objectid,
3009 &disk_key, 0, l->start, 0, 0); 3796 &disk_key, 0, l->start, 0);
3010 if (IS_ERR(right)) 3797 if (IS_ERR(right))
3011 return PTR_ERR(right); 3798 return PTR_ERR(right);
3012 3799
@@ -3030,7 +3817,7 @@ again:
3030 if (mid <= slot) { 3817 if (mid <= slot) {
3031 btrfs_set_header_nritems(right, 0); 3818 btrfs_set_header_nritems(right, 0);
3032 insert_ptr(trans, root, path, &disk_key, right->start, 3819 insert_ptr(trans, root, path, &disk_key, right->start,
3033 path->slots[1] + 1, 1); 3820 path->slots[1] + 1, 1, 0);
3034 btrfs_tree_unlock(path->nodes[0]); 3821 btrfs_tree_unlock(path->nodes[0]);
3035 free_extent_buffer(path->nodes[0]); 3822 free_extent_buffer(path->nodes[0]);
3036 path->nodes[0] = right; 3823 path->nodes[0] = right;
@@ -3039,7 +3826,7 @@ again:
3039 } else { 3826 } else {
3040 btrfs_set_header_nritems(right, 0); 3827 btrfs_set_header_nritems(right, 0);
3041 insert_ptr(trans, root, path, &disk_key, right->start, 3828 insert_ptr(trans, root, path, &disk_key, right->start,
3042 path->slots[1], 1); 3829 path->slots[1], 1, 0);
3043 btrfs_tree_unlock(path->nodes[0]); 3830 btrfs_tree_unlock(path->nodes[0]);
3044 free_extent_buffer(path->nodes[0]); 3831 free_extent_buffer(path->nodes[0]);
3045 path->nodes[0] = right; 3832 path->nodes[0] = right;
@@ -3751,19 +4538,29 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3751 * empty a node. 4538 * empty a node.
3752 */ 4539 */
3753static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 4540static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3754 struct btrfs_path *path, int level, int slot) 4541 struct btrfs_path *path, int level, int slot,
4542 int tree_mod_log)
3755{ 4543{
3756 struct extent_buffer *parent = path->nodes[level]; 4544 struct extent_buffer *parent = path->nodes[level];
3757 u32 nritems; 4545 u32 nritems;
4546 int ret;
3758 4547
3759 nritems = btrfs_header_nritems(parent); 4548 nritems = btrfs_header_nritems(parent);
3760 if (slot != nritems - 1) { 4549 if (slot != nritems - 1) {
4550 if (tree_mod_log && level)
4551 tree_mod_log_eb_move(root->fs_info, parent, slot,
4552 slot + 1, nritems - slot - 1);
3761 memmove_extent_buffer(parent, 4553 memmove_extent_buffer(parent,
3762 btrfs_node_key_ptr_offset(slot), 4554 btrfs_node_key_ptr_offset(slot),
3763 btrfs_node_key_ptr_offset(slot + 1), 4555 btrfs_node_key_ptr_offset(slot + 1),
3764 sizeof(struct btrfs_key_ptr) * 4556 sizeof(struct btrfs_key_ptr) *
3765 (nritems - slot - 1)); 4557 (nritems - slot - 1));
4558 } else if (tree_mod_log && level) {
4559 ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
4560 MOD_LOG_KEY_REMOVE);
4561 BUG_ON(ret < 0);
3766 } 4562 }
4563
3767 nritems--; 4564 nritems--;
3768 btrfs_set_header_nritems(parent, nritems); 4565 btrfs_set_header_nritems(parent, nritems);
3769 if (nritems == 0 && parent == root->node) { 4566 if (nritems == 0 && parent == root->node) {
@@ -3795,7 +4592,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
3795 struct extent_buffer *leaf) 4592 struct extent_buffer *leaf)
3796{ 4593{
3797 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 4594 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
3798 del_ptr(trans, root, path, 1, path->slots[1]); 4595 del_ptr(trans, root, path, 1, path->slots[1], 1);
3799 4596
3800 /* 4597 /*
3801 * btrfs_free_extent is expensive, we want to make sure we 4598 * btrfs_free_extent is expensive, we want to make sure we
@@ -3806,7 +4603,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
3806 root_sub_used(root, leaf->len); 4603 root_sub_used(root, leaf->len);
3807 4604
3808 extent_buffer_get(leaf); 4605 extent_buffer_get(leaf);
3809 btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); 4606 btrfs_free_tree_block(trans, root, leaf, 0, 1);
3810 free_extent_buffer_stale(leaf); 4607 free_extent_buffer_stale(leaf);
3811} 4608}
3812/* 4609/*
@@ -4273,7 +5070,7 @@ again:
4273 next = c; 5070 next = c;
4274 next_rw_lock = path->locks[level]; 5071 next_rw_lock = path->locks[level];
4275 ret = read_block_for_search(NULL, root, path, &next, level, 5072 ret = read_block_for_search(NULL, root, path, &next, level,
4276 slot, &key); 5073 slot, &key, 0);
4277 if (ret == -EAGAIN) 5074 if (ret == -EAGAIN)
4278 goto again; 5075 goto again;
4279 5076
@@ -4310,7 +5107,7 @@ again:
4310 break; 5107 break;
4311 5108
4312 ret = read_block_for_search(NULL, root, path, &next, level, 5109 ret = read_block_for_search(NULL, root, path, &next, level,
4313 0, &key); 5110 0, &key, 0);
4314 if (ret == -EAGAIN) 5111 if (ret == -EAGAIN)
4315 goto again; 5112 goto again;
4316 5113