diff options
author | Chris Mason <chris.mason@oracle.com> | 2012-05-31 16:50:28 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2012-05-31 16:49:53 -0400 |
commit | 1e20932a23578bb1ec59107843574e259b96193f (patch) | |
tree | 844ae54293c4414fc4c232a36d0e4d4939dc35aa /fs/btrfs/ctree.c | |
parent | cfc442b69696b593cb442f09997dcb4cb5748171 (diff) | |
parent | c31931088fd6cf953bd0868a2647b6c3928e6c96 (diff) |
Merge branch 'for-chris' of git://git.jan-o-sch.net/btrfs-unstable into for-linus
Conflicts:
fs/btrfs/ulist.h
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r-- | fs/btrfs/ctree.c | 849 |
1 files changed, 823 insertions, 26 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 99fcad631a21..d7a96cfdc50a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -18,6 +18,7 @@ | |||
18 | 18 | ||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <linux/rbtree.h> | ||
21 | #include "ctree.h" | 22 | #include "ctree.h" |
22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
23 | #include "transaction.h" | 24 | #include "transaction.h" |
@@ -37,7 +38,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
37 | struct extent_buffer *dst_buf, | 38 | struct extent_buffer *dst_buf, |
38 | struct extent_buffer *src_buf); | 39 | struct extent_buffer *src_buf); |
39 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
40 | struct btrfs_path *path, int level, int slot); | 41 | struct btrfs_path *path, int level, int slot, |
42 | int tree_mod_log); | ||
43 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
44 | struct extent_buffer *eb); | ||
45 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, | ||
46 | u32 blocksize, u64 parent_transid, | ||
47 | u64 time_seq); | ||
48 | struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root, | ||
49 | u64 bytenr, u32 blocksize, | ||
50 | u64 time_seq); | ||
41 | 51 | ||
42 | struct btrfs_path *btrfs_alloc_path(void) | 52 | struct btrfs_path *btrfs_alloc_path(void) |
43 | { | 53 | { |
@@ -255,7 +265,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
255 | 265 | ||
256 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, | 266 | cow = btrfs_alloc_free_block(trans, root, buf->len, 0, |
257 | new_root_objectid, &disk_key, level, | 267 | new_root_objectid, &disk_key, level, |
258 | buf->start, 0, 1); | 268 | buf->start, 0); |
259 | if (IS_ERR(cow)) | 269 | if (IS_ERR(cow)) |
260 | return PTR_ERR(cow); | 270 | return PTR_ERR(cow); |
261 | 271 | ||
@@ -288,6 +298,434 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
288 | return 0; | 298 | return 0; |
289 | } | 299 | } |
290 | 300 | ||
301 | enum mod_log_op { | ||
302 | MOD_LOG_KEY_REPLACE, | ||
303 | MOD_LOG_KEY_ADD, | ||
304 | MOD_LOG_KEY_REMOVE, | ||
305 | MOD_LOG_KEY_REMOVE_WHILE_FREEING, | ||
306 | MOD_LOG_KEY_REMOVE_WHILE_MOVING, | ||
307 | MOD_LOG_MOVE_KEYS, | ||
308 | MOD_LOG_ROOT_REPLACE, | ||
309 | }; | ||
310 | |||
311 | struct tree_mod_move { | ||
312 | int dst_slot; | ||
313 | int nr_items; | ||
314 | }; | ||
315 | |||
316 | struct tree_mod_root { | ||
317 | u64 logical; | ||
318 | u8 level; | ||
319 | }; | ||
320 | |||
321 | struct tree_mod_elem { | ||
322 | struct rb_node node; | ||
323 | u64 index; /* shifted logical */ | ||
324 | struct seq_list elem; | ||
325 | enum mod_log_op op; | ||
326 | |||
327 | /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ | ||
328 | int slot; | ||
329 | |||
330 | /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ | ||
331 | u64 generation; | ||
332 | |||
333 | /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ | ||
334 | struct btrfs_disk_key key; | ||
335 | u64 blockptr; | ||
336 | |||
337 | /* this is used for op == MOD_LOG_MOVE_KEYS */ | ||
338 | struct tree_mod_move move; | ||
339 | |||
340 | /* this is used for op == MOD_LOG_ROOT_REPLACE */ | ||
341 | struct tree_mod_root old_root; | ||
342 | }; | ||
343 | |||
344 | static inline void | ||
345 | __get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) | ||
346 | { | ||
347 | elem->seq = atomic_inc_return(&fs_info->tree_mod_seq); | ||
348 | list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); | ||
349 | } | ||
350 | |||
351 | void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
352 | struct seq_list *elem) | ||
353 | { | ||
354 | elem->flags = 1; | ||
355 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
356 | __get_tree_mod_seq(fs_info, elem); | ||
357 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
358 | } | ||
359 | |||
360 | void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, | ||
361 | struct seq_list *elem) | ||
362 | { | ||
363 | struct rb_root *tm_root; | ||
364 | struct rb_node *node; | ||
365 | struct rb_node *next; | ||
366 | struct seq_list *cur_elem; | ||
367 | struct tree_mod_elem *tm; | ||
368 | u64 min_seq = (u64)-1; | ||
369 | u64 seq_putting = elem->seq; | ||
370 | |||
371 | if (!seq_putting) | ||
372 | return; | ||
373 | |||
374 | BUG_ON(!(elem->flags & 1)); | ||
375 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
376 | list_del(&elem->list); | ||
377 | |||
378 | list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) { | ||
379 | if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) { | ||
380 | if (seq_putting > cur_elem->seq) { | ||
381 | /* | ||
382 | * blocker with lower sequence number exists, we | ||
383 | * cannot remove anything from the log | ||
384 | */ | ||
385 | goto out; | ||
386 | } | ||
387 | min_seq = cur_elem->seq; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * anything that's lower than the lowest existing (read: blocked) | ||
393 | * sequence number can be removed from the tree. | ||
394 | */ | ||
395 | write_lock(&fs_info->tree_mod_log_lock); | ||
396 | tm_root = &fs_info->tree_mod_log; | ||
397 | for (node = rb_first(tm_root); node; node = next) { | ||
398 | next = rb_next(node); | ||
399 | tm = container_of(node, struct tree_mod_elem, node); | ||
400 | if (tm->elem.seq > min_seq) | ||
401 | continue; | ||
402 | rb_erase(node, tm_root); | ||
403 | list_del(&tm->elem.list); | ||
404 | kfree(tm); | ||
405 | } | ||
406 | write_unlock(&fs_info->tree_mod_log_lock); | ||
407 | out: | ||
408 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * key order of the log: | ||
413 | * index -> sequence | ||
414 | * | ||
415 | * the index is the shifted logical of the *new* root node for root replace | ||
416 | * operations, or the shifted logical of the affected block for all other | ||
417 | * operations. | ||
418 | */ | ||
419 | static noinline int | ||
420 | __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) | ||
421 | { | ||
422 | struct rb_root *tm_root; | ||
423 | struct rb_node **new; | ||
424 | struct rb_node *parent = NULL; | ||
425 | struct tree_mod_elem *cur; | ||
426 | int ret = 0; | ||
427 | |||
428 | BUG_ON(!tm || !tm->elem.seq); | ||
429 | |||
430 | write_lock(&fs_info->tree_mod_log_lock); | ||
431 | tm_root = &fs_info->tree_mod_log; | ||
432 | new = &tm_root->rb_node; | ||
433 | while (*new) { | ||
434 | cur = container_of(*new, struct tree_mod_elem, node); | ||
435 | parent = *new; | ||
436 | if (cur->index < tm->index) | ||
437 | new = &((*new)->rb_left); | ||
438 | else if (cur->index > tm->index) | ||
439 | new = &((*new)->rb_right); | ||
440 | else if (cur->elem.seq < tm->elem.seq) | ||
441 | new = &((*new)->rb_left); | ||
442 | else if (cur->elem.seq > tm->elem.seq) | ||
443 | new = &((*new)->rb_right); | ||
444 | else { | ||
445 | kfree(tm); | ||
446 | ret = -EEXIST; | ||
447 | goto unlock; | ||
448 | } | ||
449 | } | ||
450 | |||
451 | rb_link_node(&tm->node, parent, new); | ||
452 | rb_insert_color(&tm->node, tm_root); | ||
453 | unlock: | ||
454 | write_unlock(&fs_info->tree_mod_log_lock); | ||
455 | return ret; | ||
456 | } | ||
457 | |||
458 | static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, | ||
459 | struct extent_buffer *eb) { | ||
460 | smp_mb(); | ||
461 | if (list_empty(&(fs_info)->tree_mod_seq_list)) | ||
462 | return 1; | ||
463 | if (!eb) | ||
464 | return 0; | ||
465 | if (btrfs_header_level(eb) == 0) | ||
466 | return 1; | ||
467 | return 0; | ||
468 | } | ||
469 | |||
470 | static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, | ||
471 | struct tree_mod_elem **tm_ret) | ||
472 | { | ||
473 | struct tree_mod_elem *tm; | ||
474 | int seq; | ||
475 | |||
476 | if (tree_mod_dont_log(fs_info, NULL)) | ||
477 | return 0; | ||
478 | |||
479 | tm = *tm_ret = kzalloc(sizeof(*tm), flags); | ||
480 | if (!tm) | ||
481 | return -ENOMEM; | ||
482 | |||
483 | tm->elem.flags = 0; | ||
484 | spin_lock(&fs_info->tree_mod_seq_lock); | ||
485 | if (list_empty(&fs_info->tree_mod_seq_list)) { | ||
486 | /* | ||
487 | * someone emptied the list while we were waiting for the lock. | ||
488 | * we must not add to the list, because no blocker exists. items | ||
489 | * are removed from the list only when the existing blocker is | ||
490 | * removed from the list. | ||
491 | */ | ||
492 | kfree(tm); | ||
493 | seq = 0; | ||
494 | } else { | ||
495 | __get_tree_mod_seq(fs_info, &tm->elem); | ||
496 | seq = tm->elem.seq; | ||
497 | } | ||
498 | spin_unlock(&fs_info->tree_mod_seq_lock); | ||
499 | |||
500 | return seq; | ||
501 | } | ||
502 | |||
503 | static noinline int | ||
504 | tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, | ||
505 | struct extent_buffer *eb, int slot, | ||
506 | enum mod_log_op op, gfp_t flags) | ||
507 | { | ||
508 | struct tree_mod_elem *tm; | ||
509 | int ret; | ||
510 | |||
511 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
512 | if (ret <= 0) | ||
513 | return ret; | ||
514 | |||
515 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
516 | if (op != MOD_LOG_KEY_ADD) { | ||
517 | btrfs_node_key(eb, &tm->key, slot); | ||
518 | tm->blockptr = btrfs_node_blockptr(eb, slot); | ||
519 | } | ||
520 | tm->op = op; | ||
521 | tm->slot = slot; | ||
522 | tm->generation = btrfs_node_ptr_generation(eb, slot); | ||
523 | |||
524 | return __tree_mod_log_insert(fs_info, tm); | ||
525 | } | ||
526 | |||
527 | static noinline int | ||
528 | tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
529 | int slot, enum mod_log_op op) | ||
530 | { | ||
531 | return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS); | ||
532 | } | ||
533 | |||
534 | static noinline int | ||
535 | tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, | ||
536 | struct extent_buffer *eb, int dst_slot, int src_slot, | ||
537 | int nr_items, gfp_t flags) | ||
538 | { | ||
539 | struct tree_mod_elem *tm; | ||
540 | int ret; | ||
541 | int i; | ||
542 | |||
543 | if (tree_mod_dont_log(fs_info, eb)) | ||
544 | return 0; | ||
545 | |||
546 | for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { | ||
547 | ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot, | ||
548 | MOD_LOG_KEY_REMOVE_WHILE_MOVING); | ||
549 | BUG_ON(ret < 0); | ||
550 | } | ||
551 | |||
552 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
553 | if (ret <= 0) | ||
554 | return ret; | ||
555 | |||
556 | tm->index = eb->start >> PAGE_CACHE_SHIFT; | ||
557 | tm->slot = src_slot; | ||
558 | tm->move.dst_slot = dst_slot; | ||
559 | tm->move.nr_items = nr_items; | ||
560 | tm->op = MOD_LOG_MOVE_KEYS; | ||
561 | |||
562 | return __tree_mod_log_insert(fs_info, tm); | ||
563 | } | ||
564 | |||
565 | static noinline int | ||
566 | tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, | ||
567 | struct extent_buffer *old_root, | ||
568 | struct extent_buffer *new_root, gfp_t flags) | ||
569 | { | ||
570 | struct tree_mod_elem *tm; | ||
571 | int ret; | ||
572 | |||
573 | ret = tree_mod_alloc(fs_info, flags, &tm); | ||
574 | if (ret <= 0) | ||
575 | return ret; | ||
576 | |||
577 | tm->index = new_root->start >> PAGE_CACHE_SHIFT; | ||
578 | tm->old_root.logical = old_root->start; | ||
579 | tm->old_root.level = btrfs_header_level(old_root); | ||
580 | tm->generation = btrfs_header_generation(old_root); | ||
581 | tm->op = MOD_LOG_ROOT_REPLACE; | ||
582 | |||
583 | return __tree_mod_log_insert(fs_info, tm); | ||
584 | } | ||
585 | |||
586 | static struct tree_mod_elem * | ||
587 | __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, | ||
588 | int smallest) | ||
589 | { | ||
590 | struct rb_root *tm_root; | ||
591 | struct rb_node *node; | ||
592 | struct tree_mod_elem *cur = NULL; | ||
593 | struct tree_mod_elem *found = NULL; | ||
594 | u64 index = start >> PAGE_CACHE_SHIFT; | ||
595 | |||
596 | read_lock(&fs_info->tree_mod_log_lock); | ||
597 | tm_root = &fs_info->tree_mod_log; | ||
598 | node = tm_root->rb_node; | ||
599 | while (node) { | ||
600 | cur = container_of(node, struct tree_mod_elem, node); | ||
601 | if (cur->index < index) { | ||
602 | node = node->rb_left; | ||
603 | } else if (cur->index > index) { | ||
604 | node = node->rb_right; | ||
605 | } else if (cur->elem.seq < min_seq) { | ||
606 | node = node->rb_left; | ||
607 | } else if (!smallest) { | ||
608 | /* we want the node with the highest seq */ | ||
609 | if (found) | ||
610 | BUG_ON(found->elem.seq > cur->elem.seq); | ||
611 | found = cur; | ||
612 | node = node->rb_left; | ||
613 | } else if (cur->elem.seq > min_seq) { | ||
614 | /* we want the node with the smallest seq */ | ||
615 | if (found) | ||
616 | BUG_ON(found->elem.seq < cur->elem.seq); | ||
617 | found = cur; | ||
618 | node = node->rb_right; | ||
619 | } else { | ||
620 | found = cur; | ||
621 | break; | ||
622 | } | ||
623 | } | ||
624 | read_unlock(&fs_info->tree_mod_log_lock); | ||
625 | |||
626 | return found; | ||
627 | } | ||
628 | |||
629 | /* | ||
630 | * this returns the element from the log with the smallest time sequence | ||
631 | * value that's in the log (the oldest log item). any element with a time | ||
632 | * sequence lower than min_seq will be ignored. | ||
633 | */ | ||
634 | static struct tree_mod_elem * | ||
635 | tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, | ||
636 | u64 min_seq) | ||
637 | { | ||
638 | return __tree_mod_log_search(fs_info, start, min_seq, 1); | ||
639 | } | ||
640 | |||
641 | /* | ||
642 | * this returns the element from the log with the largest time sequence | ||
643 | * value that's in the log (the most recent log item). any element with | ||
644 | * a time sequence lower than min_seq will be ignored. | ||
645 | */ | ||
646 | static struct tree_mod_elem * | ||
647 | tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) | ||
648 | { | ||
649 | return __tree_mod_log_search(fs_info, start, min_seq, 0); | ||
650 | } | ||
651 | |||
652 | static inline void | ||
653 | tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
654 | struct extent_buffer *src, unsigned long dst_offset, | ||
655 | unsigned long src_offset, int nr_items) | ||
656 | { | ||
657 | int ret; | ||
658 | int i; | ||
659 | |||
660 | if (tree_mod_dont_log(fs_info, NULL)) | ||
661 | return; | ||
662 | |||
663 | if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) | ||
664 | return; | ||
665 | |||
666 | /* speed this up by single seq for all operations? */ | ||
667 | for (i = 0; i < nr_items; i++) { | ||
668 | ret = tree_mod_log_insert_key(fs_info, src, i + src_offset, | ||
669 | MOD_LOG_KEY_REMOVE); | ||
670 | BUG_ON(ret < 0); | ||
671 | ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset, | ||
672 | MOD_LOG_KEY_ADD); | ||
673 | BUG_ON(ret < 0); | ||
674 | } | ||
675 | } | ||
676 | |||
677 | static inline void | ||
678 | tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | ||
679 | int dst_offset, int src_offset, int nr_items) | ||
680 | { | ||
681 | int ret; | ||
682 | ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset, | ||
683 | nr_items, GFP_NOFS); | ||
684 | BUG_ON(ret < 0); | ||
685 | } | ||
686 | |||
687 | static inline void | ||
688 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | ||
689 | struct extent_buffer *eb, | ||
690 | struct btrfs_disk_key *disk_key, int slot, int atomic) | ||
691 | { | ||
692 | int ret; | ||
693 | |||
694 | ret = tree_mod_log_insert_key_mask(fs_info, eb, slot, | ||
695 | MOD_LOG_KEY_REPLACE, | ||
696 | atomic ? GFP_ATOMIC : GFP_NOFS); | ||
697 | BUG_ON(ret < 0); | ||
698 | } | ||
699 | |||
700 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | ||
701 | struct extent_buffer *eb) | ||
702 | { | ||
703 | int i; | ||
704 | int ret; | ||
705 | u32 nritems; | ||
706 | |||
707 | if (tree_mod_dont_log(fs_info, eb)) | ||
708 | return; | ||
709 | |||
710 | nritems = btrfs_header_nritems(eb); | ||
711 | for (i = nritems - 1; i >= 0; i--) { | ||
712 | ret = tree_mod_log_insert_key(fs_info, eb, i, | ||
713 | MOD_LOG_KEY_REMOVE_WHILE_FREEING); | ||
714 | BUG_ON(ret < 0); | ||
715 | } | ||
716 | } | ||
717 | |||
718 | static inline void | ||
719 | tree_mod_log_set_root_pointer(struct btrfs_root *root, | ||
720 | struct extent_buffer *new_root_node) | ||
721 | { | ||
722 | int ret; | ||
723 | tree_mod_log_free_eb(root->fs_info, root->node); | ||
724 | ret = tree_mod_log_insert_root(root->fs_info, root->node, | ||
725 | new_root_node, GFP_NOFS); | ||
726 | BUG_ON(ret < 0); | ||
727 | } | ||
728 | |||
291 | /* | 729 | /* |
292 | * check if the tree block can be shared by multiple trees | 730 | * check if the tree block can be shared by multiple trees |
293 | */ | 731 | */ |
@@ -409,6 +847,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
409 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); | 847 | ret = btrfs_dec_ref(trans, root, buf, 1, 1); |
410 | BUG_ON(ret); /* -ENOMEM */ | 848 | BUG_ON(ret); /* -ENOMEM */ |
411 | } | 849 | } |
850 | /* | ||
851 | * don't log freeing in case we're freeing the root node, this | ||
852 | * is done by tree_mod_log_set_root_pointer later | ||
853 | */ | ||
854 | if (buf != root->node && btrfs_header_level(buf) != 0) | ||
855 | tree_mod_log_free_eb(root->fs_info, buf); | ||
412 | clean_tree_block(trans, root, buf); | 856 | clean_tree_block(trans, root, buf); |
413 | *last_ref = 1; | 857 | *last_ref = 1; |
414 | } | 858 | } |
@@ -467,7 +911,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
467 | 911 | ||
468 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, | 912 | cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, |
469 | root->root_key.objectid, &disk_key, | 913 | root->root_key.objectid, &disk_key, |
470 | level, search_start, empty_size, 1); | 914 | level, search_start, empty_size); |
471 | if (IS_ERR(cow)) | 915 | if (IS_ERR(cow)) |
472 | return PTR_ERR(cow); | 916 | return PTR_ERR(cow); |
473 | 917 | ||
@@ -506,10 +950,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
506 | parent_start = 0; | 950 | parent_start = 0; |
507 | 951 | ||
508 | extent_buffer_get(cow); | 952 | extent_buffer_get(cow); |
953 | tree_mod_log_set_root_pointer(root, cow); | ||
509 | rcu_assign_pointer(root->node, cow); | 954 | rcu_assign_pointer(root->node, cow); |
510 | 955 | ||
511 | btrfs_free_tree_block(trans, root, buf, parent_start, | 956 | btrfs_free_tree_block(trans, root, buf, parent_start, |
512 | last_ref, 1); | 957 | last_ref); |
513 | free_extent_buffer(buf); | 958 | free_extent_buffer(buf); |
514 | add_root_to_dirty_list(root); | 959 | add_root_to_dirty_list(root); |
515 | } else { | 960 | } else { |
@@ -519,13 +964,15 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
519 | parent_start = 0; | 964 | parent_start = 0; |
520 | 965 | ||
521 | WARN_ON(trans->transid != btrfs_header_generation(parent)); | 966 | WARN_ON(trans->transid != btrfs_header_generation(parent)); |
967 | tree_mod_log_insert_key(root->fs_info, parent, parent_slot, | ||
968 | MOD_LOG_KEY_REPLACE); | ||
522 | btrfs_set_node_blockptr(parent, parent_slot, | 969 | btrfs_set_node_blockptr(parent, parent_slot, |
523 | cow->start); | 970 | cow->start); |
524 | btrfs_set_node_ptr_generation(parent, parent_slot, | 971 | btrfs_set_node_ptr_generation(parent, parent_slot, |
525 | trans->transid); | 972 | trans->transid); |
526 | btrfs_mark_buffer_dirty(parent); | 973 | btrfs_mark_buffer_dirty(parent); |
527 | btrfs_free_tree_block(trans, root, buf, parent_start, | 974 | btrfs_free_tree_block(trans, root, buf, parent_start, |
528 | last_ref, 1); | 975 | last_ref); |
529 | } | 976 | } |
530 | if (unlock_orig) | 977 | if (unlock_orig) |
531 | btrfs_tree_unlock(buf); | 978 | btrfs_tree_unlock(buf); |
@@ -535,6 +982,210 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
535 | return 0; | 982 | return 0; |
536 | } | 983 | } |
537 | 984 | ||
985 | /* | ||
986 | * returns the logical address of the oldest predecessor of the given root. | ||
987 | * entries older than time_seq are ignored. | ||
988 | */ | ||
989 | static struct tree_mod_elem * | ||
990 | __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, | ||
991 | struct btrfs_root *root, u64 time_seq) | ||
992 | { | ||
993 | struct tree_mod_elem *tm; | ||
994 | struct tree_mod_elem *found = NULL; | ||
995 | u64 root_logical = root->node->start; | ||
996 | int looped = 0; | ||
997 | |||
998 | if (!time_seq) | ||
999 | return 0; | ||
1000 | |||
1001 | /* | ||
1002 | * the very last operation that's logged for a root is the replacement | ||
1003 | * operation (if it is replaced at all). this has the index of the *new* | ||
1004 | * root, making it the very first operation that's logged for this root. | ||
1005 | */ | ||
1006 | while (1) { | ||
1007 | tm = tree_mod_log_search_oldest(fs_info, root_logical, | ||
1008 | time_seq); | ||
1009 | if (!looped && !tm) | ||
1010 | return 0; | ||
1011 | /* | ||
1012 | * we must have key remove operations in the log before the | ||
1013 | * replace operation. | ||
1014 | */ | ||
1015 | BUG_ON(!tm); | ||
1016 | |||
1017 | if (tm->op != MOD_LOG_ROOT_REPLACE) | ||
1018 | break; | ||
1019 | |||
1020 | found = tm; | ||
1021 | root_logical = tm->old_root.logical; | ||
1022 | BUG_ON(root_logical == root->node->start); | ||
1023 | looped = 1; | ||
1024 | } | ||
1025 | |||
1026 | return found; | ||
1027 | } | ||
1028 | |||
1029 | /* | ||
1030 | * tm is a pointer to the first operation to rewind within eb. then, all | ||
1031 | * previous operations will be rewinded (until we reach something older than | ||
1032 | * time_seq). | ||
1033 | */ | ||
1034 | static void | ||
1035 | __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | ||
1036 | struct tree_mod_elem *first_tm) | ||
1037 | { | ||
1038 | u32 n; | ||
1039 | struct rb_node *next; | ||
1040 | struct tree_mod_elem *tm = first_tm; | ||
1041 | unsigned long o_dst; | ||
1042 | unsigned long o_src; | ||
1043 | unsigned long p_size = sizeof(struct btrfs_key_ptr); | ||
1044 | |||
1045 | n = btrfs_header_nritems(eb); | ||
1046 | while (tm && tm->elem.seq >= time_seq) { | ||
1047 | /* | ||
1048 | * all the operations are recorded with the operator used for | ||
1049 | * the modification. as we're going backwards, we do the | ||
1050 | * opposite of each operation here. | ||
1051 | */ | ||
1052 | switch (tm->op) { | ||
1053 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: | ||
1054 | BUG_ON(tm->slot < n); | ||
1055 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: | ||
1056 | case MOD_LOG_KEY_REMOVE: | ||
1057 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
1058 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
1059 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
1060 | tm->generation); | ||
1061 | n++; | ||
1062 | break; | ||
1063 | case MOD_LOG_KEY_REPLACE: | ||
1064 | BUG_ON(tm->slot >= n); | ||
1065 | btrfs_set_node_key(eb, &tm->key, tm->slot); | ||
1066 | btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); | ||
1067 | btrfs_set_node_ptr_generation(eb, tm->slot, | ||
1068 | tm->generation); | ||
1069 | break; | ||
1070 | case MOD_LOG_KEY_ADD: | ||
1071 | if (tm->slot != n - 1) { | ||
1072 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1073 | o_src = btrfs_node_key_ptr_offset(tm->slot + 1); | ||
1074 | memmove_extent_buffer(eb, o_dst, o_src, p_size); | ||
1075 | } | ||
1076 | n--; | ||
1077 | break; | ||
1078 | case MOD_LOG_MOVE_KEYS: | ||
1079 | o_dst = btrfs_node_key_ptr_offset(tm->slot); | ||
1080 | o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); | ||
1081 | memmove_extent_buffer(eb, o_dst, o_src, | ||
1082 | tm->move.nr_items * p_size); | ||
1083 | break; | ||
1084 | case MOD_LOG_ROOT_REPLACE: | ||
1085 | /* | ||
1086 | * this operation is special. for roots, this must be | ||
1087 | * handled explicitly before rewinding. | ||
1088 | * for non-roots, this operation may exist if the node | ||
1089 | * was a root: root A -> child B; then A gets empty and | ||
1090 | * B is promoted to the new root. in the mod log, we'll | ||
1091 | * have a root-replace operation for B, a tree block | ||
1092 | * that is no root. we simply ignore that operation. | ||
1093 | */ | ||
1094 | break; | ||
1095 | } | ||
1096 | next = rb_next(&tm->node); | ||
1097 | if (!next) | ||
1098 | break; | ||
1099 | tm = container_of(next, struct tree_mod_elem, node); | ||
1100 | if (tm->index != first_tm->index) | ||
1101 | break; | ||
1102 | } | ||
1103 | btrfs_set_header_nritems(eb, n); | ||
1104 | } | ||
1105 | |||
1106 | static struct extent_buffer * | ||
1107 | tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | ||
1108 | u64 time_seq) | ||
1109 | { | ||
1110 | struct extent_buffer *eb_rewin; | ||
1111 | struct tree_mod_elem *tm; | ||
1112 | |||
1113 | if (!time_seq) | ||
1114 | return eb; | ||
1115 | |||
1116 | if (btrfs_header_level(eb) == 0) | ||
1117 | return eb; | ||
1118 | |||
1119 | tm = tree_mod_log_search(fs_info, eb->start, time_seq); | ||
1120 | if (!tm) | ||
1121 | return eb; | ||
1122 | |||
1123 | if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { | ||
1124 | BUG_ON(tm->slot != 0); | ||
1125 | eb_rewin = alloc_dummy_extent_buffer(eb->start, | ||
1126 | fs_info->tree_root->nodesize); | ||
1127 | BUG_ON(!eb_rewin); | ||
1128 | btrfs_set_header_bytenr(eb_rewin, eb->start); | ||
1129 | btrfs_set_header_backref_rev(eb_rewin, | ||
1130 | btrfs_header_backref_rev(eb)); | ||
1131 | btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); | ||
1132 | btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); | ||
1133 | } else { | ||
1134 | eb_rewin = btrfs_clone_extent_buffer(eb); | ||
1135 | BUG_ON(!eb_rewin); | ||
1136 | } | ||
1137 | |||
1138 | extent_buffer_get(eb_rewin); | ||
1139 | free_extent_buffer(eb); | ||
1140 | |||
1141 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); | ||
1142 | |||
1143 | return eb_rewin; | ||
1144 | } | ||
1145 | |||
1146 | static inline struct extent_buffer * | ||
1147 | get_old_root(struct btrfs_root *root, u64 time_seq) | ||
1148 | { | ||
1149 | struct tree_mod_elem *tm; | ||
1150 | struct extent_buffer *eb; | ||
1151 | struct tree_mod_root *old_root; | ||
1152 | u64 old_generation; | ||
1153 | |||
1154 | tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); | ||
1155 | if (!tm) | ||
1156 | return root->node; | ||
1157 | |||
1158 | old_root = &tm->old_root; | ||
1159 | old_generation = tm->generation; | ||
1160 | |||
1161 | tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); | ||
1162 | /* | ||
1163 | * there was an item in the log when __tree_mod_log_oldest_root | ||
1164 | * returned. this one must not go away, because the time_seq passed to | ||
1165 | * us must be blocking its removal. | ||
1166 | */ | ||
1167 | BUG_ON(!tm); | ||
1168 | |||
1169 | if (old_root->logical == root->node->start) { | ||
1170 | /* there are logged operations for the current root */ | ||
1171 | eb = btrfs_clone_extent_buffer(root->node); | ||
1172 | } else { | ||
1173 | /* there's a root replace operation for the current root */ | ||
1174 | eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, | ||
1175 | root->nodesize); | ||
1176 | btrfs_set_header_bytenr(eb, eb->start); | ||
1177 | btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); | ||
1178 | btrfs_set_header_owner(eb, root->root_key.objectid); | ||
1179 | } | ||
1180 | if (!eb) | ||
1181 | return NULL; | ||
1182 | btrfs_set_header_level(eb, old_root->level); | ||
1183 | btrfs_set_header_generation(eb, old_generation); | ||
1184 | __tree_mod_log_rewind(eb, time_seq, tm); | ||
1185 | |||
1186 | return eb; | ||
1187 | } | ||
1188 | |||
538 | static inline int should_cow_block(struct btrfs_trans_handle *trans, | 1189 | static inline int should_cow_block(struct btrfs_trans_handle *trans, |
539 | struct btrfs_root *root, | 1190 | struct btrfs_root *root, |
540 | struct extent_buffer *buf) | 1191 | struct extent_buffer *buf) |
@@ -976,6 +1627,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
976 | goto enospc; | 1627 | goto enospc; |
977 | } | 1628 | } |
978 | 1629 | ||
1630 | tree_mod_log_set_root_pointer(root, child); | ||
979 | rcu_assign_pointer(root->node, child); | 1631 | rcu_assign_pointer(root->node, child); |
980 | 1632 | ||
981 | add_root_to_dirty_list(root); | 1633 | add_root_to_dirty_list(root); |
@@ -989,7 +1641,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
989 | free_extent_buffer(mid); | 1641 | free_extent_buffer(mid); |
990 | 1642 | ||
991 | root_sub_used(root, mid->len); | 1643 | root_sub_used(root, mid->len); |
992 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1644 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
993 | /* once for the root ptr */ | 1645 | /* once for the root ptr */ |
994 | free_extent_buffer_stale(mid); | 1646 | free_extent_buffer_stale(mid); |
995 | return 0; | 1647 | return 0; |
@@ -1042,14 +1694,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1042 | if (btrfs_header_nritems(right) == 0) { | 1694 | if (btrfs_header_nritems(right) == 0) { |
1043 | clean_tree_block(trans, root, right); | 1695 | clean_tree_block(trans, root, right); |
1044 | btrfs_tree_unlock(right); | 1696 | btrfs_tree_unlock(right); |
1045 | del_ptr(trans, root, path, level + 1, pslot + 1); | 1697 | del_ptr(trans, root, path, level + 1, pslot + 1, 1); |
1046 | root_sub_used(root, right->len); | 1698 | root_sub_used(root, right->len); |
1047 | btrfs_free_tree_block(trans, root, right, 0, 1, 0); | 1699 | btrfs_free_tree_block(trans, root, right, 0, 1); |
1048 | free_extent_buffer_stale(right); | 1700 | free_extent_buffer_stale(right); |
1049 | right = NULL; | 1701 | right = NULL; |
1050 | } else { | 1702 | } else { |
1051 | struct btrfs_disk_key right_key; | 1703 | struct btrfs_disk_key right_key; |
1052 | btrfs_node_key(right, &right_key, 0); | 1704 | btrfs_node_key(right, &right_key, 0); |
1705 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1706 | &right_key, pslot + 1, 0); | ||
1053 | btrfs_set_node_key(parent, &right_key, pslot + 1); | 1707 | btrfs_set_node_key(parent, &right_key, pslot + 1); |
1054 | btrfs_mark_buffer_dirty(parent); | 1708 | btrfs_mark_buffer_dirty(parent); |
1055 | } | 1709 | } |
@@ -1084,15 +1738,17 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1084 | if (btrfs_header_nritems(mid) == 0) { | 1738 | if (btrfs_header_nritems(mid) == 0) { |
1085 | clean_tree_block(trans, root, mid); | 1739 | clean_tree_block(trans, root, mid); |
1086 | btrfs_tree_unlock(mid); | 1740 | btrfs_tree_unlock(mid); |
1087 | del_ptr(trans, root, path, level + 1, pslot); | 1741 | del_ptr(trans, root, path, level + 1, pslot, 1); |
1088 | root_sub_used(root, mid->len); | 1742 | root_sub_used(root, mid->len); |
1089 | btrfs_free_tree_block(trans, root, mid, 0, 1, 0); | 1743 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
1090 | free_extent_buffer_stale(mid); | 1744 | free_extent_buffer_stale(mid); |
1091 | mid = NULL; | 1745 | mid = NULL; |
1092 | } else { | 1746 | } else { |
1093 | /* update the parent key to reflect our changes */ | 1747 | /* update the parent key to reflect our changes */ |
1094 | struct btrfs_disk_key mid_key; | 1748 | struct btrfs_disk_key mid_key; |
1095 | btrfs_node_key(mid, &mid_key, 0); | 1749 | btrfs_node_key(mid, &mid_key, 0); |
1750 | tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, | ||
1751 | pslot, 0); | ||
1096 | btrfs_set_node_key(parent, &mid_key, pslot); | 1752 | btrfs_set_node_key(parent, &mid_key, pslot); |
1097 | btrfs_mark_buffer_dirty(parent); | 1753 | btrfs_mark_buffer_dirty(parent); |
1098 | } | 1754 | } |
@@ -1190,6 +1846,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1190 | struct btrfs_disk_key disk_key; | 1846 | struct btrfs_disk_key disk_key; |
1191 | orig_slot += left_nr; | 1847 | orig_slot += left_nr; |
1192 | btrfs_node_key(mid, &disk_key, 0); | 1848 | btrfs_node_key(mid, &disk_key, 0); |
1849 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1850 | &disk_key, pslot, 0); | ||
1193 | btrfs_set_node_key(parent, &disk_key, pslot); | 1851 | btrfs_set_node_key(parent, &disk_key, pslot); |
1194 | btrfs_mark_buffer_dirty(parent); | 1852 | btrfs_mark_buffer_dirty(parent); |
1195 | if (btrfs_header_nritems(left) > orig_slot) { | 1853 | if (btrfs_header_nritems(left) > orig_slot) { |
@@ -1241,6 +1899,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1241 | struct btrfs_disk_key disk_key; | 1899 | struct btrfs_disk_key disk_key; |
1242 | 1900 | ||
1243 | btrfs_node_key(right, &disk_key, 0); | 1901 | btrfs_node_key(right, &disk_key, 0); |
1902 | tree_mod_log_set_node_key(root->fs_info, parent, | ||
1903 | &disk_key, pslot + 1, 0); | ||
1244 | btrfs_set_node_key(parent, &disk_key, pslot + 1); | 1904 | btrfs_set_node_key(parent, &disk_key, pslot + 1); |
1245 | btrfs_mark_buffer_dirty(parent); | 1905 | btrfs_mark_buffer_dirty(parent); |
1246 | 1906 | ||
@@ -1498,7 +2158,7 @@ static int | |||
1498 | read_block_for_search(struct btrfs_trans_handle *trans, | 2158 | read_block_for_search(struct btrfs_trans_handle *trans, |
1499 | struct btrfs_root *root, struct btrfs_path *p, | 2159 | struct btrfs_root *root, struct btrfs_path *p, |
1500 | struct extent_buffer **eb_ret, int level, int slot, | 2160 | struct extent_buffer **eb_ret, int level, int slot, |
1501 | struct btrfs_key *key) | 2161 | struct btrfs_key *key, u64 time_seq) |
1502 | { | 2162 | { |
1503 | u64 blocknr; | 2163 | u64 blocknr; |
1504 | u64 gen; | 2164 | u64 gen; |
@@ -1852,7 +2512,7 @@ cow_done: | |||
1852 | } | 2512 | } |
1853 | 2513 | ||
1854 | err = read_block_for_search(trans, root, p, | 2514 | err = read_block_for_search(trans, root, p, |
1855 | &b, level, slot, key); | 2515 | &b, level, slot, key, 0); |
1856 | if (err == -EAGAIN) | 2516 | if (err == -EAGAIN) |
1857 | goto again; | 2517 | goto again; |
1858 | if (err) { | 2518 | if (err) { |
@@ -1924,6 +2584,115 @@ done: | |||
1924 | } | 2584 | } |
1925 | 2585 | ||
1926 | /* | 2586 | /* |
2587 | * Like btrfs_search_slot, this looks for a key in the given tree. It uses the | ||
2588 | * current state of the tree together with the operations recorded in the tree | ||
2589 | * modification log to search for the key in a previous version of this tree, as | ||
2590 | * denoted by the time_seq parameter. | ||
2591 | * | ||
2592 | * Naturally, there is no support for insert, delete or cow operations. | ||
2593 | * | ||
2594 | * The resulting path and return value will be set up as if we called | ||
2595 | * btrfs_search_slot at that point in time with ins_len and cow both set to 0. | ||
2596 | */ | ||
2597 | int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, | ||
2598 | struct btrfs_path *p, u64 time_seq) | ||
2599 | { | ||
2600 | struct extent_buffer *b; | ||
2601 | int slot; | ||
2602 | int ret; | ||
2603 | int err; | ||
2604 | int level; | ||
2605 | int lowest_unlock = 1; | ||
2606 | u8 lowest_level = 0; | ||
2607 | |||
2608 | lowest_level = p->lowest_level; | ||
2609 | WARN_ON(p->nodes[0] != NULL); | ||
2610 | |||
2611 | if (p->search_commit_root) { | ||
2612 | BUG_ON(time_seq); | ||
2613 | return btrfs_search_slot(NULL, root, key, p, 0, 0); | ||
2614 | } | ||
2615 | |||
2616 | again: | ||
2617 | b = get_old_root(root, time_seq); | ||
2618 | extent_buffer_get(b); | ||
2619 | level = btrfs_header_level(b); | ||
2620 | btrfs_tree_read_lock(b); | ||
2621 | p->locks[level] = BTRFS_READ_LOCK; | ||
2622 | |||
2623 | while (b) { | ||
2624 | level = btrfs_header_level(b); | ||
2625 | p->nodes[level] = b; | ||
2626 | btrfs_clear_path_blocking(p, NULL, 0); | ||
2627 | |||
2628 | /* | ||
2629 | * we have a lock on b and as long as we aren't changing | ||
2630 | * the tree, there is no way to for the items in b to change. | ||
2631 | * It is safe to drop the lock on our parent before we | ||
2632 | * go through the expensive btree search on b. | ||
2633 | */ | ||
2634 | btrfs_unlock_up_safe(p, level + 1); | ||
2635 | |||
2636 | ret = bin_search(b, key, level, &slot); | ||
2637 | |||
2638 | if (level != 0) { | ||
2639 | int dec = 0; | ||
2640 | if (ret && slot > 0) { | ||
2641 | dec = 1; | ||
2642 | slot -= 1; | ||
2643 | } | ||
2644 | p->slots[level] = slot; | ||
2645 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
2646 | |||
2647 | if (level == lowest_level) { | ||
2648 | if (dec) | ||
2649 | p->slots[level]++; | ||
2650 | goto done; | ||
2651 | } | ||
2652 | |||
2653 | err = read_block_for_search(NULL, root, p, &b, level, | ||
2654 | slot, key, time_seq); | ||
2655 | if (err == -EAGAIN) | ||
2656 | goto again; | ||
2657 | if (err) { | ||
2658 | ret = err; | ||
2659 | goto done; | ||
2660 | } | ||
2661 | |||
2662 | level = btrfs_header_level(b); | ||
2663 | err = btrfs_try_tree_read_lock(b); | ||
2664 | if (!err) { | ||
2665 | btrfs_set_path_blocking(p); | ||
2666 | btrfs_tree_read_lock(b); | ||
2667 | btrfs_clear_path_blocking(p, b, | ||
2668 | BTRFS_READ_LOCK); | ||
2669 | } | ||
2670 | p->locks[level] = BTRFS_READ_LOCK; | ||
2671 | p->nodes[level] = b; | ||
2672 | b = tree_mod_log_rewind(root->fs_info, b, time_seq); | ||
2673 | if (b != p->nodes[level]) { | ||
2674 | btrfs_tree_unlock_rw(p->nodes[level], | ||
2675 | p->locks[level]); | ||
2676 | p->locks[level] = 0; | ||
2677 | p->nodes[level] = b; | ||
2678 | } | ||
2679 | } else { | ||
2680 | p->slots[level] = slot; | ||
2681 | unlock_up(p, level, lowest_unlock, 0, NULL); | ||
2682 | goto done; | ||
2683 | } | ||
2684 | } | ||
2685 | ret = 1; | ||
2686 | done: | ||
2687 | if (!p->leave_spinning) | ||
2688 | btrfs_set_path_blocking(p); | ||
2689 | if (ret < 0) | ||
2690 | btrfs_release_path(p); | ||
2691 | |||
2692 | return ret; | ||
2693 | } | ||
2694 | |||
2695 | /* | ||
1927 | * adjust the pointers going up the tree, starting at level | 2696 | * adjust the pointers going up the tree, starting at level |
1928 | * making sure the right key of each node is points to 'key'. | 2697 | * making sure the right key of each node is points to 'key'. |
1929 | * This is used after shifting pointers to the left, so it stops | 2698 | * This is used after shifting pointers to the left, so it stops |
@@ -1943,6 +2712,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, | |||
1943 | if (!path->nodes[i]) | 2712 | if (!path->nodes[i]) |
1944 | break; | 2713 | break; |
1945 | t = path->nodes[i]; | 2714 | t = path->nodes[i]; |
2715 | tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); | ||
1946 | btrfs_set_node_key(t, key, tslot); | 2716 | btrfs_set_node_key(t, key, tslot); |
1947 | btrfs_mark_buffer_dirty(path->nodes[i]); | 2717 | btrfs_mark_buffer_dirty(path->nodes[i]); |
1948 | if (tslot != 0) | 2718 | if (tslot != 0) |
@@ -2025,12 +2795,16 @@ static int push_node_left(struct btrfs_trans_handle *trans, | |||
2025 | } else | 2795 | } else |
2026 | push_items = min(src_nritems - 8, push_items); | 2796 | push_items = min(src_nritems - 8, push_items); |
2027 | 2797 | ||
2798 | tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0, | ||
2799 | push_items); | ||
2028 | copy_extent_buffer(dst, src, | 2800 | copy_extent_buffer(dst, src, |
2029 | btrfs_node_key_ptr_offset(dst_nritems), | 2801 | btrfs_node_key_ptr_offset(dst_nritems), |
2030 | btrfs_node_key_ptr_offset(0), | 2802 | btrfs_node_key_ptr_offset(0), |
2031 | push_items * sizeof(struct btrfs_key_ptr)); | 2803 | push_items * sizeof(struct btrfs_key_ptr)); |
2032 | 2804 | ||
2033 | if (push_items < src_nritems) { | 2805 | if (push_items < src_nritems) { |
2806 | tree_mod_log_eb_move(root->fs_info, src, 0, push_items, | ||
2807 | src_nritems - push_items); | ||
2034 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), | 2808 | memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), |
2035 | btrfs_node_key_ptr_offset(push_items), | 2809 | btrfs_node_key_ptr_offset(push_items), |
2036 | (src_nritems - push_items) * | 2810 | (src_nritems - push_items) * |
@@ -2084,11 +2858,14 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
2084 | if (max_push < push_items) | 2858 | if (max_push < push_items) |
2085 | push_items = max_push; | 2859 | push_items = max_push; |
2086 | 2860 | ||
2861 | tree_mod_log_eb_move(root->fs_info, dst, push_items, 0, dst_nritems); | ||
2087 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), | 2862 | memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), |
2088 | btrfs_node_key_ptr_offset(0), | 2863 | btrfs_node_key_ptr_offset(0), |
2089 | (dst_nritems) * | 2864 | (dst_nritems) * |
2090 | sizeof(struct btrfs_key_ptr)); | 2865 | sizeof(struct btrfs_key_ptr)); |
2091 | 2866 | ||
2867 | tree_mod_log_eb_copy(root->fs_info, dst, src, 0, | ||
2868 | src_nritems - push_items, push_items); | ||
2092 | copy_extent_buffer(dst, src, | 2869 | copy_extent_buffer(dst, src, |
2093 | btrfs_node_key_ptr_offset(0), | 2870 | btrfs_node_key_ptr_offset(0), |
2094 | btrfs_node_key_ptr_offset(src_nritems - push_items), | 2871 | btrfs_node_key_ptr_offset(src_nritems - push_items), |
@@ -2131,7 +2908,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2131 | 2908 | ||
2132 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 2909 | c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2133 | root->root_key.objectid, &lower_key, | 2910 | root->root_key.objectid, &lower_key, |
2134 | level, root->node->start, 0, 0); | 2911 | level, root->node->start, 0); |
2135 | if (IS_ERR(c)) | 2912 | if (IS_ERR(c)) |
2136 | return PTR_ERR(c); | 2913 | return PTR_ERR(c); |
2137 | 2914 | ||
@@ -2163,6 +2940,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2163 | btrfs_mark_buffer_dirty(c); | 2940 | btrfs_mark_buffer_dirty(c); |
2164 | 2941 | ||
2165 | old = root->node; | 2942 | old = root->node; |
2943 | tree_mod_log_set_root_pointer(root, c); | ||
2166 | rcu_assign_pointer(root->node, c); | 2944 | rcu_assign_pointer(root->node, c); |
2167 | 2945 | ||
2168 | /* the super has an extra ref to root->node */ | 2946 | /* the super has an extra ref to root->node */ |
@@ -2186,10 +2964,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2186 | static void insert_ptr(struct btrfs_trans_handle *trans, | 2964 | static void insert_ptr(struct btrfs_trans_handle *trans, |
2187 | struct btrfs_root *root, struct btrfs_path *path, | 2965 | struct btrfs_root *root, struct btrfs_path *path, |
2188 | struct btrfs_disk_key *key, u64 bytenr, | 2966 | struct btrfs_disk_key *key, u64 bytenr, |
2189 | int slot, int level) | 2967 | int slot, int level, int tree_mod_log) |
2190 | { | 2968 | { |
2191 | struct extent_buffer *lower; | 2969 | struct extent_buffer *lower; |
2192 | int nritems; | 2970 | int nritems; |
2971 | int ret; | ||
2193 | 2972 | ||
2194 | BUG_ON(!path->nodes[level]); | 2973 | BUG_ON(!path->nodes[level]); |
2195 | btrfs_assert_tree_locked(path->nodes[level]); | 2974 | btrfs_assert_tree_locked(path->nodes[level]); |
@@ -2198,11 +2977,19 @@ static void insert_ptr(struct btrfs_trans_handle *trans, | |||
2198 | BUG_ON(slot > nritems); | 2977 | BUG_ON(slot > nritems); |
2199 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); | 2978 | BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); |
2200 | if (slot != nritems) { | 2979 | if (slot != nritems) { |
2980 | if (tree_mod_log && level) | ||
2981 | tree_mod_log_eb_move(root->fs_info, lower, slot + 1, | ||
2982 | slot, nritems - slot); | ||
2201 | memmove_extent_buffer(lower, | 2983 | memmove_extent_buffer(lower, |
2202 | btrfs_node_key_ptr_offset(slot + 1), | 2984 | btrfs_node_key_ptr_offset(slot + 1), |
2203 | btrfs_node_key_ptr_offset(slot), | 2985 | btrfs_node_key_ptr_offset(slot), |
2204 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); | 2986 | (nritems - slot) * sizeof(struct btrfs_key_ptr)); |
2205 | } | 2987 | } |
2988 | if (tree_mod_log && level) { | ||
2989 | ret = tree_mod_log_insert_key(root->fs_info, lower, slot, | ||
2990 | MOD_LOG_KEY_ADD); | ||
2991 | BUG_ON(ret < 0); | ||
2992 | } | ||
2206 | btrfs_set_node_key(lower, key, slot); | 2993 | btrfs_set_node_key(lower, key, slot); |
2207 | btrfs_set_node_blockptr(lower, slot, bytenr); | 2994 | btrfs_set_node_blockptr(lower, slot, bytenr); |
2208 | WARN_ON(trans->transid == 0); | 2995 | WARN_ON(trans->transid == 0); |
@@ -2254,7 +3041,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2254 | 3041 | ||
2255 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, | 3042 | split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, |
2256 | root->root_key.objectid, | 3043 | root->root_key.objectid, |
2257 | &disk_key, level, c->start, 0, 0); | 3044 | &disk_key, level, c->start, 0); |
2258 | if (IS_ERR(split)) | 3045 | if (IS_ERR(split)) |
2259 | return PTR_ERR(split); | 3046 | return PTR_ERR(split); |
2260 | 3047 | ||
@@ -2273,7 +3060,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2273 | (unsigned long)btrfs_header_chunk_tree_uuid(split), | 3060 | (unsigned long)btrfs_header_chunk_tree_uuid(split), |
2274 | BTRFS_UUID_SIZE); | 3061 | BTRFS_UUID_SIZE); |
2275 | 3062 | ||
2276 | 3063 | tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid); | |
2277 | copy_extent_buffer(split, c, | 3064 | copy_extent_buffer(split, c, |
2278 | btrfs_node_key_ptr_offset(0), | 3065 | btrfs_node_key_ptr_offset(0), |
2279 | btrfs_node_key_ptr_offset(mid), | 3066 | btrfs_node_key_ptr_offset(mid), |
@@ -2286,7 +3073,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2286 | btrfs_mark_buffer_dirty(split); | 3073 | btrfs_mark_buffer_dirty(split); |
2287 | 3074 | ||
2288 | insert_ptr(trans, root, path, &disk_key, split->start, | 3075 | insert_ptr(trans, root, path, &disk_key, split->start, |
2289 | path->slots[level + 1] + 1, level + 1); | 3076 | path->slots[level + 1] + 1, level + 1, 1); |
2290 | 3077 | ||
2291 | if (path->slots[level] >= mid) { | 3078 | if (path->slots[level] >= mid) { |
2292 | path->slots[level] -= mid; | 3079 | path->slots[level] -= mid; |
@@ -2823,7 +3610,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, | |||
2823 | btrfs_set_header_nritems(l, mid); | 3610 | btrfs_set_header_nritems(l, mid); |
2824 | btrfs_item_key(right, &disk_key, 0); | 3611 | btrfs_item_key(right, &disk_key, 0); |
2825 | insert_ptr(trans, root, path, &disk_key, right->start, | 3612 | insert_ptr(trans, root, path, &disk_key, right->start, |
2826 | path->slots[1] + 1, 1); | 3613 | path->slots[1] + 1, 1, 0); |
2827 | 3614 | ||
2828 | btrfs_mark_buffer_dirty(right); | 3615 | btrfs_mark_buffer_dirty(right); |
2829 | btrfs_mark_buffer_dirty(l); | 3616 | btrfs_mark_buffer_dirty(l); |
@@ -3006,7 +3793,7 @@ again: | |||
3006 | 3793 | ||
3007 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 3794 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
3008 | root->root_key.objectid, | 3795 | root->root_key.objectid, |
3009 | &disk_key, 0, l->start, 0, 0); | 3796 | &disk_key, 0, l->start, 0); |
3010 | if (IS_ERR(right)) | 3797 | if (IS_ERR(right)) |
3011 | return PTR_ERR(right); | 3798 | return PTR_ERR(right); |
3012 | 3799 | ||
@@ -3030,7 +3817,7 @@ again: | |||
3030 | if (mid <= slot) { | 3817 | if (mid <= slot) { |
3031 | btrfs_set_header_nritems(right, 0); | 3818 | btrfs_set_header_nritems(right, 0); |
3032 | insert_ptr(trans, root, path, &disk_key, right->start, | 3819 | insert_ptr(trans, root, path, &disk_key, right->start, |
3033 | path->slots[1] + 1, 1); | 3820 | path->slots[1] + 1, 1, 0); |
3034 | btrfs_tree_unlock(path->nodes[0]); | 3821 | btrfs_tree_unlock(path->nodes[0]); |
3035 | free_extent_buffer(path->nodes[0]); | 3822 | free_extent_buffer(path->nodes[0]); |
3036 | path->nodes[0] = right; | 3823 | path->nodes[0] = right; |
@@ -3039,7 +3826,7 @@ again: | |||
3039 | } else { | 3826 | } else { |
3040 | btrfs_set_header_nritems(right, 0); | 3827 | btrfs_set_header_nritems(right, 0); |
3041 | insert_ptr(trans, root, path, &disk_key, right->start, | 3828 | insert_ptr(trans, root, path, &disk_key, right->start, |
3042 | path->slots[1], 1); | 3829 | path->slots[1], 1, 0); |
3043 | btrfs_tree_unlock(path->nodes[0]); | 3830 | btrfs_tree_unlock(path->nodes[0]); |
3044 | free_extent_buffer(path->nodes[0]); | 3831 | free_extent_buffer(path->nodes[0]); |
3045 | path->nodes[0] = right; | 3832 | path->nodes[0] = right; |
@@ -3751,19 +4538,29 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
3751 | * empty a node. | 4538 | * empty a node. |
3752 | */ | 4539 | */ |
3753 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 4540 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
3754 | struct btrfs_path *path, int level, int slot) | 4541 | struct btrfs_path *path, int level, int slot, |
4542 | int tree_mod_log) | ||
3755 | { | 4543 | { |
3756 | struct extent_buffer *parent = path->nodes[level]; | 4544 | struct extent_buffer *parent = path->nodes[level]; |
3757 | u32 nritems; | 4545 | u32 nritems; |
4546 | int ret; | ||
3758 | 4547 | ||
3759 | nritems = btrfs_header_nritems(parent); | 4548 | nritems = btrfs_header_nritems(parent); |
3760 | if (slot != nritems - 1) { | 4549 | if (slot != nritems - 1) { |
4550 | if (tree_mod_log && level) | ||
4551 | tree_mod_log_eb_move(root->fs_info, parent, slot, | ||
4552 | slot + 1, nritems - slot - 1); | ||
3761 | memmove_extent_buffer(parent, | 4553 | memmove_extent_buffer(parent, |
3762 | btrfs_node_key_ptr_offset(slot), | 4554 | btrfs_node_key_ptr_offset(slot), |
3763 | btrfs_node_key_ptr_offset(slot + 1), | 4555 | btrfs_node_key_ptr_offset(slot + 1), |
3764 | sizeof(struct btrfs_key_ptr) * | 4556 | sizeof(struct btrfs_key_ptr) * |
3765 | (nritems - slot - 1)); | 4557 | (nritems - slot - 1)); |
4558 | } else if (tree_mod_log && level) { | ||
4559 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, | ||
4560 | MOD_LOG_KEY_REMOVE); | ||
4561 | BUG_ON(ret < 0); | ||
3766 | } | 4562 | } |
4563 | |||
3767 | nritems--; | 4564 | nritems--; |
3768 | btrfs_set_header_nritems(parent, nritems); | 4565 | btrfs_set_header_nritems(parent, nritems); |
3769 | if (nritems == 0 && parent == root->node) { | 4566 | if (nritems == 0 && parent == root->node) { |
@@ -3795,7 +4592,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3795 | struct extent_buffer *leaf) | 4592 | struct extent_buffer *leaf) |
3796 | { | 4593 | { |
3797 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); | 4594 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); |
3798 | del_ptr(trans, root, path, 1, path->slots[1]); | 4595 | del_ptr(trans, root, path, 1, path->slots[1], 1); |
3799 | 4596 | ||
3800 | /* | 4597 | /* |
3801 | * btrfs_free_extent is expensive, we want to make sure we | 4598 | * btrfs_free_extent is expensive, we want to make sure we |
@@ -3806,7 +4603,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3806 | root_sub_used(root, leaf->len); | 4603 | root_sub_used(root, leaf->len); |
3807 | 4604 | ||
3808 | extent_buffer_get(leaf); | 4605 | extent_buffer_get(leaf); |
3809 | btrfs_free_tree_block(trans, root, leaf, 0, 1, 0); | 4606 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
3810 | free_extent_buffer_stale(leaf); | 4607 | free_extent_buffer_stale(leaf); |
3811 | } | 4608 | } |
3812 | /* | 4609 | /* |
@@ -4273,7 +5070,7 @@ again: | |||
4273 | next = c; | 5070 | next = c; |
4274 | next_rw_lock = path->locks[level]; | 5071 | next_rw_lock = path->locks[level]; |
4275 | ret = read_block_for_search(NULL, root, path, &next, level, | 5072 | ret = read_block_for_search(NULL, root, path, &next, level, |
4276 | slot, &key); | 5073 | slot, &key, 0); |
4277 | if (ret == -EAGAIN) | 5074 | if (ret == -EAGAIN) |
4278 | goto again; | 5075 | goto again; |
4279 | 5076 | ||
@@ -4310,7 +5107,7 @@ again: | |||
4310 | break; | 5107 | break; |
4311 | 5108 | ||
4312 | ret = read_block_for_search(NULL, root, path, &next, level, | 5109 | ret = read_block_for_search(NULL, root, path, &next, level, |
4313 | 0, &key); | 5110 | 0, &key, 0); |
4314 | if (ret == -EAGAIN) | 5111 | if (ret == -EAGAIN) |
4315 | goto again; | 5112 | goto again; |
4316 | 5113 | ||