aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJan Schmidt <list.btrfs@jan-o-sch.net>2012-05-16 11:18:50 -0400
committerJan Schmidt <list.btrfs@jan-o-sch.net>2012-05-30 09:17:01 -0400
commitbd989ba359f2acb8bc5f5490e19010fc0a6f8356 (patch)
tree3689629e226678802bfe9b8a91ae560ef50a93f0 /fs
parentf29021b29a85701c08afadfd51d87163fb078059 (diff)
Btrfs: add tree modification log functions
The tree mod log will log modifications made fs-tree nodes. Most modifications are done by autobalance of the tree. Such changes are recorded as long as a block entry exists. When released, the log is cleaned. With the tree modification log, it's possible to reconstruct a consistent old state of the tree. This is required to do backref walking on a busy file system. Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.c408
-rw-r--r--fs/btrfs/ctree.h5
2 files changed, 412 insertions, 1 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 56485b3b7c3..72b9f97e2fd 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/rbtree.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "disk-io.h" 23#include "disk-io.h"
23#include "transaction.h" 24#include "transaction.h"
@@ -288,6 +289,412 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
288 return 0; 289 return 0;
289} 290}
290 291
292enum mod_log_op {
293 MOD_LOG_KEY_REPLACE,
294 MOD_LOG_KEY_ADD,
295 MOD_LOG_KEY_REMOVE,
296 MOD_LOG_KEY_REMOVE_WHILE_FREEING,
297 MOD_LOG_KEY_REMOVE_WHILE_MOVING,
298 MOD_LOG_MOVE_KEYS,
299 MOD_LOG_ROOT_REPLACE,
300};
301
302struct tree_mod_move {
303 int dst_slot;
304 int nr_items;
305};
306
307struct tree_mod_root {
308 u64 logical;
309 u8 level;
310};
311
312struct tree_mod_elem {
313 struct rb_node node;
314 u64 index; /* shifted logical */
315 struct seq_list elem;
316 enum mod_log_op op;
317
318 /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
319 int slot;
320
321 /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
322 u64 generation;
323
324 /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
325 struct btrfs_disk_key key;
326 u64 blockptr;
327
328 /* this is used for op == MOD_LOG_MOVE_KEYS */
329 struct tree_mod_move move;
330
331 /* this is used for op == MOD_LOG_ROOT_REPLACE */
332 struct tree_mod_root old_root;
333};
334
335static inline void
336__get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem)
337{
338 elem->seq = atomic_inc_return(&fs_info->tree_mod_seq);
339 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
340}
341
342void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
343 struct seq_list *elem)
344{
345 elem->flags = 1;
346 spin_lock(&fs_info->tree_mod_seq_lock);
347 __get_tree_mod_seq(fs_info, elem);
348 spin_unlock(&fs_info->tree_mod_seq_lock);
349}
350
351void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
352 struct seq_list *elem)
353{
354 struct rb_root *tm_root;
355 struct rb_node *node;
356 struct rb_node *next;
357 struct seq_list *cur_elem;
358 struct tree_mod_elem *tm;
359 u64 min_seq = (u64)-1;
360 u64 seq_putting = elem->seq;
361
362 if (!seq_putting)
363 return;
364
365 BUG_ON(!(elem->flags & 1));
366 spin_lock(&fs_info->tree_mod_seq_lock);
367 list_del(&elem->list);
368
369 list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
370 if ((cur_elem->flags & 1) && cur_elem->seq < min_seq) {
371 if (seq_putting > cur_elem->seq) {
372 /*
373 * blocker with lower sequence number exists, we
374 * cannot remove anything from the log
375 */
376 goto out;
377 }
378 min_seq = cur_elem->seq;
379 }
380 }
381
382 /*
383 * anything that's lower than the lowest existing (read: blocked)
384 * sequence number can be removed from the tree.
385 */
386 write_lock(&fs_info->tree_mod_log_lock);
387 tm_root = &fs_info->tree_mod_log;
388 for (node = rb_first(tm_root); node; node = next) {
389 next = rb_next(node);
390 tm = container_of(node, struct tree_mod_elem, node);
391 if (tm->elem.seq > min_seq)
392 continue;
393 rb_erase(node, tm_root);
394 list_del(&tm->elem.list);
395 kfree(tm);
396 }
397 write_unlock(&fs_info->tree_mod_log_lock);
398out:
399 spin_unlock(&fs_info->tree_mod_seq_lock);
400}
401
402/*
403 * key order of the log:
404 * index -> sequence
405 *
406 * the index is the shifted logical of the *new* root node for root replace
407 * operations, or the shifted logical of the affected block for all other
408 * operations.
409 */
410static noinline int
411__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
412{
413 struct rb_root *tm_root;
414 struct rb_node **new;
415 struct rb_node *parent = NULL;
416 struct tree_mod_elem *cur;
417 int ret = 0;
418
419 BUG_ON(!tm || !tm->elem.seq);
420
421 write_lock(&fs_info->tree_mod_log_lock);
422 tm_root = &fs_info->tree_mod_log;
423 new = &tm_root->rb_node;
424 while (*new) {
425 cur = container_of(*new, struct tree_mod_elem, node);
426 parent = *new;
427 if (cur->index < tm->index)
428 new = &((*new)->rb_left);
429 else if (cur->index > tm->index)
430 new = &((*new)->rb_right);
431 else if (cur->elem.seq < tm->elem.seq)
432 new = &((*new)->rb_left);
433 else if (cur->elem.seq > tm->elem.seq)
434 new = &((*new)->rb_right);
435 else {
436 kfree(tm);
437 ret = -EEXIST;
438 goto unlock;
439 }
440 }
441
442 rb_link_node(&tm->node, parent, new);
443 rb_insert_color(&tm->node, tm_root);
444unlock:
445 write_unlock(&fs_info->tree_mod_log_lock);
446 return ret;
447}
448
449int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
450 struct tree_mod_elem **tm_ret)
451{
452 struct tree_mod_elem *tm;
453 u64 seq = 0;
454
455 smp_mb();
456 if (list_empty(&fs_info->tree_mod_seq_list))
457 return 0;
458
459 tm = *tm_ret = kzalloc(sizeof(*tm), flags);
460 if (!tm)
461 return -ENOMEM;
462
463 __get_tree_mod_seq(fs_info, &tm->elem);
464 seq = tm->elem.seq;
465 tm->elem.flags = 0;
466
467 return seq;
468}
469
470static noinline int
471tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
472 struct extent_buffer *eb, int slot,
473 enum mod_log_op op, gfp_t flags)
474{
475 struct tree_mod_elem *tm;
476 int ret;
477
478 ret = tree_mod_alloc(fs_info, flags, &tm);
479 if (ret <= 0)
480 return ret;
481
482 tm->index = eb->start >> PAGE_CACHE_SHIFT;
483 if (op != MOD_LOG_KEY_ADD) {
484 btrfs_node_key(eb, &tm->key, slot);
485 tm->blockptr = btrfs_node_blockptr(eb, slot);
486 }
487 tm->op = op;
488 tm->slot = slot;
489 tm->generation = btrfs_node_ptr_generation(eb, slot);
490
491 return __tree_mod_log_insert(fs_info, tm);
492}
493
494static noinline int
495tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
496 int slot, enum mod_log_op op)
497{
498 return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
499}
500
501static noinline int
502tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
503 struct extent_buffer *eb, int dst_slot, int src_slot,
504 int nr_items, gfp_t flags)
505{
506 struct tree_mod_elem *tm;
507 int ret;
508 int i;
509
510 ret = tree_mod_alloc(fs_info, flags, &tm);
511 if (ret <= 0)
512 return ret;
513
514 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
515 ret = tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
516 MOD_LOG_KEY_REMOVE_WHILE_MOVING);
517 BUG_ON(ret < 0);
518 }
519
520 tm->index = eb->start >> PAGE_CACHE_SHIFT;
521 tm->slot = src_slot;
522 tm->move.dst_slot = dst_slot;
523 tm->move.nr_items = nr_items;
524 tm->op = MOD_LOG_MOVE_KEYS;
525
526 return __tree_mod_log_insert(fs_info, tm);
527}
528
529static noinline int
530tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
531 struct extent_buffer *old_root,
532 struct extent_buffer *new_root, gfp_t flags)
533{
534 struct tree_mod_elem *tm;
535 int ret;
536
537 ret = tree_mod_alloc(fs_info, flags, &tm);
538 if (ret <= 0)
539 return ret;
540
541 tm->index = new_root->start >> PAGE_CACHE_SHIFT;
542 tm->old_root.logical = old_root->start;
543 tm->old_root.level = btrfs_header_level(old_root);
544 tm->generation = btrfs_header_generation(old_root);
545 tm->op = MOD_LOG_ROOT_REPLACE;
546
547 return __tree_mod_log_insert(fs_info, tm);
548}
549
550static struct tree_mod_elem *
551__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
552 int smallest)
553{
554 struct rb_root *tm_root;
555 struct rb_node *node;
556 struct tree_mod_elem *cur = NULL;
557 struct tree_mod_elem *found = NULL;
558 u64 index = start >> PAGE_CACHE_SHIFT;
559
560 read_lock(&fs_info->tree_mod_log_lock);
561 tm_root = &fs_info->tree_mod_log;
562 node = tm_root->rb_node;
563 while (node) {
564 cur = container_of(node, struct tree_mod_elem, node);
565 if (cur->index < index) {
566 node = node->rb_left;
567 } else if (cur->index > index) {
568 node = node->rb_right;
569 } else if (cur->elem.seq < min_seq) {
570 node = node->rb_left;
571 } else if (!smallest) {
572 /* we want the node with the highest seq */
573 if (found)
574 BUG_ON(found->elem.seq > cur->elem.seq);
575 found = cur;
576 node = node->rb_left;
577 } else if (cur->elem.seq > min_seq) {
578 /* we want the node with the smallest seq */
579 if (found)
580 BUG_ON(found->elem.seq < cur->elem.seq);
581 found = cur;
582 node = node->rb_right;
583 } else {
584 found = cur;
585 break;
586 }
587 }
588 read_unlock(&fs_info->tree_mod_log_lock);
589
590 return found;
591}
592
593/*
594 * this returns the element from the log with the smallest time sequence
595 * value that's in the log (the oldest log item). any element with a time
596 * sequence lower than min_seq will be ignored.
597 */
598static struct tree_mod_elem *
599tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
600 u64 min_seq)
601{
602 return __tree_mod_log_search(fs_info, start, min_seq, 1);
603}
604
605/*
606 * this returns the element from the log with the largest time sequence
607 * value that's in the log (the most recent log item). any element with
608 * a time sequence lower than min_seq will be ignored.
609 */
610static struct tree_mod_elem *
611tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
612{
613 return __tree_mod_log_search(fs_info, start, min_seq, 0);
614}
615
616static inline void
617tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
618 struct extent_buffer *src, unsigned long dst_offset,
619 unsigned long src_offset, int nr_items)
620{
621 int ret;
622 int i;
623
624 smp_mb();
625 if (list_empty(&fs_info->tree_mod_seq_list))
626 return;
627
628 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
629 return;
630
631 /* speed this up by single seq for all operations? */
632 for (i = 0; i < nr_items; i++) {
633 ret = tree_mod_log_insert_key(fs_info, src, i + src_offset,
634 MOD_LOG_KEY_REMOVE);
635 BUG_ON(ret < 0);
636 ret = tree_mod_log_insert_key(fs_info, dst, i + dst_offset,
637 MOD_LOG_KEY_ADD);
638 BUG_ON(ret < 0);
639 }
640}
641
642static inline void
643tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
644 int dst_offset, int src_offset, int nr_items)
645{
646 int ret;
647 ret = tree_mod_log_insert_move(fs_info, dst, dst_offset, src_offset,
648 nr_items, GFP_NOFS);
649 BUG_ON(ret < 0);
650}
651
652static inline void
653tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
654 struct extent_buffer *eb,
655 struct btrfs_disk_key *disk_key, int slot, int atomic)
656{
657 int ret;
658
659 ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
660 MOD_LOG_KEY_REPLACE,
661 atomic ? GFP_ATOMIC : GFP_NOFS);
662 BUG_ON(ret < 0);
663}
664
665static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
666 struct extent_buffer *eb)
667{
668 int i;
669 int ret;
670 u32 nritems;
671
672 smp_mb();
673 if (list_empty(&fs_info->tree_mod_seq_list))
674 return;
675
676 if (btrfs_header_level(eb) == 0)
677 return;
678
679 nritems = btrfs_header_nritems(eb);
680 for (i = nritems - 1; i >= 0; i--) {
681 ret = tree_mod_log_insert_key(fs_info, eb, i,
682 MOD_LOG_KEY_REMOVE_WHILE_FREEING);
683 BUG_ON(ret < 0);
684 }
685}
686
687static inline void
688tree_mod_log_set_root_pointer(struct btrfs_root *root,
689 struct extent_buffer *new_root_node)
690{
691 int ret;
692 tree_mod_log_free_eb(root->fs_info, root->node);
693 ret = tree_mod_log_insert_root(root->fs_info, root->node,
694 new_root_node, GFP_NOFS);
695 BUG_ON(ret < 0);
696}
697
291/* 698/*
292 * check if the tree block can be shared by multiple trees 699 * check if the tree block can be shared by multiple trees
293 */ 700 */
@@ -2271,7 +2678,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
2271 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2678 (unsigned long)btrfs_header_chunk_tree_uuid(split),
2272 BTRFS_UUID_SIZE); 2679 BTRFS_UUID_SIZE);
2273 2680
2274
2275 copy_extent_buffer(split, c, 2681 copy_extent_buffer(split, c,
2276 btrfs_node_key_ptr_offset(0), 2682 btrfs_node_key_ptr_offset(0),
2277 btrfs_node_key_ptr_offset(mid), 2683 btrfs_node_key_ptr_offset(mid),
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 01639e10004..e53bfb9b391 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3114,4 +3114,9 @@ struct seq_list {
3114 u32 flags; 3114 u32 flags;
3115}; 3115};
3116 3116
3117void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
3118 struct seq_list *elem);
3119void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
3120 struct seq_list *elem);
3121
3117#endif 3122#endif