aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2007-12-11 12:42:00 -0500
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:03:58 -0400
commitd8d5f3e16d1ae4fe9b93312e083f2c04a95520f0 (patch)
tree3327d76311a31a9345a3019534bb043ff14d204f
parent7bb86316c3961d1bc401ef184fd996f999556c7f (diff)
Btrfs: Add lowest key information to back refs for extent tree blocks as well.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c102
-rw-r--r--fs/btrfs/file.c11
-rw-r--r--fs/btrfs/inode.c14
4 files changed, 110 insertions, 19 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0ac21e3aac87..60a30da6af00 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping,
210{ 210{
211 struct extent_map_tree *tree; 211 struct extent_map_tree *tree;
212 tree = &BTRFS_I(mapping->host)->extent_tree; 212 tree = &BTRFS_I(mapping->host)->extent_tree;
213 if (0 && wbc->sync_mode == WB_SYNC_NONE) { 213 if (wbc->sync_mode == WB_SYNC_NONE) {
214 u64 num_dirty; 214 u64 num_dirty;
215 u64 start = 0; 215 u64 start = 0;
216 unsigned long thresh = 96 * 1024 * 1024; 216 unsigned long thresh = 96 * 1024 * 1024;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 32991f73e9db..187be4012474 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -457,6 +457,94 @@ out:
457 return ret; 457 return ret;
458} 458}
459 459
460/*
461 * Back reference rules. Back refs have three main goals:
462 *
463 * 1) differentiate between all holders of references to an extent so that
464 * when a reference is dropped we can make sure it was a valid reference
465 * before freeing the extent.
466 *
467 * 2) Provide enough information to quickly find the holders of an extent
468 * if we notice a given block is corrupted or bad.
469 *
470 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
471 * maintenance. This is actually the same as #2, but with a slightly
472 * different use case.
473 *
474 * File extents can be referenced by:
475 *
476 * - multiple snapshots, subvolumes, or different generations in one subvol
477 * - different files inside a single subvolume (in theory, not implemented yet)
478 * - different offsets inside a file (bookend extents in file.c)
479 *
480 * The extent ref structure has fields for:
481 *
482 * - Objectid of the subvolume root
483 * - Generation number of the tree holding the reference
484 * - objectid of the file holding the reference
485 * - offset in the file corresponding to the key holding the reference
486 *
487 * When a file extent is allocated the fields are filled in:
488 * (root_key.objectid, trans->transid, inode objectid, offset in file)
489 *
490 * When a leaf is cow'd new references are added for every file extent found
491 * in the leaf. It looks the same as the create case, but trans->transid
492 * will be different when the block is cow'd.
493 *
494 * (root_key.objectid, trans->transid, inode objectid, offset in file)
495 *
496 * When a file extent is removed either during snapshot deletion or file
497 * truncation, the corresponding back reference is found
498 * by searching for:
499 *
500 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
501 * inode objectid, offset in file)
502 *
503 * Btree extents can be referenced by:
504 *
505 * - Different subvolumes
506 * - Different generations of the same subvolume
507 *
508 * Storing sufficient information for a full reverse mapping of a btree
509 * block would require storing the lowest key of the block in the backref,
510 * and it would require updating that lowest key either before write out or
511 * every time it changed. Instead, the objectid of the lowest key is stored
512 * along with the level of the tree block. This provides a hint
513 * about where in the btree the block can be found. Searches through the
514 * btree only need to look for a pointer to that block, so they stop one
515 * level higher than the level recorded in the backref.
516 *
517 * Some btrees do not do reference counting on their extents. These
518 * include the extent tree and the tree of tree roots. Backrefs for these
519 * trees always have a generation of zero.
520 *
521 * When a tree block is created, back references are inserted:
522 *
523 * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level)
524 *
525 * When a tree block is cow'd in a reference counted root,
526 * new back references are added for all the blocks it points to.
527 * These are of the form (trans->transid will have increased since creation):
528 *
529 * (root->root_key.objectid, trans->transid, lowest_key_objectid, level)
530 *
531 * Because the lowest_key_objectid and the level are just hints
532 * they are not used when backrefs are deleted. When a backref is deleted:
533 *
534 * if backref was for a tree root:
535 * root_objectid = root->root_key.objectid
536 * else
537 * root_objectid = btrfs_header_owner(parent)
538 *
539 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
540 *
541 * Back Reference Key hashing:
542 *
543 * Back references have four fields, each 64 bits long. Unfortunately,
544 * This is hashed into a single 64 bit number and placed into the key offset.
545 * The key objectid corresponds to the first byte in the extent, and the
546 * key type is set to BTRFS_EXTENT_REF_KEY
547 */
460int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, 548int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
461 struct btrfs_root *root, 549 struct btrfs_root *root,
462 struct btrfs_path *path, u64 bytenr, 550 struct btrfs_path *path, u64 bytenr,
@@ -939,10 +1027,13 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
939 u64 start; 1027 u64 start;
940 u64 end; 1028 u64 end;
941 struct btrfs_fs_info *info = extent_root->fs_info; 1029 struct btrfs_fs_info *info = extent_root->fs_info;
1030 struct extent_buffer *eb;
942 struct btrfs_path *path; 1031 struct btrfs_path *path;
943 struct btrfs_key ins; 1032 struct btrfs_key ins;
1033 struct btrfs_disk_key first;
944 struct btrfs_extent_item extent_item; 1034 struct btrfs_extent_item extent_item;
945 int ret; 1035 int ret;
1036 int level;
946 int err = 0; 1037 int err = 0;
947 1038
948 btrfs_set_stack_extent_refs(&extent_item, 1); 1039 btrfs_set_stack_extent_refs(&extent_item, 1);
@@ -961,10 +1052,19 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
961 &extent_item, sizeof(extent_item)); 1052 &extent_item, sizeof(extent_item));
962 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, 1053 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
963 GFP_NOFS); 1054 GFP_NOFS);
1055 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1056 level = btrfs_header_level(eb);
1057 if (level == 0) {
1058 btrfs_item_key(eb, &first, 0);
1059 } else {
1060 btrfs_node_key(eb, &first, 0);
1061 }
964 err = btrfs_insert_extent_backref(trans, extent_root, path, 1062 err = btrfs_insert_extent_backref(trans, extent_root, path,
965 start, extent_root->root_key.objectid, 1063 start, extent_root->root_key.objectid,
966 0, 0, 0); 1064 0, btrfs_disk_key_objectid(&first),
1065 level);
967 BUG_ON(err); 1066 BUG_ON(err);
1067 free_extent_buffer(eb);
968 } 1068 }
969 btrfs_free_path(path); 1069 btrfs_free_path(path);
970 return 0; 1070 return 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5b1f90f06e03..1cc4d285951c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -545,13 +545,10 @@ next_slot:
545 u64 disk_num_bytes = 0; 545 u64 disk_num_bytes = 0;
546 u64 extent_num_bytes = 0; 546 u64 extent_num_bytes = 0;
547 u64 root_gen; 547 u64 root_gen;
548 u64 root_owner;
548 549
549 if (leaf != root->node) { 550 root_gen = btrfs_header_generation(leaf);
550 root_gen = 551 root_owner = btrfs_header_owner(leaf);
551 btrfs_header_generation(path->nodes[1]);
552 } else {
553 root_gen = btrfs_header_generation(leaf);
554 }
555 if (found_extent) { 552 if (found_extent) {
556 disk_bytenr = 553 disk_bytenr =
557 btrfs_file_extent_disk_bytenr(leaf, 554 btrfs_file_extent_disk_bytenr(leaf,
@@ -575,7 +572,7 @@ next_slot:
575 ret = btrfs_free_extent(trans, root, 572 ret = btrfs_free_extent(trans, root,
576 disk_bytenr, 573 disk_bytenr,
577 disk_num_bytes, 574 disk_num_bytes,
578 root->root_key.objectid, 575 root_owner,
579 root_gen, inode->i_ino, 576 root_gen, inode->i_ino,
580 key.offset, 0); 577 key.offset, 0);
581 } 578 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bb70db0c9df4..03fea037667e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -563,6 +563,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
563 u64 extent_num_bytes = 0; 563 u64 extent_num_bytes = 0;
564 u64 item_end = 0; 564 u64 item_end = 0;
565 u64 root_gen = 0; 565 u64 root_gen = 0;
566 u64 root_owner = 0;
566 int found_extent; 567 int found_extent;
567 int del_item; 568 int del_item;
568 int extent_type = -1; 569 int extent_type = -1;
@@ -673,15 +674,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
673 found_extent = 1; 674 found_extent = 1;
674 inode->i_blocks -= num_dec; 675 inode->i_blocks -= num_dec;
675 } 676 }
676 if (leaf == root->node) { 677 root_gen = btrfs_header_generation(leaf);
677 root_gen = 678 root_owner = btrfs_header_owner(leaf);
678 btrfs_header_generation(leaf);
679 } else {
680 struct extent_buffer *parent;
681 parent = path->nodes[1];
682 root_gen =
683 btrfs_header_generation(parent);
684 }
685 } 679 }
686 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE && 680 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE &&
687 !del_item) { 681 !del_item) {
@@ -703,7 +697,7 @@ delete:
703 if (found_extent) { 697 if (found_extent) {
704 ret = btrfs_free_extent(trans, root, extent_start, 698 ret = btrfs_free_extent(trans, root, extent_start,
705 extent_num_bytes, 699 extent_num_bytes,
706 root->root_key.objectid, 700 root_owner,
707 root_gen, inode->i_ino, 701 root_gen, inode->i_ino,
708 found_key.offset, 0); 702 found_key.offset, 0);
709 BUG_ON(ret); 703 BUG_ON(ret);