aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2007-12-11 12:42:00 -0500
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:03:58 -0400
commitd8d5f3e16d1ae4fe9b93312e083f2c04a95520f0 (patch)
tree3327d76311a31a9345a3019534bb043ff14d204f /fs/btrfs/extent-tree.c
parent7bb86316c3961d1bc401ef184fd996f999556c7f (diff)
Btrfs: Add lowest key information to back refs for extent tree blocks as well.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c102
1 files changed, 101 insertions, 1 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 32991f73e9db..187be4012474 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -457,6 +457,94 @@ out:
457 return ret; 457 return ret;
458} 458}
459 459
460/*
461 * Back reference rules. Back refs have three main goals:
462 *
463 * 1) differentiate between all holders of references to an extent so that
464 * when a reference is dropped we can make sure it was a valid reference
465 * before freeing the extent.
466 *
467 * 2) Provide enough information to quickly find the holders of an extent
468 * if we notice a given block is corrupted or bad.
469 *
470 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
471 * maintenance. This is actually the same as #2, but with a slightly
472 * different use case.
473 *
474 * File extents can be referenced by:
475 *
476 * - multiple snapshots, subvolumes, or different generations in one subvol
477 * - different files inside a single subvolume (in theory, not implemented yet)
478 * - different offsets inside a file (bookend extents in file.c)
479 *
480 * The extent ref structure has fields for:
481 *
482 * - Objectid of the subvolume root
483 * - Generation number of the tree holding the reference
484 * - objectid of the file holding the reference
485 * - offset in the file corresponding to the key holding the reference
486 *
487 * When a file extent is allocated the fields are filled in:
488 * (root_key.objectid, trans->transid, inode objectid, offset in file)
489 *
490 * When a leaf is cow'd new references are added for every file extent found
491 * in the leaf. It looks the same as the create case, but trans->transid
492 * will be different when the block is cow'd.
493 *
494 * (root_key.objectid, trans->transid, inode objectid, offset in file)
495 *
496 * When a file extent is removed either during snapshot deletion or file
497 * truncation, the corresponding back reference is found
498 * by searching for:
499 *
500 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
501 * inode objectid, offset in file)
502 *
503 * Btree extents can be referenced by:
504 *
505 * - Different subvolumes
506 * - Different generations of the same subvolume
507 *
508 * Storing sufficient information for a full reverse mapping of a btree
509 * block would require storing the lowest key of the block in the backref,
510 * and it would require updating that lowest key either before write out or
511 * every time it changed. Instead, the objectid of the lowest key is stored
512 * along with the level of the tree block. This provides a hint
513 * about where in the btree the block can be found. Searches through the
514 * btree only need to look for a pointer to that block, so they stop one
515 * level higher than the level recorded in the backref.
516 *
517 * Some btrees do not do reference counting on their extents. These
518 * include the extent tree and the tree of tree roots. Backrefs for these
519 * trees always have a generation of zero.
520 *
521 * When a tree block is created, back references are inserted:
522 *
523 * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level)
524 *
525 * When a tree block is cow'd in a reference counted root,
526 * new back references are added for all the blocks it points to.
527 * These are of the form (trans->transid will have increased since creation):
528 *
529 * (root->root_key.objectid, trans->transid, lowest_key_objectid, level)
530 *
531 * Because the lowest_key_objectid and the level are just hints
532 * they are not used when backrefs are deleted. When a backref is deleted:
533 *
534 * if backref was for a tree root:
535 * root_objectid = root->root_key.objectid
536 * else
537 * root_objectid = btrfs_header_owner(parent)
538 *
539 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
540 *
541 * Back Reference Key hashing:
542 *
543 * Back references have four fields, each 64 bits long. Unfortunately,
544 * This is hashed into a single 64 bit number and placed into the key offset.
545 * The key objectid corresponds to the first byte in the extent, and the
546 * key type is set to BTRFS_EXTENT_REF_KEY
547 */
460int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, 548int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
461 struct btrfs_root *root, 549 struct btrfs_root *root,
462 struct btrfs_path *path, u64 bytenr, 550 struct btrfs_path *path, u64 bytenr,
@@ -939,10 +1027,13 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
939 u64 start; 1027 u64 start;
940 u64 end; 1028 u64 end;
941 struct btrfs_fs_info *info = extent_root->fs_info; 1029 struct btrfs_fs_info *info = extent_root->fs_info;
1030 struct extent_buffer *eb;
942 struct btrfs_path *path; 1031 struct btrfs_path *path;
943 struct btrfs_key ins; 1032 struct btrfs_key ins;
1033 struct btrfs_disk_key first;
944 struct btrfs_extent_item extent_item; 1034 struct btrfs_extent_item extent_item;
945 int ret; 1035 int ret;
1036 int level;
946 int err = 0; 1037 int err = 0;
947 1038
948 btrfs_set_stack_extent_refs(&extent_item, 1); 1039 btrfs_set_stack_extent_refs(&extent_item, 1);
@@ -961,10 +1052,19 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
961 &extent_item, sizeof(extent_item)); 1052 &extent_item, sizeof(extent_item));
962 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, 1053 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
963 GFP_NOFS); 1054 GFP_NOFS);
1055 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1056 level = btrfs_header_level(eb);
1057 if (level == 0) {
1058 btrfs_item_key(eb, &first, 0);
1059 } else {
1060 btrfs_node_key(eb, &first, 0);
1061 }
964 err = btrfs_insert_extent_backref(trans, extent_root, path, 1062 err = btrfs_insert_extent_backref(trans, extent_root, path,
965 start, extent_root->root_key.objectid, 1063 start, extent_root->root_key.objectid,
966 0, 0, 0); 1064 0, btrfs_disk_key_objectid(&first),
1065 level);
967 BUG_ON(err); 1066 BUG_ON(err);
1067 free_extent_buffer(eb);
968 } 1068 }
969 btrfs_free_path(path); 1069 btrfs_free_path(path);
970 return 0; 1070 return 0;