diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 102 |
1 files changed, 101 insertions, 1 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32991f73e9db..187be4012474 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -457,6 +457,94 @@ out: | |||
457 | return ret; | 457 | return ret; |
458 | } | 458 | } |
459 | 459 | ||
460 | /* | ||
461 | * Back reference rules. Back refs have three main goals: | ||
462 | * | ||
463 | * 1) differentiate between all holders of references to an extent so that | ||
464 | * when a reference is dropped we can make sure it was a valid reference | ||
465 | * before freeing the extent. | ||
466 | * | ||
467 | * 2) Provide enough information to quickly find the holders of an extent | ||
468 | * if we notice a given block is corrupted or bad. | ||
469 | * | ||
470 | * 3) Make it easy to migrate blocks for FS shrinking or storage pool | ||
471 | * maintenance. This is actually the same as #2, but with a slightly | ||
472 | * different use case. | ||
473 | * | ||
474 | * File extents can be referenced by: | ||
475 | * | ||
476 | * - multiple snapshots, subvolumes, or different generations in one subvol | ||
477 | * - different files inside a single subvolume (in theory, not implemented yet) | ||
478 | * - different offsets inside a file (bookend extents in file.c) | ||
479 | * | ||
480 | * The extent ref structure has fields for: | ||
481 | * | ||
482 | * - Objectid of the subvolume root | ||
483 | * - Generation number of the tree holding the reference | ||
484 | * - objectid of the file holding the reference | ||
485 | * - offset in the file corresponding to the key holding the reference | ||
486 | * | ||
487 | * When a file extent is allocated the fields are filled in: | ||
488 | * (root_key.objectid, trans->transid, inode objectid, offset in file) | ||
489 | * | ||
490 | * When a leaf is cow'd new references are added for every file extent found | ||
491 | * in the leaf. It looks the same as the create case, but trans->transid | ||
492 | * will be different when the block is cow'd. | ||
493 | * | ||
494 | * (root_key.objectid, trans->transid, inode objectid, offset in file) | ||
495 | * | ||
496 | * When a file extent is removed either during snapshot deletion or file | ||
497 | * truncation, the corresponding back reference is found | ||
498 | * by searching for: | ||
499 | * | ||
500 | * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), | ||
501 | * inode objectid, offset in file) | ||
502 | * | ||
503 | * Btree extents can be referenced by: | ||
504 | * | ||
505 | * - Different subvolumes | ||
506 | * - Different generations of the same subvolume | ||
507 | * | ||
508 | * Storing sufficient information for a full reverse mapping of a btree | ||
509 | * block would require storing the lowest key of the block in the backref, | ||
510 | * and it would require updating that lowest key either before write out or | ||
511 | * every time it changed. Instead, the objectid of the lowest key is stored | ||
512 | * along with the level of the tree block. This provides a hint | ||
513 | * about where in the btree the block can be found. Searches through the | ||
514 | * btree only need to look for a pointer to that block, so they stop one | ||
515 | * level higher than the level recorded in the backref. | ||
516 | * | ||
517 | * Some btrees do not do reference counting on their extents. These | ||
518 | * include the extent tree and the tree of tree roots. Backrefs for these | ||
519 | * trees always have a generation of zero. | ||
520 | * | ||
521 | * When a tree block is created, back references are inserted: | ||
522 | * | ||
523 | * (root->root_key.objectid, trans->transid or zero, lowest_key_objectid, level) | ||
524 | * | ||
525 | * When a tree block is cow'd in a reference counted root, | ||
526 | * new back references are added for all the blocks it points to. | ||
527 | * These are of the form (trans->transid will have increased since creation): | ||
528 | * | ||
529 | * (root->root_key.objectid, trans->transid, lowest_key_objectid, level) | ||
530 | * | ||
531 | * Because the lowest_key_objectid and the level are just hints | ||
532 | * they are not used when backrefs are deleted. When a backref is deleted: | ||
533 | * | ||
534 | * if backref was for a tree root: | ||
535 | * root_objectid = root->root_key.objectid | ||
536 | * else | ||
537 | * root_objectid = btrfs_header_owner(parent) | ||
538 | * | ||
539 | * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0) | ||
540 | * | ||
541 | * Back Reference Key hashing: | ||
542 | * | ||
543 | * Back references have four fields, each 64 bits long. Unfortunately, | ||
544 | * This is hashed into a single 64 bit number and placed into the key offset. | ||
545 | * The key objectid corresponds to the first byte in the extent, and the | ||
546 | * key type is set to BTRFS_EXTENT_REF_KEY | ||
547 | */ | ||
460 | int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, | 548 | int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, |
461 | struct btrfs_root *root, | 549 | struct btrfs_root *root, |
462 | struct btrfs_path *path, u64 bytenr, | 550 | struct btrfs_path *path, u64 bytenr, |
@@ -939,10 +1027,13 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct | |||
939 | u64 start; | 1027 | u64 start; |
940 | u64 end; | 1028 | u64 end; |
941 | struct btrfs_fs_info *info = extent_root->fs_info; | 1029 | struct btrfs_fs_info *info = extent_root->fs_info; |
1030 | struct extent_buffer *eb; | ||
942 | struct btrfs_path *path; | 1031 | struct btrfs_path *path; |
943 | struct btrfs_key ins; | 1032 | struct btrfs_key ins; |
1033 | struct btrfs_disk_key first; | ||
944 | struct btrfs_extent_item extent_item; | 1034 | struct btrfs_extent_item extent_item; |
945 | int ret; | 1035 | int ret; |
1036 | int level; | ||
946 | int err = 0; | 1037 | int err = 0; |
947 | 1038 | ||
948 | btrfs_set_stack_extent_refs(&extent_item, 1); | 1039 | btrfs_set_stack_extent_refs(&extent_item, 1); |
@@ -961,10 +1052,19 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct | |||
961 | &extent_item, sizeof(extent_item)); | 1052 | &extent_item, sizeof(extent_item)); |
962 | clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, | 1053 | clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, |
963 | GFP_NOFS); | 1054 | GFP_NOFS); |
1055 | eb = read_tree_block(extent_root, ins.objectid, ins.offset); | ||
1056 | level = btrfs_header_level(eb); | ||
1057 | if (level == 0) { | ||
1058 | btrfs_item_key(eb, &first, 0); | ||
1059 | } else { | ||
1060 | btrfs_node_key(eb, &first, 0); | ||
1061 | } | ||
964 | err = btrfs_insert_extent_backref(trans, extent_root, path, | 1062 | err = btrfs_insert_extent_backref(trans, extent_root, path, |
965 | start, extent_root->root_key.objectid, | 1063 | start, extent_root->root_key.objectid, |
966 | 0, 0, 0); | 1064 | 0, btrfs_disk_key_objectid(&first), |
1065 | level); | ||
967 | BUG_ON(err); | 1066 | BUG_ON(err); |
1067 | free_extent_buffer(eb); | ||
968 | } | 1068 | } |
969 | btrfs_free_path(path); | 1069 | btrfs_free_path(path); |
970 | return 0; | 1070 | return 0; |