diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-09-05 16:13:11 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:07 -0400 |
commit | e02119d5a7b4396c5a872582fddc8bd6d305a70a (patch) | |
tree | 825efe2a79dbca8d61256183f3526a5b5dc40dc6 /fs/btrfs/disk-io.c | |
parent | a1b32a5932cfac7c38b442582285f3da2a09dfd8 (diff) |
Btrfs: Add a write ahead tree log to optimize synchronous operations
File syncs and directory syncs are optimized by copying their
items into a special (copy-on-write) log tree. There is one log tree per
subvolume and the btrfs super block points to a tree of log tree roots.
After a crash, items are copied out of the log tree and back into the
subvolume. See tree-log.c for all the details.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 138 |
1 file changed, 117 insertions, 21 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e7a938bfbc7..a4373db5967a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include "async-thread.h" | 41 | #include "async-thread.h" |
42 | #include "locking.h" | 42 | #include "locking.h" |
43 | #include "ref-cache.h" | 43 | #include "ref-cache.h" |
44 | #include "tree-log.h" | ||
44 | 45 | ||
45 | #if 0 | 46 | #if 0 |
46 | static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) | 47 | static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) |
@@ -694,6 +695,18 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | |||
694 | } | 695 | } |
695 | 696 | ||
696 | 697 | ||
698 | int btrfs_write_tree_block(struct extent_buffer *buf) | ||
699 | { | ||
700 | return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, | ||
701 | buf->start + buf->len - 1, WB_SYNC_NONE); | ||
702 | } | ||
703 | |||
704 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | ||
705 | { | ||
706 | return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, | ||
707 | buf->start, buf->start + buf->len -1); | ||
708 | } | ||
709 | |||
697 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 710 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
698 | u32 blocksize, u64 parent_transid) | 711 | u32 blocksize, u64 parent_transid) |
699 | { | 712 | { |
@@ -732,15 +745,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
732 | return 0; | 745 | return 0; |
733 | } | 746 | } |
734 | 747 | ||
735 | int wait_on_tree_block_writeback(struct btrfs_root *root, | ||
736 | struct extent_buffer *buf) | ||
737 | { | ||
738 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
739 | wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, | ||
740 | buf); | ||
741 | return 0; | ||
742 | } | ||
743 | |||
744 | static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 748 | static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, |
745 | u32 stripesize, struct btrfs_root *root, | 749 | u32 stripesize, struct btrfs_root *root, |
746 | struct btrfs_fs_info *fs_info, | 750 | struct btrfs_fs_info *fs_info, |
@@ -771,6 +775,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
771 | spin_lock_init(&root->node_lock); | 775 | spin_lock_init(&root->node_lock); |
772 | spin_lock_init(&root->list_lock); | 776 | spin_lock_init(&root->list_lock); |
773 | mutex_init(&root->objectid_mutex); | 777 | mutex_init(&root->objectid_mutex); |
778 | mutex_init(&root->log_mutex); | ||
774 | 779 | ||
775 | btrfs_leaf_ref_tree_init(&root->ref_tree_struct); | 780 | btrfs_leaf_ref_tree_init(&root->ref_tree_struct); |
776 | root->ref_tree = &root->ref_tree_struct; | 781 | root->ref_tree = &root->ref_tree_struct; |
@@ -809,11 +814,74 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
809 | return 0; | 814 | return 0; |
810 | } | 815 | } |
811 | 816 | ||
812 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, | 817 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, |
813 | struct btrfs_key *location) | 818 | struct btrfs_fs_info *fs_info) |
819 | { | ||
820 | struct extent_buffer *eb; | ||
821 | int ret; | ||
822 | |||
823 | if (!fs_info->log_root_tree) | ||
824 | return 0; | ||
825 | |||
826 | eb = fs_info->log_root_tree->node; | ||
827 | |||
828 | WARN_ON(btrfs_header_level(eb) != 0); | ||
829 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
830 | |||
831 | ret = btrfs_free_extent(trans, fs_info->tree_root, | ||
832 | eb->start, eb->len, | ||
833 | BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1); | ||
834 | BUG_ON(ret); | ||
835 | |||
836 | free_extent_buffer(eb); | ||
837 | kfree(fs_info->log_root_tree); | ||
838 | fs_info->log_root_tree = NULL; | ||
839 | return 0; | ||
840 | } | ||
841 | |||
842 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | ||
843 | struct btrfs_fs_info *fs_info) | ||
814 | { | 844 | { |
815 | struct btrfs_root *root; | 845 | struct btrfs_root *root; |
816 | struct btrfs_root *tree_root = fs_info->tree_root; | 846 | struct btrfs_root *tree_root = fs_info->tree_root; |
847 | |||
848 | root = kzalloc(sizeof(*root), GFP_NOFS); | ||
849 | if (!root) | ||
850 | return -ENOMEM; | ||
851 | |||
852 | __setup_root(tree_root->nodesize, tree_root->leafsize, | ||
853 | tree_root->sectorsize, tree_root->stripesize, | ||
854 | root, fs_info, BTRFS_TREE_LOG_OBJECTID); | ||
855 | |||
856 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; | ||
857 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
858 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; | ||
859 | root->ref_cows = 0; | ||
860 | |||
861 | root->node = btrfs_alloc_free_block(trans, root, root->leafsize, | ||
862 | BTRFS_TREE_LOG_OBJECTID, | ||
863 | 0, 0, 0, 0, 0); | ||
864 | |||
865 | btrfs_set_header_nritems(root->node, 0); | ||
866 | btrfs_set_header_level(root->node, 0); | ||
867 | btrfs_set_header_bytenr(root->node, root->node->start); | ||
868 | btrfs_set_header_generation(root->node, trans->transid); | ||
869 | btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); | ||
870 | |||
871 | write_extent_buffer(root->node, root->fs_info->fsid, | ||
872 | (unsigned long)btrfs_header_fsid(root->node), | ||
873 | BTRFS_FSID_SIZE); | ||
874 | btrfs_mark_buffer_dirty(root->node); | ||
875 | btrfs_tree_unlock(root->node); | ||
876 | fs_info->log_root_tree = root; | ||
877 | return 0; | ||
878 | } | ||
879 | |||
880 | struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | ||
881 | struct btrfs_key *location) | ||
882 | { | ||
883 | struct btrfs_root *root; | ||
884 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | ||
817 | struct btrfs_path *path; | 885 | struct btrfs_path *path; |
818 | struct extent_buffer *l; | 886 | struct extent_buffer *l; |
819 | u64 highest_inode; | 887 | u64 highest_inode; |
@@ -863,11 +931,13 @@ out: | |||
863 | blocksize, 0); | 931 | blocksize, 0); |
864 | BUG_ON(!root->node); | 932 | BUG_ON(!root->node); |
865 | insert: | 933 | insert: |
866 | root->ref_cows = 1; | 934 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { |
867 | ret = btrfs_find_highest_inode(root, &highest_inode); | 935 | root->ref_cows = 1; |
868 | if (ret == 0) { | 936 | ret = btrfs_find_highest_inode(root, &highest_inode); |
869 | root->highest_inode = highest_inode; | 937 | if (ret == 0) { |
870 | root->last_inode_alloc = highest_inode; | 938 | root->highest_inode = highest_inode; |
939 | root->last_inode_alloc = highest_inode; | ||
940 | } | ||
871 | } | 941 | } |
872 | return root; | 942 | return root; |
873 | } | 943 | } |
@@ -907,7 +977,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
907 | if (root) | 977 | if (root) |
908 | return root; | 978 | return root; |
909 | 979 | ||
910 | root = btrfs_read_fs_root_no_radix(fs_info, location); | 980 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
911 | if (IS_ERR(root)) | 981 | if (IS_ERR(root)) |
912 | return root; | 982 | return root; |
913 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 983 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
@@ -1250,16 +1320,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1250 | u32 blocksize; | 1320 | u32 blocksize; |
1251 | u32 stripesize; | 1321 | u32 stripesize; |
1252 | struct buffer_head *bh; | 1322 | struct buffer_head *bh; |
1253 | struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), | 1323 | struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), |
1254 | GFP_NOFS); | 1324 | GFP_NOFS); |
1255 | struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), | 1325 | struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), |
1256 | GFP_NOFS); | 1326 | GFP_NOFS); |
1257 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), | 1327 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), |
1258 | GFP_NOFS); | 1328 | GFP_NOFS); |
1259 | struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), | 1329 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), |
1260 | GFP_NOFS); | 1330 | GFP_NOFS); |
1261 | struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), | 1331 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), |
1262 | GFP_NOFS); | 1332 | GFP_NOFS); |
1333 | struct btrfs_root *log_tree_root; | ||
1334 | |||
1263 | int ret; | 1335 | int ret; |
1264 | int err = -EINVAL; | 1336 | int err = -EINVAL; |
1265 | 1337 | ||
@@ -1343,6 +1415,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1343 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | 1415 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
1344 | 1416 | ||
1345 | mutex_init(&fs_info->trans_mutex); | 1417 | mutex_init(&fs_info->trans_mutex); |
1418 | mutex_init(&fs_info->tree_log_mutex); | ||
1346 | mutex_init(&fs_info->drop_mutex); | 1419 | mutex_init(&fs_info->drop_mutex); |
1347 | mutex_init(&fs_info->alloc_mutex); | 1420 | mutex_init(&fs_info->alloc_mutex); |
1348 | mutex_init(&fs_info->chunk_mutex); | 1421 | mutex_init(&fs_info->chunk_mutex); |
@@ -1352,6 +1425,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1352 | init_waitqueue_head(&fs_info->transaction_throttle); | 1425 | init_waitqueue_head(&fs_info->transaction_throttle); |
1353 | init_waitqueue_head(&fs_info->transaction_wait); | 1426 | init_waitqueue_head(&fs_info->transaction_wait); |
1354 | init_waitqueue_head(&fs_info->async_submit_wait); | 1427 | init_waitqueue_head(&fs_info->async_submit_wait); |
1428 | init_waitqueue_head(&fs_info->tree_log_wait); | ||
1429 | atomic_set(&fs_info->tree_log_commit, 0); | ||
1430 | atomic_set(&fs_info->tree_log_writers, 0); | ||
1431 | fs_info->tree_log_transid = 0; | ||
1355 | 1432 | ||
1356 | #if 0 | 1433 | #if 0 |
1357 | ret = add_hasher(fs_info, "crc32c"); | 1434 | ret = add_hasher(fs_info, "crc32c"); |
@@ -1532,7 +1609,26 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1532 | if (!fs_info->transaction_kthread) | 1609 | if (!fs_info->transaction_kthread) |
1533 | goto fail_cleaner; | 1610 | goto fail_cleaner; |
1534 | 1611 | ||
1612 | if (btrfs_super_log_root(disk_super) != 0) { | ||
1613 | u32 blocksize; | ||
1614 | u64 bytenr = btrfs_super_log_root(disk_super); | ||
1615 | |||
1616 | blocksize = | ||
1617 | btrfs_level_size(tree_root, | ||
1618 | btrfs_super_log_root_level(disk_super)); | ||
1535 | 1619 | ||
1620 | log_tree_root = kzalloc(sizeof(struct btrfs_root), | ||
1621 | GFP_NOFS); | ||
1622 | |||
1623 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | ||
1624 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); | ||
1625 | |||
1626 | log_tree_root->node = read_tree_block(tree_root, bytenr, | ||
1627 | blocksize, 0); | ||
1628 | ret = btrfs_recover_log_trees(log_tree_root); | ||
1629 | BUG_ON(ret); | ||
1630 | } | ||
1631 | fs_info->last_trans_committed = btrfs_super_generation(disk_super); | ||
1536 | return tree_root; | 1632 | return tree_root; |
1537 | 1633 | ||
1538 | fail_cleaner: | 1634 | fail_cleaner: |