diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-09-05 16:13:11 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:07 -0400 |
commit | e02119d5a7b4396c5a872582fddc8bd6d305a70a (patch) | |
tree | 825efe2a79dbca8d61256183f3526a5b5dc40dc6 /fs/btrfs/transaction.c | |
parent | a1b32a5932cfac7c38b442582285f3da2a09dfd8 (diff) |
Btrfs: Add a write ahead tree log to optimize synchronous operations
File syncs and directory syncs are optimized by copying their
items into a special (copy-on-write) log tree. There is one log tree per
subvolume and the btrfs super block points to a tree of log tree roots.
After a crash, items are copied out of the log tree and back into the
subvolume. See tree-log.c for all the details.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 45 |
1 files changed, 40 insertions, 5 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index eff3ad72991b..49c4f5b40ed6 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -25,6 +25,7 @@ | |||
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "locking.h" | 26 | #include "locking.h" |
27 | #include "ref-cache.h" | 27 | #include "ref-cache.h" |
28 | #include "tree-log.h" | ||
28 | 29 | ||
29 | static int total_trans = 0; | 30 | static int total_trans = 0; |
30 | extern struct kmem_cache *btrfs_trans_handle_cachep; | 31 | extern struct kmem_cache *btrfs_trans_handle_cachep; |
@@ -57,6 +58,7 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
57 | root->fs_info->generation++; | 58 | root->fs_info->generation++; |
58 | root->fs_info->last_alloc = 0; | 59 | root->fs_info->last_alloc = 0; |
59 | root->fs_info->last_data_alloc = 0; | 60 | root->fs_info->last_data_alloc = 0; |
61 | root->fs_info->last_log_alloc = 0; | ||
60 | cur_trans->num_writers = 1; | 62 | cur_trans->num_writers = 1; |
61 | cur_trans->num_joined = 0; | 63 | cur_trans->num_joined = 0; |
62 | cur_trans->transid = root->fs_info->generation; | 64 | cur_trans->transid = root->fs_info->generation; |
@@ -83,7 +85,7 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
83 | return 0; | 85 | return 0; |
84 | } | 86 | } |
85 | 87 | ||
86 | static noinline int record_root_in_trans(struct btrfs_root *root) | 88 | noinline int btrfs_record_root_in_trans(struct btrfs_root *root) |
87 | { | 89 | { |
88 | struct btrfs_dirty_root *dirty; | 90 | struct btrfs_dirty_root *dirty; |
89 | u64 running_trans_id = root->fs_info->running_transaction->transid; | 91 | u64 running_trans_id = root->fs_info->running_transaction->transid; |
@@ -151,7 +153,7 @@ static void wait_current_trans(struct btrfs_root *root) | |||
151 | } | 153 | } |
152 | } | 154 | } |
153 | 155 | ||
154 | struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 156 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
155 | int num_blocks, int wait) | 157 | int num_blocks, int wait) |
156 | { | 158 | { |
157 | struct btrfs_trans_handle *h = | 159 | struct btrfs_trans_handle *h = |
@@ -164,7 +166,7 @@ struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
164 | ret = join_transaction(root); | 166 | ret = join_transaction(root); |
165 | BUG_ON(ret); | 167 | BUG_ON(ret); |
166 | 168 | ||
167 | record_root_in_trans(root); | 169 | btrfs_record_root_in_trans(root); |
168 | h->transid = root->fs_info->running_transaction->transid; | 170 | h->transid = root->fs_info->running_transaction->transid; |
169 | h->transaction = root->fs_info->running_transaction; | 171 | h->transaction = root->fs_info->running_transaction; |
170 | h->blocks_reserved = num_blocks; | 172 | h->blocks_reserved = num_blocks; |
@@ -456,6 +458,8 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, | |||
456 | BUG_ON(!root->ref_tree); | 458 | BUG_ON(!root->ref_tree); |
457 | dirty = root->dirty_root; | 459 | dirty = root->dirty_root; |
458 | 460 | ||
461 | btrfs_free_log(trans, root); | ||
462 | |||
459 | if (root->commit_root == root->node) { | 463 | if (root->commit_root == root->node) { |
460 | WARN_ON(root->node->start != | 464 | WARN_ON(root->node->start != |
461 | btrfs_root_bytenr(&root->root_item)); | 465 | btrfs_root_bytenr(&root->root_item)); |
@@ -600,7 +604,7 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
600 | num_bytes -= btrfs_root_used(&dirty->root->root_item); | 604 | num_bytes -= btrfs_root_used(&dirty->root->root_item); |
601 | bytes_used = btrfs_root_used(&root->root_item); | 605 | bytes_used = btrfs_root_used(&root->root_item); |
602 | if (num_bytes) { | 606 | if (num_bytes) { |
603 | record_root_in_trans(root); | 607 | btrfs_record_root_in_trans(root); |
604 | btrfs_set_root_used(&root->root_item, | 608 | btrfs_set_root_used(&root->root_item, |
605 | bytes_used - num_bytes); | 609 | bytes_used - num_bytes); |
606 | } | 610 | } |
@@ -745,7 +749,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
745 | int ret; | 749 | int ret; |
746 | 750 | ||
747 | INIT_LIST_HEAD(&dirty_fs_roots); | 751 | INIT_LIST_HEAD(&dirty_fs_roots); |
748 | |||
749 | mutex_lock(&root->fs_info->trans_mutex); | 752 | mutex_lock(&root->fs_info->trans_mutex); |
750 | if (trans->transaction->in_commit) { | 753 | if (trans->transaction->in_commit) { |
751 | cur_trans = trans->transaction; | 754 | cur_trans = trans->transaction; |
@@ -821,10 +824,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
821 | 824 | ||
822 | WARN_ON(cur_trans != trans->transaction); | 825 | WARN_ON(cur_trans != trans->transaction); |
823 | 826 | ||
827 | /* btrfs_commit_tree_roots is responsible for getting the | ||
828 | * various roots consistent with each other. Every pointer | ||
829 | * in the tree of tree roots has to point to the most up to date | ||
830 | * root for every subvolume and other tree. So, we have to keep | ||
831 | * the tree logging code from jumping in and changing any | ||
832 | * of the trees. | ||
833 | * | ||
834 | * At this point in the commit, there can't be any tree-log | ||
835 | * writers, but a little lower down we drop the trans mutex | ||
836 | * and let new people in. By holding the tree_log_mutex | ||
837 | * from now until after the super is written, we avoid races | ||
838 | * with the tree-log code. | ||
839 | */ | ||
840 | mutex_lock(&root->fs_info->tree_log_mutex); | ||
841 | |||
824 | ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, | 842 | ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, |
825 | &dirty_fs_roots); | 843 | &dirty_fs_roots); |
826 | BUG_ON(ret); | 844 | BUG_ON(ret); |
827 | 845 | ||
846 | /* add_dirty_roots gets rid of all the tree log roots, it is now | ||
847 | * safe to free the root of tree log roots | ||
848 | */ | ||
849 | btrfs_free_log_root_tree(trans, root->fs_info); | ||
850 | |||
828 | ret = btrfs_commit_tree_roots(trans, root); | 851 | ret = btrfs_commit_tree_roots(trans, root); |
829 | BUG_ON(ret); | 852 | BUG_ON(ret); |
830 | 853 | ||
@@ -843,6 +866,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
843 | chunk_root->node->start); | 866 | chunk_root->node->start); |
844 | btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, | 867 | btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, |
845 | btrfs_header_level(chunk_root->node)); | 868 | btrfs_header_level(chunk_root->node)); |
869 | |||
870 | if (!root->fs_info->log_root_recovering) { | ||
871 | btrfs_set_super_log_root(&root->fs_info->super_copy, 0); | ||
872 | btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0); | ||
873 | } | ||
874 | |||
846 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, | 875 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, |
847 | sizeof(root->fs_info->super_copy)); | 876 | sizeof(root->fs_info->super_copy)); |
848 | 877 | ||
@@ -857,6 +886,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
857 | BUG_ON(ret); | 886 | BUG_ON(ret); |
858 | write_ctree_super(trans, root); | 887 | write_ctree_super(trans, root); |
859 | 888 | ||
889 | /* | ||
890 | * the super is written, we can safely allow the tree-loggers | ||
891 | * to go about their business | ||
892 | */ | ||
893 | mutex_unlock(&root->fs_info->tree_log_mutex); | ||
894 | |||
860 | btrfs_finish_extent_commit(trans, root, pinned_copy); | 895 | btrfs_finish_extent_commit(trans, root, pinned_copy); |
861 | mutex_lock(&root->fs_info->trans_mutex); | 896 | mutex_lock(&root->fs_info->trans_mutex); |
862 | 897 | ||