summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
author: Josef Bacik <josef@toxicpanda.com> 2019-09-30 16:27:25 -0400
committer: David Sterba <dsterba@suse.com> 2019-10-01 12:41:02 -0400
commit: 4203e968947071586a98b5314fd7ffdea3b4f971 (patch)
tree: 12e5b5a8478475df095a66f4edcb9dc88f136822 /fs/btrfs/tree-log.c
parent: c67d970f0ea8dcc423e112137d34334fa0abb8ec (diff)
btrfs: fix incorrect updating of log root tree
We've historically had reports of being unable to mount file systems because the tree log root couldn't be read. Usually this is the "parent transid failure", but it could be any of the related errors, including "fsid mismatch" or "bad tree block", depending on which block got allocated. Modifications of the individual log root items are serialized on the per-log root root_mutex. This means that any modification to the per-subvol log root_item is completely protected. However, we update the root item in the log root tree outside of the log root tree log_mutex. We do this in order to allow multiple subvolumes to be updated in each log transaction. This is problematic, however, because when we are writing the log root tree out we update the super block with the _current_ log root node information. Since these two operations happen independently of each other, you can end up updating the log root tree in between writing out the dirty blocks and setting the super block to point at the current root. This means we'll point at the new root node that hasn't been written out, instead of the one we should be pointing at. Thus whatever garbage or old block we end up pointing at complains when we mount the file system later and try to replay the log. Fix this by copying the log's root item into a local root item copy. Then once we're safely under the log_root_tree->log_mutex we update the root item in the log_root_tree. This way we do not modify the log_root_tree while we're committing it, fixing the problem. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Chris Mason <clm@fb.com> Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 36
1 files changed, 27 insertions, 9 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 77b6797fcac3..2488eb4b70fc 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2932,7 +2932,8 @@ out:
2932 * in the tree of log roots 2932 * in the tree of log roots
2933 */ 2933 */
2934static int update_log_root(struct btrfs_trans_handle *trans, 2934static int update_log_root(struct btrfs_trans_handle *trans,
2935 struct btrfs_root *log) 2935 struct btrfs_root *log,
2936 struct btrfs_root_item *root_item)
2936{ 2937{
2937 struct btrfs_fs_info *fs_info = log->fs_info; 2938 struct btrfs_fs_info *fs_info = log->fs_info;
2938 int ret; 2939 int ret;
@@ -2940,10 +2941,10 @@ static int update_log_root(struct btrfs_trans_handle *trans,
2940 if (log->log_transid == 1) { 2941 if (log->log_transid == 1) {
2941 /* insert root item on the first sync */ 2942 /* insert root item on the first sync */
2942 ret = btrfs_insert_root(trans, fs_info->log_root_tree, 2943 ret = btrfs_insert_root(trans, fs_info->log_root_tree,
2943 &log->root_key, &log->root_item); 2944 &log->root_key, root_item);
2944 } else { 2945 } else {
2945 ret = btrfs_update_root(trans, fs_info->log_root_tree, 2946 ret = btrfs_update_root(trans, fs_info->log_root_tree,
2946 &log->root_key, &log->root_item); 2947 &log->root_key, root_item);
2947 } 2948 }
2948 return ret; 2949 return ret;
2949} 2950}
@@ -3041,6 +3042,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3041 struct btrfs_fs_info *fs_info = root->fs_info; 3042 struct btrfs_fs_info *fs_info = root->fs_info;
3042 struct btrfs_root *log = root->log_root; 3043 struct btrfs_root *log = root->log_root;
3043 struct btrfs_root *log_root_tree = fs_info->log_root_tree; 3044 struct btrfs_root *log_root_tree = fs_info->log_root_tree;
3045 struct btrfs_root_item new_root_item;
3044 int log_transid = 0; 3046 int log_transid = 0;
3045 struct btrfs_log_ctx root_log_ctx; 3047 struct btrfs_log_ctx root_log_ctx;
3046 struct blk_plug plug; 3048 struct blk_plug plug;
@@ -3104,18 +3106,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3104 goto out; 3106 goto out;
3105 } 3107 }
3106 3108
3109 /*
3110 * We _must_ update under the root->log_mutex in order to make sure we
3111 * have a consistent view of the log root we are trying to commit at
3112 * this moment.
3113 *
3114 * We _must_ copy this into a local copy, because we are not holding the
3115 * log_root_tree->log_mutex yet. This is important because when we
3116 * commit the log_root_tree we must have a consistent view of the
3117 * log_root_tree when we update the super block to point at the
3118 * log_root_tree bytenr. If we update the log_root_tree here we'll race
3119 * with the commit and possibly point at the new block which we may not
3120 * have written out.
3121 */
3107 btrfs_set_root_node(&log->root_item, log->node); 3122 btrfs_set_root_node(&log->root_item, log->node);
3123 memcpy(&new_root_item, &log->root_item, sizeof(new_root_item));
3108 3124
3109 root->log_transid++; 3125 root->log_transid++;
3110 log->log_transid = root->log_transid; 3126 log->log_transid = root->log_transid;
3111 root->log_start_pid = 0; 3127 root->log_start_pid = 0;
3112 /* 3128 /*
3113 * Update or create log root item under the root's log_mutex to prevent
3114 * races with concurrent log syncs that can lead to failure to update
3115 * log root item because it was not created yet.
3116 */
3117 ret = update_log_root(trans, log);
3118 /*
3119 * IO has been started, blocks of the log tree have WRITTEN flag set 3129 * IO has been started, blocks of the log tree have WRITTEN flag set
3120 * in their headers. new modifications of the log will be written to 3130 * in their headers. new modifications of the log will be written to
3121 * new positions. so it's safe to allow log writers to go in. 3131 * new positions. so it's safe to allow log writers to go in.
@@ -3135,6 +3145,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
3135 mutex_unlock(&log_root_tree->log_mutex); 3145 mutex_unlock(&log_root_tree->log_mutex);
3136 3146
3137 mutex_lock(&log_root_tree->log_mutex); 3147 mutex_lock(&log_root_tree->log_mutex);
3148
3149 /*
3150 * Now we are safe to update the log_root_tree because we're under the
3151 * log_mutex, and we're a current writer so we're holding the commit
3152 * open until we drop the log_mutex.
3153 */
3154 ret = update_log_root(trans, log, &new_root_item);
3155
3138 if (atomic_dec_and_test(&log_root_tree->log_writers)) { 3156 if (atomic_dec_and_test(&log_root_tree->log_writers)) {
3139 /* atomic_dec_and_test implies a barrier */ 3157 /* atomic_dec_and_test implies a barrier */
3140 cond_wake_up_nomb(&log_root_tree->log_writer_wait); 3158 cond_wake_up_nomb(&log_root_tree->log_writer_wait);