aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2014-09-06 17:34:39 -0400
committerChris Mason <clm@fb.com>2014-09-08 16:56:43 -0400
commit49dae1bc1c665817e434d01eefaa11967f618243 (patch)
tree4c306620e23a5c0446ce1826919ed5e62f8f0a5c
parentc47ca32d3aadb234f73389a34c97574085bc9eda (diff)
Btrfs: fix fsync data loss after a ranged fsync
While we're doing a full fsync (when the inode has the flag BTRFS_INODE_NEEDS_FULL_SYNC set) that is ranged too (covers only a portion of the file), we might have ordered operations that are started before or while we're logging the inode and that fall outside the fsync range. Therefore when a full ranged fsync finishes don't remove every extent map from the list of modified extent maps - as for some of them, that fall outside our fsync range, their respective ordered operation hasn't finished yet, meaning the corresponding file extent item wasn't inserted into the fs/subvol tree yet and therefore we didn't log it, and we must let the next fast fsync (one that checks only the modified list) see this extent map and log a matching file extent item to the log btree and wait for its ordered operation to finish (if it's still ongoing). A test case for xfstests follows. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/tree-log.c77
-rw-r--r--fs/btrfs/tree-log.h2
3 files changed, 64 insertions, 17 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 36861b7a6757..ff1cc0399b9a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1966,7 +1966,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1966 1966
1967 btrfs_init_log_ctx(&ctx); 1967 btrfs_init_log_ctx(&ctx);
1968 1968
1969 ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); 1969 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
1970 if (ret < 0) { 1970 if (ret < 0) {
1971 /* Fallthrough and commit/free transaction. */ 1971 /* Fallthrough and commit/free transaction. */
1972 ret = 1; 1972 ret = 1;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7e0e6e3029dd..d296efe2d3e7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -94,8 +94,10 @@
94#define LOG_WALK_REPLAY_ALL 3 94#define LOG_WALK_REPLAY_ALL 3
95 95
96static int btrfs_log_inode(struct btrfs_trans_handle *trans, 96static int btrfs_log_inode(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root, struct inode *inode, 97 struct btrfs_root *root, struct inode *inode,
98 int inode_only); 98 int inode_only,
99 const loff_t start,
100 const loff_t end);
99static int link_to_fixup_dir(struct btrfs_trans_handle *trans, 101static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
100 struct btrfs_root *root, 102 struct btrfs_root *root,
101 struct btrfs_path *path, u64 objectid); 103 struct btrfs_path *path, u64 objectid);
@@ -3858,8 +3860,10 @@ process:
3858 * This handles both files and directories. 3860 * This handles both files and directories.
3859 */ 3861 */
3860static int btrfs_log_inode(struct btrfs_trans_handle *trans, 3862static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3861 struct btrfs_root *root, struct inode *inode, 3863 struct btrfs_root *root, struct inode *inode,
3862 int inode_only) 3864 int inode_only,
3865 const loff_t start,
3866 const loff_t end)
3863{ 3867{
3864 struct btrfs_path *path; 3868 struct btrfs_path *path;
3865 struct btrfs_path *dst_path; 3869 struct btrfs_path *dst_path;
@@ -3876,6 +3880,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3876 int ins_nr; 3880 int ins_nr;
3877 bool fast_search = false; 3881 bool fast_search = false;
3878 u64 ino = btrfs_ino(inode); 3882 u64 ino = btrfs_ino(inode);
3883 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
3879 3884
3880 path = btrfs_alloc_path(); 3885 path = btrfs_alloc_path();
3881 if (!path) 3886 if (!path)
@@ -4049,13 +4054,35 @@ log_extents:
4049 goto out_unlock; 4054 goto out_unlock;
4050 } 4055 }
4051 } else if (inode_only == LOG_INODE_ALL) { 4056 } else if (inode_only == LOG_INODE_ALL) {
4052 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
4053 struct extent_map *em, *n; 4057 struct extent_map *em, *n;
4054 4058
4055 write_lock(&tree->lock); 4059 write_lock(&em_tree->lock);
4056 list_for_each_entry_safe(em, n, &tree->modified_extents, list) 4060 /*
4057 list_del_init(&em->list); 4061 * We can't just remove every em if we're called for a ranged
4058 write_unlock(&tree->lock); 4062 * fsync - that is, one that doesn't cover the whole possible
4063 * file range (0 to LLONG_MAX). This is because we can have
4064 * em's that fall outside the range we're logging and therefore
4065 * their ordered operations haven't completed yet
4066 * (btrfs_finish_ordered_io() not invoked yet). This means we
4067 * didn't get their respective file extent item in the fs/subvol
4068 * tree yet, and need to let the next fast fsync (one which
4069 * consults the list of modified extent maps) find the em so
4070 * that it logs a matching file extent item and waits for the
4071 * respective ordered operation to complete (if it's still
4072 * running).
4073 *
4074 * Removing every em outside the range we're logging would make
4075 * the next fast fsync not log their matching file extent items,
4076 * therefore making us lose data after a log replay.
4077 */
4078 list_for_each_entry_safe(em, n, &em_tree->modified_extents,
4079 list) {
4080 const u64 mod_end = em->mod_start + em->mod_len - 1;
4081
4082 if (em->mod_start >= start && mod_end <= end)
4083 list_del_init(&em->list);
4084 }
4085 write_unlock(&em_tree->lock);
4059 } 4086 }
4060 4087
4061 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 4088 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
@@ -4065,8 +4092,19 @@ log_extents:
4065 goto out_unlock; 4092 goto out_unlock;
4066 } 4093 }
4067 } 4094 }
4068 BTRFS_I(inode)->logged_trans = trans->transid; 4095
4069 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 4096 write_lock(&em_tree->lock);
4097 /*
4098 * If we're doing a ranged fsync and there are still modified extents
4099 * in the list, we must run on the next fsync call as it might cover
4100 * those extents (a full fsync or an fsync for other range).
4101 */
4102 if (list_empty(&em_tree->modified_extents)) {
4103 BTRFS_I(inode)->logged_trans = trans->transid;
4104 BTRFS_I(inode)->last_log_commit =
4105 BTRFS_I(inode)->last_sub_trans;
4106 }
4107 write_unlock(&em_tree->lock);
4070out_unlock: 4108out_unlock:
4071 if (unlikely(err)) 4109 if (unlikely(err))
4072 btrfs_put_logged_extents(&logged_list); 4110 btrfs_put_logged_extents(&logged_list);
@@ -4161,7 +4199,10 @@ out:
4161 */ 4199 */
4162static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, 4200static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4163 struct btrfs_root *root, struct inode *inode, 4201 struct btrfs_root *root, struct inode *inode,
4164 struct dentry *parent, int exists_only, 4202 struct dentry *parent,
4203 const loff_t start,
4204 const loff_t end,
4205 int exists_only,
4165 struct btrfs_log_ctx *ctx) 4206 struct btrfs_log_ctx *ctx)
4166{ 4207{
4167 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 4208 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
@@ -4207,7 +4248,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4207 if (ret) 4248 if (ret)
4208 goto end_no_trans; 4249 goto end_no_trans;
4209 4250
4210 ret = btrfs_log_inode(trans, root, inode, inode_only); 4251 ret = btrfs_log_inode(trans, root, inode, inode_only, start, end);
4211 if (ret) 4252 if (ret)
4212 goto end_trans; 4253 goto end_trans;
4213 4254
@@ -4235,7 +4276,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4235 4276
4236 if (BTRFS_I(inode)->generation > 4277 if (BTRFS_I(inode)->generation >
4237 root->fs_info->last_trans_committed) { 4278 root->fs_info->last_trans_committed) {
4238 ret = btrfs_log_inode(trans, root, inode, inode_only); 4279 ret = btrfs_log_inode(trans, root, inode, inode_only,
4280 0, LLONG_MAX);
4239 if (ret) 4281 if (ret)
4240 goto end_trans; 4282 goto end_trans;
4241 } 4283 }
@@ -4269,13 +4311,15 @@ end_no_trans:
4269 */ 4311 */
4270int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 4312int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
4271 struct btrfs_root *root, struct dentry *dentry, 4313 struct btrfs_root *root, struct dentry *dentry,
4314 const loff_t start,
4315 const loff_t end,
4272 struct btrfs_log_ctx *ctx) 4316 struct btrfs_log_ctx *ctx)
4273{ 4317{
4274 struct dentry *parent = dget_parent(dentry); 4318 struct dentry *parent = dget_parent(dentry);
4275 int ret; 4319 int ret;
4276 4320
4277 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 4321 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
4278 0, ctx); 4322 start, end, 0, ctx);
4279 dput(parent); 4323 dput(parent);
4280 4324
4281 return ret; 4325 return ret;
@@ -4512,6 +4556,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
4512 root->fs_info->last_trans_committed)) 4556 root->fs_info->last_trans_committed))
4513 return 0; 4557 return 0;
4514 4558
4515 return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); 4559 return btrfs_log_inode_parent(trans, root, inode, parent, 0,
4560 LLONG_MAX, 1, NULL);
4516} 4561}
4517 4562
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 7f5b41bd5373..e2e798ae7cd7 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
59int btrfs_recover_log_trees(struct btrfs_root *tree_root); 59int btrfs_recover_log_trees(struct btrfs_root *tree_root);
60int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 60int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
61 struct btrfs_root *root, struct dentry *dentry, 61 struct btrfs_root *root, struct dentry *dentry,
62 const loff_t start,
63 const loff_t end,
62 struct btrfs_log_ctx *ctx); 64 struct btrfs_log_ctx *ctx);
63int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, 65int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
64 struct btrfs_root *root, 66 struct btrfs_root *root,