summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2018-06-11 14:24:28 -0400
committerDavid Sterba <dsterba@suse.com>2018-08-23 11:37:26 -0400
commitd4682ba03ef618b6ef4be7cedc7aacaf505d3a58 (patch)
tree5700c2336b2b53118c4869c447f084023ab8555e
parent8ecebf4d767e2307a946c8905278d6358eda35c3 (diff)
Btrfs: sync log after logging new name
When we add a new name for an inode which was logged in the current transaction, we update the inode in the log so that its new name and ancestors are added to the log. However when we do this we do not persist the log, so the changes remain in memory only, and as a consequence, any ancestors that were created in the current transaction are updated such that future calls to btrfs_inode_in_log() return true. This leads to a subsequent fsync against such new ancestor directories returning immediately, without persisting the log, therefore after a power failure the new ancestor directories do not exist, despite fsync being called against them explicitly. Example: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/A $ mkdir /mnt/B $ mkdir /mnt/A/C $ touch /mnt/B/foo $ xfs_io -c "fsync" /mnt/B/foo $ ln /mnt/B/foo /mnt/A/C/foo $ xfs_io -c "fsync" /mnt/A <power failure> After the power failure, directory "A" does not exist, despite the explicit fsync on it. Instead of fixing this by changing the behaviour of the explicit fsync on directory "A" to persist the log instead of doing nothing, make the logging of the new file name (which happens when creating a hard link or renaming) persist the log. This approach not only is simpler, not requiring addition of new fields to the inode in memory structure, but also gives us the same behaviour as ext4, xfs and f2fs (possibly other filesystems too). A test case for fstests follows soon. Fixes: 12fcfd22fe5b ("Btrfs: tree logging unlink/rename fixes") Reported-by: Vijay Chidambaram <vvijay03@gmail.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r--fs/btrfs/inode.c92
-rw-r--r--fs/btrfs/tree-log.c48
-rw-r--r--fs/btrfs/tree-log.h10
3 files changed, 131 insertions, 19 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c6d8c5d19ff0..cb09873ad270 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6634,6 +6634,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6634 drop_inode = 1; 6634 drop_inode = 1;
6635 } else { 6635 } else {
6636 struct dentry *parent = dentry->d_parent; 6636 struct dentry *parent = dentry->d_parent;
6637 int ret;
6638
6637 err = btrfs_update_inode(trans, root, inode); 6639 err = btrfs_update_inode(trans, root, inode);
6638 if (err) 6640 if (err)
6639 goto fail; 6641 goto fail;
@@ -6647,7 +6649,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6647 goto fail; 6649 goto fail;
6648 } 6650 }
6649 d_instantiate(dentry, inode); 6651 d_instantiate(dentry, inode);
6650 btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent); 6652 ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
6653 true, NULL);
6654 if (ret == BTRFS_NEED_TRANS_COMMIT) {
6655 err = btrfs_commit_transaction(trans);
6656 trans = NULL;
6657 }
6651 } 6658 }
6652 6659
6653fail: 6660fail:
@@ -9386,14 +9393,21 @@ static int btrfs_rename_exchange(struct inode *old_dir,
9386 u64 new_idx = 0; 9393 u64 new_idx = 0;
9387 u64 root_objectid; 9394 u64 root_objectid;
9388 int ret; 9395 int ret;
9389 int ret2;
9390 bool root_log_pinned = false; 9396 bool root_log_pinned = false;
9391 bool dest_log_pinned = false; 9397 bool dest_log_pinned = false;
9398 struct btrfs_log_ctx ctx_root;
9399 struct btrfs_log_ctx ctx_dest;
9400 bool sync_log_root = false;
9401 bool sync_log_dest = false;
9402 bool commit_transaction = false;
9392 9403
9393 /* we only allow rename subvolume link between subvolumes */ 9404 /* we only allow rename subvolume link between subvolumes */
9394 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) 9405 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
9395 return -EXDEV; 9406 return -EXDEV;
9396 9407
9408 btrfs_init_log_ctx(&ctx_root, old_inode);
9409 btrfs_init_log_ctx(&ctx_dest, new_inode);
9410
9397 /* close the race window with snapshot create/destroy ioctl */ 9411 /* close the race window with snapshot create/destroy ioctl */
9398 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) 9412 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9399 down_read(&fs_info->subvol_sem); 9413 down_read(&fs_info->subvol_sem);
@@ -9540,15 +9554,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,
9540 9554
9541 if (root_log_pinned) { 9555 if (root_log_pinned) {
9542 parent = new_dentry->d_parent; 9556 parent = new_dentry->d_parent;
9543 btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), 9557 ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
9544 parent); 9558 BTRFS_I(old_dir), parent,
9559 false, &ctx_root);
9560 if (ret == BTRFS_NEED_LOG_SYNC)
9561 sync_log_root = true;
9562 else if (ret == BTRFS_NEED_TRANS_COMMIT)
9563 commit_transaction = true;
9564 ret = 0;
9545 btrfs_end_log_trans(root); 9565 btrfs_end_log_trans(root);
9546 root_log_pinned = false; 9566 root_log_pinned = false;
9547 } 9567 }
9548 if (dest_log_pinned) { 9568 if (dest_log_pinned) {
9549 parent = old_dentry->d_parent; 9569 if (!commit_transaction) {
9550 btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir), 9570 parent = old_dentry->d_parent;
9551 parent); 9571 ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
9572 BTRFS_I(new_dir), parent,
9573 false, &ctx_dest);
9574 if (ret == BTRFS_NEED_LOG_SYNC)
9575 sync_log_dest = true;
9576 else if (ret == BTRFS_NEED_TRANS_COMMIT)
9577 commit_transaction = true;
9578 ret = 0;
9579 }
9552 btrfs_end_log_trans(dest); 9580 btrfs_end_log_trans(dest);
9553 dest_log_pinned = false; 9581 dest_log_pinned = false;
9554 } 9582 }
@@ -9581,8 +9609,26 @@ out_fail:
9581 dest_log_pinned = false; 9609 dest_log_pinned = false;
9582 } 9610 }
9583 } 9611 }
9584 ret2 = btrfs_end_transaction(trans); 9612 if (!ret && sync_log_root && !commit_transaction) {
9585 ret = ret ? ret : ret2; 9613 ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
9614 &ctx_root);
9615 if (ret)
9616 commit_transaction = true;
9617 }
9618 if (!ret && sync_log_dest && !commit_transaction) {
9619 ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
9620 &ctx_dest);
9621 if (ret)
9622 commit_transaction = true;
9623 }
9624 if (commit_transaction) {
9625 ret = btrfs_commit_transaction(trans);
9626 } else {
9627 int ret2;
9628
9629 ret2 = btrfs_end_transaction(trans);
9630 ret = ret ? ret : ret2;
9631 }
9586out_notrans: 9632out_notrans:
9587 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) 9633 if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
9588 up_read(&fs_info->subvol_sem); 9634 up_read(&fs_info->subvol_sem);
@@ -9659,6 +9705,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9659 int ret; 9705 int ret;
9660 u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); 9706 u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
9661 bool log_pinned = false; 9707 bool log_pinned = false;
9708 struct btrfs_log_ctx ctx;
9709 bool sync_log = false;
9710 bool commit_transaction = false;
9662 9711
9663 if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) 9712 if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
9664 return -EPERM; 9713 return -EPERM;
@@ -9816,8 +9865,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9816 if (log_pinned) { 9865 if (log_pinned) {
9817 struct dentry *parent = new_dentry->d_parent; 9866 struct dentry *parent = new_dentry->d_parent;
9818 9867
9819 btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir), 9868 btrfs_init_log_ctx(&ctx, old_inode);
9820 parent); 9869 ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
9870 BTRFS_I(old_dir), parent,
9871 false, &ctx);
9872 if (ret == BTRFS_NEED_LOG_SYNC)
9873 sync_log = true;
9874 else if (ret == BTRFS_NEED_TRANS_COMMIT)
9875 commit_transaction = true;
9876 ret = 0;
9821 btrfs_end_log_trans(root); 9877 btrfs_end_log_trans(root);
9822 log_pinned = false; 9878 log_pinned = false;
9823 } 9879 }
@@ -9854,7 +9910,19 @@ out_fail:
9854 btrfs_end_log_trans(root); 9910 btrfs_end_log_trans(root);
9855 log_pinned = false; 9911 log_pinned = false;
9856 } 9912 }
9857 btrfs_end_transaction(trans); 9913 if (!ret && sync_log) {
9914 ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
9915 if (ret)
9916 commit_transaction = true;
9917 }
9918 if (commit_transaction) {
9919 ret = btrfs_commit_transaction(trans);
9920 } else {
9921 int ret2;
9922
9923 ret2 = btrfs_end_transaction(trans);
9924 ret = ret ? ret : ret2;
9925 }
9858out_notrans: 9926out_notrans:
9859 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) 9927 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9860 up_read(&fs_info->subvol_sem); 9928 up_read(&fs_info->subvol_sem);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1650dc44a5e3..3c2ae0e4f25a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6025,14 +6025,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
6025 * Call this after adding a new name for a file and it will properly 6025 * Call this after adding a new name for a file and it will properly
6026 * update the log to reflect the new name. 6026 * update the log to reflect the new name.
6027 * 6027 *
6028 * It will return zero if all goes well, and it will return 1 if a 6028 * @ctx can not be NULL when @sync_log is false, and should be NULL when it's
6029 * full transaction commit is required. 6029 * true (because it's not used).
6030 *
6031 * Return value depends on whether @sync_log is true or false.
6032 * When true: returns BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
6033 * committed by the caller, and BTRFS_DONT_NEED_TRANS_COMMIT
6034 * otherwise.
6035 * When false: returns BTRFS_DONT_NEED_LOG_SYNC if the caller does not need to
6036 * to sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log,
6037 * or BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
6038 * committed (without attempting to sync the log).
6030 */ 6039 */
6031int btrfs_log_new_name(struct btrfs_trans_handle *trans, 6040int btrfs_log_new_name(struct btrfs_trans_handle *trans,
6032 struct btrfs_inode *inode, struct btrfs_inode *old_dir, 6041 struct btrfs_inode *inode, struct btrfs_inode *old_dir,
6033 struct dentry *parent) 6042 struct dentry *parent,
6043 bool sync_log, struct btrfs_log_ctx *ctx)
6034{ 6044{
6035 struct btrfs_fs_info *fs_info = trans->fs_info; 6045 struct btrfs_fs_info *fs_info = trans->fs_info;
6046 int ret;
6036 6047
6037 /* 6048 /*
6038 * this will force the logging code to walk the dentry chain 6049 * this will force the logging code to walk the dentry chain
@@ -6047,9 +6058,34 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
6047 */ 6058 */
6048 if (inode->logged_trans <= fs_info->last_trans_committed && 6059 if (inode->logged_trans <= fs_info->last_trans_committed &&
6049 (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed)) 6060 (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
6050 return 0; 6061 return sync_log ? BTRFS_DONT_NEED_TRANS_COMMIT :
6062 BTRFS_DONT_NEED_LOG_SYNC;
6063
6064 if (sync_log) {
6065 struct btrfs_log_ctx ctx2;
6066
6067 btrfs_init_log_ctx(&ctx2, &inode->vfs_inode);
6068 ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
6069 LOG_INODE_EXISTS, &ctx2);
6070 if (ret == BTRFS_NO_LOG_SYNC)
6071 return BTRFS_DONT_NEED_TRANS_COMMIT;
6072 else if (ret)
6073 return BTRFS_NEED_TRANS_COMMIT;
6074
6075 ret = btrfs_sync_log(trans, inode->root, &ctx2);
6076 if (ret)
6077 return BTRFS_NEED_TRANS_COMMIT;
6078 return BTRFS_DONT_NEED_TRANS_COMMIT;
6079 }
6080
6081 ASSERT(ctx);
6082 ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
6083 LOG_INODE_EXISTS, ctx);
6084 if (ret == BTRFS_NO_LOG_SYNC)
6085 return BTRFS_DONT_NEED_LOG_SYNC;
6086 else if (ret)
6087 return BTRFS_NEED_TRANS_COMMIT;
6051 6088
6052 return btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX, 6089 return BTRFS_NEED_LOG_SYNC;
6053 LOG_INODE_EXISTS, NULL);
6054} 6090}
6055 6091
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 122e68b89a5a..7ab9bb88a639 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -71,8 +71,16 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
71 int for_rename); 71 int for_rename);
72void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, 72void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
73 struct btrfs_inode *dir); 73 struct btrfs_inode *dir);
74/* Return values for btrfs_log_new_name() */
75enum {
76 BTRFS_DONT_NEED_TRANS_COMMIT,
77 BTRFS_NEED_TRANS_COMMIT,
78 BTRFS_DONT_NEED_LOG_SYNC,
79 BTRFS_NEED_LOG_SYNC,
80};
74int btrfs_log_new_name(struct btrfs_trans_handle *trans, 81int btrfs_log_new_name(struct btrfs_trans_handle *trans,
75 struct btrfs_inode *inode, struct btrfs_inode *old_dir, 82 struct btrfs_inode *inode, struct btrfs_inode *old_dir,
76 struct dentry *parent); 83 struct dentry *parent,
84 bool sync_log, struct btrfs_log_ctx *ctx);
77 85
78#endif 86#endif