summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
author: Filipe Manana <fdmanana@suse.com> 2016-06-06 11:11:13 -0400
committer: Filipe Manana <fdmanana@suse.com> 2016-08-01 02:32:14 -0400
commit: 44f714dae50a2e795d3268a6831762aa6fa54f55 (patch)
tree: 5df83de228b05a0041bef04241720b78cd96d8a6 /fs/btrfs/tree-log.c
parent: 67710892ec983aa79ad1e2a2642fe8e3a4a194ea (diff)
Btrfs: improve performance on fsync against new inode after rename/unlink
With commit 56f23fdbb600 ("Btrfs: fix file/data loss caused by fsync after rename and new inode") we got a simple fix for a functional issue that occurs when the following sequence of actions is done:

  at transaction N
    create file A at directory D
  at transaction N + M (where M >= 1)
    move/rename existing file A from directory D to directory E
    create a new file named A at directory D
    fsync the new file
    power fail

The solution was to simply detect such a scenario and fall back to a full transaction commit when we detect it. However, this turned out to have a significant impact on throughput (and a bit on latency too) for benchmarks using the dbench tool, which simulates real workloads from smbd (Samba) servers. For example, on a test vm (with a debug kernel):

  Unpatched: Throughput 19.1572 MB/sec 32 clients 32 procs max_latency=1005.229 ms
  Patched:   Throughput 23.7015 MB/sec 32 clients 32 procs max_latency=809.206 ms

The patched results (this patch is applied) are similar to the results of a kernel with the commit 56f23fdbb600 ("Btrfs: fix file/data loss caused by fsync after rename and new inode") reverted.

This change avoids the fallback to a transaction commit and instead makes sure all the names of the conflicting inode (the one that had a name in a past transaction that matches the name of the new file in the same parent directory) are logged, so that at log replay time we lose neither the new file nor the old file, and the old file gets the name it was renamed to.

This also ends up avoiding a full transaction commit for a similar case that involves an unlink instead of a rename of the old file:

  at transaction N
    create file A at directory D
  at transaction N + M (where M >= 1)
    remove file A
    create a new file named A at directory D
    fsync the new file
    power fail

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 85
1 files changed, 77 insertions, 8 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c05f69a8ec42..5fa624cd815d 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4469,7 +4469,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
4469static int btrfs_check_ref_name_override(struct extent_buffer *eb, 4469static int btrfs_check_ref_name_override(struct extent_buffer *eb,
4470 const int slot, 4470 const int slot,
4471 const struct btrfs_key *key, 4471 const struct btrfs_key *key,
4472 struct inode *inode) 4472 struct inode *inode,
4473 u64 *other_ino)
4473{ 4474{
4474 int ret; 4475 int ret;
4475 struct btrfs_path *search_path; 4476 struct btrfs_path *search_path;
@@ -4528,7 +4529,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
4528 search_path, parent, 4529 search_path, parent,
4529 name, this_name_len, 0); 4530 name, this_name_len, 0);
4530 if (di && !IS_ERR(di)) { 4531 if (di && !IS_ERR(di)) {
4531 ret = 1; 4532 struct btrfs_key di_key;
4533
4534 btrfs_dir_item_key_to_cpu(search_path->nodes[0],
4535 di, &di_key);
4536 if (di_key.type == BTRFS_INODE_ITEM_KEY) {
4537 ret = 1;
4538 *other_ino = di_key.objectid;
4539 } else {
4540 ret = -EAGAIN;
4541 }
4532 goto out; 4542 goto out;
4533 } else if (IS_ERR(di)) { 4543 } else if (IS_ERR(di)) {
4534 ret = PTR_ERR(di); 4544 ret = PTR_ERR(di);
@@ -4718,16 +4728,71 @@ again:
4718 if ((min_key.type == BTRFS_INODE_REF_KEY || 4728 if ((min_key.type == BTRFS_INODE_REF_KEY ||
4719 min_key.type == BTRFS_INODE_EXTREF_KEY) && 4729 min_key.type == BTRFS_INODE_EXTREF_KEY) &&
4720 BTRFS_I(inode)->generation == trans->transid) { 4730 BTRFS_I(inode)->generation == trans->transid) {
4731 u64 other_ino = 0;
4732
4721 ret = btrfs_check_ref_name_override(path->nodes[0], 4733 ret = btrfs_check_ref_name_override(path->nodes[0],
4722 path->slots[0], 4734 path->slots[0],
4723 &min_key, inode); 4735 &min_key, inode,
4736 &other_ino);
4724 if (ret < 0) { 4737 if (ret < 0) {
4725 err = ret; 4738 err = ret;
4726 goto out_unlock; 4739 goto out_unlock;
4727 } else if (ret > 0) { 4740 } else if (ret > 0) {
4728 err = 1; 4741 struct btrfs_key inode_key;
4729 btrfs_set_log_full_commit(root->fs_info, trans); 4742 struct inode *other_inode;
4730 goto out_unlock; 4743
4744 if (ins_nr > 0) {
4745 ins_nr++;
4746 } else {
4747 ins_nr = 1;
4748 ins_start_slot = path->slots[0];
4749 }
4750 ret = copy_items(trans, inode, dst_path, path,
4751 &last_extent, ins_start_slot,
4752 ins_nr, inode_only,
4753 logged_isize);
4754 if (ret < 0) {
4755 err = ret;
4756 goto out_unlock;
4757 }
4758 ins_nr = 0;
4759 btrfs_release_path(path);
4760 inode_key.objectid = other_ino;
4761 inode_key.type = BTRFS_INODE_ITEM_KEY;
4762 inode_key.offset = 0;
4763 other_inode = btrfs_iget(root->fs_info->sb,
4764 &inode_key, root,
4765 NULL);
4766 /*
4767 * If the other inode that had a conflicting dir
4768 * entry was deleted in the current transaction,
4769 * we don't need to do more work nor fallback to
4770 * a transaction commit.
4771 */
4772 if (IS_ERR(other_inode) &&
4773 PTR_ERR(other_inode) == -ENOENT) {
4774 goto next_key;
4775 } else if (IS_ERR(other_inode)) {
4776 err = PTR_ERR(other_inode);
4777 goto out_unlock;
4778 }
4779 /*
4780 * We are safe logging the other inode without
4781 * acquiring its i_mutex as long as we log with
4782 * the LOG_INODE_EXISTS mode. We're safe against
4783 * concurrent renames of the other inode as well
4784 * because during a rename we pin the log and
4785 * update the log with the new name before we
4786 * unpin it.
4787 */
4788 err = btrfs_log_inode(trans, root, other_inode,
4789 LOG_INODE_EXISTS,
4790 0, LLONG_MAX, ctx);
4791 iput(other_inode);
4792 if (err)
4793 goto out_unlock;
4794 else
4795 goto next_key;
4731 } 4796 }
4732 } 4797 }
4733 4798
@@ -4795,7 +4860,7 @@ next_slot:
4795 ins_nr = 0; 4860 ins_nr = 0;
4796 } 4861 }
4797 btrfs_release_path(path); 4862 btrfs_release_path(path);
4798 4863next_key:
4799 if (min_key.offset < (u64)-1) { 4864 if (min_key.offset < (u64)-1) {
4800 min_key.offset++; 4865 min_key.offset++;
4801 } else if (min_key.type < max_key.type) { 4866 } else if (min_key.type < max_key.type) {
@@ -4989,8 +5054,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
4989 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) 5054 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
4990 break; 5055 break;
4991 5056
4992 if (IS_ROOT(parent)) 5057 if (IS_ROOT(parent)) {
5058 inode = d_inode(parent);
5059 if (btrfs_must_commit_transaction(trans, inode))
5060 ret = 1;
4993 break; 5061 break;
5062 }
4994 5063
4995 parent = dget_parent(parent); 5064 parent = dget_parent(parent);
4996 dput(old_parent); 5065 dput(old_parent);