aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c222
1 files changed, 198 insertions, 24 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9c45431e69ab..9314adeba946 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1613,6 +1613,9 @@ static bool name_in_log_ref(struct btrfs_root *log_root,
1613 * not exist in the FS, it is skipped. fsyncs on directories 1613 * not exist in the FS, it is skipped. fsyncs on directories
1614 * do not force down inodes inside that directory, just changes to the 1614 * do not force down inodes inside that directory, just changes to the
1615 * names or unlinks in a directory. 1615 * names or unlinks in a directory.
1616 *
1617 * Returns < 0 on error, 0 if the name wasn't replayed (dentry points to a
1618 * non-existing inode) and 1 if the name was replayed.
1616 */ 1619 */
1617static noinline int replay_one_name(struct btrfs_trans_handle *trans, 1620static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1618 struct btrfs_root *root, 1621 struct btrfs_root *root,
@@ -1631,6 +1634,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1631 int exists; 1634 int exists;
1632 int ret = 0; 1635 int ret = 0;
1633 bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); 1636 bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
1637 bool name_added = false;
1634 1638
1635 dir = read_one_inode(root, key->objectid); 1639 dir = read_one_inode(root, key->objectid);
1636 if (!dir) 1640 if (!dir)
@@ -1708,6 +1712,8 @@ out:
1708 } 1712 }
1709 kfree(name); 1713 kfree(name);
1710 iput(dir); 1714 iput(dir);
1715 if (!ret && name_added)
1716 ret = 1;
1711 return ret; 1717 return ret;
1712 1718
1713insert: 1719insert:
@@ -1723,6 +1729,8 @@ insert:
1723 name, name_len, log_type, &log_key); 1729 name, name_len, log_type, &log_key);
1724 if (ret && ret != -ENOENT && ret != -EEXIST) 1730 if (ret && ret != -ENOENT && ret != -EEXIST)
1725 goto out; 1731 goto out;
1732 if (!ret)
1733 name_added = true;
1726 update_size = false; 1734 update_size = false;
1727 ret = 0; 1735 ret = 0;
1728 goto out; 1736 goto out;
@@ -1740,12 +1748,13 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1740 struct extent_buffer *eb, int slot, 1748 struct extent_buffer *eb, int slot,
1741 struct btrfs_key *key) 1749 struct btrfs_key *key)
1742{ 1750{
1743 int ret; 1751 int ret = 0;
1744 u32 item_size = btrfs_item_size_nr(eb, slot); 1752 u32 item_size = btrfs_item_size_nr(eb, slot);
1745 struct btrfs_dir_item *di; 1753 struct btrfs_dir_item *di;
1746 int name_len; 1754 int name_len;
1747 unsigned long ptr; 1755 unsigned long ptr;
1748 unsigned long ptr_end; 1756 unsigned long ptr_end;
1757 struct btrfs_path *fixup_path = NULL;
1749 1758
1750 ptr = btrfs_item_ptr_offset(eb, slot); 1759 ptr = btrfs_item_ptr_offset(eb, slot);
1751 ptr_end = ptr + item_size; 1760 ptr_end = ptr + item_size;
@@ -1755,12 +1764,59 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
1755 return -EIO; 1764 return -EIO;
1756 name_len = btrfs_dir_name_len(eb, di); 1765 name_len = btrfs_dir_name_len(eb, di);
1757 ret = replay_one_name(trans, root, path, eb, di, key); 1766 ret = replay_one_name(trans, root, path, eb, di, key);
1758 if (ret) 1767 if (ret < 0)
1759 return ret; 1768 break;
1760 ptr = (unsigned long)(di + 1); 1769 ptr = (unsigned long)(di + 1);
1761 ptr += name_len; 1770 ptr += name_len;
1771
1772 /*
1773 * If this entry refers to a non-directory (directories can not
1774 * have a link count > 1) and it was added in the transaction
1775 * that was not committed, make sure we fixup the link count of
1776 * the inode the entry points to. Otherwise something like
1777 * the following would result in a directory pointing to an
1778 * inode with a wrong link count that does not account for this dir
1779 * entry:
1780 *
1781 * mkdir testdir
1782 * touch testdir/foo
1783 * touch testdir/bar
1784 * sync
1785 *
1786 * ln testdir/bar testdir/bar_link
1787 * ln testdir/foo testdir/foo_link
1788 * xfs_io -c "fsync" testdir/bar
1789 *
1790 * <power failure>
1791 *
1792 * mount fs, log replay happens
1793 *
1794 * File foo would remain with a link count of 1 when it has two
1795 * entries pointing to it in the directory testdir. This would
1796 * make it impossible to ever delete the parent directory as
1797 * it would result in stale dentries that can never be deleted.
1798 */
1799 if (ret == 1 && btrfs_dir_type(eb, di) != BTRFS_FT_DIR) {
1800 struct btrfs_key di_key;
1801
1802 if (!fixup_path) {
1803 fixup_path = btrfs_alloc_path();
1804 if (!fixup_path) {
1805 ret = -ENOMEM;
1806 break;
1807 }
1808 }
1809
1810 btrfs_dir_item_key_to_cpu(eb, di, &di_key);
1811 ret = link_to_fixup_dir(trans, root, fixup_path,
1812 di_key.objectid);
1813 if (ret)
1814 break;
1815 }
1816 ret = 0;
1762 } 1817 }
1763 return 0; 1818 btrfs_free_path(fixup_path);
1819 return ret;
1764} 1820}
1765 1821
1766/* 1822/*
@@ -4904,6 +4960,94 @@ next_dir_inode:
4904 return ret; 4960 return ret;
4905} 4961}
4906 4962
/*
 * Log every parent directory referenced by @inode's BTRFS_INODE_REF_KEY and
 * BTRFS_INODE_EXTREF_KEY items.
 *
 * The reference items are looked up in @inode's root using a path flagged
 * with search_commit_root and skip_locking. For each parent objectid found,
 * the parent inode is fetched with btrfs_iget() and logged with
 * btrfs_log_inode() in LOG_INODE_ALL mode over the range [0, LLONG_MAX].
 * Parents for which btrfs_iget() returns an error pointer are skipped.
 *
 * @trans: transaction handle, passed through to btrfs_log_inode().
 * @inode: inode whose parent directories are to be logged.
 * @ctx:   log context, passed through to btrfs_log_inode().
 *
 * Returns 0 when the walk completes, -ENOMEM if the path cannot be allocated,
 * a negative value propagated from btrfs_search_slot() or btrfs_next_leaf(),
 * or the first non-zero value returned by btrfs_log_inode().
 */
4963static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
4964 struct inode *inode,
4965 struct btrfs_log_ctx *ctx)
4966{
4967 int ret;
4968 struct btrfs_path *path;
4969 struct btrfs_key key;
4970 struct btrfs_root *root = BTRFS_I(inode)->root;
4971 const u64 ino = btrfs_ino(inode);
4972
4973 path = btrfs_alloc_path();
4974 if (!path)
4975 return -ENOMEM;
4976 path->skip_locking = 1;
4977 path->search_commit_root = 1;
4978
/* Start at the lowest possible INODE_REF key for this inode. */
4979 key.objectid = ino;
4980 key.type = BTRFS_INODE_REF_KEY;
4981 key.offset = 0;
4982 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4983 if (ret < 0)
4984 goto out;
4985
4986 while (true) {
4987 struct extent_buffer *leaf = path->nodes[0];
4988 int slot = path->slots[0];
4989 u32 cur_offset = 0;
4990 u32 item_size;
4991 unsigned long ptr;
4992
/* Slot is past the last item of this leaf: move on to the next leaf. */
4993 if (slot >= btrfs_header_nritems(leaf)) {
4994 ret = btrfs_next_leaf(root, path);
4995 if (ret < 0)
4996 goto out;
4997 else if (ret > 0)
4998 break;
4999 continue;
5000 }
5001
5002 btrfs_item_key_to_cpu(leaf, &key, slot);
5003 /* BTRFS_INODE_EXTREF_KEY is BTRFS_INODE_REF_KEY + 1 */
5004 if (key.objectid != ino || key.type > BTRFS_INODE_EXTREF_KEY)
5005 break;
5006
5007 item_size = btrfs_item_size_nr(leaf, slot);
5008 ptr = btrfs_item_ptr_offset(leaf, slot);
/* Walk the item; an extref item is stepped through entry by entry. */
5009 while (cur_offset < item_size) {
5010 struct btrfs_key inode_key;
5011 struct inode *dir_inode;
5012
5013 inode_key.type = BTRFS_INODE_ITEM_KEY;
5014 inode_key.offset = 0;
5015
5016 if (key.type == BTRFS_INODE_EXTREF_KEY) {
5017 struct btrfs_inode_extref *extref;
5018
5019 extref = (struct btrfs_inode_extref *)
5020 (ptr + cur_offset);
5021 inode_key.objectid = btrfs_inode_extref_parent(
5022 leaf, extref);
/* Advance past this extref's fixed header and its name. */
5023 cur_offset += sizeof(*extref);
5024 cur_offset += btrfs_inode_extref_name_len(leaf,
5025 extref);
5026 } else {
/*
 * The key offset is used as the parent's objectid and the whole item is
 * consumed in one step. NOTE(review): presumably every name stored in a
 * single INODE_REF item shares that one parent - confirm.
 */
5027 inode_key.objectid = key.offset;
5028 cur_offset = item_size;
5029 }
5030
5031 dir_inode = btrfs_iget(root->fs_info->sb, &inode_key,
5032 root, NULL);
5033 /* If parent inode was deleted, skip it. */
5034 if (IS_ERR(dir_inode))
5035 continue;
5036
5037 ret = btrfs_log_inode(trans, root, dir_inode,
5038 LOG_INODE_ALL, 0, LLONG_MAX, ctx);
5039 iput(dir_inode);
5040 if (ret)
5041 goto out;
5042 }
5043 path->slots[0]++;
5044 }
/* Leaving the loop via break is not an error; clear any positive ret. */
5045 ret = 0;
5046out:
5047 btrfs_free_path(path);
5048 return ret;
5049}
5050
4907/* 5051/*
4908 * helper function around btrfs_log_inode to make sure newly created 5052 * helper function around btrfs_log_inode to make sure newly created
4909 * parent directories also end up in the log. A minimal inode and backref 5053 * parent directories also end up in the log. A minimal inode and backref
@@ -4923,9 +5067,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4923 struct dentry *old_parent = NULL; 5067 struct dentry *old_parent = NULL;
4924 int ret = 0; 5068 int ret = 0;
4925 u64 last_committed = root->fs_info->last_trans_committed; 5069 u64 last_committed = root->fs_info->last_trans_committed;
4926 const struct dentry * const first_parent = parent;
4927 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4928 last_committed);
4929 bool log_dentries = false; 5070 bool log_dentries = false;
4930 struct inode *orig_inode = inode; 5071 struct inode *orig_inode = inode;
4931 5072
@@ -4986,6 +5127,53 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4986 if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries) 5127 if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
4987 log_dentries = true; 5128 log_dentries = true;
4988 5129
5130 /*
5131 * On unlink we must make sure all our current and old parent directory
5132 * inodes are fully logged. This is to prevent leaving dangling
5133 * directory index entries in directories that were our parents but are
5134 * not anymore. Not doing this results in old parent directory being
5135 * impossible to delete after log replay (rmdir will always fail with
5136 * error -ENOTEMPTY).
5137 *
5138 * Example 1:
5139 *
5140 * mkdir testdir
5141 * touch testdir/foo
5142 * ln testdir/foo testdir/bar
5143 * sync
5144 * unlink testdir/bar
5145 * xfs_io -c fsync testdir/foo
5146 * <power failure>
5147 * mount fs, triggers log replay
5148 *
5149 * If we don't log the parent directory (testdir), after log replay the
5150 * directory still has an entry pointing to the file inode using the bar
5151 * name, but a matching BTRFS_INODE_[REF|EXTREF]_KEY does not exist and
5152 * the file inode has a link count of 1.
5153 *
5154 * Example 2:
5155 *
5156 * mkdir testdir
5157 * touch foo
5158 * ln foo testdir/foo2
5159 * ln foo testdir/foo3
5160 * sync
5161 * unlink testdir/foo3
5162 * xfs_io -c fsync foo
5163 * <power failure>
5164 * mount fs, triggers log replay
5165 *
5166 * Similar to the first example, after log replay the parent directory
5167 * testdir still has an entry pointing to the inode file with name foo3
5168 * but the file inode does not have a matching BTRFS_INODE_REF_KEY item
5169 * and has a link count of 2.
5170 */
5171 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
5172 ret = btrfs_log_all_parents(trans, orig_inode, ctx);
5173 if (ret)
5174 goto end_trans;
5175 }
5176
4989 while (1) { 5177 while (1) {
4990 if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) 5178 if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
4991 break; 5179 break;
@@ -4994,23 +5182,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4994 if (root != BTRFS_I(inode)->root) 5182 if (root != BTRFS_I(inode)->root)
4995 break; 5183 break;
4996 5184
4997 /* 5185 if (BTRFS_I(inode)->generation > last_committed) {
4998 * On unlink we must make sure our immediate parent directory 5186 ret = btrfs_log_inode(trans, root, inode,
4999 * inode is fully logged. This is to prevent leaving dangling 5187 LOG_INODE_EXISTS,
5000 * directory index entries and a wrong directory inode's i_size.
5001 * Not doing so can result in a directory being impossible to
5002 * delete after log replay (rmdir will always fail with error
5003 * -ENOTEMPTY).
5004 */
5005 if (did_unlink && parent == first_parent)
5006 inode_only = LOG_INODE_ALL;
5007 else
5008 inode_only = LOG_INODE_EXISTS;
5009
5010 if (BTRFS_I(inode)->generation >
5011 root->fs_info->last_trans_committed ||
5012 inode_only == LOG_INODE_ALL) {
5013 ret = btrfs_log_inode(trans, root, inode, inode_only,
5014 0, LLONG_MAX, ctx); 5188 0, LLONG_MAX, ctx);
5015 if (ret) 5189 if (ret)
5016 goto end_trans; 5190 goto end_trans;