diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 222 |
1 files changed, 198 insertions, 24 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9c45431e69ab..9314adeba946 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -1613,6 +1613,9 @@ static bool name_in_log_ref(struct btrfs_root *log_root, | |||
1613 | * not exist in the FS, it is skipped. fsyncs on directories | 1613 | * not exist in the FS, it is skipped. fsyncs on directories |
1614 | * do not force down inodes inside that directory, just changes to the | 1614 | * do not force down inodes inside that directory, just changes to the |
1615 | * names or unlinks in a directory. | 1615 | * names or unlinks in a directory. |
1616 | * | ||
1617 | * Returns < 0 on error, 0 if the name wasn't replayed (dentry points to a | ||
1618 | * non-existing inode) and 1 if the name was replayed. | ||
1616 | */ | 1619 | */ |
1617 | static noinline int replay_one_name(struct btrfs_trans_handle *trans, | 1620 | static noinline int replay_one_name(struct btrfs_trans_handle *trans, |
1618 | struct btrfs_root *root, | 1621 | struct btrfs_root *root, |
@@ -1631,6 +1634,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, | |||
1631 | int exists; | 1634 | int exists; |
1632 | int ret = 0; | 1635 | int ret = 0; |
1633 | bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); | 1636 | bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); |
1637 | bool name_added = false; | ||
1634 | 1638 | ||
1635 | dir = read_one_inode(root, key->objectid); | 1639 | dir = read_one_inode(root, key->objectid); |
1636 | if (!dir) | 1640 | if (!dir) |
@@ -1708,6 +1712,8 @@ out: | |||
1708 | } | 1712 | } |
1709 | kfree(name); | 1713 | kfree(name); |
1710 | iput(dir); | 1714 | iput(dir); |
1715 | if (!ret && name_added) | ||
1716 | ret = 1; | ||
1711 | return ret; | 1717 | return ret; |
1712 | 1718 | ||
1713 | insert: | 1719 | insert: |
@@ -1723,6 +1729,8 @@ insert: | |||
1723 | name, name_len, log_type, &log_key); | 1729 | name, name_len, log_type, &log_key); |
1724 | if (ret && ret != -ENOENT && ret != -EEXIST) | 1730 | if (ret && ret != -ENOENT && ret != -EEXIST) |
1725 | goto out; | 1731 | goto out; |
1732 | if (!ret) | ||
1733 | name_added = true; | ||
1726 | update_size = false; | 1734 | update_size = false; |
1727 | ret = 0; | 1735 | ret = 0; |
1728 | goto out; | 1736 | goto out; |
@@ -1740,12 +1748,13 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, | |||
1740 | struct extent_buffer *eb, int slot, | 1748 | struct extent_buffer *eb, int slot, |
1741 | struct btrfs_key *key) | 1749 | struct btrfs_key *key) |
1742 | { | 1750 | { |
1743 | int ret; | 1751 | int ret = 0; |
1744 | u32 item_size = btrfs_item_size_nr(eb, slot); | 1752 | u32 item_size = btrfs_item_size_nr(eb, slot); |
1745 | struct btrfs_dir_item *di; | 1753 | struct btrfs_dir_item *di; |
1746 | int name_len; | 1754 | int name_len; |
1747 | unsigned long ptr; | 1755 | unsigned long ptr; |
1748 | unsigned long ptr_end; | 1756 | unsigned long ptr_end; |
1757 | struct btrfs_path *fixup_path = NULL; | ||
1749 | 1758 | ||
1750 | ptr = btrfs_item_ptr_offset(eb, slot); | 1759 | ptr = btrfs_item_ptr_offset(eb, slot); |
1751 | ptr_end = ptr + item_size; | 1760 | ptr_end = ptr + item_size; |
@@ -1755,12 +1764,59 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, | |||
1755 | return -EIO; | 1764 | return -EIO; |
1756 | name_len = btrfs_dir_name_len(eb, di); | 1765 | name_len = btrfs_dir_name_len(eb, di); |
1757 | ret = replay_one_name(trans, root, path, eb, di, key); | 1766 | ret = replay_one_name(trans, root, path, eb, di, key); |
1758 | if (ret) | 1767 | if (ret < 0) |
1759 | return ret; | 1768 | break; |
1760 | ptr = (unsigned long)(di + 1); | 1769 | ptr = (unsigned long)(di + 1); |
1761 | ptr += name_len; | 1770 | ptr += name_len; |
1771 | |||
1772 | /* | ||
1773 | * If this entry refers to a non-directory (directories can not | ||
1774 | * have a link count > 1) and it was added in the transaction | ||
1775 | * that was not committed, make sure we fixup the link count of | ||
1776 | * the inode the entry points to. Otherwise something like | ||
1777 | * the following would result in a directory pointing to an | ||
1778 | * inode with a wrong link count that does not account for this dir | ||
1779 | * entry: | ||
1780 | * | ||
1781 | * mkdir testdir | ||
1782 | * touch testdir/foo | ||
1783 | * touch testdir/bar | ||
1784 | * sync | ||
1785 | * | ||
1786 | * ln testdir/bar testdir/bar_link | ||
1787 | * ln testdir/foo testdir/foo_link | ||
1788 | * xfs_io -c "fsync" testdir/bar | ||
1789 | * | ||
1790 | * <power failure> | ||
1791 | * | ||
1792 | * mount fs, log replay happens | ||
1793 | * | ||
1794 | * File foo would remain with a link count of 1 when it has two | ||
1795 | * entries pointing to it in the directory testdir. This would | ||
1796 | * make it impossible to ever delete the parent directory as | ||
1797 | * it would result in stale dentries that can never be deleted. | ||
1798 | */ | ||
1799 | if (ret == 1 && btrfs_dir_type(eb, di) != BTRFS_FT_DIR) { | ||
1800 | struct btrfs_key di_key; | ||
1801 | |||
1802 | if (!fixup_path) { | ||
1803 | fixup_path = btrfs_alloc_path(); | ||
1804 | if (!fixup_path) { | ||
1805 | ret = -ENOMEM; | ||
1806 | break; | ||
1807 | } | ||
1808 | } | ||
1809 | |||
1810 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | ||
1811 | ret = link_to_fixup_dir(trans, root, fixup_path, | ||
1812 | di_key.objectid); | ||
1813 | if (ret) | ||
1814 | break; | ||
1815 | } | ||
1816 | ret = 0; | ||
1762 | } | 1817 | } |
1763 | return 0; | 1818 | btrfs_free_path(fixup_path); |
1819 | return ret; | ||
1764 | } | 1820 | } |
1765 | 1821 | ||
1766 | /* | 1822 | /* |
@@ -4904,6 +4960,94 @@ next_dir_inode: | |||
4904 | return ret; | 4960 | return ret; |
4905 | } | 4961 | } |
4906 | 4962 | ||
4963 | static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, | ||
4964 | struct inode *inode, | ||
4965 | struct btrfs_log_ctx *ctx) | ||
4966 | { | ||
4967 | int ret; | ||
4968 | struct btrfs_path *path; | ||
4969 | struct btrfs_key key; | ||
4970 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4971 | const u64 ino = btrfs_ino(inode); | ||
4972 | |||
4973 | path = btrfs_alloc_path(); | ||
4974 | if (!path) | ||
4975 | return -ENOMEM; | ||
4976 | path->skip_locking = 1; | ||
4977 | path->search_commit_root = 1; | ||
4978 | |||
4979 | key.objectid = ino; | ||
4980 | key.type = BTRFS_INODE_REF_KEY; | ||
4981 | key.offset = 0; | ||
4982 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
4983 | if (ret < 0) | ||
4984 | goto out; | ||
4985 | |||
4986 | while (true) { | ||
4987 | struct extent_buffer *leaf = path->nodes[0]; | ||
4988 | int slot = path->slots[0]; | ||
4989 | u32 cur_offset = 0; | ||
4990 | u32 item_size; | ||
4991 | unsigned long ptr; | ||
4992 | |||
4993 | if (slot >= btrfs_header_nritems(leaf)) { | ||
4994 | ret = btrfs_next_leaf(root, path); | ||
4995 | if (ret < 0) | ||
4996 | goto out; | ||
4997 | else if (ret > 0) | ||
4998 | break; | ||
4999 | continue; | ||
5000 | } | ||
5001 | |||
5002 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
5003 | /* BTRFS_INODE_EXTREF_KEY is BTRFS_INODE_REF_KEY + 1 */ | ||
5004 | if (key.objectid != ino || key.type > BTRFS_INODE_EXTREF_KEY) | ||
5005 | break; | ||
5006 | |||
5007 | item_size = btrfs_item_size_nr(leaf, slot); | ||
5008 | ptr = btrfs_item_ptr_offset(leaf, slot); | ||
5009 | while (cur_offset < item_size) { | ||
5010 | struct btrfs_key inode_key; | ||
5011 | struct inode *dir_inode; | ||
5012 | |||
5013 | inode_key.type = BTRFS_INODE_ITEM_KEY; | ||
5014 | inode_key.offset = 0; | ||
5015 | |||
5016 | if (key.type == BTRFS_INODE_EXTREF_KEY) { | ||
5017 | struct btrfs_inode_extref *extref; | ||
5018 | |||
5019 | extref = (struct btrfs_inode_extref *) | ||
5020 | (ptr + cur_offset); | ||
5021 | inode_key.objectid = btrfs_inode_extref_parent( | ||
5022 | leaf, extref); | ||
5023 | cur_offset += sizeof(*extref); | ||
5024 | cur_offset += btrfs_inode_extref_name_len(leaf, | ||
5025 | extref); | ||
5026 | } else { | ||
5027 | inode_key.objectid = key.offset; | ||
5028 | cur_offset = item_size; | ||
5029 | } | ||
5030 | |||
5031 | dir_inode = btrfs_iget(root->fs_info->sb, &inode_key, | ||
5032 | root, NULL); | ||
5033 | /* If parent inode was deleted, skip it. */ | ||
5034 | if (IS_ERR(dir_inode)) | ||
5035 | continue; | ||
5036 | |||
5037 | ret = btrfs_log_inode(trans, root, dir_inode, | ||
5038 | LOG_INODE_ALL, 0, LLONG_MAX, ctx); | ||
5039 | iput(dir_inode); | ||
5040 | if (ret) | ||
5041 | goto out; | ||
5042 | } | ||
5043 | path->slots[0]++; | ||
5044 | } | ||
5045 | ret = 0; | ||
5046 | out: | ||
5047 | btrfs_free_path(path); | ||
5048 | return ret; | ||
5049 | } | ||
5050 | |||
4907 | /* | 5051 | /* |
4908 | * helper function around btrfs_log_inode to make sure newly created | 5052 | * helper function around btrfs_log_inode to make sure newly created |
4909 | * parent directories also end up in the log. A minimal inode and backref | 5053 | * parent directories also end up in the log. A minimal inode and backref |
@@ -4923,9 +5067,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4923 | struct dentry *old_parent = NULL; | 5067 | struct dentry *old_parent = NULL; |
4924 | int ret = 0; | 5068 | int ret = 0; |
4925 | u64 last_committed = root->fs_info->last_trans_committed; | 5069 | u64 last_committed = root->fs_info->last_trans_committed; |
4926 | const struct dentry * const first_parent = parent; | ||
4927 | const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > | ||
4928 | last_committed); | ||
4929 | bool log_dentries = false; | 5070 | bool log_dentries = false; |
4930 | struct inode *orig_inode = inode; | 5071 | struct inode *orig_inode = inode; |
4931 | 5072 | ||
@@ -4986,6 +5127,53 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4986 | if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries) | 5127 | if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries) |
4987 | log_dentries = true; | 5128 | log_dentries = true; |
4988 | 5129 | ||
5130 | /* | ||
5131 | * On unlink we must make sure all our current and old parent directory | ||
5132 | * inodes are fully logged. This is to prevent leaving dangling | ||
5133 | * directory index entries in directories that were our parents but are | ||
5134 | * not anymore. Not doing this results in old parent directory being | ||
5135 | * impossible to delete after log replay (rmdir will always fail with | ||
5136 | * error -ENOTEMPTY). | ||
5137 | * | ||
5138 | * Example 1: | ||
5139 | * | ||
5140 | * mkdir testdir | ||
5141 | * touch testdir/foo | ||
5142 | * ln testdir/foo testdir/bar | ||
5143 | * sync | ||
5144 | * unlink testdir/bar | ||
5145 | * xfs_io -c fsync testdir/foo | ||
5146 | * <power failure> | ||
5147 | * mount fs, triggers log replay | ||
5148 | * | ||
5149 | * If we don't log the parent directory (testdir), after log replay the | ||
5150 | * directory still has an entry pointing to the file inode using the bar | ||
5151 | * name, but a matching BTRFS_INODE_[REF|EXTREF]_KEY does not exist and | ||
5152 | * the file inode has a link count of 1. | ||
5153 | * | ||
5154 | * Example 2: | ||
5155 | * | ||
5156 | * mkdir testdir | ||
5157 | * touch foo | ||
5158 | * ln foo testdir/foo2 | ||
5159 | * ln foo testdir/foo3 | ||
5160 | * sync | ||
5161 | * unlink testdir/foo3 | ||
5162 | * xfs_io -c fsync foo | ||
5163 | * <power failure> | ||
5164 | * mount fs, triggers log replay | ||
5165 | * | ||
5166 | * Similar to the first example, after log replay the parent directory | ||
5167 | * testdir still has an entry pointing to the inode file with name foo3 | ||
5168 | * but the file inode does not have a matching BTRFS_INODE_REF_KEY item | ||
5169 | * and has a link count of 2. | ||
5170 | */ | ||
5171 | if (BTRFS_I(inode)->last_unlink_trans > last_committed) { | ||
5172 | ret = btrfs_log_all_parents(trans, orig_inode, ctx); | ||
5173 | if (ret) | ||
5174 | goto end_trans; | ||
5175 | } | ||
5176 | |||
4989 | while (1) { | 5177 | while (1) { |
4990 | if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) | 5178 | if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) |
4991 | break; | 5179 | break; |
@@ -4994,23 +5182,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4994 | if (root != BTRFS_I(inode)->root) | 5182 | if (root != BTRFS_I(inode)->root) |
4995 | break; | 5183 | break; |
4996 | 5184 | ||
4997 | /* | 5185 | if (BTRFS_I(inode)->generation > last_committed) { |
4998 | * On unlink we must make sure our immediate parent directory | 5186 | ret = btrfs_log_inode(trans, root, inode, |
4999 | * inode is fully logged. This is to prevent leaving dangling | 5187 | LOG_INODE_EXISTS, |
5000 | * directory index entries and a wrong directory inode's i_size. | ||
5001 | * Not doing so can result in a directory being impossible to | ||
5002 | * delete after log replay (rmdir will always fail with error | ||
5003 | * -ENOTEMPTY). | ||
5004 | */ | ||
5005 | if (did_unlink && parent == first_parent) | ||
5006 | inode_only = LOG_INODE_ALL; | ||
5007 | else | ||
5008 | inode_only = LOG_INODE_EXISTS; | ||
5009 | |||
5010 | if (BTRFS_I(inode)->generation > | ||
5011 | root->fs_info->last_trans_committed || | ||
5012 | inode_only == LOG_INODE_ALL) { | ||
5013 | ret = btrfs_log_inode(trans, root, inode, inode_only, | ||
5014 | 0, LLONG_MAX, ctx); | 5188 | 0, LLONG_MAX, ctx); |
5015 | if (ret) | 5189 | if (ret) |
5016 | goto end_trans; | 5190 | goto end_trans; |