aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c350
1 files changed, 166 insertions, 184 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d81cda2e077c..4f26f3ed0c87 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -78,104 +78,6 @@ static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
78 */ 78 */
79 79
80/* 80/*
81 * btrfs_add_log_tree adds a new per-subvolume log tree into the
82 * tree of log tree roots. This must be called with a tree log transaction
83 * running (see start_log_trans).
84 */
85static int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
86 struct btrfs_root *root)
87{
88 struct btrfs_key key;
89 struct btrfs_root_item root_item;
90 struct btrfs_inode_item *inode_item;
91 struct extent_buffer *leaf;
92 struct btrfs_root *new_root = root;
93 int ret;
94 u64 objectid = root->root_key.objectid;
95
96 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
97 BTRFS_TREE_LOG_OBJECTID,
98 trans->transid, 0, 0, 0);
99 if (IS_ERR(leaf)) {
100 ret = PTR_ERR(leaf);
101 return ret;
102 }
103
104 btrfs_set_header_nritems(leaf, 0);
105 btrfs_set_header_level(leaf, 0);
106 btrfs_set_header_bytenr(leaf, leaf->start);
107 btrfs_set_header_generation(leaf, trans->transid);
108 btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
109
110 write_extent_buffer(leaf, root->fs_info->fsid,
111 (unsigned long)btrfs_header_fsid(leaf),
112 BTRFS_FSID_SIZE);
113 btrfs_mark_buffer_dirty(leaf);
114
115 inode_item = &root_item.inode;
116 memset(inode_item, 0, sizeof(*inode_item));
117 inode_item->generation = cpu_to_le64(1);
118 inode_item->size = cpu_to_le64(3);
119 inode_item->nlink = cpu_to_le32(1);
120 inode_item->nbytes = cpu_to_le64(root->leafsize);
121 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
122
123 btrfs_set_root_bytenr(&root_item, leaf->start);
124 btrfs_set_root_generation(&root_item, trans->transid);
125 btrfs_set_root_level(&root_item, 0);
126 btrfs_set_root_refs(&root_item, 0);
127 btrfs_set_root_used(&root_item, 0);
128
129 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
130 root_item.drop_level = 0;
131
132 btrfs_tree_unlock(leaf);
133 free_extent_buffer(leaf);
134 leaf = NULL;
135
136 btrfs_set_root_dirid(&root_item, 0);
137
138 key.objectid = BTRFS_TREE_LOG_OBJECTID;
139 key.offset = objectid;
140 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
141 ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key,
142 &root_item);
143 if (ret)
144 goto fail;
145
146 new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree,
147 &key);
148 BUG_ON(!new_root);
149
150 WARN_ON(root->log_root);
151 root->log_root = new_root;
152
153 /*
154 * log trees do not get reference counted because they go away
155 * before a real commit is actually done. They do store pointers
156 * to file data extents, and those reference counts still get
157 * updated (along with back refs to the log tree).
158 */
159 new_root->ref_cows = 0;
160 new_root->last_trans = trans->transid;
161
162 /*
163 * we need to make sure the root block for this new tree
164 * is marked as dirty in the dirty_log_pages tree. This
165 * is how it gets flushed down to disk at tree log commit time.
166 *
167 * the tree logging mutex keeps others from coming in and changing
168 * the new_root->node, so we can safely access it here
169 */
170 set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start,
171 new_root->node->start + new_root->node->len - 1,
172 GFP_NOFS);
173
174fail:
175 return ret;
176}
177
178/*
179 * start a sub transaction and setup the log tree 81 * start a sub transaction and setup the log tree
180 * this increments the log tree writer count to make the people 82 * this increments the log tree writer count to make the people
181 * syncing the tree wait for us to finish 83 * syncing the tree wait for us to finish
@@ -184,6 +86,14 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
184 struct btrfs_root *root) 86 struct btrfs_root *root)
185{ 87{
186 int ret; 88 int ret;
89
90 mutex_lock(&root->log_mutex);
91 if (root->log_root) {
92 root->log_batch++;
93 atomic_inc(&root->log_writers);
94 mutex_unlock(&root->log_mutex);
95 return 0;
96 }
187 mutex_lock(&root->fs_info->tree_log_mutex); 97 mutex_lock(&root->fs_info->tree_log_mutex);
188 if (!root->fs_info->log_root_tree) { 98 if (!root->fs_info->log_root_tree) {
189 ret = btrfs_init_log_root_tree(trans, root->fs_info); 99 ret = btrfs_init_log_root_tree(trans, root->fs_info);
@@ -193,9 +103,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
193 ret = btrfs_add_log_tree(trans, root); 103 ret = btrfs_add_log_tree(trans, root);
194 BUG_ON(ret); 104 BUG_ON(ret);
195 } 105 }
196 atomic_inc(&root->fs_info->tree_log_writers);
197 root->fs_info->tree_log_batch++;
198 mutex_unlock(&root->fs_info->tree_log_mutex); 106 mutex_unlock(&root->fs_info->tree_log_mutex);
107 root->log_batch++;
108 atomic_inc(&root->log_writers);
109 mutex_unlock(&root->log_mutex);
199 return 0; 110 return 0;
200} 111}
201 112
@@ -212,13 +123,12 @@ static int join_running_log_trans(struct btrfs_root *root)
212 if (!root->log_root) 123 if (!root->log_root)
213 return -ENOENT; 124 return -ENOENT;
214 125
215 mutex_lock(&root->fs_info->tree_log_mutex); 126 mutex_lock(&root->log_mutex);
216 if (root->log_root) { 127 if (root->log_root) {
217 ret = 0; 128 ret = 0;
218 atomic_inc(&root->fs_info->tree_log_writers); 129 atomic_inc(&root->log_writers);
219 root->fs_info->tree_log_batch++;
220 } 130 }
221 mutex_unlock(&root->fs_info->tree_log_mutex); 131 mutex_unlock(&root->log_mutex);
222 return ret; 132 return ret;
223} 133}
224 134
@@ -228,10 +138,11 @@ static int join_running_log_trans(struct btrfs_root *root)
228 */ 138 */
229static int end_log_trans(struct btrfs_root *root) 139static int end_log_trans(struct btrfs_root *root)
230{ 140{
231 atomic_dec(&root->fs_info->tree_log_writers); 141 if (atomic_dec_and_test(&root->log_writers)) {
232 smp_mb(); 142 smp_mb();
233 if (waitqueue_active(&root->fs_info->tree_log_wait)) 143 if (waitqueue_active(&root->log_writer_wait))
234 wake_up(&root->fs_info->tree_log_wait); 144 wake_up(&root->log_writer_wait);
145 }
235 return 0; 146 return 0;
236} 147}
237 148
@@ -1902,26 +1813,65 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1902 } 1813 }
1903 } 1814 }
1904 btrfs_free_path(path); 1815 btrfs_free_path(path);
1905 if (wc->free)
1906 free_extent_buffer(log->node);
1907 return ret; 1816 return ret;
1908} 1817}
1909 1818
1910static int wait_log_commit(struct btrfs_root *log) 1819/*
1820 * helper function to update the item for a given subvolumes log root
1821 * in the tree of log roots
1822 */
1823static int update_log_root(struct btrfs_trans_handle *trans,
1824 struct btrfs_root *log)
1825{
1826 int ret;
1827
1828 if (log->log_transid == 1) {
1829 /* insert root item on the first sync */
1830 ret = btrfs_insert_root(trans, log->fs_info->log_root_tree,
1831 &log->root_key, &log->root_item);
1832 } else {
1833 ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
1834 &log->root_key, &log->root_item);
1835 }
1836 return ret;
1837}
1838
1839static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
1911{ 1840{
1912 DEFINE_WAIT(wait); 1841 DEFINE_WAIT(wait);
1913 u64 transid = log->fs_info->tree_log_transid; 1842 int index = transid % 2;
1914 1843
1844 /*
1845 * we only allow two pending log transactions at a time,
1846 * so we know that if ours is more than 2 older than the
1847 * current transaction, we're done
1848 */
1915 do { 1849 do {
1916 prepare_to_wait(&log->fs_info->tree_log_wait, &wait, 1850 prepare_to_wait(&root->log_commit_wait[index],
1917 TASK_UNINTERRUPTIBLE); 1851 &wait, TASK_UNINTERRUPTIBLE);
1918 mutex_unlock(&log->fs_info->tree_log_mutex); 1852 mutex_unlock(&root->log_mutex);
1919 if (atomic_read(&log->fs_info->tree_log_commit)) 1853 if (root->log_transid < transid + 2 &&
1854 atomic_read(&root->log_commit[index]))
1920 schedule(); 1855 schedule();
1921 finish_wait(&log->fs_info->tree_log_wait, &wait); 1856 finish_wait(&root->log_commit_wait[index], &wait);
1922 mutex_lock(&log->fs_info->tree_log_mutex); 1857 mutex_lock(&root->log_mutex);
1923 } while (transid == log->fs_info->tree_log_transid && 1858 } while (root->log_transid < transid + 2 &&
1924 atomic_read(&log->fs_info->tree_log_commit)); 1859 atomic_read(&root->log_commit[index]));
1860 return 0;
1861}
1862
1863static int wait_for_writer(struct btrfs_root *root)
1864{
1865 DEFINE_WAIT(wait);
1866 while (atomic_read(&root->log_writers)) {
1867 prepare_to_wait(&root->log_writer_wait,
1868 &wait, TASK_UNINTERRUPTIBLE);
1869 mutex_unlock(&root->log_mutex);
1870 if (atomic_read(&root->log_writers))
1871 schedule();
1872 mutex_lock(&root->log_mutex);
1873 finish_wait(&root->log_writer_wait, &wait);
1874 }
1925 return 0; 1875 return 0;
1926} 1876}
1927 1877
@@ -1933,57 +1883,114 @@ static int wait_log_commit(struct btrfs_root *log)
1933int btrfs_sync_log(struct btrfs_trans_handle *trans, 1883int btrfs_sync_log(struct btrfs_trans_handle *trans,
1934 struct btrfs_root *root) 1884 struct btrfs_root *root)
1935{ 1885{
1886 int index1;
1887 int index2;
1936 int ret; 1888 int ret;
1937 unsigned long batch;
1938 struct btrfs_root *log = root->log_root; 1889 struct btrfs_root *log = root->log_root;
1890 struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
1939 1891
1940 mutex_lock(&log->fs_info->tree_log_mutex); 1892 mutex_lock(&root->log_mutex);
1941 if (atomic_read(&log->fs_info->tree_log_commit)) { 1893 index1 = root->log_transid % 2;
1942 wait_log_commit(log); 1894 if (atomic_read(&root->log_commit[index1])) {
1943 goto out; 1895 wait_log_commit(root, root->log_transid);
1896 mutex_unlock(&root->log_mutex);
1897 return 0;
1944 } 1898 }
1945 atomic_set(&log->fs_info->tree_log_commit, 1); 1899 atomic_set(&root->log_commit[index1], 1);
1900
1901 /* wait for previous tree log sync to complete */
1902 if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
1903 wait_log_commit(root, root->log_transid - 1);
1946 1904
1947 while (1) { 1905 while (1) {
1948 batch = log->fs_info->tree_log_batch; 1906 unsigned long batch = root->log_batch;
1949 mutex_unlock(&log->fs_info->tree_log_mutex); 1907 mutex_unlock(&root->log_mutex);
1950 schedule_timeout_uninterruptible(1); 1908 schedule_timeout_uninterruptible(1);
1951 mutex_lock(&log->fs_info->tree_log_mutex); 1909 mutex_lock(&root->log_mutex);
1952 1910 wait_for_writer(root);
1953 while (atomic_read(&log->fs_info->tree_log_writers)) { 1911 if (batch == root->log_batch)
1954 DEFINE_WAIT(wait);
1955 prepare_to_wait(&log->fs_info->tree_log_wait, &wait,
1956 TASK_UNINTERRUPTIBLE);
1957 mutex_unlock(&log->fs_info->tree_log_mutex);
1958 if (atomic_read(&log->fs_info->tree_log_writers))
1959 schedule();
1960 mutex_lock(&log->fs_info->tree_log_mutex);
1961 finish_wait(&log->fs_info->tree_log_wait, &wait);
1962 }
1963 if (batch == log->fs_info->tree_log_batch)
1964 break; 1912 break;
1965 } 1913 }
1966 1914
1967 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); 1915 ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
1968 BUG_ON(ret); 1916 BUG_ON(ret);
1969 ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, 1917
1970 &root->fs_info->log_root_tree->dirty_log_pages); 1918 btrfs_set_root_bytenr(&log->root_item, log->node->start);
1919 btrfs_set_root_generation(&log->root_item, trans->transid);
1920 btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
1921
1922 root->log_batch = 0;
1923 root->log_transid++;
1924 log->log_transid = root->log_transid;
1925 smp_mb();
1926 /*
1927 * log tree has been flushed to disk, new modifications of
1928 * the log will be written to new positions. so it's safe to
1929 * allow log writers to go in.
1930 */
1931 mutex_unlock(&root->log_mutex);
1932
1933 mutex_lock(&log_root_tree->log_mutex);
1934 log_root_tree->log_batch++;
1935 atomic_inc(&log_root_tree->log_writers);
1936 mutex_unlock(&log_root_tree->log_mutex);
1937
1938 ret = update_log_root(trans, log);
1939 BUG_ON(ret);
1940
1941 mutex_lock(&log_root_tree->log_mutex);
1942 if (atomic_dec_and_test(&log_root_tree->log_writers)) {
1943 smp_mb();
1944 if (waitqueue_active(&log_root_tree->log_writer_wait))
1945 wake_up(&log_root_tree->log_writer_wait);
1946 }
1947
1948 index2 = log_root_tree->log_transid % 2;
1949 if (atomic_read(&log_root_tree->log_commit[index2])) {
1950 wait_log_commit(log_root_tree, log_root_tree->log_transid);
1951 mutex_unlock(&log_root_tree->log_mutex);
1952 goto out;
1953 }
1954 atomic_set(&log_root_tree->log_commit[index2], 1);
1955
1956 if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2]))
1957 wait_log_commit(log_root_tree, log_root_tree->log_transid - 1);
1958
1959 wait_for_writer(log_root_tree);
1960
1961 ret = btrfs_write_and_wait_marked_extents(log_root_tree,
1962 &log_root_tree->dirty_log_pages);
1971 BUG_ON(ret); 1963 BUG_ON(ret);
1972 1964
1973 btrfs_set_super_log_root(&root->fs_info->super_for_commit, 1965 btrfs_set_super_log_root(&root->fs_info->super_for_commit,
1974 log->fs_info->log_root_tree->node->start); 1966 log_root_tree->node->start);
1975 btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, 1967 btrfs_set_super_log_root_level(&root->fs_info->super_for_commit,
1976 btrfs_header_level(log->fs_info->log_root_tree->node)); 1968 btrfs_header_level(log_root_tree->node));
1969
1970 log_root_tree->log_batch = 0;
1971 log_root_tree->log_transid++;
1972 smp_mb();
1973
1974 mutex_unlock(&log_root_tree->log_mutex);
1975
1976 /*
1977 * nobody else is going to jump in and write the the ctree
1978 * super here because the log_commit atomic below is protecting
1979 * us. We must be called with a transaction handle pinning
1980 * the running transaction open, so a full commit can't hop
1981 * in and cause problems either.
1982 */
1983 write_ctree_super(trans, root->fs_info->tree_root, 2);
1977 1984
1978 write_ctree_super(trans, log->fs_info->tree_root, 2); 1985 atomic_set(&log_root_tree->log_commit[index2], 0);
1979 log->fs_info->tree_log_transid++;
1980 log->fs_info->tree_log_batch = 0;
1981 atomic_set(&log->fs_info->tree_log_commit, 0);
1982 smp_mb(); 1986 smp_mb();
1983 if (waitqueue_active(&log->fs_info->tree_log_wait)) 1987 if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
1984 wake_up(&log->fs_info->tree_log_wait); 1988 wake_up(&log_root_tree->log_commit_wait[index2]);
1985out: 1989out:
1986 mutex_unlock(&log->fs_info->tree_log_mutex); 1990 atomic_set(&root->log_commit[index1], 0);
1991 smp_mb();
1992 if (waitqueue_active(&root->log_commit_wait[index1]))
1993 wake_up(&root->log_commit_wait[index1]);
1987 return 0; 1994 return 0;
1988} 1995}
1989 1996
@@ -2019,38 +2026,18 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
2019 start, end, GFP_NOFS); 2026 start, end, GFP_NOFS);
2020 } 2027 }
2021 2028
2022 log = root->log_root; 2029 if (log->log_transid > 0) {
2023 ret = btrfs_del_root(trans, root->fs_info->log_root_tree, 2030 ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
2024 &log->root_key); 2031 &log->root_key);
2025 BUG_ON(ret); 2032 BUG_ON(ret);
2033 }
2026 root->log_root = NULL; 2034 root->log_root = NULL;
2027 kfree(root->log_root); 2035 free_extent_buffer(log->node);
2036 kfree(log);
2028 return 0; 2037 return 0;
2029} 2038}
2030 2039
2031/* 2040/*
2032 * helper function to update the item for a given subvolumes log root
2033 * in the tree of log roots
2034 */
2035static int update_log_root(struct btrfs_trans_handle *trans,
2036 struct btrfs_root *log)
2037{
2038 u64 bytenr = btrfs_root_bytenr(&log->root_item);
2039 int ret;
2040
2041 if (log->node->start == bytenr)
2042 return 0;
2043
2044 btrfs_set_root_bytenr(&log->root_item, log->node->start);
2045 btrfs_set_root_generation(&log->root_item, trans->transid);
2046 btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
2047 ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
2048 &log->root_key, &log->root_item);
2049 BUG_ON(ret);
2050 return ret;
2051}
2052
2053/*
2054 * If both a file and directory are logged, and unlinks or renames are 2041 * If both a file and directory are logged, and unlinks or renames are
2055 * mixed in, we have a few interesting corners: 2042 * mixed in, we have a few interesting corners:
2056 * 2043 *
@@ -2711,11 +2698,6 @@ next_slot:
2711 2698
2712 btrfs_free_path(path); 2699 btrfs_free_path(path);
2713 btrfs_free_path(dst_path); 2700 btrfs_free_path(dst_path);
2714
2715 mutex_lock(&root->fs_info->tree_log_mutex);
2716 ret = update_log_root(trans, log);
2717 BUG_ON(ret);
2718 mutex_unlock(&root->fs_info->tree_log_mutex);
2719out: 2701out:
2720 return 0; 2702 return 0;
2721} 2703}