diff options
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 121 |
1 files changed, 102 insertions, 19 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dd719662340e..51dcec86757f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) | |||
126 | * to make sure the old root from before we joined the transaction is deleted | 126 | * to make sure the old root from before we joined the transaction is deleted |
127 | * when the transaction commits | 127 | * when the transaction commits |
128 | */ | 128 | */ |
129 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 129 | static int record_root_in_trans(struct btrfs_trans_handle *trans, |
130 | struct btrfs_root *root) | 130 | struct btrfs_root *root) |
131 | { | 131 | { |
132 | if (root->ref_cows && root->last_trans < trans->transid) { | 132 | if (root->ref_cows && root->last_trans < trans->transid) { |
133 | WARN_ON(root == root->fs_info->extent_root); | 133 | WARN_ON(root == root->fs_info->extent_root); |
134 | WARN_ON(root->commit_root != root->node); | 134 | WARN_ON(root->commit_root != root->node); |
135 | 135 | ||
136 | /* | ||
137 | * see below for in_trans_setup usage rules | ||
138 | * we have the reloc mutex held now, so there | ||
139 | * is only one writer in this function | ||
140 | */ | ||
141 | root->in_trans_setup = 1; | ||
142 | |||
143 | /* make sure readers find in_trans_setup before | ||
144 | * they find our root->last_trans update | ||
145 | */ | ||
146 | smp_wmb(); | ||
147 | |||
136 | spin_lock(&root->fs_info->fs_roots_radix_lock); | 148 | spin_lock(&root->fs_info->fs_roots_radix_lock); |
137 | if (root->last_trans == trans->transid) { | 149 | if (root->last_trans == trans->transid) { |
138 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | 150 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
139 | return 0; | 151 | return 0; |
140 | } | 152 | } |
141 | root->last_trans = trans->transid; | ||
142 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 153 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
143 | (unsigned long)root->root_key.objectid, | 154 | (unsigned long)root->root_key.objectid, |
144 | BTRFS_ROOT_TRANS_TAG); | 155 | BTRFS_ROOT_TRANS_TAG); |
145 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | 156 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
157 | root->last_trans = trans->transid; | ||
158 | |||
159 | /* this is pretty tricky. We don't want to | ||
160 | * take the relocation lock in btrfs_record_root_in_trans | ||
161 | * unless we're really doing the first setup for this root in | ||
162 | * this transaction. | ||
163 | * | ||
164 | * Normally we'd use root->last_trans as a flag to decide | ||
165 | * if we want to take the expensive mutex. | ||
166 | * | ||
167 | * But, we have to set root->last_trans before we | ||
168 | * init the relocation root, otherwise, we trip over warnings | ||
169 | * in ctree.c. The solution used here is to flag ourselves | ||
170 | * with root->in_trans_setup. When this is 1, we're still | ||
171 | * fixing up the reloc trees and everyone must wait. | ||
172 | * | ||
173 | * When this is zero, they can trust root->last_trans and fly | ||
174 | * through btrfs_record_root_in_trans without having to take the | ||
175 | * lock. smp_wmb() makes sure that all the writes above are | ||
176 | * done before we pop in the zero below | ||
177 | */ | ||
146 | btrfs_init_reloc_root(trans, root); | 178 | btrfs_init_reloc_root(trans, root); |
179 | smp_wmb(); | ||
180 | root->in_trans_setup = 0; | ||
147 | } | 181 | } |
148 | return 0; | 182 | return 0; |
149 | } | 183 | } |
150 | 184 | ||
185 | |||
186 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | ||
187 | struct btrfs_root *root) | ||
188 | { | ||
189 | if (!root->ref_cows) | ||
190 | return 0; | ||
191 | |||
192 | /* | ||
193 | * see record_root_in_trans for comments about in_trans_setup usage | ||
194 | * and barriers | ||
195 | */ | ||
196 | smp_rmb(); | ||
197 | if (root->last_trans == trans->transid && | ||
198 | !root->in_trans_setup) | ||
199 | return 0; | ||
200 | |||
201 | mutex_lock(&root->fs_info->reloc_mutex); | ||
202 | record_root_in_trans(trans, root); | ||
203 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
204 | |||
205 | return 0; | ||
206 | } | ||
207 | |||
151 | /* wait for commit against the current transaction to become unblocked | 208 | /* wait for commit against the current transaction to become unblocked |
152 | * when this is done, it is safe to start a new transaction, but the current | 209 | * when this is done, it is safe to start a new transaction, but the current |
153 | * transaction might not be fully on disk. | 210 | * transaction might not be fully on disk. |
@@ -349,7 +406,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
349 | list) { | 406 | list) { |
350 | if (t->in_commit) { | 407 | if (t->in_commit) { |
351 | if (t->commit_done) | 408 | if (t->commit_done) |
352 | goto out; | 409 | break; |
353 | cur_trans = t; | 410 | cur_trans = t; |
354 | atomic_inc(&cur_trans->use_count); | 411 | atomic_inc(&cur_trans->use_count); |
355 | break; | 412 | break; |
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
882 | parent = dget_parent(dentry); | 939 | parent = dget_parent(dentry); |
883 | parent_inode = parent->d_inode; | 940 | parent_inode = parent->d_inode; |
884 | parent_root = BTRFS_I(parent_inode)->root; | 941 | parent_root = BTRFS_I(parent_inode)->root; |
885 | btrfs_record_root_in_trans(trans, parent_root); | 942 | record_root_in_trans(trans, parent_root); |
886 | 943 | ||
887 | /* | 944 | /* |
888 | * insert the directory item | 945 | * insert the directory item |
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
900 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 957 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
901 | BUG_ON(ret); | 958 | BUG_ON(ret); |
902 | 959 | ||
903 | btrfs_record_root_in_trans(trans, root); | 960 | /* |
961 | * pull in the delayed directory update | ||
962 | * and the delayed inode item | ||
963 | * otherwise we corrupt the FS during | ||
964 | * snapshot | ||
965 | */ | ||
966 | ret = btrfs_run_delayed_items(trans, root); | ||
967 | BUG_ON(ret); | ||
968 | |||
969 | record_root_in_trans(trans, root); | ||
904 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 970 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
905 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 971 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
906 | btrfs_check_and_init_root_item(new_root_item); | 972 | btrfs_check_and_init_root_item(new_root_item); |
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, | |||
961 | int ret; | 1027 | int ret; |
962 | 1028 | ||
963 | list_for_each_entry(pending, head, list) { | 1029 | list_for_each_entry(pending, head, list) { |
964 | /* | ||
965 | * We must deal with the delayed items before creating | ||
966 | * snapshots, or we will create a snapthot with inconsistent | ||
967 | * information. | ||
968 | */ | ||
969 | ret = btrfs_run_delayed_items(trans, fs_info->fs_root); | ||
970 | BUG_ON(ret); | ||
971 | |||
972 | ret = create_pending_snapshot(trans, fs_info, pending); | 1030 | ret = create_pending_snapshot(trans, fs_info, pending); |
973 | BUG_ON(ret); | 1031 | BUG_ON(ret); |
974 | } | 1032 | } |
@@ -1118,8 +1176,11 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1118 | wait_current_trans_commit_start_and_unblock(root, cur_trans); | 1176 | wait_current_trans_commit_start_and_unblock(root, cur_trans); |
1119 | else | 1177 | else |
1120 | wait_current_trans_commit_start(root, cur_trans); | 1178 | wait_current_trans_commit_start(root, cur_trans); |
1121 | put_transaction(cur_trans); | ||
1122 | 1179 | ||
1180 | if (current->journal_info == trans) | ||
1181 | current->journal_info = NULL; | ||
1182 | |||
1183 | put_transaction(cur_trans); | ||
1123 | return 0; | 1184 | return 0; |
1124 | } | 1185 | } |
1125 | 1186 | ||
@@ -1238,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1238 | schedule_timeout(1); | 1299 | schedule_timeout(1); |
1239 | 1300 | ||
1240 | finish_wait(&cur_trans->writer_wait, &wait); | 1301 | finish_wait(&cur_trans->writer_wait, &wait); |
1241 | spin_lock(&root->fs_info->trans_lock); | ||
1242 | root->fs_info->trans_no_join = 1; | ||
1243 | spin_unlock(&root->fs_info->trans_lock); | ||
1244 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1302 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
1245 | (should_grow && cur_trans->num_joined != joined)); | 1303 | (should_grow && cur_trans->num_joined != joined)); |
1246 | 1304 | ||
1247 | ret = create_pending_snapshots(trans, root->fs_info); | 1305 | /* |
1248 | BUG_ON(ret); | 1306 | * Ok now we need to make sure to block out any other joins while we |
1307 | * commit the transaction. We could have started a join before setting | ||
1308 | * no_join so make sure to wait for num_writers to == 1 again. | ||
1309 | */ | ||
1310 | spin_lock(&root->fs_info->trans_lock); | ||
1311 | root->fs_info->trans_no_join = 1; | ||
1312 | spin_unlock(&root->fs_info->trans_lock); | ||
1313 | wait_event(cur_trans->writer_wait, | ||
1314 | atomic_read(&cur_trans->num_writers) == 1); | ||
1315 | |||
1316 | /* | ||
1317 | * the reloc mutex makes sure that we stop | ||
1318 | * the balancing code from coming in and moving | ||
1319 | * extents around in the middle of the commit | ||
1320 | */ | ||
1321 | mutex_lock(&root->fs_info->reloc_mutex); | ||
1249 | 1322 | ||
1250 | ret = btrfs_run_delayed_items(trans, root); | 1323 | ret = btrfs_run_delayed_items(trans, root); |
1251 | BUG_ON(ret); | 1324 | BUG_ON(ret); |
1252 | 1325 | ||
1326 | ret = create_pending_snapshots(trans, root->fs_info); | ||
1327 | BUG_ON(ret); | ||
1328 | |||
1253 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1329 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
1254 | BUG_ON(ret); | 1330 | BUG_ON(ret); |
1255 | 1331 | ||
1332 | /* | ||
1333 | * make sure none of the code above managed to slip in a | ||
1334 | * delayed item | ||
1335 | */ | ||
1336 | btrfs_assert_delayed_root_empty(root); | ||
1337 | |||
1256 | WARN_ON(cur_trans != trans->transaction); | 1338 | WARN_ON(cur_trans != trans->transaction); |
1257 | 1339 | ||
1258 | btrfs_scrub_pause(root); | 1340 | btrfs_scrub_pause(root); |
@@ -1309,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1309 | root->fs_info->running_transaction = NULL; | 1391 | root->fs_info->running_transaction = NULL; |
1310 | root->fs_info->trans_no_join = 0; | 1392 | root->fs_info->trans_no_join = 0; |
1311 | spin_unlock(&root->fs_info->trans_lock); | 1393 | spin_unlock(&root->fs_info->trans_lock); |
1394 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
1312 | 1395 | ||
1313 | wake_up(&root->fs_info->transaction_wait); | 1396 | wake_up(&root->fs_info->transaction_wait); |
1314 | 1397 | ||