diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/ctree.h | 14 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 1 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 30 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 73 |
4 files changed, 105 insertions, 13 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8490ee063709..a2c91a102b72 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -967,6 +967,12 @@ struct btrfs_fs_info { | |||
967 | struct srcu_struct subvol_srcu; | 967 | struct srcu_struct subvol_srcu; |
968 | 968 | ||
969 | spinlock_t trans_lock; | 969 | spinlock_t trans_lock; |
970 | /* | ||
971 | * the reloc mutex goes with the trans lock, it is taken | ||
972 | * during commit to protect us from the relocation code | ||
973 | */ | ||
974 | struct mutex reloc_mutex; | ||
975 | |||
970 | struct list_head trans_list; | 976 | struct list_head trans_list; |
971 | struct list_head hashers; | 977 | struct list_head hashers; |
972 | struct list_head dead_roots; | 978 | struct list_head dead_roots; |
@@ -1172,6 +1178,14 @@ struct btrfs_root { | |||
1172 | u32 type; | 1178 | u32 type; |
1173 | 1179 | ||
1174 | u64 highest_objectid; | 1180 | u64 highest_objectid; |
1181 | |||
1182 | /* btrfs_record_root_in_trans is a multi-step process, | ||
1183 | * and it can race with the balancing code. But the | ||
1184 | * race window is very small, and it only exists the first time | ||
1185 | * the root is added to each transaction. So in_trans_setup | ||
1186 | * is used to tell us when more checks are required | ||
1187 | */ | ||
1188 | unsigned long in_trans_setup; | ||
1175 | int ref_cows; | 1189 | int ref_cows; |
1176 | int track_dirty; | 1190 | int track_dirty; |
1177 | int in_radix; | 1191 | int in_radix; |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 20c111b3fa0d..0b2b4b759136 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1620,6 +1620,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1620 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1620 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
1621 | spin_lock_init(&fs_info->delayed_iput_lock); | 1621 | spin_lock_init(&fs_info->delayed_iput_lock); |
1622 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1622 | spin_lock_init(&fs_info->defrag_inodes_lock); |
1623 | mutex_init(&fs_info->reloc_mutex); | ||
1623 | 1624 | ||
1624 | init_completion(&fs_info->kobj_unregister); | 1625 | init_completion(&fs_info->kobj_unregister); |
1625 | fs_info->tree_root = tree_root; | 1626 | fs_info->tree_root = tree_root; |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f25b10a22a0a..086b1e6b8614 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
1368 | int ret; | 1368 | int ret; |
1369 | 1369 | ||
1370 | if (!root->reloc_root) | 1370 | if (!root->reloc_root) |
1371 | return 0; | 1371 | goto out; |
1372 | 1372 | ||
1373 | reloc_root = root->reloc_root; | 1373 | reloc_root = root->reloc_root; |
1374 | root_item = &reloc_root->root_item; | 1374 | root_item = &reloc_root->root_item; |
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
1390 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | 1390 | ret = btrfs_update_root(trans, root->fs_info->tree_root, |
1391 | &reloc_root->root_key, root_item); | 1391 | &reloc_root->root_key, root_item); |
1392 | BUG_ON(ret); | 1392 | BUG_ON(ret); |
1393 | |||
1394 | out: | ||
1393 | return 0; | 1395 | return 0; |
1394 | } | 1396 | } |
1395 | 1397 | ||
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
2142 | u64 num_bytes = 0; | 2144 | u64 num_bytes = 0; |
2143 | int ret; | 2145 | int ret; |
2144 | 2146 | ||
2145 | spin_lock(&root->fs_info->trans_lock); | 2147 | mutex_lock(&root->fs_info->reloc_mutex); |
2146 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | 2148 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
2147 | rc->merging_rsv_size += rc->nodes_relocated * 2; | 2149 | rc->merging_rsv_size += rc->nodes_relocated * 2; |
2148 | spin_unlock(&root->fs_info->trans_lock); | 2150 | mutex_unlock(&root->fs_info->reloc_mutex); |
2151 | |||
2149 | again: | 2152 | again: |
2150 | if (!err) { | 2153 | if (!err) { |
2151 | num_bytes = rc->merging_rsv_size; | 2154 | num_bytes = rc->merging_rsv_size; |
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc) | |||
2214 | int ret; | 2217 | int ret; |
2215 | again: | 2218 | again: |
2216 | root = rc->extent_root; | 2219 | root = rc->extent_root; |
2217 | spin_lock(&root->fs_info->trans_lock); | 2220 | |
2221 | /* | ||
2222 | * this serializes us with btrfs_record_root_in_trans, | ||
2223 | * we have to make sure nobody is in the middle of | ||
2224 | * adding their roots to the list while we are | ||
2225 | * doing this splice | ||
2226 | */ | ||
2227 | mutex_lock(&root->fs_info->reloc_mutex); | ||
2218 | list_splice_init(&rc->reloc_roots, &reloc_roots); | 2228 | list_splice_init(&rc->reloc_roots, &reloc_roots); |
2219 | spin_unlock(&root->fs_info->trans_lock); | 2229 | mutex_unlock(&root->fs_info->reloc_mutex); |
2220 | 2230 | ||
2221 | while (!list_empty(&reloc_roots)) { | 2231 | while (!list_empty(&reloc_roots)) { |
2222 | found = 1; | 2232 | found = 1; |
@@ -3590,17 +3600,19 @@ next: | |||
3590 | static void set_reloc_control(struct reloc_control *rc) | 3600 | static void set_reloc_control(struct reloc_control *rc) |
3591 | { | 3601 | { |
3592 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3602 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
3593 | spin_lock(&fs_info->trans_lock); | 3603 | |
3604 | mutex_lock(&fs_info->reloc_mutex); | ||
3594 | fs_info->reloc_ctl = rc; | 3605 | fs_info->reloc_ctl = rc; |
3595 | spin_unlock(&fs_info->trans_lock); | 3606 | mutex_unlock(&fs_info->reloc_mutex); |
3596 | } | 3607 | } |
3597 | 3608 | ||
3598 | static void unset_reloc_control(struct reloc_control *rc) | 3609 | static void unset_reloc_control(struct reloc_control *rc) |
3599 | { | 3610 | { |
3600 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3611 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
3601 | spin_lock(&fs_info->trans_lock); | 3612 | |
3613 | mutex_lock(&fs_info->reloc_mutex); | ||
3602 | fs_info->reloc_ctl = NULL; | 3614 | fs_info->reloc_ctl = NULL; |
3603 | spin_unlock(&fs_info->trans_lock); | 3615 | mutex_unlock(&fs_info->reloc_mutex); |
3604 | } | 3616 | } |
3605 | 3617 | ||
3606 | static int check_extent_flags(u64 flags) | 3618 | static int check_extent_flags(u64 flags) |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b3590b9fe98..833996a0c628 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) | |||
126 | * to make sure the old root from before we joined the transaction is deleted | 126 | * to make sure the old root from before we joined the transaction is deleted |
127 | * when the transaction commits | 127 | * when the transaction commits |
128 | */ | 128 | */ |
129 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 129 | static int record_root_in_trans(struct btrfs_trans_handle *trans, |
130 | struct btrfs_root *root) | 130 | struct btrfs_root *root) |
131 | { | 131 | { |
132 | if (root->ref_cows && root->last_trans < trans->transid) { | 132 | if (root->ref_cows && root->last_trans < trans->transid) { |
133 | WARN_ON(root == root->fs_info->extent_root); | 133 | WARN_ON(root == root->fs_info->extent_root); |
134 | WARN_ON(root->commit_root != root->node); | 134 | WARN_ON(root->commit_root != root->node); |
135 | 135 | ||
136 | /* | ||
137 | * see below for in_trans_setup usage rules. | ||
138 | * We have the reloc mutex held now, so there | ||
139 | * is only one writer in this function | ||
140 | */ | ||
141 | root->in_trans_setup = 1; | ||
142 | |||
143 | /* make sure readers find in_trans_setup before | ||
144 | * they find our root->last_trans update | ||
145 | */ | ||
146 | smp_wmb(); | ||
147 | |||
136 | spin_lock(&root->fs_info->fs_roots_radix_lock); | 148 | spin_lock(&root->fs_info->fs_roots_radix_lock); |
137 | if (root->last_trans == trans->transid) { | 149 | if (root->last_trans == trans->transid) { |
138 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | 150 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
139 | return 0; | 151 | return 0; |
140 | } | 152 | } |
141 | root->last_trans = trans->transid; | ||
142 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 153 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
143 | (unsigned long)root->root_key.objectid, | 154 | (unsigned long)root->root_key.objectid, |
144 | BTRFS_ROOT_TRANS_TAG); | 155 | BTRFS_ROOT_TRANS_TAG); |
145 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | 156 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
157 | root->last_trans = trans->transid; | ||
158 | |||
159 | /* this is pretty tricky. We don't want to | ||
160 | * take the relocation lock in btrfs_record_root_in_trans | ||
161 | * unless we're really doing the first setup for this root in | ||
162 | * this transaction. | ||
163 | * | ||
164 | * Normally we'd use root->last_trans as a flag to decide | ||
165 | * if we want to take the expensive mutex. | ||
166 | * | ||
167 | * But, we have to set root->last_trans before we | ||
168 | * init the relocation root, otherwise, we trip over warnings | ||
169 | * in ctree.c. The solution used here is to flag ourselves | ||
170 | * with root->in_trans_setup. When this is 1, we're still | ||
171 | * fixing up the reloc trees and everyone must wait. | ||
172 | * | ||
173 | * When this is zero, they can trust root->last_trans and fly | ||
174 | * through btrfs_record_root_in_trans without having to take the | ||
175 | * lock. smp_wmb() makes sure that all the writes above are | ||
176 | * done before we pop in the zero below | ||
177 | */ | ||
146 | btrfs_init_reloc_root(trans, root); | 178 | btrfs_init_reloc_root(trans, root); |
179 | smp_wmb(); | ||
180 | root->in_trans_setup = 0; | ||
147 | } | 181 | } |
148 | return 0; | 182 | return 0; |
149 | } | 183 | } |
150 | 184 | ||
185 | |||
186 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | ||
187 | struct btrfs_root *root) | ||
188 | { | ||
189 | if (!root->ref_cows) | ||
190 | return 0; | ||
191 | |||
192 | /* | ||
193 | * see record_root_in_trans for comments about in_trans_setup usage | ||
194 | * and barriers | ||
195 | */ | ||
196 | smp_rmb(); | ||
197 | if (root->last_trans == trans->transid && | ||
198 | !root->in_trans_setup) | ||
199 | return 0; | ||
200 | |||
201 | mutex_lock(&root->fs_info->reloc_mutex); | ||
202 | record_root_in_trans(trans, root); | ||
203 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
204 | |||
205 | return 0; | ||
206 | } | ||
207 | |||
151 | /* wait for commit against the current transaction to become unblocked | 208 | /* wait for commit against the current transaction to become unblocked |
152 | * when this is done, it is safe to start a new transaction, but the current | 209 | * when this is done, it is safe to start a new transaction, but the current |
153 | * transaction might not be fully on disk. | 210 | * transaction might not be fully on disk. |
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
882 | parent = dget_parent(dentry); | 939 | parent = dget_parent(dentry); |
883 | parent_inode = parent->d_inode; | 940 | parent_inode = parent->d_inode; |
884 | parent_root = BTRFS_I(parent_inode)->root; | 941 | parent_root = BTRFS_I(parent_inode)->root; |
885 | btrfs_record_root_in_trans(trans, parent_root); | 942 | record_root_in_trans(trans, parent_root); |
886 | 943 | ||
887 | /* | 944 | /* |
888 | * insert the directory item | 945 | * insert the directory item |
@@ -900,7 +957,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
900 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 957 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
901 | BUG_ON(ret); | 958 | BUG_ON(ret); |
902 | 959 | ||
903 | btrfs_record_root_in_trans(trans, root); | 960 | record_root_in_trans(trans, root); |
904 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 961 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
905 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 962 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
906 | btrfs_check_and_init_root_item(new_root_item); | 963 | btrfs_check_and_init_root_item(new_root_item); |
@@ -1247,6 +1304,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1247 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1304 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
1248 | (should_grow && cur_trans->num_joined != joined)); | 1305 | (should_grow && cur_trans->num_joined != joined)); |
1249 | 1306 | ||
1307 | /* | ||
1308 | * the reloc mutex makes sure that we stop | ||
1309 | * the balancing code from coming in and moving | ||
1310 | * extents around in the middle of the commit | ||
1311 | */ | ||
1312 | mutex_lock(&root->fs_info->reloc_mutex); | ||
1313 | |||
1250 | ret = create_pending_snapshots(trans, root->fs_info); | 1314 | ret = create_pending_snapshots(trans, root->fs_info); |
1251 | BUG_ON(ret); | 1315 | BUG_ON(ret); |
1252 | 1316 | ||
@@ -1312,6 +1376,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1312 | root->fs_info->running_transaction = NULL; | 1376 | root->fs_info->running_transaction = NULL; |
1313 | root->fs_info->trans_no_join = 0; | 1377 | root->fs_info->trans_no_join = 0; |
1314 | spin_unlock(&root->fs_info->trans_lock); | 1378 | spin_unlock(&root->fs_info->trans_lock); |
1379 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
1315 | 1380 | ||
1316 | wake_up(&root->fs_info->transaction_wait); | 1381 | wake_up(&root->fs_info->transaction_wait); |
1317 | 1382 | ||