diff options
| -rw-r--r-- | fs/btrfs/ctree.h | 14 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 1 | ||||
| -rw-r--r-- | fs/btrfs/relocation.c | 30 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 73 |
4 files changed, 105 insertions, 13 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8490ee063709..a2c91a102b72 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -967,6 +967,12 @@ struct btrfs_fs_info { | |||
| 967 | struct srcu_struct subvol_srcu; | 967 | struct srcu_struct subvol_srcu; |
| 968 | 968 | ||
| 969 | spinlock_t trans_lock; | 969 | spinlock_t trans_lock; |
| 970 | /* | ||
| 971 | * the reloc mutex goes with the trans lock, it is taken | ||
| 972 | * during commit to protect us from the relocation code | ||
| 973 | */ | ||
| 974 | struct mutex reloc_mutex; | ||
| 975 | |||
| 970 | struct list_head trans_list; | 976 | struct list_head trans_list; |
| 971 | struct list_head hashers; | 977 | struct list_head hashers; |
| 972 | struct list_head dead_roots; | 978 | struct list_head dead_roots; |
| @@ -1172,6 +1178,14 @@ struct btrfs_root { | |||
| 1172 | u32 type; | 1178 | u32 type; |
| 1173 | 1179 | ||
| 1174 | u64 highest_objectid; | 1180 | u64 highest_objectid; |
| 1181 | |||
| 1182 | /* btrfs_record_root_in_trans is a multi-step process, | ||
| 1183 | * and it can race with the balancing code. But the | ||
| 1184 | * race window is very small, and it only exists the first time | ||
| 1185 | * the root is added to each transaction. So in_trans_setup | ||
| 1186 | * is used to tell us when more checks are required | ||
| 1187 | */ | ||
| 1188 | unsigned long in_trans_setup; | ||
| 1175 | int ref_cows; | 1189 | int ref_cows; |
| 1176 | int track_dirty; | 1190 | int track_dirty; |
| 1177 | int in_radix; | 1191 | int in_radix; |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 20c111b3fa0d..0b2b4b759136 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1620,6 +1620,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1620 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1620 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
| 1621 | spin_lock_init(&fs_info->delayed_iput_lock); | 1621 | spin_lock_init(&fs_info->delayed_iput_lock); |
| 1622 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1622 | spin_lock_init(&fs_info->defrag_inodes_lock); |
| 1623 | mutex_init(&fs_info->reloc_mutex); | ||
| 1623 | 1624 | ||
| 1624 | init_completion(&fs_info->kobj_unregister); | 1625 | init_completion(&fs_info->kobj_unregister); |
| 1625 | fs_info->tree_root = tree_root; | 1626 | fs_info->tree_root = tree_root; |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f25b10a22a0a..086b1e6b8614 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
| 1368 | int ret; | 1368 | int ret; |
| 1369 | 1369 | ||
| 1370 | if (!root->reloc_root) | 1370 | if (!root->reloc_root) |
| 1371 | return 0; | 1371 | goto out; |
| 1372 | 1372 | ||
| 1373 | reloc_root = root->reloc_root; | 1373 | reloc_root = root->reloc_root; |
| 1374 | root_item = &reloc_root->root_item; | 1374 | root_item = &reloc_root->root_item; |
| @@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
| 1390 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | 1390 | ret = btrfs_update_root(trans, root->fs_info->tree_root, |
| 1391 | &reloc_root->root_key, root_item); | 1391 | &reloc_root->root_key, root_item); |
| 1392 | BUG_ON(ret); | 1392 | BUG_ON(ret); |
| 1393 | |||
| 1394 | out: | ||
| 1393 | return 0; | 1395 | return 0; |
| 1394 | } | 1396 | } |
| 1395 | 1397 | ||
| @@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
| 2142 | u64 num_bytes = 0; | 2144 | u64 num_bytes = 0; |
| 2143 | int ret; | 2145 | int ret; |
| 2144 | 2146 | ||
| 2145 | spin_lock(&root->fs_info->trans_lock); | 2147 | mutex_lock(&root->fs_info->reloc_mutex); |
| 2146 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | 2148 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
| 2147 | rc->merging_rsv_size += rc->nodes_relocated * 2; | 2149 | rc->merging_rsv_size += rc->nodes_relocated * 2; |
| 2148 | spin_unlock(&root->fs_info->trans_lock); | 2150 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 2151 | |||
| 2149 | again: | 2152 | again: |
| 2150 | if (!err) { | 2153 | if (!err) { |
| 2151 | num_bytes = rc->merging_rsv_size; | 2154 | num_bytes = rc->merging_rsv_size; |
| @@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc) | |||
| 2214 | int ret; | 2217 | int ret; |
| 2215 | again: | 2218 | again: |
| 2216 | root = rc->extent_root; | 2219 | root = rc->extent_root; |
| 2217 | spin_lock(&root->fs_info->trans_lock); | 2220 | |
| 2221 | /* | ||
| 2222 | * this serializes us with btrfs_record_root_in_trans, | ||
| 2223 | * we have to make sure nobody is in the middle of | ||
| 2224 | * adding their roots to the list while we are | ||
| 2225 | * doing this splice | ||
| 2226 | */ | ||
| 2227 | mutex_lock(&root->fs_info->reloc_mutex); | ||
| 2218 | list_splice_init(&rc->reloc_roots, &reloc_roots); | 2228 | list_splice_init(&rc->reloc_roots, &reloc_roots); |
| 2219 | spin_unlock(&root->fs_info->trans_lock); | 2229 | mutex_unlock(&root->fs_info->reloc_mutex); |
| 2220 | 2230 | ||
| 2221 | while (!list_empty(&reloc_roots)) { | 2231 | while (!list_empty(&reloc_roots)) { |
| 2222 | found = 1; | 2232 | found = 1; |
| @@ -3590,17 +3600,19 @@ next: | |||
| 3590 | static void set_reloc_control(struct reloc_control *rc) | 3600 | static void set_reloc_control(struct reloc_control *rc) |
| 3591 | { | 3601 | { |
| 3592 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3602 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
| 3593 | spin_lock(&fs_info->trans_lock); | 3603 | |
| 3604 | mutex_lock(&fs_info->reloc_mutex); | ||
| 3594 | fs_info->reloc_ctl = rc; | 3605 | fs_info->reloc_ctl = rc; |
| 3595 | spin_unlock(&fs_info->trans_lock); | 3606 | mutex_unlock(&fs_info->reloc_mutex); |
| 3596 | } | 3607 | } |
| 3597 | 3608 | ||
| 3598 | static void unset_reloc_control(struct reloc_control *rc) | 3609 | static void unset_reloc_control(struct reloc_control *rc) |
| 3599 | { | 3610 | { |
| 3600 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; | 3611 | struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; |
| 3601 | spin_lock(&fs_info->trans_lock); | 3612 | |
| 3613 | mutex_lock(&fs_info->reloc_mutex); | ||
| 3602 | fs_info->reloc_ctl = NULL; | 3614 | fs_info->reloc_ctl = NULL; |
| 3603 | spin_unlock(&fs_info->trans_lock); | 3615 | mutex_unlock(&fs_info->reloc_mutex); |
| 3604 | } | 3616 | } |
| 3605 | 3617 | ||
| 3606 | static int check_extent_flags(u64 flags) | 3618 | static int check_extent_flags(u64 flags) |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b3590b9fe98..833996a0c628 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) | |||
| 126 | * to make sure the old root from before we joined the transaction is deleted | 126 | * to make sure the old root from before we joined the transaction is deleted |
| 127 | * when the transaction commits | 127 | * when the transaction commits |
| 128 | */ | 128 | */ |
| 129 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 129 | static int record_root_in_trans(struct btrfs_trans_handle *trans, |
| 130 | struct btrfs_root *root) | 130 | struct btrfs_root *root) |
| 131 | { | 131 | { |
| 132 | if (root->ref_cows && root->last_trans < trans->transid) { | 132 | if (root->ref_cows && root->last_trans < trans->transid) { |
| 133 | WARN_ON(root == root->fs_info->extent_root); | 133 | WARN_ON(root == root->fs_info->extent_root); |
| 134 | WARN_ON(root->commit_root != root->node); | 134 | WARN_ON(root->commit_root != root->node); |
| 135 | 135 | ||
| 136 | /* | ||
| 137 | * see below for in_trans_setup usage rules | ||
| 138 | * we have the reloc mutex held now, so there | ||
| 139 | * is only one writer in this function | ||
| 140 | */ | ||
| 141 | root->in_trans_setup = 1; | ||
| 142 | |||
| 143 | /* make sure readers find in_trans_setup before | ||
| 144 | * they find our root->last_trans update | ||
| 145 | */ | ||
| 146 | smp_wmb(); | ||
| 147 | |||
| 136 | spin_lock(&root->fs_info->fs_roots_radix_lock); | 148 | spin_lock(&root->fs_info->fs_roots_radix_lock); |
| 137 | if (root->last_trans == trans->transid) { | 149 | if (root->last_trans == trans->transid) { |
| 138 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | 150 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
| 139 | return 0; | 151 | return 0; |
| 140 | } | 152 | } |
| 141 | root->last_trans = trans->transid; | ||
| 142 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 153 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
| 143 | (unsigned long)root->root_key.objectid, | 154 | (unsigned long)root->root_key.objectid, |
| 144 | BTRFS_ROOT_TRANS_TAG); | 155 | BTRFS_ROOT_TRANS_TAG); |
| 145 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | 156 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
| 157 | root->last_trans = trans->transid; | ||
| 158 | |||
| 159 | /* this is pretty tricky. We don't want to | ||
| 160 | * take the relocation lock in btrfs_record_root_in_trans | ||
| 161 | * unless we're really doing the first setup for this root in | ||
| 162 | * this transaction. | ||
| 163 | * | ||
| 164 | * Normally we'd use root->last_trans as a flag to decide | ||
| 165 | * if we want to take the expensive mutex. | ||
| 166 | * | ||
| 167 | * But, we have to set root->last_trans before we | ||
| 168 | * init the relocation root, otherwise, we trip over warnings | ||
| 169 | * in ctree.c. The solution used here is to flag ourselves | ||
| 170 | * with root->in_trans_setup. When this is 1, we're still | ||
| 171 | * fixing up the reloc trees and everyone must wait. | ||
| 172 | * | ||
| 173 | * When this is zero, they can trust root->last_trans and fly | ||
| 174 | * through btrfs_record_root_in_trans without having to take the | ||
| 175 | * lock. smp_wmb() makes sure that all the writes above are | ||
| 176 | * done before we pop in the zero below | ||
| 177 | */ | ||
| 146 | btrfs_init_reloc_root(trans, root); | 178 | btrfs_init_reloc_root(trans, root); |
| 179 | smp_wmb(); | ||
| 180 | root->in_trans_setup = 0; | ||
| 147 | } | 181 | } |
| 148 | return 0; | 182 | return 0; |
| 149 | } | 183 | } |
| 150 | 184 | ||
| 185 | |||
| 186 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | ||
| 187 | struct btrfs_root *root) | ||
| 188 | { | ||
| 189 | if (!root->ref_cows) | ||
| 190 | return 0; | ||
| 191 | |||
| 192 | /* | ||
| 193 | * see record_root_in_trans for comments about in_trans_setup usage | ||
| 194 | * and barriers | ||
| 195 | */ | ||
| 196 | smp_rmb(); | ||
| 197 | if (root->last_trans == trans->transid && | ||
| 198 | !root->in_trans_setup) | ||
| 199 | return 0; | ||
| 200 | |||
| 201 | mutex_lock(&root->fs_info->reloc_mutex); | ||
| 202 | record_root_in_trans(trans, root); | ||
| 203 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
| 204 | |||
| 205 | return 0; | ||
| 206 | } | ||
| 207 | |||
| 151 | /* wait for commit against the current transaction to become unblocked | 208 | /* wait for commit against the current transaction to become unblocked |
| 152 | * when this is done, it is safe to start a new transaction, but the current | 209 | * when this is done, it is safe to start a new transaction, but the current |
| 153 | * transaction might not be fully on disk. | 210 | * transaction might not be fully on disk. |
| @@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 882 | parent = dget_parent(dentry); | 939 | parent = dget_parent(dentry); |
| 883 | parent_inode = parent->d_inode; | 940 | parent_inode = parent->d_inode; |
| 884 | parent_root = BTRFS_I(parent_inode)->root; | 941 | parent_root = BTRFS_I(parent_inode)->root; |
| 885 | btrfs_record_root_in_trans(trans, parent_root); | 942 | record_root_in_trans(trans, parent_root); |
| 886 | 943 | ||
| 887 | /* | 944 | /* |
| 888 | * insert the directory item | 945 | * insert the directory item |
| @@ -900,7 +957,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 900 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 957 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
| 901 | BUG_ON(ret); | 958 | BUG_ON(ret); |
| 902 | 959 | ||
| 903 | btrfs_record_root_in_trans(trans, root); | 960 | record_root_in_trans(trans, root); |
| 904 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 961 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
| 905 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 962 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
| 906 | btrfs_check_and_init_root_item(new_root_item); | 963 | btrfs_check_and_init_root_item(new_root_item); |
| @@ -1247,6 +1304,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1247 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1304 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
| 1248 | (should_grow && cur_trans->num_joined != joined)); | 1305 | (should_grow && cur_trans->num_joined != joined)); |
| 1249 | 1306 | ||
| 1307 | /* | ||
| 1308 | * the reloc mutex makes sure that we stop | ||
| 1309 | * the balancing code from coming in and moving | ||
| 1310 | * extents around in the middle of the commit | ||
| 1311 | */ | ||
| 1312 | mutex_lock(&root->fs_info->reloc_mutex); | ||
| 1313 | |||
| 1250 | ret = create_pending_snapshots(trans, root->fs_info); | 1314 | ret = create_pending_snapshots(trans, root->fs_info); |
| 1251 | BUG_ON(ret); | 1315 | BUG_ON(ret); |
| 1252 | 1316 | ||
| @@ -1312,6 +1376,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1312 | root->fs_info->running_transaction = NULL; | 1376 | root->fs_info->running_transaction = NULL; |
| 1313 | root->fs_info->trans_no_join = 0; | 1377 | root->fs_info->trans_no_join = 0; |
| 1314 | spin_unlock(&root->fs_info->trans_lock); | 1378 | spin_unlock(&root->fs_info->trans_lock); |
| 1379 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
| 1315 | 1380 | ||
| 1316 | wake_up(&root->fs_info->transaction_wait); | 1381 | wake_up(&root->fs_info->transaction_wait); |
| 1317 | 1382 | ||
