diff options
author | Josef Bacik <josef@redhat.com> | 2011-04-11 17:25:13 -0400 |
---|---|---|
committer | Josef Bacik <josef@redhat.com> | 2011-05-23 13:00:57 -0400 |
commit | a4abeea41adfa3c143c289045f4625dfaeba2212 (patch) | |
tree | 792e2a398d8ba77447ba3f9f2c4266a1ce2f611c /fs/btrfs/transaction.c | |
parent | 2a1eb4614d984d5cd4c928784e9afcf5c07f93be (diff) |
Btrfs: kill trans_mutex
We use trans_mutex for lots of things; here's a basic list:
1) To serialize trans_handles joining the currently running transaction
2) To make sure that no new trans handles are started while we are committing
3) To protect the dead_roots list and the transaction lists
Really, serializing trans_handles joining is not too hard, but it can really get
bogged down in acquiring a reference to the transaction. So replace the
trans_mutex with a trans_lock spinlock and use it to do the following:
1) Protect fs_info->running_transaction. All trans handles have to do is check
this, and then take a reference of the transaction and keep on going.
2) Protect the fs_info->trans_list. This doesn't get used too much; basically
it just holds the current transactions, which will usually be at most the
currently committing transaction and the currently running transaction.
3) Protect the dead roots list. This is only ever processed by splicing the
list so this is relatively simple.
4) Protect the fs_info->reloc_ctl stuff. This is very lightweight and was using
the trans_mutex before, so this is a pretty straightforward change.
5) Protect fs_info->trans_no_join. Because we don't hold the trans_lock over
the entirety of the commit, we need a way to block new people from
creating a new transaction while we're doing our work. So we set trans_no_join,
and join_transaction tests whether it is set; if it is (and the caller is not a
TRANS_JOIN_NOLOCK join), it returns -EBUSY and the caller waits in
wait_current_trans before retrying.
6) Make the transaction use count atomic so we don't need to take locks to
modify it when we're dropping references.
7) Add a commit_lock to the transaction to make sure multiple people trying to
commit the same transaction don't race and commit at the same time.
8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl
trans.
I have tested this with xfstests, but obviously it is a pretty hairy change so
lots of testing is greatly appreciated. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 271 |
1 files changed, 142 insertions, 129 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 46f40564c16..43816f8b23e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -34,6 +34,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) | |||
34 | { | 34 | { |
35 | WARN_ON(atomic_read(&transaction->use_count) == 0); | 35 | WARN_ON(atomic_read(&transaction->use_count) == 0); |
36 | if (atomic_dec_and_test(&transaction->use_count)) { | 36 | if (atomic_dec_and_test(&transaction->use_count)) { |
37 | BUG_ON(!list_empty(&transaction->list)); | ||
37 | memset(transaction, 0, sizeof(*transaction)); | 38 | memset(transaction, 0, sizeof(*transaction)); |
38 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 39 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
39 | } | 40 | } |
@@ -48,47 +49,73 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
48 | /* | 49 | /* |
49 | * either allocate a new transaction or hop into the existing one | 50 | * either allocate a new transaction or hop into the existing one |
50 | */ | 51 | */ |
51 | static noinline int join_transaction(struct btrfs_root *root) | 52 | static noinline int join_transaction(struct btrfs_root *root, int nofail) |
52 | { | 53 | { |
53 | struct btrfs_transaction *cur_trans; | 54 | struct btrfs_transaction *cur_trans; |
55 | |||
56 | spin_lock(&root->fs_info->trans_lock); | ||
57 | if (root->fs_info->trans_no_join) { | ||
58 | if (!nofail) { | ||
59 | spin_unlock(&root->fs_info->trans_lock); | ||
60 | return -EBUSY; | ||
61 | } | ||
62 | } | ||
63 | |||
54 | cur_trans = root->fs_info->running_transaction; | 64 | cur_trans = root->fs_info->running_transaction; |
55 | if (!cur_trans) { | 65 | if (cur_trans) { |
56 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, | 66 | atomic_inc(&cur_trans->use_count); |
57 | GFP_NOFS); | 67 | atomic_inc(&cur_trans->num_writers); |
58 | if (!cur_trans) | 68 | cur_trans->num_joined++; |
59 | return -ENOMEM; | 69 | spin_unlock(&root->fs_info->trans_lock); |
60 | root->fs_info->generation++; | 70 | return 0; |
61 | atomic_set(&cur_trans->num_writers, 1); | 71 | } |
62 | cur_trans->num_joined = 0; | 72 | spin_unlock(&root->fs_info->trans_lock); |
63 | cur_trans->transid = root->fs_info->generation; | 73 | |
64 | init_waitqueue_head(&cur_trans->writer_wait); | 74 | cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS); |
65 | init_waitqueue_head(&cur_trans->commit_wait); | 75 | if (!cur_trans) |
66 | cur_trans->in_commit = 0; | 76 | return -ENOMEM; |
67 | cur_trans->blocked = 0; | 77 | spin_lock(&root->fs_info->trans_lock); |
68 | atomic_set(&cur_trans->use_count, 1); | 78 | if (root->fs_info->running_transaction) { |
69 | cur_trans->commit_done = 0; | 79 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
70 | cur_trans->start_time = get_seconds(); | 80 | cur_trans = root->fs_info->running_transaction; |
71 | 81 | atomic_inc(&cur_trans->use_count); | |
72 | cur_trans->delayed_refs.root = RB_ROOT; | ||
73 | cur_trans->delayed_refs.num_entries = 0; | ||
74 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
75 | cur_trans->delayed_refs.num_heads = 0; | ||
76 | cur_trans->delayed_refs.flushing = 0; | ||
77 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
78 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
79 | |||
80 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
81 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
82 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
83 | root->fs_info->btree_inode->i_mapping, | ||
84 | GFP_NOFS); | ||
85 | spin_lock(&root->fs_info->new_trans_lock); | ||
86 | root->fs_info->running_transaction = cur_trans; | ||
87 | spin_unlock(&root->fs_info->new_trans_lock); | ||
88 | } else { | ||
89 | atomic_inc(&cur_trans->num_writers); | 82 | atomic_inc(&cur_trans->num_writers); |
90 | cur_trans->num_joined++; | 83 | cur_trans->num_joined++; |
84 | spin_unlock(&root->fs_info->trans_lock); | ||
85 | return 0; | ||
91 | } | 86 | } |
87 | atomic_set(&cur_trans->num_writers, 1); | ||
88 | cur_trans->num_joined = 0; | ||
89 | init_waitqueue_head(&cur_trans->writer_wait); | ||
90 | init_waitqueue_head(&cur_trans->commit_wait); | ||
91 | cur_trans->in_commit = 0; | ||
92 | cur_trans->blocked = 0; | ||
93 | /* | ||
94 | * One for this trans handle, one so it will live on until we | ||
95 | * commit the transaction. | ||
96 | */ | ||
97 | atomic_set(&cur_trans->use_count, 2); | ||
98 | cur_trans->commit_done = 0; | ||
99 | cur_trans->start_time = get_seconds(); | ||
100 | |||
101 | cur_trans->delayed_refs.root = RB_ROOT; | ||
102 | cur_trans->delayed_refs.num_entries = 0; | ||
103 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
104 | cur_trans->delayed_refs.num_heads = 0; | ||
105 | cur_trans->delayed_refs.flushing = 0; | ||
106 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
107 | spin_lock_init(&cur_trans->commit_lock); | ||
108 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
109 | |||
110 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
111 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | ||
112 | extent_io_tree_init(&cur_trans->dirty_pages, | ||
113 | root->fs_info->btree_inode->i_mapping, | ||
114 | GFP_NOFS); | ||
115 | root->fs_info->generation++; | ||
116 | cur_trans->transid = root->fs_info->generation; | ||
117 | root->fs_info->running_transaction = cur_trans; | ||
118 | spin_unlock(&root->fs_info->trans_lock); | ||
92 | 119 | ||
93 | return 0; | 120 | return 0; |
94 | } | 121 | } |
@@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
99 | * to make sure the old root from before we joined the transaction is deleted | 126 | * to make sure the old root from before we joined the transaction is deleted |
100 | * when the transaction commits | 127 | * when the transaction commits |
101 | */ | 128 | */ |
102 | static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, | 129 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
103 | struct btrfs_root *root) | 130 | struct btrfs_root *root) |
104 | { | 131 | { |
105 | if (root->ref_cows && root->last_trans < trans->transid) { | 132 | if (root->ref_cows && root->last_trans < trans->transid) { |
106 | WARN_ON(root == root->fs_info->extent_root); | 133 | WARN_ON(root == root->fs_info->extent_root); |
107 | WARN_ON(root->commit_root != root->node); | 134 | WARN_ON(root->commit_root != root->node); |
108 | 135 | ||
136 | spin_lock(&root->fs_info->fs_roots_radix_lock); | ||
137 | if (root->last_trans == trans->transid) { | ||
138 | spin_unlock(&root->fs_info->fs_roots_radix_lock); | ||
139 | return 0; | ||
140 | } | ||
141 | root->last_trans = trans->transid; | ||
109 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 142 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
110 | (unsigned long)root->root_key.objectid, | 143 | (unsigned long)root->root_key.objectid, |
111 | BTRFS_ROOT_TRANS_TAG); | 144 | BTRFS_ROOT_TRANS_TAG); |
112 | root->last_trans = trans->transid; | 145 | spin_unlock(&root->fs_info->fs_roots_radix_lock); |
113 | btrfs_init_reloc_root(trans, root); | 146 | btrfs_init_reloc_root(trans, root); |
114 | } | 147 | } |
115 | return 0; | 148 | return 0; |
116 | } | 149 | } |
117 | 150 | ||
118 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | ||
119 | struct btrfs_root *root) | ||
120 | { | ||
121 | if (!root->ref_cows) | ||
122 | return 0; | ||
123 | |||
124 | mutex_lock(&root->fs_info->trans_mutex); | ||
125 | if (root->last_trans == trans->transid) { | ||
126 | mutex_unlock(&root->fs_info->trans_mutex); | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | record_root_in_trans(trans, root); | ||
131 | mutex_unlock(&root->fs_info->trans_mutex); | ||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* wait for commit against the current transaction to become unblocked | 151 | /* wait for commit against the current transaction to become unblocked |
136 | * when this is done, it is safe to start a new transaction, but the current | 152 | * when this is done, it is safe to start a new transaction, but the current |
137 | * transaction might not be fully on disk. | 153 | * transaction might not be fully on disk. |
@@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root) | |||
140 | { | 156 | { |
141 | struct btrfs_transaction *cur_trans; | 157 | struct btrfs_transaction *cur_trans; |
142 | 158 | ||
159 | spin_lock(&root->fs_info->trans_lock); | ||
143 | cur_trans = root->fs_info->running_transaction; | 160 | cur_trans = root->fs_info->running_transaction; |
144 | if (cur_trans && cur_trans->blocked) { | 161 | if (cur_trans && cur_trans->blocked) { |
145 | DEFINE_WAIT(wait); | 162 | DEFINE_WAIT(wait); |
146 | atomic_inc(&cur_trans->use_count); | 163 | atomic_inc(&cur_trans->use_count); |
164 | spin_unlock(&root->fs_info->trans_lock); | ||
147 | while (1) { | 165 | while (1) { |
148 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 166 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, |
149 | TASK_UNINTERRUPTIBLE); | 167 | TASK_UNINTERRUPTIBLE); |
150 | if (!cur_trans->blocked) | 168 | if (!cur_trans->blocked) |
151 | break; | 169 | break; |
152 | mutex_unlock(&root->fs_info->trans_mutex); | ||
153 | schedule(); | 170 | schedule(); |
154 | mutex_lock(&root->fs_info->trans_mutex); | ||
155 | } | 171 | } |
156 | finish_wait(&root->fs_info->transaction_wait, &wait); | 172 | finish_wait(&root->fs_info->transaction_wait, &wait); |
157 | put_transaction(cur_trans); | 173 | put_transaction(cur_trans); |
174 | } else { | ||
175 | spin_unlock(&root->fs_info->trans_lock); | ||
158 | } | 176 | } |
159 | } | 177 | } |
160 | 178 | ||
@@ -167,10 +185,16 @@ enum btrfs_trans_type { | |||
167 | 185 | ||
168 | static int may_wait_transaction(struct btrfs_root *root, int type) | 186 | static int may_wait_transaction(struct btrfs_root *root, int type) |
169 | { | 187 | { |
170 | if (!root->fs_info->log_root_recovering && | 188 | if (root->fs_info->log_root_recovering) |
171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | 189 | return 0; |
172 | type == TRANS_USERSPACE)) | 190 | |
191 | if (type == TRANS_USERSPACE) | ||
192 | return 1; | ||
193 | |||
194 | if (type == TRANS_START && | ||
195 | !atomic_read(&root->fs_info->open_ioctl_trans)) | ||
173 | return 1; | 196 | return 1; |
197 | |||
174 | return 0; | 198 | return 0; |
175 | } | 199 | } |
176 | 200 | ||
@@ -198,23 +222,21 @@ again: | |||
198 | if (!h) | 222 | if (!h) |
199 | return ERR_PTR(-ENOMEM); | 223 | return ERR_PTR(-ENOMEM); |
200 | 224 | ||
201 | if (type != TRANS_JOIN_NOLOCK) | ||
202 | mutex_lock(&root->fs_info->trans_mutex); | ||
203 | if (may_wait_transaction(root, type)) | 225 | if (may_wait_transaction(root, type)) |
204 | wait_current_trans(root); | 226 | wait_current_trans(root); |
205 | 227 | ||
206 | ret = join_transaction(root); | 228 | do { |
229 | ret = join_transaction(root, type == TRANS_JOIN_NOLOCK); | ||
230 | if (ret == -EBUSY) | ||
231 | wait_current_trans(root); | ||
232 | } while (ret == -EBUSY); | ||
233 | |||
207 | if (ret < 0) { | 234 | if (ret < 0) { |
208 | kmem_cache_free(btrfs_trans_handle_cachep, h); | 235 | kmem_cache_free(btrfs_trans_handle_cachep, h); |
209 | if (type != TRANS_JOIN_NOLOCK) | ||
210 | mutex_unlock(&root->fs_info->trans_mutex); | ||
211 | return ERR_PTR(ret); | 236 | return ERR_PTR(ret); |
212 | } | 237 | } |
213 | 238 | ||
214 | cur_trans = root->fs_info->running_transaction; | 239 | cur_trans = root->fs_info->running_transaction; |
215 | atomic_inc(&cur_trans->use_count); | ||
216 | if (type != TRANS_JOIN_NOLOCK) | ||
217 | mutex_unlock(&root->fs_info->trans_mutex); | ||
218 | 240 | ||
219 | h->transid = cur_trans->transid; | 241 | h->transid = cur_trans->transid; |
220 | h->transaction = cur_trans; | 242 | h->transaction = cur_trans; |
@@ -253,11 +275,7 @@ again: | |||
253 | } | 275 | } |
254 | 276 | ||
255 | got_it: | 277 | got_it: |
256 | if (type != TRANS_JOIN_NOLOCK) | 278 | btrfs_record_root_in_trans(h, root); |
257 | mutex_lock(&root->fs_info->trans_mutex); | ||
258 | record_root_in_trans(h, root); | ||
259 | if (type != TRANS_JOIN_NOLOCK) | ||
260 | mutex_unlock(&root->fs_info->trans_mutex); | ||
261 | 279 | ||
262 | if (!current->journal_info && type != TRANS_USERSPACE) | 280 | if (!current->journal_info && type != TRANS_USERSPACE) |
263 | current->journal_info = h; | 281 | current->journal_info = h; |
@@ -289,17 +307,13 @@ static noinline int wait_for_commit(struct btrfs_root *root, | |||
289 | struct btrfs_transaction *commit) | 307 | struct btrfs_transaction *commit) |
290 | { | 308 | { |
291 | DEFINE_WAIT(wait); | 309 | DEFINE_WAIT(wait); |
292 | mutex_lock(&root->fs_info->trans_mutex); | ||
293 | while (!commit->commit_done) { | 310 | while (!commit->commit_done) { |
294 | prepare_to_wait(&commit->commit_wait, &wait, | 311 | prepare_to_wait(&commit->commit_wait, &wait, |
295 | TASK_UNINTERRUPTIBLE); | 312 | TASK_UNINTERRUPTIBLE); |
296 | if (commit->commit_done) | 313 | if (commit->commit_done) |
297 | break; | 314 | break; |
298 | mutex_unlock(&root->fs_info->trans_mutex); | ||
299 | schedule(); | 315 | schedule(); |
300 | mutex_lock(&root->fs_info->trans_mutex); | ||
301 | } | 316 | } |
302 | mutex_unlock(&root->fs_info->trans_mutex); | ||
303 | finish_wait(&commit->commit_wait, &wait); | 317 | finish_wait(&commit->commit_wait, &wait); |
304 | return 0; | 318 | return 0; |
305 | } | 319 | } |
@@ -309,50 +323,49 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
309 | struct btrfs_transaction *cur_trans = NULL, *t; | 323 | struct btrfs_transaction *cur_trans = NULL, *t; |
310 | int ret; | 324 | int ret; |
311 | 325 | ||
312 | mutex_lock(&root->fs_info->trans_mutex); | ||
313 | |||
314 | ret = 0; | 326 | ret = 0; |
315 | if (transid) { | 327 | if (transid) { |
316 | if (transid <= root->fs_info->last_trans_committed) | 328 | if (transid <= root->fs_info->last_trans_committed) |
317 | goto out_unlock; | 329 | goto out; |
318 | 330 | ||
319 | /* find specified transaction */ | 331 | /* find specified transaction */ |
332 | spin_lock(&root->fs_info->trans_lock); | ||
320 | list_for_each_entry(t, &root->fs_info->trans_list, list) { | 333 | list_for_each_entry(t, &root->fs_info->trans_list, list) { |
321 | if (t->transid == transid) { | 334 | if (t->transid == transid) { |
322 | cur_trans = t; | 335 | cur_trans = t; |
336 | atomic_inc(&cur_trans->use_count); | ||
323 | break; | 337 | break; |
324 | } | 338 | } |
325 | if (t->transid > transid) | 339 | if (t->transid > transid) |
326 | break; | 340 | break; |
327 | } | 341 | } |
342 | spin_unlock(&root->fs_info->trans_lock); | ||
328 | ret = -EINVAL; | 343 | ret = -EINVAL; |
329 | if (!cur_trans) | 344 | if (!cur_trans) |
330 | goto out_unlock; /* bad transid */ | 345 | goto out; /* bad transid */ |
331 | } else { | 346 | } else { |
332 | /* find newest transaction that is committing | committed */ | 347 | /* find newest transaction that is committing | committed */ |
348 | spin_lock(&root->fs_info->trans_lock); | ||
333 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, | 349 | list_for_each_entry_reverse(t, &root->fs_info->trans_list, |
334 | list) { | 350 | list) { |
335 | if (t->in_commit) { | 351 | if (t->in_commit) { |
336 | if (t->commit_done) | 352 | if (t->commit_done) |
337 | goto out_unlock; | 353 | goto out; |
338 | cur_trans = t; | 354 | cur_trans = t; |
355 | atomic_inc(&cur_trans->use_count); | ||
339 | break; | 356 | break; |
340 | } | 357 | } |
341 | } | 358 | } |
359 | spin_unlock(&root->fs_info->trans_lock); | ||
342 | if (!cur_trans) | 360 | if (!cur_trans) |
343 | goto out_unlock; /* nothing committing|committed */ | 361 | goto out; /* nothing committing|committed */ |
344 | } | 362 | } |
345 | 363 | ||
346 | atomic_inc(&cur_trans->use_count); | ||
347 | mutex_unlock(&root->fs_info->trans_mutex); | ||
348 | |||
349 | wait_for_commit(root, cur_trans); | 364 | wait_for_commit(root, cur_trans); |
350 | 365 | ||
351 | mutex_lock(&root->fs_info->trans_mutex); | ||
352 | put_transaction(cur_trans); | 366 | put_transaction(cur_trans); |
353 | ret = 0; | 367 | ret = 0; |
354 | out_unlock: | 368 | out: |
355 | mutex_unlock(&root->fs_info->trans_mutex); | ||
356 | return ret; | 369 | return ret; |
357 | } | 370 | } |
358 | 371 | ||
@@ -401,10 +414,8 @@ harder: | |||
401 | 414 | ||
402 | void btrfs_throttle(struct btrfs_root *root) | 415 | void btrfs_throttle(struct btrfs_root *root) |
403 | { | 416 | { |
404 | mutex_lock(&root->fs_info->trans_mutex); | 417 | if (!atomic_read(&root->fs_info->open_ioctl_trans)) |
405 | if (!root->fs_info->open_ioctl_trans) | ||
406 | wait_current_trans(root); | 418 | wait_current_trans(root); |
407 | mutex_unlock(&root->fs_info->trans_mutex); | ||
408 | } | 419 | } |
409 | 420 | ||
410 | static int should_end_transaction(struct btrfs_trans_handle *trans, | 421 | static int should_end_transaction(struct btrfs_trans_handle *trans, |
@@ -422,6 +433,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | |||
422 | struct btrfs_transaction *cur_trans = trans->transaction; | 433 | struct btrfs_transaction *cur_trans = trans->transaction; |
423 | int updates; | 434 | int updates; |
424 | 435 | ||
436 | smp_mb(); | ||
425 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | 437 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) |
426 | return 1; | 438 | return 1; |
427 | 439 | ||
@@ -467,9 +479,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
467 | 479 | ||
468 | btrfs_trans_release_metadata(trans, root); | 480 | btrfs_trans_release_metadata(trans, root); |
469 | 481 | ||
470 | if (lock && !root->fs_info->open_ioctl_trans && | 482 | if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && |
471 | should_end_transaction(trans, root)) | 483 | should_end_transaction(trans, root)) { |
472 | trans->transaction->blocked = 1; | 484 | trans->transaction->blocked = 1; |
485 | smp_wmb(); | ||
486 | } | ||
473 | 487 | ||
474 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { | 488 | if (lock && cur_trans->blocked && !cur_trans->in_commit) { |
475 | if (throttle) | 489 | if (throttle) |
@@ -739,9 +753,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
739 | */ | 753 | */ |
740 | int btrfs_add_dead_root(struct btrfs_root *root) | 754 | int btrfs_add_dead_root(struct btrfs_root *root) |
741 | { | 755 | { |
742 | mutex_lock(&root->fs_info->trans_mutex); | 756 | spin_lock(&root->fs_info->trans_lock); |
743 | list_add(&root->root_list, &root->fs_info->dead_roots); | 757 | list_add(&root->root_list, &root->fs_info->dead_roots); |
744 | mutex_unlock(&root->fs_info->trans_mutex); | 758 | spin_unlock(&root->fs_info->trans_lock); |
745 | return 0; | 759 | return 0; |
746 | } | 760 | } |
747 | 761 | ||
@@ -757,6 +771,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
757 | int ret; | 771 | int ret; |
758 | int err = 0; | 772 | int err = 0; |
759 | 773 | ||
774 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
760 | while (1) { | 775 | while (1) { |
761 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, | 776 | ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, |
762 | (void **)gang, 0, | 777 | (void **)gang, 0, |
@@ -769,6 +784,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
769 | radix_tree_tag_clear(&fs_info->fs_roots_radix, | 784 | radix_tree_tag_clear(&fs_info->fs_roots_radix, |
770 | (unsigned long)root->root_key.objectid, | 785 | (unsigned long)root->root_key.objectid, |
771 | BTRFS_ROOT_TRANS_TAG); | 786 | BTRFS_ROOT_TRANS_TAG); |
787 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
772 | 788 | ||
773 | btrfs_free_log(trans, root); | 789 | btrfs_free_log(trans, root); |
774 | btrfs_update_reloc_root(trans, root); | 790 | btrfs_update_reloc_root(trans, root); |
@@ -783,10 +799,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
783 | err = btrfs_update_root(trans, fs_info->tree_root, | 799 | err = btrfs_update_root(trans, fs_info->tree_root, |
784 | &root->root_key, | 800 | &root->root_key, |
785 | &root->root_item); | 801 | &root->root_item); |
802 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
786 | if (err) | 803 | if (err) |
787 | break; | 804 | break; |
788 | } | 805 | } |
789 | } | 806 | } |
807 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
790 | return err; | 808 | return err; |
791 | } | 809 | } |
792 | 810 | ||
@@ -972,7 +990,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
972 | parent = dget_parent(dentry); | 990 | parent = dget_parent(dentry); |
973 | parent_inode = parent->d_inode; | 991 | parent_inode = parent->d_inode; |
974 | parent_root = BTRFS_I(parent_inode)->root; | 992 | parent_root = BTRFS_I(parent_inode)->root; |
975 | record_root_in_trans(trans, parent_root); | 993 | btrfs_record_root_in_trans(trans, parent_root); |
976 | 994 | ||
977 | /* | 995 | /* |
978 | * insert the directory item | 996 | * insert the directory item |
@@ -990,7 +1008,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
990 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 1008 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
991 | BUG_ON(ret); | 1009 | BUG_ON(ret); |
992 | 1010 | ||
993 | record_root_in_trans(trans, root); | 1011 | btrfs_record_root_in_trans(trans, root); |
994 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 1012 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
995 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 1013 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
996 | btrfs_check_and_init_root_item(new_root_item); | 1014 | btrfs_check_and_init_root_item(new_root_item); |
@@ -1080,20 +1098,20 @@ static void update_super_roots(struct btrfs_root *root) | |||
1080 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | 1098 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info) |
1081 | { | 1099 | { |
1082 | int ret = 0; | 1100 | int ret = 0; |
1083 | spin_lock(&info->new_trans_lock); | 1101 | spin_lock(&info->trans_lock); |
1084 | if (info->running_transaction) | 1102 | if (info->running_transaction) |
1085 | ret = info->running_transaction->in_commit; | 1103 | ret = info->running_transaction->in_commit; |
1086 | spin_unlock(&info->new_trans_lock); | 1104 | spin_unlock(&info->trans_lock); |
1087 | return ret; | 1105 | return ret; |
1088 | } | 1106 | } |
1089 | 1107 | ||
1090 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | 1108 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) |
1091 | { | 1109 | { |
1092 | int ret = 0; | 1110 | int ret = 0; |
1093 | spin_lock(&info->new_trans_lock); | 1111 | spin_lock(&info->trans_lock); |
1094 | if (info->running_transaction) | 1112 | if (info->running_transaction) |
1095 | ret = info->running_transaction->blocked; | 1113 | ret = info->running_transaction->blocked; |
1096 | spin_unlock(&info->new_trans_lock); | 1114 | spin_unlock(&info->trans_lock); |
1097 | return ret; | 1115 | return ret; |
1098 | } | 1116 | } |
1099 | 1117 | ||
@@ -1117,9 +1135,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root, | |||
1117 | &wait); | 1135 | &wait); |
1118 | break; | 1136 | break; |
1119 | } | 1137 | } |
1120 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1121 | schedule(); | 1138 | schedule(); |
1122 | mutex_lock(&root->fs_info->trans_mutex); | ||
1123 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); | 1139 | finish_wait(&root->fs_info->transaction_blocked_wait, &wait); |
1124 | } | 1140 | } |
1125 | } | 1141 | } |
@@ -1145,9 +1161,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | |||
1145 | &wait); | 1161 | &wait); |
1146 | break; | 1162 | break; |
1147 | } | 1163 | } |
1148 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1149 | schedule(); | 1164 | schedule(); |
1150 | mutex_lock(&root->fs_info->trans_mutex); | ||
1151 | finish_wait(&root->fs_info->transaction_wait, | 1165 | finish_wait(&root->fs_info->transaction_wait, |
1152 | &wait); | 1166 | &wait); |
1153 | } | 1167 | } |
@@ -1193,22 +1207,18 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1193 | } | 1207 | } |
1194 | 1208 | ||
1195 | /* take transaction reference */ | 1209 | /* take transaction reference */ |
1196 | mutex_lock(&root->fs_info->trans_mutex); | ||
1197 | cur_trans = trans->transaction; | 1210 | cur_trans = trans->transaction; |
1198 | atomic_inc(&cur_trans->use_count); | 1211 | atomic_inc(&cur_trans->use_count); |
1199 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1200 | 1212 | ||
1201 | btrfs_end_transaction(trans, root); | 1213 | btrfs_end_transaction(trans, root); |
1202 | schedule_delayed_work(&ac->work, 0); | 1214 | schedule_delayed_work(&ac->work, 0); |
1203 | 1215 | ||
1204 | /* wait for transaction to start and unblock */ | 1216 | /* wait for transaction to start and unblock */ |
1205 | mutex_lock(&root->fs_info->trans_mutex); | ||
1206 | if (wait_for_unblock) | 1217 | if (wait_for_unblock) |
1207 | wait_current_trans_commit_start_and_unblock(root, cur_trans); | 1218 | wait_current_trans_commit_start_and_unblock(root, cur_trans); |
1208 | else | 1219 | else |
1209 | wait_current_trans_commit_start(root, cur_trans); | 1220 | wait_current_trans_commit_start(root, cur_trans); |
1210 | put_transaction(cur_trans); | 1221 | put_transaction(cur_trans); |
1211 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1212 | 1222 | ||
1213 | return 0; | 1223 | return 0; |
1214 | } | 1224 | } |
@@ -1252,38 +1262,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1252 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1262 | ret = btrfs_run_delayed_refs(trans, root, 0); |
1253 | BUG_ON(ret); | 1263 | BUG_ON(ret); |
1254 | 1264 | ||
1255 | mutex_lock(&root->fs_info->trans_mutex); | 1265 | spin_lock(&cur_trans->commit_lock); |
1256 | if (cur_trans->in_commit) { | 1266 | if (cur_trans->in_commit) { |
1267 | spin_unlock(&cur_trans->commit_lock); | ||
1257 | atomic_inc(&cur_trans->use_count); | 1268 | atomic_inc(&cur_trans->use_count); |
1258 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1259 | btrfs_end_transaction(trans, root); | 1269 | btrfs_end_transaction(trans, root); |
1260 | 1270 | ||
1261 | ret = wait_for_commit(root, cur_trans); | 1271 | ret = wait_for_commit(root, cur_trans); |
1262 | BUG_ON(ret); | 1272 | BUG_ON(ret); |
1263 | 1273 | ||
1264 | mutex_lock(&root->fs_info->trans_mutex); | ||
1265 | put_transaction(cur_trans); | 1274 | put_transaction(cur_trans); |
1266 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1267 | 1275 | ||
1268 | return 0; | 1276 | return 0; |
1269 | } | 1277 | } |
1270 | 1278 | ||
1271 | trans->transaction->in_commit = 1; | 1279 | trans->transaction->in_commit = 1; |
1272 | trans->transaction->blocked = 1; | 1280 | trans->transaction->blocked = 1; |
1281 | spin_unlock(&cur_trans->commit_lock); | ||
1273 | wake_up(&root->fs_info->transaction_blocked_wait); | 1282 | wake_up(&root->fs_info->transaction_blocked_wait); |
1274 | 1283 | ||
1284 | spin_lock(&root->fs_info->trans_lock); | ||
1275 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1285 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
1276 | prev_trans = list_entry(cur_trans->list.prev, | 1286 | prev_trans = list_entry(cur_trans->list.prev, |
1277 | struct btrfs_transaction, list); | 1287 | struct btrfs_transaction, list); |
1278 | if (!prev_trans->commit_done) { | 1288 | if (!prev_trans->commit_done) { |
1279 | atomic_inc(&prev_trans->use_count); | 1289 | atomic_inc(&prev_trans->use_count); |
1280 | mutex_unlock(&root->fs_info->trans_mutex); | 1290 | spin_unlock(&root->fs_info->trans_lock); |
1281 | 1291 | ||
1282 | wait_for_commit(root, prev_trans); | 1292 | wait_for_commit(root, prev_trans); |
1283 | 1293 | ||
1284 | mutex_lock(&root->fs_info->trans_mutex); | ||
1285 | put_transaction(prev_trans); | 1294 | put_transaction(prev_trans); |
1295 | } else { | ||
1296 | spin_unlock(&root->fs_info->trans_lock); | ||
1286 | } | 1297 | } |
1298 | } else { | ||
1299 | spin_unlock(&root->fs_info->trans_lock); | ||
1287 | } | 1300 | } |
1288 | 1301 | ||
1289 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | 1302 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) |
@@ -1291,12 +1304,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1291 | 1304 | ||
1292 | do { | 1305 | do { |
1293 | int snap_pending = 0; | 1306 | int snap_pending = 0; |
1307 | |||
1294 | joined = cur_trans->num_joined; | 1308 | joined = cur_trans->num_joined; |
1295 | if (!list_empty(&trans->transaction->pending_snapshots)) | 1309 | if (!list_empty(&trans->transaction->pending_snapshots)) |
1296 | snap_pending = 1; | 1310 | snap_pending = 1; |
1297 | 1311 | ||
1298 | WARN_ON(cur_trans != trans->transaction); | 1312 | WARN_ON(cur_trans != trans->transaction); |
1299 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1300 | 1313 | ||
1301 | if (flush_on_commit || snap_pending) { | 1314 | if (flush_on_commit || snap_pending) { |
1302 | btrfs_start_delalloc_inodes(root, 1); | 1315 | btrfs_start_delalloc_inodes(root, 1); |
@@ -1316,14 +1329,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1316 | prepare_to_wait(&cur_trans->writer_wait, &wait, | 1329 | prepare_to_wait(&cur_trans->writer_wait, &wait, |
1317 | TASK_UNINTERRUPTIBLE); | 1330 | TASK_UNINTERRUPTIBLE); |
1318 | 1331 | ||
1319 | smp_mb(); | ||
1320 | if (atomic_read(&cur_trans->num_writers) > 1) | 1332 | if (atomic_read(&cur_trans->num_writers) > 1) |
1321 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); | 1333 | schedule_timeout(MAX_SCHEDULE_TIMEOUT); |
1322 | else if (should_grow) | 1334 | else if (should_grow) |
1323 | schedule_timeout(1); | 1335 | schedule_timeout(1); |
1324 | 1336 | ||
1325 | mutex_lock(&root->fs_info->trans_mutex); | ||
1326 | finish_wait(&cur_trans->writer_wait, &wait); | 1337 | finish_wait(&cur_trans->writer_wait, &wait); |
1338 | spin_lock(&root->fs_info->trans_lock); | ||
1339 | root->fs_info->trans_no_join = 1; | ||
1340 | spin_unlock(&root->fs_info->trans_lock); | ||
1327 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1341 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
1328 | (should_grow && cur_trans->num_joined != joined)); | 1342 | (should_grow && cur_trans->num_joined != joined)); |
1329 | 1343 | ||
@@ -1364,9 +1378,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1364 | btrfs_prepare_extent_commit(trans, root); | 1378 | btrfs_prepare_extent_commit(trans, root); |
1365 | 1379 | ||
1366 | cur_trans = root->fs_info->running_transaction; | 1380 | cur_trans = root->fs_info->running_transaction; |
1367 | spin_lock(&root->fs_info->new_trans_lock); | ||
1368 | root->fs_info->running_transaction = NULL; | ||
1369 | spin_unlock(&root->fs_info->new_trans_lock); | ||
1370 | 1381 | ||
1371 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, | 1382 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, |
1372 | root->fs_info->tree_root->node); | 1383 | root->fs_info->tree_root->node); |
@@ -1387,10 +1398,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1387 | sizeof(root->fs_info->super_copy)); | 1398 | sizeof(root->fs_info->super_copy)); |
1388 | 1399 | ||
1389 | trans->transaction->blocked = 0; | 1400 | trans->transaction->blocked = 0; |
1401 | spin_lock(&root->fs_info->trans_lock); | ||
1402 | root->fs_info->running_transaction = NULL; | ||
1403 | root->fs_info->trans_no_join = 0; | ||
1404 | spin_unlock(&root->fs_info->trans_lock); | ||
1390 | 1405 | ||
1391 | wake_up(&root->fs_info->transaction_wait); | 1406 | wake_up(&root->fs_info->transaction_wait); |
1392 | 1407 | ||
1393 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1394 | ret = btrfs_write_and_wait_transaction(trans, root); | 1408 | ret = btrfs_write_and_wait_transaction(trans, root); |
1395 | BUG_ON(ret); | 1409 | BUG_ON(ret); |
1396 | write_ctree_super(trans, root, 0); | 1410 | write_ctree_super(trans, root, 0); |
@@ -1403,22 +1417,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1403 | 1417 | ||
1404 | btrfs_finish_extent_commit(trans, root); | 1418 | btrfs_finish_extent_commit(trans, root); |
1405 | 1419 | ||
1406 | mutex_lock(&root->fs_info->trans_mutex); | ||
1407 | |||
1408 | cur_trans->commit_done = 1; | 1420 | cur_trans->commit_done = 1; |
1409 | 1421 | ||
1410 | root->fs_info->last_trans_committed = cur_trans->transid; | 1422 | root->fs_info->last_trans_committed = cur_trans->transid; |
1411 | 1423 | ||
1412 | wake_up(&cur_trans->commit_wait); | 1424 | wake_up(&cur_trans->commit_wait); |
1413 | 1425 | ||
1426 | spin_lock(&root->fs_info->trans_lock); | ||
1414 | list_del_init(&cur_trans->list); | 1427 | list_del_init(&cur_trans->list); |
1428 | spin_unlock(&root->fs_info->trans_lock); | ||
1429 | |||
1415 | put_transaction(cur_trans); | 1430 | put_transaction(cur_trans); |
1416 | put_transaction(cur_trans); | 1431 | put_transaction(cur_trans); |
1417 | 1432 | ||
1418 | trace_btrfs_transaction_commit(root); | 1433 | trace_btrfs_transaction_commit(root); |
1419 | 1434 | ||
1420 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1421 | |||
1422 | if (current->journal_info == trans) | 1435 | if (current->journal_info == trans) |
1423 | current->journal_info = NULL; | 1436 | current->journal_info = NULL; |
1424 | 1437 | ||
@@ -1438,9 +1451,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1438 | LIST_HEAD(list); | 1451 | LIST_HEAD(list); |
1439 | struct btrfs_fs_info *fs_info = root->fs_info; | 1452 | struct btrfs_fs_info *fs_info = root->fs_info; |
1440 | 1453 | ||
1441 | mutex_lock(&fs_info->trans_mutex); | 1454 | spin_lock(&fs_info->trans_lock); |
1442 | list_splice_init(&fs_info->dead_roots, &list); | 1455 | list_splice_init(&fs_info->dead_roots, &list); |
1443 | mutex_unlock(&fs_info->trans_mutex); | 1456 | spin_unlock(&fs_info->trans_lock); |
1444 | 1457 | ||
1445 | while (!list_empty(&list)) { | 1458 | while (!list_empty(&list)) { |
1446 | root = list_entry(list.next, struct btrfs_root, root_list); | 1459 | root = list_entry(list.next, struct btrfs_root, root_list); |