Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--	fs/btrfs/transaction.c	1023
1 file changed, 1023 insertions, 0 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
new file mode 100644
index 000000000000..5ecc24d634a2
--- /dev/null
+++ b/fs/btrfs/transaction.c
@@ -0,0 +1,1023 @@
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "ref-cache.h"
#include "tree-log.h"

static int total_trans = 0;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

#define BTRFS_ROOT_TRANS_TAG 0

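/*
 * drop one reference on a transaction; on the final put the transaction
 * is unlinked from fs_info->trans_list and freed.  every caller in this
 * file holds fs_info->trans_mutex around the put, which is what keeps
 * use_count consistent.
 */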
static noinline void put_transaction(struct btrfs_transaction *transaction)
{
	WARN_ON(transaction->use_count == 0);
	transaction->use_count--;
	if (transaction->use_count == 0) {
		WARN_ON(total_trans == 0);
		total_trans--;
		list_del_init(&transaction->list);
		memset(transaction, 0, sizeof(*transaction));
		kmem_cache_free(btrfs_transaction_cachep, transaction);
	}
}

/*
 * either allocate a new transaction or hop into the existing one.
 * the caller must hold fs_info->trans_mutex.
 */
static noinline int join_transaction(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;
	cur_trans = root->fs_info->running_transaction;
	if (!cur_trans) {
		cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
					     GFP_NOFS);
		total_trans++;
		BUG_ON(!cur_trans);
		root->fs_info->generation++;
		root->fs_info->last_alloc = 0;
		root->fs_info->last_data_alloc = 0;
		cur_trans->num_writers = 1;
		cur_trans->num_joined = 0;
		cur_trans->transid = root->fs_info->generation;
		init_waitqueue_head(&cur_trans->writer_wait);
		init_waitqueue_head(&cur_trans->commit_wait);
		cur_trans->in_commit = 0;
		cur_trans->blocked = 0;
		cur_trans->use_count = 1;
		cur_trans->commit_done = 0;
		cur_trans->start_time = get_seconds();
		INIT_LIST_HEAD(&cur_trans->pending_snapshots);
		list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
		extent_io_tree_init(&cur_trans->dirty_pages,
				    root->fs_info->btree_inode->i_mapping,
				    GFP_NOFS);
		spin_lock(&root->fs_info->new_trans_lock);
		root->fs_info->running_transaction = cur_trans;
		spin_unlock(&root->fs_info->new_trans_lock);
	} else {
		cur_trans->num_writers++;
		cur_trans->num_joined++;
	}

	return 0;
}

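/*
 * note: a new transaction bumps fs_info->generation first and then
 * copies it into cur_trans->transid, so the running transaction's
 * transid and the generation always move in lockstep.
 */
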
/*
 * this does all the record keeping required to make sure that a
 * reference counted root is properly recorded in a given transaction.
 * This is required to make sure the old root from before we joined
 * the transaction is deleted when the transaction commits
 */
noinline int btrfs_record_root_in_trans(struct btrfs_root *root)
{
	struct btrfs_dirty_root *dirty;
	u64 running_trans_id = root->fs_info->running_transaction->transid;
	if (root->ref_cows && root->last_trans < running_trans_id) {
		WARN_ON(root == root->fs_info->extent_root);
		if (root->root_item.refs != 0) {
			radix_tree_tag_set(&root->fs_info->fs_roots_radix,
				   (unsigned long)root->root_key.objectid,
				   BTRFS_ROOT_TRANS_TAG);

			dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
			BUG_ON(!dirty);
			dirty->root = kmalloc(sizeof(*dirty->root), GFP_NOFS);
			BUG_ON(!dirty->root);
			dirty->latest_root = root;
			INIT_LIST_HEAD(&dirty->list);

			root->commit_root = btrfs_root_node(root);

			memcpy(dirty->root, root, sizeof(*root));
			spin_lock_init(&dirty->root->node_lock);
			spin_lock_init(&dirty->root->list_lock);
			mutex_init(&dirty->root->objectid_mutex);
			mutex_init(&dirty->root->log_mutex);
			INIT_LIST_HEAD(&dirty->root->dead_list);
			dirty->root->node = root->commit_root;
			dirty->root->commit_root = NULL;

			spin_lock(&root->list_lock);
			list_add(&dirty->root->dead_list, &root->dead_list);
			spin_unlock(&root->list_lock);

			root->dirty_root = dirty;
		} else {
			WARN_ON(1);
		}
		root->last_trans = running_trans_id;
	}
	return 0;
}

/* wait for a commit of the current transaction to become unblocked.
 * when this is done, it is safe to start a new transaction, but the
 * current transaction might not be fully on disk yet.
 */
static void wait_current_trans(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;

	cur_trans = root->fs_info->running_transaction;
	if (cur_trans && cur_trans->blocked) {
		DEFINE_WAIT(wait);
		cur_trans->use_count++;
		while (1) {
			prepare_to_wait(&root->fs_info->transaction_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (cur_trans->blocked) {
				mutex_unlock(&root->fs_info->trans_mutex);
				schedule();
				mutex_lock(&root->fs_info->trans_mutex);
				finish_wait(&root->fs_info->transaction_wait,
					    &wait);
			} else {
				finish_wait(&root->fs_info->transaction_wait,
					    &wait);
				break;
			}
		}
		put_transaction(cur_trans);
	}
}

static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
						    int num_blocks, int wait)
{
	struct btrfs_trans_handle *h =
		kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
	int ret;

	mutex_lock(&root->fs_info->trans_mutex);
	if (!root->fs_info->log_root_recovering &&
	    ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
		wait_current_trans(root);
	ret = join_transaction(root);
	BUG_ON(ret);

	btrfs_record_root_in_trans(root);
	h->transid = root->fs_info->running_transaction->transid;
	h->transaction = root->fs_info->running_transaction;
	h->blocks_reserved = num_blocks;
	h->blocks_used = 0;
	h->block_group = NULL;
	h->alloc_exclude_nr = 0;
	h->alloc_exclude_start = 0;
	root->fs_info->running_transaction->use_count++;
	mutex_unlock(&root->fs_info->trans_mutex);
	return h;
}

struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	return start_transaction(root, num_blocks, 1);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
						  int num_blocks)
{
	return start_transaction(root, num_blocks, 0);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
							  int num_blocks)
{
	return start_transaction(r, num_blocks, 2);
}

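/*
 * a sketch of the expected calling pattern (this mirrors what
 * btrfs_defrag_root below actually does):
 *
 *	struct btrfs_trans_handle *trans;
 *
 *	trans = btrfs_start_transaction(root, 1);
 *	... modify the trees ...
 *	btrfs_end_transaction(trans, root);
 *
 * btrfs_join_transaction never waits for a blocked commit,
 * btrfs_start_transaction waits unless open_ioctl_trans is set, and
 * btrfs_start_ioctl_transaction always waits.
 */
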
/* wait for a transaction commit to be fully complete */
static noinline int wait_for_commit(struct btrfs_root *root,
				    struct btrfs_transaction *commit)
{
	DEFINE_WAIT(wait);
	mutex_lock(&root->fs_info->trans_mutex);
	while (!commit->commit_done) {
		prepare_to_wait(&commit->commit_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		if (commit->commit_done)
			break;
		mutex_unlock(&root->fs_info->trans_mutex);
		schedule();
		mutex_lock(&root->fs_info->trans_mutex);
	}
	mutex_unlock(&root->fs_info->trans_mutex);
	finish_wait(&commit->commit_wait, &wait);
	return 0;
}

/*
 * rate limit against the drop_snapshot code.  This helps to slow down
 * new operations if the drop_snapshot code isn't able to keep up.
 */
static void throttle_on_drops(struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;
	int harder_count = 0;

harder:
	if (atomic_read(&info->throttles)) {
		DEFINE_WAIT(wait);
		int thr;
		thr = atomic_read(&info->throttle_gen);

		do {
			prepare_to_wait(&info->transaction_throttle,
					&wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(&info->throttles)) {
				finish_wait(&info->transaction_throttle, &wait);
				break;
			}
			schedule();
			finish_wait(&info->transaction_throttle, &wait);
		} while (thr == atomic_read(&info->throttle_gen));
		harder_count++;

		if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
		    harder_count < 2)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
		    harder_count < 10)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
		    harder_count < 20)
			goto harder;
	}
}

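/*
 * note on the harder_count ladder above: the wait is retried up to 2,
 * 10 or 20 times as total_ref_cache_size crosses 1MB, 5MB and 10MB,
 * so the bigger the ref-cache backlog, the longer new writers are
 * held off.
 */
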
void btrfs_throttle(struct btrfs_root *root)
{
	mutex_lock(&root->fs_info->trans_mutex);
	if (!root->fs_info->open_ioctl_trans)
		wait_current_trans(root);
	mutex_unlock(&root->fs_info->trans_mutex);

	throttle_on_drops(root);
}

static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, int throttle)
{
	struct btrfs_transaction *cur_trans;
	struct btrfs_fs_info *info = root->fs_info;

	mutex_lock(&info->trans_mutex);
	cur_trans = info->running_transaction;
	WARN_ON(cur_trans != trans->transaction);
	WARN_ON(cur_trans->num_writers < 1);
	cur_trans->num_writers--;

	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
	put_transaction(cur_trans);
	mutex_unlock(&info->trans_mutex);
	memset(trans, 0, sizeof(*trans));
	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	if (throttle)
		throttle_on_drops(root);

	return 0;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}

/*
 * when btree blocks are allocated, they have some corresponding bits
 * set for them in one of two extent_io trees.  This is used to make
 * sure all of those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
					struct extent_io_tree *dirty_pages)
{
	int ret;
	int err = 0;
	int werr = 0;
	struct page *page;
	struct inode *btree_inode = root->fs_info->btree_inode;
	u64 start = 0;
	u64 end;
	unsigned long index;

	while (1) {
		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		while (start <= end) {
			cond_resched();

			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;

			btree_lock_page_hook(page);
			if (!page->mapping) {
				unlock_page(page);
				page_cache_release(page);
				continue;
			}

			if (PageWriteback(page)) {
				if (PageDirty(page))
					wait_on_page_writeback(page);
				else {
					unlock_page(page);
					page_cache_release(page);
					continue;
				}
			}
			err = write_one_page(page, 0);
			if (err)
				werr = err;
			page_cache_release(page);
		}
	}
	while (1) {
		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
		while (start <= end) {
			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;
			if (PageDirty(page)) {
				btree_lock_page_hook(page);
				wait_on_page_writeback(page);
				err = write_one_page(page, 0);
				if (err)
					werr = err;
			}
			wait_on_page_writeback(page);
			page_cache_release(page);
			cond_resched();
		}
	}
	if (err)
		werr = err;
	return werr;
}

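/*
 * the index math in both loops above walks the range one page at a
 * time: start >> PAGE_CACHE_SHIFT turns a byte offset into a page
 * index (with 4K pages, bytes 0..4095 map to index 0), and start is
 * then advanced to the first byte of the following page.
 */
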
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		struct inode *btree_inode;
		btree_inode = root->fs_info->btree_inode;
		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
					   &trans->transaction->dirty_pages);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops until the cowonly root stops changing while its
 * pointer is being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret;
	u64 old_root_bytenr;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	btrfs_write_dirty_block_groups(trans, root);
	while (1) {
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
		if (old_root_bytenr == root->node->start)
			break;
		btrfs_set_root_bytenr(&root->root_item,
				      root->node->start);
		btrfs_set_root_level(&root->root_item,
				     btrfs_header_level(root->node));
		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		BUG_ON(ret);
		btrfs_write_dirty_block_groups(trans, root);
	}
	return 0;
}

/*
 * update all the cowonly tree roots on disk
 */
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct list_head *next;

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);
		update_cowonly_root(trans, root);
	}
	return 0;
}

/*
 * dead roots are old snapshots that need to be deleted.  This allocates
 * a dirty root struct and adds it onto the list of dead roots to be
 * processed later
 */
int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest)
{
	struct btrfs_dirty_root *dirty;

	dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
	if (!dirty)
		return -ENOMEM;
	dirty->root = root;
	dirty->latest_root = latest;

	mutex_lock(&root->fs_info->trans_mutex);
	list_add(&dirty->list, &latest->fs_info->dead_roots);
	mutex_unlock(&root->fs_info->trans_mutex);
	return 0;
}

/*
 * at transaction commit time we need to schedule the old roots for
 * deletion via btrfs_drop_snapshot.  This runs through all the
 * reference counted roots that were modified in the current
 * transaction and puts them into the drop list
 */
static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
				    struct radix_tree_root *radix,
				    struct list_head *list)
{
	struct btrfs_dirty_root *dirty;
	struct btrfs_root *gang[8];
	struct btrfs_root *root;
	int i;
	int ret;
	int err = 0;
	u32 refs;

	while (1) {
		ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(radix,
				     (unsigned long)root->root_key.objectid,
				     BTRFS_ROOT_TRANS_TAG);

			BUG_ON(!root->ref_tree);
			dirty = root->dirty_root;

			btrfs_free_log(trans, root);
			btrfs_free_reloc_root(root);

			if (root->commit_root == root->node) {
				WARN_ON(root->node->start !=
					btrfs_root_bytenr(&root->root_item));

				free_extent_buffer(root->commit_root);
				root->commit_root = NULL;
				root->dirty_root = NULL;

				spin_lock(&root->list_lock);
				list_del_init(&dirty->root->dead_list);
				spin_unlock(&root->list_lock);

				kfree(dirty->root);
				kfree(dirty);

				/* make sure to update the root on disk
				 * so we get any updates to the block used
				 * counts
				 */
				err = btrfs_update_root(trans,
						root->fs_info->tree_root,
						&root->root_key,
						&root->root_item);
				continue;
			}

			memset(&root->root_item.drop_progress, 0,
			       sizeof(struct btrfs_disk_key));
			root->root_item.drop_level = 0;
			root->commit_root = NULL;
			root->dirty_root = NULL;
			root->root_key.offset = root->fs_info->generation;
			btrfs_set_root_bytenr(&root->root_item,
					      root->node->start);
			btrfs_set_root_level(&root->root_item,
					     btrfs_header_level(root->node));
			err = btrfs_insert_root(trans, root->fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			if (err)
				break;

			refs = btrfs_root_refs(&dirty->root->root_item);
			btrfs_set_root_refs(&dirty->root->root_item, refs - 1);
			err = btrfs_update_root(trans, root->fs_info->tree_root,
						&dirty->root->root_key,
						&dirty->root->root_item);

			BUG_ON(err);
			if (refs == 1) {
				list_add(&dirty->list, list);
			} else {
				WARN_ON(1);
				free_extent_buffer(dirty->root->node);
				kfree(dirty->root);
				kfree(dirty);
			}
		}
	}
	return err;
}

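/*
 * note on the refs handling above: the old copy of the root has its
 * on-disk refcount decremented, and only when that count was 1 (so it
 * is now zero) is the stale root queued on the drop list for
 * btrfs_drop_snapshot.  Any other value is unexpected (hence the
 * WARN_ON) and the in-memory copy is freed on the spot.
 */
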
/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the
 * disk, otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
	struct btrfs_fs_info *info = root->fs_info;
	int ret;
	struct btrfs_trans_handle *trans;
	unsigned long nr;

	smp_mb();
	if (root->defrag_running)
		return 0;
	trans = btrfs_start_transaction(root, 1);
	while (1) {
		root->defrag_running = 1;
		ret = btrfs_defrag_leaves(trans, root, cacheonly);
		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(info->tree_root, nr);
		cond_resched();

		trans = btrfs_start_transaction(root, 1);
		if (root->fs_info->closing || ret != -EAGAIN)
			break;
	}
	root->defrag_running = 0;
	smp_mb();
	btrfs_end_transaction(trans, root);
	return 0;
}

/*
 * Given a list of roots that need to be deleted, call
 * btrfs_drop_snapshot on all of them
 */
static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
				     struct list_head *list)
{
	struct btrfs_dirty_root *dirty;
	struct btrfs_trans_handle *trans;
	unsigned long nr;
	u64 num_bytes;
	u64 bytes_used;
	u64 max_useless;
	int ret = 0;
	int err;

	while (!list_empty(list)) {
		struct btrfs_root *root;

		dirty = list_entry(list->prev, struct btrfs_dirty_root, list);
		list_del_init(&dirty->list);

		num_bytes = btrfs_root_used(&dirty->root->root_item);
		root = dirty->latest_root;
		atomic_inc(&root->fs_info->throttles);

		while (1) {
			trans = btrfs_start_transaction(tree_root, 1);
			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, dirty->root);
			if (ret != -EAGAIN) {
				break;
			}
			mutex_unlock(&root->fs_info->drop_mutex);

			err = btrfs_update_root(trans,
					tree_root,
					&dirty->root->root_key,
					&dirty->root->root_item);
			if (err)
				ret = err;
			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, tree_root);
			BUG_ON(ret);

			btrfs_btree_balance_dirty(tree_root, nr);
			cond_resched();
		}
		BUG_ON(ret);
		atomic_dec(&root->fs_info->throttles);
		wake_up(&root->fs_info->transaction_throttle);

		mutex_lock(&root->fs_info->alloc_mutex);
		num_bytes -= btrfs_root_used(&dirty->root->root_item);
		bytes_used = btrfs_root_used(&root->root_item);
		if (num_bytes) {
			btrfs_record_root_in_trans(root);
			btrfs_set_root_used(&root->root_item,
					    bytes_used - num_bytes);
		}
		mutex_unlock(&root->fs_info->alloc_mutex);

		ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key);
		if (ret) {
			BUG();
			break;
		}
		mutex_unlock(&root->fs_info->drop_mutex);

		spin_lock(&root->list_lock);
		list_del_init(&dirty->root->dead_list);
		if (!list_empty(&root->dead_list)) {
			struct btrfs_root *oldest;
			oldest = list_entry(root->dead_list.prev,
					    struct btrfs_root, dead_list);
			max_useless = oldest->root_key.offset - 1;
		} else {
			max_useless = root->root_key.offset - 1;
		}
		spin_unlock(&root->list_lock);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, tree_root);
		BUG_ON(ret);

		ret = btrfs_remove_leaf_refs(root, max_useless, 0);
		BUG_ON(ret);

		free_extent_buffer(dirty->root->node);
		kfree(dirty->root);
		kfree(dirty);

		btrfs_btree_balance_dirty(tree_root, nr);
		cond_resched();
	}
	return ret;
}

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	struct btrfs_key key;
	struct btrfs_root_item *new_root_item;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *root = pending->root;
	struct extent_buffer *tmp;
	struct extent_buffer *old;
	int ret;
	int namelen;
	u64 objectid;

	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
	if (!new_root_item) {
		ret = -ENOMEM;
		goto fail;
	}
	ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
	if (ret)
		goto fail;

	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));

	key.objectid = objectid;
	key.offset = trans->transid;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);

	old = btrfs_lock_root_node(root);
	btrfs_cow_block(trans, root, old, NULL, 0, &old, 0);

	btrfs_copy_root(trans, root, old, &tmp, objectid);
	btrfs_tree_unlock(old);
	free_extent_buffer(old);

	btrfs_set_root_bytenr(new_root_item, tmp->start);
	btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
				new_root_item);
	btrfs_tree_unlock(tmp);
	free_extent_buffer(tmp);
	if (ret)
		goto fail;

	/*
	 * insert the directory item
	 */
	key.offset = (u64)-1;
	namelen = strlen(pending->name);
	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
				    pending->name, namelen,
				    root->fs_info->sb->s_root->d_inode->i_ino,
				    &key, BTRFS_FT_DIR, 0);

	if (ret)
		goto fail;

	ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
			     pending->name, strlen(pending->name), objectid,
			     root->fs_info->sb->s_root->d_inode->i_ino, 0);

	/* Invalidate existing dcache entry for new snapshot. */
	btrfs_invalidate_dcache_root(root, pending->name, namelen);

fail:
	kfree(new_root_item);
	return ret;
}

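/*
 * in key terms, the snapshot above is inserted at (new objectid,
 * BTRFS_ROOT_ITEM_KEY, transid), while the directory entry points at
 * (new objectid, BTRFS_ROOT_ITEM_KEY, (u64)-1), presumably so that
 * lookups through the dir item always resolve to the newest version
 * of the snapshotted root.
 */
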
/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret;

	while (!list_empty(head)) {
		pending = list_entry(head->next,
				     struct btrfs_pending_snapshot, list);
		ret = create_pending_snapshot(trans, fs_info, pending);
		BUG_ON(ret);
		list_del(&pending->list);
		kfree(pending->name);
		kfree(pending);
	}
	return 0;
}

int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	unsigned long joined = 0;
	unsigned long timeout = 1;
	struct btrfs_transaction *cur_trans;
	struct btrfs_transaction *prev_trans = NULL;
	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
	struct list_head dirty_fs_roots;
	struct extent_io_tree *pinned_copy;
	DEFINE_WAIT(wait);
	int ret;

	INIT_LIST_HEAD(&dirty_fs_roots);
	mutex_lock(&root->fs_info->trans_mutex);
	if (trans->transaction->in_commit) {
		cur_trans = trans->transaction;
		trans->transaction->use_count++;
		mutex_unlock(&root->fs_info->trans_mutex);
		btrfs_end_transaction(trans, root);

		ret = wait_for_commit(root, cur_trans);
		BUG_ON(ret);

		mutex_lock(&root->fs_info->trans_mutex);
		put_transaction(cur_trans);
		mutex_unlock(&root->fs_info->trans_mutex);

		return 0;
	}

	pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
	if (!pinned_copy)
		return -ENOMEM;

	extent_io_tree_init(pinned_copy,
			    root->fs_info->btree_inode->i_mapping, GFP_NOFS);

	trans->transaction->in_commit = 1;
	trans->transaction->blocked = 1;
	cur_trans = trans->transaction;
	if (cur_trans->list.prev != &root->fs_info->trans_list) {
		prev_trans = list_entry(cur_trans->list.prev,
					struct btrfs_transaction, list);
		if (!prev_trans->commit_done) {
			prev_trans->use_count++;
			mutex_unlock(&root->fs_info->trans_mutex);

			wait_for_commit(root, prev_trans);

			mutex_lock(&root->fs_info->trans_mutex);
			put_transaction(prev_trans);
		}
	}

	do {
		int snap_pending = 0;
		joined = cur_trans->num_joined;
		if (!list_empty(&trans->transaction->pending_snapshots))
			snap_pending = 1;

		WARN_ON(cur_trans != trans->transaction);
		prepare_to_wait(&cur_trans->writer_wait, &wait,
				TASK_UNINTERRUPTIBLE);

		if (cur_trans->num_writers > 1)
			timeout = MAX_SCHEDULE_TIMEOUT;
		else
			timeout = 1;

		mutex_unlock(&root->fs_info->trans_mutex);

		if (snap_pending) {
			ret = btrfs_wait_ordered_extents(root, 1);
			BUG_ON(ret);
		}

		schedule_timeout(timeout);

		mutex_lock(&root->fs_info->trans_mutex);
		finish_wait(&cur_trans->writer_wait, &wait);
	} while (cur_trans->num_writers > 1 ||
		 (cur_trans->num_joined != joined));

	ret = create_pending_snapshots(trans, root->fs_info);
	BUG_ON(ret);

	WARN_ON(cur_trans != trans->transaction);

	/* btrfs_commit_tree_roots is responsible for getting the
	 * various roots consistent with each other.  Every pointer
	 * in the tree of tree roots has to point to the most up to date
	 * root for every subvolume and other tree.  So, we have to keep
	 * the tree logging code from jumping in and changing any
	 * of the trees.
	 *
	 * At this point in the commit, there can't be any tree-log
	 * writers, but a little lower down we drop the trans mutex
	 * and let new people in.  By holding the tree_log_mutex
	 * from now until after the super is written, we avoid races
	 * with the tree-log code.
	 */
	mutex_lock(&root->fs_info->tree_log_mutex);
	/*
	 * keep tree reloc code from adding new reloc trees
	 */
	mutex_lock(&root->fs_info->tree_reloc_mutex);

	ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
			      &dirty_fs_roots);
	BUG_ON(ret);

	/* add_dirty_roots gets rid of all the tree log roots, it is now
	 * safe to free the root of tree log roots
	 */
	btrfs_free_log_root_tree(trans, root->fs_info);

	btrfs_free_reloc_mappings(root);

	ret = btrfs_commit_tree_roots(trans, root);
	BUG_ON(ret);

	cur_trans = root->fs_info->running_transaction;
	spin_lock(&root->fs_info->new_trans_lock);
	root->fs_info->running_transaction = NULL;
	spin_unlock(&root->fs_info->new_trans_lock);
	btrfs_set_super_generation(&root->fs_info->super_copy,
				   cur_trans->transid);
	btrfs_set_super_root(&root->fs_info->super_copy,
			     root->fs_info->tree_root->node->start);
	btrfs_set_super_root_level(&root->fs_info->super_copy,
			   btrfs_header_level(root->fs_info->tree_root->node));

	btrfs_set_super_chunk_root(&root->fs_info->super_copy,
				   chunk_root->node->start);
	btrfs_set_super_chunk_root_level(&root->fs_info->super_copy,
					 btrfs_header_level(chunk_root->node));

	if (!root->fs_info->log_root_recovering) {
		btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
		btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
	}

	memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
	       sizeof(root->fs_info->super_copy));

	btrfs_copy_pinned(root, pinned_copy);

	trans->transaction->blocked = 0;
	wake_up(&root->fs_info->transaction_throttle);
	wake_up(&root->fs_info->transaction_wait);

	mutex_unlock(&root->fs_info->trans_mutex);
	ret = btrfs_write_and_wait_transaction(trans, root);
	BUG_ON(ret);
	write_ctree_super(trans, root);

	/*
	 * the super is written, we can safely allow the tree-loggers
	 * to go about their business
	 */
	mutex_unlock(&root->fs_info->tree_log_mutex);

	btrfs_finish_extent_commit(trans, root, pinned_copy);
	kfree(pinned_copy);

	btrfs_drop_dead_reloc_roots(root);
	mutex_unlock(&root->fs_info->tree_reloc_mutex);

	mutex_lock(&root->fs_info->trans_mutex);

	cur_trans->commit_done = 1;
	root->fs_info->last_trans_committed = cur_trans->transid;
	wake_up(&cur_trans->commit_wait);
	put_transaction(cur_trans);
	put_transaction(cur_trans);

	list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
	if (root->fs_info->closing)
		list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);

	mutex_unlock(&root->fs_info->trans_mutex);
	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	if (root->fs_info->closing) {
		drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
	}
	return ret;
}

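/*
 * a summary of the commit sequence above: wait for the writer count to
 * settle, create pending snapshots, queue the old fs roots for
 * deletion, free the log roots, make the cowonly roots consistent,
 * update the super copy, unblock new transactions, write and wait on
 * the dirty tree blocks, write the super, then release the pinned
 * extents and wake anyone in wait_for_commit.  The two back to back
 * put_transaction calls drop the reference taken when the handle
 * joined plus the transaction's original creation reference.
 */
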
/*
 * interface function to delete all the snapshots we have scheduled for
 * deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
	struct list_head dirty_roots;
	INIT_LIST_HEAD(&dirty_roots);
again:
	mutex_lock(&root->fs_info->trans_mutex);
	list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
	mutex_unlock(&root->fs_info->trans_mutex);

	if (!list_empty(&dirty_roots)) {
		drop_dirty_roots(root, &dirty_roots);
		goto again;
	}
	return 0;
}
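
/*
 * the again: loop above is needed because dropping roots starts and
 * commits transactions of its own, and btrfs_commit_transaction splices
 * freshly dirtied roots back onto fs_info->dead_roots.
 */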