diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-01-08 15:46:30 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:03:59 -0400 |
commit | 3063d29f2a4d4a4e9fa1ec77c124514f287c6da7 (patch) | |
tree | 61aa53d18c6684a327b6166764eecbea9d0e6b5b /fs/btrfs | |
parent | dc17ff8f11d129db9e83ab7244769e4eae05e14d (diff) |
Btrfs: Move snapshot creation to commit time
It is very difficult to create a consistent snapshot of the btree while
other writers can still update the btree before the commit is done.
This change moves snapshot creation into the commit itself: the request is
queued on the running transaction and carried out at commit time, while
no other updates are possible.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/inode.c | 79 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 81 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 7 |
3 files changed, 100 insertions, 67 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d6e1ac0a9a0..10cece11dbd8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -2250,13 +2250,10 @@ fail_commit: | |||
2250 | 2250 | ||
2251 | static int create_snapshot(struct btrfs_root *root, char *name, int namelen) | 2251 | static int create_snapshot(struct btrfs_root *root, char *name, int namelen) |
2252 | { | 2252 | { |
2253 | struct btrfs_pending_snapshot *pending_snapshot; | ||
2253 | struct btrfs_trans_handle *trans; | 2254 | struct btrfs_trans_handle *trans; |
2254 | struct btrfs_key key; | ||
2255 | struct btrfs_root_item new_root_item; | ||
2256 | struct extent_buffer *tmp; | ||
2257 | int ret; | 2255 | int ret; |
2258 | int err; | 2256 | int err; |
2259 | u64 objectid; | ||
2260 | unsigned long nr = 0; | 2257 | unsigned long nr = 0; |
2261 | 2258 | ||
2262 | if (!root->ref_cows) | 2259 | if (!root->ref_cows) |
@@ -2267,72 +2264,26 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) | |||
2267 | if (ret) | 2264 | if (ret) |
2268 | goto fail_unlock; | 2265 | goto fail_unlock; |
2269 | 2266 | ||
2267 | pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS); | ||
2268 | if (!pending_snapshot) { | ||
2269 | ret = -ENOMEM; | ||
2270 | goto fail_unlock; | ||
2271 | } | ||
2272 | pending_snapshot->name = kstrndup(name, namelen, GFP_NOFS); | ||
2273 | if (!pending_snapshot->name) { | ||
2274 | ret = -ENOMEM; | ||
2275 | kfree(pending_snapshot); | ||
2276 | goto fail_unlock; | ||
2277 | } | ||
2270 | trans = btrfs_start_transaction(root, 1); | 2278 | trans = btrfs_start_transaction(root, 1); |
2271 | BUG_ON(!trans); | 2279 | BUG_ON(!trans); |
2272 | err = btrfs_commit_transaction(trans, root); | ||
2273 | |||
2274 | trans = btrfs_start_transaction(root, 1); | ||
2275 | 2280 | ||
2281 | pending_snapshot->root = root; | ||
2282 | list_add(&pending_snapshot->list, | ||
2283 | &trans->transaction->pending_snapshots); | ||
2276 | ret = btrfs_update_inode(trans, root, root->inode); | 2284 | ret = btrfs_update_inode(trans, root, root->inode); |
2277 | if (ret) | ||
2278 | goto fail; | ||
2279 | |||
2280 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | ||
2281 | 0, &objectid); | ||
2282 | if (ret) | ||
2283 | goto fail; memcpy(&new_root_item, &root->root_item, | ||
2284 | sizeof(new_root_item)); | ||
2285 | |||
2286 | key.objectid = objectid; | ||
2287 | key.offset = 1; | ||
2288 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
2289 | |||
2290 | extent_buffer_get(root->node); | ||
2291 | btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); | ||
2292 | free_extent_buffer(tmp); | ||
2293 | |||
2294 | /* write the ordered inodes to force all delayed allocations to | ||
2295 | * be filled. Once this is done, we can copy the root | ||
2296 | */ | ||
2297 | mutex_lock(&root->fs_info->trans_mutex); | ||
2298 | btrfs_write_ordered_inodes(trans, root); | ||
2299 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2300 | |||
2301 | btrfs_copy_root(trans, root, root->node, &tmp, objectid); | ||
2302 | |||
2303 | btrfs_set_root_bytenr(&new_root_item, tmp->start); | ||
2304 | btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); | ||
2305 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | ||
2306 | &new_root_item); | ||
2307 | printk("new root %Lu node %Lu\n", objectid, tmp->start); | ||
2308 | free_extent_buffer(tmp); | ||
2309 | if (ret) | ||
2310 | goto fail; | ||
2311 | |||
2312 | /* | ||
2313 | * insert the directory item | ||
2314 | */ | ||
2315 | key.offset = (u64)-1; | ||
2316 | ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, | ||
2317 | name, namelen, | ||
2318 | root->fs_info->sb->s_root->d_inode->i_ino, | ||
2319 | &key, BTRFS_FT_DIR); | ||
2320 | |||
2321 | if (ret) | ||
2322 | goto fail; | ||
2323 | |||
2324 | ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, | ||
2325 | name, namelen, objectid, | ||
2326 | root->fs_info->sb->s_root->d_inode->i_ino); | ||
2327 | |||
2328 | if (ret) | ||
2329 | goto fail; | ||
2330 | fail: | ||
2331 | nr = trans->blocks_used; | ||
2332 | err = btrfs_commit_transaction(trans, root); | 2285 | err = btrfs_commit_transaction(trans, root); |
2333 | 2286 | ||
2334 | if (err && !ret) | ||
2335 | ret = err; | ||
2336 | fail_unlock: | 2287 | fail_unlock: |
2337 | mutex_unlock(&root->fs_info->fs_mutex); | 2288 | mutex_unlock(&root->fs_info->fs_mutex); |
2338 | btrfs_btree_balance_dirty(root, nr); | 2289 | btrfs_btree_balance_dirty(root, nr); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3ed5868e7c0f..dc9865323e38 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -66,6 +66,7 @@ static int join_transaction(struct btrfs_root *root) | |||
66 | cur_trans->use_count = 1; | 66 | cur_trans->use_count = 1; |
67 | cur_trans->commit_done = 0; | 67 | cur_trans->commit_done = 0; |
68 | cur_trans->start_time = get_seconds(); | 68 | cur_trans->start_time = get_seconds(); |
69 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | ||
69 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 70 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); |
70 | btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); | 71 | btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); |
71 | extent_map_tree_init(&cur_trans->dirty_pages, | 72 | extent_map_tree_init(&cur_trans->dirty_pages, |
@@ -481,10 +482,8 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, | |||
481 | struct inode *inode; | 482 | struct inode *inode; |
482 | u64 root_objectid = 0; | 483 | u64 root_objectid = 0; |
483 | u64 objectid = 0; | 484 | u64 objectid = 0; |
484 | u64 transid = trans->transid; | ||
485 | int ret; | 485 | int ret; |
486 | 486 | ||
487 | printk("write ordered trans %Lu\n", transid); | ||
488 | while(1) { | 487 | while(1) { |
489 | ret = btrfs_find_first_ordered_inode( | 488 | ret = btrfs_find_first_ordered_inode( |
490 | &cur_trans->ordered_inode_tree, | 489 | &cur_trans->ordered_inode_tree, |
@@ -524,7 +523,80 @@ printk("write ordered trans %Lu\n", transid); | |||
524 | mutex_lock(&root->fs_info->fs_mutex); | 523 | mutex_lock(&root->fs_info->fs_mutex); |
525 | mutex_lock(&root->fs_info->trans_mutex); | 524 | mutex_lock(&root->fs_info->trans_mutex); |
526 | } | 525 | } |
527 | printk("done write ordered trans %Lu\n", transid); | 526 | return 0; |
527 | } | ||
528 | |||
529 | static int create_pending_snapshot(struct btrfs_trans_handle *trans, | ||
530 | struct btrfs_fs_info *fs_info, | ||
531 | struct btrfs_pending_snapshot *pending) | ||
532 | { | ||
533 | struct btrfs_key key; | ||
534 | struct btrfs_root_item new_root_item; | ||
535 | struct btrfs_root *tree_root = fs_info->tree_root; | ||
536 | struct btrfs_root *root = pending->root; | ||
537 | struct extent_buffer *tmp; | ||
538 | int ret; | ||
539 | u64 objectid; | ||
540 | |||
541 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | ||
542 | if (ret) | ||
543 | goto fail; | ||
544 | |||
545 | memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); | ||
546 | |||
547 | key.objectid = objectid; | ||
548 | key.offset = 1; | ||
549 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
550 | |||
551 | extent_buffer_get(root->node); | ||
552 | btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); | ||
553 | free_extent_buffer(tmp); | ||
554 | |||
555 | btrfs_copy_root(trans, root, root->node, &tmp, objectid); | ||
556 | |||
557 | btrfs_set_root_bytenr(&new_root_item, tmp->start); | ||
558 | btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); | ||
559 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | ||
560 | &new_root_item); | ||
561 | free_extent_buffer(tmp); | ||
562 | if (ret) | ||
563 | goto fail; | ||
564 | |||
565 | /* | ||
566 | * insert the directory item | ||
567 | */ | ||
568 | key.offset = (u64)-1; | ||
569 | ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, | ||
570 | pending->name, strlen(pending->name), | ||
571 | root->fs_info->sb->s_root->d_inode->i_ino, | ||
572 | &key, BTRFS_FT_DIR); | ||
573 | |||
574 | if (ret) | ||
575 | goto fail; | ||
576 | |||
577 | ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root, | ||
578 | pending->name, strlen(pending->name), objectid, | ||
579 | root->fs_info->sb->s_root->d_inode->i_ino); | ||
580 | fail: | ||
581 | return ret; | ||
582 | } | ||
583 | |||
584 | static int create_pending_snapshots(struct btrfs_trans_handle *trans, | ||
585 | struct btrfs_fs_info *fs_info) | ||
586 | { | ||
587 | struct btrfs_pending_snapshot *pending; | ||
588 | struct list_head *head = &trans->transaction->pending_snapshots; | ||
589 | int ret; | ||
590 | |||
591 | while(!list_empty(head)) { | ||
592 | pending = list_entry(head->next, | ||
593 | struct btrfs_pending_snapshot, list); | ||
594 | ret = create_pending_snapshot(trans, fs_info, pending); | ||
595 | BUG_ON(ret); | ||
596 | list_del(&pending->list); | ||
597 | kfree(pending->name); | ||
598 | kfree(pending); | ||
599 | } | ||
528 | return 0; | 600 | return 0; |
529 | } | 601 | } |
530 | 602 | ||
@@ -610,6 +682,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
610 | } while (cur_trans->num_writers > 1 || | 682 | } while (cur_trans->num_writers > 1 || |
611 | (cur_trans->num_joined != joined)); | 683 | (cur_trans->num_joined != joined)); |
612 | 684 | ||
685 | ret = create_pending_snapshots(trans, root->fs_info); | ||
686 | BUG_ON(ret); | ||
687 | |||
613 | WARN_ON(cur_trans != trans->transaction); | 688 | WARN_ON(cur_trans != trans->transaction); |
614 | 689 | ||
615 | ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, | 690 | ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index c157ddbe9d1e..fd52e9b23922 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -34,6 +34,7 @@ struct btrfs_transaction { | |||
34 | struct btrfs_ordered_inode_tree ordered_inode_tree; | 34 | struct btrfs_ordered_inode_tree ordered_inode_tree; |
35 | wait_queue_head_t writer_wait; | 35 | wait_queue_head_t writer_wait; |
36 | wait_queue_head_t commit_wait; | 36 | wait_queue_head_t commit_wait; |
37 | struct list_head pending_snapshots; | ||
37 | }; | 38 | }; |
38 | 39 | ||
39 | struct btrfs_trans_handle { | 40 | struct btrfs_trans_handle { |
@@ -46,6 +47,12 @@ struct btrfs_trans_handle { | |||
46 | u64 alloc_exclude_nr; | 47 | u64 alloc_exclude_nr; |
47 | }; | 48 | }; |
48 | 49 | ||
50 | struct btrfs_pending_snapshot { | ||
51 | struct btrfs_root *root; | ||
52 | char *name; | ||
53 | struct list_head list; | ||
54 | }; | ||
55 | |||
49 | 56 | ||
50 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, | 57 | static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, |
51 | struct inode *inode) | 58 | struct inode *inode) |