aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2008-07-17 12:53:50 -0400
committer Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:04 -0400
commit e6dcd2dc9c489108648e2ed543315dd134d50a9a (patch)
tree cddf6f588b65e28c5feb8bff89b22d8ff70f8a50 /fs/btrfs/transaction.c
parent 77a41afb7d0dd0f27b6f2f1a5bc701929c7034de (diff)
Btrfs: New data=ordered implementation
The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time.

The new code changes the way data allocations and extents work:

* When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated and inserted into a per-inode rbtree to track the pending extents.

* As each page is written, EXTENT_ORDERED is cleared on the bytes corresponding to that page.

* When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, the previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- fs/btrfs/transaction.c 67
1 files changed, 9 insertions, 58 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a8a3cb03de59..86a5acc19ce7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root)
67 cur_trans->start_time = get_seconds(); 67 cur_trans->start_time = get_seconds();
68 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 68 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list); 69 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
70 btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
71 extent_io_tree_init(&cur_trans->dirty_pages, 70 extent_io_tree_init(&cur_trans->dirty_pages,
72 root->fs_info->btree_inode->i_mapping, 71 root->fs_info->btree_inode->i_mapping,
73 GFP_NOFS); 72 GFP_NOFS);
@@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
158 wake_up(&cur_trans->writer_wait); 157 wake_up(&cur_trans->writer_wait);
159 158
160 if (cur_trans->in_commit && throttle) { 159 if (cur_trans->in_commit && throttle) {
161 int ret; 160 DEFINE_WAIT(wait);
162 mutex_unlock(&root->fs_info->trans_mutex); 161 mutex_unlock(&root->fs_info->trans_mutex);
163 ret = wait_for_commit(root, cur_trans); 162 prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
164 BUG_ON(ret); 163 TASK_UNINTERRUPTIBLE);
164 schedule();
165 finish_wait(&root->fs_info->transaction_throttle, &wait);
165 mutex_lock(&root->fs_info->trans_mutex); 166 mutex_lock(&root->fs_info->trans_mutex);
166 } 167 }
167 168
@@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
486 return ret; 487 return ret;
487} 488}
488 489
489int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
490 struct btrfs_root *root)
491{
492 struct btrfs_transaction *cur_trans = trans->transaction;
493 struct inode *inode;
494 u64 root_objectid = 0;
495 u64 objectid = 0;
496 int ret;
497
498 atomic_inc(&root->fs_info->throttles);
499 while(1) {
500 ret = btrfs_find_first_ordered_inode(
501 &cur_trans->ordered_inode_tree,
502 &root_objectid, &objectid, &inode);
503 if (!ret)
504 break;
505
506 mutex_unlock(&root->fs_info->trans_mutex);
507
508 if (S_ISREG(inode->i_mode)) {
509 atomic_inc(&BTRFS_I(inode)->ordered_writeback);
510 filemap_fdatawrite(inode->i_mapping);
511 atomic_dec(&BTRFS_I(inode)->ordered_writeback);
512 }
513 iput(inode);
514
515 mutex_lock(&root->fs_info->trans_mutex);
516 }
517 while(1) {
518 root_objectid = 0;
519 objectid = 0;
520 ret = btrfs_find_del_first_ordered_inode(
521 &cur_trans->ordered_inode_tree,
522 &root_objectid, &objectid, &inode);
523 if (!ret)
524 break;
525 mutex_unlock(&root->fs_info->trans_mutex);
526
527 if (S_ISREG(inode->i_mode)) {
528 atomic_inc(&BTRFS_I(inode)->ordered_writeback);
529 filemap_write_and_wait(inode->i_mapping);
530 atomic_dec(&BTRFS_I(inode)->ordered_writeback);
531 }
532 atomic_dec(&inode->i_count);
533 iput(inode);
534
535 mutex_lock(&root->fs_info->trans_mutex);
536 }
537 atomic_dec(&root->fs_info->throttles);
538 return 0;
539}
540
541static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, 490static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
542 struct btrfs_fs_info *fs_info, 491 struct btrfs_fs_info *fs_info,
543 struct btrfs_pending_snapshot *pending) 492 struct btrfs_pending_snapshot *pending)
@@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
666 extent_io_tree_init(pinned_copy, 615 extent_io_tree_init(pinned_copy,
667 root->fs_info->btree_inode->i_mapping, GFP_NOFS); 616 root->fs_info->btree_inode->i_mapping, GFP_NOFS);
668 617
618printk("commit trans %Lu\n", trans->transid);
669 trans->transaction->in_commit = 1; 619 trans->transaction->in_commit = 1;
670 cur_trans = trans->transaction; 620 cur_trans = trans->transaction;
671 if (cur_trans->list.prev != &root->fs_info->trans_list) { 621 if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
699 649
700 mutex_lock(&root->fs_info->trans_mutex); 650 mutex_lock(&root->fs_info->trans_mutex);
701 finish_wait(&cur_trans->writer_wait, &wait); 651 finish_wait(&cur_trans->writer_wait, &wait);
702 ret = btrfs_write_ordered_inodes(trans, root);
703
704 } while (cur_trans->num_writers > 1 || 652 } while (cur_trans->num_writers > 1 ||
705 (cur_trans->num_joined != joined)); 653 (cur_trans->num_joined != joined));
706 654
@@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
736 684
737 btrfs_copy_pinned(root, pinned_copy); 685 btrfs_copy_pinned(root, pinned_copy);
738 686
687 wake_up(&root->fs_info->transaction_throttle);
688
739 mutex_unlock(&root->fs_info->trans_mutex); 689 mutex_unlock(&root->fs_info->trans_mutex);
740 ret = btrfs_write_and_wait_transaction(trans, root); 690 ret = btrfs_write_and_wait_transaction(trans, root);
741 BUG_ON(ret); 691 BUG_ON(ret);
@@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
758 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); 708 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
759 709
760 mutex_unlock(&root->fs_info->trans_mutex); 710 mutex_unlock(&root->fs_info->trans_mutex);
711printk("done commit trans %Lu\n", trans->transid);
761 kmem_cache_free(btrfs_trans_handle_cachep, trans); 712 kmem_cache_free(btrfs_trans_handle_cachep, trans);
762 713
763 if (root->fs_info->closing) { 714 if (root->fs_info->closing) {