aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Josef Bacik <josef@redhat.com> 2011-04-11 17:25:13 -0400
committer Josef Bacik <josef@redhat.com> 2011-05-23 13:00:57 -0400
commit a4abeea41adfa3c143c289045f4625dfaeba2212 (patch)
tree 792e2a398d8ba77447ba3f9f2c4266a1ce2f611c /fs
parent 2a1eb4614d984d5cd4c928784e9afcf5c07f93be (diff)
Btrfs: kill trans_mutex
We use trans_mutex for lots of things, here's a basic list:

1) To serialize trans_handles joining the currently running transaction.
2) To make sure that no new trans handles are started while we are committing.
3) To protect the dead_roots list and the transaction lists.

Really the serializing trans_handles joining is not too hard, and can really get bogged down in acquiring a reference to the transaction. So replace the trans_mutex with a trans_lock spinlock and use it to do the following:

1) Protect fs_info->running_transaction. All trans handles have to do is check this, and then take a reference of the transaction and keep on going.
2) Protect the fs_info->trans_list. This doesn't get used too much, basically it just holds the current transactions, which will usually just be the currently committing transaction and the currently running transaction at most.
3) Protect the dead roots list. This is only ever processed by splicing the list so this is relatively simple.
4) Protect the fs_info->reloc_ctl stuff. This is very lightweight and was using the trans_mutex before, so this is a pretty straightforward change.
5) Protect fs_info->trans_no_join. Because we don't hold the trans_lock over the entirety of the commit we need to have a way to block new people from creating a new transaction while we're doing our work. So we set trans_no_join, and in join_transaction we test to see if that is set; if it is, we return -EBUSY and the caller does a wait_current_trans and retries.
6) Make the transaction use count atomic so we don't need to take locks to modify it when we're dropping references.
7) Add a commit_lock to the transaction to make sure multiple people trying to commit the same transaction don't race and commit at the same time.
8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl trans.

I have tested this with xfstests, but obviously it is a pretty hairy change so lots of testing is greatly appreciated. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/ctree.h 6
-rw-r--r-- fs/btrfs/disk-io.c 30
-rw-r--r-- fs/btrfs/extent-tree.c 3
-rw-r--r-- fs/btrfs/file.c 4
-rw-r--r-- fs/btrfs/ioctl.c 12
-rw-r--r-- fs/btrfs/relocation.c 16
-rw-r--r-- fs/btrfs/transaction.c 271
-rw-r--r-- fs/btrfs/transaction.h 4
8 files changed, 177 insertions, 169 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8f4b81de3ae2..522a39b0033d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -919,7 +919,6 @@ struct btrfs_fs_info {
919 * is required instead of the faster short fsync log commits 919 * is required instead of the faster short fsync log commits
920 */ 920 */
921 u64 last_trans_log_full_commit; 921 u64 last_trans_log_full_commit;
922 u64 open_ioctl_trans;
923 unsigned long mount_opt:20; 922 unsigned long mount_opt:20;
924 unsigned long compress_type:4; 923 unsigned long compress_type:4;
925 u64 max_inline; 924 u64 max_inline;
@@ -936,7 +935,6 @@ struct btrfs_fs_info {
936 struct super_block *sb; 935 struct super_block *sb;
937 struct inode *btree_inode; 936 struct inode *btree_inode;
938 struct backing_dev_info bdi; 937 struct backing_dev_info bdi;
939 struct mutex trans_mutex;
940 struct mutex tree_log_mutex; 938 struct mutex tree_log_mutex;
941 struct mutex transaction_kthread_mutex; 939 struct mutex transaction_kthread_mutex;
942 struct mutex cleaner_mutex; 940 struct mutex cleaner_mutex;
@@ -957,6 +955,7 @@ struct btrfs_fs_info {
957 struct rw_semaphore subvol_sem; 955 struct rw_semaphore subvol_sem;
958 struct srcu_struct subvol_srcu; 956 struct srcu_struct subvol_srcu;
959 957
958 spinlock_t trans_lock;
960 struct list_head trans_list; 959 struct list_head trans_list;
961 struct list_head hashers; 960 struct list_head hashers;
962 struct list_head dead_roots; 961 struct list_head dead_roots;
@@ -969,6 +968,7 @@ struct btrfs_fs_info {
969 atomic_t async_submit_draining; 968 atomic_t async_submit_draining;
970 atomic_t nr_async_bios; 969 atomic_t nr_async_bios;
971 atomic_t async_delalloc_pages; 970 atomic_t async_delalloc_pages;
971 atomic_t open_ioctl_trans;
972 972
973 /* 973 /*
974 * this is used by the balancing code to wait for all the pending 974 * this is used by the balancing code to wait for all the pending
@@ -1032,6 +1032,7 @@ struct btrfs_fs_info {
1032 int closing; 1032 int closing;
1033 int log_root_recovering; 1033 int log_root_recovering;
1034 int enospc_unlink; 1034 int enospc_unlink;
1035 int trans_no_join;
1035 1036
1036 u64 total_pinned; 1037 u64 total_pinned;
1037 1038
@@ -1053,7 +1054,6 @@ struct btrfs_fs_info {
1053 struct reloc_control *reloc_ctl; 1054 struct reloc_control *reloc_ctl;
1054 1055
1055 spinlock_t delalloc_lock; 1056 spinlock_t delalloc_lock;
1056 spinlock_t new_trans_lock;
1057 u64 delalloc_bytes; 1057 u64 delalloc_bytes;
1058 1058
1059 /* data_alloc_cluster is only used in ssd mode */ 1059 /* data_alloc_cluster is only used in ssd mode */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9d6c9e332ca3..93ef254ec432 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1551,22 +1551,22 @@ static int transaction_kthread(void *arg)
1551 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1551 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1552 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1552 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1553 1553
1554 spin_lock(&root->fs_info->new_trans_lock); 1554 spin_lock(&root->fs_info->trans_lock);
1555 cur = root->fs_info->running_transaction; 1555 cur = root->fs_info->running_transaction;
1556 if (!cur) { 1556 if (!cur) {
1557 spin_unlock(&root->fs_info->new_trans_lock); 1557 spin_unlock(&root->fs_info->trans_lock);
1558 goto sleep; 1558 goto sleep;
1559 } 1559 }
1560 1560
1561 now = get_seconds(); 1561 now = get_seconds();
1562 if (!cur->blocked && 1562 if (!cur->blocked &&
1563 (now < cur->start_time || now - cur->start_time < 30)) { 1563 (now < cur->start_time || now - cur->start_time < 30)) {
1564 spin_unlock(&root->fs_info->new_trans_lock); 1564 spin_unlock(&root->fs_info->trans_lock);
1565 delay = HZ * 5; 1565 delay = HZ * 5;
1566 goto sleep; 1566 goto sleep;
1567 } 1567 }
1568 transid = cur->transid; 1568 transid = cur->transid;
1569 spin_unlock(&root->fs_info->new_trans_lock); 1569 spin_unlock(&root->fs_info->trans_lock);
1570 1570
1571 trans = btrfs_join_transaction(root); 1571 trans = btrfs_join_transaction(root);
1572 BUG_ON(IS_ERR(trans)); 1572 BUG_ON(IS_ERR(trans));
@@ -1658,7 +1658,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1658 INIT_LIST_HEAD(&fs_info->ordered_operations); 1658 INIT_LIST_HEAD(&fs_info->ordered_operations);
1659 INIT_LIST_HEAD(&fs_info->caching_block_groups); 1659 INIT_LIST_HEAD(&fs_info->caching_block_groups);
1660 spin_lock_init(&fs_info->delalloc_lock); 1660 spin_lock_init(&fs_info->delalloc_lock);
1661 spin_lock_init(&fs_info->new_trans_lock); 1661 spin_lock_init(&fs_info->trans_lock);
1662 spin_lock_init(&fs_info->ref_cache_lock); 1662 spin_lock_init(&fs_info->ref_cache_lock);
1663 spin_lock_init(&fs_info->fs_roots_radix_lock); 1663 spin_lock_init(&fs_info->fs_roots_radix_lock);
1664 spin_lock_init(&fs_info->delayed_iput_lock); 1664 spin_lock_init(&fs_info->delayed_iput_lock);
@@ -1687,6 +1687,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1687 fs_info->sb = sb; 1687 fs_info->sb = sb;
1688 fs_info->max_inline = 8192 * 1024; 1688 fs_info->max_inline = 8192 * 1024;
1689 fs_info->metadata_ratio = 0; 1689 fs_info->metadata_ratio = 0;
1690 fs_info->trans_no_join = 0;
1690 1691
1691 fs_info->thread_pool_size = min_t(unsigned long, 1692 fs_info->thread_pool_size = min_t(unsigned long,
1692 num_online_cpus() + 2, 8); 1693 num_online_cpus() + 2, 8);
@@ -1735,7 +1736,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1735 fs_info->do_barriers = 1; 1736 fs_info->do_barriers = 1;
1736 1737
1737 1738
1738 mutex_init(&fs_info->trans_mutex);
1739 mutex_init(&fs_info->ordered_operations_mutex); 1739 mutex_init(&fs_info->ordered_operations_mutex);
1740 mutex_init(&fs_info->tree_log_mutex); 1740 mutex_init(&fs_info->tree_log_mutex);
1741 mutex_init(&fs_info->chunk_mutex); 1741 mutex_init(&fs_info->chunk_mutex);
@@ -3006,10 +3006,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3006 3006
3007 WARN_ON(1); 3007 WARN_ON(1);
3008 3008
3009 mutex_lock(&root->fs_info->trans_mutex);
3010 mutex_lock(&root->fs_info->transaction_kthread_mutex); 3009 mutex_lock(&root->fs_info->transaction_kthread_mutex);
3011 3010
3011 spin_lock(&root->fs_info->trans_lock);
3012 list_splice_init(&root->fs_info->trans_list, &list); 3012 list_splice_init(&root->fs_info->trans_list, &list);
3013 root->fs_info->trans_no_join = 1;
3014 spin_unlock(&root->fs_info->trans_lock);
3015
3013 while (!list_empty(&list)) { 3016 while (!list_empty(&list)) {
3014 t = list_entry(list.next, struct btrfs_transaction, list); 3017 t = list_entry(list.next, struct btrfs_transaction, list);
3015 if (!t) 3018 if (!t)
@@ -3034,23 +3037,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3034 t->blocked = 0; 3037 t->blocked = 0;
3035 if (waitqueue_active(&root->fs_info->transaction_wait)) 3038 if (waitqueue_active(&root->fs_info->transaction_wait))
3036 wake_up(&root->fs_info->transaction_wait); 3039 wake_up(&root->fs_info->transaction_wait);
3037 mutex_unlock(&root->fs_info->trans_mutex);
3038 3040
3039 mutex_lock(&root->fs_info->trans_mutex);
3040 t->commit_done = 1; 3041 t->commit_done = 1;
3041 if (waitqueue_active(&t->commit_wait)) 3042 if (waitqueue_active(&t->commit_wait))
3042 wake_up(&t->commit_wait); 3043 wake_up(&t->commit_wait);
3043 mutex_unlock(&root->fs_info->trans_mutex);
3044
3045 mutex_lock(&root->fs_info->trans_mutex);
3046 3044
3047 btrfs_destroy_pending_snapshots(t); 3045 btrfs_destroy_pending_snapshots(t);
3048 3046
3049 btrfs_destroy_delalloc_inodes(root); 3047 btrfs_destroy_delalloc_inodes(root);
3050 3048
3051 spin_lock(&root->fs_info->new_trans_lock); 3049 spin_lock(&root->fs_info->trans_lock);
3052 root->fs_info->running_transaction = NULL; 3050 root->fs_info->running_transaction = NULL;
3053 spin_unlock(&root->fs_info->new_trans_lock); 3051 spin_unlock(&root->fs_info->trans_lock);
3054 3052
3055 btrfs_destroy_marked_extents(root, &t->dirty_pages, 3053 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3056 EXTENT_DIRTY); 3054 EXTENT_DIRTY);
@@ -3064,8 +3062,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3064 kmem_cache_free(btrfs_transaction_cachep, t); 3062 kmem_cache_free(btrfs_transaction_cachep, t);
3065 } 3063 }
3066 3064
3065 spin_lock(&root->fs_info->trans_lock);
3066 root->fs_info->trans_no_join = 0;
3067 spin_unlock(&root->fs_info->trans_lock);
3067 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 3068 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3068 mutex_unlock(&root->fs_info->trans_mutex);
3069 3069
3070 return 0; 3070 return 0;
3071} 3071}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 941b28e78931..ca599654ce19 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3200,7 +3200,8 @@ alloc:
3200 3200
3201 /* commit the current transaction and try again */ 3201 /* commit the current transaction and try again */
3202commit_trans: 3202commit_trans:
3203 if (!committed && !root->fs_info->open_ioctl_trans) { 3203 if (!committed &&
3204 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3204 committed = 1; 3205 committed = 1;
3205 trans = btrfs_join_transaction(root); 3206 trans = btrfs_join_transaction(root);
3206 if (IS_ERR(trans)) 3207 if (IS_ERR(trans))
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 75899a01dded..cd5e82e500cf 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1222,14 +1222,12 @@ int btrfs_sync_file(struct file *file, int datasync)
1222 * the current transaction, we can bail out now without any 1222 * the current transaction, we can bail out now without any
1223 * syncing 1223 * syncing
1224 */ 1224 */
1225 mutex_lock(&root->fs_info->trans_mutex); 1225 smp_mb();
1226 if (BTRFS_I(inode)->last_trans <= 1226 if (BTRFS_I(inode)->last_trans <=
1227 root->fs_info->last_trans_committed) { 1227 root->fs_info->last_trans_committed) {
1228 BTRFS_I(inode)->last_trans = 0; 1228 BTRFS_I(inode)->last_trans = 0;
1229 mutex_unlock(&root->fs_info->trans_mutex);
1230 goto out; 1229 goto out;
1231 } 1230 }
1232 mutex_unlock(&root->fs_info->trans_mutex);
1233 1231
1234 /* 1232 /*
1235 * ok we haven't committed the transaction yet, lets do a commit 1233 * ok we haven't committed the transaction yet, lets do a commit
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 908c3d4b48c6..a578620e06a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2177,9 +2177,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2177 if (ret) 2177 if (ret)
2178 goto out; 2178 goto out;
2179 2179
2180 mutex_lock(&root->fs_info->trans_mutex); 2180 atomic_inc(&root->fs_info->open_ioctl_trans);
2181 root->fs_info->open_ioctl_trans++;
2182 mutex_unlock(&root->fs_info->trans_mutex);
2183 2181
2184 ret = -ENOMEM; 2182 ret = -ENOMEM;
2185 trans = btrfs_start_ioctl_transaction(root); 2183 trans = btrfs_start_ioctl_transaction(root);
@@ -2190,9 +2188,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2190 return 0; 2188 return 0;
2191 2189
2192out_drop: 2190out_drop:
2193 mutex_lock(&root->fs_info->trans_mutex); 2191 atomic_dec(&root->fs_info->open_ioctl_trans);
2194 root->fs_info->open_ioctl_trans--;
2195 mutex_unlock(&root->fs_info->trans_mutex);
2196 mnt_drop_write(file->f_path.mnt); 2192 mnt_drop_write(file->f_path.mnt);
2197out: 2193out:
2198 return ret; 2194 return ret;
@@ -2426,9 +2422,7 @@ long btrfs_ioctl_trans_end(struct file *file)
2426 2422
2427 btrfs_end_transaction(trans, root); 2423 btrfs_end_transaction(trans, root);
2428 2424
2429 mutex_lock(&root->fs_info->trans_mutex); 2425 atomic_dec(&root->fs_info->open_ioctl_trans);
2430 root->fs_info->open_ioctl_trans--;
2431 mutex_unlock(&root->fs_info->trans_mutex);
2432 2426
2433 mnt_drop_write(file->f_path.mnt); 2427 mnt_drop_write(file->f_path.mnt);
2434 return 0; 2428 return 0;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8bb256667f2d..09c30d37d43e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2136,10 +2136,10 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2136 u64 num_bytes = 0; 2136 u64 num_bytes = 0;
2137 int ret; 2137 int ret;
2138 2138
2139 mutex_lock(&root->fs_info->trans_mutex); 2139 spin_lock(&root->fs_info->trans_lock);
2140 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2140 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2141 rc->merging_rsv_size += rc->nodes_relocated * 2; 2141 rc->merging_rsv_size += rc->nodes_relocated * 2;
2142 mutex_unlock(&root->fs_info->trans_mutex); 2142 spin_unlock(&root->fs_info->trans_lock);
2143again: 2143again:
2144 if (!err) { 2144 if (!err) {
2145 num_bytes = rc->merging_rsv_size; 2145 num_bytes = rc->merging_rsv_size;
@@ -2208,9 +2208,9 @@ int merge_reloc_roots(struct reloc_control *rc)
2208 int ret; 2208 int ret;
2209again: 2209again:
2210 root = rc->extent_root; 2210 root = rc->extent_root;
2211 mutex_lock(&root->fs_info->trans_mutex); 2211 spin_lock(&root->fs_info->trans_lock);
2212 list_splice_init(&rc->reloc_roots, &reloc_roots); 2212 list_splice_init(&rc->reloc_roots, &reloc_roots);
2213 mutex_unlock(&root->fs_info->trans_mutex); 2213 spin_unlock(&root->fs_info->trans_lock);
2214 2214
2215 while (!list_empty(&reloc_roots)) { 2215 while (!list_empty(&reloc_roots)) {
2216 found = 1; 2216 found = 1;
@@ -3583,17 +3583,17 @@ next:
3583static void set_reloc_control(struct reloc_control *rc) 3583static void set_reloc_control(struct reloc_control *rc)
3584{ 3584{
3585 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3585 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3586 mutex_lock(&fs_info->trans_mutex); 3586 spin_lock(&fs_info->trans_lock);
3587 fs_info->reloc_ctl = rc; 3587 fs_info->reloc_ctl = rc;
3588 mutex_unlock(&fs_info->trans_mutex); 3588 spin_unlock(&fs_info->trans_lock);
3589} 3589}
3590 3590
3591static void unset_reloc_control(struct reloc_control *rc) 3591static void unset_reloc_control(struct reloc_control *rc)
3592{ 3592{
3593 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3593 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3594 mutex_lock(&fs_info->trans_mutex); 3594 spin_lock(&fs_info->trans_lock);
3595 fs_info->reloc_ctl = NULL; 3595 fs_info->reloc_ctl = NULL;
3596 mutex_unlock(&fs_info->trans_mutex); 3596 spin_unlock(&fs_info->trans_lock);
3597} 3597}
3598 3598
3599static int check_extent_flags(u64 flags) 3599static int check_extent_flags(u64 flags)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 46f40564c168..43816f8b23e7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -34,6 +34,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(atomic_read(&transaction->use_count) == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 if (atomic_dec_and_test(&transaction->use_count)) { 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 BUG_ON(!list_empty(&transaction->list));
37 memset(transaction, 0, sizeof(*transaction)); 38 memset(transaction, 0, sizeof(*transaction));
38 kmem_cache_free(btrfs_transaction_cachep, transaction); 39 kmem_cache_free(btrfs_transaction_cachep, transaction);
39 } 40 }
@@ -48,47 +49,73 @@ static noinline void switch_commit_root(struct btrfs_root *root)
48/* 49/*
49 * either allocate a new transaction or hop into the existing one 50 * either allocate a new transaction or hop into the existing one
50 */ 51 */
51static noinline int join_transaction(struct btrfs_root *root) 52static noinline int join_transaction(struct btrfs_root *root, int nofail)
52{ 53{
53 struct btrfs_transaction *cur_trans; 54 struct btrfs_transaction *cur_trans;
55
56 spin_lock(&root->fs_info->trans_lock);
57 if (root->fs_info->trans_no_join) {
58 if (!nofail) {
59 spin_unlock(&root->fs_info->trans_lock);
60 return -EBUSY;
61 }
62 }
63
54 cur_trans = root->fs_info->running_transaction; 64 cur_trans = root->fs_info->running_transaction;
55 if (!cur_trans) { 65 if (cur_trans) {
56 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 66 atomic_inc(&cur_trans->use_count);
57 GFP_NOFS); 67 atomic_inc(&cur_trans->num_writers);
58 if (!cur_trans) 68 cur_trans->num_joined++;
59 return -ENOMEM; 69 spin_unlock(&root->fs_info->trans_lock);
60 root->fs_info->generation++; 70 return 0;
61 atomic_set(&cur_trans->num_writers, 1); 71 }
62 cur_trans->num_joined = 0; 72 spin_unlock(&root->fs_info->trans_lock);
63 cur_trans->transid = root->fs_info->generation; 73
64 init_waitqueue_head(&cur_trans->writer_wait); 74 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
65 init_waitqueue_head(&cur_trans->commit_wait); 75 if (!cur_trans)
66 cur_trans->in_commit = 0; 76 return -ENOMEM;
67 cur_trans->blocked = 0; 77 spin_lock(&root->fs_info->trans_lock);
68 atomic_set(&cur_trans->use_count, 1); 78 if (root->fs_info->running_transaction) {
69 cur_trans->commit_done = 0; 79 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
70 cur_trans->start_time = get_seconds(); 80 cur_trans = root->fs_info->running_transaction;
71 81 atomic_inc(&cur_trans->use_count);
72 cur_trans->delayed_refs.root = RB_ROOT;
73 cur_trans->delayed_refs.num_entries = 0;
74 cur_trans->delayed_refs.num_heads_ready = 0;
75 cur_trans->delayed_refs.num_heads = 0;
76 cur_trans->delayed_refs.flushing = 0;
77 cur_trans->delayed_refs.run_delayed_start = 0;
78 spin_lock_init(&cur_trans->delayed_refs.lock);
79
80 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
81 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
82 extent_io_tree_init(&cur_trans->dirty_pages,
83 root->fs_info->btree_inode->i_mapping,
84 GFP_NOFS);
85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock);
88 } else {
89 atomic_inc(&cur_trans->num_writers); 82 atomic_inc(&cur_trans->num_writers);
90 cur_trans->num_joined++; 83 cur_trans->num_joined++;
84 spin_unlock(&root->fs_info->trans_lock);
85 return 0;
91 } 86 }
87 atomic_set(&cur_trans->num_writers, 1);
88 cur_trans->num_joined = 0;
89 init_waitqueue_head(&cur_trans->writer_wait);
90 init_waitqueue_head(&cur_trans->commit_wait);
91 cur_trans->in_commit = 0;
92 cur_trans->blocked = 0;
93 /*
94 * One for this trans handle, one so it will live on until we
95 * commit the transaction.
96 */
97 atomic_set(&cur_trans->use_count, 2);
98 cur_trans->commit_done = 0;
99 cur_trans->start_time = get_seconds();
100
101 cur_trans->delayed_refs.root = RB_ROOT;
102 cur_trans->delayed_refs.num_entries = 0;
103 cur_trans->delayed_refs.num_heads_ready = 0;
104 cur_trans->delayed_refs.num_heads = 0;
105 cur_trans->delayed_refs.flushing = 0;
106 cur_trans->delayed_refs.run_delayed_start = 0;
107 spin_lock_init(&cur_trans->commit_lock);
108 spin_lock_init(&cur_trans->delayed_refs.lock);
109
110 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
111 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
112 extent_io_tree_init(&cur_trans->dirty_pages,
113 root->fs_info->btree_inode->i_mapping,
114 GFP_NOFS);
115 root->fs_info->generation++;
116 cur_trans->transid = root->fs_info->generation;
117 root->fs_info->running_transaction = cur_trans;
118 spin_unlock(&root->fs_info->trans_lock);
92 119
93 return 0; 120 return 0;
94} 121}
@@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root)
99 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
100 * when the transaction commits 127 * when the transaction commits
101 */ 128 */
102static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, 129int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root) 130 struct btrfs_root *root)
104{ 131{
105 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
106 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
107 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
108 135
136 spin_lock(&root->fs_info->fs_roots_radix_lock);
137 if (root->last_trans == trans->transid) {
138 spin_unlock(&root->fs_info->fs_roots_radix_lock);
139 return 0;
140 }
141 root->last_trans = trans->transid;
109 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 142 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
110 (unsigned long)root->root_key.objectid, 143 (unsigned long)root->root_key.objectid,
111 BTRFS_ROOT_TRANS_TAG); 144 BTRFS_ROOT_TRANS_TAG);
112 root->last_trans = trans->transid; 145 spin_unlock(&root->fs_info->fs_roots_radix_lock);
113 btrfs_init_reloc_root(trans, root); 146 btrfs_init_reloc_root(trans, root);
114 } 147 }
115 return 0; 148 return 0;
116} 149}
117 150
118int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root)
120{
121 if (!root->ref_cows)
122 return 0;
123
124 mutex_lock(&root->fs_info->trans_mutex);
125 if (root->last_trans == trans->transid) {
126 mutex_unlock(&root->fs_info->trans_mutex);
127 return 0;
128 }
129
130 record_root_in_trans(trans, root);
131 mutex_unlock(&root->fs_info->trans_mutex);
132 return 0;
133}
134
135/* wait for commit against the current transaction to become unblocked 151/* wait for commit against the current transaction to become unblocked
136 * when this is done, it is safe to start a new transaction, but the current 152 * when this is done, it is safe to start a new transaction, but the current
137 * transaction might not be fully on disk. 153 * transaction might not be fully on disk.
@@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root)
140{ 156{
141 struct btrfs_transaction *cur_trans; 157 struct btrfs_transaction *cur_trans;
142 158
159 spin_lock(&root->fs_info->trans_lock);
143 cur_trans = root->fs_info->running_transaction; 160 cur_trans = root->fs_info->running_transaction;
144 if (cur_trans && cur_trans->blocked) { 161 if (cur_trans && cur_trans->blocked) {
145 DEFINE_WAIT(wait); 162 DEFINE_WAIT(wait);
146 atomic_inc(&cur_trans->use_count); 163 atomic_inc(&cur_trans->use_count);
164 spin_unlock(&root->fs_info->trans_lock);
147 while (1) { 165 while (1) {
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 166 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE); 167 TASK_UNINTERRUPTIBLE);
150 if (!cur_trans->blocked) 168 if (!cur_trans->blocked)
151 break; 169 break;
152 mutex_unlock(&root->fs_info->trans_mutex);
153 schedule(); 170 schedule();
154 mutex_lock(&root->fs_info->trans_mutex);
155 } 171 }
156 finish_wait(&root->fs_info->transaction_wait, &wait); 172 finish_wait(&root->fs_info->transaction_wait, &wait);
157 put_transaction(cur_trans); 173 put_transaction(cur_trans);
174 } else {
175 spin_unlock(&root->fs_info->trans_lock);
158 } 176 }
159} 177}
160 178
@@ -167,10 +185,16 @@ enum btrfs_trans_type {
167 185
168static int may_wait_transaction(struct btrfs_root *root, int type) 186static int may_wait_transaction(struct btrfs_root *root, int type)
169{ 187{
170 if (!root->fs_info->log_root_recovering && 188 if (root->fs_info->log_root_recovering)
171 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || 189 return 0;
172 type == TRANS_USERSPACE)) 190
191 if (type == TRANS_USERSPACE)
192 return 1;
193
194 if (type == TRANS_START &&
195 !atomic_read(&root->fs_info->open_ioctl_trans))
173 return 1; 196 return 1;
197
174 return 0; 198 return 0;
175} 199}
176 200
@@ -198,23 +222,21 @@ again:
198 if (!h) 222 if (!h)
199 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
200 224
201 if (type != TRANS_JOIN_NOLOCK)
202 mutex_lock(&root->fs_info->trans_mutex);
203 if (may_wait_transaction(root, type)) 225 if (may_wait_transaction(root, type))
204 wait_current_trans(root); 226 wait_current_trans(root);
205 227
206 ret = join_transaction(root); 228 do {
229 ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
230 if (ret == -EBUSY)
231 wait_current_trans(root);
232 } while (ret == -EBUSY);
233
207 if (ret < 0) { 234 if (ret < 0) {
208 kmem_cache_free(btrfs_trans_handle_cachep, h); 235 kmem_cache_free(btrfs_trans_handle_cachep, h);
209 if (type != TRANS_JOIN_NOLOCK)
210 mutex_unlock(&root->fs_info->trans_mutex);
211 return ERR_PTR(ret); 236 return ERR_PTR(ret);
212 } 237 }
213 238
214 cur_trans = root->fs_info->running_transaction; 239 cur_trans = root->fs_info->running_transaction;
215 atomic_inc(&cur_trans->use_count);
216 if (type != TRANS_JOIN_NOLOCK)
217 mutex_unlock(&root->fs_info->trans_mutex);
218 240
219 h->transid = cur_trans->transid; 241 h->transid = cur_trans->transid;
220 h->transaction = cur_trans; 242 h->transaction = cur_trans;
@@ -253,11 +275,7 @@ again:
253 } 275 }
254 276
255got_it: 277got_it:
256 if (type != TRANS_JOIN_NOLOCK) 278 btrfs_record_root_in_trans(h, root);
257 mutex_lock(&root->fs_info->trans_mutex);
258 record_root_in_trans(h, root);
259 if (type != TRANS_JOIN_NOLOCK)
260 mutex_unlock(&root->fs_info->trans_mutex);
261 279
262 if (!current->journal_info && type != TRANS_USERSPACE) 280 if (!current->journal_info && type != TRANS_USERSPACE)
263 current->journal_info = h; 281 current->journal_info = h;
@@ -289,17 +307,13 @@ static noinline int wait_for_commit(struct btrfs_root *root,
289 struct btrfs_transaction *commit) 307 struct btrfs_transaction *commit)
290{ 308{
291 DEFINE_WAIT(wait); 309 DEFINE_WAIT(wait);
292 mutex_lock(&root->fs_info->trans_mutex);
293 while (!commit->commit_done) { 310 while (!commit->commit_done) {
294 prepare_to_wait(&commit->commit_wait, &wait, 311 prepare_to_wait(&commit->commit_wait, &wait,
295 TASK_UNINTERRUPTIBLE); 312 TASK_UNINTERRUPTIBLE);
296 if (commit->commit_done) 313 if (commit->commit_done)
297 break; 314 break;
298 mutex_unlock(&root->fs_info->trans_mutex);
299 schedule(); 315 schedule();
300 mutex_lock(&root->fs_info->trans_mutex);
301 } 316 }
302 mutex_unlock(&root->fs_info->trans_mutex);
303 finish_wait(&commit->commit_wait, &wait); 317 finish_wait(&commit->commit_wait, &wait);
304 return 0; 318 return 0;
305} 319}
@@ -309,50 +323,49 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
309 struct btrfs_transaction *cur_trans = NULL, *t; 323 struct btrfs_transaction *cur_trans = NULL, *t;
310 int ret; 324 int ret;
311 325
312 mutex_lock(&root->fs_info->trans_mutex);
313
314 ret = 0; 326 ret = 0;
315 if (transid) { 327 if (transid) {
316 if (transid <= root->fs_info->last_trans_committed) 328 if (transid <= root->fs_info->last_trans_committed)
317 goto out_unlock; 329 goto out;
318 330
319 /* find specified transaction */ 331 /* find specified transaction */
332 spin_lock(&root->fs_info->trans_lock);
320 list_for_each_entry(t, &root->fs_info->trans_list, list) { 333 list_for_each_entry(t, &root->fs_info->trans_list, list) {
321 if (t->transid == transid) { 334 if (t->transid == transid) {
322 cur_trans = t; 335 cur_trans = t;
336 atomic_inc(&cur_trans->use_count);
323 break; 337 break;
324 } 338 }
325 if (t->transid > transid) 339 if (t->transid > transid)
326 break; 340 break;
327 } 341 }
342 spin_unlock(&root->fs_info->trans_lock);
328 ret = -EINVAL; 343 ret = -EINVAL;
329 if (!cur_trans) 344 if (!cur_trans)
330 goto out_unlock; /* bad transid */ 345 goto out; /* bad transid */
331 } else { 346 } else {
332 /* find newest transaction that is committing | committed */ 347 /* find newest transaction that is committing | committed */
348 spin_lock(&root->fs_info->trans_lock);
333 list_for_each_entry_reverse(t, &root->fs_info->trans_list, 349 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
334 list) { 350 list) {
335 if (t->in_commit) { 351 if (t->in_commit) {
336 if (t->commit_done) 352 if (t->commit_done)
337 goto out_unlock; 353 goto out;
338 cur_trans = t; 354 cur_trans = t;
355 atomic_inc(&cur_trans->use_count);
339 break; 356 break;
340 } 357 }
341 } 358 }
359 spin_unlock(&root->fs_info->trans_lock);
342 if (!cur_trans) 360 if (!cur_trans)
343 goto out_unlock; /* nothing committing|committed */ 361 goto out; /* nothing committing|committed */
344 } 362 }
345 363
346 atomic_inc(&cur_trans->use_count);
347 mutex_unlock(&root->fs_info->trans_mutex);
348
349 wait_for_commit(root, cur_trans); 364 wait_for_commit(root, cur_trans);
350 365
351 mutex_lock(&root->fs_info->trans_mutex);
352 put_transaction(cur_trans); 366 put_transaction(cur_trans);
353 ret = 0; 367 ret = 0;
354out_unlock: 368out:
355 mutex_unlock(&root->fs_info->trans_mutex);
356 return ret; 369 return ret;
357} 370}
358 371
@@ -401,10 +414,8 @@ harder:
401 414
402void btrfs_throttle(struct btrfs_root *root) 415void btrfs_throttle(struct btrfs_root *root)
403{ 416{
404 mutex_lock(&root->fs_info->trans_mutex); 417 if (!atomic_read(&root->fs_info->open_ioctl_trans))
405 if (!root->fs_info->open_ioctl_trans)
406 wait_current_trans(root); 418 wait_current_trans(root);
407 mutex_unlock(&root->fs_info->trans_mutex);
408} 419}
409 420
410static int should_end_transaction(struct btrfs_trans_handle *trans, 421static int should_end_transaction(struct btrfs_trans_handle *trans,
@@ -422,6 +433,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
422 struct btrfs_transaction *cur_trans = trans->transaction; 433 struct btrfs_transaction *cur_trans = trans->transaction;
423 int updates; 434 int updates;
424 435
436 smp_mb();
425 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 437 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
426 return 1; 438 return 1;
427 439
@@ -467,9 +479,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
467 479
468 btrfs_trans_release_metadata(trans, root); 480 btrfs_trans_release_metadata(trans, root);
469 481
470 if (lock && !root->fs_info->open_ioctl_trans && 482 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
471 should_end_transaction(trans, root)) 483 should_end_transaction(trans, root)) {
472 trans->transaction->blocked = 1; 484 trans->transaction->blocked = 1;
485 smp_wmb();
486 }
473 487
474 if (lock && cur_trans->blocked && !cur_trans->in_commit) { 488 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
475 if (throttle) 489 if (throttle)
@@ -739,9 +753,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
739 */ 753 */
740int btrfs_add_dead_root(struct btrfs_root *root) 754int btrfs_add_dead_root(struct btrfs_root *root)
741{ 755{
742 mutex_lock(&root->fs_info->trans_mutex); 756 spin_lock(&root->fs_info->trans_lock);
743 list_add(&root->root_list, &root->fs_info->dead_roots); 757 list_add(&root->root_list, &root->fs_info->dead_roots);
744 mutex_unlock(&root->fs_info->trans_mutex); 758 spin_unlock(&root->fs_info->trans_lock);
745 return 0; 759 return 0;
746} 760}
747 761
@@ -757,6 +771,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
757 int ret; 771 int ret;
758 int err = 0; 772 int err = 0;
759 773
774 spin_lock(&fs_info->fs_roots_radix_lock);
760 while (1) { 775 while (1) {
761 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 776 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
762 (void **)gang, 0, 777 (void **)gang, 0,
@@ -769,6 +784,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
769 radix_tree_tag_clear(&fs_info->fs_roots_radix, 784 radix_tree_tag_clear(&fs_info->fs_roots_radix,
770 (unsigned long)root->root_key.objectid, 785 (unsigned long)root->root_key.objectid,
771 BTRFS_ROOT_TRANS_TAG); 786 BTRFS_ROOT_TRANS_TAG);
787 spin_unlock(&fs_info->fs_roots_radix_lock);
772 788
773 btrfs_free_log(trans, root); 789 btrfs_free_log(trans, root);
774 btrfs_update_reloc_root(trans, root); 790 btrfs_update_reloc_root(trans, root);
@@ -783,10 +799,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
783 err = btrfs_update_root(trans, fs_info->tree_root, 799 err = btrfs_update_root(trans, fs_info->tree_root,
784 &root->root_key, 800 &root->root_key,
785 &root->root_item); 801 &root->root_item);
802 spin_lock(&fs_info->fs_roots_radix_lock);
786 if (err) 803 if (err)
787 break; 804 break;
788 } 805 }
789 } 806 }
807 spin_unlock(&fs_info->fs_roots_radix_lock);
790 return err; 808 return err;
791} 809}
792 810
@@ -972,7 +990,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
972 parent = dget_parent(dentry); 990 parent = dget_parent(dentry);
973 parent_inode = parent->d_inode; 991 parent_inode = parent->d_inode;
974 parent_root = BTRFS_I(parent_inode)->root; 992 parent_root = BTRFS_I(parent_inode)->root;
975 record_root_in_trans(trans, parent_root); 993 btrfs_record_root_in_trans(trans, parent_root);
976 994
977 /* 995 /*
978 * insert the directory item 996 * insert the directory item
@@ -990,7 +1008,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
990 ret = btrfs_update_inode(trans, parent_root, parent_inode); 1008 ret = btrfs_update_inode(trans, parent_root, parent_inode);
991 BUG_ON(ret); 1009 BUG_ON(ret);
992 1010
993 record_root_in_trans(trans, root); 1011 btrfs_record_root_in_trans(trans, root);
994 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 1012 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
995 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 1013 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
996 btrfs_check_and_init_root_item(new_root_item); 1014 btrfs_check_and_init_root_item(new_root_item);
@@ -1080,20 +1098,20 @@ static void update_super_roots(struct btrfs_root *root)
1080int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1098int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
1081{ 1099{
1082 int ret = 0; 1100 int ret = 0;
1083 spin_lock(&info->new_trans_lock); 1101 spin_lock(&info->trans_lock);
1084 if (info->running_transaction) 1102 if (info->running_transaction)
1085 ret = info->running_transaction->in_commit; 1103 ret = info->running_transaction->in_commit;
1086 spin_unlock(&info->new_trans_lock); 1104 spin_unlock(&info->trans_lock);
1087 return ret; 1105 return ret;
1088} 1106}
1089 1107
1090int btrfs_transaction_blocked(struct btrfs_fs_info *info) 1108int btrfs_transaction_blocked(struct btrfs_fs_info *info)
1091{ 1109{
1092 int ret = 0; 1110 int ret = 0;
1093 spin_lock(&info->new_trans_lock); 1111 spin_lock(&info->trans_lock);
1094 if (info->running_transaction) 1112 if (info->running_transaction)
1095 ret = info->running_transaction->blocked; 1113 ret = info->running_transaction->blocked;
1096 spin_unlock(&info->new_trans_lock); 1114 spin_unlock(&info->trans_lock);
1097 return ret; 1115 return ret;
1098} 1116}
1099 1117
@@ -1117,9 +1135,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
1117 &wait); 1135 &wait);
1118 break; 1136 break;
1119 } 1137 }
1120 mutex_unlock(&root->fs_info->trans_mutex);
1121 schedule(); 1138 schedule();
1122 mutex_lock(&root->fs_info->trans_mutex);
1123 finish_wait(&root->fs_info->transaction_blocked_wait, &wait); 1139 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1124 } 1140 }
1125} 1141}
@@ -1145,9 +1161,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1145 &wait); 1161 &wait);
1146 break; 1162 break;
1147 } 1163 }
1148 mutex_unlock(&root->fs_info->trans_mutex);
1149 schedule(); 1164 schedule();
1150 mutex_lock(&root->fs_info->trans_mutex);
1151 finish_wait(&root->fs_info->transaction_wait, 1165 finish_wait(&root->fs_info->transaction_wait,
1152 &wait); 1166 &wait);
1153 } 1167 }
@@ -1193,22 +1207,18 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1193 } 1207 }
1194 1208
1195 /* take transaction reference */ 1209 /* take transaction reference */
1196 mutex_lock(&root->fs_info->trans_mutex);
1197 cur_trans = trans->transaction; 1210 cur_trans = trans->transaction;
1198 atomic_inc(&cur_trans->use_count); 1211 atomic_inc(&cur_trans->use_count);
1199 mutex_unlock(&root->fs_info->trans_mutex);
1200 1212
1201 btrfs_end_transaction(trans, root); 1213 btrfs_end_transaction(trans, root);
1202 schedule_delayed_work(&ac->work, 0); 1214 schedule_delayed_work(&ac->work, 0);
1203 1215
1204 /* wait for transaction to start and unblock */ 1216 /* wait for transaction to start and unblock */
1205 mutex_lock(&root->fs_info->trans_mutex);
1206 if (wait_for_unblock) 1217 if (wait_for_unblock)
1207 wait_current_trans_commit_start_and_unblock(root, cur_trans); 1218 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1208 else 1219 else
1209 wait_current_trans_commit_start(root, cur_trans); 1220 wait_current_trans_commit_start(root, cur_trans);
1210 put_transaction(cur_trans); 1221 put_transaction(cur_trans);
1211 mutex_unlock(&root->fs_info->trans_mutex);
1212 1222
1213 return 0; 1223 return 0;
1214} 1224}
@@ -1252,38 +1262,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1252 ret = btrfs_run_delayed_refs(trans, root, 0); 1262 ret = btrfs_run_delayed_refs(trans, root, 0);
1253 BUG_ON(ret); 1263 BUG_ON(ret);
1254 1264
1255 mutex_lock(&root->fs_info->trans_mutex); 1265 spin_lock(&cur_trans->commit_lock);
1256 if (cur_trans->in_commit) { 1266 if (cur_trans->in_commit) {
1267 spin_unlock(&cur_trans->commit_lock);
1257 atomic_inc(&cur_trans->use_count); 1268 atomic_inc(&cur_trans->use_count);
1258 mutex_unlock(&root->fs_info->trans_mutex);
1259 btrfs_end_transaction(trans, root); 1269 btrfs_end_transaction(trans, root);
1260 1270
1261 ret = wait_for_commit(root, cur_trans); 1271 ret = wait_for_commit(root, cur_trans);
1262 BUG_ON(ret); 1272 BUG_ON(ret);
1263 1273
1264 mutex_lock(&root->fs_info->trans_mutex);
1265 put_transaction(cur_trans); 1274 put_transaction(cur_trans);
1266 mutex_unlock(&root->fs_info->trans_mutex);
1267 1275
1268 return 0; 1276 return 0;
1269 } 1277 }
1270 1278
1271 trans->transaction->in_commit = 1; 1279 trans->transaction->in_commit = 1;
1272 trans->transaction->blocked = 1; 1280 trans->transaction->blocked = 1;
1281 spin_unlock(&cur_trans->commit_lock);
1273 wake_up(&root->fs_info->transaction_blocked_wait); 1282 wake_up(&root->fs_info->transaction_blocked_wait);
1274 1283
1284 spin_lock(&root->fs_info->trans_lock);
1275 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1285 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1276 prev_trans = list_entry(cur_trans->list.prev, 1286 prev_trans = list_entry(cur_trans->list.prev,
1277 struct btrfs_transaction, list); 1287 struct btrfs_transaction, list);
1278 if (!prev_trans->commit_done) { 1288 if (!prev_trans->commit_done) {
1279 atomic_inc(&prev_trans->use_count); 1289 atomic_inc(&prev_trans->use_count);
1280 mutex_unlock(&root->fs_info->trans_mutex); 1290 spin_unlock(&root->fs_info->trans_lock);
1281 1291
1282 wait_for_commit(root, prev_trans); 1292 wait_for_commit(root, prev_trans);
1283 1293
1284 mutex_lock(&root->fs_info->trans_mutex);
1285 put_transaction(prev_trans); 1294 put_transaction(prev_trans);
1295 } else {
1296 spin_unlock(&root->fs_info->trans_lock);
1286 } 1297 }
1298 } else {
1299 spin_unlock(&root->fs_info->trans_lock);
1287 } 1300 }
1288 1301
1289 if (now < cur_trans->start_time || now - cur_trans->start_time < 1) 1302 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
@@ -1291,12 +1304,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1291 1304
1292 do { 1305 do {
1293 int snap_pending = 0; 1306 int snap_pending = 0;
1307
1294 joined = cur_trans->num_joined; 1308 joined = cur_trans->num_joined;
1295 if (!list_empty(&trans->transaction->pending_snapshots)) 1309 if (!list_empty(&trans->transaction->pending_snapshots))
1296 snap_pending = 1; 1310 snap_pending = 1;
1297 1311
1298 WARN_ON(cur_trans != trans->transaction); 1312 WARN_ON(cur_trans != trans->transaction);
1299 mutex_unlock(&root->fs_info->trans_mutex);
1300 1313
1301 if (flush_on_commit || snap_pending) { 1314 if (flush_on_commit || snap_pending) {
1302 btrfs_start_delalloc_inodes(root, 1); 1315 btrfs_start_delalloc_inodes(root, 1);
@@ -1316,14 +1329,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1316 prepare_to_wait(&cur_trans->writer_wait, &wait, 1329 prepare_to_wait(&cur_trans->writer_wait, &wait,
1317 TASK_UNINTERRUPTIBLE); 1330 TASK_UNINTERRUPTIBLE);
1318 1331
1319 smp_mb();
1320 if (atomic_read(&cur_trans->num_writers) > 1) 1332 if (atomic_read(&cur_trans->num_writers) > 1)
1321 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1333 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1322 else if (should_grow) 1334 else if (should_grow)
1323 schedule_timeout(1); 1335 schedule_timeout(1);
1324 1336
1325 mutex_lock(&root->fs_info->trans_mutex);
1326 finish_wait(&cur_trans->writer_wait, &wait); 1337 finish_wait(&cur_trans->writer_wait, &wait);
1338 spin_lock(&root->fs_info->trans_lock);
1339 root->fs_info->trans_no_join = 1;
1340 spin_unlock(&root->fs_info->trans_lock);
1327 } while (atomic_read(&cur_trans->num_writers) > 1 || 1341 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1328 (should_grow && cur_trans->num_joined != joined)); 1342 (should_grow && cur_trans->num_joined != joined));
1329 1343
@@ -1364,9 +1378,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1364 btrfs_prepare_extent_commit(trans, root); 1378 btrfs_prepare_extent_commit(trans, root);
1365 1379
1366 cur_trans = root->fs_info->running_transaction; 1380 cur_trans = root->fs_info->running_transaction;
1367 spin_lock(&root->fs_info->new_trans_lock);
1368 root->fs_info->running_transaction = NULL;
1369 spin_unlock(&root->fs_info->new_trans_lock);
1370 1381
1371 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1382 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1372 root->fs_info->tree_root->node); 1383 root->fs_info->tree_root->node);
@@ -1387,10 +1398,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1387 sizeof(root->fs_info->super_copy)); 1398 sizeof(root->fs_info->super_copy));
1388 1399
1389 trans->transaction->blocked = 0; 1400 trans->transaction->blocked = 0;
1401 spin_lock(&root->fs_info->trans_lock);
1402 root->fs_info->running_transaction = NULL;
1403 root->fs_info->trans_no_join = 0;
1404 spin_unlock(&root->fs_info->trans_lock);
1390 1405
1391 wake_up(&root->fs_info->transaction_wait); 1406 wake_up(&root->fs_info->transaction_wait);
1392 1407
1393 mutex_unlock(&root->fs_info->trans_mutex);
1394 ret = btrfs_write_and_wait_transaction(trans, root); 1408 ret = btrfs_write_and_wait_transaction(trans, root);
1395 BUG_ON(ret); 1409 BUG_ON(ret);
1396 write_ctree_super(trans, root, 0); 1410 write_ctree_super(trans, root, 0);
@@ -1403,22 +1417,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1403 1417
1404 btrfs_finish_extent_commit(trans, root); 1418 btrfs_finish_extent_commit(trans, root);
1405 1419
1406 mutex_lock(&root->fs_info->trans_mutex);
1407
1408 cur_trans->commit_done = 1; 1420 cur_trans->commit_done = 1;
1409 1421
1410 root->fs_info->last_trans_committed = cur_trans->transid; 1422 root->fs_info->last_trans_committed = cur_trans->transid;
1411 1423
1412 wake_up(&cur_trans->commit_wait); 1424 wake_up(&cur_trans->commit_wait);
1413 1425
1426 spin_lock(&root->fs_info->trans_lock);
1414 list_del_init(&cur_trans->list); 1427 list_del_init(&cur_trans->list);
1428 spin_unlock(&root->fs_info->trans_lock);
1429
1415 put_transaction(cur_trans); 1430 put_transaction(cur_trans);
1416 put_transaction(cur_trans); 1431 put_transaction(cur_trans);
1417 1432
1418 trace_btrfs_transaction_commit(root); 1433 trace_btrfs_transaction_commit(root);
1419 1434
1420 mutex_unlock(&root->fs_info->trans_mutex);
1421
1422 if (current->journal_info == trans) 1435 if (current->journal_info == trans)
1423 current->journal_info = NULL; 1436 current->journal_info = NULL;
1424 1437
@@ -1438,9 +1451,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1438 LIST_HEAD(list); 1451 LIST_HEAD(list);
1439 struct btrfs_fs_info *fs_info = root->fs_info; 1452 struct btrfs_fs_info *fs_info = root->fs_info;
1440 1453
1441 mutex_lock(&fs_info->trans_mutex); 1454 spin_lock(&fs_info->trans_lock);
1442 list_splice_init(&fs_info->dead_roots, &list); 1455 list_splice_init(&fs_info->dead_roots, &list);
1443 mutex_unlock(&fs_info->trans_mutex); 1456 spin_unlock(&fs_info->trans_lock);
1444 1457
1445 while (!list_empty(&list)) { 1458 while (!list_empty(&list)) {
1446 root = list_entry(list.next, struct btrfs_root, root_list); 1459 root = list_entry(list.next, struct btrfs_root, root_list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 154314f80f8d..11c6efcd4ed2 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -28,10 +28,12 @@ struct btrfs_transaction {
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 atomic_t num_writers; 30 atomic_t num_writers;
31 atomic_t use_count;
31 32
32 unsigned long num_joined; 33 unsigned long num_joined;
34
35 spinlock_t commit_lock;
33 int in_commit; 36 int in_commit;
34 atomic_t use_count;
35 int commit_done; 37 int commit_done;
36 int blocked; 38 int blocked;
37 struct list_head list; 39 struct list_head list;