aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/ctree.c32
-rw-r--r--fs/btrfs/ctree.h22
-rw-r--r--fs/btrfs/delayed-inode.c8
-rw-r--r--fs/btrfs/disk-io.c41
-rw-r--r--fs/btrfs/extent-tree.c158
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c10
-rw-r--r--fs/btrfs/free-space-cache.c231
-rw-r--r--fs/btrfs/inode-map.c34
-rw-r--r--fs/btrfs/inode.c265
-rw-r--r--fs/btrfs/ioctl.c49
-rw-r--r--fs/btrfs/relocation.c34
-rw-r--r--fs/btrfs/scrub.c192
-rw-r--r--fs/btrfs/super.c10
-rw-r--r--fs/btrfs/transaction.c307
-rw-r--r--fs/btrfs/transaction.h29
-rw-r--r--fs/btrfs/volumes.c10
-rw-r--r--fs/btrfs/xattr.c2
20 files changed, 864 insertions, 577 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 93b1aa932014..52d7eca8c7bf 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -121,9 +121,6 @@ struct btrfs_inode {
121 */ 121 */
122 u64 index_cnt; 122 u64 index_cnt;
123 123
124 /* the start of block group preferred for allocations. */
125 u64 block_group;
126
127 /* the fsync log has some corner cases that mean we have to check 124 /* the fsync log has some corner cases that mean we have to check
128 * directories to see if any unlinks have been done before 125 * directories to see if any unlinks have been done before
129 * the directory was logged. See tree-log.c for all the 126 * the directory was logged. See tree-log.c for all the
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b0e18d986e0a..2e667868e0d2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -43,8 +43,6 @@ struct btrfs_path *btrfs_alloc_path(void)
43{ 43{
44 struct btrfs_path *path; 44 struct btrfs_path *path;
45 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 45 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
46 if (path)
47 path->reada = 1;
48 return path; 46 return path;
49} 47}
50 48
@@ -1224,11 +1222,13 @@ static void reada_for_search(struct btrfs_root *root,
1224 u64 search; 1222 u64 search;
1225 u64 target; 1223 u64 target;
1226 u64 nread = 0; 1224 u64 nread = 0;
1225 u64 gen;
1227 int direction = path->reada; 1226 int direction = path->reada;
1228 struct extent_buffer *eb; 1227 struct extent_buffer *eb;
1229 u32 nr; 1228 u32 nr;
1230 u32 blocksize; 1229 u32 blocksize;
1231 u32 nscan = 0; 1230 u32 nscan = 0;
1231 bool map = true;
1232 1232
1233 if (level != 1) 1233 if (level != 1)
1234 return; 1234 return;
@@ -1250,7 +1250,19 @@ static void reada_for_search(struct btrfs_root *root,
1250 1250
1251 nritems = btrfs_header_nritems(node); 1251 nritems = btrfs_header_nritems(node);
1252 nr = slot; 1252 nr = slot;
1253 if (node->map_token || path->skip_locking)
1254 map = false;
1255
1253 while (1) { 1256 while (1) {
1257 if (map && !node->map_token) {
1258 unsigned long offset = btrfs_node_key_ptr_offset(nr);
1259 map_private_extent_buffer(node, offset,
1260 sizeof(struct btrfs_key_ptr),
1261 &node->map_token,
1262 &node->kaddr,
1263 &node->map_start,
1264 &node->map_len, KM_USER1);
1265 }
1254 if (direction < 0) { 1266 if (direction < 0) {
1255 if (nr == 0) 1267 if (nr == 0)
1256 break; 1268 break;
@@ -1268,14 +1280,23 @@ static void reada_for_search(struct btrfs_root *root,
1268 search = btrfs_node_blockptr(node, nr); 1280 search = btrfs_node_blockptr(node, nr);
1269 if ((search <= target && target - search <= 65536) || 1281 if ((search <= target && target - search <= 65536) ||
1270 (search > target && search - target <= 65536)) { 1282 (search > target && search - target <= 65536)) {
1271 readahead_tree_block(root, search, blocksize, 1283 gen = btrfs_node_ptr_generation(node, nr);
1272 btrfs_node_ptr_generation(node, nr)); 1284 if (map && node->map_token) {
1285 unmap_extent_buffer(node, node->map_token,
1286 KM_USER1);
1287 node->map_token = NULL;
1288 }
1289 readahead_tree_block(root, search, blocksize, gen);
1273 nread += blocksize; 1290 nread += blocksize;
1274 } 1291 }
1275 nscan++; 1292 nscan++;
1276 if ((nread > 65536 || nscan > 32)) 1293 if ((nread > 65536 || nscan > 32))
1277 break; 1294 break;
1278 } 1295 }
1296 if (map && node->map_token) {
1297 unmap_extent_buffer(node, node->map_token, KM_USER1);
1298 node->map_token = NULL;
1299 }
1279} 1300}
1280 1301
1281/* 1302/*
@@ -1648,9 +1669,6 @@ again:
1648 } 1669 }
1649cow_done: 1670cow_done:
1650 BUG_ON(!cow && ins_len); 1671 BUG_ON(!cow && ins_len);
1651 if (level != btrfs_header_level(b))
1652 WARN_ON(1);
1653 level = btrfs_header_level(b);
1654 1672
1655 p->nodes[level] = b; 1673 p->nodes[level] = b;
1656 if (!p->skip_locking) 1674 if (!p->skip_locking)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6c093fa98f61..378b5b4443f3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -930,7 +930,6 @@ struct btrfs_fs_info {
930 * is required instead of the faster short fsync log commits 930 * is required instead of the faster short fsync log commits
931 */ 931 */
932 u64 last_trans_log_full_commit; 932 u64 last_trans_log_full_commit;
933 u64 open_ioctl_trans;
934 unsigned long mount_opt:20; 933 unsigned long mount_opt:20;
935 unsigned long compress_type:4; 934 unsigned long compress_type:4;
936 u64 max_inline; 935 u64 max_inline;
@@ -947,7 +946,6 @@ struct btrfs_fs_info {
947 struct super_block *sb; 946 struct super_block *sb;
948 struct inode *btree_inode; 947 struct inode *btree_inode;
949 struct backing_dev_info bdi; 948 struct backing_dev_info bdi;
950 struct mutex trans_mutex;
951 struct mutex tree_log_mutex; 949 struct mutex tree_log_mutex;
952 struct mutex transaction_kthread_mutex; 950 struct mutex transaction_kthread_mutex;
953 struct mutex cleaner_mutex; 951 struct mutex cleaner_mutex;
@@ -968,6 +966,7 @@ struct btrfs_fs_info {
968 struct rw_semaphore subvol_sem; 966 struct rw_semaphore subvol_sem;
969 struct srcu_struct subvol_srcu; 967 struct srcu_struct subvol_srcu;
970 968
969 spinlock_t trans_lock;
971 struct list_head trans_list; 970 struct list_head trans_list;
972 struct list_head hashers; 971 struct list_head hashers;
973 struct list_head dead_roots; 972 struct list_head dead_roots;
@@ -980,6 +979,7 @@ struct btrfs_fs_info {
980 atomic_t async_submit_draining; 979 atomic_t async_submit_draining;
981 atomic_t nr_async_bios; 980 atomic_t nr_async_bios;
982 atomic_t async_delalloc_pages; 981 atomic_t async_delalloc_pages;
982 atomic_t open_ioctl_trans;
983 983
984 /* 984 /*
985 * this is used by the balancing code to wait for all the pending 985 * this is used by the balancing code to wait for all the pending
@@ -1044,6 +1044,7 @@ struct btrfs_fs_info {
1044 int closing; 1044 int closing;
1045 int log_root_recovering; 1045 int log_root_recovering;
1046 int enospc_unlink; 1046 int enospc_unlink;
1047 int trans_no_join;
1047 1048
1048 u64 total_pinned; 1049 u64 total_pinned;
1049 1050
@@ -1065,7 +1066,6 @@ struct btrfs_fs_info {
1065 struct reloc_control *reloc_ctl; 1066 struct reloc_control *reloc_ctl;
1066 1067
1067 spinlock_t delalloc_lock; 1068 spinlock_t delalloc_lock;
1068 spinlock_t new_trans_lock;
1069 u64 delalloc_bytes; 1069 u64 delalloc_bytes;
1070 1070
1071 /* data_alloc_cluster is only used in ssd mode */ 1071 /* data_alloc_cluster is only used in ssd mode */
@@ -1340,6 +1340,7 @@ struct btrfs_ioctl_defrag_range_args {
1340#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1340#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1341#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) 1341#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1342#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) 1342#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
1343#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
1343 1344
1344#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1345#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1345#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1346#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2238,6 +2239,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
2238void btrfs_block_rsv_release(struct btrfs_root *root, 2239void btrfs_block_rsv_release(struct btrfs_root *root,
2239 struct btrfs_block_rsv *block_rsv, 2240 struct btrfs_block_rsv *block_rsv,
2240 u64 num_bytes); 2241 u64 num_bytes);
2242int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
2243 struct btrfs_root *root,
2244 struct btrfs_block_rsv *rsv);
2241int btrfs_set_block_group_ro(struct btrfs_root *root, 2245int btrfs_set_block_group_ro(struct btrfs_root *root,
2242 struct btrfs_block_group_cache *cache); 2246 struct btrfs_block_group_cache *cache);
2243int btrfs_set_block_group_rw(struct btrfs_root *root, 2247int btrfs_set_block_group_rw(struct btrfs_root *root,
@@ -2350,6 +2354,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2350 struct btrfs_root *root, 2354 struct btrfs_root *root,
2351 struct extent_buffer *node, 2355 struct extent_buffer *node,
2352 struct extent_buffer *parent); 2356 struct extent_buffer *parent);
2357static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
2358{
2359 /*
2360 * Get synced with close_ctree()
2361 */
2362 smp_mb();
2363 return fs_info->closing;
2364}
2365
2353/* root-item.c */ 2366/* root-item.c */
2354int btrfs_find_root_ref(struct btrfs_root *tree_root, 2367int btrfs_find_root_ref(struct btrfs_root *tree_root,
2355 struct btrfs_path *path, 2368 struct btrfs_path *path,
@@ -2512,8 +2525,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2512int btrfs_writepages(struct address_space *mapping, 2525int btrfs_writepages(struct address_space *mapping,
2513 struct writeback_control *wbc); 2526 struct writeback_control *wbc);
2514int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 2527int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
2515 struct btrfs_root *new_root, 2528 struct btrfs_root *new_root, u64 new_dirid);
2516 u64 new_dirid, u64 alloc_hint);
2517int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 2529int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2518 size_t size, struct bio *bio, unsigned long bio_flags); 2530 size_t size, struct bio *bio, unsigned long bio_flags);
2519 2531
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 01e29503a54b..6462c29d2d37 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
678 INIT_LIST_HEAD(&head); 678 INIT_LIST_HEAD(&head);
679 679
680 next = item; 680 next = item;
681 nitems = 0;
681 682
682 /* 683 /*
683 * count the number of the continuous items that we can insert in batch 684 * count the number of the continuous items that we can insert in batch
@@ -1129,7 +1130,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1129 delayed_node = async_node->delayed_node; 1130 delayed_node = async_node->delayed_node;
1130 root = delayed_node->root; 1131 root = delayed_node->root;
1131 1132
1132 trans = btrfs_join_transaction(root, 0); 1133 trans = btrfs_join_transaction(root);
1133 if (IS_ERR(trans)) 1134 if (IS_ERR(trans))
1134 goto free_path; 1135 goto free_path;
1135 1136
@@ -1572,8 +1573,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1572 btrfs_set_stack_inode_transid(inode_item, trans->transid); 1573 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1573 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); 1574 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1574 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); 1575 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1575 btrfs_set_stack_inode_block_group(inode_item, 1576 btrfs_set_stack_inode_block_group(inode_item, 0);
1576 BTRFS_I(inode)->block_group);
1577 1577
1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), 1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1579 inode->i_atime.tv_sec); 1579 inode->i_atime.tv_sec);
@@ -1595,7 +1595,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1595 struct btrfs_root *root, struct inode *inode) 1595 struct btrfs_root *root, struct inode *inode)
1596{ 1596{
1597 struct btrfs_delayed_node *delayed_node; 1597 struct btrfs_delayed_node *delayed_node;
1598 int ret; 1598 int ret = 0;
1599 1599
1600 delayed_node = btrfs_get_or_create_delayed_node(inode); 1600 delayed_node = btrfs_get_or_create_delayed_node(inode);
1601 if (IS_ERR(delayed_node)) 1601 if (IS_ERR(delayed_node))
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 98b6a71decba..9f68c6898653 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1505,24 +1505,24 @@ static int transaction_kthread(void *arg)
1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1506 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1506 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1507 1507
1508 spin_lock(&root->fs_info->new_trans_lock); 1508 spin_lock(&root->fs_info->trans_lock);
1509 cur = root->fs_info->running_transaction; 1509 cur = root->fs_info->running_transaction;
1510 if (!cur) { 1510 if (!cur) {
1511 spin_unlock(&root->fs_info->new_trans_lock); 1511 spin_unlock(&root->fs_info->trans_lock);
1512 goto sleep; 1512 goto sleep;
1513 } 1513 }
1514 1514
1515 now = get_seconds(); 1515 now = get_seconds();
1516 if (!cur->blocked && 1516 if (!cur->blocked &&
1517 (now < cur->start_time || now - cur->start_time < 30)) { 1517 (now < cur->start_time || now - cur->start_time < 30)) {
1518 spin_unlock(&root->fs_info->new_trans_lock); 1518 spin_unlock(&root->fs_info->trans_lock);
1519 delay = HZ * 5; 1519 delay = HZ * 5;
1520 goto sleep; 1520 goto sleep;
1521 } 1521 }
1522 transid = cur->transid; 1522 transid = cur->transid;
1523 spin_unlock(&root->fs_info->new_trans_lock); 1523 spin_unlock(&root->fs_info->trans_lock);
1524 1524
1525 trans = btrfs_join_transaction(root, 1); 1525 trans = btrfs_join_transaction(root);
1526 BUG_ON(IS_ERR(trans)); 1526 BUG_ON(IS_ERR(trans));
1527 if (transid == trans->transid) { 1527 if (transid == trans->transid) {
1528 ret = btrfs_commit_transaction(trans, root); 1528 ret = btrfs_commit_transaction(trans, root);
@@ -1613,7 +1613,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1613 INIT_LIST_HEAD(&fs_info->ordered_operations); 1613 INIT_LIST_HEAD(&fs_info->ordered_operations);
1614 INIT_LIST_HEAD(&fs_info->caching_block_groups); 1614 INIT_LIST_HEAD(&fs_info->caching_block_groups);
1615 spin_lock_init(&fs_info->delalloc_lock); 1615 spin_lock_init(&fs_info->delalloc_lock);
1616 spin_lock_init(&fs_info->new_trans_lock); 1616 spin_lock_init(&fs_info->trans_lock);
1617 spin_lock_init(&fs_info->ref_cache_lock); 1617 spin_lock_init(&fs_info->ref_cache_lock);
1618 spin_lock_init(&fs_info->fs_roots_radix_lock); 1618 spin_lock_init(&fs_info->fs_roots_radix_lock);
1619 spin_lock_init(&fs_info->delayed_iput_lock); 1619 spin_lock_init(&fs_info->delayed_iput_lock);
@@ -1645,6 +1645,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1645 fs_info->max_inline = 8192 * 1024; 1645 fs_info->max_inline = 8192 * 1024;
1646 fs_info->metadata_ratio = 0; 1646 fs_info->metadata_ratio = 0;
1647 fs_info->defrag_inodes = RB_ROOT; 1647 fs_info->defrag_inodes = RB_ROOT;
1648 fs_info->trans_no_join = 0;
1648 1649
1649 fs_info->thread_pool_size = min_t(unsigned long, 1650 fs_info->thread_pool_size = min_t(unsigned long,
1650 num_online_cpus() + 2, 8); 1651 num_online_cpus() + 2, 8);
@@ -1667,8 +1668,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1667 init_waitqueue_head(&fs_info->scrub_pause_wait); 1668 init_waitqueue_head(&fs_info->scrub_pause_wait);
1668 init_rwsem(&fs_info->scrub_super_lock); 1669 init_rwsem(&fs_info->scrub_super_lock);
1669 fs_info->scrub_workers_refcnt = 0; 1670 fs_info->scrub_workers_refcnt = 0;
1670 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1671 fs_info->thread_pool_size, &fs_info->generic_worker);
1672 1671
1673 sb->s_blocksize = 4096; 1672 sb->s_blocksize = 4096;
1674 sb->s_blocksize_bits = blksize_bits(4096); 1673 sb->s_blocksize_bits = blksize_bits(4096);
@@ -1709,7 +1708,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1709 fs_info->do_barriers = 1; 1708 fs_info->do_barriers = 1;
1710 1709
1711 1710
1712 mutex_init(&fs_info->trans_mutex);
1713 mutex_init(&fs_info->ordered_operations_mutex); 1711 mutex_init(&fs_info->ordered_operations_mutex);
1714 mutex_init(&fs_info->tree_log_mutex); 1712 mutex_init(&fs_info->tree_log_mutex);
1715 mutex_init(&fs_info->chunk_mutex); 1713 mutex_init(&fs_info->chunk_mutex);
@@ -2479,13 +2477,13 @@ int btrfs_commit_super(struct btrfs_root *root)
2479 down_write(&root->fs_info->cleanup_work_sem); 2477 down_write(&root->fs_info->cleanup_work_sem);
2480 up_write(&root->fs_info->cleanup_work_sem); 2478 up_write(&root->fs_info->cleanup_work_sem);
2481 2479
2482 trans = btrfs_join_transaction(root, 1); 2480 trans = btrfs_join_transaction(root);
2483 if (IS_ERR(trans)) 2481 if (IS_ERR(trans))
2484 return PTR_ERR(trans); 2482 return PTR_ERR(trans);
2485 ret = btrfs_commit_transaction(trans, root); 2483 ret = btrfs_commit_transaction(trans, root);
2486 BUG_ON(ret); 2484 BUG_ON(ret);
2487 /* run commit again to drop the original snapshot */ 2485 /* run commit again to drop the original snapshot */
2488 trans = btrfs_join_transaction(root, 1); 2486 trans = btrfs_join_transaction(root);
2489 if (IS_ERR(trans)) 2487 if (IS_ERR(trans))
2490 return PTR_ERR(trans); 2488 return PTR_ERR(trans);
2491 btrfs_commit_transaction(trans, root); 2489 btrfs_commit_transaction(trans, root);
@@ -2911,9 +2909,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2911 2909
2912 INIT_LIST_HEAD(&splice); 2910 INIT_LIST_HEAD(&splice);
2913 2911
2914 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2915
2916 spin_lock(&root->fs_info->delalloc_lock); 2912 spin_lock(&root->fs_info->delalloc_lock);
2913 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2917 2914
2918 while (!list_empty(&splice)) { 2915 while (!list_empty(&splice)) {
2919 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2916 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
@@ -3024,10 +3021,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3024 3021
3025 WARN_ON(1); 3022 WARN_ON(1);
3026 3023
3027 mutex_lock(&root->fs_info->trans_mutex);
3028 mutex_lock(&root->fs_info->transaction_kthread_mutex); 3024 mutex_lock(&root->fs_info->transaction_kthread_mutex);
3029 3025
3026 spin_lock(&root->fs_info->trans_lock);
3030 list_splice_init(&root->fs_info->trans_list, &list); 3027 list_splice_init(&root->fs_info->trans_list, &list);
3028 root->fs_info->trans_no_join = 1;
3029 spin_unlock(&root->fs_info->trans_lock);
3030
3031 while (!list_empty(&list)) { 3031 while (!list_empty(&list)) {
3032 t = list_entry(list.next, struct btrfs_transaction, list); 3032 t = list_entry(list.next, struct btrfs_transaction, list);
3033 if (!t) 3033 if (!t)
@@ -3052,23 +3052,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3052 t->blocked = 0; 3052 t->blocked = 0;
3053 if (waitqueue_active(&root->fs_info->transaction_wait)) 3053 if (waitqueue_active(&root->fs_info->transaction_wait))
3054 wake_up(&root->fs_info->transaction_wait); 3054 wake_up(&root->fs_info->transaction_wait);
3055 mutex_unlock(&root->fs_info->trans_mutex);
3056 3055
3057 mutex_lock(&root->fs_info->trans_mutex);
3058 t->commit_done = 1; 3056 t->commit_done = 1;
3059 if (waitqueue_active(&t->commit_wait)) 3057 if (waitqueue_active(&t->commit_wait))
3060 wake_up(&t->commit_wait); 3058 wake_up(&t->commit_wait);
3061 mutex_unlock(&root->fs_info->trans_mutex);
3062
3063 mutex_lock(&root->fs_info->trans_mutex);
3064 3059
3065 btrfs_destroy_pending_snapshots(t); 3060 btrfs_destroy_pending_snapshots(t);
3066 3061
3067 btrfs_destroy_delalloc_inodes(root); 3062 btrfs_destroy_delalloc_inodes(root);
3068 3063
3069 spin_lock(&root->fs_info->new_trans_lock); 3064 spin_lock(&root->fs_info->trans_lock);
3070 root->fs_info->running_transaction = NULL; 3065 root->fs_info->running_transaction = NULL;
3071 spin_unlock(&root->fs_info->new_trans_lock); 3066 spin_unlock(&root->fs_info->trans_lock);
3072 3067
3073 btrfs_destroy_marked_extents(root, &t->dirty_pages, 3068 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3074 EXTENT_DIRTY); 3069 EXTENT_DIRTY);
@@ -3082,8 +3077,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3082 kmem_cache_free(btrfs_transaction_cachep, t); 3077 kmem_cache_free(btrfs_transaction_cachep, t);
3083 } 3078 }
3084 3079
3080 spin_lock(&root->fs_info->trans_lock);
3081 root->fs_info->trans_no_join = 0;
3082 spin_unlock(&root->fs_info->trans_lock);
3085 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 3083 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3086 mutex_unlock(&root->fs_info->trans_mutex);
3087 3084
3088 return 0; 3085 return 0;
3089} 3086}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 169bd62ce776..b42efc2ded51 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -348,7 +348,7 @@ static int caching_kthread(void *data)
348 */ 348 */
349 path->skip_locking = 1; 349 path->skip_locking = 1;
350 path->search_commit_root = 1; 350 path->search_commit_root = 1;
351 path->reada = 2; 351 path->reada = 1;
352 352
353 key.objectid = last; 353 key.objectid = last;
354 key.offset = 0; 354 key.offset = 0;
@@ -366,8 +366,7 @@ again:
366 nritems = btrfs_header_nritems(leaf); 366 nritems = btrfs_header_nritems(leaf);
367 367
368 while (1) { 368 while (1) {
369 smp_mb(); 369 if (btrfs_fs_closing(fs_info) > 1) {
370 if (fs_info->closing > 1) {
371 last = (u64)-1; 370 last = (u64)-1;
372 break; 371 break;
373 } 372 }
@@ -379,15 +378,18 @@ again:
379 if (ret) 378 if (ret)
380 break; 379 break;
381 380
382 caching_ctl->progress = last; 381 if (need_resched() ||
383 btrfs_release_path(path); 382 btrfs_next_leaf(extent_root, path)) {
384 up_read(&fs_info->extent_commit_sem); 383 caching_ctl->progress = last;
385 mutex_unlock(&caching_ctl->mutex); 384 btrfs_release_path(path);
386 if (btrfs_transaction_in_commit(fs_info)) 385 up_read(&fs_info->extent_commit_sem);
387 schedule_timeout(1); 386 mutex_unlock(&caching_ctl->mutex);
388 else
389 cond_resched(); 387 cond_resched();
390 goto again; 388 goto again;
389 }
390 leaf = path->nodes[0];
391 nritems = btrfs_header_nritems(leaf);
392 continue;
391 } 393 }
392 394
393 if (key.objectid < block_group->key.objectid) { 395 if (key.objectid < block_group->key.objectid) {
@@ -3065,7 +3067,7 @@ again:
3065 spin_unlock(&data_sinfo->lock); 3067 spin_unlock(&data_sinfo->lock);
3066alloc: 3068alloc:
3067 alloc_target = btrfs_get_alloc_profile(root, 1); 3069 alloc_target = btrfs_get_alloc_profile(root, 1);
3068 trans = btrfs_join_transaction(root, 1); 3070 trans = btrfs_join_transaction(root);
3069 if (IS_ERR(trans)) 3071 if (IS_ERR(trans))
3070 return PTR_ERR(trans); 3072 return PTR_ERR(trans);
3071 3073
@@ -3087,13 +3089,21 @@ alloc:
3087 } 3089 }
3088 goto again; 3090 goto again;
3089 } 3091 }
3092
3093 /*
3094 * If we have less pinned bytes than we want to allocate then
3095 * don't bother committing the transaction, it won't help us.
3096 */
3097 if (data_sinfo->bytes_pinned < bytes)
3098 committed = 1;
3090 spin_unlock(&data_sinfo->lock); 3099 spin_unlock(&data_sinfo->lock);
3091 3100
3092 /* commit the current transaction and try again */ 3101 /* commit the current transaction and try again */
3093commit_trans: 3102commit_trans:
3094 if (!committed && !root->fs_info->open_ioctl_trans) { 3103 if (!committed &&
3104 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3095 committed = 1; 3105 committed = 1;
3096 trans = btrfs_join_transaction(root, 1); 3106 trans = btrfs_join_transaction(root);
3097 if (IS_ERR(trans)) 3107 if (IS_ERR(trans))
3098 return PTR_ERR(trans); 3108 return PTR_ERR(trans);
3099 ret = btrfs_commit_transaction(trans, root); 3109 ret = btrfs_commit_transaction(trans, root);
@@ -3472,7 +3482,7 @@ again:
3472 goto out; 3482 goto out;
3473 3483
3474 ret = -ENOSPC; 3484 ret = -ENOSPC;
3475 trans = btrfs_join_transaction(root, 1); 3485 trans = btrfs_join_transaction(root);
3476 if (IS_ERR(trans)) 3486 if (IS_ERR(trans))
3477 goto out; 3487 goto out;
3478 ret = btrfs_commit_transaction(trans, root); 3488 ret = btrfs_commit_transaction(trans, root);
@@ -3699,7 +3709,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3699 if (trans) 3709 if (trans)
3700 return -EAGAIN; 3710 return -EAGAIN;
3701 3711
3702 trans = btrfs_join_transaction(root, 1); 3712 trans = btrfs_join_transaction(root);
3703 BUG_ON(IS_ERR(trans)); 3713 BUG_ON(IS_ERR(trans));
3704 ret = btrfs_commit_transaction(trans, root); 3714 ret = btrfs_commit_transaction(trans, root);
3705 return 0; 3715 return 0;
@@ -3837,6 +3847,37 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3837 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3847 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3838} 3848}
3839 3849
3850int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3851 struct btrfs_root *root,
3852 struct btrfs_block_rsv *rsv)
3853{
3854 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
3855 u64 num_bytes;
3856 int ret;
3857
3858 /*
3859 * Truncate should be freeing data, but give us 2 items just in case it
3860 * needs to use some space. We may want to be smarter about this in the
3861 * future.
3862 */
3863 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
3864
3865 /* We already have enough bytes, just return */
3866 if (rsv->reserved >= num_bytes)
3867 return 0;
3868
3869 num_bytes -= rsv->reserved;
3870
3871 /*
3872 * You should have reserved enough space before hand to do this, so this
3873 * should not fail.
3874 */
3875 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
3876 BUG_ON(ret);
3877
3878 return 0;
3879}
3880
3840int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3881int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3841 struct btrfs_root *root, 3882 struct btrfs_root *root,
3842 int num_items) 3883 int num_items)
@@ -3877,23 +3918,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
3877 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3918 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
3878 3919
3879 /* 3920 /*
3880 * one for deleting orphan item, one for updating inode and 3921 * We need to hold space in order to delete our orphan item once we've
3881 * two for calling btrfs_truncate_inode_items. 3922 * added it, so this takes the reservation so we can release it later
3882 * 3923 * when we are truly done with the orphan item.
3883 * btrfs_truncate_inode_items is a delete operation, it frees
3884 * more space than it uses in most cases. So two units of
3885 * metadata space should be enough for calling it many times.
3886 * If all of the metadata space is used, we can commit
3887 * transaction and use space it freed.
3888 */ 3924 */
3889 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); 3925 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3890 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3926 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3891} 3927}
3892 3928
3893void btrfs_orphan_release_metadata(struct inode *inode) 3929void btrfs_orphan_release_metadata(struct inode *inode)
3894{ 3930{
3895 struct btrfs_root *root = BTRFS_I(inode)->root; 3931 struct btrfs_root *root = BTRFS_I(inode)->root;
3896 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); 3932 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3897 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3933 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
3898} 3934}
3899 3935
@@ -4987,6 +5023,15 @@ have_block_group:
4987 if (unlikely(block_group->ro)) 5023 if (unlikely(block_group->ro))
4988 goto loop; 5024 goto loop;
4989 5025
5026 spin_lock(&block_group->free_space_ctl->tree_lock);
5027 if (cached &&
5028 block_group->free_space_ctl->free_space <
5029 num_bytes + empty_size) {
5030 spin_unlock(&block_group->free_space_ctl->tree_lock);
5031 goto loop;
5032 }
5033 spin_unlock(&block_group->free_space_ctl->tree_lock);
5034
4990 /* 5035 /*
4991 * Ok we want to try and use the cluster allocator, so lets look 5036 * Ok we want to try and use the cluster allocator, so lets look
4992 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5037 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
@@ -5150,6 +5195,7 @@ checks:
5150 btrfs_add_free_space(block_group, offset, 5195 btrfs_add_free_space(block_group, offset,
5151 search_start - offset); 5196 search_start - offset);
5152 BUG_ON(offset > search_start); 5197 BUG_ON(offset > search_start);
5198 btrfs_put_block_group(block_group);
5153 break; 5199 break;
5154loop: 5200loop:
5155 failed_cluster_refill = false; 5201 failed_cluster_refill = false;
@@ -5172,9 +5218,7 @@ loop:
5172 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5218 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
5173 * again 5219 * again
5174 */ 5220 */
5175 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 5221 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
5176 (found_uncached_bg || empty_size || empty_cluster ||
5177 allowed_chunk_alloc)) {
5178 index = 0; 5222 index = 0;
5179 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5223 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5180 found_uncached_bg = false; 5224 found_uncached_bg = false;
@@ -5214,42 +5258,39 @@ loop:
5214 goto search; 5258 goto search;
5215 } 5259 }
5216 5260
5217 if (loop < LOOP_CACHING_WAIT) { 5261 loop++;
5218 loop++;
5219 goto search;
5220 }
5221 5262
5222 if (loop == LOOP_ALLOC_CHUNK) { 5263 if (loop == LOOP_ALLOC_CHUNK) {
5223 empty_size = 0; 5264 if (allowed_chunk_alloc) {
5224 empty_cluster = 0; 5265 ret = do_chunk_alloc(trans, root, num_bytes +
5225 } 5266 2 * 1024 * 1024, data,
5267 CHUNK_ALLOC_LIMITED);
5268 allowed_chunk_alloc = 0;
5269 if (ret == 1)
5270 done_chunk_alloc = 1;
5271 } else if (!done_chunk_alloc &&
5272 space_info->force_alloc ==
5273 CHUNK_ALLOC_NO_FORCE) {
5274 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5275 }
5226 5276
5227 if (allowed_chunk_alloc) { 5277 /*
5228 ret = do_chunk_alloc(trans, root, num_bytes + 5278 * We didn't allocate a chunk, go ahead and drop the
5229 2 * 1024 * 1024, data, 5279 * empty size and loop again.
5230 CHUNK_ALLOC_LIMITED); 5280 */
5231 allowed_chunk_alloc = 0; 5281 if (!done_chunk_alloc)
5232 done_chunk_alloc = 1; 5282 loop = LOOP_NO_EMPTY_SIZE;
5233 } else if (!done_chunk_alloc &&
5234 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5235 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5236 } 5283 }
5237 5284
5238 if (loop < LOOP_NO_EMPTY_SIZE) { 5285 if (loop == LOOP_NO_EMPTY_SIZE) {
5239 loop++; 5286 empty_size = 0;
5240 goto search; 5287 empty_cluster = 0;
5241 } 5288 }
5242 ret = -ENOSPC; 5289
5290 goto search;
5243 } else if (!ins->objectid) { 5291 } else if (!ins->objectid) {
5244 ret = -ENOSPC; 5292 ret = -ENOSPC;
5245 } 5293 } else if (ins->objectid) {
5246
5247 /* we found what we needed */
5248 if (ins->objectid) {
5249 if (!(data & BTRFS_BLOCK_GROUP_DATA))
5250 trans->block_group = block_group->key.objectid;
5251
5252 btrfs_put_block_group(block_group);
5253 ret = 0; 5294 ret = 0;
5254 } 5295 }
5255 5296
@@ -6526,7 +6567,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
6526 6567
6527 BUG_ON(cache->ro); 6568 BUG_ON(cache->ro);
6528 6569
6529 trans = btrfs_join_transaction(root, 1); 6570 trans = btrfs_join_transaction(root);
6530 BUG_ON(IS_ERR(trans)); 6571 BUG_ON(IS_ERR(trans));
6531 6572
6532 alloc_flags = update_block_group_flags(root, cache->flags); 6573 alloc_flags = update_block_group_flags(root, cache->flags);
@@ -6882,6 +6923,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6882 path = btrfs_alloc_path(); 6923 path = btrfs_alloc_path();
6883 if (!path) 6924 if (!path)
6884 return -ENOMEM; 6925 return -ENOMEM;
6926 path->reada = 1;
6885 6927
6886 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 6928 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
6887 if (cache_gen != 0 && 6929 if (cache_gen != 0 &&
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c5d9fbb92bc3..7055d11c1efd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1476,7 +1476,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
1476 if (total_bytes >= max_bytes) 1476 if (total_bytes >= max_bytes)
1477 break; 1477 break;
1478 if (!found) { 1478 if (!found) {
1479 *start = state->start; 1479 *start = max(cur_start, state->start);
1480 found = 1; 1480 found = 1;
1481 } 1481 }
1482 last = state->end; 1482 last = state->end;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a4757c..a11a92ee2d30 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -126,9 +126,9 @@ struct extent_buffer {
126 unsigned long map_len; 126 unsigned long map_len;
127 struct page *first_page; 127 struct page *first_page;
128 unsigned long bflags; 128 unsigned long bflags;
129 atomic_t refs;
130 struct list_head leak_list; 129 struct list_head leak_list;
131 struct rcu_head rcu_head; 130 struct rcu_head rcu_head;
131 atomic_t refs;
132 132
133 /* the spinlock is used to protect most operations */ 133 /* the spinlock is used to protect most operations */
134 spinlock_t lock; 134 spinlock_t lock;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c6a22d783c35..fa4ef18b66b1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
129 if (!btrfs_test_opt(root, AUTO_DEFRAG)) 129 if (!btrfs_test_opt(root, AUTO_DEFRAG))
130 return 0; 130 return 0;
131 131
132 if (root->fs_info->closing) 132 if (btrfs_fs_closing(root->fs_info))
133 return 0; 133 return 0;
134 134
135 if (BTRFS_I(inode)->in_defrag) 135 if (BTRFS_I(inode)->in_defrag)
@@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
144 if (!defrag) 144 if (!defrag)
145 return -ENOMEM; 145 return -ENOMEM;
146 146
147 defrag->ino = inode->i_ino; 147 defrag->ino = btrfs_ino(inode);
148 defrag->transid = transid; 148 defrag->transid = transid;
149 defrag->root = root->root_key.objectid; 149 defrag->root = root->root_key.objectid;
150 150
@@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
229 first_ino = defrag->ino + 1; 229 first_ino = defrag->ino + 1;
230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); 230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
231 231
232 if (fs_info->closing) 232 if (btrfs_fs_closing(fs_info))
233 goto next_free; 233 goto next_free;
234 234
235 spin_unlock(&fs_info->defrag_inodes_lock); 235 spin_unlock(&fs_info->defrag_inodes_lock);
@@ -1480,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync)
1480 * the current transaction, we can bail out now without any 1480 * the current transaction, we can bail out now without any
1481 * syncing 1481 * syncing
1482 */ 1482 */
1483 mutex_lock(&root->fs_info->trans_mutex); 1483 smp_mb();
1484 if (BTRFS_I(inode)->last_trans <= 1484 if (BTRFS_I(inode)->last_trans <=
1485 root->fs_info->last_trans_committed) { 1485 root->fs_info->last_trans_committed) {
1486 BTRFS_I(inode)->last_trans = 0; 1486 BTRFS_I(inode)->last_trans = 0;
1487 mutex_unlock(&root->fs_info->trans_mutex);
1488 goto out; 1487 goto out;
1489 } 1488 }
1490 mutex_unlock(&root->fs_info->trans_mutex);
1491 1489
1492 /* 1490 /*
1493 * ok we haven't committed the transaction yet, lets do a commit 1491 * ok we haven't committed the transaction yet, lets do a commit
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 70d45795d758..9f985a429877 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
98 return inode; 98 return inode;
99 99
100 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
101 if (!root->fs_info->closing) { 101 if (!btrfs_fs_closing(root->fs_info)) {
102 block_group->inode = igrab(inode); 102 block_group->inode = igrab(inode);
103 block_group->iref = 1; 103 block_group->iref = 1;
104 } 104 }
@@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
250 pgoff_t index = 0; 250 pgoff_t index = 0;
251 unsigned long first_page_offset; 251 unsigned long first_page_offset;
252 int num_checksums; 252 int num_checksums;
253 int ret = 0, ret2; 253 int ret = 0;
254 254
255 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
256 256
@@ -402,7 +402,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
402 spin_lock(&ctl->tree_lock); 402 spin_lock(&ctl->tree_lock);
403 ret = link_free_space(ctl, e); 403 ret = link_free_space(ctl, e);
404 spin_unlock(&ctl->tree_lock); 404 spin_unlock(&ctl->tree_lock);
405 BUG_ON(ret); 405 if (ret) {
406 printk(KERN_ERR "Duplicate entries in "
407 "free space cache, dumping\n");
408 kunmap(page);
409 unlock_page(page);
410 page_cache_release(page);
411 goto free_cache;
412 }
406 } else { 413 } else {
407 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 414 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
408 if (!e->bitmap) { 415 if (!e->bitmap) {
@@ -414,10 +421,18 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
414 goto free_cache; 421 goto free_cache;
415 } 422 }
416 spin_lock(&ctl->tree_lock); 423 spin_lock(&ctl->tree_lock);
417 ret2 = link_free_space(ctl, e); 424 ret = link_free_space(ctl, e);
418 ctl->total_bitmaps++; 425 ctl->total_bitmaps++;
419 ctl->op->recalc_thresholds(ctl); 426 ctl->op->recalc_thresholds(ctl);
420 spin_unlock(&ctl->tree_lock); 427 spin_unlock(&ctl->tree_lock);
428 if (ret) {
429 printk(KERN_ERR "Duplicate entries in "
430 "free space cache, dumping\n");
431 kunmap(page);
432 unlock_page(page);
433 page_cache_release(page);
434 goto free_cache;
435 }
421 list_add_tail(&e->list, &bitmaps); 436 list_add_tail(&e->list, &bitmaps);
422 } 437 }
423 438
@@ -478,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
478 * If we're unmounting then just return, since this does a search on the 493 * If we're unmounting then just return, since this does a search on the
479 * normal root and not the commit root and we could deadlock. 494 * normal root and not the commit root and we could deadlock.
480 */ 495 */
481 smp_mb(); 496 if (btrfs_fs_closing(fs_info))
482 if (fs_info->closing)
483 return 0; 497 return 0;
484 498
485 /* 499 /*
@@ -575,10 +589,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
575 589
576 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 590 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
577 PAGE_CACHE_SHIFT; 591 PAGE_CACHE_SHIFT;
592
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
578 filemap_write_and_wait(inode->i_mapping); 599 filemap_write_and_wait(inode->i_mapping);
579 btrfs_wait_ordered_range(inode, inode->i_size & 600 btrfs_wait_ordered_range(inode, inode->i_size &
580 ~(root->sectorsize - 1), (u64)-1); 601 ~(root->sectorsize - 1), (u64)-1);
581 602
603 /* make sure we don't overflow that first page */
604 if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
605 /* this is really the same as running out of space, where we also return 0 */
606 printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
607 ret = 0;
608 goto out_update;
609 }
610
582 /* We need a checksum per page. */ 611 /* We need a checksum per page. */
583 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); 612 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
584 if (!crc) 613 if (!crc)
@@ -590,12 +619,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
590 return -1; 619 return -1;
591 } 620 }
592 621
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
599 /* Get the cluster for this block_group if it exists */ 622 /* Get the cluster for this block_group if it exists */
600 if (block_group && !list_empty(&block_group->cluster_list)) 623 if (block_group && !list_empty(&block_group->cluster_list))
601 cluster = list_entry(block_group->cluster_list.next, 624 cluster = list_entry(block_group->cluster_list.next,
@@ -857,12 +880,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
857 ret = 1; 880 ret = 1;
858 881
859out_free: 882out_free:
883 kfree(checksums);
884 kfree(pages);
885
886out_update:
860 if (ret != 1) { 887 if (ret != 1) {
861 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 888 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
862 BTRFS_I(inode)->generation = 0; 889 BTRFS_I(inode)->generation = 0;
863 } 890 }
864 kfree(checksums);
865 kfree(pages);
866 btrfs_update_inode(trans, root, inode); 891 btrfs_update_inode(trans, root, inode);
867 return ret; 892 return ret;
868} 893}
@@ -963,10 +988,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
963 * logically. 988 * logically.
964 */ 989 */
965 if (bitmap) { 990 if (bitmap) {
966 WARN_ON(info->bitmap); 991 if (info->bitmap) {
992 WARN_ON_ONCE(1);
993 return -EEXIST;
994 }
967 p = &(*p)->rb_right; 995 p = &(*p)->rb_right;
968 } else { 996 } else {
969 WARN_ON(!info->bitmap); 997 if (!info->bitmap) {
998 WARN_ON_ONCE(1);
999 return -EEXIST;
1000 }
970 p = &(*p)->rb_left; 1001 p = &(*p)->rb_left;
971 } 1002 }
972 } 1003 }
@@ -1386,6 +1417,23 @@ again:
1386 return 0; 1417 return 0;
1387} 1418}
1388 1419
1420static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1421 struct btrfs_free_space *info, u64 offset,
1422 u64 bytes)
1423{
1424 u64 bytes_to_set = 0;
1425 u64 end;
1426
1427 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1428
1429 bytes_to_set = min(end - offset, bytes);
1430
1431 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1432
1433 return bytes_to_set;
1434
1435}
1436
1389static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1437static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1390 struct btrfs_free_space *info) 1438 struct btrfs_free_space *info)
1391{ 1439{
@@ -1422,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1422 return true; 1470 return true;
1423} 1471}
1424 1472
1473static struct btrfs_free_space_op free_space_op = {
1474 .recalc_thresholds = recalculate_thresholds,
1475 .use_bitmap = use_bitmap,
1476};
1477
1425static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 1478static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1426 struct btrfs_free_space *info) 1479 struct btrfs_free_space *info)
1427{ 1480{
1428 struct btrfs_free_space *bitmap_info; 1481 struct btrfs_free_space *bitmap_info;
1482 struct btrfs_block_group_cache *block_group = NULL;
1429 int added = 0; 1483 int added = 0;
1430 u64 bytes, offset, end; 1484 u64 bytes, offset, bytes_added;
1431 int ret; 1485 int ret;
1432 1486
1433 bytes = info->bytes; 1487 bytes = info->bytes;
@@ -1436,7 +1490,49 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1436 if (!ctl->op->use_bitmap(ctl, info)) 1490 if (!ctl->op->use_bitmap(ctl, info))
1437 return 0; 1491 return 0;
1438 1492
1493 if (ctl->op == &free_space_op)
1494 block_group = ctl->private;
1439again: 1495again:
1496 /*
1497 * Since we link bitmaps right into the cluster we need to see if we
1498 * have a cluster here, and if so and it has our bitmap we need to add
1499 * the free space to that bitmap.
1500 */
1501 if (block_group && !list_empty(&block_group->cluster_list)) {
1502 struct btrfs_free_cluster *cluster;
1503 struct rb_node *node;
1504 struct btrfs_free_space *entry;
1505
1506 cluster = list_entry(block_group->cluster_list.next,
1507 struct btrfs_free_cluster,
1508 block_group_list);
1509 spin_lock(&cluster->lock);
1510 node = rb_first(&cluster->root);
1511 if (!node) {
1512 spin_unlock(&cluster->lock);
1513 goto no_cluster_bitmap;
1514 }
1515
1516 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1517 if (!entry->bitmap) {
1518 spin_unlock(&cluster->lock);
1519 goto no_cluster_bitmap;
1520 }
1521
1522 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1523 bytes_added = add_bytes_to_bitmap(ctl, entry,
1524 offset, bytes);
1525 bytes -= bytes_added;
1526 offset += bytes_added;
1527 }
1528 spin_unlock(&cluster->lock);
1529 if (!bytes) {
1530 ret = 1;
1531 goto out;
1532 }
1533 }
1534
1535no_cluster_bitmap:
1440 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1536 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1441 1, 0); 1537 1, 0);
1442 if (!bitmap_info) { 1538 if (!bitmap_info) {
@@ -1444,19 +1540,10 @@ again:
1444 goto new_bitmap; 1540 goto new_bitmap;
1445 } 1541 }
1446 1542
1447 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1543 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1448 1544 bytes -= bytes_added;
1449 if (offset >= bitmap_info->offset && offset + bytes > end) { 1545 offset += bytes_added;
1450 bitmap_set_bits(ctl, bitmap_info, offset, end - offset); 1546 added = 0;
1451 bytes -= end - offset;
1452 offset = end;
1453 added = 0;
1454 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1455 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1456 bytes = 0;
1457 } else {
1458 BUG();
1459 }
1460 1547
1461 if (!bytes) { 1548 if (!bytes) {
1462 ret = 1; 1549 ret = 1;
@@ -1735,11 +1822,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1735 "\n", count); 1822 "\n", count);
1736} 1823}
1737 1824
1738static struct btrfs_free_space_op free_space_op = {
1739 .recalc_thresholds = recalculate_thresholds,
1740 .use_bitmap = use_bitmap,
1741};
1742
1743void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1825void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1744{ 1826{
1745 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1827 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2111,9 +2193,11 @@ again:
2111/* 2193/*
2112 * This searches the block group for just extents to fill the cluster with. 2194 * This searches the block group for just extents to fill the cluster with.
2113 */ 2195 */
2114static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2196static noinline int
2115 struct btrfs_free_cluster *cluster, 2197setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2116 u64 offset, u64 bytes, u64 min_bytes) 2198 struct btrfs_free_cluster *cluster,
2199 struct list_head *bitmaps, u64 offset, u64 bytes,
2200 u64 min_bytes)
2117{ 2201{
2118 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2202 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2119 struct btrfs_free_space *first = NULL; 2203 struct btrfs_free_space *first = NULL;
@@ -2135,6 +2219,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2135 * extent entry. 2219 * extent entry.
2136 */ 2220 */
2137 while (entry->bitmap) { 2221 while (entry->bitmap) {
2222 if (list_empty(&entry->list))
2223 list_add_tail(&entry->list, bitmaps);
2138 node = rb_next(&entry->offset_index); 2224 node = rb_next(&entry->offset_index);
2139 if (!node) 2225 if (!node)
2140 return -ENOSPC; 2226 return -ENOSPC;
@@ -2154,8 +2240,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2154 return -ENOSPC; 2240 return -ENOSPC;
2155 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2241 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2156 2242
2157 if (entry->bitmap) 2243 if (entry->bitmap) {
2244 if (list_empty(&entry->list))
2245 list_add_tail(&entry->list, bitmaps);
2158 continue; 2246 continue;
2247 }
2248
2159 /* 2249 /*
2160 * we haven't filled the empty size and the window is 2250 * we haven't filled the empty size and the window is
2161 * very large. reset and try again 2251 * very large. reset and try again
@@ -2207,9 +2297,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2207 * This specifically looks for bitmaps that may work in the cluster, we assume 2297 * This specifically looks for bitmaps that may work in the cluster, we assume
2208 * that we have already failed to find extents that will work. 2298 * that we have already failed to find extents that will work.
2209 */ 2299 */
2210static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2300static noinline int
2211 struct btrfs_free_cluster *cluster, 2301setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2212 u64 offset, u64 bytes, u64 min_bytes) 2302 struct btrfs_free_cluster *cluster,
2303 struct list_head *bitmaps, u64 offset, u64 bytes,
2304 u64 min_bytes)
2213{ 2305{
2214 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2306 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2215 struct btrfs_free_space *entry; 2307 struct btrfs_free_space *entry;
@@ -2219,10 +2311,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2219 if (ctl->total_bitmaps == 0) 2311 if (ctl->total_bitmaps == 0)
2220 return -ENOSPC; 2312 return -ENOSPC;
2221 2313
2314 /*
2315 * First check our cached list of bitmaps and see if there is an entry
2316 * here that will work.
2317 */
2318 list_for_each_entry(entry, bitmaps, list) {
2319 if (entry->bytes < min_bytes)
2320 continue;
2321 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2322 bytes, min_bytes);
2323 if (!ret)
2324 return 0;
2325 }
2326
2327 /*
2328 * If we do have entries on our list and we are here then we didn't find
2329 * anything, so go ahead and get the next entry after the last entry in
2330 * this list and start the search from there.
2331 */
2332 if (!list_empty(bitmaps)) {
2333 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2334 list);
2335 node = rb_next(&entry->offset_index);
2336 if (!node)
2337 return -ENOSPC;
2338 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2339 goto search;
2340 }
2341
2222 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); 2342 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2223 if (!entry) 2343 if (!entry)
2224 return -ENOSPC; 2344 return -ENOSPC;
2225 2345
2346search:
2226 node = &entry->offset_index; 2347 node = &entry->offset_index;
2227 do { 2348 do {
2228 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2349 entry = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -2253,6 +2374,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2253 u64 offset, u64 bytes, u64 empty_size) 2374 u64 offset, u64 bytes, u64 empty_size)
2254{ 2375{
2255 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2376 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2377 struct list_head bitmaps;
2378 struct btrfs_free_space *entry, *tmp;
2256 u64 min_bytes; 2379 u64 min_bytes;
2257 int ret; 2380 int ret;
2258 2381
@@ -2291,11 +2414,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2291 goto out; 2414 goto out;
2292 } 2415 }
2293 2416
2294 ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, 2417 INIT_LIST_HEAD(&bitmaps);
2295 min_bytes); 2418 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2419 bytes, min_bytes);
2296 if (ret) 2420 if (ret)
2297 ret = setup_cluster_bitmap(block_group, cluster, offset, 2421 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2298 bytes, min_bytes); 2422 offset, bytes, min_bytes);
2423
2424 /* Clear our temporary list */
2425 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2426 list_del_init(&entry->list);
2299 2427
2300 if (!ret) { 2428 if (!ret) {
2301 atomic_inc(&block_group->count); 2429 atomic_inc(&block_group->count);
@@ -2481,7 +2609,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2481 return inode; 2609 return inode;
2482 2610
2483 spin_lock(&root->cache_lock); 2611 spin_lock(&root->cache_lock);
2484 if (!root->fs_info->closing) 2612 if (!btrfs_fs_closing(root->fs_info))
2485 root->cache_inode = igrab(inode); 2613 root->cache_inode = igrab(inode);
2486 spin_unlock(&root->cache_lock); 2614 spin_unlock(&root->cache_lock);
2487 2615
@@ -2504,12 +2632,14 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2504 int ret = 0; 2632 int ret = 0;
2505 u64 root_gen = btrfs_root_generation(&root->root_item); 2633 u64 root_gen = btrfs_root_generation(&root->root_item);
2506 2634
2635 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2636 return 0;
2637
2507 /* 2638 /*
2508 * If we're unmounting then just return, since this does a search on the 2639 * If we're unmounting then just return, since this does a search on the
2509 * normal root and not the commit root and we could deadlock. 2640 * normal root and not the commit root and we could deadlock.
2510 */ 2641 */
2511 smp_mb(); 2642 if (btrfs_fs_closing(fs_info))
2512 if (fs_info->closing)
2513 return 0; 2643 return 0;
2514 2644
2515 path = btrfs_alloc_path(); 2645 path = btrfs_alloc_path();
@@ -2543,6 +2673,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2543 struct inode *inode; 2673 struct inode *inode;
2544 int ret; 2674 int ret;
2545 2675
2676 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2677 return 0;
2678
2546 inode = lookup_free_ino_inode(root, path); 2679 inode = lookup_free_ino_inode(root, path);
2547 if (IS_ERR(inode)) 2680 if (IS_ERR(inode))
2548 return 0; 2681 return 0;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 3262cd17a12f..b4087e0fa871 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -38,6 +38,9 @@ static int caching_kthread(void *data)
38 int slot; 38 int slot;
39 int ret; 39 int ret;
40 40
41 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
42 return 0;
43
41 path = btrfs_alloc_path(); 44 path = btrfs_alloc_path();
42 if (!path) 45 if (!path)
43 return -ENOMEM; 46 return -ENOMEM;
@@ -59,8 +62,7 @@ again:
59 goto out; 62 goto out;
60 63
61 while (1) { 64 while (1) {
62 smp_mb(); 65 if (btrfs_fs_closing(fs_info))
63 if (fs_info->closing)
64 goto out; 66 goto out;
65 67
66 leaf = path->nodes[0]; 68 leaf = path->nodes[0];
@@ -141,6 +143,9 @@ static void start_caching(struct btrfs_root *root)
141 int ret; 143 int ret;
142 u64 objectid; 144 u64 objectid;
143 145
146 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
147 return;
148
144 spin_lock(&root->cache_lock); 149 spin_lock(&root->cache_lock);
145 if (root->cached != BTRFS_CACHE_NO) { 150 if (root->cached != BTRFS_CACHE_NO) {
146 spin_unlock(&root->cache_lock); 151 spin_unlock(&root->cache_lock);
@@ -178,6 +183,9 @@ static void start_caching(struct btrfs_root *root)
178 183
179int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) 184int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
180{ 185{
186 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
187 return btrfs_find_free_objectid(root, objectid);
188
181again: 189again:
182 *objectid = btrfs_find_ino_for_alloc(root); 190 *objectid = btrfs_find_ino_for_alloc(root);
183 191
@@ -201,6 +209,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
201{ 209{
202 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 210 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
203 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; 211 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
212
213 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
214 return;
215
204again: 216again:
205 if (root->cached == BTRFS_CACHE_FINISHED) { 217 if (root->cached == BTRFS_CACHE_FINISHED) {
206 __btrfs_add_free_space(ctl, objectid, 1); 218 __btrfs_add_free_space(ctl, objectid, 1);
@@ -250,6 +262,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
250 struct rb_node *n; 262 struct rb_node *n;
251 u64 count; 263 u64 count;
252 264
265 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
266 return;
267
253 while (1) { 268 while (1) {
254 n = rb_first(rbroot); 269 n = rb_first(rbroot);
255 if (!n) 270 if (!n)
@@ -388,9 +403,24 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
388 int prealloc; 403 int prealloc;
389 bool retry = false; 404 bool retry = false;
390 405
406 /* only fs tree and subvol/snap needs ino cache */
407 if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
408 (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
409 root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
410 return 0;
411
412 /* Don't save inode cache if we are deleting this root */
413 if (btrfs_root_refs(&root->root_item) == 0 &&
414 root != root->fs_info->tree_root)
415 return 0;
416
417 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
418 return 0;
419
391 path = btrfs_alloc_path(); 420 path = btrfs_alloc_path();
392 if (!path) 421 if (!path)
393 return -ENOMEM; 422 return -ENOMEM;
423
394again: 424again:
395 inode = lookup_free_ino_inode(root, path); 425 inode = lookup_free_ino_inode(root, path);
396 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 426 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 39a9d5750efd..751ddf8fc58a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -138,7 +138,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
138 return -ENOMEM; 138 return -ENOMEM;
139 139
140 path->leave_spinning = 1; 140 path->leave_spinning = 1;
141 btrfs_set_trans_block_group(trans, inode);
142 141
143 key.objectid = btrfs_ino(inode); 142 key.objectid = btrfs_ino(inode);
144 key.offset = start; 143 key.offset = start;
@@ -426,9 +425,8 @@ again:
426 } 425 }
427 } 426 }
428 if (start == 0) { 427 if (start == 0) {
429 trans = btrfs_join_transaction(root, 1); 428 trans = btrfs_join_transaction(root);
430 BUG_ON(IS_ERR(trans)); 429 BUG_ON(IS_ERR(trans));
431 btrfs_set_trans_block_group(trans, inode);
432 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 430 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
433 431
434 /* lets try to make an inline extent */ 432 /* lets try to make an inline extent */
@@ -623,8 +621,9 @@ retry:
623 async_extent->start + async_extent->ram_size - 1, 621 async_extent->start + async_extent->ram_size - 1,
624 GFP_NOFS); 622 GFP_NOFS);
625 623
626 trans = btrfs_join_transaction(root, 1); 624 trans = btrfs_join_transaction(root);
627 BUG_ON(IS_ERR(trans)); 625 BUG_ON(IS_ERR(trans));
626 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
628 ret = btrfs_reserve_extent(trans, root, 627 ret = btrfs_reserve_extent(trans, root,
629 async_extent->compressed_size, 628 async_extent->compressed_size,
630 async_extent->compressed_size, 629 async_extent->compressed_size,
@@ -793,9 +792,8 @@ static noinline int cow_file_range(struct inode *inode,
793 int ret = 0; 792 int ret = 0;
794 793
795 BUG_ON(is_free_space_inode(root, inode)); 794 BUG_ON(is_free_space_inode(root, inode));
796 trans = btrfs_join_transaction(root, 1); 795 trans = btrfs_join_transaction(root);
797 BUG_ON(IS_ERR(trans)); 796 BUG_ON(IS_ERR(trans));
798 btrfs_set_trans_block_group(trans, inode);
799 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 797 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
800 798
801 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 799 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -1077,10 +1075,12 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1077 nolock = is_free_space_inode(root, inode); 1075 nolock = is_free_space_inode(root, inode);
1078 1076
1079 if (nolock) 1077 if (nolock)
1080 trans = btrfs_join_transaction_nolock(root, 1); 1078 trans = btrfs_join_transaction_nolock(root);
1081 else 1079 else
1082 trans = btrfs_join_transaction(root, 1); 1080 trans = btrfs_join_transaction(root);
1081
1083 BUG_ON(IS_ERR(trans)); 1082 BUG_ON(IS_ERR(trans));
1083 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1084 1084
1085 cow_start = (u64)-1; 1085 cow_start = (u64)-1;
1086 cur_offset = start; 1086 cur_offset = start;
@@ -1519,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1519{ 1519{
1520 struct btrfs_ordered_sum *sum; 1520 struct btrfs_ordered_sum *sum;
1521 1521
1522 btrfs_set_trans_block_group(trans, inode);
1523
1524 list_for_each_entry(sum, list, list) { 1522 list_for_each_entry(sum, list, list) {
1525 btrfs_csum_file_blocks(trans, 1523 btrfs_csum_file_blocks(trans,
1526 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1524 BTRFS_I(inode)->root->fs_info->csum_root, sum);
@@ -1735,11 +1733,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1735 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1733 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1736 if (!ret) { 1734 if (!ret) {
1737 if (nolock) 1735 if (nolock)
1738 trans = btrfs_join_transaction_nolock(root, 1); 1736 trans = btrfs_join_transaction_nolock(root);
1739 else 1737 else
1740 trans = btrfs_join_transaction(root, 1); 1738 trans = btrfs_join_transaction(root);
1741 BUG_ON(IS_ERR(trans)); 1739 BUG_ON(IS_ERR(trans));
1742 btrfs_set_trans_block_group(trans, inode);
1743 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1740 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1744 ret = btrfs_update_inode(trans, root, inode); 1741 ret = btrfs_update_inode(trans, root, inode);
1745 BUG_ON(ret); 1742 BUG_ON(ret);
@@ -1752,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1752 0, &cached_state, GFP_NOFS); 1749 0, &cached_state, GFP_NOFS);
1753 1750
1754 if (nolock) 1751 if (nolock)
1755 trans = btrfs_join_transaction_nolock(root, 1); 1752 trans = btrfs_join_transaction_nolock(root);
1756 else 1753 else
1757 trans = btrfs_join_transaction(root, 1); 1754 trans = btrfs_join_transaction(root);
1758 BUG_ON(IS_ERR(trans)); 1755 BUG_ON(IS_ERR(trans));
1759 btrfs_set_trans_block_group(trans, inode);
1760 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1756 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1761 1757
1762 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1758 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -1990,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1990 } 1986 }
1991 1987
1992 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) 1988 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
1993 return 0; 1989 goto good;
1994 1990
1995 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && 1991 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1996 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { 1992 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
@@ -2431,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2431 (u64)-1); 2427 (u64)-1);
2432 2428
2433 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2429 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2434 trans = btrfs_join_transaction(root, 1); 2430 trans = btrfs_join_transaction(root);
2435 if (!IS_ERR(trans)) 2431 if (!IS_ERR(trans))
2436 btrfs_end_transaction(trans, root); 2432 btrfs_end_transaction(trans, root);
2437 } 2433 }
@@ -2511,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
2511 struct btrfs_root *root = BTRFS_I(inode)->root; 2507 struct btrfs_root *root = BTRFS_I(inode)->root;
2512 struct btrfs_key location; 2508 struct btrfs_key location;
2513 int maybe_acls; 2509 int maybe_acls;
2514 u64 alloc_group_block;
2515 u32 rdev; 2510 u32 rdev;
2516 int ret; 2511 int ret;
2517 2512
2518 path = btrfs_alloc_path(); 2513 path = btrfs_alloc_path();
2519 BUG_ON(!path); 2514 BUG_ON(!path);
2515 path->leave_spinning = 1;
2520 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 2516 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
2521 2517
2522 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 2518 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -2526,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
2526 leaf = path->nodes[0]; 2522 leaf = path->nodes[0];
2527 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2523 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2528 struct btrfs_inode_item); 2524 struct btrfs_inode_item);
2525 if (!leaf->map_token)
2526 map_private_extent_buffer(leaf, (unsigned long)inode_item,
2527 sizeof(struct btrfs_inode_item),
2528 &leaf->map_token, &leaf->kaddr,
2529 &leaf->map_start, &leaf->map_len,
2530 KM_USER1);
2529 2531
2530 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2532 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2531 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); 2533 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
@@ -2555,8 +2557,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2555 BTRFS_I(inode)->index_cnt = (u64)-1; 2557 BTRFS_I(inode)->index_cnt = (u64)-1;
2556 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2558 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2557 2559
2558 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2559
2560 /* 2560 /*
2561 * try to precache a NULL acl entry for files that don't have 2561 * try to precache a NULL acl entry for files that don't have
2562 * any xattrs or acls 2562 * any xattrs or acls
@@ -2566,8 +2566,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
2566 if (!maybe_acls) 2566 if (!maybe_acls)
2567 cache_no_acl(inode); 2567 cache_no_acl(inode);
2568 2568
2569 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2569 if (leaf->map_token) {
2570 alloc_group_block, 0); 2570 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2571 leaf->map_token = NULL;
2572 }
2573
2571 btrfs_free_path(path); 2574 btrfs_free_path(path);
2572 inode_item = NULL; 2575 inode_item = NULL;
2573 2576
@@ -2647,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2647 btrfs_set_inode_transid(leaf, item, trans->transid); 2650 btrfs_set_inode_transid(leaf, item, trans->transid);
2648 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2651 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2649 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2652 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2650 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2653 btrfs_set_inode_block_group(leaf, item, 0);
2651 2654
2652 if (leaf->map_token) { 2655 if (leaf->map_token) {
2653 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 2656 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
@@ -3004,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3004 if (IS_ERR(trans)) 3007 if (IS_ERR(trans))
3005 return PTR_ERR(trans); 3008 return PTR_ERR(trans);
3006 3009
3007 btrfs_set_trans_block_group(trans, dir);
3008
3009 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); 3010 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
3010 3011
3011 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 3012 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
@@ -3094,8 +3095,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3094 if (IS_ERR(trans)) 3095 if (IS_ERR(trans))
3095 return PTR_ERR(trans); 3096 return PTR_ERR(trans);
3096 3097
3097 btrfs_set_trans_block_group(trans, dir);
3098
3099 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 3098 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3100 err = btrfs_unlink_subvol(trans, root, dir, 3099 err = btrfs_unlink_subvol(trans, root, dir,
3101 BTRFS_I(inode)->location.objectid, 3100 BTRFS_I(inode)->location.objectid,
@@ -3514,7 +3513,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3514 err = PTR_ERR(trans); 3513 err = PTR_ERR(trans);
3515 break; 3514 break;
3516 } 3515 }
3517 btrfs_set_trans_block_group(trans, inode);
3518 3516
3519 err = btrfs_drop_extents(trans, inode, cur_offset, 3517 err = btrfs_drop_extents(trans, inode, cur_offset,
3520 cur_offset + hole_size, 3518 cur_offset + hole_size,
@@ -3648,9 +3646,8 @@ void btrfs_evict_inode(struct inode *inode)
3648 btrfs_i_size_write(inode, 0); 3646 btrfs_i_size_write(inode, 0);
3649 3647
3650 while (1) { 3648 while (1) {
3651 trans = btrfs_start_transaction(root, 0); 3649 trans = btrfs_join_transaction(root);
3652 BUG_ON(IS_ERR(trans)); 3650 BUG_ON(IS_ERR(trans));
3653 btrfs_set_trans_block_group(trans, inode);
3654 trans->block_rsv = root->orphan_block_rsv; 3651 trans->block_rsv = root->orphan_block_rsv;
3655 3652
3656 ret = btrfs_block_rsv_check(trans, root, 3653 ret = btrfs_block_rsv_check(trans, root,
@@ -4133,7 +4130,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4133 path = btrfs_alloc_path(); 4130 path = btrfs_alloc_path();
4134 if (!path) 4131 if (!path)
4135 return -ENOMEM; 4132 return -ENOMEM;
4136 path->reada = 2; 4133
4134 path->reada = 1;
4137 4135
4138 if (key_type == BTRFS_DIR_INDEX_KEY) { 4136 if (key_type == BTRFS_DIR_INDEX_KEY) {
4139 INIT_LIST_HEAD(&ins_list); 4137 INIT_LIST_HEAD(&ins_list);
@@ -4268,18 +4266,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4268 if (BTRFS_I(inode)->dummy_inode) 4266 if (BTRFS_I(inode)->dummy_inode)
4269 return 0; 4267 return 0;
4270 4268
4271 smp_mb(); 4269 if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
4272 if (root->fs_info->closing && is_free_space_inode(root, inode))
4273 nolock = true; 4270 nolock = true;
4274 4271
4275 if (wbc->sync_mode == WB_SYNC_ALL) { 4272 if (wbc->sync_mode == WB_SYNC_ALL) {
4276 if (nolock) 4273 if (nolock)
4277 trans = btrfs_join_transaction_nolock(root, 1); 4274 trans = btrfs_join_transaction_nolock(root);
4278 else 4275 else
4279 trans = btrfs_join_transaction(root, 1); 4276 trans = btrfs_join_transaction(root);
4280 if (IS_ERR(trans)) 4277 if (IS_ERR(trans))
4281 return PTR_ERR(trans); 4278 return PTR_ERR(trans);
4282 btrfs_set_trans_block_group(trans, inode);
4283 if (nolock) 4279 if (nolock)
4284 ret = btrfs_end_transaction_nolock(trans, root); 4280 ret = btrfs_end_transaction_nolock(trans, root);
4285 else 4281 else
@@ -4303,9 +4299,8 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
4303 if (BTRFS_I(inode)->dummy_inode) 4299 if (BTRFS_I(inode)->dummy_inode)
4304 return; 4300 return;
4305 4301
4306 trans = btrfs_join_transaction(root, 1); 4302 trans = btrfs_join_transaction(root);
4307 BUG_ON(IS_ERR(trans)); 4303 BUG_ON(IS_ERR(trans));
4308 btrfs_set_trans_block_group(trans, inode);
4309 4304
4310 ret = btrfs_update_inode(trans, root, inode); 4305 ret = btrfs_update_inode(trans, root, inode);
4311 if (ret && ret == -ENOSPC) { 4306 if (ret && ret == -ENOSPC) {
@@ -4319,7 +4314,6 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
4319 PTR_ERR(trans)); 4314 PTR_ERR(trans));
4320 return; 4315 return;
4321 } 4316 }
4322 btrfs_set_trans_block_group(trans, inode);
4323 4317
4324 ret = btrfs_update_inode(trans, root, inode); 4318 ret = btrfs_update_inode(trans, root, inode);
4325 if (ret) { 4319 if (ret) {
@@ -4418,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4418 struct btrfs_root *root, 4412 struct btrfs_root *root,
4419 struct inode *dir, 4413 struct inode *dir,
4420 const char *name, int name_len, 4414 const char *name, int name_len,
4421 u64 ref_objectid, u64 objectid, 4415 u64 ref_objectid, u64 objectid, int mode,
4422 u64 alloc_hint, int mode, u64 *index) 4416 u64 *index)
4423{ 4417{
4424 struct inode *inode; 4418 struct inode *inode;
4425 struct btrfs_inode_item *inode_item; 4419 struct btrfs_inode_item *inode_item;
@@ -4472,8 +4466,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4472 owner = 0; 4466 owner = 0;
4473 else 4467 else
4474 owner = 1; 4468 owner = 1;
4475 BTRFS_I(inode)->block_group =
4476 btrfs_find_block_group(root, 0, alloc_hint, owner);
4477 4469
4478 key[0].objectid = objectid; 4470 key[0].objectid = objectid;
4479 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 4471 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -4629,15 +4621,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4629 if (IS_ERR(trans)) 4621 if (IS_ERR(trans))
4630 return PTR_ERR(trans); 4622 return PTR_ERR(trans);
4631 4623
4632 btrfs_set_trans_block_group(trans, dir);
4633
4634 err = btrfs_find_free_ino(root, &objectid); 4624 err = btrfs_find_free_ino(root, &objectid);
4635 if (err) 4625 if (err)
4636 goto out_unlock; 4626 goto out_unlock;
4637 4627
4638 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4628 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4639 dentry->d_name.len, btrfs_ino(dir), objectid, 4629 dentry->d_name.len, btrfs_ino(dir), objectid,
4640 BTRFS_I(dir)->block_group, mode, &index); 4630 mode, &index);
4641 if (IS_ERR(inode)) { 4631 if (IS_ERR(inode)) {
4642 err = PTR_ERR(inode); 4632 err = PTR_ERR(inode);
4643 goto out_unlock; 4633 goto out_unlock;
@@ -4649,7 +4639,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4649 goto out_unlock; 4639 goto out_unlock;
4650 } 4640 }
4651 4641
4652 btrfs_set_trans_block_group(trans, inode);
4653 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4642 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4654 if (err) 4643 if (err)
4655 drop_inode = 1; 4644 drop_inode = 1;
@@ -4658,8 +4647,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4658 init_special_inode(inode, inode->i_mode, rdev); 4647 init_special_inode(inode, inode->i_mode, rdev);
4659 btrfs_update_inode(trans, root, inode); 4648 btrfs_update_inode(trans, root, inode);
4660 } 4649 }
4661 btrfs_update_inode_block_group(trans, inode);
4662 btrfs_update_inode_block_group(trans, dir);
4663out_unlock: 4650out_unlock:
4664 nr = trans->blocks_used; 4651 nr = trans->blocks_used;
4665 btrfs_end_transaction_throttle(trans, root); 4652 btrfs_end_transaction_throttle(trans, root);
@@ -4692,15 +4679,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4692 if (IS_ERR(trans)) 4679 if (IS_ERR(trans))
4693 return PTR_ERR(trans); 4680 return PTR_ERR(trans);
4694 4681
4695 btrfs_set_trans_block_group(trans, dir);
4696
4697 err = btrfs_find_free_ino(root, &objectid); 4682 err = btrfs_find_free_ino(root, &objectid);
4698 if (err) 4683 if (err)
4699 goto out_unlock; 4684 goto out_unlock;
4700 4685
4701 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4686 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4702 dentry->d_name.len, btrfs_ino(dir), objectid, 4687 dentry->d_name.len, btrfs_ino(dir), objectid,
4703 BTRFS_I(dir)->block_group, mode, &index); 4688 mode, &index);
4704 if (IS_ERR(inode)) { 4689 if (IS_ERR(inode)) {
4705 err = PTR_ERR(inode); 4690 err = PTR_ERR(inode);
4706 goto out_unlock; 4691 goto out_unlock;
@@ -4712,7 +4697,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4712 goto out_unlock; 4697 goto out_unlock;
4713 } 4698 }
4714 4699
4715 btrfs_set_trans_block_group(trans, inode);
4716 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4700 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4717 if (err) 4701 if (err)
4718 drop_inode = 1; 4702 drop_inode = 1;
@@ -4723,8 +4707,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4723 inode->i_op = &btrfs_file_inode_operations; 4707 inode->i_op = &btrfs_file_inode_operations;
4724 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 4708 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4725 } 4709 }
4726 btrfs_update_inode_block_group(trans, inode);
4727 btrfs_update_inode_block_group(trans, dir);
4728out_unlock: 4710out_unlock:
4729 nr = trans->blocks_used; 4711 nr = trans->blocks_used;
4730 btrfs_end_transaction_throttle(trans, root); 4712 btrfs_end_transaction_throttle(trans, root);
@@ -4771,8 +4753,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4771 4753
4772 btrfs_inc_nlink(inode); 4754 btrfs_inc_nlink(inode);
4773 inode->i_ctime = CURRENT_TIME; 4755 inode->i_ctime = CURRENT_TIME;
4774
4775 btrfs_set_trans_block_group(trans, dir);
4776 ihold(inode); 4756 ihold(inode);
4777 4757
4778 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); 4758 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
@@ -4781,7 +4761,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4781 drop_inode = 1; 4761 drop_inode = 1;
4782 } else { 4762 } else {
4783 struct dentry *parent = dget_parent(dentry); 4763 struct dentry *parent = dget_parent(dentry);
4784 btrfs_update_inode_block_group(trans, dir);
4785 err = btrfs_update_inode(trans, root, inode); 4764 err = btrfs_update_inode(trans, root, inode);
4786 BUG_ON(err); 4765 BUG_ON(err);
4787 btrfs_log_new_name(trans, inode, NULL, parent); 4766 btrfs_log_new_name(trans, inode, NULL, parent);
@@ -4818,7 +4797,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4818 trans = btrfs_start_transaction(root, 5); 4797 trans = btrfs_start_transaction(root, 5);
4819 if (IS_ERR(trans)) 4798 if (IS_ERR(trans))
4820 return PTR_ERR(trans); 4799 return PTR_ERR(trans);
4821 btrfs_set_trans_block_group(trans, dir);
4822 4800
4823 err = btrfs_find_free_ino(root, &objectid); 4801 err = btrfs_find_free_ino(root, &objectid);
4824 if (err) 4802 if (err)
@@ -4826,8 +4804,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4826 4804
4827 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4805 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4828 dentry->d_name.len, btrfs_ino(dir), objectid, 4806 dentry->d_name.len, btrfs_ino(dir), objectid,
4829 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4807 S_IFDIR | mode, &index);
4830 &index);
4831 if (IS_ERR(inode)) { 4808 if (IS_ERR(inode)) {
4832 err = PTR_ERR(inode); 4809 err = PTR_ERR(inode);
4833 goto out_fail; 4810 goto out_fail;
@@ -4841,7 +4818,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4841 4818
4842 inode->i_op = &btrfs_dir_inode_operations; 4819 inode->i_op = &btrfs_dir_inode_operations;
4843 inode->i_fop = &btrfs_dir_file_operations; 4820 inode->i_fop = &btrfs_dir_file_operations;
4844 btrfs_set_trans_block_group(trans, inode);
4845 4821
4846 btrfs_i_size_write(inode, 0); 4822 btrfs_i_size_write(inode, 0);
4847 err = btrfs_update_inode(trans, root, inode); 4823 err = btrfs_update_inode(trans, root, inode);
@@ -4855,8 +4831,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4855 4831
4856 d_instantiate(dentry, inode); 4832 d_instantiate(dentry, inode);
4857 drop_on_err = 0; 4833 drop_on_err = 0;
4858 btrfs_update_inode_block_group(trans, inode);
4859 btrfs_update_inode_block_group(trans, dir);
4860 4834
4861out_fail: 4835out_fail:
4862 nr = trans->blocks_used; 4836 nr = trans->blocks_used;
@@ -4989,7 +4963,15 @@ again:
4989 4963
4990 if (!path) { 4964 if (!path) {
4991 path = btrfs_alloc_path(); 4965 path = btrfs_alloc_path();
4992 BUG_ON(!path); 4966 if (!path) {
4967 err = -ENOMEM;
4968 goto out;
4969 }
4970 /*
4971 * Chances are we'll be called again, so go ahead and do
4972 * readahead
4973 */
4974 path->reada = 1;
4993 } 4975 }
4994 4976
4995 ret = btrfs_lookup_file_extent(trans, root, path, 4977 ret = btrfs_lookup_file_extent(trans, root, path,
@@ -5130,8 +5112,10 @@ again:
5130 kunmap(page); 5112 kunmap(page);
5131 free_extent_map(em); 5113 free_extent_map(em);
5132 em = NULL; 5114 em = NULL;
5115
5133 btrfs_release_path(path); 5116 btrfs_release_path(path);
5134 trans = btrfs_join_transaction(root, 1); 5117 trans = btrfs_join_transaction(root);
5118
5135 if (IS_ERR(trans)) 5119 if (IS_ERR(trans))
5136 return ERR_CAST(trans); 5120 return ERR_CAST(trans);
5137 goto again; 5121 goto again;
@@ -5375,7 +5359,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5375 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5359 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5376 } 5360 }
5377 5361
5378 trans = btrfs_join_transaction(root, 0); 5362 trans = btrfs_join_transaction(root);
5379 if (IS_ERR(trans)) 5363 if (IS_ERR(trans))
5380 return ERR_CAST(trans); 5364 return ERR_CAST(trans);
5381 5365
@@ -5611,7 +5595,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5611 * to make sure the current transaction stays open 5595 * to make sure the current transaction stays open
5612 * while we look for nocow cross refs 5596 * while we look for nocow cross refs
5613 */ 5597 */
5614 trans = btrfs_join_transaction(root, 0); 5598 trans = btrfs_join_transaction(root);
5615 if (IS_ERR(trans)) 5599 if (IS_ERR(trans))
5616 goto must_cow; 5600 goto must_cow;
5617 5601
@@ -5750,7 +5734,7 @@ again:
5750 5734
5751 BUG_ON(!ordered); 5735 BUG_ON(!ordered);
5752 5736
5753 trans = btrfs_join_transaction(root, 1); 5737 trans = btrfs_join_transaction(root);
5754 if (IS_ERR(trans)) { 5738 if (IS_ERR(trans)) {
5755 err = -ENOMEM; 5739 err = -ENOMEM;
5756 goto out; 5740 goto out;
@@ -6500,6 +6484,7 @@ out:
6500static int btrfs_truncate(struct inode *inode) 6484static int btrfs_truncate(struct inode *inode)
6501{ 6485{
6502 struct btrfs_root *root = BTRFS_I(inode)->root; 6486 struct btrfs_root *root = BTRFS_I(inode)->root;
6487 struct btrfs_block_rsv *rsv;
6503 int ret; 6488 int ret;
6504 int err = 0; 6489 int err = 0;
6505 struct btrfs_trans_handle *trans; 6490 struct btrfs_trans_handle *trans;
@@ -6513,28 +6498,80 @@ static int btrfs_truncate(struct inode *inode)
6513 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6498 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
6514 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6499 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
6515 6500
6516 trans = btrfs_start_transaction(root, 5); 6501 /*
6517 if (IS_ERR(trans)) 6502 * Yes ladies and gentelment, this is indeed ugly. The fact is we have
6518 return PTR_ERR(trans); 6503 * 3 things going on here
6504 *
6505 * 1) We need to reserve space for our orphan item and the space to
6506 * delete our orphan item. Lord knows we don't want to have a dangling
6507 * orphan item because we didn't reserve space to remove it.
6508 *
6509 * 2) We need to reserve space to update our inode.
6510 *
6511 * 3) We need to have something to cache all the space that is going to
6512 * be free'd up by the truncate operation, but also have some slack
6513 * space reserved in case it uses space during the truncate (thank you
6514 * very much snapshotting).
6515 *
6516 * And we need these to all be seperate. The fact is we can use alot of
6517 * space doing the truncate, and we have no earthly idea how much space
6518 * we will use, so we need the truncate reservation to be seperate so it
6519 * doesn't end up using space reserved for updating the inode or
6520 * removing the orphan item. We also need to be able to stop the
6521 * transaction and start a new one, which means we need to be able to
6522 * update the inode several times, and we have no idea of knowing how
6523 * many times that will be, so we can't just reserve 1 item for the
6524 * entirety of the opration, so that has to be done seperately as well.
6525 * Then there is the orphan item, which does indeed need to be held on
6526 * to for the whole operation, and we need nobody to touch this reserved
6527 * space except the orphan code.
6528 *
6529 * So that leaves us with
6530 *
6531 * 1) root->orphan_block_rsv - for the orphan deletion.
6532 * 2) rsv - for the truncate reservation, which we will steal from the
6533 * transaction reservation.
6534 * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
6535 * updating the inode.
6536 */
6537 rsv = btrfs_alloc_block_rsv(root);
6538 if (!rsv)
6539 return -ENOMEM;
6540 btrfs_add_durable_block_rsv(root->fs_info, rsv);
6541
6542 trans = btrfs_start_transaction(root, 4);
6543 if (IS_ERR(trans)) {
6544 err = PTR_ERR(trans);
6545 goto out;
6546 }
6519 6547
6520 btrfs_set_trans_block_group(trans, inode); 6548 /*
6549 * Reserve space for the truncate process. Truncate should be adding
6550 * space, but if there are snapshots it may end up using space.
6551 */
6552 ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
6553 BUG_ON(ret);
6521 6554
6522 ret = btrfs_orphan_add(trans, inode); 6555 ret = btrfs_orphan_add(trans, inode);
6523 if (ret) { 6556 if (ret) {
6524 btrfs_end_transaction(trans, root); 6557 btrfs_end_transaction(trans, root);
6525 return ret; 6558 goto out;
6526 } 6559 }
6527 6560
6528 nr = trans->blocks_used; 6561 nr = trans->blocks_used;
6529 btrfs_end_transaction(trans, root); 6562 btrfs_end_transaction(trans, root);
6530 btrfs_btree_balance_dirty(root, nr); 6563 btrfs_btree_balance_dirty(root, nr);
6531 6564
6532 /* Now start a transaction for the truncate */ 6565 /*
6533 trans = btrfs_start_transaction(root, 0); 6566 * Ok so we've already migrated our bytes over for the truncate, so here
6534 if (IS_ERR(trans)) 6567 * just reserve the one slot we need for updating the inode.
6535 return PTR_ERR(trans); 6568 */
6536 btrfs_set_trans_block_group(trans, inode); 6569 trans = btrfs_start_transaction(root, 1);
6537 trans->block_rsv = root->orphan_block_rsv; 6570 if (IS_ERR(trans)) {
6571 err = PTR_ERR(trans);
6572 goto out;
6573 }
6574 trans->block_rsv = rsv;
6538 6575
6539 /* 6576 /*
6540 * setattr is responsible for setting the ordered_data_close flag, 6577 * setattr is responsible for setting the ordered_data_close flag,
@@ -6558,24 +6595,17 @@ static int btrfs_truncate(struct inode *inode)
6558 6595
6559 while (1) { 6596 while (1) {
6560 if (!trans) { 6597 if (!trans) {
6561 trans = btrfs_start_transaction(root, 0); 6598 trans = btrfs_start_transaction(root, 3);
6562 if (IS_ERR(trans)) 6599 if (IS_ERR(trans)) {
6563 return PTR_ERR(trans); 6600 err = PTR_ERR(trans);
6564 btrfs_set_trans_block_group(trans, inode); 6601 goto out;
6565 trans->block_rsv = root->orphan_block_rsv; 6602 }
6566 }
6567 6603
6568 ret = btrfs_block_rsv_check(trans, root, 6604 ret = btrfs_truncate_reserve_metadata(trans, root,
6569 root->orphan_block_rsv, 0, 5); 6605 rsv);
6570 if (ret == -EAGAIN) { 6606 BUG_ON(ret);
6571 ret = btrfs_commit_transaction(trans, root); 6607
6572 if (ret) 6608 trans->block_rsv = rsv;
6573 return ret;
6574 trans = NULL;
6575 continue;
6576 } else if (ret) {
6577 err = ret;
6578 break;
6579 } 6609 }
6580 6610
6581 ret = btrfs_truncate_inode_items(trans, root, inode, 6611 ret = btrfs_truncate_inode_items(trans, root, inode,
@@ -6586,6 +6616,7 @@ static int btrfs_truncate(struct inode *inode)
6586 break; 6616 break;
6587 } 6617 }
6588 6618
6619 trans->block_rsv = &root->fs_info->trans_block_rsv;
6589 ret = btrfs_update_inode(trans, root, inode); 6620 ret = btrfs_update_inode(trans, root, inode);
6590 if (ret) { 6621 if (ret) {
6591 err = ret; 6622 err = ret;
@@ -6599,6 +6630,7 @@ static int btrfs_truncate(struct inode *inode)
6599 } 6630 }
6600 6631
6601 if (ret == 0 && inode->i_nlink > 0) { 6632 if (ret == 0 && inode->i_nlink > 0) {
6633 trans->block_rsv = root->orphan_block_rsv;
6602 ret = btrfs_orphan_del(trans, inode); 6634 ret = btrfs_orphan_del(trans, inode);
6603 if (ret) 6635 if (ret)
6604 err = ret; 6636 err = ret;
@@ -6610,15 +6642,20 @@ static int btrfs_truncate(struct inode *inode)
6610 ret = btrfs_orphan_del(NULL, inode); 6642 ret = btrfs_orphan_del(NULL, inode);
6611 } 6643 }
6612 6644
6645 trans->block_rsv = &root->fs_info->trans_block_rsv;
6613 ret = btrfs_update_inode(trans, root, inode); 6646 ret = btrfs_update_inode(trans, root, inode);
6614 if (ret && !err) 6647 if (ret && !err)
6615 err = ret; 6648 err = ret;
6616 6649
6617 nr = trans->blocks_used; 6650 nr = trans->blocks_used;
6618 ret = btrfs_end_transaction_throttle(trans, root); 6651 ret = btrfs_end_transaction_throttle(trans, root);
6652 btrfs_btree_balance_dirty(root, nr);
6653
6654out:
6655 btrfs_free_block_rsv(root, rsv);
6656
6619 if (ret && !err) 6657 if (ret && !err)
6620 err = ret; 6658 err = ret;
6621 btrfs_btree_balance_dirty(root, nr);
6622 6659
6623 return err; 6660 return err;
6624} 6661}
@@ -6627,15 +6664,14 @@ static int btrfs_truncate(struct inode *inode)
6627 * create a new subvolume directory/inode (helper for the ioctl). 6664 * create a new subvolume directory/inode (helper for the ioctl).
6628 */ 6665 */
6629int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 6666int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6630 struct btrfs_root *new_root, 6667 struct btrfs_root *new_root, u64 new_dirid)
6631 u64 new_dirid, u64 alloc_hint)
6632{ 6668{
6633 struct inode *inode; 6669 struct inode *inode;
6634 int err; 6670 int err;
6635 u64 index = 0; 6671 u64 index = 0;
6636 6672
6637 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, 6673 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
6638 new_dirid, alloc_hint, S_IFDIR | 0700, &index); 6674 new_dirid, S_IFDIR | 0700, &index);
6639 if (IS_ERR(inode)) 6675 if (IS_ERR(inode))
6640 return PTR_ERR(inode); 6676 return PTR_ERR(inode);
6641 inode->i_op = &btrfs_dir_inode_operations; 6677 inode->i_op = &btrfs_dir_inode_operations;
@@ -6748,21 +6784,6 @@ void btrfs_destroy_inode(struct inode *inode)
6748 spin_unlock(&root->fs_info->ordered_extent_lock); 6784 spin_unlock(&root->fs_info->ordered_extent_lock);
6749 } 6785 }
6750 6786
6751 if (root == root->fs_info->tree_root) {
6752 struct btrfs_block_group_cache *block_group;
6753
6754 block_group = btrfs_lookup_block_group(root->fs_info,
6755 BTRFS_I(inode)->block_group);
6756 if (block_group && block_group->inode == inode) {
6757 spin_lock(&block_group->lock);
6758 block_group->inode = NULL;
6759 spin_unlock(&block_group->lock);
6760 btrfs_put_block_group(block_group);
6761 } else if (block_group) {
6762 btrfs_put_block_group(block_group);
6763 }
6764 }
6765
6766 spin_lock(&root->orphan_lock); 6787 spin_lock(&root->orphan_lock);
6767 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6788 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6768 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 6789 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
@@ -6948,8 +6969,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6948 goto out_notrans; 6969 goto out_notrans;
6949 } 6970 }
6950 6971
6951 btrfs_set_trans_block_group(trans, new_dir);
6952
6953 if (dest != root) 6972 if (dest != root)
6954 btrfs_record_root_in_trans(trans, dest); 6973 btrfs_record_root_in_trans(trans, dest);
6955 6974
@@ -7131,16 +7150,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7131 if (IS_ERR(trans)) 7150 if (IS_ERR(trans))
7132 return PTR_ERR(trans); 7151 return PTR_ERR(trans);
7133 7152
7134 btrfs_set_trans_block_group(trans, dir);
7135
7136 err = btrfs_find_free_ino(root, &objectid); 7153 err = btrfs_find_free_ino(root, &objectid);
7137 if (err) 7154 if (err)
7138 goto out_unlock; 7155 goto out_unlock;
7139 7156
7140 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 7157 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
7141 dentry->d_name.len, btrfs_ino(dir), objectid, 7158 dentry->d_name.len, btrfs_ino(dir), objectid,
7142 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7159 S_IFLNK|S_IRWXUGO, &index);
7143 &index);
7144 if (IS_ERR(inode)) { 7160 if (IS_ERR(inode)) {
7145 err = PTR_ERR(inode); 7161 err = PTR_ERR(inode);
7146 goto out_unlock; 7162 goto out_unlock;
@@ -7152,7 +7168,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7152 goto out_unlock; 7168 goto out_unlock;
7153 } 7169 }
7154 7170
7155 btrfs_set_trans_block_group(trans, inode);
7156 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 7171 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
7157 if (err) 7172 if (err)
7158 drop_inode = 1; 7173 drop_inode = 1;
@@ -7163,8 +7178,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7163 inode->i_op = &btrfs_file_inode_operations; 7178 inode->i_op = &btrfs_file_inode_operations;
7164 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 7179 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
7165 } 7180 }
7166 btrfs_update_inode_block_group(trans, inode);
7167 btrfs_update_inode_block_group(trans, dir);
7168 if (drop_inode) 7181 if (drop_inode)
7169 goto out_unlock; 7182 goto out_unlock;
7170 7183
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 85e818ce00c5..b793d112d1f6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -243,7 +243,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
244 } 244 }
245 245
246 trans = btrfs_join_transaction(root, 1); 246 trans = btrfs_join_transaction(root);
247 BUG_ON(IS_ERR(trans)); 247 BUG_ON(IS_ERR(trans));
248 248
249 ret = btrfs_update_inode(trans, root, inode); 249 ret = btrfs_update_inode(trans, root, inode);
@@ -414,8 +414,7 @@ static noinline int create_subvol(struct btrfs_root *root,
414 414
415 btrfs_record_root_in_trans(trans, new_root); 415 btrfs_record_root_in_trans(trans, new_root);
416 416
417 ret = btrfs_create_subvol_root(trans, new_root, new_dirid, 417 ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
418 BTRFS_I(dir)->block_group);
419 /* 418 /*
420 * insert the directory item 419 * insert the directory item
421 */ 420 */
@@ -707,16 +706,17 @@ static int find_new_extents(struct btrfs_root *root,
707 struct btrfs_file_extent_item *extent; 706 struct btrfs_file_extent_item *extent;
708 int type; 707 int type;
709 int ret; 708 int ret;
709 u64 ino = btrfs_ino(inode);
710 710
711 path = btrfs_alloc_path(); 711 path = btrfs_alloc_path();
712 if (!path) 712 if (!path)
713 return -ENOMEM; 713 return -ENOMEM;
714 714
715 min_key.objectid = inode->i_ino; 715 min_key.objectid = ino;
716 min_key.type = BTRFS_EXTENT_DATA_KEY; 716 min_key.type = BTRFS_EXTENT_DATA_KEY;
717 min_key.offset = *off; 717 min_key.offset = *off;
718 718
719 max_key.objectid = inode->i_ino; 719 max_key.objectid = ino;
720 max_key.type = (u8)-1; 720 max_key.type = (u8)-1;
721 max_key.offset = (u64)-1; 721 max_key.offset = (u64)-1;
722 722
@@ -727,7 +727,7 @@ static int find_new_extents(struct btrfs_root *root,
727 path, 0, newer_than); 727 path, 0, newer_than);
728 if (ret != 0) 728 if (ret != 0)
729 goto none; 729 goto none;
730 if (min_key.objectid != inode->i_ino) 730 if (min_key.objectid != ino)
731 goto none; 731 goto none;
732 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 732 if (min_key.type != BTRFS_EXTENT_DATA_KEY)
733 goto none; 733 goto none;
@@ -2054,29 +2054,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2054 2054
2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2056{ 2056{
2057 struct btrfs_ioctl_fs_info_args fi_args; 2057 struct btrfs_ioctl_fs_info_args *fi_args;
2058 struct btrfs_device *device; 2058 struct btrfs_device *device;
2059 struct btrfs_device *next; 2059 struct btrfs_device *next;
2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2061 int ret = 0;
2061 2062
2062 if (!capable(CAP_SYS_ADMIN)) 2063 if (!capable(CAP_SYS_ADMIN))
2063 return -EPERM; 2064 return -EPERM;
2064 2065
2065 fi_args.num_devices = fs_devices->num_devices; 2066 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
2066 fi_args.max_id = 0; 2067 if (!fi_args)
2067 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid)); 2068 return -ENOMEM;
2069
2070 fi_args->num_devices = fs_devices->num_devices;
2071 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2068 2072
2069 mutex_lock(&fs_devices->device_list_mutex); 2073 mutex_lock(&fs_devices->device_list_mutex);
2070 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2074 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2071 if (device->devid > fi_args.max_id) 2075 if (device->devid > fi_args->max_id)
2072 fi_args.max_id = device->devid; 2076 fi_args->max_id = device->devid;
2073 } 2077 }
2074 mutex_unlock(&fs_devices->device_list_mutex); 2078 mutex_unlock(&fs_devices->device_list_mutex);
2075 2079
2076 if (copy_to_user(arg, &fi_args, sizeof(fi_args))) 2080 if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2077 return -EFAULT; 2081 ret = -EFAULT;
2078 2082
2079 return 0; 2083 kfree(fi_args);
2084 return ret;
2080} 2085}
2081 2086
2082static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2087static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
@@ -2489,12 +2494,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
2489 if (ret) 2494 if (ret)
2490 goto out; 2495 goto out;
2491 2496
2492 mutex_lock(&root->fs_info->trans_mutex); 2497 atomic_inc(&root->fs_info->open_ioctl_trans);
2493 root->fs_info->open_ioctl_trans++;
2494 mutex_unlock(&root->fs_info->trans_mutex);
2495 2498
2496 ret = -ENOMEM; 2499 ret = -ENOMEM;
2497 trans = btrfs_start_ioctl_transaction(root, 0); 2500 trans = btrfs_start_ioctl_transaction(root);
2498 if (IS_ERR(trans)) 2501 if (IS_ERR(trans))
2499 goto out_drop; 2502 goto out_drop;
2500 2503
@@ -2502,9 +2505,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2502 return 0; 2505 return 0;
2503 2506
2504out_drop: 2507out_drop:
2505 mutex_lock(&root->fs_info->trans_mutex); 2508 atomic_dec(&root->fs_info->open_ioctl_trans);
2506 root->fs_info->open_ioctl_trans--;
2507 mutex_unlock(&root->fs_info->trans_mutex);
2508 mnt_drop_write(file->f_path.mnt); 2509 mnt_drop_write(file->f_path.mnt);
2509out: 2510out:
2510 return ret; 2511 return ret;
@@ -2738,9 +2739,7 @@ long btrfs_ioctl_trans_end(struct file *file)
2738 2739
2739 btrfs_end_transaction(trans, root); 2740 btrfs_end_transaction(trans, root);
2740 2741
2741 mutex_lock(&root->fs_info->trans_mutex); 2742 atomic_dec(&root->fs_info->open_ioctl_trans);
2742 root->fs_info->open_ioctl_trans--;
2743 mutex_unlock(&root->fs_info->trans_mutex);
2744 2743
2745 mnt_drop_write(file->f_path.mnt); 2744 mnt_drop_write(file->f_path.mnt);
2746 return 0; 2745 return 0;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ca38eca70af0..b1ef27cc673b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
677 err = -ENOMEM; 677 err = -ENOMEM;
678 goto out; 678 goto out;
679 } 679 }
680 path1->reada = 1;
681 path2->reada = 2;
680 682
681 node = alloc_backref_node(cache); 683 node = alloc_backref_node(cache);
682 if (!node) { 684 if (!node) {
@@ -1999,6 +2001,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1999 path = btrfs_alloc_path(); 2001 path = btrfs_alloc_path();
2000 if (!path) 2002 if (!path)
2001 return -ENOMEM; 2003 return -ENOMEM;
2004 path->reada = 1;
2002 2005
2003 reloc_root = root->reloc_root; 2006 reloc_root = root->reloc_root;
2004 root_item = &reloc_root->root_item; 2007 root_item = &reloc_root->root_item;
@@ -2139,10 +2142,10 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2139 u64 num_bytes = 0; 2142 u64 num_bytes = 0;
2140 int ret; 2143 int ret;
2141 2144
2142 mutex_lock(&root->fs_info->trans_mutex); 2145 spin_lock(&root->fs_info->trans_lock);
2143 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2146 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2144 rc->merging_rsv_size += rc->nodes_relocated * 2; 2147 rc->merging_rsv_size += rc->nodes_relocated * 2;
2145 mutex_unlock(&root->fs_info->trans_mutex); 2148 spin_unlock(&root->fs_info->trans_lock);
2146again: 2149again:
2147 if (!err) { 2150 if (!err) {
2148 num_bytes = rc->merging_rsv_size; 2151 num_bytes = rc->merging_rsv_size;
@@ -2152,7 +2155,7 @@ again:
2152 err = ret; 2155 err = ret;
2153 } 2156 }
2154 2157
2155 trans = btrfs_join_transaction(rc->extent_root, 1); 2158 trans = btrfs_join_transaction(rc->extent_root);
2156 if (IS_ERR(trans)) { 2159 if (IS_ERR(trans)) {
2157 if (!err) 2160 if (!err)
2158 btrfs_block_rsv_release(rc->extent_root, 2161 btrfs_block_rsv_release(rc->extent_root,
@@ -2211,9 +2214,9 @@ int merge_reloc_roots(struct reloc_control *rc)
2211 int ret; 2214 int ret;
2212again: 2215again:
2213 root = rc->extent_root; 2216 root = rc->extent_root;
2214 mutex_lock(&root->fs_info->trans_mutex); 2217 spin_lock(&root->fs_info->trans_lock);
2215 list_splice_init(&rc->reloc_roots, &reloc_roots); 2218 list_splice_init(&rc->reloc_roots, &reloc_roots);
2216 mutex_unlock(&root->fs_info->trans_mutex); 2219 spin_unlock(&root->fs_info->trans_lock);
2217 2220
2218 while (!list_empty(&reloc_roots)) { 2221 while (!list_empty(&reloc_roots)) {
2219 found = 1; 2222 found = 1;
@@ -3236,7 +3239,7 @@ truncate:
3236 goto out; 3239 goto out;
3237 } 3240 }
3238 3241
3239 trans = btrfs_join_transaction(root, 0); 3242 trans = btrfs_join_transaction(root);
3240 if (IS_ERR(trans)) { 3243 if (IS_ERR(trans)) {
3241 btrfs_free_path(path); 3244 btrfs_free_path(path);
3242 ret = PTR_ERR(trans); 3245 ret = PTR_ERR(trans);
@@ -3300,6 +3303,7 @@ static int find_data_references(struct reloc_control *rc,
3300 path = btrfs_alloc_path(); 3303 path = btrfs_alloc_path();
3301 if (!path) 3304 if (!path)
3302 return -ENOMEM; 3305 return -ENOMEM;
3306 path->reada = 1;
3303 3307
3304 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3308 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3305 if (IS_ERR(root)) { 3309 if (IS_ERR(root)) {
@@ -3586,17 +3590,17 @@ next:
3586static void set_reloc_control(struct reloc_control *rc) 3590static void set_reloc_control(struct reloc_control *rc)
3587{ 3591{
3588 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3592 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3589 mutex_lock(&fs_info->trans_mutex); 3593 spin_lock(&fs_info->trans_lock);
3590 fs_info->reloc_ctl = rc; 3594 fs_info->reloc_ctl = rc;
3591 mutex_unlock(&fs_info->trans_mutex); 3595 spin_unlock(&fs_info->trans_lock);
3592} 3596}
3593 3597
3594static void unset_reloc_control(struct reloc_control *rc) 3598static void unset_reloc_control(struct reloc_control *rc)
3595{ 3599{
3596 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3600 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3597 mutex_lock(&fs_info->trans_mutex); 3601 spin_lock(&fs_info->trans_lock);
3598 fs_info->reloc_ctl = NULL; 3602 fs_info->reloc_ctl = NULL;
3599 mutex_unlock(&fs_info->trans_mutex); 3603 spin_unlock(&fs_info->trans_lock);
3600} 3604}
3601 3605
3602static int check_extent_flags(u64 flags) 3606static int check_extent_flags(u64 flags)
@@ -3645,7 +3649,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3645 rc->create_reloc_tree = 1; 3649 rc->create_reloc_tree = 1;
3646 set_reloc_control(rc); 3650 set_reloc_control(rc);
3647 3651
3648 trans = btrfs_join_transaction(rc->extent_root, 1); 3652 trans = btrfs_join_transaction(rc->extent_root);
3649 BUG_ON(IS_ERR(trans)); 3653 BUG_ON(IS_ERR(trans));
3650 btrfs_commit_transaction(trans, rc->extent_root); 3654 btrfs_commit_transaction(trans, rc->extent_root);
3651 return 0; 3655 return 0;
@@ -3668,6 +3672,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3668 path = btrfs_alloc_path(); 3672 path = btrfs_alloc_path();
3669 if (!path) 3673 if (!path)
3670 return -ENOMEM; 3674 return -ENOMEM;
3675 path->reada = 1;
3671 3676
3672 ret = prepare_to_relocate(rc); 3677 ret = prepare_to_relocate(rc);
3673 if (ret) { 3678 if (ret) {
@@ -3834,7 +3839,7 @@ restart:
3834 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 3839 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
3835 3840
3836 /* get rid of pinned extents */ 3841 /* get rid of pinned extents */
3837 trans = btrfs_join_transaction(rc->extent_root, 1); 3842 trans = btrfs_join_transaction(rc->extent_root);
3838 if (IS_ERR(trans)) 3843 if (IS_ERR(trans))
3839 err = PTR_ERR(trans); 3844 err = PTR_ERR(trans);
3840 else 3845 else
@@ -4093,6 +4098,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4093 path = btrfs_alloc_path(); 4098 path = btrfs_alloc_path();
4094 if (!path) 4099 if (!path)
4095 return -ENOMEM; 4100 return -ENOMEM;
4101 path->reada = -1;
4096 4102
4097 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 4103 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4098 key.type = BTRFS_ROOT_ITEM_KEY; 4104 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -4159,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4159 4165
4160 set_reloc_control(rc); 4166 set_reloc_control(rc);
4161 4167
4162 trans = btrfs_join_transaction(rc->extent_root, 1); 4168 trans = btrfs_join_transaction(rc->extent_root);
4163 if (IS_ERR(trans)) { 4169 if (IS_ERR(trans)) {
4164 unset_reloc_control(rc); 4170 unset_reloc_control(rc);
4165 err = PTR_ERR(trans); 4171 err = PTR_ERR(trans);
@@ -4193,7 +4199,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4193 4199
4194 unset_reloc_control(rc); 4200 unset_reloc_control(rc);
4195 4201
4196 trans = btrfs_join_transaction(rc->extent_root, 1); 4202 trans = btrfs_join_transaction(rc->extent_root);
4197 if (IS_ERR(trans)) 4203 if (IS_ERR(trans))
4198 err = PTR_ERR(trans); 4204 err = PTR_ERR(trans);
4199 else 4205 else
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6dfed0c27ac3..a8d03d5efb5d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -16,13 +16,7 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h> 19#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include "ctree.h" 20#include "ctree.h"
27#include "volumes.h" 21#include "volumes.h"
28#include "disk-io.h" 22#include "disk-io.h"
@@ -117,33 +111,37 @@ static void scrub_free_csums(struct scrub_dev *sdev)
117 } 111 }
118} 112}
119 113
114static void scrub_free_bio(struct bio *bio)
115{
116 int i;
117 struct page *last_page = NULL;
118
119 if (!bio)
120 return;
121
122 for (i = 0; i < bio->bi_vcnt; ++i) {
123 if (bio->bi_io_vec[i].bv_page == last_page)
124 continue;
125 last_page = bio->bi_io_vec[i].bv_page;
126 __free_page(last_page);
127 }
128 bio_put(bio);
129}
130
120static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) 131static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
121{ 132{
122 int i; 133 int i;
123 int j;
124 struct page *last_page;
125 134
126 if (!sdev) 135 if (!sdev)
127 return; 136 return;
128 137
129 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 138 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
130 struct scrub_bio *sbio = sdev->bios[i]; 139 struct scrub_bio *sbio = sdev->bios[i];
131 struct bio *bio;
132 140
133 if (!sbio) 141 if (!sbio)
134 break; 142 break;
135 143
136 bio = sbio->bio; 144 scrub_free_bio(sbio->bio);
137 if (bio) {
138 last_page = NULL;
139 for (j = 0; j < bio->bi_vcnt; ++j) {
140 if (bio->bi_io_vec[j].bv_page == last_page)
141 continue;
142 last_page = bio->bi_io_vec[j].bv_page;
143 __free_page(last_page);
144 }
145 bio_put(bio);
146 }
147 kfree(sbio); 145 kfree(sbio);
148 } 146 }
149 147
@@ -156,8 +154,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
156{ 154{
157 struct scrub_dev *sdev; 155 struct scrub_dev *sdev;
158 int i; 156 int i;
159 int j;
160 int ret;
161 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 157 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
162 158
163 sdev = kzalloc(sizeof(*sdev), GFP_NOFS); 159 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
@@ -165,7 +161,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
165 goto nomem; 161 goto nomem;
166 sdev->dev = dev; 162 sdev->dev = dev;
167 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 163 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
168 struct bio *bio;
169 struct scrub_bio *sbio; 164 struct scrub_bio *sbio;
170 165
171 sbio = kzalloc(sizeof(*sbio), GFP_NOFS); 166 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
@@ -173,32 +168,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
173 goto nomem; 168 goto nomem;
174 sdev->bios[i] = sbio; 169 sdev->bios[i] = sbio;
175 170
176 bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
177 if (!bio)
178 goto nomem;
179
180 sbio->index = i; 171 sbio->index = i;
181 sbio->sdev = sdev; 172 sbio->sdev = sdev;
182 sbio->bio = bio;
183 sbio->count = 0; 173 sbio->count = 0;
184 sbio->work.func = scrub_checksum; 174 sbio->work.func = scrub_checksum;
185 bio->bi_private = sdev->bios[i];
186 bio->bi_end_io = scrub_bio_end_io;
187 bio->bi_sector = 0;
188 bio->bi_bdev = dev->bdev;
189 bio->bi_size = 0;
190
191 for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
192 struct page *page;
193 page = alloc_page(GFP_NOFS);
194 if (!page)
195 goto nomem;
196
197 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
198 if (!ret)
199 goto nomem;
200 }
201 WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);
202 175
203 if (i != SCRUB_BIOS_PER_DEV-1) 176 if (i != SCRUB_BIOS_PER_DEV-1)
204 sdev->bios[i]->next_free = i + 1; 177 sdev->bios[i]->next_free = i + 1;
@@ -369,9 +342,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
369 int ret; 342 int ret;
370 DECLARE_COMPLETION_ONSTACK(complete); 343 DECLARE_COMPLETION_ONSTACK(complete);
371 344
372 /* we are going to wait on this IO */
373 rw |= REQ_SYNC;
374
375 bio = bio_alloc(GFP_NOFS, 1); 345 bio = bio_alloc(GFP_NOFS, 1);
376 bio->bi_bdev = bdev; 346 bio->bi_bdev = bdev;
377 bio->bi_sector = sector; 347 bio->bi_sector = sector;
@@ -380,6 +350,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
380 bio->bi_private = &complete; 350 bio->bi_private = &complete;
381 submit_bio(rw, bio); 351 submit_bio(rw, bio);
382 352
353 /* this will also unplug the queue */
383 wait_for_completion(&complete); 354 wait_for_completion(&complete);
384 355
385 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); 356 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -394,6 +365,7 @@ static void scrub_bio_end_io(struct bio *bio, int err)
394 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 365 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
395 366
396 sbio->err = err; 367 sbio->err = err;
368 sbio->bio = bio;
397 369
398 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); 370 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
399} 371}
@@ -453,6 +425,8 @@ static void scrub_checksum(struct btrfs_work *work)
453 } 425 }
454 426
455out: 427out:
428 scrub_free_bio(sbio->bio);
429 sbio->bio = NULL;
456 spin_lock(&sdev->list_lock); 430 spin_lock(&sdev->list_lock);
457 sbio->next_free = sdev->first_free; 431 sbio->next_free = sdev->first_free;
458 sdev->first_free = sbio->index; 432 sdev->first_free = sbio->index;
@@ -583,25 +557,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
583static int scrub_submit(struct scrub_dev *sdev) 557static int scrub_submit(struct scrub_dev *sdev)
584{ 558{
585 struct scrub_bio *sbio; 559 struct scrub_bio *sbio;
560 struct bio *bio;
561 int i;
586 562
587 if (sdev->curr == -1) 563 if (sdev->curr == -1)
588 return 0; 564 return 0;
589 565
590 sbio = sdev->bios[sdev->curr]; 566 sbio = sdev->bios[sdev->curr];
591 567
592 sbio->bio->bi_sector = sbio->physical >> 9; 568 bio = bio_alloc(GFP_NOFS, sbio->count);
593 sbio->bio->bi_size = sbio->count * PAGE_SIZE; 569 if (!bio)
594 sbio->bio->bi_next = NULL; 570 goto nomem;
595 sbio->bio->bi_flags |= 1 << BIO_UPTODATE; 571
596 sbio->bio->bi_comp_cpu = -1; 572 bio->bi_private = sbio;
597 sbio->bio->bi_bdev = sdev->dev->bdev; 573 bio->bi_end_io = scrub_bio_end_io;
574 bio->bi_bdev = sdev->dev->bdev;
575 bio->bi_sector = sbio->physical >> 9;
576
577 for (i = 0; i < sbio->count; ++i) {
578 struct page *page;
579 int ret;
580
581 page = alloc_page(GFP_NOFS);
582 if (!page)
583 goto nomem;
584
585 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
586 if (!ret) {
587 __free_page(page);
588 goto nomem;
589 }
590 }
591
598 sbio->err = 0; 592 sbio->err = 0;
599 sdev->curr = -1; 593 sdev->curr = -1;
600 atomic_inc(&sdev->in_flight); 594 atomic_inc(&sdev->in_flight);
601 595
602 submit_bio(0, sbio->bio); 596 submit_bio(READ, bio);
603 597
604 return 0; 598 return 0;
599
600nomem:
601 scrub_free_bio(bio);
602
603 return -ENOMEM;
605} 604}
606 605
607static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 606static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
@@ -633,7 +632,11 @@ again:
633 sbio->logical = logical; 632 sbio->logical = logical;
634 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || 633 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
635 sbio->logical + sbio->count * PAGE_SIZE != logical) { 634 sbio->logical + sbio->count * PAGE_SIZE != logical) {
636 scrub_submit(sdev); 635 int ret;
636
637 ret = scrub_submit(sdev);
638 if (ret)
639 return ret;
637 goto again; 640 goto again;
638 } 641 }
639 sbio->spag[sbio->count].flags = flags; 642 sbio->spag[sbio->count].flags = flags;
@@ -645,8 +648,13 @@ again:
645 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); 648 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
646 } 649 }
647 ++sbio->count; 650 ++sbio->count;
648 if (sbio->count == SCRUB_PAGES_PER_BIO || force) 651 if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
649 scrub_submit(sdev); 652 int ret;
653
654 ret = scrub_submit(sdev);
655 if (ret)
656 return ret;
657 }
650 658
651 return 0; 659 return 0;
652} 660}
@@ -727,6 +735,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
727 struct btrfs_root *root = fs_info->extent_root; 735 struct btrfs_root *root = fs_info->extent_root;
728 struct btrfs_root *csum_root = fs_info->csum_root; 736 struct btrfs_root *csum_root = fs_info->csum_root;
729 struct btrfs_extent_item *extent; 737 struct btrfs_extent_item *extent;
738 struct blk_plug plug;
730 u64 flags; 739 u64 flags;
731 int ret; 740 int ret;
732 int slot; 741 int slot;
@@ -789,18 +798,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
789 798
790 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 799 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
791 if (ret < 0) 800 if (ret < 0)
792 goto out; 801 goto out_noplug;
793
794 l = path->nodes[0];
795 slot = path->slots[0];
796 btrfs_item_key_to_cpu(l, &key, slot);
797 if (key.objectid != logical) {
798 ret = btrfs_previous_item(root, path, 0,
799 BTRFS_EXTENT_ITEM_KEY);
800 if (ret < 0)
801 goto out;
802 }
803 802
803 /*
804 * we might miss half an extent here, but that doesn't matter,
805 * as it's only the prefetch
806 */
804 while (1) { 807 while (1) {
805 l = path->nodes[0]; 808 l = path->nodes[0];
806 slot = path->slots[0]; 809 slot = path->slots[0];
@@ -809,7 +812,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
809 if (ret == 0) 812 if (ret == 0)
810 continue; 813 continue;
811 if (ret < 0) 814 if (ret < 0)
812 goto out; 815 goto out_noplug;
813 816
814 break; 817 break;
815 } 818 }
@@ -831,6 +834,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
831 * the scrub. This might currently (crc32) end up to be about 1MB 834 * the scrub. This might currently (crc32) end up to be about 1MB
832 */ 835 */
833 start_stripe = 0; 836 start_stripe = 0;
837 blk_start_plug(&plug);
834again: 838again:
835 logical = base + offset + start_stripe * increment; 839 logical = base + offset + start_stripe * increment;
836 for (i = start_stripe; i < nstripes; ++i) { 840 for (i = start_stripe; i < nstripes; ++i) {
@@ -890,15 +894,20 @@ again:
890 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
891 if (ret < 0) 895 if (ret < 0)
892 goto out; 896 goto out;
893 897 if (ret > 0) {
894 l = path->nodes[0];
895 slot = path->slots[0];
896 btrfs_item_key_to_cpu(l, &key, slot);
897 if (key.objectid != logical) {
898 ret = btrfs_previous_item(root, path, 0, 898 ret = btrfs_previous_item(root, path, 0,
899 BTRFS_EXTENT_ITEM_KEY); 899 BTRFS_EXTENT_ITEM_KEY);
900 if (ret < 0) 900 if (ret < 0)
901 goto out; 901 goto out;
902 if (ret > 0) {
903 /* there's no smaller item, so stick with the
904 * larger one */
905 btrfs_release_path(path);
906 ret = btrfs_search_slot(NULL, root, &key,
907 path, 0, 0);
908 if (ret < 0)
909 goto out;
910 }
902 } 911 }
903 912
904 while (1) { 913 while (1) {
@@ -972,6 +981,8 @@ next:
972 scrub_submit(sdev); 981 scrub_submit(sdev);
973 982
974out: 983out:
984 blk_finish_plug(&plug);
985out_noplug:
975 btrfs_free_path(path); 986 btrfs_free_path(path);
976 return ret < 0 ? ret : 0; 987 return ret < 0 ? ret : 0;
977} 988}
@@ -1047,8 +1058,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1047 while (1) { 1058 while (1) {
1048 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1059 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1049 if (ret < 0) 1060 if (ret < 0)
1050 goto out; 1061 break;
1051 ret = 0; 1062 if (ret > 0) {
1063 if (path->slots[0] >=
1064 btrfs_header_nritems(path->nodes[0])) {
1065 ret = btrfs_next_leaf(root, path);
1066 if (ret)
1067 break;
1068 }
1069 }
1052 1070
1053 l = path->nodes[0]; 1071 l = path->nodes[0];
1054 slot = path->slots[0]; 1072 slot = path->slots[0];
@@ -1058,7 +1076,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1058 if (found_key.objectid != sdev->dev->devid) 1076 if (found_key.objectid != sdev->dev->devid)
1059 break; 1077 break;
1060 1078
1061 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) 1079 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1062 break; 1080 break;
1063 1081
1064 if (found_key.offset >= end) 1082 if (found_key.offset >= end)
@@ -1087,7 +1105,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1087 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 1105 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1088 if (!cache) { 1106 if (!cache) {
1089 ret = -ENOENT; 1107 ret = -ENOENT;
1090 goto out; 1108 break;
1091 } 1109 }
1092 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, 1110 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1093 chunk_offset, length); 1111 chunk_offset, length);
@@ -1099,9 +1117,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1099 btrfs_release_path(path); 1117 btrfs_release_path(path);
1100 } 1118 }
1101 1119
1102out:
1103 btrfs_free_path(path); 1120 btrfs_free_path(path);
1104 return ret; 1121
1122 /*
1123 * ret can still be 1 from search_slot or next_leaf,
1124 * that's not an error
1125 */
1126 return ret < 0 ? ret : 0;
1105} 1127}
1106 1128
1107static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) 1129static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
@@ -1138,8 +1160,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1138 struct btrfs_fs_info *fs_info = root->fs_info; 1160 struct btrfs_fs_info *fs_info = root->fs_info;
1139 1161
1140 mutex_lock(&fs_info->scrub_lock); 1162 mutex_lock(&fs_info->scrub_lock);
1141 if (fs_info->scrub_workers_refcnt == 0) 1163 if (fs_info->scrub_workers_refcnt == 0) {
1164 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1165 fs_info->thread_pool_size, &fs_info->generic_worker);
1166 fs_info->scrub_workers.idle_thresh = 4;
1142 btrfs_start_workers(&fs_info->scrub_workers, 1); 1167 btrfs_start_workers(&fs_info->scrub_workers, 1);
1168 }
1143 ++fs_info->scrub_workers_refcnt; 1169 ++fs_info->scrub_workers_refcnt;
1144 mutex_unlock(&fs_info->scrub_lock); 1170 mutex_unlock(&fs_info->scrub_lock);
1145 1171
@@ -1166,7 +1192,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1166 int ret; 1192 int ret;
1167 struct btrfs_device *dev; 1193 struct btrfs_device *dev;
1168 1194
1169 if (root->fs_info->closing) 1195 if (btrfs_fs_closing(root->fs_info))
1170 return -EINVAL; 1196 return -EINVAL;
1171 1197
1172 /* 1198 /*
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9b2e7e5bc3ef..0bb4ebbb71b7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -161,7 +161,8 @@ enum {
161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, 164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
165 Opt_inode_cache, Opt_err,
165}; 166};
166 167
167static match_table_t tokens = { 168static match_table_t tokens = {
@@ -193,6 +194,7 @@ static match_table_t tokens = {
193 {Opt_enospc_debug, "enospc_debug"}, 194 {Opt_enospc_debug, "enospc_debug"},
194 {Opt_subvolrootid, "subvolrootid=%d"}, 195 {Opt_subvolrootid, "subvolrootid=%d"},
195 {Opt_defrag, "autodefrag"}, 196 {Opt_defrag, "autodefrag"},
197 {Opt_inode_cache, "inode_cache"},
196 {Opt_err, NULL}, 198 {Opt_err, NULL},
197}; 199};
198 200
@@ -361,6 +363,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
361 printk(KERN_INFO "btrfs: enabling disk space caching\n"); 363 printk(KERN_INFO "btrfs: enabling disk space caching\n");
362 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 364 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
363 break; 365 break;
366 case Opt_inode_cache:
367 printk(KERN_INFO "btrfs: enabling inode map caching\n");
368 btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
369 break;
364 case Opt_clear_cache: 370 case Opt_clear_cache:
365 printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 371 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
366 btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 372 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
@@ -819,7 +825,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
819 } else { 825 } else {
820 char b[BDEVNAME_SIZE]; 826 char b[BDEVNAME_SIZE];
821 827
822 s->s_flags = flags; 828 s->s_flags = flags | MS_NOSEC;
823 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 829 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
824 error = btrfs_fill_super(s, fs_devices, data, 830 error = btrfs_fill_super(s, fs_devices, data,
825 flags & MS_SILENT ? 1 : 0); 831 flags & MS_SILENT ? 1 : 0);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dc80f7156923..2b3590b9fe98 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -35,6 +35,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
35{ 35{
36 WARN_ON(atomic_read(&transaction->use_count) == 0); 36 WARN_ON(atomic_read(&transaction->use_count) == 0);
37 if (atomic_dec_and_test(&transaction->use_count)) { 37 if (atomic_dec_and_test(&transaction->use_count)) {
38 BUG_ON(!list_empty(&transaction->list));
38 memset(transaction, 0, sizeof(*transaction)); 39 memset(transaction, 0, sizeof(*transaction));
39 kmem_cache_free(btrfs_transaction_cachep, transaction); 40 kmem_cache_free(btrfs_transaction_cachep, transaction);
40 } 41 }
@@ -49,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root)
49/* 50/*
50 * either allocate a new transaction or hop into the existing one 51 * either allocate a new transaction or hop into the existing one
51 */ 52 */
52static noinline int join_transaction(struct btrfs_root *root) 53static noinline int join_transaction(struct btrfs_root *root, int nofail)
53{ 54{
54 struct btrfs_transaction *cur_trans; 55 struct btrfs_transaction *cur_trans;
56
57 spin_lock(&root->fs_info->trans_lock);
58 if (root->fs_info->trans_no_join) {
59 if (!nofail) {
60 spin_unlock(&root->fs_info->trans_lock);
61 return -EBUSY;
62 }
63 }
64
55 cur_trans = root->fs_info->running_transaction; 65 cur_trans = root->fs_info->running_transaction;
56 if (!cur_trans) { 66 if (cur_trans) {
57 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 67 atomic_inc(&cur_trans->use_count);
58 GFP_NOFS);
59 if (!cur_trans)
60 return -ENOMEM;
61 root->fs_info->generation++;
62 atomic_set(&cur_trans->num_writers, 1);
63 cur_trans->num_joined = 0;
64 cur_trans->transid = root->fs_info->generation;
65 init_waitqueue_head(&cur_trans->writer_wait);
66 init_waitqueue_head(&cur_trans->commit_wait);
67 cur_trans->in_commit = 0;
68 cur_trans->blocked = 0;
69 atomic_set(&cur_trans->use_count, 1);
70 cur_trans->commit_done = 0;
71 cur_trans->start_time = get_seconds();
72
73 cur_trans->delayed_refs.root = RB_ROOT;
74 cur_trans->delayed_refs.num_entries = 0;
75 cur_trans->delayed_refs.num_heads_ready = 0;
76 cur_trans->delayed_refs.num_heads = 0;
77 cur_trans->delayed_refs.flushing = 0;
78 cur_trans->delayed_refs.run_delayed_start = 0;
79 spin_lock_init(&cur_trans->delayed_refs.lock);
80
81 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
82 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
83 extent_io_tree_init(&cur_trans->dirty_pages,
84 root->fs_info->btree_inode->i_mapping);
85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock);
88 } else {
89 atomic_inc(&cur_trans->num_writers); 68 atomic_inc(&cur_trans->num_writers);
90 cur_trans->num_joined++; 69 cur_trans->num_joined++;
70 spin_unlock(&root->fs_info->trans_lock);
71 return 0;
91 } 72 }
73 spin_unlock(&root->fs_info->trans_lock);
74
75 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
76 if (!cur_trans)
77 return -ENOMEM;
78 spin_lock(&root->fs_info->trans_lock);
79 if (root->fs_info->running_transaction) {
80 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
81 cur_trans = root->fs_info->running_transaction;
82 atomic_inc(&cur_trans->use_count);
83 atomic_inc(&cur_trans->num_writers);
84 cur_trans->num_joined++;
85 spin_unlock(&root->fs_info->trans_lock);
86 return 0;
87 }
88 atomic_set(&cur_trans->num_writers, 1);
89 cur_trans->num_joined = 0;
90 init_waitqueue_head(&cur_trans->writer_wait);
91 init_waitqueue_head(&cur_trans->commit_wait);
92 cur_trans->in_commit = 0;
93 cur_trans->blocked = 0;
94 /*
95 * One for this trans handle, one so it will live on until we
96 * commit the transaction.
97 */
98 atomic_set(&cur_trans->use_count, 2);
99 cur_trans->commit_done = 0;
100 cur_trans->start_time = get_seconds();
101
102 cur_trans->delayed_refs.root = RB_ROOT;
103 cur_trans->delayed_refs.num_entries = 0;
104 cur_trans->delayed_refs.num_heads_ready = 0;
105 cur_trans->delayed_refs.num_heads = 0;
106 cur_trans->delayed_refs.flushing = 0;
107 cur_trans->delayed_refs.run_delayed_start = 0;
108 spin_lock_init(&cur_trans->commit_lock);
109 spin_lock_init(&cur_trans->delayed_refs.lock);
110
111 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
112 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
113 extent_io_tree_init(&cur_trans->dirty_pages,
114 root->fs_info->btree_inode->i_mapping);
115 root->fs_info->generation++;
116 cur_trans->transid = root->fs_info->generation;
117 root->fs_info->running_transaction = cur_trans;
118 spin_unlock(&root->fs_info->trans_lock);
92 119
93 return 0; 120 return 0;
94} 121}
@@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root)
99 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
100 * when the transaction commits 127 * when the transaction commits
101 */ 128 */
102static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, 129int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root) 130 struct btrfs_root *root)
104{ 131{
105 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
106 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
107 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
108 135
136 spin_lock(&root->fs_info->fs_roots_radix_lock);
137 if (root->last_trans == trans->transid) {
138 spin_unlock(&root->fs_info->fs_roots_radix_lock);
139 return 0;
140 }
141 root->last_trans = trans->transid;
109 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 142 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
110 (unsigned long)root->root_key.objectid, 143 (unsigned long)root->root_key.objectid,
111 BTRFS_ROOT_TRANS_TAG); 144 BTRFS_ROOT_TRANS_TAG);
112 root->last_trans = trans->transid; 145 spin_unlock(&root->fs_info->fs_roots_radix_lock);
113 btrfs_init_reloc_root(trans, root); 146 btrfs_init_reloc_root(trans, root);
114 } 147 }
115 return 0; 148 return 0;
116} 149}
117 150
118int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root)
120{
121 if (!root->ref_cows)
122 return 0;
123
124 mutex_lock(&root->fs_info->trans_mutex);
125 if (root->last_trans == trans->transid) {
126 mutex_unlock(&root->fs_info->trans_mutex);
127 return 0;
128 }
129
130 record_root_in_trans(trans, root);
131 mutex_unlock(&root->fs_info->trans_mutex);
132 return 0;
133}
134
135/* wait for commit against the current transaction to become unblocked 151/* wait for commit against the current transaction to become unblocked
136 * when this is done, it is safe to start a new transaction, but the current 152 * when this is done, it is safe to start a new transaction, but the current
137 * transaction might not be fully on disk. 153 * transaction might not be fully on disk.
@@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root)
140{ 156{
141 struct btrfs_transaction *cur_trans; 157 struct btrfs_transaction *cur_trans;
142 158
159 spin_lock(&root->fs_info->trans_lock);
143 cur_trans = root->fs_info->running_transaction; 160 cur_trans = root->fs_info->running_transaction;
144 if (cur_trans && cur_trans->blocked) { 161 if (cur_trans && cur_trans->blocked) {
145 DEFINE_WAIT(wait); 162 DEFINE_WAIT(wait);
146 atomic_inc(&cur_trans->use_count); 163 atomic_inc(&cur_trans->use_count);
164 spin_unlock(&root->fs_info->trans_lock);
147 while (1) { 165 while (1) {
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 166 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE); 167 TASK_UNINTERRUPTIBLE);
150 if (!cur_trans->blocked) 168 if (!cur_trans->blocked)
151 break; 169 break;
152 mutex_unlock(&root->fs_info->trans_mutex);
153 schedule(); 170 schedule();
154 mutex_lock(&root->fs_info->trans_mutex);
155 } 171 }
156 finish_wait(&root->fs_info->transaction_wait, &wait); 172 finish_wait(&root->fs_info->transaction_wait, &wait);
157 put_transaction(cur_trans); 173 put_transaction(cur_trans);
174 } else {
175 spin_unlock(&root->fs_info->trans_lock);
158 } 176 }
159} 177}
160 178
@@ -167,10 +185,16 @@ enum btrfs_trans_type {
167 185
168static int may_wait_transaction(struct btrfs_root *root, int type) 186static int may_wait_transaction(struct btrfs_root *root, int type)
169{ 187{
170 if (!root->fs_info->log_root_recovering && 188 if (root->fs_info->log_root_recovering)
171 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || 189 return 0;
172 type == TRANS_USERSPACE)) 190
191 if (type == TRANS_USERSPACE)
173 return 1; 192 return 1;
193
194 if (type == TRANS_START &&
195 !atomic_read(&root->fs_info->open_ioctl_trans))
196 return 1;
197
174 return 0; 198 return 0;
175} 199}
176 200
@@ -184,36 +208,44 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
184 208
185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 209 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
186 return ERR_PTR(-EROFS); 210 return ERR_PTR(-EROFS);
211
212 if (current->journal_info) {
213 WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
214 h = current->journal_info;
215 h->use_count++;
216 h->orig_rsv = h->block_rsv;
217 h->block_rsv = NULL;
218 goto got_it;
219 }
187again: 220again:
188 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 221 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
189 if (!h) 222 if (!h)
190 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
191 224
192 if (type != TRANS_JOIN_NOLOCK)
193 mutex_lock(&root->fs_info->trans_mutex);
194 if (may_wait_transaction(root, type)) 225 if (may_wait_transaction(root, type))
195 wait_current_trans(root); 226 wait_current_trans(root);
196 227
197 ret = join_transaction(root); 228 do {
229 ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
230 if (ret == -EBUSY)
231 wait_current_trans(root);
232 } while (ret == -EBUSY);
233
198 if (ret < 0) { 234 if (ret < 0) {
199 kmem_cache_free(btrfs_trans_handle_cachep, h); 235 kmem_cache_free(btrfs_trans_handle_cachep, h);
200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret); 236 return ERR_PTR(ret);
203 } 237 }
204 238
205 cur_trans = root->fs_info->running_transaction; 239 cur_trans = root->fs_info->running_transaction;
206 atomic_inc(&cur_trans->use_count);
207 if (type != TRANS_JOIN_NOLOCK)
208 mutex_unlock(&root->fs_info->trans_mutex);
209 240
210 h->transid = cur_trans->transid; 241 h->transid = cur_trans->transid;
211 h->transaction = cur_trans; 242 h->transaction = cur_trans;
212 h->blocks_used = 0; 243 h->blocks_used = 0;
213 h->block_group = 0;
214 h->bytes_reserved = 0; 244 h->bytes_reserved = 0;
215 h->delayed_ref_updates = 0; 245 h->delayed_ref_updates = 0;
246 h->use_count = 1;
216 h->block_rsv = NULL; 247 h->block_rsv = NULL;
248 h->orig_rsv = NULL;
217 249
218 smp_mb(); 250 smp_mb();
219 if (cur_trans->blocked && may_wait_transaction(root, type)) { 251 if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -241,11 +273,8 @@ again:
241 } 273 }
242 } 274 }
243 275
244 if (type != TRANS_JOIN_NOLOCK) 276got_it:
245 mutex_lock(&root->fs_info->trans_mutex); 277 btrfs_record_root_in_trans(h, root);
246 record_root_in_trans(h, root);
247 if (type != TRANS_JOIN_NOLOCK)
248 mutex_unlock(&root->fs_info->trans_mutex);
249 278
250 if (!current->journal_info && type != TRANS_USERSPACE) 279 if (!current->journal_info && type != TRANS_USERSPACE)
251 current->journal_info = h; 280 current->journal_info = h;
@@ -257,22 +286,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
257{ 286{
258 return start_transaction(root, num_items, TRANS_START); 287 return start_transaction(root, num_items, TRANS_START);
259} 288}
260struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 289struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
261 int num_blocks)
262{ 290{
263 return start_transaction(root, 0, TRANS_JOIN); 291 return start_transaction(root, 0, TRANS_JOIN);
264} 292}
265 293
266struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 294struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
267 int num_blocks)
268{ 295{
269 return start_transaction(root, 0, TRANS_JOIN_NOLOCK); 296 return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
270} 297}
271 298
272struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 299struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
273 int num_blocks)
274{ 300{
275 return start_transaction(r, 0, TRANS_USERSPACE); 301 return start_transaction(root, 0, TRANS_USERSPACE);
276} 302}
277 303
278/* wait for a transaction commit to be fully complete */ 304/* wait for a transaction commit to be fully complete */
@@ -280,17 +306,13 @@ static noinline int wait_for_commit(struct btrfs_root *root,
280 struct btrfs_transaction *commit) 306 struct btrfs_transaction *commit)
281{ 307{
282 DEFINE_WAIT(wait); 308 DEFINE_WAIT(wait);
283 mutex_lock(&root->fs_info->trans_mutex);
284 while (!commit->commit_done) { 309 while (!commit->commit_done) {
285 prepare_to_wait(&commit->commit_wait, &wait, 310 prepare_to_wait(&commit->commit_wait, &wait,
286 TASK_UNINTERRUPTIBLE); 311 TASK_UNINTERRUPTIBLE);
287 if (commit->commit_done) 312 if (commit->commit_done)
288 break; 313 break;
289 mutex_unlock(&root->fs_info->trans_mutex);
290 schedule(); 314 schedule();
291 mutex_lock(&root->fs_info->trans_mutex);
292 } 315 }
293 mutex_unlock(&root->fs_info->trans_mutex);
294 finish_wait(&commit->commit_wait, &wait); 316 finish_wait(&commit->commit_wait, &wait);
295 return 0; 317 return 0;
296} 318}
@@ -300,59 +322,56 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
300 struct btrfs_transaction *cur_trans = NULL, *t; 322 struct btrfs_transaction *cur_trans = NULL, *t;
301 int ret; 323 int ret;
302 324
303 mutex_lock(&root->fs_info->trans_mutex);
304
305 ret = 0; 325 ret = 0;
306 if (transid) { 326 if (transid) {
307 if (transid <= root->fs_info->last_trans_committed) 327 if (transid <= root->fs_info->last_trans_committed)
308 goto out_unlock; 328 goto out;
309 329
310 /* find specified transaction */ 330 /* find specified transaction */
331 spin_lock(&root->fs_info->trans_lock);
311 list_for_each_entry(t, &root->fs_info->trans_list, list) { 332 list_for_each_entry(t, &root->fs_info->trans_list, list) {
312 if (t->transid == transid) { 333 if (t->transid == transid) {
313 cur_trans = t; 334 cur_trans = t;
335 atomic_inc(&cur_trans->use_count);
314 break; 336 break;
315 } 337 }
316 if (t->transid > transid) 338 if (t->transid > transid)
317 break; 339 break;
318 } 340 }
341 spin_unlock(&root->fs_info->trans_lock);
319 ret = -EINVAL; 342 ret = -EINVAL;
320 if (!cur_trans) 343 if (!cur_trans)
321 goto out_unlock; /* bad transid */ 344 goto out; /* bad transid */
322 } else { 345 } else {
323 /* find newest transaction that is committing | committed */ 346 /* find newest transaction that is committing | committed */
347 spin_lock(&root->fs_info->trans_lock);
324 list_for_each_entry_reverse(t, &root->fs_info->trans_list, 348 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
325 list) { 349 list) {
326 if (t->in_commit) { 350 if (t->in_commit) {
327 if (t->commit_done) 351 if (t->commit_done)
328 goto out_unlock; 352 break;
329 cur_trans = t; 353 cur_trans = t;
354 atomic_inc(&cur_trans->use_count);
330 break; 355 break;
331 } 356 }
332 } 357 }
358 spin_unlock(&root->fs_info->trans_lock);
333 if (!cur_trans) 359 if (!cur_trans)
334 goto out_unlock; /* nothing committing|committed */ 360 goto out; /* nothing committing|committed */
335 } 361 }
336 362
337 atomic_inc(&cur_trans->use_count);
338 mutex_unlock(&root->fs_info->trans_mutex);
339
340 wait_for_commit(root, cur_trans); 363 wait_for_commit(root, cur_trans);
341 364
342 mutex_lock(&root->fs_info->trans_mutex);
343 put_transaction(cur_trans); 365 put_transaction(cur_trans);
344 ret = 0; 366 ret = 0;
345out_unlock: 367out:
346 mutex_unlock(&root->fs_info->trans_mutex);
347 return ret; 368 return ret;
348} 369}
349 370
350void btrfs_throttle(struct btrfs_root *root) 371void btrfs_throttle(struct btrfs_root *root)
351{ 372{
352 mutex_lock(&root->fs_info->trans_mutex); 373 if (!atomic_read(&root->fs_info->open_ioctl_trans))
353 if (!root->fs_info->open_ioctl_trans)
354 wait_current_trans(root); 374 wait_current_trans(root);
355 mutex_unlock(&root->fs_info->trans_mutex);
356} 375}
357 376
358static int should_end_transaction(struct btrfs_trans_handle *trans, 377static int should_end_transaction(struct btrfs_trans_handle *trans,
@@ -370,6 +389,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
370 struct btrfs_transaction *cur_trans = trans->transaction; 389 struct btrfs_transaction *cur_trans = trans->transaction;
371 int updates; 390 int updates;
372 391
392 smp_mb();
373 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 393 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
374 return 1; 394 return 1;
375 395
@@ -388,6 +408,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
388 struct btrfs_fs_info *info = root->fs_info; 408 struct btrfs_fs_info *info = root->fs_info;
389 int count = 0; 409 int count = 0;
390 410
411 if (--trans->use_count) {
412 trans->block_rsv = trans->orig_rsv;
413 return 0;
414 }
415
391 while (count < 4) { 416 while (count < 4) {
392 unsigned long cur = trans->delayed_ref_updates; 417 unsigned long cur = trans->delayed_ref_updates;
393 trans->delayed_ref_updates = 0; 418 trans->delayed_ref_updates = 0;
@@ -410,9 +435,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
410 435
411 btrfs_trans_release_metadata(trans, root); 436 btrfs_trans_release_metadata(trans, root);
412 437
413 if (lock && !root->fs_info->open_ioctl_trans && 438 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
414 should_end_transaction(trans, root)) 439 should_end_transaction(trans, root)) {
415 trans->transaction->blocked = 1; 440 trans->transaction->blocked = 1;
441 smp_wmb();
442 }
416 443
417 if (lock && cur_trans->blocked && !cur_trans->in_commit) { 444 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
418 if (throttle) 445 if (throttle)
@@ -703,9 +730,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
703 */ 730 */
704int btrfs_add_dead_root(struct btrfs_root *root) 731int btrfs_add_dead_root(struct btrfs_root *root)
705{ 732{
706 mutex_lock(&root->fs_info->trans_mutex); 733 spin_lock(&root->fs_info->trans_lock);
707 list_add(&root->root_list, &root->fs_info->dead_roots); 734 list_add(&root->root_list, &root->fs_info->dead_roots);
708 mutex_unlock(&root->fs_info->trans_mutex); 735 spin_unlock(&root->fs_info->trans_lock);
709 return 0; 736 return 0;
710} 737}
711 738
@@ -721,6 +748,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
721 int ret; 748 int ret;
722 int err = 0; 749 int err = 0;
723 750
751 spin_lock(&fs_info->fs_roots_radix_lock);
724 while (1) { 752 while (1) {
725 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 753 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
726 (void **)gang, 0, 754 (void **)gang, 0,
@@ -733,6 +761,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
733 radix_tree_tag_clear(&fs_info->fs_roots_radix, 761 radix_tree_tag_clear(&fs_info->fs_roots_radix,
734 (unsigned long)root->root_key.objectid, 762 (unsigned long)root->root_key.objectid,
735 BTRFS_ROOT_TRANS_TAG); 763 BTRFS_ROOT_TRANS_TAG);
764 spin_unlock(&fs_info->fs_roots_radix_lock);
736 765
737 btrfs_free_log(trans, root); 766 btrfs_free_log(trans, root);
738 btrfs_update_reloc_root(trans, root); 767 btrfs_update_reloc_root(trans, root);
@@ -753,10 +782,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
753 err = btrfs_update_root(trans, fs_info->tree_root, 782 err = btrfs_update_root(trans, fs_info->tree_root,
754 &root->root_key, 783 &root->root_key,
755 &root->root_item); 784 &root->root_item);
785 spin_lock(&fs_info->fs_roots_radix_lock);
756 if (err) 786 if (err)
757 break; 787 break;
758 } 788 }
759 } 789 }
790 spin_unlock(&fs_info->fs_roots_radix_lock);
760 return err; 791 return err;
761} 792}
762 793
@@ -786,7 +817,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
786 btrfs_btree_balance_dirty(info->tree_root, nr); 817 btrfs_btree_balance_dirty(info->tree_root, nr);
787 cond_resched(); 818 cond_resched();
788 819
789 if (root->fs_info->closing || ret != -EAGAIN) 820 if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
790 break; 821 break;
791 } 822 }
792 root->defrag_running = 0; 823 root->defrag_running = 0;
@@ -851,7 +882,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
851 parent = dget_parent(dentry); 882 parent = dget_parent(dentry);
852 parent_inode = parent->d_inode; 883 parent_inode = parent->d_inode;
853 parent_root = BTRFS_I(parent_inode)->root; 884 parent_root = BTRFS_I(parent_inode)->root;
854 record_root_in_trans(trans, parent_root); 885 btrfs_record_root_in_trans(trans, parent_root);
855 886
856 /* 887 /*
857 * insert the directory item 888 * insert the directory item
@@ -869,7 +900,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
869 ret = btrfs_update_inode(trans, parent_root, parent_inode); 900 ret = btrfs_update_inode(trans, parent_root, parent_inode);
870 BUG_ON(ret); 901 BUG_ON(ret);
871 902
872 record_root_in_trans(trans, root); 903 btrfs_record_root_in_trans(trans, root);
873 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 904 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
874 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 905 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
875 btrfs_check_and_init_root_item(new_root_item); 906 btrfs_check_and_init_root_item(new_root_item);
@@ -967,20 +998,20 @@ static void update_super_roots(struct btrfs_root *root)
967int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 998int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
968{ 999{
969 int ret = 0; 1000 int ret = 0;
970 spin_lock(&info->new_trans_lock); 1001 spin_lock(&info->trans_lock);
971 if (info->running_transaction) 1002 if (info->running_transaction)
972 ret = info->running_transaction->in_commit; 1003 ret = info->running_transaction->in_commit;
973 spin_unlock(&info->new_trans_lock); 1004 spin_unlock(&info->trans_lock);
974 return ret; 1005 return ret;
975} 1006}
976 1007
977int btrfs_transaction_blocked(struct btrfs_fs_info *info) 1008int btrfs_transaction_blocked(struct btrfs_fs_info *info)
978{ 1009{
979 int ret = 0; 1010 int ret = 0;
980 spin_lock(&info->new_trans_lock); 1011 spin_lock(&info->trans_lock);
981 if (info->running_transaction) 1012 if (info->running_transaction)
982 ret = info->running_transaction->blocked; 1013 ret = info->running_transaction->blocked;
983 spin_unlock(&info->new_trans_lock); 1014 spin_unlock(&info->trans_lock);
984 return ret; 1015 return ret;
985} 1016}
986 1017
@@ -1004,9 +1035,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
1004 &wait); 1035 &wait);
1005 break; 1036 break;
1006 } 1037 }
1007 mutex_unlock(&root->fs_info->trans_mutex);
1008 schedule(); 1038 schedule();
1009 mutex_lock(&root->fs_info->trans_mutex);
1010 finish_wait(&root->fs_info->transaction_blocked_wait, &wait); 1039 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1011 } 1040 }
1012} 1041}
@@ -1032,9 +1061,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1032 &wait); 1061 &wait);
1033 break; 1062 break;
1034 } 1063 }
1035 mutex_unlock(&root->fs_info->trans_mutex);
1036 schedule(); 1064 schedule();
1037 mutex_lock(&root->fs_info->trans_mutex);
1038 finish_wait(&root->fs_info->transaction_wait, 1065 finish_wait(&root->fs_info->transaction_wait,
1039 &wait); 1066 &wait);
1040 } 1067 }
@@ -1072,7 +1099,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1072 1099
1073 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1100 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1074 ac->root = root; 1101 ac->root = root;
1075 ac->newtrans = btrfs_join_transaction(root, 0); 1102 ac->newtrans = btrfs_join_transaction(root);
1076 if (IS_ERR(ac->newtrans)) { 1103 if (IS_ERR(ac->newtrans)) {
1077 int err = PTR_ERR(ac->newtrans); 1104 int err = PTR_ERR(ac->newtrans);
1078 kfree(ac); 1105 kfree(ac);
@@ -1080,23 +1107,22 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1080 } 1107 }
1081 1108
1082 /* take transaction reference */ 1109 /* take transaction reference */
1083 mutex_lock(&root->fs_info->trans_mutex);
1084 cur_trans = trans->transaction; 1110 cur_trans = trans->transaction;
1085 atomic_inc(&cur_trans->use_count); 1111 atomic_inc(&cur_trans->use_count);
1086 mutex_unlock(&root->fs_info->trans_mutex);
1087 1112
1088 btrfs_end_transaction(trans, root); 1113 btrfs_end_transaction(trans, root);
1089 schedule_delayed_work(&ac->work, 0); 1114 schedule_delayed_work(&ac->work, 0);
1090 1115
1091 /* wait for transaction to start and unblock */ 1116 /* wait for transaction to start and unblock */
1092 mutex_lock(&root->fs_info->trans_mutex);
1093 if (wait_for_unblock) 1117 if (wait_for_unblock)
1094 wait_current_trans_commit_start_and_unblock(root, cur_trans); 1118 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1095 else 1119 else
1096 wait_current_trans_commit_start(root, cur_trans); 1120 wait_current_trans_commit_start(root, cur_trans);
1097 put_transaction(cur_trans);
1098 mutex_unlock(&root->fs_info->trans_mutex);
1099 1121
1122 if (current->journal_info == trans)
1123 current->journal_info = NULL;
1124
1125 put_transaction(cur_trans);
1100 return 0; 1126 return 0;
1101} 1127}
1102 1128
@@ -1139,38 +1165,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1139 ret = btrfs_run_delayed_refs(trans, root, 0); 1165 ret = btrfs_run_delayed_refs(trans, root, 0);
1140 BUG_ON(ret); 1166 BUG_ON(ret);
1141 1167
1142 mutex_lock(&root->fs_info->trans_mutex); 1168 spin_lock(&cur_trans->commit_lock);
1143 if (cur_trans->in_commit) { 1169 if (cur_trans->in_commit) {
1170 spin_unlock(&cur_trans->commit_lock);
1144 atomic_inc(&cur_trans->use_count); 1171 atomic_inc(&cur_trans->use_count);
1145 mutex_unlock(&root->fs_info->trans_mutex);
1146 btrfs_end_transaction(trans, root); 1172 btrfs_end_transaction(trans, root);
1147 1173
1148 ret = wait_for_commit(root, cur_trans); 1174 ret = wait_for_commit(root, cur_trans);
1149 BUG_ON(ret); 1175 BUG_ON(ret);
1150 1176
1151 mutex_lock(&root->fs_info->trans_mutex);
1152 put_transaction(cur_trans); 1177 put_transaction(cur_trans);
1153 mutex_unlock(&root->fs_info->trans_mutex);
1154 1178
1155 return 0; 1179 return 0;
1156 } 1180 }
1157 1181
1158 trans->transaction->in_commit = 1; 1182 trans->transaction->in_commit = 1;
1159 trans->transaction->blocked = 1; 1183 trans->transaction->blocked = 1;
1184 spin_unlock(&cur_trans->commit_lock);
1160 wake_up(&root->fs_info->transaction_blocked_wait); 1185 wake_up(&root->fs_info->transaction_blocked_wait);
1161 1186
1187 spin_lock(&root->fs_info->trans_lock);
1162 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1188 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1163 prev_trans = list_entry(cur_trans->list.prev, 1189 prev_trans = list_entry(cur_trans->list.prev,
1164 struct btrfs_transaction, list); 1190 struct btrfs_transaction, list);
1165 if (!prev_trans->commit_done) { 1191 if (!prev_trans->commit_done) {
1166 atomic_inc(&prev_trans->use_count); 1192 atomic_inc(&prev_trans->use_count);
1167 mutex_unlock(&root->fs_info->trans_mutex); 1193 spin_unlock(&root->fs_info->trans_lock);
1168 1194
1169 wait_for_commit(root, prev_trans); 1195 wait_for_commit(root, prev_trans);
1170 1196
1171 mutex_lock(&root->fs_info->trans_mutex);
1172 put_transaction(prev_trans); 1197 put_transaction(prev_trans);
1198 } else {
1199 spin_unlock(&root->fs_info->trans_lock);
1173 } 1200 }
1201 } else {
1202 spin_unlock(&root->fs_info->trans_lock);
1174 } 1203 }
1175 1204
1176 if (now < cur_trans->start_time || now - cur_trans->start_time < 1) 1205 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
@@ -1178,12 +1207,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1178 1207
1179 do { 1208 do {
1180 int snap_pending = 0; 1209 int snap_pending = 0;
1210
1181 joined = cur_trans->num_joined; 1211 joined = cur_trans->num_joined;
1182 if (!list_empty(&trans->transaction->pending_snapshots)) 1212 if (!list_empty(&trans->transaction->pending_snapshots))
1183 snap_pending = 1; 1213 snap_pending = 1;
1184 1214
1185 WARN_ON(cur_trans != trans->transaction); 1215 WARN_ON(cur_trans != trans->transaction);
1186 mutex_unlock(&root->fs_info->trans_mutex);
1187 1216
1188 if (flush_on_commit || snap_pending) { 1217 if (flush_on_commit || snap_pending) {
1189 btrfs_start_delalloc_inodes(root, 1); 1218 btrfs_start_delalloc_inodes(root, 1);
@@ -1206,14 +1235,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1206 prepare_to_wait(&cur_trans->writer_wait, &wait, 1235 prepare_to_wait(&cur_trans->writer_wait, &wait,
1207 TASK_UNINTERRUPTIBLE); 1236 TASK_UNINTERRUPTIBLE);
1208 1237
1209 smp_mb();
1210 if (atomic_read(&cur_trans->num_writers) > 1) 1238 if (atomic_read(&cur_trans->num_writers) > 1)
1211 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1239 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1212 else if (should_grow) 1240 else if (should_grow)
1213 schedule_timeout(1); 1241 schedule_timeout(1);
1214 1242
1215 mutex_lock(&root->fs_info->trans_mutex);
1216 finish_wait(&cur_trans->writer_wait, &wait); 1243 finish_wait(&cur_trans->writer_wait, &wait);
1244 spin_lock(&root->fs_info->trans_lock);
1245 root->fs_info->trans_no_join = 1;
1246 spin_unlock(&root->fs_info->trans_lock);
1217 } while (atomic_read(&cur_trans->num_writers) > 1 || 1247 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1218 (should_grow && cur_trans->num_joined != joined)); 1248 (should_grow && cur_trans->num_joined != joined));
1219 1249
@@ -1258,9 +1288,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1258 btrfs_prepare_extent_commit(trans, root); 1288 btrfs_prepare_extent_commit(trans, root);
1259 1289
1260 cur_trans = root->fs_info->running_transaction; 1290 cur_trans = root->fs_info->running_transaction;
1261 spin_lock(&root->fs_info->new_trans_lock);
1262 root->fs_info->running_transaction = NULL;
1263 spin_unlock(&root->fs_info->new_trans_lock);
1264 1291
1265 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1292 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1266 root->fs_info->tree_root->node); 1293 root->fs_info->tree_root->node);
@@ -1281,10 +1308,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1281 sizeof(root->fs_info->super_copy)); 1308 sizeof(root->fs_info->super_copy));
1282 1309
1283 trans->transaction->blocked = 0; 1310 trans->transaction->blocked = 0;
1311 spin_lock(&root->fs_info->trans_lock);
1312 root->fs_info->running_transaction = NULL;
1313 root->fs_info->trans_no_join = 0;
1314 spin_unlock(&root->fs_info->trans_lock);
1284 1315
1285 wake_up(&root->fs_info->transaction_wait); 1316 wake_up(&root->fs_info->transaction_wait);
1286 1317
1287 mutex_unlock(&root->fs_info->trans_mutex);
1288 ret = btrfs_write_and_wait_transaction(trans, root); 1318 ret = btrfs_write_and_wait_transaction(trans, root);
1289 BUG_ON(ret); 1319 BUG_ON(ret);
1290 write_ctree_super(trans, root, 0); 1320 write_ctree_super(trans, root, 0);
@@ -1297,22 +1327,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1297 1327
1298 btrfs_finish_extent_commit(trans, root); 1328 btrfs_finish_extent_commit(trans, root);
1299 1329
1300 mutex_lock(&root->fs_info->trans_mutex);
1301
1302 cur_trans->commit_done = 1; 1330 cur_trans->commit_done = 1;
1303 1331
1304 root->fs_info->last_trans_committed = cur_trans->transid; 1332 root->fs_info->last_trans_committed = cur_trans->transid;
1305 1333
1306 wake_up(&cur_trans->commit_wait); 1334 wake_up(&cur_trans->commit_wait);
1307 1335
1336 spin_lock(&root->fs_info->trans_lock);
1308 list_del_init(&cur_trans->list); 1337 list_del_init(&cur_trans->list);
1338 spin_unlock(&root->fs_info->trans_lock);
1339
1309 put_transaction(cur_trans); 1340 put_transaction(cur_trans);
1310 put_transaction(cur_trans); 1341 put_transaction(cur_trans);
1311 1342
1312 trace_btrfs_transaction_commit(root); 1343 trace_btrfs_transaction_commit(root);
1313 1344
1314 mutex_unlock(&root->fs_info->trans_mutex);
1315
1316 btrfs_scrub_continue(root); 1345 btrfs_scrub_continue(root);
1317 1346
1318 if (current->journal_info == trans) 1347 if (current->journal_info == trans)
@@ -1334,9 +1363,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1334 LIST_HEAD(list); 1363 LIST_HEAD(list);
1335 struct btrfs_fs_info *fs_info = root->fs_info; 1364 struct btrfs_fs_info *fs_info = root->fs_info;
1336 1365
1337 mutex_lock(&fs_info->trans_mutex); 1366 spin_lock(&fs_info->trans_lock);
1338 list_splice_init(&fs_info->dead_roots, &list); 1367 list_splice_init(&fs_info->dead_roots, &list);
1339 mutex_unlock(&fs_info->trans_mutex); 1368 spin_unlock(&fs_info->trans_lock);
1340 1369
1341 while (!list_empty(&list)) { 1370 while (!list_empty(&list)) {
1342 root = list_entry(list.next, struct btrfs_root, root_list); 1371 root = list_entry(list.next, struct btrfs_root, root_list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 804c88639e5d..02564e6230ac 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -28,10 +28,12 @@ struct btrfs_transaction {
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 atomic_t num_writers; 30 atomic_t num_writers;
31 atomic_t use_count;
31 32
32 unsigned long num_joined; 33 unsigned long num_joined;
34
35 spinlock_t commit_lock;
33 int in_commit; 36 int in_commit;
34 atomic_t use_count;
35 int commit_done; 37 int commit_done;
36 int blocked; 38 int blocked;
37 struct list_head list; 39 struct list_head list;
@@ -45,13 +47,14 @@ struct btrfs_transaction {
45 47
46struct btrfs_trans_handle { 48struct btrfs_trans_handle {
47 u64 transid; 49 u64 transid;
48 u64 block_group;
49 u64 bytes_reserved; 50 u64 bytes_reserved;
51 unsigned long use_count;
50 unsigned long blocks_reserved; 52 unsigned long blocks_reserved;
51 unsigned long blocks_used; 53 unsigned long blocks_used;
52 unsigned long delayed_ref_updates; 54 unsigned long delayed_ref_updates;
53 struct btrfs_transaction *transaction; 55 struct btrfs_transaction *transaction;
54 struct btrfs_block_rsv *block_rsv; 56 struct btrfs_block_rsv *block_rsv;
57 struct btrfs_block_rsv *orig_rsv;
55}; 58};
56 59
57struct btrfs_pending_snapshot { 60struct btrfs_pending_snapshot {
@@ -66,19 +69,6 @@ struct btrfs_pending_snapshot {
66 struct list_head list; 69 struct list_head list;
67}; 70};
68 71
69static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
70 struct inode *inode)
71{
72 trans->block_group = BTRFS_I(inode)->block_group;
73}
74
75static inline void btrfs_update_inode_block_group(
76 struct btrfs_trans_handle *trans,
77 struct inode *inode)
78{
79 BTRFS_I(inode)->block_group = trans->block_group;
80}
81
82static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, 72static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
83 struct inode *inode) 73 struct inode *inode)
84{ 74{
@@ -92,12 +82,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
92 struct btrfs_root *root); 82 struct btrfs_root *root);
93struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 83struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
94 int num_items); 84 int num_items);
95struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 85struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
96 int num_blocks); 86struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
97struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 87struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
98 int num_blocks);
99struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
100 int num_blocks);
101int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 88int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
102int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 89int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root); 90 struct btrfs_root *root);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c48214ef5c09..1efa56e18f9b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
504 BUG_ON(!new_device); 504 BUG_ON(!new_device);
505 memcpy(new_device, device, sizeof(*new_device)); 505 memcpy(new_device, device, sizeof(*new_device));
506 new_device->name = kstrdup(device->name, GFP_NOFS); 506 new_device->name = kstrdup(device->name, GFP_NOFS);
507 BUG_ON(!new_device->name); 507 BUG_ON(device->name && !new_device->name);
508 new_device->bdev = NULL; 508 new_device->bdev = NULL;
509 new_device->writeable = 0; 509 new_device->writeable = 0;
510 new_device->in_fs_metadata = 0; 510 new_device->in_fs_metadata = 0;
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
689 transid = btrfs_super_generation(disk_super); 689 transid = btrfs_super_generation(disk_super);
690 if (disk_super->label[0]) 690 if (disk_super->label[0])
691 printk(KERN_INFO "device label %s ", disk_super->label); 691 printk(KERN_INFO "device label %s ", disk_super->label);
692 else { 692 else
693 /* FIXME, make a readl uuid parser */ 693 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
694 printk(KERN_INFO "device fsid %llx-%llx ",
695 *(unsigned long long *)disk_super->fsid,
696 *(unsigned long long *)(disk_super->fsid + 8));
697 }
698 printk(KERN_CONT "devid %llu transid %llu %s\n", 694 printk(KERN_CONT "devid %llu transid %llu %s\n",
699 (unsigned long long)devid, (unsigned long long)transid, path); 695 (unsigned long long)devid, (unsigned long long)transid, path);
700 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 696 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index f3107e4b4d56..5366fe452ab0 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
158 if (IS_ERR(trans)) 158 if (IS_ERR(trans))
159 return PTR_ERR(trans); 159 return PTR_ERR(trans);
160 160
161 btrfs_set_trans_block_group(trans, inode);
162
163 ret = do_setxattr(trans, inode, name, value, size, flags); 161 ret = do_setxattr(trans, inode, name, value, size, flags);
164 if (ret) 162 if (ret)
165 goto out; 163 goto out;