aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/block_dev.c4
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/ctree.c28
-rw-r--r--fs/btrfs/ctree.h22
-rw-r--r--fs/btrfs/delayed-inode.c8
-rw-r--r--fs/btrfs/disk-io.c36
-rw-r--r--fs/btrfs/extent-tree.c103
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/file.c10
-rw-r--r--fs/btrfs/free-space-cache.c70
-rw-r--r--fs/btrfs/inode-map.c34
-rw-r--r--fs/btrfs/inode.c261
-rw-r--r--fs/btrfs/ioctl.c26
-rw-r--r--fs/btrfs/relocation.c34
-rw-r--r--fs/btrfs/scrub.c123
-rw-r--r--fs/btrfs/super.c8
-rw-r--r--fs/btrfs/transaction.c302
-rw-r--r--fs/btrfs/transaction.h29
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/namei.c3
-rw-r--r--fs/partitions/check.c10
-rw-r--r--fs/ubifs/io.c2
-rw-r--r--fs/ubifs/journal.c1
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/recovery.c164
-rw-r--r--fs/ubifs/replay.c3
-rw-r--r--fs/ubifs/shrinker.c6
-rw-r--r--fs/ubifs/super.c42
-rw-r--r--fs/ubifs/tnc.c9
-rw-r--r--fs/ubifs/ubifs.h4
32 files changed, 779 insertions, 576 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 87d95a8cddbc..f55ae23b137e 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
583 if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) 583 if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
584 return -EACCES; 584 return -EACCES;
585 585
586 dentry_unhash(dentry);
587
588 if (atomic_dec_and_test(&ino->count)) { 586 if (atomic_dec_and_test(&ino->count)) {
589 p_ino = autofs4_dentry_ino(dentry->d_parent); 587 p_ino = autofs4_dentry_ino(dentry->d_parent);
590 if (p_ino && dentry->d_parent != dentry) 588 if (p_ino && dentry->d_parent != dentry)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1f2b19978333..1a2421f908f0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1272,8 +1272,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1272 * individual writeable reference is too fragile given the 1272 * individual writeable reference is too fragile given the
1273 * way @mode is used in blkdev_get/put(). 1273 * way @mode is used in blkdev_get/put().
1274 */ 1274 */
1275 if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && 1275 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1276 !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { 1276 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1277 bdev->bd_write_holder = true; 1277 bdev->bd_write_holder = true;
1278 disk_block_events(disk); 1278 disk_block_events(disk);
1279 } 1279 }
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 93b1aa932014..52d7eca8c7bf 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -121,9 +121,6 @@ struct btrfs_inode {
121 */ 121 */
122 u64 index_cnt; 122 u64 index_cnt;
123 123
124 /* the start of block group preferred for allocations. */
125 u64 block_group;
126
127 /* the fsync log has some corner cases that mean we have to check 124 /* the fsync log has some corner cases that mean we have to check
128 * directories to see if any unlinks have been done before 125 * directories to see if any unlinks have been done before
129 * the directory was logged. See tree-log.c for all the 126 * the directory was logged. See tree-log.c for all the
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b0e18d986e0a..d84089349c82 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -43,8 +43,6 @@ struct btrfs_path *btrfs_alloc_path(void)
43{ 43{
44 struct btrfs_path *path; 44 struct btrfs_path *path;
45 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 45 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
46 if (path)
47 path->reada = 1;
48 return path; 46 return path;
49} 47}
50 48
@@ -1224,6 +1222,7 @@ static void reada_for_search(struct btrfs_root *root,
1224 u64 search; 1222 u64 search;
1225 u64 target; 1223 u64 target;
1226 u64 nread = 0; 1224 u64 nread = 0;
1225 u64 gen;
1227 int direction = path->reada; 1226 int direction = path->reada;
1228 struct extent_buffer *eb; 1227 struct extent_buffer *eb;
1229 u32 nr; 1228 u32 nr;
@@ -1251,6 +1250,15 @@ static void reada_for_search(struct btrfs_root *root,
1251 nritems = btrfs_header_nritems(node); 1250 nritems = btrfs_header_nritems(node);
1252 nr = slot; 1251 nr = slot;
1253 while (1) { 1252 while (1) {
1253 if (!node->map_token) {
1254 unsigned long offset = btrfs_node_key_ptr_offset(nr);
1255 map_private_extent_buffer(node, offset,
1256 sizeof(struct btrfs_key_ptr),
1257 &node->map_token,
1258 &node->kaddr,
1259 &node->map_start,
1260 &node->map_len, KM_USER1);
1261 }
1254 if (direction < 0) { 1262 if (direction < 0) {
1255 if (nr == 0) 1263 if (nr == 0)
1256 break; 1264 break;
@@ -1268,14 +1276,23 @@ static void reada_for_search(struct btrfs_root *root,
1268 search = btrfs_node_blockptr(node, nr); 1276 search = btrfs_node_blockptr(node, nr);
1269 if ((search <= target && target - search <= 65536) || 1277 if ((search <= target && target - search <= 65536) ||
1270 (search > target && search - target <= 65536)) { 1278 (search > target && search - target <= 65536)) {
1271 readahead_tree_block(root, search, blocksize, 1279 gen = btrfs_node_ptr_generation(node, nr);
1272 btrfs_node_ptr_generation(node, nr)); 1280 if (node->map_token) {
1281 unmap_extent_buffer(node, node->map_token,
1282 KM_USER1);
1283 node->map_token = NULL;
1284 }
1285 readahead_tree_block(root, search, blocksize, gen);
1273 nread += blocksize; 1286 nread += blocksize;
1274 } 1287 }
1275 nscan++; 1288 nscan++;
1276 if ((nread > 65536 || nscan > 32)) 1289 if ((nread > 65536 || nscan > 32))
1277 break; 1290 break;
1278 } 1291 }
1292 if (node->map_token) {
1293 unmap_extent_buffer(node, node->map_token, KM_USER1);
1294 node->map_token = NULL;
1295 }
1279} 1296}
1280 1297
1281/* 1298/*
@@ -1648,9 +1665,6 @@ again:
1648 } 1665 }
1649cow_done: 1666cow_done:
1650 BUG_ON(!cow && ins_len); 1667 BUG_ON(!cow && ins_len);
1651 if (level != btrfs_header_level(b))
1652 WARN_ON(1);
1653 level = btrfs_header_level(b);
1654 1668
1655 p->nodes[level] = b; 1669 p->nodes[level] = b;
1656 if (!p->skip_locking) 1670 if (!p->skip_locking)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6c093fa98f61..378b5b4443f3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -930,7 +930,6 @@ struct btrfs_fs_info {
930 * is required instead of the faster short fsync log commits 930 * is required instead of the faster short fsync log commits
931 */ 931 */
932 u64 last_trans_log_full_commit; 932 u64 last_trans_log_full_commit;
933 u64 open_ioctl_trans;
934 unsigned long mount_opt:20; 933 unsigned long mount_opt:20;
935 unsigned long compress_type:4; 934 unsigned long compress_type:4;
936 u64 max_inline; 935 u64 max_inline;
@@ -947,7 +946,6 @@ struct btrfs_fs_info {
947 struct super_block *sb; 946 struct super_block *sb;
948 struct inode *btree_inode; 947 struct inode *btree_inode;
949 struct backing_dev_info bdi; 948 struct backing_dev_info bdi;
950 struct mutex trans_mutex;
951 struct mutex tree_log_mutex; 949 struct mutex tree_log_mutex;
952 struct mutex transaction_kthread_mutex; 950 struct mutex transaction_kthread_mutex;
953 struct mutex cleaner_mutex; 951 struct mutex cleaner_mutex;
@@ -968,6 +966,7 @@ struct btrfs_fs_info {
968 struct rw_semaphore subvol_sem; 966 struct rw_semaphore subvol_sem;
969 struct srcu_struct subvol_srcu; 967 struct srcu_struct subvol_srcu;
970 968
969 spinlock_t trans_lock;
971 struct list_head trans_list; 970 struct list_head trans_list;
972 struct list_head hashers; 971 struct list_head hashers;
973 struct list_head dead_roots; 972 struct list_head dead_roots;
@@ -980,6 +979,7 @@ struct btrfs_fs_info {
980 atomic_t async_submit_draining; 979 atomic_t async_submit_draining;
981 atomic_t nr_async_bios; 980 atomic_t nr_async_bios;
982 atomic_t async_delalloc_pages; 981 atomic_t async_delalloc_pages;
982 atomic_t open_ioctl_trans;
983 983
984 /* 984 /*
985 * this is used by the balancing code to wait for all the pending 985 * this is used by the balancing code to wait for all the pending
@@ -1044,6 +1044,7 @@ struct btrfs_fs_info {
1044 int closing; 1044 int closing;
1045 int log_root_recovering; 1045 int log_root_recovering;
1046 int enospc_unlink; 1046 int enospc_unlink;
1047 int trans_no_join;
1047 1048
1048 u64 total_pinned; 1049 u64 total_pinned;
1049 1050
@@ -1065,7 +1066,6 @@ struct btrfs_fs_info {
1065 struct reloc_control *reloc_ctl; 1066 struct reloc_control *reloc_ctl;
1066 1067
1067 spinlock_t delalloc_lock; 1068 spinlock_t delalloc_lock;
1068 spinlock_t new_trans_lock;
1069 u64 delalloc_bytes; 1069 u64 delalloc_bytes;
1070 1070
1071 /* data_alloc_cluster is only used in ssd mode */ 1071 /* data_alloc_cluster is only used in ssd mode */
@@ -1340,6 +1340,7 @@ struct btrfs_ioctl_defrag_range_args {
1340#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1340#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1341#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) 1341#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1342#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) 1342#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
1343#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
1343 1344
1344#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1345#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1345#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1346#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2238,6 +2239,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
2238void btrfs_block_rsv_release(struct btrfs_root *root, 2239void btrfs_block_rsv_release(struct btrfs_root *root,
2239 struct btrfs_block_rsv *block_rsv, 2240 struct btrfs_block_rsv *block_rsv,
2240 u64 num_bytes); 2241 u64 num_bytes);
2242int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
2243 struct btrfs_root *root,
2244 struct btrfs_block_rsv *rsv);
2241int btrfs_set_block_group_ro(struct btrfs_root *root, 2245int btrfs_set_block_group_ro(struct btrfs_root *root,
2242 struct btrfs_block_group_cache *cache); 2246 struct btrfs_block_group_cache *cache);
2243int btrfs_set_block_group_rw(struct btrfs_root *root, 2247int btrfs_set_block_group_rw(struct btrfs_root *root,
@@ -2350,6 +2354,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2350 struct btrfs_root *root, 2354 struct btrfs_root *root,
2351 struct extent_buffer *node, 2355 struct extent_buffer *node,
2352 struct extent_buffer *parent); 2356 struct extent_buffer *parent);
2357static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
2358{
2359 /*
2360 * Get synced with close_ctree()
2361 */
2362 smp_mb();
2363 return fs_info->closing;
2364}
2365
2353/* root-item.c */ 2366/* root-item.c */
2354int btrfs_find_root_ref(struct btrfs_root *tree_root, 2367int btrfs_find_root_ref(struct btrfs_root *tree_root,
2355 struct btrfs_path *path, 2368 struct btrfs_path *path,
@@ -2512,8 +2525,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2512int btrfs_writepages(struct address_space *mapping, 2525int btrfs_writepages(struct address_space *mapping,
2513 struct writeback_control *wbc); 2526 struct writeback_control *wbc);
2514int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 2527int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
2515 struct btrfs_root *new_root, 2528 struct btrfs_root *new_root, u64 new_dirid);
2516 u64 new_dirid, u64 alloc_hint);
2517int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 2529int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2518 size_t size, struct bio *bio, unsigned long bio_flags); 2530 size_t size, struct bio *bio, unsigned long bio_flags);
2519 2531
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 01e29503a54b..6462c29d2d37 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
678 INIT_LIST_HEAD(&head); 678 INIT_LIST_HEAD(&head);
679 679
680 next = item; 680 next = item;
681 nitems = 0;
681 682
682 /* 683 /*
683 * count the number of the continuous items that we can insert in batch 684 * count the number of the continuous items that we can insert in batch
@@ -1129,7 +1130,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1129 delayed_node = async_node->delayed_node; 1130 delayed_node = async_node->delayed_node;
1130 root = delayed_node->root; 1131 root = delayed_node->root;
1131 1132
1132 trans = btrfs_join_transaction(root, 0); 1133 trans = btrfs_join_transaction(root);
1133 if (IS_ERR(trans)) 1134 if (IS_ERR(trans))
1134 goto free_path; 1135 goto free_path;
1135 1136
@@ -1572,8 +1573,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1572 btrfs_set_stack_inode_transid(inode_item, trans->transid); 1573 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1573 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); 1574 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1574 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); 1575 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1575 btrfs_set_stack_inode_block_group(inode_item, 1576 btrfs_set_stack_inode_block_group(inode_item, 0);
1576 BTRFS_I(inode)->block_group);
1577 1577
1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), 1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1579 inode->i_atime.tv_sec); 1579 inode->i_atime.tv_sec);
@@ -1595,7 +1595,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1595 struct btrfs_root *root, struct inode *inode) 1595 struct btrfs_root *root, struct inode *inode)
1596{ 1596{
1597 struct btrfs_delayed_node *delayed_node; 1597 struct btrfs_delayed_node *delayed_node;
1598 int ret; 1598 int ret = 0;
1599 1599
1600 delayed_node = btrfs_get_or_create_delayed_node(inode); 1600 delayed_node = btrfs_get_or_create_delayed_node(inode);
1601 if (IS_ERR(delayed_node)) 1601 if (IS_ERR(delayed_node))
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 98b6a71decba..a203d363184d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1505,24 +1505,24 @@ static int transaction_kthread(void *arg)
1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1506 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1506 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1507 1507
1508 spin_lock(&root->fs_info->new_trans_lock); 1508 spin_lock(&root->fs_info->trans_lock);
1509 cur = root->fs_info->running_transaction; 1509 cur = root->fs_info->running_transaction;
1510 if (!cur) { 1510 if (!cur) {
1511 spin_unlock(&root->fs_info->new_trans_lock); 1511 spin_unlock(&root->fs_info->trans_lock);
1512 goto sleep; 1512 goto sleep;
1513 } 1513 }
1514 1514
1515 now = get_seconds(); 1515 now = get_seconds();
1516 if (!cur->blocked && 1516 if (!cur->blocked &&
1517 (now < cur->start_time || now - cur->start_time < 30)) { 1517 (now < cur->start_time || now - cur->start_time < 30)) {
1518 spin_unlock(&root->fs_info->new_trans_lock); 1518 spin_unlock(&root->fs_info->trans_lock);
1519 delay = HZ * 5; 1519 delay = HZ * 5;
1520 goto sleep; 1520 goto sleep;
1521 } 1521 }
1522 transid = cur->transid; 1522 transid = cur->transid;
1523 spin_unlock(&root->fs_info->new_trans_lock); 1523 spin_unlock(&root->fs_info->trans_lock);
1524 1524
1525 trans = btrfs_join_transaction(root, 1); 1525 trans = btrfs_join_transaction(root);
1526 BUG_ON(IS_ERR(trans)); 1526 BUG_ON(IS_ERR(trans));
1527 if (transid == trans->transid) { 1527 if (transid == trans->transid) {
1528 ret = btrfs_commit_transaction(trans, root); 1528 ret = btrfs_commit_transaction(trans, root);
@@ -1613,7 +1613,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1613 INIT_LIST_HEAD(&fs_info->ordered_operations); 1613 INIT_LIST_HEAD(&fs_info->ordered_operations);
1614 INIT_LIST_HEAD(&fs_info->caching_block_groups); 1614 INIT_LIST_HEAD(&fs_info->caching_block_groups);
1615 spin_lock_init(&fs_info->delalloc_lock); 1615 spin_lock_init(&fs_info->delalloc_lock);
1616 spin_lock_init(&fs_info->new_trans_lock); 1616 spin_lock_init(&fs_info->trans_lock);
1617 spin_lock_init(&fs_info->ref_cache_lock); 1617 spin_lock_init(&fs_info->ref_cache_lock);
1618 spin_lock_init(&fs_info->fs_roots_radix_lock); 1618 spin_lock_init(&fs_info->fs_roots_radix_lock);
1619 spin_lock_init(&fs_info->delayed_iput_lock); 1619 spin_lock_init(&fs_info->delayed_iput_lock);
@@ -1645,6 +1645,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1645 fs_info->max_inline = 8192 * 1024; 1645 fs_info->max_inline = 8192 * 1024;
1646 fs_info->metadata_ratio = 0; 1646 fs_info->metadata_ratio = 0;
1647 fs_info->defrag_inodes = RB_ROOT; 1647 fs_info->defrag_inodes = RB_ROOT;
1648 fs_info->trans_no_join = 0;
1648 1649
1649 fs_info->thread_pool_size = min_t(unsigned long, 1650 fs_info->thread_pool_size = min_t(unsigned long,
1650 num_online_cpus() + 2, 8); 1651 num_online_cpus() + 2, 8);
@@ -1709,7 +1710,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1709 fs_info->do_barriers = 1; 1710 fs_info->do_barriers = 1;
1710 1711
1711 1712
1712 mutex_init(&fs_info->trans_mutex);
1713 mutex_init(&fs_info->ordered_operations_mutex); 1713 mutex_init(&fs_info->ordered_operations_mutex);
1714 mutex_init(&fs_info->tree_log_mutex); 1714 mutex_init(&fs_info->tree_log_mutex);
1715 mutex_init(&fs_info->chunk_mutex); 1715 mutex_init(&fs_info->chunk_mutex);
@@ -2479,13 +2479,13 @@ int btrfs_commit_super(struct btrfs_root *root)
2479 down_write(&root->fs_info->cleanup_work_sem); 2479 down_write(&root->fs_info->cleanup_work_sem);
2480 up_write(&root->fs_info->cleanup_work_sem); 2480 up_write(&root->fs_info->cleanup_work_sem);
2481 2481
2482 trans = btrfs_join_transaction(root, 1); 2482 trans = btrfs_join_transaction(root);
2483 if (IS_ERR(trans)) 2483 if (IS_ERR(trans))
2484 return PTR_ERR(trans); 2484 return PTR_ERR(trans);
2485 ret = btrfs_commit_transaction(trans, root); 2485 ret = btrfs_commit_transaction(trans, root);
2486 BUG_ON(ret); 2486 BUG_ON(ret);
2487 /* run commit again to drop the original snapshot */ 2487 /* run commit again to drop the original snapshot */
2488 trans = btrfs_join_transaction(root, 1); 2488 trans = btrfs_join_transaction(root);
2489 if (IS_ERR(trans)) 2489 if (IS_ERR(trans))
2490 return PTR_ERR(trans); 2490 return PTR_ERR(trans);
2491 btrfs_commit_transaction(trans, root); 2491 btrfs_commit_transaction(trans, root);
@@ -3024,10 +3024,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3024 3024
3025 WARN_ON(1); 3025 WARN_ON(1);
3026 3026
3027 mutex_lock(&root->fs_info->trans_mutex);
3028 mutex_lock(&root->fs_info->transaction_kthread_mutex); 3027 mutex_lock(&root->fs_info->transaction_kthread_mutex);
3029 3028
3029 spin_lock(&root->fs_info->trans_lock);
3030 list_splice_init(&root->fs_info->trans_list, &list); 3030 list_splice_init(&root->fs_info->trans_list, &list);
3031 root->fs_info->trans_no_join = 1;
3032 spin_unlock(&root->fs_info->trans_lock);
3033
3031 while (!list_empty(&list)) { 3034 while (!list_empty(&list)) {
3032 t = list_entry(list.next, struct btrfs_transaction, list); 3035 t = list_entry(list.next, struct btrfs_transaction, list);
3033 if (!t) 3036 if (!t)
@@ -3052,23 +3055,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3052 t->blocked = 0; 3055 t->blocked = 0;
3053 if (waitqueue_active(&root->fs_info->transaction_wait)) 3056 if (waitqueue_active(&root->fs_info->transaction_wait))
3054 wake_up(&root->fs_info->transaction_wait); 3057 wake_up(&root->fs_info->transaction_wait);
3055 mutex_unlock(&root->fs_info->trans_mutex);
3056 3058
3057 mutex_lock(&root->fs_info->trans_mutex);
3058 t->commit_done = 1; 3059 t->commit_done = 1;
3059 if (waitqueue_active(&t->commit_wait)) 3060 if (waitqueue_active(&t->commit_wait))
3060 wake_up(&t->commit_wait); 3061 wake_up(&t->commit_wait);
3061 mutex_unlock(&root->fs_info->trans_mutex);
3062
3063 mutex_lock(&root->fs_info->trans_mutex);
3064 3062
3065 btrfs_destroy_pending_snapshots(t); 3063 btrfs_destroy_pending_snapshots(t);
3066 3064
3067 btrfs_destroy_delalloc_inodes(root); 3065 btrfs_destroy_delalloc_inodes(root);
3068 3066
3069 spin_lock(&root->fs_info->new_trans_lock); 3067 spin_lock(&root->fs_info->trans_lock);
3070 root->fs_info->running_transaction = NULL; 3068 root->fs_info->running_transaction = NULL;
3071 spin_unlock(&root->fs_info->new_trans_lock); 3069 spin_unlock(&root->fs_info->trans_lock);
3072 3070
3073 btrfs_destroy_marked_extents(root, &t->dirty_pages, 3071 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3074 EXTENT_DIRTY); 3072 EXTENT_DIRTY);
@@ -3082,8 +3080,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3082 kmem_cache_free(btrfs_transaction_cachep, t); 3080 kmem_cache_free(btrfs_transaction_cachep, t);
3083 } 3081 }
3084 3082
3083 spin_lock(&root->fs_info->trans_lock);
3084 root->fs_info->trans_no_join = 0;
3085 spin_unlock(&root->fs_info->trans_lock);
3085 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 3086 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3086 mutex_unlock(&root->fs_info->trans_mutex);
3087 3087
3088 return 0; 3088 return 0;
3089} 3089}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 169bd62ce776..5b9b6b6df242 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -348,7 +348,7 @@ static int caching_kthread(void *data)
348 */ 348 */
349 path->skip_locking = 1; 349 path->skip_locking = 1;
350 path->search_commit_root = 1; 350 path->search_commit_root = 1;
351 path->reada = 2; 351 path->reada = 1;
352 352
353 key.objectid = last; 353 key.objectid = last;
354 key.offset = 0; 354 key.offset = 0;
@@ -366,8 +366,7 @@ again:
366 nritems = btrfs_header_nritems(leaf); 366 nritems = btrfs_header_nritems(leaf);
367 367
368 while (1) { 368 while (1) {
369 smp_mb(); 369 if (btrfs_fs_closing(fs_info) > 1) {
370 if (fs_info->closing > 1) {
371 last = (u64)-1; 370 last = (u64)-1;
372 break; 371 break;
373 } 372 }
@@ -379,15 +378,18 @@ again:
379 if (ret) 378 if (ret)
380 break; 379 break;
381 380
382 caching_ctl->progress = last; 381 if (need_resched() ||
383 btrfs_release_path(path); 382 btrfs_next_leaf(extent_root, path)) {
384 up_read(&fs_info->extent_commit_sem); 383 caching_ctl->progress = last;
385 mutex_unlock(&caching_ctl->mutex); 384 btrfs_release_path(path);
386 if (btrfs_transaction_in_commit(fs_info)) 385 up_read(&fs_info->extent_commit_sem);
387 schedule_timeout(1); 386 mutex_unlock(&caching_ctl->mutex);
388 else
389 cond_resched(); 387 cond_resched();
390 goto again; 388 goto again;
389 }
390 leaf = path->nodes[0];
391 nritems = btrfs_header_nritems(leaf);
392 continue;
391 } 393 }
392 394
393 if (key.objectid < block_group->key.objectid) { 395 if (key.objectid < block_group->key.objectid) {
@@ -3065,7 +3067,7 @@ again:
3065 spin_unlock(&data_sinfo->lock); 3067 spin_unlock(&data_sinfo->lock);
3066alloc: 3068alloc:
3067 alloc_target = btrfs_get_alloc_profile(root, 1); 3069 alloc_target = btrfs_get_alloc_profile(root, 1);
3068 trans = btrfs_join_transaction(root, 1); 3070 trans = btrfs_join_transaction(root);
3069 if (IS_ERR(trans)) 3071 if (IS_ERR(trans))
3070 return PTR_ERR(trans); 3072 return PTR_ERR(trans);
3071 3073
@@ -3091,9 +3093,10 @@ alloc:
3091 3093
3092 /* commit the current transaction and try again */ 3094 /* commit the current transaction and try again */
3093commit_trans: 3095commit_trans:
3094 if (!committed && !root->fs_info->open_ioctl_trans) { 3096 if (!committed &&
3097 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3095 committed = 1; 3098 committed = 1;
3096 trans = btrfs_join_transaction(root, 1); 3099 trans = btrfs_join_transaction(root);
3097 if (IS_ERR(trans)) 3100 if (IS_ERR(trans))
3098 return PTR_ERR(trans); 3101 return PTR_ERR(trans);
3099 ret = btrfs_commit_transaction(trans, root); 3102 ret = btrfs_commit_transaction(trans, root);
@@ -3472,7 +3475,7 @@ again:
3472 goto out; 3475 goto out;
3473 3476
3474 ret = -ENOSPC; 3477 ret = -ENOSPC;
3475 trans = btrfs_join_transaction(root, 1); 3478 trans = btrfs_join_transaction(root);
3476 if (IS_ERR(trans)) 3479 if (IS_ERR(trans))
3477 goto out; 3480 goto out;
3478 ret = btrfs_commit_transaction(trans, root); 3481 ret = btrfs_commit_transaction(trans, root);
@@ -3699,7 +3702,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3699 if (trans) 3702 if (trans)
3700 return -EAGAIN; 3703 return -EAGAIN;
3701 3704
3702 trans = btrfs_join_transaction(root, 1); 3705 trans = btrfs_join_transaction(root);
3703 BUG_ON(IS_ERR(trans)); 3706 BUG_ON(IS_ERR(trans));
3704 ret = btrfs_commit_transaction(trans, root); 3707 ret = btrfs_commit_transaction(trans, root);
3705 return 0; 3708 return 0;
@@ -3837,6 +3840,37 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3837 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3840 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3838} 3841}
3839 3842
3843int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3844 struct btrfs_root *root,
3845 struct btrfs_block_rsv *rsv)
3846{
3847 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
3848 u64 num_bytes;
3849 int ret;
3850
3851 /*
3852 * Truncate should be freeing data, but give us 2 items just in case it
3853 * needs to use some space. We may want to be smarter about this in the
3854 * future.
3855 */
3856 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
3857
3858 /* We already have enough bytes, just return */
3859 if (rsv->reserved >= num_bytes)
3860 return 0;
3861
3862 num_bytes -= rsv->reserved;
3863
3864 /*
3865 * You should have reserved enough space before hand to do this, so this
3866 * should not fail.
3867 */
3868 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
3869 BUG_ON(ret);
3870
3871 return 0;
3872}
3873
3840int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3874int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3841 struct btrfs_root *root, 3875 struct btrfs_root *root,
3842 int num_items) 3876 int num_items)
@@ -3877,23 +3911,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
3877 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3911 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
3878 3912
3879 /* 3913 /*
3880 * one for deleting orphan item, one for updating inode and 3914 * We need to hold space in order to delete our orphan item once we've
3881 * two for calling btrfs_truncate_inode_items. 3915 * added it, so this takes the reservation so we can release it later
3882 * 3916 * when we are truly done with the orphan item.
3883 * btrfs_truncate_inode_items is a delete operation, it frees
3884 * more space than it uses in most cases. So two units of
3885 * metadata space should be enough for calling it many times.
3886 * If all of the metadata space is used, we can commit
3887 * transaction and use space it freed.
3888 */ 3917 */
3889 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); 3918 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3890 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3919 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3891} 3920}
3892 3921
3893void btrfs_orphan_release_metadata(struct inode *inode) 3922void btrfs_orphan_release_metadata(struct inode *inode)
3894{ 3923{
3895 struct btrfs_root *root = BTRFS_I(inode)->root; 3924 struct btrfs_root *root = BTRFS_I(inode)->root;
3896 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); 3925 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3897 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3926 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
3898} 3927}
3899 3928
@@ -4987,6 +5016,15 @@ have_block_group:
4987 if (unlikely(block_group->ro)) 5016 if (unlikely(block_group->ro))
4988 goto loop; 5017 goto loop;
4989 5018
5019 spin_lock(&block_group->free_space_ctl->tree_lock);
5020 if (cached &&
5021 block_group->free_space_ctl->free_space <
5022 num_bytes + empty_size) {
5023 spin_unlock(&block_group->free_space_ctl->tree_lock);
5024 goto loop;
5025 }
5026 spin_unlock(&block_group->free_space_ctl->tree_lock);
5027
4990 /* 5028 /*
4991 * Ok we want to try and use the cluster allocator, so lets look 5029 * Ok we want to try and use the cluster allocator, so lets look
4992 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5030 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
@@ -5150,6 +5188,7 @@ checks:
5150 btrfs_add_free_space(block_group, offset, 5188 btrfs_add_free_space(block_group, offset,
5151 search_start - offset); 5189 search_start - offset);
5152 BUG_ON(offset > search_start); 5190 BUG_ON(offset > search_start);
5191 btrfs_put_block_group(block_group);
5153 break; 5192 break;
5154loop: 5193loop:
5155 failed_cluster_refill = false; 5194 failed_cluster_refill = false;
@@ -5242,14 +5281,7 @@ loop:
5242 ret = -ENOSPC; 5281 ret = -ENOSPC;
5243 } else if (!ins->objectid) { 5282 } else if (!ins->objectid) {
5244 ret = -ENOSPC; 5283 ret = -ENOSPC;
5245 } 5284 } else if (ins->objectid) {
5246
5247 /* we found what we needed */
5248 if (ins->objectid) {
5249 if (!(data & BTRFS_BLOCK_GROUP_DATA))
5250 trans->block_group = block_group->key.objectid;
5251
5252 btrfs_put_block_group(block_group);
5253 ret = 0; 5285 ret = 0;
5254 } 5286 }
5255 5287
@@ -6526,7 +6558,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
6526 6558
6527 BUG_ON(cache->ro); 6559 BUG_ON(cache->ro);
6528 6560
6529 trans = btrfs_join_transaction(root, 1); 6561 trans = btrfs_join_transaction(root);
6530 BUG_ON(IS_ERR(trans)); 6562 BUG_ON(IS_ERR(trans));
6531 6563
6532 alloc_flags = update_block_group_flags(root, cache->flags); 6564 alloc_flags = update_block_group_flags(root, cache->flags);
@@ -6882,6 +6914,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6882 path = btrfs_alloc_path(); 6914 path = btrfs_alloc_path();
6883 if (!path) 6915 if (!path)
6884 return -ENOMEM; 6916 return -ENOMEM;
6917 path->reada = 1;
6885 6918
6886 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 6919 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
6887 if (cache_gen != 0 && 6920 if (cache_gen != 0 &&
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c5d9fbb92bc3..7055d11c1efd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1476,7 +1476,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
1476 if (total_bytes >= max_bytes) 1476 if (total_bytes >= max_bytes)
1477 break; 1477 break;
1478 if (!found) { 1478 if (!found) {
1479 *start = state->start; 1479 *start = max(cur_start, state->start);
1480 found = 1; 1480 found = 1;
1481 } 1481 }
1482 last = state->end; 1482 last = state->end;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c6a22d783c35..fa4ef18b66b1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
129 if (!btrfs_test_opt(root, AUTO_DEFRAG)) 129 if (!btrfs_test_opt(root, AUTO_DEFRAG))
130 return 0; 130 return 0;
131 131
132 if (root->fs_info->closing) 132 if (btrfs_fs_closing(root->fs_info))
133 return 0; 133 return 0;
134 134
135 if (BTRFS_I(inode)->in_defrag) 135 if (BTRFS_I(inode)->in_defrag)
@@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
144 if (!defrag) 144 if (!defrag)
145 return -ENOMEM; 145 return -ENOMEM;
146 146
147 defrag->ino = inode->i_ino; 147 defrag->ino = btrfs_ino(inode);
148 defrag->transid = transid; 148 defrag->transid = transid;
149 defrag->root = root->root_key.objectid; 149 defrag->root = root->root_key.objectid;
150 150
@@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
229 first_ino = defrag->ino + 1; 229 first_ino = defrag->ino + 1;
230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); 230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
231 231
232 if (fs_info->closing) 232 if (btrfs_fs_closing(fs_info))
233 goto next_free; 233 goto next_free;
234 234
235 spin_unlock(&fs_info->defrag_inodes_lock); 235 spin_unlock(&fs_info->defrag_inodes_lock);
@@ -1480,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync)
1480 * the current transaction, we can bail out now without any 1480 * the current transaction, we can bail out now without any
1481 * syncing 1481 * syncing
1482 */ 1482 */
1483 mutex_lock(&root->fs_info->trans_mutex); 1483 smp_mb();
1484 if (BTRFS_I(inode)->last_trans <= 1484 if (BTRFS_I(inode)->last_trans <=
1485 root->fs_info->last_trans_committed) { 1485 root->fs_info->last_trans_committed) {
1486 BTRFS_I(inode)->last_trans = 0; 1486 BTRFS_I(inode)->last_trans = 0;
1487 mutex_unlock(&root->fs_info->trans_mutex);
1488 goto out; 1487 goto out;
1489 } 1488 }
1490 mutex_unlock(&root->fs_info->trans_mutex);
1491 1489
1492 /* 1490 /*
1493 * ok we haven't committed the transaction yet, lets do a commit 1491 * ok we haven't committed the transaction yet, lets do a commit
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 70d45795d758..ad144736a5fd 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
98 return inode; 98 return inode;
99 99
100 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
101 if (!root->fs_info->closing) { 101 if (!btrfs_fs_closing(root->fs_info)) {
102 block_group->inode = igrab(inode); 102 block_group->inode = igrab(inode);
103 block_group->iref = 1; 103 block_group->iref = 1;
104 } 104 }
@@ -402,7 +402,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
402 spin_lock(&ctl->tree_lock); 402 spin_lock(&ctl->tree_lock);
403 ret = link_free_space(ctl, e); 403 ret = link_free_space(ctl, e);
404 spin_unlock(&ctl->tree_lock); 404 spin_unlock(&ctl->tree_lock);
405 BUG_ON(ret); 405 if (ret) {
406 printk(KERN_ERR "Duplicate entries in "
407 "free space cache, dumping\n");
408 kunmap(page);
409 unlock_page(page);
410 page_cache_release(page);
411 goto free_cache;
412 }
406 } else { 413 } else {
407 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 414 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
408 if (!e->bitmap) { 415 if (!e->bitmap) {
@@ -419,6 +426,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
419 ctl->op->recalc_thresholds(ctl); 426 ctl->op->recalc_thresholds(ctl);
420 spin_unlock(&ctl->tree_lock); 427 spin_unlock(&ctl->tree_lock);
421 list_add_tail(&e->list, &bitmaps); 428 list_add_tail(&e->list, &bitmaps);
429 if (ret) {
430 printk(KERN_ERR "Duplicate entries in "
431 "free space cache, dumping\n");
432 kunmap(page);
433 unlock_page(page);
434 page_cache_release(page);
435 goto free_cache;
436 }
422 } 437 }
423 438
424 num_entries--; 439 num_entries--;
@@ -478,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
478 * If we're unmounting then just return, since this does a search on the 493 * If we're unmounting then just return, since this does a search on the
479 * normal root and not the commit root and we could deadlock. 494 * normal root and not the commit root and we could deadlock.
480 */ 495 */
481 smp_mb(); 496 if (btrfs_fs_closing(fs_info))
482 if (fs_info->closing)
483 return 0; 497 return 0;
484 498
485 /* 499 /*
@@ -575,10 +589,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
575 589
576 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 590 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
577 PAGE_CACHE_SHIFT; 591 PAGE_CACHE_SHIFT;
592
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
578 filemap_write_and_wait(inode->i_mapping); 599 filemap_write_and_wait(inode->i_mapping);
579 btrfs_wait_ordered_range(inode, inode->i_size & 600 btrfs_wait_ordered_range(inode, inode->i_size &
580 ~(root->sectorsize - 1), (u64)-1); 601 ~(root->sectorsize - 1), (u64)-1);
581 602
603 /* make sure we don't overflow that first page */
604 if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
605 /* this is really the same as running out of space, where we also return 0 */
606 printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
607 ret = 0;
608 goto out_update;
609 }
610
582 /* We need a checksum per page. */ 611 /* We need a checksum per page. */
583 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); 612 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
584 if (!crc) 613 if (!crc)
@@ -590,12 +619,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
590 return -1; 619 return -1;
591 } 620 }
592 621
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
599 /* Get the cluster for this block_group if it exists */ 622 /* Get the cluster for this block_group if it exists */
600 if (block_group && !list_empty(&block_group->cluster_list)) 623 if (block_group && !list_empty(&block_group->cluster_list))
601 cluster = list_entry(block_group->cluster_list.next, 624 cluster = list_entry(block_group->cluster_list.next,
@@ -857,12 +880,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
857 ret = 1; 880 ret = 1;
858 881
859out_free: 882out_free:
883 kfree(checksums);
884 kfree(pages);
885
886out_update:
860 if (ret != 1) { 887 if (ret != 1) {
861 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 888 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
862 BTRFS_I(inode)->generation = 0; 889 BTRFS_I(inode)->generation = 0;
863 } 890 }
864 kfree(checksums);
865 kfree(pages);
866 btrfs_update_inode(trans, root, inode); 891 btrfs_update_inode(trans, root, inode);
867 return ret; 892 return ret;
868} 893}
@@ -963,10 +988,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
963 * logically. 988 * logically.
964 */ 989 */
965 if (bitmap) { 990 if (bitmap) {
966 WARN_ON(info->bitmap); 991 if (info->bitmap) {
992 WARN_ON_ONCE(1);
993 return -EEXIST;
994 }
967 p = &(*p)->rb_right; 995 p = &(*p)->rb_right;
968 } else { 996 } else {
969 WARN_ON(!info->bitmap); 997 if (!info->bitmap) {
998 WARN_ON_ONCE(1);
999 return -EEXIST;
1000 }
970 p = &(*p)->rb_left; 1001 p = &(*p)->rb_left;
971 } 1002 }
972 } 1003 }
@@ -2481,7 +2512,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2481 return inode; 2512 return inode;
2482 2513
2483 spin_lock(&root->cache_lock); 2514 spin_lock(&root->cache_lock);
2484 if (!root->fs_info->closing) 2515 if (!btrfs_fs_closing(root->fs_info))
2485 root->cache_inode = igrab(inode); 2516 root->cache_inode = igrab(inode);
2486 spin_unlock(&root->cache_lock); 2517 spin_unlock(&root->cache_lock);
2487 2518
@@ -2504,12 +2535,14 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2504 int ret = 0; 2535 int ret = 0;
2505 u64 root_gen = btrfs_root_generation(&root->root_item); 2536 u64 root_gen = btrfs_root_generation(&root->root_item);
2506 2537
2538 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2539 return 0;
2540
2507 /* 2541 /*
2508 * If we're unmounting then just return, since this does a search on the 2542 * If we're unmounting then just return, since this does a search on the
2509 * normal root and not the commit root and we could deadlock. 2543 * normal root and not the commit root and we could deadlock.
2510 */ 2544 */
2511 smp_mb(); 2545 if (btrfs_fs_closing(fs_info))
2512 if (fs_info->closing)
2513 return 0; 2546 return 0;
2514 2547
2515 path = btrfs_alloc_path(); 2548 path = btrfs_alloc_path();
@@ -2543,6 +2576,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2543 struct inode *inode; 2576 struct inode *inode;
2544 int ret; 2577 int ret;
2545 2578
2579 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2580 return 0;
2581
2546 inode = lookup_free_ino_inode(root, path); 2582 inode = lookup_free_ino_inode(root, path);
2547 if (IS_ERR(inode)) 2583 if (IS_ERR(inode))
2548 return 0; 2584 return 0;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 3262cd17a12f..b4087e0fa871 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -38,6 +38,9 @@ static int caching_kthread(void *data)
38 int slot; 38 int slot;
39 int ret; 39 int ret;
40 40
41 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
42 return 0;
43
41 path = btrfs_alloc_path(); 44 path = btrfs_alloc_path();
42 if (!path) 45 if (!path)
43 return -ENOMEM; 46 return -ENOMEM;
@@ -59,8 +62,7 @@ again:
59 goto out; 62 goto out;
60 63
61 while (1) { 64 while (1) {
62 smp_mb(); 65 if (btrfs_fs_closing(fs_info))
63 if (fs_info->closing)
64 goto out; 66 goto out;
65 67
66 leaf = path->nodes[0]; 68 leaf = path->nodes[0];
@@ -141,6 +143,9 @@ static void start_caching(struct btrfs_root *root)
141 int ret; 143 int ret;
142 u64 objectid; 144 u64 objectid;
143 145
146 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
147 return;
148
144 spin_lock(&root->cache_lock); 149 spin_lock(&root->cache_lock);
145 if (root->cached != BTRFS_CACHE_NO) { 150 if (root->cached != BTRFS_CACHE_NO) {
146 spin_unlock(&root->cache_lock); 151 spin_unlock(&root->cache_lock);
@@ -178,6 +183,9 @@ static void start_caching(struct btrfs_root *root)
178 183
179int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) 184int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
180{ 185{
186 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
187 return btrfs_find_free_objectid(root, objectid);
188
181again: 189again:
182 *objectid = btrfs_find_ino_for_alloc(root); 190 *objectid = btrfs_find_ino_for_alloc(root);
183 191
@@ -201,6 +209,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
201{ 209{
202 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 210 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
203 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; 211 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
212
213 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
214 return;
215
204again: 216again:
205 if (root->cached == BTRFS_CACHE_FINISHED) { 217 if (root->cached == BTRFS_CACHE_FINISHED) {
206 __btrfs_add_free_space(ctl, objectid, 1); 218 __btrfs_add_free_space(ctl, objectid, 1);
@@ -250,6 +262,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
250 struct rb_node *n; 262 struct rb_node *n;
251 u64 count; 263 u64 count;
252 264
265 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
266 return;
267
253 while (1) { 268 while (1) {
254 n = rb_first(rbroot); 269 n = rb_first(rbroot);
255 if (!n) 270 if (!n)
@@ -388,9 +403,24 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
388 int prealloc; 403 int prealloc;
389 bool retry = false; 404 bool retry = false;
390 405
406 /* only fs tree and subvol/snap needs ino cache */
407 if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
408 (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
409 root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
410 return 0;
411
412 /* Don't save inode cache if we are deleting this root */
413 if (btrfs_root_refs(&root->root_item) == 0 &&
414 root != root->fs_info->tree_root)
415 return 0;
416
417 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
418 return 0;
419
391 path = btrfs_alloc_path(); 420 path = btrfs_alloc_path();
392 if (!path) 421 if (!path)
393 return -ENOMEM; 422 return -ENOMEM;
423
394again: 424again:
395 inode = lookup_free_ino_inode(root, path); 425 inode = lookup_free_ino_inode(root, path);
396 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 426 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 39a9d5750efd..ebf95f7a44d6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -138,7 +138,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
138 return -ENOMEM; 138 return -ENOMEM;
139 139
140 path->leave_spinning = 1; 140 path->leave_spinning = 1;
141 btrfs_set_trans_block_group(trans, inode);
142 141
143 key.objectid = btrfs_ino(inode); 142 key.objectid = btrfs_ino(inode);
144 key.offset = start; 143 key.offset = start;
@@ -426,9 +425,8 @@ again:
426 } 425 }
427 } 426 }
428 if (start == 0) { 427 if (start == 0) {
429 trans = btrfs_join_transaction(root, 1); 428 trans = btrfs_join_transaction(root);
430 BUG_ON(IS_ERR(trans)); 429 BUG_ON(IS_ERR(trans));
431 btrfs_set_trans_block_group(trans, inode);
432 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 430 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
433 431
434 /* lets try to make an inline extent */ 432 /* lets try to make an inline extent */
@@ -623,8 +621,9 @@ retry:
623 async_extent->start + async_extent->ram_size - 1, 621 async_extent->start + async_extent->ram_size - 1,
624 GFP_NOFS); 622 GFP_NOFS);
625 623
626 trans = btrfs_join_transaction(root, 1); 624 trans = btrfs_join_transaction(root);
627 BUG_ON(IS_ERR(trans)); 625 BUG_ON(IS_ERR(trans));
626 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
628 ret = btrfs_reserve_extent(trans, root, 627 ret = btrfs_reserve_extent(trans, root,
629 async_extent->compressed_size, 628 async_extent->compressed_size,
630 async_extent->compressed_size, 629 async_extent->compressed_size,
@@ -793,9 +792,8 @@ static noinline int cow_file_range(struct inode *inode,
793 int ret = 0; 792 int ret = 0;
794 793
795 BUG_ON(is_free_space_inode(root, inode)); 794 BUG_ON(is_free_space_inode(root, inode));
796 trans = btrfs_join_transaction(root, 1); 795 trans = btrfs_join_transaction(root);
797 BUG_ON(IS_ERR(trans)); 796 BUG_ON(IS_ERR(trans));
798 btrfs_set_trans_block_group(trans, inode);
799 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 797 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
800 798
801 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 799 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -1077,10 +1075,12 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1077 nolock = is_free_space_inode(root, inode); 1075 nolock = is_free_space_inode(root, inode);
1078 1076
1079 if (nolock) 1077 if (nolock)
1080 trans = btrfs_join_transaction_nolock(root, 1); 1078 trans = btrfs_join_transaction_nolock(root);
1081 else 1079 else
1082 trans = btrfs_join_transaction(root, 1); 1080 trans = btrfs_join_transaction(root);
1081
1083 BUG_ON(IS_ERR(trans)); 1082 BUG_ON(IS_ERR(trans));
1083 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1084 1084
1085 cow_start = (u64)-1; 1085 cow_start = (u64)-1;
1086 cur_offset = start; 1086 cur_offset = start;
@@ -1519,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1519{ 1519{
1520 struct btrfs_ordered_sum *sum; 1520 struct btrfs_ordered_sum *sum;
1521 1521
1522 btrfs_set_trans_block_group(trans, inode);
1523
1524 list_for_each_entry(sum, list, list) { 1522 list_for_each_entry(sum, list, list) {
1525 btrfs_csum_file_blocks(trans, 1523 btrfs_csum_file_blocks(trans,
1526 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1524 BTRFS_I(inode)->root->fs_info->csum_root, sum);
@@ -1735,11 +1733,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1735 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1733 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1736 if (!ret) { 1734 if (!ret) {
1737 if (nolock) 1735 if (nolock)
1738 trans = btrfs_join_transaction_nolock(root, 1); 1736 trans = btrfs_join_transaction_nolock(root);
1739 else 1737 else
1740 trans = btrfs_join_transaction(root, 1); 1738 trans = btrfs_join_transaction(root);
1741 BUG_ON(IS_ERR(trans)); 1739 BUG_ON(IS_ERR(trans));
1742 btrfs_set_trans_block_group(trans, inode);
1743 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1740 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1744 ret = btrfs_update_inode(trans, root, inode); 1741 ret = btrfs_update_inode(trans, root, inode);
1745 BUG_ON(ret); 1742 BUG_ON(ret);
@@ -1752,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1752 0, &cached_state, GFP_NOFS); 1749 0, &cached_state, GFP_NOFS);
1753 1750
1754 if (nolock) 1751 if (nolock)
1755 trans = btrfs_join_transaction_nolock(root, 1); 1752 trans = btrfs_join_transaction_nolock(root);
1756 else 1753 else
1757 trans = btrfs_join_transaction(root, 1); 1754 trans = btrfs_join_transaction(root);
1758 BUG_ON(IS_ERR(trans)); 1755 BUG_ON(IS_ERR(trans));
1759 btrfs_set_trans_block_group(trans, inode);
1760 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1756 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1761 1757
1762 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1758 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -2431,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2431 (u64)-1); 2427 (u64)-1);
2432 2428
2433 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2429 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2434 trans = btrfs_join_transaction(root, 1); 2430 trans = btrfs_join_transaction(root);
2435 if (!IS_ERR(trans)) 2431 if (!IS_ERR(trans))
2436 btrfs_end_transaction(trans, root); 2432 btrfs_end_transaction(trans, root);
2437 } 2433 }
@@ -2511,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
2511 struct btrfs_root *root = BTRFS_I(inode)->root; 2507 struct btrfs_root *root = BTRFS_I(inode)->root;
2512 struct btrfs_key location; 2508 struct btrfs_key location;
2513 int maybe_acls; 2509 int maybe_acls;
2514 u64 alloc_group_block;
2515 u32 rdev; 2510 u32 rdev;
2516 int ret; 2511 int ret;
2517 2512
2518 path = btrfs_alloc_path(); 2513 path = btrfs_alloc_path();
2519 BUG_ON(!path); 2514 BUG_ON(!path);
2515 path->leave_spinning = 1;
2520 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 2516 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
2521 2517
2522 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 2518 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -2526,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
2526 leaf = path->nodes[0]; 2522 leaf = path->nodes[0];
2527 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2523 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2528 struct btrfs_inode_item); 2524 struct btrfs_inode_item);
2525 if (!leaf->map_token)
2526 map_private_extent_buffer(leaf, (unsigned long)inode_item,
2527 sizeof(struct btrfs_inode_item),
2528 &leaf->map_token, &leaf->kaddr,
2529 &leaf->map_start, &leaf->map_len,
2530 KM_USER1);
2529 2531
2530 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2532 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2531 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); 2533 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
@@ -2555,8 +2557,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2555 BTRFS_I(inode)->index_cnt = (u64)-1; 2557 BTRFS_I(inode)->index_cnt = (u64)-1;
2556 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2558 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2557 2559
2558 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2559
2560 /* 2560 /*
2561 * try to precache a NULL acl entry for files that don't have 2561 * try to precache a NULL acl entry for files that don't have
2562 * any xattrs or acls 2562 * any xattrs or acls
@@ -2566,8 +2566,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
2566 if (!maybe_acls) 2566 if (!maybe_acls)
2567 cache_no_acl(inode); 2567 cache_no_acl(inode);
2568 2568
2569 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2569 if (leaf->map_token) {
2570 alloc_group_block, 0); 2570 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2571 leaf->map_token = NULL;
2572 }
2573
2571 btrfs_free_path(path); 2574 btrfs_free_path(path);
2572 inode_item = NULL; 2575 inode_item = NULL;
2573 2576
@@ -2647,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2647 btrfs_set_inode_transid(leaf, item, trans->transid); 2650 btrfs_set_inode_transid(leaf, item, trans->transid);
2648 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2651 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2649 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2652 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2650 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2653 btrfs_set_inode_block_group(leaf, item, 0);
2651 2654
2652 if (leaf->map_token) { 2655 if (leaf->map_token) {
2653 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 2656 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
@@ -3004,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3004 if (IS_ERR(trans)) 3007 if (IS_ERR(trans))
3005 return PTR_ERR(trans); 3008 return PTR_ERR(trans);
3006 3009
3007 btrfs_set_trans_block_group(trans, dir);
3008
3009 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); 3010 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
3010 3011
3011 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 3012 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
@@ -3094,8 +3095,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3094 if (IS_ERR(trans)) 3095 if (IS_ERR(trans))
3095 return PTR_ERR(trans); 3096 return PTR_ERR(trans);
3096 3097
3097 btrfs_set_trans_block_group(trans, dir);
3098
3099 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 3098 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3100 err = btrfs_unlink_subvol(trans, root, dir, 3099 err = btrfs_unlink_subvol(trans, root, dir,
3101 BTRFS_I(inode)->location.objectid, 3100 BTRFS_I(inode)->location.objectid,
@@ -3514,7 +3513,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3514 err = PTR_ERR(trans); 3513 err = PTR_ERR(trans);
3515 break; 3514 break;
3516 } 3515 }
3517 btrfs_set_trans_block_group(trans, inode);
3518 3516
3519 err = btrfs_drop_extents(trans, inode, cur_offset, 3517 err = btrfs_drop_extents(trans, inode, cur_offset,
3520 cur_offset + hole_size, 3518 cur_offset + hole_size,
@@ -3650,7 +3648,6 @@ void btrfs_evict_inode(struct inode *inode)
3650 while (1) { 3648 while (1) {
3651 trans = btrfs_start_transaction(root, 0); 3649 trans = btrfs_start_transaction(root, 0);
3652 BUG_ON(IS_ERR(trans)); 3650 BUG_ON(IS_ERR(trans));
3653 btrfs_set_trans_block_group(trans, inode);
3654 trans->block_rsv = root->orphan_block_rsv; 3651 trans->block_rsv = root->orphan_block_rsv;
3655 3652
3656 ret = btrfs_block_rsv_check(trans, root, 3653 ret = btrfs_block_rsv_check(trans, root,
@@ -4133,7 +4130,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4133 path = btrfs_alloc_path(); 4130 path = btrfs_alloc_path();
4134 if (!path) 4131 if (!path)
4135 return -ENOMEM; 4132 return -ENOMEM;
4136 path->reada = 2; 4133
4134 path->reada = 1;
4137 4135
4138 if (key_type == BTRFS_DIR_INDEX_KEY) { 4136 if (key_type == BTRFS_DIR_INDEX_KEY) {
4139 INIT_LIST_HEAD(&ins_list); 4137 INIT_LIST_HEAD(&ins_list);
@@ -4268,18 +4266,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4268 if (BTRFS_I(inode)->dummy_inode) 4266 if (BTRFS_I(inode)->dummy_inode)
4269 return 0; 4267 return 0;
4270 4268
4271 smp_mb(); 4269 if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
4272 if (root->fs_info->closing && is_free_space_inode(root, inode))
4273 nolock = true; 4270 nolock = true;
4274 4271
4275 if (wbc->sync_mode == WB_SYNC_ALL) { 4272 if (wbc->sync_mode == WB_SYNC_ALL) {
4276 if (nolock) 4273 if (nolock)
4277 trans = btrfs_join_transaction_nolock(root, 1); 4274 trans = btrfs_join_transaction_nolock(root);
4278 else 4275 else
4279 trans = btrfs_join_transaction(root, 1); 4276 trans = btrfs_join_transaction(root);
4280 if (IS_ERR(trans)) 4277 if (IS_ERR(trans))
4281 return PTR_ERR(trans); 4278 return PTR_ERR(trans);
4282 btrfs_set_trans_block_group(trans, inode);
4283 if (nolock) 4279 if (nolock)
4284 ret = btrfs_end_transaction_nolock(trans, root); 4280 ret = btrfs_end_transaction_nolock(trans, root);
4285 else 4281 else
@@ -4303,9 +4299,8 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
4303 if (BTRFS_I(inode)->dummy_inode) 4299 if (BTRFS_I(inode)->dummy_inode)
4304 return; 4300 return;
4305 4301
4306 trans = btrfs_join_transaction(root, 1); 4302 trans = btrfs_join_transaction(root);
4307 BUG_ON(IS_ERR(trans)); 4303 BUG_ON(IS_ERR(trans));
4308 btrfs_set_trans_block_group(trans, inode);
4309 4304
4310 ret = btrfs_update_inode(trans, root, inode); 4305 ret = btrfs_update_inode(trans, root, inode);
4311 if (ret && ret == -ENOSPC) { 4306 if (ret && ret == -ENOSPC) {
@@ -4319,7 +4314,6 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
4319 PTR_ERR(trans)); 4314 PTR_ERR(trans));
4320 return; 4315 return;
4321 } 4316 }
4322 btrfs_set_trans_block_group(trans, inode);
4323 4317
4324 ret = btrfs_update_inode(trans, root, inode); 4318 ret = btrfs_update_inode(trans, root, inode);
4325 if (ret) { 4319 if (ret) {
@@ -4418,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4418 struct btrfs_root *root, 4412 struct btrfs_root *root,
4419 struct inode *dir, 4413 struct inode *dir,
4420 const char *name, int name_len, 4414 const char *name, int name_len,
4421 u64 ref_objectid, u64 objectid, 4415 u64 ref_objectid, u64 objectid, int mode,
4422 u64 alloc_hint, int mode, u64 *index) 4416 u64 *index)
4423{ 4417{
4424 struct inode *inode; 4418 struct inode *inode;
4425 struct btrfs_inode_item *inode_item; 4419 struct btrfs_inode_item *inode_item;
@@ -4472,8 +4466,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4472 owner = 0; 4466 owner = 0;
4473 else 4467 else
4474 owner = 1; 4468 owner = 1;
4475 BTRFS_I(inode)->block_group =
4476 btrfs_find_block_group(root, 0, alloc_hint, owner);
4477 4469
4478 key[0].objectid = objectid; 4470 key[0].objectid = objectid;
4479 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 4471 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -4629,15 +4621,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4629 if (IS_ERR(trans)) 4621 if (IS_ERR(trans))
4630 return PTR_ERR(trans); 4622 return PTR_ERR(trans);
4631 4623
4632 btrfs_set_trans_block_group(trans, dir);
4633
4634 err = btrfs_find_free_ino(root, &objectid); 4624 err = btrfs_find_free_ino(root, &objectid);
4635 if (err) 4625 if (err)
4636 goto out_unlock; 4626 goto out_unlock;
4637 4627
4638 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4628 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4639 dentry->d_name.len, btrfs_ino(dir), objectid, 4629 dentry->d_name.len, btrfs_ino(dir), objectid,
4640 BTRFS_I(dir)->block_group, mode, &index); 4630 mode, &index);
4641 if (IS_ERR(inode)) { 4631 if (IS_ERR(inode)) {
4642 err = PTR_ERR(inode); 4632 err = PTR_ERR(inode);
4643 goto out_unlock; 4633 goto out_unlock;
@@ -4649,7 +4639,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4649 goto out_unlock; 4639 goto out_unlock;
4650 } 4640 }
4651 4641
4652 btrfs_set_trans_block_group(trans, inode);
4653 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4642 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4654 if (err) 4643 if (err)
4655 drop_inode = 1; 4644 drop_inode = 1;
@@ -4658,8 +4647,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4658 init_special_inode(inode, inode->i_mode, rdev); 4647 init_special_inode(inode, inode->i_mode, rdev);
4659 btrfs_update_inode(trans, root, inode); 4648 btrfs_update_inode(trans, root, inode);
4660 } 4649 }
4661 btrfs_update_inode_block_group(trans, inode);
4662 btrfs_update_inode_block_group(trans, dir);
4663out_unlock: 4650out_unlock:
4664 nr = trans->blocks_used; 4651 nr = trans->blocks_used;
4665 btrfs_end_transaction_throttle(trans, root); 4652 btrfs_end_transaction_throttle(trans, root);
@@ -4692,15 +4679,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4692 if (IS_ERR(trans)) 4679 if (IS_ERR(trans))
4693 return PTR_ERR(trans); 4680 return PTR_ERR(trans);
4694 4681
4695 btrfs_set_trans_block_group(trans, dir);
4696
4697 err = btrfs_find_free_ino(root, &objectid); 4682 err = btrfs_find_free_ino(root, &objectid);
4698 if (err) 4683 if (err)
4699 goto out_unlock; 4684 goto out_unlock;
4700 4685
4701 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4686 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4702 dentry->d_name.len, btrfs_ino(dir), objectid, 4687 dentry->d_name.len, btrfs_ino(dir), objectid,
4703 BTRFS_I(dir)->block_group, mode, &index); 4688 mode, &index);
4704 if (IS_ERR(inode)) { 4689 if (IS_ERR(inode)) {
4705 err = PTR_ERR(inode); 4690 err = PTR_ERR(inode);
4706 goto out_unlock; 4691 goto out_unlock;
@@ -4712,7 +4697,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4712 goto out_unlock; 4697 goto out_unlock;
4713 } 4698 }
4714 4699
4715 btrfs_set_trans_block_group(trans, inode);
4716 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4700 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4717 if (err) 4701 if (err)
4718 drop_inode = 1; 4702 drop_inode = 1;
@@ -4723,8 +4707,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4723 inode->i_op = &btrfs_file_inode_operations; 4707 inode->i_op = &btrfs_file_inode_operations;
4724 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 4708 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4725 } 4709 }
4726 btrfs_update_inode_block_group(trans, inode);
4727 btrfs_update_inode_block_group(trans, dir);
4728out_unlock: 4710out_unlock:
4729 nr = trans->blocks_used; 4711 nr = trans->blocks_used;
4730 btrfs_end_transaction_throttle(trans, root); 4712 btrfs_end_transaction_throttle(trans, root);
@@ -4771,8 +4753,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4771 4753
4772 btrfs_inc_nlink(inode); 4754 btrfs_inc_nlink(inode);
4773 inode->i_ctime = CURRENT_TIME; 4755 inode->i_ctime = CURRENT_TIME;
4774
4775 btrfs_set_trans_block_group(trans, dir);
4776 ihold(inode); 4756 ihold(inode);
4777 4757
4778 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); 4758 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
@@ -4781,7 +4761,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4781 drop_inode = 1; 4761 drop_inode = 1;
4782 } else { 4762 } else {
4783 struct dentry *parent = dget_parent(dentry); 4763 struct dentry *parent = dget_parent(dentry);
4784 btrfs_update_inode_block_group(trans, dir);
4785 err = btrfs_update_inode(trans, root, inode); 4764 err = btrfs_update_inode(trans, root, inode);
4786 BUG_ON(err); 4765 BUG_ON(err);
4787 btrfs_log_new_name(trans, inode, NULL, parent); 4766 btrfs_log_new_name(trans, inode, NULL, parent);
@@ -4818,7 +4797,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4818 trans = btrfs_start_transaction(root, 5); 4797 trans = btrfs_start_transaction(root, 5);
4819 if (IS_ERR(trans)) 4798 if (IS_ERR(trans))
4820 return PTR_ERR(trans); 4799 return PTR_ERR(trans);
4821 btrfs_set_trans_block_group(trans, dir);
4822 4800
4823 err = btrfs_find_free_ino(root, &objectid); 4801 err = btrfs_find_free_ino(root, &objectid);
4824 if (err) 4802 if (err)
@@ -4826,8 +4804,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4826 4804
4827 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4805 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4828 dentry->d_name.len, btrfs_ino(dir), objectid, 4806 dentry->d_name.len, btrfs_ino(dir), objectid,
4829 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4807 S_IFDIR | mode, &index);
4830 &index);
4831 if (IS_ERR(inode)) { 4808 if (IS_ERR(inode)) {
4832 err = PTR_ERR(inode); 4809 err = PTR_ERR(inode);
4833 goto out_fail; 4810 goto out_fail;
@@ -4841,7 +4818,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4841 4818
4842 inode->i_op = &btrfs_dir_inode_operations; 4819 inode->i_op = &btrfs_dir_inode_operations;
4843 inode->i_fop = &btrfs_dir_file_operations; 4820 inode->i_fop = &btrfs_dir_file_operations;
4844 btrfs_set_trans_block_group(trans, inode);
4845 4821
4846 btrfs_i_size_write(inode, 0); 4822 btrfs_i_size_write(inode, 0);
4847 err = btrfs_update_inode(trans, root, inode); 4823 err = btrfs_update_inode(trans, root, inode);
@@ -4855,8 +4831,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4855 4831
4856 d_instantiate(dentry, inode); 4832 d_instantiate(dentry, inode);
4857 drop_on_err = 0; 4833 drop_on_err = 0;
4858 btrfs_update_inode_block_group(trans, inode);
4859 btrfs_update_inode_block_group(trans, dir);
4860 4834
4861out_fail: 4835out_fail:
4862 nr = trans->blocks_used; 4836 nr = trans->blocks_used;
@@ -4989,7 +4963,15 @@ again:
4989 4963
4990 if (!path) { 4964 if (!path) {
4991 path = btrfs_alloc_path(); 4965 path = btrfs_alloc_path();
4992 BUG_ON(!path); 4966 if (!path) {
4967 err = -ENOMEM;
4968 goto out;
4969 }
4970 /*
4971 * Chances are we'll be called again, so go ahead and do
4972 * readahead
4973 */
4974 path->reada = 1;
4993 } 4975 }
4994 4976
4995 ret = btrfs_lookup_file_extent(trans, root, path, 4977 ret = btrfs_lookup_file_extent(trans, root, path,
@@ -5130,8 +5112,10 @@ again:
5130 kunmap(page); 5112 kunmap(page);
5131 free_extent_map(em); 5113 free_extent_map(em);
5132 em = NULL; 5114 em = NULL;
5115
5133 btrfs_release_path(path); 5116 btrfs_release_path(path);
5134 trans = btrfs_join_transaction(root, 1); 5117 trans = btrfs_join_transaction(root);
5118
5135 if (IS_ERR(trans)) 5119 if (IS_ERR(trans))
5136 return ERR_CAST(trans); 5120 return ERR_CAST(trans);
5137 goto again; 5121 goto again;
@@ -5375,7 +5359,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5375 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5359 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5376 } 5360 }
5377 5361
5378 trans = btrfs_join_transaction(root, 0); 5362 trans = btrfs_join_transaction(root);
5379 if (IS_ERR(trans)) 5363 if (IS_ERR(trans))
5380 return ERR_CAST(trans); 5364 return ERR_CAST(trans);
5381 5365
@@ -5611,7 +5595,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5611 * to make sure the current transaction stays open 5595 * to make sure the current transaction stays open
5612 * while we look for nocow cross refs 5596 * while we look for nocow cross refs
5613 */ 5597 */
5614 trans = btrfs_join_transaction(root, 0); 5598 trans = btrfs_join_transaction(root);
5615 if (IS_ERR(trans)) 5599 if (IS_ERR(trans))
5616 goto must_cow; 5600 goto must_cow;
5617 5601
@@ -5750,7 +5734,7 @@ again:
5750 5734
5751 BUG_ON(!ordered); 5735 BUG_ON(!ordered);
5752 5736
5753 trans = btrfs_join_transaction(root, 1); 5737 trans = btrfs_join_transaction(root);
5754 if (IS_ERR(trans)) { 5738 if (IS_ERR(trans)) {
5755 err = -ENOMEM; 5739 err = -ENOMEM;
5756 goto out; 5740 goto out;
@@ -6500,6 +6484,7 @@ out:
6500static int btrfs_truncate(struct inode *inode) 6484static int btrfs_truncate(struct inode *inode)
6501{ 6485{
6502 struct btrfs_root *root = BTRFS_I(inode)->root; 6486 struct btrfs_root *root = BTRFS_I(inode)->root;
6487 struct btrfs_block_rsv *rsv;
6503 int ret; 6488 int ret;
6504 int err = 0; 6489 int err = 0;
6505 struct btrfs_trans_handle *trans; 6490 struct btrfs_trans_handle *trans;
@@ -6513,28 +6498,80 @@ static int btrfs_truncate(struct inode *inode)
6513 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6498 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
6514 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6499 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
6515 6500
6516 trans = btrfs_start_transaction(root, 5); 6501 /*
6517 if (IS_ERR(trans)) 6502 * Yes ladies and gentelment, this is indeed ugly. The fact is we have
6518 return PTR_ERR(trans); 6503 * 3 things going on here
6504 *
6505 * 1) We need to reserve space for our orphan item and the space to
6506 * delete our orphan item. Lord knows we don't want to have a dangling
6507 * orphan item because we didn't reserve space to remove it.
6508 *
6509 * 2) We need to reserve space to update our inode.
6510 *
6511 * 3) We need to have something to cache all the space that is going to
6512 * be free'd up by the truncate operation, but also have some slack
6513 * space reserved in case it uses space during the truncate (thank you
6514 * very much snapshotting).
6515 *
6516 * And we need these to all be seperate. The fact is we can use alot of
6517 * space doing the truncate, and we have no earthly idea how much space
6518 * we will use, so we need the truncate reservation to be seperate so it
6519 * doesn't end up using space reserved for updating the inode or
6520 * removing the orphan item. We also need to be able to stop the
6521 * transaction and start a new one, which means we need to be able to
6522 * update the inode several times, and we have no idea of knowing how
6523 * many times that will be, so we can't just reserve 1 item for the
6524 * entirety of the opration, so that has to be done seperately as well.
6525 * Then there is the orphan item, which does indeed need to be held on
6526 * to for the whole operation, and we need nobody to touch this reserved
6527 * space except the orphan code.
6528 *
6529 * So that leaves us with
6530 *
6531 * 1) root->orphan_block_rsv - for the orphan deletion.
6532 * 2) rsv - for the truncate reservation, which we will steal from the
6533 * transaction reservation.
6534 * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
6535 * updating the inode.
6536 */
6537 rsv = btrfs_alloc_block_rsv(root);
6538 if (!rsv)
6539 return -ENOMEM;
6540 btrfs_add_durable_block_rsv(root->fs_info, rsv);
6541
6542 trans = btrfs_start_transaction(root, 4);
6543 if (IS_ERR(trans)) {
6544 err = PTR_ERR(trans);
6545 goto out;
6546 }
6519 6547
6520 btrfs_set_trans_block_group(trans, inode); 6548 /*
6549 * Reserve space for the truncate process. Truncate should be adding
6550 * space, but if there are snapshots it may end up using space.
6551 */
6552 ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
6553 BUG_ON(ret);
6521 6554
6522 ret = btrfs_orphan_add(trans, inode); 6555 ret = btrfs_orphan_add(trans, inode);
6523 if (ret) { 6556 if (ret) {
6524 btrfs_end_transaction(trans, root); 6557 btrfs_end_transaction(trans, root);
6525 return ret; 6558 goto out;
6526 } 6559 }
6527 6560
6528 nr = trans->blocks_used; 6561 nr = trans->blocks_used;
6529 btrfs_end_transaction(trans, root); 6562 btrfs_end_transaction(trans, root);
6530 btrfs_btree_balance_dirty(root, nr); 6563 btrfs_btree_balance_dirty(root, nr);
6531 6564
6532 /* Now start a transaction for the truncate */ 6565 /*
6533 trans = btrfs_start_transaction(root, 0); 6566 * Ok so we've already migrated our bytes over for the truncate, so here
6534 if (IS_ERR(trans)) 6567 * just reserve the one slot we need for updating the inode.
6535 return PTR_ERR(trans); 6568 */
6536 btrfs_set_trans_block_group(trans, inode); 6569 trans = btrfs_start_transaction(root, 1);
6537 trans->block_rsv = root->orphan_block_rsv; 6570 if (IS_ERR(trans)) {
6571 err = PTR_ERR(trans);
6572 goto out;
6573 }
6574 trans->block_rsv = rsv;
6538 6575
6539 /* 6576 /*
6540 * setattr is responsible for setting the ordered_data_close flag, 6577 * setattr is responsible for setting the ordered_data_close flag,
@@ -6558,24 +6595,17 @@ static int btrfs_truncate(struct inode *inode)
6558 6595
6559 while (1) { 6596 while (1) {
6560 if (!trans) { 6597 if (!trans) {
6561 trans = btrfs_start_transaction(root, 0); 6598 trans = btrfs_start_transaction(root, 3);
6562 if (IS_ERR(trans)) 6599 if (IS_ERR(trans)) {
6563 return PTR_ERR(trans); 6600 err = PTR_ERR(trans);
6564 btrfs_set_trans_block_group(trans, inode); 6601 goto out;
6565 trans->block_rsv = root->orphan_block_rsv; 6602 }
6566 }
6567 6603
6568 ret = btrfs_block_rsv_check(trans, root, 6604 ret = btrfs_truncate_reserve_metadata(trans, root,
6569 root->orphan_block_rsv, 0, 5); 6605 rsv);
6570 if (ret == -EAGAIN) { 6606 BUG_ON(ret);
6571 ret = btrfs_commit_transaction(trans, root); 6607
6572 if (ret) 6608 trans->block_rsv = rsv;
6573 return ret;
6574 trans = NULL;
6575 continue;
6576 } else if (ret) {
6577 err = ret;
6578 break;
6579 } 6609 }
6580 6610
6581 ret = btrfs_truncate_inode_items(trans, root, inode, 6611 ret = btrfs_truncate_inode_items(trans, root, inode,
@@ -6586,6 +6616,7 @@ static int btrfs_truncate(struct inode *inode)
6586 break; 6616 break;
6587 } 6617 }
6588 6618
6619 trans->block_rsv = &root->fs_info->trans_block_rsv;
6589 ret = btrfs_update_inode(trans, root, inode); 6620 ret = btrfs_update_inode(trans, root, inode);
6590 if (ret) { 6621 if (ret) {
6591 err = ret; 6622 err = ret;
@@ -6599,6 +6630,7 @@ static int btrfs_truncate(struct inode *inode)
6599 } 6630 }
6600 6631
6601 if (ret == 0 && inode->i_nlink > 0) { 6632 if (ret == 0 && inode->i_nlink > 0) {
6633 trans->block_rsv = root->orphan_block_rsv;
6602 ret = btrfs_orphan_del(trans, inode); 6634 ret = btrfs_orphan_del(trans, inode);
6603 if (ret) 6635 if (ret)
6604 err = ret; 6636 err = ret;
@@ -6610,15 +6642,20 @@ static int btrfs_truncate(struct inode *inode)
6610 ret = btrfs_orphan_del(NULL, inode); 6642 ret = btrfs_orphan_del(NULL, inode);
6611 } 6643 }
6612 6644
6645 trans->block_rsv = &root->fs_info->trans_block_rsv;
6613 ret = btrfs_update_inode(trans, root, inode); 6646 ret = btrfs_update_inode(trans, root, inode);
6614 if (ret && !err) 6647 if (ret && !err)
6615 err = ret; 6648 err = ret;
6616 6649
6617 nr = trans->blocks_used; 6650 nr = trans->blocks_used;
6618 ret = btrfs_end_transaction_throttle(trans, root); 6651 ret = btrfs_end_transaction_throttle(trans, root);
6652 btrfs_btree_balance_dirty(root, nr);
6653
6654out:
6655 btrfs_free_block_rsv(root, rsv);
6656
6619 if (ret && !err) 6657 if (ret && !err)
6620 err = ret; 6658 err = ret;
6621 btrfs_btree_balance_dirty(root, nr);
6622 6659
6623 return err; 6660 return err;
6624} 6661}
@@ -6627,15 +6664,14 @@ static int btrfs_truncate(struct inode *inode)
6627 * create a new subvolume directory/inode (helper for the ioctl). 6664 * create a new subvolume directory/inode (helper for the ioctl).
6628 */ 6665 */
6629int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 6666int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6630 struct btrfs_root *new_root, 6667 struct btrfs_root *new_root, u64 new_dirid)
6631 u64 new_dirid, u64 alloc_hint)
6632{ 6668{
6633 struct inode *inode; 6669 struct inode *inode;
6634 int err; 6670 int err;
6635 u64 index = 0; 6671 u64 index = 0;
6636 6672
6637 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, 6673 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
6638 new_dirid, alloc_hint, S_IFDIR | 0700, &index); 6674 new_dirid, S_IFDIR | 0700, &index);
6639 if (IS_ERR(inode)) 6675 if (IS_ERR(inode))
6640 return PTR_ERR(inode); 6676 return PTR_ERR(inode);
6641 inode->i_op = &btrfs_dir_inode_operations; 6677 inode->i_op = &btrfs_dir_inode_operations;
@@ -6748,21 +6784,6 @@ void btrfs_destroy_inode(struct inode *inode)
6748 spin_unlock(&root->fs_info->ordered_extent_lock); 6784 spin_unlock(&root->fs_info->ordered_extent_lock);
6749 } 6785 }
6750 6786
6751 if (root == root->fs_info->tree_root) {
6752 struct btrfs_block_group_cache *block_group;
6753
6754 block_group = btrfs_lookup_block_group(root->fs_info,
6755 BTRFS_I(inode)->block_group);
6756 if (block_group && block_group->inode == inode) {
6757 spin_lock(&block_group->lock);
6758 block_group->inode = NULL;
6759 spin_unlock(&block_group->lock);
6760 btrfs_put_block_group(block_group);
6761 } else if (block_group) {
6762 btrfs_put_block_group(block_group);
6763 }
6764 }
6765
6766 spin_lock(&root->orphan_lock); 6787 spin_lock(&root->orphan_lock);
6767 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6788 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6768 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 6789 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
@@ -6948,8 +6969,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6948 goto out_notrans; 6969 goto out_notrans;
6949 } 6970 }
6950 6971
6951 btrfs_set_trans_block_group(trans, new_dir);
6952
6953 if (dest != root) 6972 if (dest != root)
6954 btrfs_record_root_in_trans(trans, dest); 6973 btrfs_record_root_in_trans(trans, dest);
6955 6974
@@ -7131,16 +7150,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7131 if (IS_ERR(trans)) 7150 if (IS_ERR(trans))
7132 return PTR_ERR(trans); 7151 return PTR_ERR(trans);
7133 7152
7134 btrfs_set_trans_block_group(trans, dir);
7135
7136 err = btrfs_find_free_ino(root, &objectid); 7153 err = btrfs_find_free_ino(root, &objectid);
7137 if (err) 7154 if (err)
7138 goto out_unlock; 7155 goto out_unlock;
7139 7156
7140 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 7157 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
7141 dentry->d_name.len, btrfs_ino(dir), objectid, 7158 dentry->d_name.len, btrfs_ino(dir), objectid,
7142 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7159 S_IFLNK|S_IRWXUGO, &index);
7143 &index);
7144 if (IS_ERR(inode)) { 7160 if (IS_ERR(inode)) {
7145 err = PTR_ERR(inode); 7161 err = PTR_ERR(inode);
7146 goto out_unlock; 7162 goto out_unlock;
@@ -7152,7 +7168,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7152 goto out_unlock; 7168 goto out_unlock;
7153 } 7169 }
7154 7170
7155 btrfs_set_trans_block_group(trans, inode);
7156 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 7171 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
7157 if (err) 7172 if (err)
7158 drop_inode = 1; 7173 drop_inode = 1;
@@ -7163,8 +7178,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7163 inode->i_op = &btrfs_file_inode_operations; 7178 inode->i_op = &btrfs_file_inode_operations;
7164 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 7179 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
7165 } 7180 }
7166 btrfs_update_inode_block_group(trans, inode);
7167 btrfs_update_inode_block_group(trans, dir);
7168 if (drop_inode) 7181 if (drop_inode)
7169 goto out_unlock; 7182 goto out_unlock;
7170 7183
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 85e818ce00c5..ac37040e426a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -243,7 +243,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
244 } 244 }
245 245
246 trans = btrfs_join_transaction(root, 1); 246 trans = btrfs_join_transaction(root);
247 BUG_ON(IS_ERR(trans)); 247 BUG_ON(IS_ERR(trans));
248 248
249 ret = btrfs_update_inode(trans, root, inode); 249 ret = btrfs_update_inode(trans, root, inode);
@@ -414,8 +414,7 @@ static noinline int create_subvol(struct btrfs_root *root,
414 414
415 btrfs_record_root_in_trans(trans, new_root); 415 btrfs_record_root_in_trans(trans, new_root);
416 416
417 ret = btrfs_create_subvol_root(trans, new_root, new_dirid, 417 ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
418 BTRFS_I(dir)->block_group);
419 /* 418 /*
420 * insert the directory item 419 * insert the directory item
421 */ 420 */
@@ -707,16 +706,17 @@ static int find_new_extents(struct btrfs_root *root,
707 struct btrfs_file_extent_item *extent; 706 struct btrfs_file_extent_item *extent;
708 int type; 707 int type;
709 int ret; 708 int ret;
709 u64 ino = btrfs_ino(inode);
710 710
711 path = btrfs_alloc_path(); 711 path = btrfs_alloc_path();
712 if (!path) 712 if (!path)
713 return -ENOMEM; 713 return -ENOMEM;
714 714
715 min_key.objectid = inode->i_ino; 715 min_key.objectid = ino;
716 min_key.type = BTRFS_EXTENT_DATA_KEY; 716 min_key.type = BTRFS_EXTENT_DATA_KEY;
717 min_key.offset = *off; 717 min_key.offset = *off;
718 718
719 max_key.objectid = inode->i_ino; 719 max_key.objectid = ino;
720 max_key.type = (u8)-1; 720 max_key.type = (u8)-1;
721 max_key.offset = (u64)-1; 721 max_key.offset = (u64)-1;
722 722
@@ -727,7 +727,7 @@ static int find_new_extents(struct btrfs_root *root,
727 path, 0, newer_than); 727 path, 0, newer_than);
728 if (ret != 0) 728 if (ret != 0)
729 goto none; 729 goto none;
730 if (min_key.objectid != inode->i_ino) 730 if (min_key.objectid != ino)
731 goto none; 731 goto none;
732 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 732 if (min_key.type != BTRFS_EXTENT_DATA_KEY)
733 goto none; 733 goto none;
@@ -2489,12 +2489,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
2489 if (ret) 2489 if (ret)
2490 goto out; 2490 goto out;
2491 2491
2492 mutex_lock(&root->fs_info->trans_mutex); 2492 atomic_inc(&root->fs_info->open_ioctl_trans);
2493 root->fs_info->open_ioctl_trans++;
2494 mutex_unlock(&root->fs_info->trans_mutex);
2495 2493
2496 ret = -ENOMEM; 2494 ret = -ENOMEM;
2497 trans = btrfs_start_ioctl_transaction(root, 0); 2495 trans = btrfs_start_ioctl_transaction(root);
2498 if (IS_ERR(trans)) 2496 if (IS_ERR(trans))
2499 goto out_drop; 2497 goto out_drop;
2500 2498
@@ -2502,9 +2500,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2502 return 0; 2500 return 0;
2503 2501
2504out_drop: 2502out_drop:
2505 mutex_lock(&root->fs_info->trans_mutex); 2503 atomic_dec(&root->fs_info->open_ioctl_trans);
2506 root->fs_info->open_ioctl_trans--;
2507 mutex_unlock(&root->fs_info->trans_mutex);
2508 mnt_drop_write(file->f_path.mnt); 2504 mnt_drop_write(file->f_path.mnt);
2509out: 2505out:
2510 return ret; 2506 return ret;
@@ -2738,9 +2734,7 @@ long btrfs_ioctl_trans_end(struct file *file)
2738 2734
2739 btrfs_end_transaction(trans, root); 2735 btrfs_end_transaction(trans, root);
2740 2736
2741 mutex_lock(&root->fs_info->trans_mutex); 2737 atomic_dec(&root->fs_info->open_ioctl_trans);
2742 root->fs_info->open_ioctl_trans--;
2743 mutex_unlock(&root->fs_info->trans_mutex);
2744 2738
2745 mnt_drop_write(file->f_path.mnt); 2739 mnt_drop_write(file->f_path.mnt);
2746 return 0; 2740 return 0;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ca38eca70af0..b1ef27cc673b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
677 err = -ENOMEM; 677 err = -ENOMEM;
678 goto out; 678 goto out;
679 } 679 }
680 path1->reada = 1;
681 path2->reada = 2;
680 682
681 node = alloc_backref_node(cache); 683 node = alloc_backref_node(cache);
682 if (!node) { 684 if (!node) {
@@ -1999,6 +2001,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1999 path = btrfs_alloc_path(); 2001 path = btrfs_alloc_path();
2000 if (!path) 2002 if (!path)
2001 return -ENOMEM; 2003 return -ENOMEM;
2004 path->reada = 1;
2002 2005
2003 reloc_root = root->reloc_root; 2006 reloc_root = root->reloc_root;
2004 root_item = &reloc_root->root_item; 2007 root_item = &reloc_root->root_item;
@@ -2139,10 +2142,10 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2139 u64 num_bytes = 0; 2142 u64 num_bytes = 0;
2140 int ret; 2143 int ret;
2141 2144
2142 mutex_lock(&root->fs_info->trans_mutex); 2145 spin_lock(&root->fs_info->trans_lock);
2143 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2146 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2144 rc->merging_rsv_size += rc->nodes_relocated * 2; 2147 rc->merging_rsv_size += rc->nodes_relocated * 2;
2145 mutex_unlock(&root->fs_info->trans_mutex); 2148 spin_unlock(&root->fs_info->trans_lock);
2146again: 2149again:
2147 if (!err) { 2150 if (!err) {
2148 num_bytes = rc->merging_rsv_size; 2151 num_bytes = rc->merging_rsv_size;
@@ -2152,7 +2155,7 @@ again:
2152 err = ret; 2155 err = ret;
2153 } 2156 }
2154 2157
2155 trans = btrfs_join_transaction(rc->extent_root, 1); 2158 trans = btrfs_join_transaction(rc->extent_root);
2156 if (IS_ERR(trans)) { 2159 if (IS_ERR(trans)) {
2157 if (!err) 2160 if (!err)
2158 btrfs_block_rsv_release(rc->extent_root, 2161 btrfs_block_rsv_release(rc->extent_root,
@@ -2211,9 +2214,9 @@ int merge_reloc_roots(struct reloc_control *rc)
2211 int ret; 2214 int ret;
2212again: 2215again:
2213 root = rc->extent_root; 2216 root = rc->extent_root;
2214 mutex_lock(&root->fs_info->trans_mutex); 2217 spin_lock(&root->fs_info->trans_lock);
2215 list_splice_init(&rc->reloc_roots, &reloc_roots); 2218 list_splice_init(&rc->reloc_roots, &reloc_roots);
2216 mutex_unlock(&root->fs_info->trans_mutex); 2219 spin_unlock(&root->fs_info->trans_lock);
2217 2220
2218 while (!list_empty(&reloc_roots)) { 2221 while (!list_empty(&reloc_roots)) {
2219 found = 1; 2222 found = 1;
@@ -3236,7 +3239,7 @@ truncate:
3236 goto out; 3239 goto out;
3237 } 3240 }
3238 3241
3239 trans = btrfs_join_transaction(root, 0); 3242 trans = btrfs_join_transaction(root);
3240 if (IS_ERR(trans)) { 3243 if (IS_ERR(trans)) {
3241 btrfs_free_path(path); 3244 btrfs_free_path(path);
3242 ret = PTR_ERR(trans); 3245 ret = PTR_ERR(trans);
@@ -3300,6 +3303,7 @@ static int find_data_references(struct reloc_control *rc,
3300 path = btrfs_alloc_path(); 3303 path = btrfs_alloc_path();
3301 if (!path) 3304 if (!path)
3302 return -ENOMEM; 3305 return -ENOMEM;
3306 path->reada = 1;
3303 3307
3304 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3308 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3305 if (IS_ERR(root)) { 3309 if (IS_ERR(root)) {
@@ -3586,17 +3590,17 @@ next:
3586static void set_reloc_control(struct reloc_control *rc) 3590static void set_reloc_control(struct reloc_control *rc)
3587{ 3591{
3588 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3592 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3589 mutex_lock(&fs_info->trans_mutex); 3593 spin_lock(&fs_info->trans_lock);
3590 fs_info->reloc_ctl = rc; 3594 fs_info->reloc_ctl = rc;
3591 mutex_unlock(&fs_info->trans_mutex); 3595 spin_unlock(&fs_info->trans_lock);
3592} 3596}
3593 3597
3594static void unset_reloc_control(struct reloc_control *rc) 3598static void unset_reloc_control(struct reloc_control *rc)
3595{ 3599{
3596 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3600 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3597 mutex_lock(&fs_info->trans_mutex); 3601 spin_lock(&fs_info->trans_lock);
3598 fs_info->reloc_ctl = NULL; 3602 fs_info->reloc_ctl = NULL;
3599 mutex_unlock(&fs_info->trans_mutex); 3603 spin_unlock(&fs_info->trans_lock);
3600} 3604}
3601 3605
3602static int check_extent_flags(u64 flags) 3606static int check_extent_flags(u64 flags)
@@ -3645,7 +3649,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3645 rc->create_reloc_tree = 1; 3649 rc->create_reloc_tree = 1;
3646 set_reloc_control(rc); 3650 set_reloc_control(rc);
3647 3651
3648 trans = btrfs_join_transaction(rc->extent_root, 1); 3652 trans = btrfs_join_transaction(rc->extent_root);
3649 BUG_ON(IS_ERR(trans)); 3653 BUG_ON(IS_ERR(trans));
3650 btrfs_commit_transaction(trans, rc->extent_root); 3654 btrfs_commit_transaction(trans, rc->extent_root);
3651 return 0; 3655 return 0;
@@ -3668,6 +3672,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3668 path = btrfs_alloc_path(); 3672 path = btrfs_alloc_path();
3669 if (!path) 3673 if (!path)
3670 return -ENOMEM; 3674 return -ENOMEM;
3675 path->reada = 1;
3671 3676
3672 ret = prepare_to_relocate(rc); 3677 ret = prepare_to_relocate(rc);
3673 if (ret) { 3678 if (ret) {
@@ -3834,7 +3839,7 @@ restart:
3834 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 3839 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
3835 3840
3836 /* get rid of pinned extents */ 3841 /* get rid of pinned extents */
3837 trans = btrfs_join_transaction(rc->extent_root, 1); 3842 trans = btrfs_join_transaction(rc->extent_root);
3838 if (IS_ERR(trans)) 3843 if (IS_ERR(trans))
3839 err = PTR_ERR(trans); 3844 err = PTR_ERR(trans);
3840 else 3845 else
@@ -4093,6 +4098,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4093 path = btrfs_alloc_path(); 4098 path = btrfs_alloc_path();
4094 if (!path) 4099 if (!path)
4095 return -ENOMEM; 4100 return -ENOMEM;
4101 path->reada = -1;
4096 4102
4097 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 4103 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4098 key.type = BTRFS_ROOT_ITEM_KEY; 4104 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -4159,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4159 4165
4160 set_reloc_control(rc); 4166 set_reloc_control(rc);
4161 4167
4162 trans = btrfs_join_transaction(rc->extent_root, 1); 4168 trans = btrfs_join_transaction(rc->extent_root);
4163 if (IS_ERR(trans)) { 4169 if (IS_ERR(trans)) {
4164 unset_reloc_control(rc); 4170 unset_reloc_control(rc);
4165 err = PTR_ERR(trans); 4171 err = PTR_ERR(trans);
@@ -4193,7 +4199,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4193 4199
4194 unset_reloc_control(rc); 4200 unset_reloc_control(rc);
4195 4201
4196 trans = btrfs_join_transaction(rc->extent_root, 1); 4202 trans = btrfs_join_transaction(rc->extent_root);
4197 if (IS_ERR(trans)) 4203 if (IS_ERR(trans))
4198 err = PTR_ERR(trans); 4204 err = PTR_ERR(trans);
4199 else 4205 else
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6dfed0c27ac3..df50fd1eca8f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -117,33 +117,37 @@ static void scrub_free_csums(struct scrub_dev *sdev)
117 } 117 }
118} 118}
119 119
120static void scrub_free_bio(struct bio *bio)
121{
122 int i;
123 struct page *last_page = NULL;
124
125 if (!bio)
126 return;
127
128 for (i = 0; i < bio->bi_vcnt; ++i) {
129 if (bio->bi_io_vec[i].bv_page == last_page)
130 continue;
131 last_page = bio->bi_io_vec[i].bv_page;
132 __free_page(last_page);
133 }
134 bio_put(bio);
135}
136
120static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) 137static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
121{ 138{
122 int i; 139 int i;
123 int j;
124 struct page *last_page;
125 140
126 if (!sdev) 141 if (!sdev)
127 return; 142 return;
128 143
129 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 144 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
130 struct scrub_bio *sbio = sdev->bios[i]; 145 struct scrub_bio *sbio = sdev->bios[i];
131 struct bio *bio;
132 146
133 if (!sbio) 147 if (!sbio)
134 break; 148 break;
135 149
136 bio = sbio->bio; 150 scrub_free_bio(sbio->bio);
137 if (bio) {
138 last_page = NULL;
139 for (j = 0; j < bio->bi_vcnt; ++j) {
140 if (bio->bi_io_vec[j].bv_page == last_page)
141 continue;
142 last_page = bio->bi_io_vec[j].bv_page;
143 __free_page(last_page);
144 }
145 bio_put(bio);
146 }
147 kfree(sbio); 151 kfree(sbio);
148 } 152 }
149 153
@@ -156,8 +160,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
156{ 160{
157 struct scrub_dev *sdev; 161 struct scrub_dev *sdev;
158 int i; 162 int i;
159 int j;
160 int ret;
161 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 163 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
162 164
163 sdev = kzalloc(sizeof(*sdev), GFP_NOFS); 165 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
@@ -165,7 +167,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
165 goto nomem; 167 goto nomem;
166 sdev->dev = dev; 168 sdev->dev = dev;
167 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 169 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
168 struct bio *bio;
169 struct scrub_bio *sbio; 170 struct scrub_bio *sbio;
170 171
171 sbio = kzalloc(sizeof(*sbio), GFP_NOFS); 172 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
@@ -173,32 +174,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
173 goto nomem; 174 goto nomem;
174 sdev->bios[i] = sbio; 175 sdev->bios[i] = sbio;
175 176
176 bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
177 if (!bio)
178 goto nomem;
179
180 sbio->index = i; 177 sbio->index = i;
181 sbio->sdev = sdev; 178 sbio->sdev = sdev;
182 sbio->bio = bio;
183 sbio->count = 0; 179 sbio->count = 0;
184 sbio->work.func = scrub_checksum; 180 sbio->work.func = scrub_checksum;
185 bio->bi_private = sdev->bios[i];
186 bio->bi_end_io = scrub_bio_end_io;
187 bio->bi_sector = 0;
188 bio->bi_bdev = dev->bdev;
189 bio->bi_size = 0;
190
191 for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
192 struct page *page;
193 page = alloc_page(GFP_NOFS);
194 if (!page)
195 goto nomem;
196
197 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
198 if (!ret)
199 goto nomem;
200 }
201 WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);
202 181
203 if (i != SCRUB_BIOS_PER_DEV-1) 182 if (i != SCRUB_BIOS_PER_DEV-1)
204 sdev->bios[i]->next_free = i + 1; 183 sdev->bios[i]->next_free = i + 1;
@@ -369,9 +348,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
369 int ret; 348 int ret;
370 DECLARE_COMPLETION_ONSTACK(complete); 349 DECLARE_COMPLETION_ONSTACK(complete);
371 350
372 /* we are going to wait on this IO */
373 rw |= REQ_SYNC;
374
375 bio = bio_alloc(GFP_NOFS, 1); 351 bio = bio_alloc(GFP_NOFS, 1);
376 bio->bi_bdev = bdev; 352 bio->bi_bdev = bdev;
377 bio->bi_sector = sector; 353 bio->bi_sector = sector;
@@ -380,6 +356,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
380 bio->bi_private = &complete; 356 bio->bi_private = &complete;
381 submit_bio(rw, bio); 357 submit_bio(rw, bio);
382 358
359 /* this will also unplug the queue */
383 wait_for_completion(&complete); 360 wait_for_completion(&complete);
384 361
385 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); 362 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -394,6 +371,7 @@ static void scrub_bio_end_io(struct bio *bio, int err)
394 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 371 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
395 372
396 sbio->err = err; 373 sbio->err = err;
374 sbio->bio = bio;
397 375
398 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); 376 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
399} 377}
@@ -453,6 +431,8 @@ static void scrub_checksum(struct btrfs_work *work)
453 } 431 }
454 432
455out: 433out:
434 scrub_free_bio(sbio->bio);
435 sbio->bio = NULL;
456 spin_lock(&sdev->list_lock); 436 spin_lock(&sdev->list_lock);
457 sbio->next_free = sdev->first_free; 437 sbio->next_free = sdev->first_free;
458 sdev->first_free = sbio->index; 438 sdev->first_free = sbio->index;
@@ -583,25 +563,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
583static int scrub_submit(struct scrub_dev *sdev) 563static int scrub_submit(struct scrub_dev *sdev)
584{ 564{
585 struct scrub_bio *sbio; 565 struct scrub_bio *sbio;
566 struct bio *bio;
567 int i;
586 568
587 if (sdev->curr == -1) 569 if (sdev->curr == -1)
588 return 0; 570 return 0;
589 571
590 sbio = sdev->bios[sdev->curr]; 572 sbio = sdev->bios[sdev->curr];
591 573
592 sbio->bio->bi_sector = sbio->physical >> 9; 574 bio = bio_alloc(GFP_NOFS, sbio->count);
593 sbio->bio->bi_size = sbio->count * PAGE_SIZE; 575 if (!bio)
594 sbio->bio->bi_next = NULL; 576 goto nomem;
595 sbio->bio->bi_flags |= 1 << BIO_UPTODATE; 577
596 sbio->bio->bi_comp_cpu = -1; 578 bio->bi_private = sbio;
597 sbio->bio->bi_bdev = sdev->dev->bdev; 579 bio->bi_end_io = scrub_bio_end_io;
580 bio->bi_bdev = sdev->dev->bdev;
581 bio->bi_sector = sbio->physical >> 9;
582
583 for (i = 0; i < sbio->count; ++i) {
584 struct page *page;
585 int ret;
586
587 page = alloc_page(GFP_NOFS);
588 if (!page)
589 goto nomem;
590
591 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
592 if (!ret) {
593 __free_page(page);
594 goto nomem;
595 }
596 }
597
598 sbio->err = 0; 598 sbio->err = 0;
599 sdev->curr = -1; 599 sdev->curr = -1;
600 atomic_inc(&sdev->in_flight); 600 atomic_inc(&sdev->in_flight);
601 601
602 submit_bio(0, sbio->bio); 602 submit_bio(READ, bio);
603 603
604 return 0; 604 return 0;
605
606nomem:
607 scrub_free_bio(bio);
608
609 return -ENOMEM;
605} 610}
606 611
607static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 612static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
@@ -633,7 +638,11 @@ again:
633 sbio->logical = logical; 638 sbio->logical = logical;
634 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || 639 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
635 sbio->logical + sbio->count * PAGE_SIZE != logical) { 640 sbio->logical + sbio->count * PAGE_SIZE != logical) {
636 scrub_submit(sdev); 641 int ret;
642
643 ret = scrub_submit(sdev);
644 if (ret)
645 return ret;
637 goto again; 646 goto again;
638 } 647 }
639 sbio->spag[sbio->count].flags = flags; 648 sbio->spag[sbio->count].flags = flags;
@@ -645,8 +654,13 @@ again:
645 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); 654 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
646 } 655 }
647 ++sbio->count; 656 ++sbio->count;
648 if (sbio->count == SCRUB_PAGES_PER_BIO || force) 657 if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
649 scrub_submit(sdev); 658 int ret;
659
660 ret = scrub_submit(sdev);
661 if (ret)
662 return ret;
663 }
650 664
651 return 0; 665 return 0;
652} 666}
@@ -727,6 +741,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
727 struct btrfs_root *root = fs_info->extent_root; 741 struct btrfs_root *root = fs_info->extent_root;
728 struct btrfs_root *csum_root = fs_info->csum_root; 742 struct btrfs_root *csum_root = fs_info->csum_root;
729 struct btrfs_extent_item *extent; 743 struct btrfs_extent_item *extent;
744 struct blk_plug plug;
730 u64 flags; 745 u64 flags;
731 int ret; 746 int ret;
732 int slot; 747 int slot;
@@ -831,6 +846,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
831 * the scrub. This might currently (crc32) end up to be about 1MB 846 * the scrub. This might currently (crc32) end up to be about 1MB
832 */ 847 */
833 start_stripe = 0; 848 start_stripe = 0;
849 blk_start_plug(&plug);
834again: 850again:
835 logical = base + offset + start_stripe * increment; 851 logical = base + offset + start_stripe * increment;
836 for (i = start_stripe; i < nstripes; ++i) { 852 for (i = start_stripe; i < nstripes; ++i) {
@@ -972,6 +988,7 @@ next:
972 scrub_submit(sdev); 988 scrub_submit(sdev);
973 989
974out: 990out:
991 blk_finish_plug(&plug);
975 btrfs_free_path(path); 992 btrfs_free_path(path);
976 return ret < 0 ? ret : 0; 993 return ret < 0 ? ret : 0;
977} 994}
@@ -1166,7 +1183,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1166 int ret; 1183 int ret;
1167 struct btrfs_device *dev; 1184 struct btrfs_device *dev;
1168 1185
1169 if (root->fs_info->closing) 1186 if (btrfs_fs_closing(root->fs_info))
1170 return -EINVAL; 1187 return -EINVAL;
1171 1188
1172 /* 1189 /*
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9b2e7e5bc3ef..117e74e3604b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -161,7 +161,8 @@ enum {
161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, 164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
165 Opt_inode_cache, Opt_err,
165}; 166};
166 167
167static match_table_t tokens = { 168static match_table_t tokens = {
@@ -193,6 +194,7 @@ static match_table_t tokens = {
193 {Opt_enospc_debug, "enospc_debug"}, 194 {Opt_enospc_debug, "enospc_debug"},
194 {Opt_subvolrootid, "subvolrootid=%d"}, 195 {Opt_subvolrootid, "subvolrootid=%d"},
195 {Opt_defrag, "autodefrag"}, 196 {Opt_defrag, "autodefrag"},
197 {Opt_inode_cache, "inode_cache"},
196 {Opt_err, NULL}, 198 {Opt_err, NULL},
197}; 199};
198 200
@@ -361,6 +363,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
361 printk(KERN_INFO "btrfs: enabling disk space caching\n"); 363 printk(KERN_INFO "btrfs: enabling disk space caching\n");
362 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 364 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
363 break; 365 break;
366 case Opt_inode_cache:
367 printk(KERN_INFO "btrfs: enabling inode map caching\n");
368 btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
369 break;
364 case Opt_clear_cache: 370 case Opt_clear_cache:
365 printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 371 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
366 btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 372 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dc80f7156923..dd719662340e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -35,6 +35,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
35{ 35{
36 WARN_ON(atomic_read(&transaction->use_count) == 0); 36 WARN_ON(atomic_read(&transaction->use_count) == 0);
37 if (atomic_dec_and_test(&transaction->use_count)) { 37 if (atomic_dec_and_test(&transaction->use_count)) {
38 BUG_ON(!list_empty(&transaction->list));
38 memset(transaction, 0, sizeof(*transaction)); 39 memset(transaction, 0, sizeof(*transaction));
39 kmem_cache_free(btrfs_transaction_cachep, transaction); 40 kmem_cache_free(btrfs_transaction_cachep, transaction);
40 } 41 }
@@ -49,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root)
49/* 50/*
50 * either allocate a new transaction or hop into the existing one 51 * either allocate a new transaction or hop into the existing one
51 */ 52 */
52static noinline int join_transaction(struct btrfs_root *root) 53static noinline int join_transaction(struct btrfs_root *root, int nofail)
53{ 54{
54 struct btrfs_transaction *cur_trans; 55 struct btrfs_transaction *cur_trans;
56
57 spin_lock(&root->fs_info->trans_lock);
58 if (root->fs_info->trans_no_join) {
59 if (!nofail) {
60 spin_unlock(&root->fs_info->trans_lock);
61 return -EBUSY;
62 }
63 }
64
55 cur_trans = root->fs_info->running_transaction; 65 cur_trans = root->fs_info->running_transaction;
56 if (!cur_trans) { 66 if (cur_trans) {
57 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 67 atomic_inc(&cur_trans->use_count);
58 GFP_NOFS);
59 if (!cur_trans)
60 return -ENOMEM;
61 root->fs_info->generation++;
62 atomic_set(&cur_trans->num_writers, 1);
63 cur_trans->num_joined = 0;
64 cur_trans->transid = root->fs_info->generation;
65 init_waitqueue_head(&cur_trans->writer_wait);
66 init_waitqueue_head(&cur_trans->commit_wait);
67 cur_trans->in_commit = 0;
68 cur_trans->blocked = 0;
69 atomic_set(&cur_trans->use_count, 1);
70 cur_trans->commit_done = 0;
71 cur_trans->start_time = get_seconds();
72
73 cur_trans->delayed_refs.root = RB_ROOT;
74 cur_trans->delayed_refs.num_entries = 0;
75 cur_trans->delayed_refs.num_heads_ready = 0;
76 cur_trans->delayed_refs.num_heads = 0;
77 cur_trans->delayed_refs.flushing = 0;
78 cur_trans->delayed_refs.run_delayed_start = 0;
79 spin_lock_init(&cur_trans->delayed_refs.lock);
80
81 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
82 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
83 extent_io_tree_init(&cur_trans->dirty_pages,
84 root->fs_info->btree_inode->i_mapping);
85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock);
88 } else {
89 atomic_inc(&cur_trans->num_writers); 68 atomic_inc(&cur_trans->num_writers);
90 cur_trans->num_joined++; 69 cur_trans->num_joined++;
70 spin_unlock(&root->fs_info->trans_lock);
71 return 0;
91 } 72 }
73 spin_unlock(&root->fs_info->trans_lock);
74
75 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
76 if (!cur_trans)
77 return -ENOMEM;
78 spin_lock(&root->fs_info->trans_lock);
79 if (root->fs_info->running_transaction) {
80 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
81 cur_trans = root->fs_info->running_transaction;
82 atomic_inc(&cur_trans->use_count);
83 atomic_inc(&cur_trans->num_writers);
84 cur_trans->num_joined++;
85 spin_unlock(&root->fs_info->trans_lock);
86 return 0;
87 }
88 atomic_set(&cur_trans->num_writers, 1);
89 cur_trans->num_joined = 0;
90 init_waitqueue_head(&cur_trans->writer_wait);
91 init_waitqueue_head(&cur_trans->commit_wait);
92 cur_trans->in_commit = 0;
93 cur_trans->blocked = 0;
94 /*
95 * One for this trans handle, one so it will live on until we
96 * commit the transaction.
97 */
98 atomic_set(&cur_trans->use_count, 2);
99 cur_trans->commit_done = 0;
100 cur_trans->start_time = get_seconds();
101
102 cur_trans->delayed_refs.root = RB_ROOT;
103 cur_trans->delayed_refs.num_entries = 0;
104 cur_trans->delayed_refs.num_heads_ready = 0;
105 cur_trans->delayed_refs.num_heads = 0;
106 cur_trans->delayed_refs.flushing = 0;
107 cur_trans->delayed_refs.run_delayed_start = 0;
108 spin_lock_init(&cur_trans->commit_lock);
109 spin_lock_init(&cur_trans->delayed_refs.lock);
110
111 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
112 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
113 extent_io_tree_init(&cur_trans->dirty_pages,
114 root->fs_info->btree_inode->i_mapping);
115 root->fs_info->generation++;
116 cur_trans->transid = root->fs_info->generation;
117 root->fs_info->running_transaction = cur_trans;
118 spin_unlock(&root->fs_info->trans_lock);
92 119
93 return 0; 120 return 0;
94} 121}
@@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root)
99 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
100 * when the transaction commits 127 * when the transaction commits
101 */ 128 */
102static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, 129int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root) 130 struct btrfs_root *root)
104{ 131{
105 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
106 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
107 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
108 135
136 spin_lock(&root->fs_info->fs_roots_radix_lock);
137 if (root->last_trans == trans->transid) {
138 spin_unlock(&root->fs_info->fs_roots_radix_lock);
139 return 0;
140 }
141 root->last_trans = trans->transid;
109 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 142 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
110 (unsigned long)root->root_key.objectid, 143 (unsigned long)root->root_key.objectid,
111 BTRFS_ROOT_TRANS_TAG); 144 BTRFS_ROOT_TRANS_TAG);
112 root->last_trans = trans->transid; 145 spin_unlock(&root->fs_info->fs_roots_radix_lock);
113 btrfs_init_reloc_root(trans, root); 146 btrfs_init_reloc_root(trans, root);
114 } 147 }
115 return 0; 148 return 0;
116} 149}
117 150
118int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root)
120{
121 if (!root->ref_cows)
122 return 0;
123
124 mutex_lock(&root->fs_info->trans_mutex);
125 if (root->last_trans == trans->transid) {
126 mutex_unlock(&root->fs_info->trans_mutex);
127 return 0;
128 }
129
130 record_root_in_trans(trans, root);
131 mutex_unlock(&root->fs_info->trans_mutex);
132 return 0;
133}
134
135/* wait for commit against the current transaction to become unblocked 151/* wait for commit against the current transaction to become unblocked
136 * when this is done, it is safe to start a new transaction, but the current 152 * when this is done, it is safe to start a new transaction, but the current
137 * transaction might not be fully on disk. 153 * transaction might not be fully on disk.
@@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root)
140{ 156{
141 struct btrfs_transaction *cur_trans; 157 struct btrfs_transaction *cur_trans;
142 158
159 spin_lock(&root->fs_info->trans_lock);
143 cur_trans = root->fs_info->running_transaction; 160 cur_trans = root->fs_info->running_transaction;
144 if (cur_trans && cur_trans->blocked) { 161 if (cur_trans && cur_trans->blocked) {
145 DEFINE_WAIT(wait); 162 DEFINE_WAIT(wait);
146 atomic_inc(&cur_trans->use_count); 163 atomic_inc(&cur_trans->use_count);
164 spin_unlock(&root->fs_info->trans_lock);
147 while (1) { 165 while (1) {
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 166 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE); 167 TASK_UNINTERRUPTIBLE);
150 if (!cur_trans->blocked) 168 if (!cur_trans->blocked)
151 break; 169 break;
152 mutex_unlock(&root->fs_info->trans_mutex);
153 schedule(); 170 schedule();
154 mutex_lock(&root->fs_info->trans_mutex);
155 } 171 }
156 finish_wait(&root->fs_info->transaction_wait, &wait); 172 finish_wait(&root->fs_info->transaction_wait, &wait);
157 put_transaction(cur_trans); 173 put_transaction(cur_trans);
174 } else {
175 spin_unlock(&root->fs_info->trans_lock);
158 } 176 }
159} 177}
160 178
@@ -167,10 +185,16 @@ enum btrfs_trans_type {
167 185
168static int may_wait_transaction(struct btrfs_root *root, int type) 186static int may_wait_transaction(struct btrfs_root *root, int type)
169{ 187{
170 if (!root->fs_info->log_root_recovering && 188 if (root->fs_info->log_root_recovering)
171 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || 189 return 0;
172 type == TRANS_USERSPACE)) 190
191 if (type == TRANS_USERSPACE)
192 return 1;
193
194 if (type == TRANS_START &&
195 !atomic_read(&root->fs_info->open_ioctl_trans))
173 return 1; 196 return 1;
197
174 return 0; 198 return 0;
175} 199}
176 200
@@ -184,36 +208,44 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
184 208
185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 209 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
186 return ERR_PTR(-EROFS); 210 return ERR_PTR(-EROFS);
211
212 if (current->journal_info) {
213 WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
214 h = current->journal_info;
215 h->use_count++;
216 h->orig_rsv = h->block_rsv;
217 h->block_rsv = NULL;
218 goto got_it;
219 }
187again: 220again:
188 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 221 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
189 if (!h) 222 if (!h)
190 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
191 224
192 if (type != TRANS_JOIN_NOLOCK)
193 mutex_lock(&root->fs_info->trans_mutex);
194 if (may_wait_transaction(root, type)) 225 if (may_wait_transaction(root, type))
195 wait_current_trans(root); 226 wait_current_trans(root);
196 227
197 ret = join_transaction(root); 228 do {
229 ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
230 if (ret == -EBUSY)
231 wait_current_trans(root);
232 } while (ret == -EBUSY);
233
198 if (ret < 0) { 234 if (ret < 0) {
199 kmem_cache_free(btrfs_trans_handle_cachep, h); 235 kmem_cache_free(btrfs_trans_handle_cachep, h);
200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret); 236 return ERR_PTR(ret);
203 } 237 }
204 238
205 cur_trans = root->fs_info->running_transaction; 239 cur_trans = root->fs_info->running_transaction;
206 atomic_inc(&cur_trans->use_count);
207 if (type != TRANS_JOIN_NOLOCK)
208 mutex_unlock(&root->fs_info->trans_mutex);
209 240
210 h->transid = cur_trans->transid; 241 h->transid = cur_trans->transid;
211 h->transaction = cur_trans; 242 h->transaction = cur_trans;
212 h->blocks_used = 0; 243 h->blocks_used = 0;
213 h->block_group = 0;
214 h->bytes_reserved = 0; 244 h->bytes_reserved = 0;
215 h->delayed_ref_updates = 0; 245 h->delayed_ref_updates = 0;
246 h->use_count = 1;
216 h->block_rsv = NULL; 247 h->block_rsv = NULL;
248 h->orig_rsv = NULL;
217 249
218 smp_mb(); 250 smp_mb();
219 if (cur_trans->blocked && may_wait_transaction(root, type)) { 251 if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -241,11 +273,8 @@ again:
241 } 273 }
242 } 274 }
243 275
244 if (type != TRANS_JOIN_NOLOCK) 276got_it:
245 mutex_lock(&root->fs_info->trans_mutex); 277 btrfs_record_root_in_trans(h, root);
246 record_root_in_trans(h, root);
247 if (type != TRANS_JOIN_NOLOCK)
248 mutex_unlock(&root->fs_info->trans_mutex);
249 278
250 if (!current->journal_info && type != TRANS_USERSPACE) 279 if (!current->journal_info && type != TRANS_USERSPACE)
251 current->journal_info = h; 280 current->journal_info = h;
@@ -257,22 +286,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
257{ 286{
258 return start_transaction(root, num_items, TRANS_START); 287 return start_transaction(root, num_items, TRANS_START);
259} 288}
260struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 289struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
261 int num_blocks)
262{ 290{
263 return start_transaction(root, 0, TRANS_JOIN); 291 return start_transaction(root, 0, TRANS_JOIN);
264} 292}
265 293
266struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 294struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
267 int num_blocks)
268{ 295{
269 return start_transaction(root, 0, TRANS_JOIN_NOLOCK); 296 return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
270} 297}
271 298
272struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 299struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
273 int num_blocks)
274{ 300{
275 return start_transaction(r, 0, TRANS_USERSPACE); 301 return start_transaction(root, 0, TRANS_USERSPACE);
276} 302}
277 303
278/* wait for a transaction commit to be fully complete */ 304/* wait for a transaction commit to be fully complete */
@@ -280,17 +306,13 @@ static noinline int wait_for_commit(struct btrfs_root *root,
280 struct btrfs_transaction *commit) 306 struct btrfs_transaction *commit)
281{ 307{
282 DEFINE_WAIT(wait); 308 DEFINE_WAIT(wait);
283 mutex_lock(&root->fs_info->trans_mutex);
284 while (!commit->commit_done) { 309 while (!commit->commit_done) {
285 prepare_to_wait(&commit->commit_wait, &wait, 310 prepare_to_wait(&commit->commit_wait, &wait,
286 TASK_UNINTERRUPTIBLE); 311 TASK_UNINTERRUPTIBLE);
287 if (commit->commit_done) 312 if (commit->commit_done)
288 break; 313 break;
289 mutex_unlock(&root->fs_info->trans_mutex);
290 schedule(); 314 schedule();
291 mutex_lock(&root->fs_info->trans_mutex);
292 } 315 }
293 mutex_unlock(&root->fs_info->trans_mutex);
294 finish_wait(&commit->commit_wait, &wait); 316 finish_wait(&commit->commit_wait, &wait);
295 return 0; 317 return 0;
296} 318}
@@ -300,59 +322,56 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
300 struct btrfs_transaction *cur_trans = NULL, *t; 322 struct btrfs_transaction *cur_trans = NULL, *t;
301 int ret; 323 int ret;
302 324
303 mutex_lock(&root->fs_info->trans_mutex);
304
305 ret = 0; 325 ret = 0;
306 if (transid) { 326 if (transid) {
307 if (transid <= root->fs_info->last_trans_committed) 327 if (transid <= root->fs_info->last_trans_committed)
308 goto out_unlock; 328 goto out;
309 329
310 /* find specified transaction */ 330 /* find specified transaction */
331 spin_lock(&root->fs_info->trans_lock);
311 list_for_each_entry(t, &root->fs_info->trans_list, list) { 332 list_for_each_entry(t, &root->fs_info->trans_list, list) {
312 if (t->transid == transid) { 333 if (t->transid == transid) {
313 cur_trans = t; 334 cur_trans = t;
335 atomic_inc(&cur_trans->use_count);
314 break; 336 break;
315 } 337 }
316 if (t->transid > transid) 338 if (t->transid > transid)
317 break; 339 break;
318 } 340 }
341 spin_unlock(&root->fs_info->trans_lock);
319 ret = -EINVAL; 342 ret = -EINVAL;
320 if (!cur_trans) 343 if (!cur_trans)
321 goto out_unlock; /* bad transid */ 344 goto out; /* bad transid */
322 } else { 345 } else {
323 /* find newest transaction that is committing | committed */ 346 /* find newest transaction that is committing | committed */
347 spin_lock(&root->fs_info->trans_lock);
324 list_for_each_entry_reverse(t, &root->fs_info->trans_list, 348 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
325 list) { 349 list) {
326 if (t->in_commit) { 350 if (t->in_commit) {
327 if (t->commit_done) 351 if (t->commit_done)
328 goto out_unlock; 352 goto out;
329 cur_trans = t; 353 cur_trans = t;
354 atomic_inc(&cur_trans->use_count);
330 break; 355 break;
331 } 356 }
332 } 357 }
358 spin_unlock(&root->fs_info->trans_lock);
333 if (!cur_trans) 359 if (!cur_trans)
334 goto out_unlock; /* nothing committing|committed */ 360 goto out; /* nothing committing|committed */
335 } 361 }
336 362
337 atomic_inc(&cur_trans->use_count);
338 mutex_unlock(&root->fs_info->trans_mutex);
339
340 wait_for_commit(root, cur_trans); 363 wait_for_commit(root, cur_trans);
341 364
342 mutex_lock(&root->fs_info->trans_mutex);
343 put_transaction(cur_trans); 365 put_transaction(cur_trans);
344 ret = 0; 366 ret = 0;
345out_unlock: 367out:
346 mutex_unlock(&root->fs_info->trans_mutex);
347 return ret; 368 return ret;
348} 369}
349 370
350void btrfs_throttle(struct btrfs_root *root) 371void btrfs_throttle(struct btrfs_root *root)
351{ 372{
352 mutex_lock(&root->fs_info->trans_mutex); 373 if (!atomic_read(&root->fs_info->open_ioctl_trans))
353 if (!root->fs_info->open_ioctl_trans)
354 wait_current_trans(root); 374 wait_current_trans(root);
355 mutex_unlock(&root->fs_info->trans_mutex);
356} 375}
357 376
358static int should_end_transaction(struct btrfs_trans_handle *trans, 377static int should_end_transaction(struct btrfs_trans_handle *trans,
@@ -370,6 +389,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
370 struct btrfs_transaction *cur_trans = trans->transaction; 389 struct btrfs_transaction *cur_trans = trans->transaction;
371 int updates; 390 int updates;
372 391
392 smp_mb();
373 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 393 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
374 return 1; 394 return 1;
375 395
@@ -388,6 +408,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
388 struct btrfs_fs_info *info = root->fs_info; 408 struct btrfs_fs_info *info = root->fs_info;
389 int count = 0; 409 int count = 0;
390 410
411 if (--trans->use_count) {
412 trans->block_rsv = trans->orig_rsv;
413 return 0;
414 }
415
391 while (count < 4) { 416 while (count < 4) {
392 unsigned long cur = trans->delayed_ref_updates; 417 unsigned long cur = trans->delayed_ref_updates;
393 trans->delayed_ref_updates = 0; 418 trans->delayed_ref_updates = 0;
@@ -410,9 +435,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
410 435
411 btrfs_trans_release_metadata(trans, root); 436 btrfs_trans_release_metadata(trans, root);
412 437
413 if (lock && !root->fs_info->open_ioctl_trans && 438 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
414 should_end_transaction(trans, root)) 439 should_end_transaction(trans, root)) {
415 trans->transaction->blocked = 1; 440 trans->transaction->blocked = 1;
441 smp_wmb();
442 }
416 443
417 if (lock && cur_trans->blocked && !cur_trans->in_commit) { 444 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
418 if (throttle) 445 if (throttle)
@@ -703,9 +730,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
703 */ 730 */
704int btrfs_add_dead_root(struct btrfs_root *root) 731int btrfs_add_dead_root(struct btrfs_root *root)
705{ 732{
706 mutex_lock(&root->fs_info->trans_mutex); 733 spin_lock(&root->fs_info->trans_lock);
707 list_add(&root->root_list, &root->fs_info->dead_roots); 734 list_add(&root->root_list, &root->fs_info->dead_roots);
708 mutex_unlock(&root->fs_info->trans_mutex); 735 spin_unlock(&root->fs_info->trans_lock);
709 return 0; 736 return 0;
710} 737}
711 738
@@ -721,6 +748,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
721 int ret; 748 int ret;
722 int err = 0; 749 int err = 0;
723 750
751 spin_lock(&fs_info->fs_roots_radix_lock);
724 while (1) { 752 while (1) {
725 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 753 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
726 (void **)gang, 0, 754 (void **)gang, 0,
@@ -733,6 +761,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
733 radix_tree_tag_clear(&fs_info->fs_roots_radix, 761 radix_tree_tag_clear(&fs_info->fs_roots_radix,
734 (unsigned long)root->root_key.objectid, 762 (unsigned long)root->root_key.objectid,
735 BTRFS_ROOT_TRANS_TAG); 763 BTRFS_ROOT_TRANS_TAG);
764 spin_unlock(&fs_info->fs_roots_radix_lock);
736 765
737 btrfs_free_log(trans, root); 766 btrfs_free_log(trans, root);
738 btrfs_update_reloc_root(trans, root); 767 btrfs_update_reloc_root(trans, root);
@@ -753,10 +782,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
753 err = btrfs_update_root(trans, fs_info->tree_root, 782 err = btrfs_update_root(trans, fs_info->tree_root,
754 &root->root_key, 783 &root->root_key,
755 &root->root_item); 784 &root->root_item);
785 spin_lock(&fs_info->fs_roots_radix_lock);
756 if (err) 786 if (err)
757 break; 787 break;
758 } 788 }
759 } 789 }
790 spin_unlock(&fs_info->fs_roots_radix_lock);
760 return err; 791 return err;
761} 792}
762 793
@@ -786,7 +817,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
786 btrfs_btree_balance_dirty(info->tree_root, nr); 817 btrfs_btree_balance_dirty(info->tree_root, nr);
787 cond_resched(); 818 cond_resched();
788 819
789 if (root->fs_info->closing || ret != -EAGAIN) 820 if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
790 break; 821 break;
791 } 822 }
792 root->defrag_running = 0; 823 root->defrag_running = 0;
@@ -851,7 +882,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
851 parent = dget_parent(dentry); 882 parent = dget_parent(dentry);
852 parent_inode = parent->d_inode; 883 parent_inode = parent->d_inode;
853 parent_root = BTRFS_I(parent_inode)->root; 884 parent_root = BTRFS_I(parent_inode)->root;
854 record_root_in_trans(trans, parent_root); 885 btrfs_record_root_in_trans(trans, parent_root);
855 886
856 /* 887 /*
857 * insert the directory item 888 * insert the directory item
@@ -869,7 +900,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
869 ret = btrfs_update_inode(trans, parent_root, parent_inode); 900 ret = btrfs_update_inode(trans, parent_root, parent_inode);
870 BUG_ON(ret); 901 BUG_ON(ret);
871 902
872 record_root_in_trans(trans, root); 903 btrfs_record_root_in_trans(trans, root);
873 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 904 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
874 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 905 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
875 btrfs_check_and_init_root_item(new_root_item); 906 btrfs_check_and_init_root_item(new_root_item);
@@ -967,20 +998,20 @@ static void update_super_roots(struct btrfs_root *root)
967int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 998int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
968{ 999{
969 int ret = 0; 1000 int ret = 0;
970 spin_lock(&info->new_trans_lock); 1001 spin_lock(&info->trans_lock);
971 if (info->running_transaction) 1002 if (info->running_transaction)
972 ret = info->running_transaction->in_commit; 1003 ret = info->running_transaction->in_commit;
973 spin_unlock(&info->new_trans_lock); 1004 spin_unlock(&info->trans_lock);
974 return ret; 1005 return ret;
975} 1006}
976 1007
977int btrfs_transaction_blocked(struct btrfs_fs_info *info) 1008int btrfs_transaction_blocked(struct btrfs_fs_info *info)
978{ 1009{
979 int ret = 0; 1010 int ret = 0;
980 spin_lock(&info->new_trans_lock); 1011 spin_lock(&info->trans_lock);
981 if (info->running_transaction) 1012 if (info->running_transaction)
982 ret = info->running_transaction->blocked; 1013 ret = info->running_transaction->blocked;
983 spin_unlock(&info->new_trans_lock); 1014 spin_unlock(&info->trans_lock);
984 return ret; 1015 return ret;
985} 1016}
986 1017
@@ -1004,9 +1035,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
1004 &wait); 1035 &wait);
1005 break; 1036 break;
1006 } 1037 }
1007 mutex_unlock(&root->fs_info->trans_mutex);
1008 schedule(); 1038 schedule();
1009 mutex_lock(&root->fs_info->trans_mutex);
1010 finish_wait(&root->fs_info->transaction_blocked_wait, &wait); 1039 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1011 } 1040 }
1012} 1041}
@@ -1032,9 +1061,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1032 &wait); 1061 &wait);
1033 break; 1062 break;
1034 } 1063 }
1035 mutex_unlock(&root->fs_info->trans_mutex);
1036 schedule(); 1064 schedule();
1037 mutex_lock(&root->fs_info->trans_mutex);
1038 finish_wait(&root->fs_info->transaction_wait, 1065 finish_wait(&root->fs_info->transaction_wait,
1039 &wait); 1066 &wait);
1040 } 1067 }
@@ -1072,7 +1099,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1072 1099
1073 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1100 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1074 ac->root = root; 1101 ac->root = root;
1075 ac->newtrans = btrfs_join_transaction(root, 0); 1102 ac->newtrans = btrfs_join_transaction(root);
1076 if (IS_ERR(ac->newtrans)) { 1103 if (IS_ERR(ac->newtrans)) {
1077 int err = PTR_ERR(ac->newtrans); 1104 int err = PTR_ERR(ac->newtrans);
1078 kfree(ac); 1105 kfree(ac);
@@ -1080,22 +1107,18 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1080 } 1107 }
1081 1108
1082 /* take transaction reference */ 1109 /* take transaction reference */
1083 mutex_lock(&root->fs_info->trans_mutex);
1084 cur_trans = trans->transaction; 1110 cur_trans = trans->transaction;
1085 atomic_inc(&cur_trans->use_count); 1111 atomic_inc(&cur_trans->use_count);
1086 mutex_unlock(&root->fs_info->trans_mutex);
1087 1112
1088 btrfs_end_transaction(trans, root); 1113 btrfs_end_transaction(trans, root);
1089 schedule_delayed_work(&ac->work, 0); 1114 schedule_delayed_work(&ac->work, 0);
1090 1115
1091 /* wait for transaction to start and unblock */ 1116 /* wait for transaction to start and unblock */
1092 mutex_lock(&root->fs_info->trans_mutex);
1093 if (wait_for_unblock) 1117 if (wait_for_unblock)
1094 wait_current_trans_commit_start_and_unblock(root, cur_trans); 1118 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1095 else 1119 else
1096 wait_current_trans_commit_start(root, cur_trans); 1120 wait_current_trans_commit_start(root, cur_trans);
1097 put_transaction(cur_trans); 1121 put_transaction(cur_trans);
1098 mutex_unlock(&root->fs_info->trans_mutex);
1099 1122
1100 return 0; 1123 return 0;
1101} 1124}
@@ -1139,38 +1162,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1139 ret = btrfs_run_delayed_refs(trans, root, 0); 1162 ret = btrfs_run_delayed_refs(trans, root, 0);
1140 BUG_ON(ret); 1163 BUG_ON(ret);
1141 1164
1142 mutex_lock(&root->fs_info->trans_mutex); 1165 spin_lock(&cur_trans->commit_lock);
1143 if (cur_trans->in_commit) { 1166 if (cur_trans->in_commit) {
1167 spin_unlock(&cur_trans->commit_lock);
1144 atomic_inc(&cur_trans->use_count); 1168 atomic_inc(&cur_trans->use_count);
1145 mutex_unlock(&root->fs_info->trans_mutex);
1146 btrfs_end_transaction(trans, root); 1169 btrfs_end_transaction(trans, root);
1147 1170
1148 ret = wait_for_commit(root, cur_trans); 1171 ret = wait_for_commit(root, cur_trans);
1149 BUG_ON(ret); 1172 BUG_ON(ret);
1150 1173
1151 mutex_lock(&root->fs_info->trans_mutex);
1152 put_transaction(cur_trans); 1174 put_transaction(cur_trans);
1153 mutex_unlock(&root->fs_info->trans_mutex);
1154 1175
1155 return 0; 1176 return 0;
1156 } 1177 }
1157 1178
1158 trans->transaction->in_commit = 1; 1179 trans->transaction->in_commit = 1;
1159 trans->transaction->blocked = 1; 1180 trans->transaction->blocked = 1;
1181 spin_unlock(&cur_trans->commit_lock);
1160 wake_up(&root->fs_info->transaction_blocked_wait); 1182 wake_up(&root->fs_info->transaction_blocked_wait);
1161 1183
1184 spin_lock(&root->fs_info->trans_lock);
1162 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1185 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1163 prev_trans = list_entry(cur_trans->list.prev, 1186 prev_trans = list_entry(cur_trans->list.prev,
1164 struct btrfs_transaction, list); 1187 struct btrfs_transaction, list);
1165 if (!prev_trans->commit_done) { 1188 if (!prev_trans->commit_done) {
1166 atomic_inc(&prev_trans->use_count); 1189 atomic_inc(&prev_trans->use_count);
1167 mutex_unlock(&root->fs_info->trans_mutex); 1190 spin_unlock(&root->fs_info->trans_lock);
1168 1191
1169 wait_for_commit(root, prev_trans); 1192 wait_for_commit(root, prev_trans);
1170 1193
1171 mutex_lock(&root->fs_info->trans_mutex);
1172 put_transaction(prev_trans); 1194 put_transaction(prev_trans);
1195 } else {
1196 spin_unlock(&root->fs_info->trans_lock);
1173 } 1197 }
1198 } else {
1199 spin_unlock(&root->fs_info->trans_lock);
1174 } 1200 }
1175 1201
1176 if (now < cur_trans->start_time || now - cur_trans->start_time < 1) 1202 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
@@ -1178,12 +1204,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1178 1204
1179 do { 1205 do {
1180 int snap_pending = 0; 1206 int snap_pending = 0;
1207
1181 joined = cur_trans->num_joined; 1208 joined = cur_trans->num_joined;
1182 if (!list_empty(&trans->transaction->pending_snapshots)) 1209 if (!list_empty(&trans->transaction->pending_snapshots))
1183 snap_pending = 1; 1210 snap_pending = 1;
1184 1211
1185 WARN_ON(cur_trans != trans->transaction); 1212 WARN_ON(cur_trans != trans->transaction);
1186 mutex_unlock(&root->fs_info->trans_mutex);
1187 1213
1188 if (flush_on_commit || snap_pending) { 1214 if (flush_on_commit || snap_pending) {
1189 btrfs_start_delalloc_inodes(root, 1); 1215 btrfs_start_delalloc_inodes(root, 1);
@@ -1206,14 +1232,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1206 prepare_to_wait(&cur_trans->writer_wait, &wait, 1232 prepare_to_wait(&cur_trans->writer_wait, &wait,
1207 TASK_UNINTERRUPTIBLE); 1233 TASK_UNINTERRUPTIBLE);
1208 1234
1209 smp_mb();
1210 if (atomic_read(&cur_trans->num_writers) > 1) 1235 if (atomic_read(&cur_trans->num_writers) > 1)
1211 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1236 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1212 else if (should_grow) 1237 else if (should_grow)
1213 schedule_timeout(1); 1238 schedule_timeout(1);
1214 1239
1215 mutex_lock(&root->fs_info->trans_mutex);
1216 finish_wait(&cur_trans->writer_wait, &wait); 1240 finish_wait(&cur_trans->writer_wait, &wait);
1241 spin_lock(&root->fs_info->trans_lock);
1242 root->fs_info->trans_no_join = 1;
1243 spin_unlock(&root->fs_info->trans_lock);
1217 } while (atomic_read(&cur_trans->num_writers) > 1 || 1244 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1218 (should_grow && cur_trans->num_joined != joined)); 1245 (should_grow && cur_trans->num_joined != joined));
1219 1246
@@ -1258,9 +1285,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1258 btrfs_prepare_extent_commit(trans, root); 1285 btrfs_prepare_extent_commit(trans, root);
1259 1286
1260 cur_trans = root->fs_info->running_transaction; 1287 cur_trans = root->fs_info->running_transaction;
1261 spin_lock(&root->fs_info->new_trans_lock);
1262 root->fs_info->running_transaction = NULL;
1263 spin_unlock(&root->fs_info->new_trans_lock);
1264 1288
1265 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1289 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1266 root->fs_info->tree_root->node); 1290 root->fs_info->tree_root->node);
@@ -1281,10 +1305,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1281 sizeof(root->fs_info->super_copy)); 1305 sizeof(root->fs_info->super_copy));
1282 1306
1283 trans->transaction->blocked = 0; 1307 trans->transaction->blocked = 0;
1308 spin_lock(&root->fs_info->trans_lock);
1309 root->fs_info->running_transaction = NULL;
1310 root->fs_info->trans_no_join = 0;
1311 spin_unlock(&root->fs_info->trans_lock);
1284 1312
1285 wake_up(&root->fs_info->transaction_wait); 1313 wake_up(&root->fs_info->transaction_wait);
1286 1314
1287 mutex_unlock(&root->fs_info->trans_mutex);
1288 ret = btrfs_write_and_wait_transaction(trans, root); 1315 ret = btrfs_write_and_wait_transaction(trans, root);
1289 BUG_ON(ret); 1316 BUG_ON(ret);
1290 write_ctree_super(trans, root, 0); 1317 write_ctree_super(trans, root, 0);
@@ -1297,22 +1324,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1297 1324
1298 btrfs_finish_extent_commit(trans, root); 1325 btrfs_finish_extent_commit(trans, root);
1299 1326
1300 mutex_lock(&root->fs_info->trans_mutex);
1301
1302 cur_trans->commit_done = 1; 1327 cur_trans->commit_done = 1;
1303 1328
1304 root->fs_info->last_trans_committed = cur_trans->transid; 1329 root->fs_info->last_trans_committed = cur_trans->transid;
1305 1330
1306 wake_up(&cur_trans->commit_wait); 1331 wake_up(&cur_trans->commit_wait);
1307 1332
1333 spin_lock(&root->fs_info->trans_lock);
1308 list_del_init(&cur_trans->list); 1334 list_del_init(&cur_trans->list);
1335 spin_unlock(&root->fs_info->trans_lock);
1336
1309 put_transaction(cur_trans); 1337 put_transaction(cur_trans);
1310 put_transaction(cur_trans); 1338 put_transaction(cur_trans);
1311 1339
1312 trace_btrfs_transaction_commit(root); 1340 trace_btrfs_transaction_commit(root);
1313 1341
1314 mutex_unlock(&root->fs_info->trans_mutex);
1315
1316 btrfs_scrub_continue(root); 1342 btrfs_scrub_continue(root);
1317 1343
1318 if (current->journal_info == trans) 1344 if (current->journal_info == trans)
@@ -1334,9 +1360,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1334 LIST_HEAD(list); 1360 LIST_HEAD(list);
1335 struct btrfs_fs_info *fs_info = root->fs_info; 1361 struct btrfs_fs_info *fs_info = root->fs_info;
1336 1362
1337 mutex_lock(&fs_info->trans_mutex); 1363 spin_lock(&fs_info->trans_lock);
1338 list_splice_init(&fs_info->dead_roots, &list); 1364 list_splice_init(&fs_info->dead_roots, &list);
1339 mutex_unlock(&fs_info->trans_mutex); 1365 spin_unlock(&fs_info->trans_lock);
1340 1366
1341 while (!list_empty(&list)) { 1367 while (!list_empty(&list)) {
1342 root = list_entry(list.next, struct btrfs_root, root_list); 1368 root = list_entry(list.next, struct btrfs_root, root_list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 804c88639e5d..02564e6230ac 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -28,10 +28,12 @@ struct btrfs_transaction {
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 atomic_t num_writers; 30 atomic_t num_writers;
31 atomic_t use_count;
31 32
32 unsigned long num_joined; 33 unsigned long num_joined;
34
35 spinlock_t commit_lock;
33 int in_commit; 36 int in_commit;
34 atomic_t use_count;
35 int commit_done; 37 int commit_done;
36 int blocked; 38 int blocked;
37 struct list_head list; 39 struct list_head list;
@@ -45,13 +47,14 @@ struct btrfs_transaction {
45 47
46struct btrfs_trans_handle { 48struct btrfs_trans_handle {
47 u64 transid; 49 u64 transid;
48 u64 block_group;
49 u64 bytes_reserved; 50 u64 bytes_reserved;
51 unsigned long use_count;
50 unsigned long blocks_reserved; 52 unsigned long blocks_reserved;
51 unsigned long blocks_used; 53 unsigned long blocks_used;
52 unsigned long delayed_ref_updates; 54 unsigned long delayed_ref_updates;
53 struct btrfs_transaction *transaction; 55 struct btrfs_transaction *transaction;
54 struct btrfs_block_rsv *block_rsv; 56 struct btrfs_block_rsv *block_rsv;
57 struct btrfs_block_rsv *orig_rsv;
55}; 58};
56 59
57struct btrfs_pending_snapshot { 60struct btrfs_pending_snapshot {
@@ -66,19 +69,6 @@ struct btrfs_pending_snapshot {
66 struct list_head list; 69 struct list_head list;
67}; 70};
68 71
69static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
70 struct inode *inode)
71{
72 trans->block_group = BTRFS_I(inode)->block_group;
73}
74
75static inline void btrfs_update_inode_block_group(
76 struct btrfs_trans_handle *trans,
77 struct inode *inode)
78{
79 BTRFS_I(inode)->block_group = trans->block_group;
80}
81
82static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, 72static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
83 struct inode *inode) 73 struct inode *inode)
84{ 74{
@@ -92,12 +82,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
92 struct btrfs_root *root); 82 struct btrfs_root *root);
93struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 83struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
94 int num_items); 84 int num_items);
95struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 85struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
96 int num_blocks); 86struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
97struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 87struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
98 int num_blocks);
99struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
100 int num_blocks);
101int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 88int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
102int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 89int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root); 90 struct btrfs_root *root);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c48214ef5c09..da541dfca2e3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
504 BUG_ON(!new_device); 504 BUG_ON(!new_device);
505 memcpy(new_device, device, sizeof(*new_device)); 505 memcpy(new_device, device, sizeof(*new_device));
506 new_device->name = kstrdup(device->name, GFP_NOFS); 506 new_device->name = kstrdup(device->name, GFP_NOFS);
507 BUG_ON(!new_device->name); 507 BUG_ON(device->name && !new_device->name);
508 new_device->bdev = NULL; 508 new_device->bdev = NULL;
509 new_device->writeable = 0; 509 new_device->writeable = 0;
510 new_device->in_fs_metadata = 0; 510 new_device->in_fs_metadata = 0;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index f3107e4b4d56..5366fe452ab0 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
158 if (IS_ERR(trans)) 158 if (IS_ERR(trans))
159 return PTR_ERR(trans); 159 return PTR_ERR(trans);
160 160
161 btrfs_set_trans_block_group(trans, inode);
162
163 ret = do_setxattr(trans, inode, name, value, size, flags); 161 ret = do_setxattr(trans, inode, name, value, size, flags);
164 if (ret) 162 if (ret)
165 goto out; 163 goto out;
diff --git a/fs/namei.c b/fs/namei.c
index 1ab641f2e78e..e2e4e8d032ee 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2579,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2579 if (error) 2579 if (error)
2580 goto out; 2580 goto out;
2581 2581
2582 shrink_dcache_parent(dentry);
2582 error = dir->i_op->rmdir(dir, dentry); 2583 error = dir->i_op->rmdir(dir, dentry);
2583 if (error) 2584 if (error)
2584 goto out; 2585 goto out;
@@ -2993,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2993 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) 2994 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
2994 goto out; 2995 goto out;
2995 2996
2997 if (target)
2998 shrink_dcache_parent(new_dentry);
2996 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2999 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2997 if (error) 3000 if (error)
2998 goto out; 3001 goto out;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index f82e762eeca2..d545e97d99c3 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,13 +255,7 @@ ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 struct gendisk *disk = dev_to_disk(dev); 258 return sprintf(buf, "%u\n", p->discard_alignment);
259 unsigned int alignment = 0;
260
261 if (disk->queue)
262 alignment = queue_limit_discard_alignment(&disk->queue->limits,
263 p->start_sect);
264 return sprintf(buf, "%u\n", alignment);
265} 259}
266 260
267ssize_t part_stat_show(struct device *dev, 261ssize_t part_stat_show(struct device *dev,
@@ -455,6 +449,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
455 p->start_sect = start; 449 p->start_sect = start;
456 p->alignment_offset = 450 p->alignment_offset =
457 queue_limit_alignment_offset(&disk->queue->limits, start); 451 queue_limit_alignment_offset(&disk->queue->limits, start);
452 p->discard_alignment =
453 queue_limit_discard_alignment(&disk->queue->limits, start);
458 p->nr_sects = len; 454 p->nr_sects = len;
459 p->partno = partno; 455 p->partno = partno;
460 p->policy = get_disk_ro(disk); 456 p->policy = get_disk_ro(disk);
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 166951e0dcd3..3be645e012c9 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -581,6 +581,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
581 ubifs_assert(wbuf->size % c->min_io_size == 0); 581 ubifs_assert(wbuf->size % c->min_io_size == 0);
582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
583 ubifs_assert(!c->ro_media && !c->ro_mount); 583 ubifs_assert(!c->ro_media && !c->ro_mount);
584 ubifs_assert(!c->space_fixup);
584 if (c->leb_size - wbuf->offs >= c->max_write_size) 585 if (c->leb_size - wbuf->offs >= c->max_write_size)
585 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); 586 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
586 587
@@ -759,6 +760,7 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
759 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 760 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
760 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); 761 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
761 ubifs_assert(!c->ro_media && !c->ro_mount); 762 ubifs_assert(!c->ro_media && !c->ro_mount);
763 ubifs_assert(!c->space_fixup);
762 764
763 if (c->ro_error) 765 if (c->ro_error)
764 return -EROFS; 766 return -EROFS;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 34b1679e6e3a..cef0460f4c54 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -669,6 +669,7 @@ out_free:
669 669
670out_release: 670out_release:
671 release_head(c, BASEHD); 671 release_head(c, BASEHD);
672 kfree(dent);
672out_ro: 673out_ro:
673 ubifs_ro_mode(c, err); 674 ubifs_ro_mode(c, err);
674 if (last_reference) 675 if (last_reference)
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index bd644bf587a8..a5422fffbd69 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -674,7 +674,7 @@ static int kill_orphans(struct ubifs_info *c)
674 if (IS_ERR(sleb)) { 674 if (IS_ERR(sleb)) {
675 if (PTR_ERR(sleb) == -EUCLEAN) 675 if (PTR_ERR(sleb) == -EUCLEAN)
676 sleb = ubifs_recover_leb(c, lnum, 0, 676 sleb = ubifs_recover_leb(c, lnum, 0,
677 c->sbuf, 0); 677 c->sbuf, -1);
678 if (IS_ERR(sleb)) { 678 if (IS_ERR(sleb)) {
679 err = PTR_ERR(sleb); 679 err = PTR_ERR(sleb);
680 break; 680 break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 731d9e2e7b50..783d8e0beb76 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,19 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
564} 564}
565 565
566/** 566/**
567 * drop_last_node - drop the last node or group of nodes. 567 * drop_last_group - drop the last group of nodes.
568 * @sleb: scanned LEB information 568 * @sleb: scanned LEB information
569 * @offs: offset of dropped nodes is returned here 569 * @offs: offset of dropped nodes is returned here
570 * @grouped: non-zero if whole group of nodes have to be dropped
571 * 570 *
572 * This is a helper function for 'ubifs_recover_leb()' which drops the last 571 * This is a helper function for 'ubifs_recover_leb()' which drops the last
573 * node of the scanned LEB or the last group of nodes if @grouped is not zero. 572 * group of nodes of the scanned LEB.
574 * This function returns %1 if a node was dropped and %0 otherwise.
575 */ 573 */
576static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) 574static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
577{ 575{
578 int dropped = 0;
579
580 while (!list_empty(&sleb->nodes)) { 576 while (!list_empty(&sleb->nodes)) {
581 struct ubifs_scan_node *snod; 577 struct ubifs_scan_node *snod;
582 struct ubifs_ch *ch; 578 struct ubifs_ch *ch;
@@ -585,17 +581,40 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
585 list); 581 list);
586 ch = snod->node; 582 ch = snod->node;
587 if (ch->group_type != UBIFS_IN_NODE_GROUP) 583 if (ch->group_type != UBIFS_IN_NODE_GROUP)
588 return dropped; 584 break;
589 dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); 585
586 dbg_rcvry("dropping grouped node at %d:%d",
587 sleb->lnum, snod->offs);
588 *offs = snod->offs;
589 list_del(&snod->list);
590 kfree(snod);
591 sleb->nodes_cnt -= 1;
592 }
593}
594
595/**
596 * drop_last_node - drop the last node.
597 * @sleb: scanned LEB information
598 * @offs: offset of dropped nodes is returned here
599 * @grouped: non-zero if whole group of nodes have to be dropped
600 *
601 * This is a helper function for 'ubifs_recover_leb()' which drops the last
602 * node of the scanned LEB.
603 */
604static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
605{
606 struct ubifs_scan_node *snod;
607
608 if (!list_empty(&sleb->nodes)) {
609 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
610 list);
611
612 dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs);
590 *offs = snod->offs; 613 *offs = snod->offs;
591 list_del(&snod->list); 614 list_del(&snod->list);
592 kfree(snod); 615 kfree(snod);
593 sleb->nodes_cnt -= 1; 616 sleb->nodes_cnt -= 1;
594 dropped = 1;
595 if (!grouped)
596 break;
597 } 617 }
598 return dropped;
599} 618}
600 619
601/** 620/**
@@ -604,7 +623,8 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
604 * @lnum: LEB number 623 * @lnum: LEB number
605 * @offs: offset 624 * @offs: offset
606 * @sbuf: LEB-sized buffer to use 625 * @sbuf: LEB-sized buffer to use
607 * @grouped: nodes may be grouped for recovery 626 * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
627 * belong to any journal head)
608 * 628 *
609 * This function does a scan of a LEB, but caters for errors that might have 629 * This function does a scan of a LEB, but caters for errors that might have
610 * been caused by the unclean unmount from which we are attempting to recover. 630 * been caused by the unclean unmount from which we are attempting to recover.
@@ -612,13 +632,14 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
612 * found, and a negative error code in case of failure. 632 * found, and a negative error code in case of failure.
613 */ 633 */
614struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 634struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
615 int offs, void *sbuf, int grouped) 635 int offs, void *sbuf, int jhead)
616{ 636{
617 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; 637 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
638 int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
618 struct ubifs_scan_leb *sleb; 639 struct ubifs_scan_leb *sleb;
619 void *buf = sbuf + offs; 640 void *buf = sbuf + offs;
620 641
621 dbg_rcvry("%d:%d", lnum, offs); 642 dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
622 643
623 sleb = ubifs_start_scan(c, lnum, offs, sbuf); 644 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
624 if (IS_ERR(sleb)) 645 if (IS_ERR(sleb))
@@ -635,7 +656,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
635 * Scan quietly until there is an error from which we cannot 656 * Scan quietly until there is an error from which we cannot
636 * recover 657 * recover
637 */ 658 */
638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); 659 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
639 if (ret == SCANNED_A_NODE) { 660 if (ret == SCANNED_A_NODE) {
640 /* A valid node, and not a padding node */ 661 /* A valid node, and not a padding node */
641 struct ubifs_ch *ch = buf; 662 struct ubifs_ch *ch = buf;
@@ -695,59 +716,62 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
695 * If nodes are grouped, always drop the incomplete group at 716 * If nodes are grouped, always drop the incomplete group at
696 * the end. 717 * the end.
697 */ 718 */
698 drop_last_node(sleb, &offs, 1); 719 drop_last_group(sleb, &offs);
699 720
700 /* 721 if (jhead == GCHD) {
701 * While we are in the middle of the same min. I/O unit keep dropping 722 /*
702 * nodes. So basically, what we want is to make sure that the last min. 723 * If this LEB belongs to the GC head then while we are in the
703 * I/O unit where we saw the corruption is dropped completely with all 724 * middle of the same min. I/O unit keep dropping nodes. So
704 * the uncorrupted node which may possibly sit there. 725 * basically, what we want is to make sure that the last min.
705 * 726 * I/O unit where we saw the corruption is dropped completely
706 * In other words, let's name the min. I/O unit where the corruption 727 * with all the uncorrupted nodes which may possibly sit there.
707 * starts B, and the previous min. I/O unit A. The below code tries to 728 *
708 * deal with a situation when half of B contains valid nodes or the end 729 * In other words, let's name the min. I/O unit where the
709 * of a valid node, and the second half of B contains corrupted data or 730 * corruption starts B, and the previous min. I/O unit A. The
710 * garbage. This means that UBIFS had been writing to B just before the 731 * below code tries to deal with a situation when half of B
711 * power cut happened. I do not know how realistic is this scenario 732 * contains valid nodes or the end of a valid node, and the
712 * that half of the min. I/O unit had been written successfully and the 733 * second half of B contains corrupted data or garbage. This
713 * other half not, but this is possible in our 'failure mode emulation' 734 * means that UBIFS had been writing to B just before the power
714 * infrastructure at least. 735 * cut happened. I do not know how realistic is this scenario
715 * 736 * that half of the min. I/O unit had been written successfully
716 * So what is the problem, why we need to drop those nodes? Whey can't 737 * and the other half not, but this is possible in our 'failure
717 * we just clean-up the second half of B by putting a padding node 738 * mode emulation' infrastructure at least.
718 * there? We can, and this works fine with one exception which was 739 *
719 * reproduced with power cut emulation testing and happens extremely 740 * So what is the problem, why we need to drop those nodes? Why
720 * rarely. The description follows, but it is worth noting that that is 741 * can't we just clean-up the second half of B by putting a
721 * only about the GC head, so we could do this trick only if the bud 742 * padding node there? We can, and this works fine with one
722 * belongs to the GC head, but it does not seem to be worth an 743 * exception which was reproduced with power cut emulation
723 * additional "if" statement. 744 * testing and happens extremely rarely.
724 * 745 *
725 * So, imagine the file-system is full, we run GC which is moving valid 746 * Imagine the file-system is full, we run GC which starts
726 * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head 747 * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
727 * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X 748 * the current GC head LEB). The @c->gc_lnum is -1, which means
728 * and will try to continue. Imagine that LEB X is currently the 749 * that GC will retain LEB X and will try to continue. Imagine
729 * dirtiest LEB, and the amount of used space in LEB Y is exactly the 750 * that LEB X is currently the dirtiest LEB, and the amount of
730 * same as amount of free space in LEB X. 751 * used space in LEB Y is exactly the same as amount of free
731 * 752 * space in LEB X.
732 * And a power cut happens when nodes are moved from LEB X to LEB Y. We 753 *
733 * are here trying to recover LEB Y which is the GC head LEB. We find 754 * And a power cut happens when nodes are moved from LEB X to
734 * the min. I/O unit B as described above. Then we clean-up LEB Y by 755 * LEB Y. We are here trying to recover LEB Y which is the GC
735 * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function 756 * head LEB. We find the min. I/O unit B as described above.
736 * fails, because it cannot find a dirty LEB which could be GC'd into 757 * Then we clean-up LEB Y by padding min. I/O unit. And later
737 * LEB Y! Even LEB X does not match because the amount of valid nodes 758 * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
738 * there does not fit the free space in LEB Y any more! And this is 759 * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
739 * because of the padding node which we added to LEB Y. The 760 * does not match because the amount of valid nodes there does
740 * user-visible effect of this which I once observed and analysed is 761 * not fit the free space in LEB Y any more! And this is
741 * that we cannot mount the file-system with -ENOSPC error. 762 * because of the padding node which we added to LEB Y. The
742 * 763 * user-visible effect of this which I once observed and
743 * So obviously, to make sure that situation does not happen we should 764 * analysed is that we cannot mount the file-system with
744 * free min. I/O unit B in LEB Y completely and the last used min. I/O 765 * -ENOSPC error.
745 * unit in LEB Y should be A. This is basically what the below code 766 *
746 * tries to do. 767 * So obviously, to make sure that situation does not happen we
747 */ 768 * should free min. I/O unit B in LEB Y completely and the last
748 while (min_io_unit == round_down(offs, c->min_io_size) && 769 * used min. I/O unit in LEB Y should be A. This is basically
749 min_io_unit != offs && 770 * what the below code tries to do.
750 drop_last_node(sleb, &offs, grouped)); 771 */
772 while (offs > min_io_unit)
773 drop_last_node(sleb, &offs);
774 }
751 775
752 buf = sbuf + offs; 776 buf = sbuf + offs;
753 len = c->leb_size - offs; 777 len = c->leb_size - offs;
@@ -881,7 +905,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
881 } 905 }
882 ubifs_scan_destroy(sleb); 906 ubifs_scan_destroy(sleb);
883 } 907 }
884 return ubifs_recover_leb(c, lnum, offs, sbuf, 0); 908 return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
885} 909}
886 910
887/** 911/**
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 6617280d1679..5e97161ce4d3 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -557,8 +557,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
557 * these LEBs could possibly be written to at the power cut 557 * these LEBs could possibly be written to at the power cut
558 * time. 558 * time.
559 */ 559 */
560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, 560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
561 b->bud->jhead != GCHD);
562 else 561 else
563 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 562 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
564 if (IS_ERR(sleb)) 563 if (IS_ERR(sleb))
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index ca953a945029..9e1d05666fed 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -284,7 +284,11 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
285 285
286 if (nr == 0) 286 if (nr == 0)
287 return clean_zn_cnt; 287 /*
288 * Due to the way UBIFS updates the clean znode counter it may
289 * temporarily be negative.
290 */
291 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
288 292
289 if (!clean_zn_cnt) { 293 if (!clean_zn_cnt) {
290 /* 294 /*
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1ab0d22e4c94..b5aeb5a8ebed 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -811,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c)
811 811
812 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; 812 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
813 c->jheads[i].wbuf.jhead = i; 813 c->jheads[i].wbuf.jhead = i;
814 c->jheads[i].grouped = 1;
814 } 815 }
815 816
816 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; 817 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
817 /* 818 /*
818 * Garbage Collector head likely contains long-term data and 819 * Garbage Collector head likely contains long-term data and
819 * does not need to be synchronized by timer. 820 * does not need to be synchronized by timer. Also GC head nodes are
821 * not grouped.
820 */ 822 */
821 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; 823 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
822 c->jheads[GCHD].wbuf.no_timer = 1; 824 c->jheads[GCHD].wbuf.no_timer = 1;
825 c->jheads[GCHD].grouped = 0;
823 826
824 return 0; 827 return 0;
825} 828}
@@ -1284,12 +1287,25 @@ static int mount_ubifs(struct ubifs_info *c)
1284 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { 1287 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1285 ubifs_msg("recovery needed"); 1288 ubifs_msg("recovery needed");
1286 c->need_recovery = 1; 1289 c->need_recovery = 1;
1287 if (!c->ro_mount) { 1290 }
1288 err = ubifs_recover_inl_heads(c, c->sbuf); 1291
1289 if (err) 1292 if (c->need_recovery && !c->ro_mount) {
1290 goto out_master; 1293 err = ubifs_recover_inl_heads(c, c->sbuf);
1291 } 1294 if (err)
1292 } else if (!c->ro_mount) { 1295 goto out_master;
1296 }
1297
1298 err = ubifs_lpt_init(c, 1, !c->ro_mount);
1299 if (err)
1300 goto out_master;
1301
1302 if (!c->ro_mount && c->space_fixup) {
1303 err = ubifs_fixup_free_space(c);
1304 if (err)
1305 goto out_master;
1306 }
1307
1308 if (!c->ro_mount) {
1293 /* 1309 /*
1294 * Set the "dirty" flag so that if we reboot uncleanly we 1310 * Set the "dirty" flag so that if we reboot uncleanly we
1295 * will notice this immediately on the next mount. 1311 * will notice this immediately on the next mount.
@@ -1297,13 +1313,9 @@ static int mount_ubifs(struct ubifs_info *c)
1297 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); 1313 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
1298 err = ubifs_write_master(c); 1314 err = ubifs_write_master(c);
1299 if (err) 1315 if (err)
1300 goto out_master; 1316 goto out_lpt;
1301 } 1317 }
1302 1318
1303 err = ubifs_lpt_init(c, 1, !c->ro_mount);
1304 if (err)
1305 goto out_lpt;
1306
1307 err = dbg_check_idx_size(c, c->bi.old_idx_sz); 1319 err = dbg_check_idx_size(c, c->bi.old_idx_sz);
1308 if (err) 1320 if (err)
1309 goto out_lpt; 1321 goto out_lpt;
@@ -1396,12 +1408,6 @@ static int mount_ubifs(struct ubifs_info *c)
1396 } else 1408 } else
1397 ubifs_assert(c->lst.taken_empty_lebs > 0); 1409 ubifs_assert(c->lst.taken_empty_lebs > 0);
1398 1410
1399 if (!c->ro_mount && c->space_fixup) {
1400 err = ubifs_fixup_free_space(c);
1401 if (err)
1402 goto out_infos;
1403 }
1404
1405 err = dbg_check_filesystem(c); 1411 err = dbg_check_filesystem(c);
1406 if (err) 1412 if (err)
1407 goto out_infos; 1413 goto out_infos;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 8119b1fd8d94..91b4213dde84 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2876,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
2876 */ 2876 */
2877void ubifs_tnc_close(struct ubifs_info *c) 2877void ubifs_tnc_close(struct ubifs_info *c)
2878{ 2878{
2879 long clean_freed;
2880
2881 tnc_destroy_cnext(c); 2879 tnc_destroy_cnext(c);
2882 if (c->zroot.znode) { 2880 if (c->zroot.znode) {
2883 clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); 2881 long n;
2884 atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); 2882
2883 ubifs_destroy_tnc_subtree(c->zroot.znode);
2884 n = atomic_long_read(&c->clean_zn_cnt);
2885 atomic_long_sub(n, &ubifs_clean_zn_cnt);
2885 } 2886 }
2886 kfree(c->gap_lebs); 2887 kfree(c->gap_lebs);
2887 kfree(c->ilebs); 2888 kfree(c->ilebs);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a70d7b4ffb25..f79983d6f860 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -722,12 +722,14 @@ struct ubifs_bud {
722 * struct ubifs_jhead - journal head. 722 * struct ubifs_jhead - journal head.
723 * @wbuf: head's write-buffer 723 * @wbuf: head's write-buffer
724 * @buds_list: list of bud LEBs belonging to this journal head 724 * @buds_list: list of bud LEBs belonging to this journal head
725 * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
725 * 726 *
726 * Note, the @buds list is protected by the @c->buds_lock. 727 * Note, the @buds list is protected by the @c->buds_lock.
727 */ 728 */
728struct ubifs_jhead { 729struct ubifs_jhead {
729 struct ubifs_wbuf wbuf; 730 struct ubifs_wbuf wbuf;
730 struct list_head buds_list; 731 struct list_head buds_list;
732 unsigned int grouped:1;
731}; 733};
732 734
733/** 735/**
@@ -1742,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
1742int ubifs_recover_master_node(struct ubifs_info *c); 1744int ubifs_recover_master_node(struct ubifs_info *c);
1743int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); 1745int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
1744struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 1746struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
1745 int offs, void *sbuf, int grouped); 1747 int offs, void *sbuf, int jhead);
1746struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, 1748struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
1747 int offs, void *sbuf); 1749 int offs, void *sbuf);
1748int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); 1750int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);