aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c4
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/ctree.c32
-rw-r--r--fs/btrfs/ctree.h22
-rw-r--r--fs/btrfs/delayed-inode.c8
-rw-r--r--fs/btrfs/disk-io.c41
-rw-r--r--fs/btrfs/extent-tree.c158
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c10
-rw-r--r--fs/btrfs/free-space-cache.c231
-rw-r--r--fs/btrfs/inode-map.c34
-rw-r--r--fs/btrfs/inode.c265
-rw-r--r--fs/btrfs/ioctl.c49
-rw-r--r--fs/btrfs/relocation.c34
-rw-r--r--fs/btrfs/scrub.c192
-rw-r--r--fs/btrfs/super.c10
-rw-r--r--fs/btrfs/transaction.c307
-rw-r--r--fs/btrfs/transaction.h29
-rw-r--r--fs/btrfs/volumes.c10
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/ceph/caps.c10
-rw-r--r--fs/ceph/dir.c11
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/file.c35
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/ioctl.c6
-rw-r--r--fs/ceph/locks.c29
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/xattr.c6
-rw-r--r--fs/cifs/Kconfig3
-rw-r--r--fs/cifs/cifsencrypt.c2
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/dcookies.c3
-rw-r--r--fs/exec.c5
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/glock.c9
-rw-r--r--fs/jfs/jfs_logmgr.c2
-rw-r--r--fs/namei.c11
-rw-r--r--fs/nilfs2/btree.c39
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/partitions/check.c10
-rw-r--r--fs/super.c2
-rw-r--r--fs/ubifs/io.c2
-rw-r--r--fs/ubifs/journal.c1
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/recovery.c164
-rw-r--r--fs/ubifs/replay.c3
-rw-r--r--fs/ubifs/shrinker.c6
-rw-r--r--fs/ubifs/super.c42
-rw-r--r--fs/ubifs/tnc.c9
-rw-r--r--fs/ubifs/ubifs.h4
55 files changed, 1133 insertions, 768 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1f2b19978333..1a2421f908f0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1272,8 +1272,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1272 * individual writeable reference is too fragile given the 1272 * individual writeable reference is too fragile given the
1273 * way @mode is used in blkdev_get/put(). 1273 * way @mode is used in blkdev_get/put().
1274 */ 1274 */
1275 if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && 1275 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
1276 !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { 1276 (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
1277 bdev->bd_write_holder = true; 1277 bdev->bd_write_holder = true;
1278 disk_block_events(disk); 1278 disk_block_events(disk);
1279 } 1279 }
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 93b1aa932014..52d7eca8c7bf 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -121,9 +121,6 @@ struct btrfs_inode {
121 */ 121 */
122 u64 index_cnt; 122 u64 index_cnt;
123 123
124 /* the start of block group preferred for allocations. */
125 u64 block_group;
126
127 /* the fsync log has some corner cases that mean we have to check 124 /* the fsync log has some corner cases that mean we have to check
128 * directories to see if any unlinks have been done before 125 * directories to see if any unlinks have been done before
129 * the directory was logged. See tree-log.c for all the 126 * the directory was logged. See tree-log.c for all the
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b0e18d986e0a..2e667868e0d2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -43,8 +43,6 @@ struct btrfs_path *btrfs_alloc_path(void)
43{ 43{
44 struct btrfs_path *path; 44 struct btrfs_path *path;
45 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 45 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
46 if (path)
47 path->reada = 1;
48 return path; 46 return path;
49} 47}
50 48
@@ -1224,11 +1222,13 @@ static void reada_for_search(struct btrfs_root *root,
1224 u64 search; 1222 u64 search;
1225 u64 target; 1223 u64 target;
1226 u64 nread = 0; 1224 u64 nread = 0;
1225 u64 gen;
1227 int direction = path->reada; 1226 int direction = path->reada;
1228 struct extent_buffer *eb; 1227 struct extent_buffer *eb;
1229 u32 nr; 1228 u32 nr;
1230 u32 blocksize; 1229 u32 blocksize;
1231 u32 nscan = 0; 1230 u32 nscan = 0;
1231 bool map = true;
1232 1232
1233 if (level != 1) 1233 if (level != 1)
1234 return; 1234 return;
@@ -1250,7 +1250,19 @@ static void reada_for_search(struct btrfs_root *root,
1250 1250
1251 nritems = btrfs_header_nritems(node); 1251 nritems = btrfs_header_nritems(node);
1252 nr = slot; 1252 nr = slot;
1253 if (node->map_token || path->skip_locking)
1254 map = false;
1255
1253 while (1) { 1256 while (1) {
1257 if (map && !node->map_token) {
1258 unsigned long offset = btrfs_node_key_ptr_offset(nr);
1259 map_private_extent_buffer(node, offset,
1260 sizeof(struct btrfs_key_ptr),
1261 &node->map_token,
1262 &node->kaddr,
1263 &node->map_start,
1264 &node->map_len, KM_USER1);
1265 }
1254 if (direction < 0) { 1266 if (direction < 0) {
1255 if (nr == 0) 1267 if (nr == 0)
1256 break; 1268 break;
@@ -1268,14 +1280,23 @@ static void reada_for_search(struct btrfs_root *root,
1268 search = btrfs_node_blockptr(node, nr); 1280 search = btrfs_node_blockptr(node, nr);
1269 if ((search <= target && target - search <= 65536) || 1281 if ((search <= target && target - search <= 65536) ||
1270 (search > target && search - target <= 65536)) { 1282 (search > target && search - target <= 65536)) {
1271 readahead_tree_block(root, search, blocksize, 1283 gen = btrfs_node_ptr_generation(node, nr);
1272 btrfs_node_ptr_generation(node, nr)); 1284 if (map && node->map_token) {
1285 unmap_extent_buffer(node, node->map_token,
1286 KM_USER1);
1287 node->map_token = NULL;
1288 }
1289 readahead_tree_block(root, search, blocksize, gen);
1273 nread += blocksize; 1290 nread += blocksize;
1274 } 1291 }
1275 nscan++; 1292 nscan++;
1276 if ((nread > 65536 || nscan > 32)) 1293 if ((nread > 65536 || nscan > 32))
1277 break; 1294 break;
1278 } 1295 }
1296 if (map && node->map_token) {
1297 unmap_extent_buffer(node, node->map_token, KM_USER1);
1298 node->map_token = NULL;
1299 }
1279} 1300}
1280 1301
1281/* 1302/*
@@ -1648,9 +1669,6 @@ again:
1648 } 1669 }
1649cow_done: 1670cow_done:
1650 BUG_ON(!cow && ins_len); 1671 BUG_ON(!cow && ins_len);
1651 if (level != btrfs_header_level(b))
1652 WARN_ON(1);
1653 level = btrfs_header_level(b);
1654 1672
1655 p->nodes[level] = b; 1673 p->nodes[level] = b;
1656 if (!p->skip_locking) 1674 if (!p->skip_locking)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6c093fa98f61..378b5b4443f3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -930,7 +930,6 @@ struct btrfs_fs_info {
930 * is required instead of the faster short fsync log commits 930 * is required instead of the faster short fsync log commits
931 */ 931 */
932 u64 last_trans_log_full_commit; 932 u64 last_trans_log_full_commit;
933 u64 open_ioctl_trans;
934 unsigned long mount_opt:20; 933 unsigned long mount_opt:20;
935 unsigned long compress_type:4; 934 unsigned long compress_type:4;
936 u64 max_inline; 935 u64 max_inline;
@@ -947,7 +946,6 @@ struct btrfs_fs_info {
947 struct super_block *sb; 946 struct super_block *sb;
948 struct inode *btree_inode; 947 struct inode *btree_inode;
949 struct backing_dev_info bdi; 948 struct backing_dev_info bdi;
950 struct mutex trans_mutex;
951 struct mutex tree_log_mutex; 949 struct mutex tree_log_mutex;
952 struct mutex transaction_kthread_mutex; 950 struct mutex transaction_kthread_mutex;
953 struct mutex cleaner_mutex; 951 struct mutex cleaner_mutex;
@@ -968,6 +966,7 @@ struct btrfs_fs_info {
968 struct rw_semaphore subvol_sem; 966 struct rw_semaphore subvol_sem;
969 struct srcu_struct subvol_srcu; 967 struct srcu_struct subvol_srcu;
970 968
969 spinlock_t trans_lock;
971 struct list_head trans_list; 970 struct list_head trans_list;
972 struct list_head hashers; 971 struct list_head hashers;
973 struct list_head dead_roots; 972 struct list_head dead_roots;
@@ -980,6 +979,7 @@ struct btrfs_fs_info {
980 atomic_t async_submit_draining; 979 atomic_t async_submit_draining;
981 atomic_t nr_async_bios; 980 atomic_t nr_async_bios;
982 atomic_t async_delalloc_pages; 981 atomic_t async_delalloc_pages;
982 atomic_t open_ioctl_trans;
983 983
984 /* 984 /*
985 * this is used by the balancing code to wait for all the pending 985 * this is used by the balancing code to wait for all the pending
@@ -1044,6 +1044,7 @@ struct btrfs_fs_info {
1044 int closing; 1044 int closing;
1045 int log_root_recovering; 1045 int log_root_recovering;
1046 int enospc_unlink; 1046 int enospc_unlink;
1047 int trans_no_join;
1047 1048
1048 u64 total_pinned; 1049 u64 total_pinned;
1049 1050
@@ -1065,7 +1066,6 @@ struct btrfs_fs_info {
1065 struct reloc_control *reloc_ctl; 1066 struct reloc_control *reloc_ctl;
1066 1067
1067 spinlock_t delalloc_lock; 1068 spinlock_t delalloc_lock;
1068 spinlock_t new_trans_lock;
1069 u64 delalloc_bytes; 1069 u64 delalloc_bytes;
1070 1070
1071 /* data_alloc_cluster is only used in ssd mode */ 1071 /* data_alloc_cluster is only used in ssd mode */
@@ -1340,6 +1340,7 @@ struct btrfs_ioctl_defrag_range_args {
1340#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14) 1340#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1341#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15) 1341#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
1342#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16) 1342#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
1343#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
1343 1344
1344#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1345#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1345#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1346#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2238,6 +2239,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
2238void btrfs_block_rsv_release(struct btrfs_root *root, 2239void btrfs_block_rsv_release(struct btrfs_root *root,
2239 struct btrfs_block_rsv *block_rsv, 2240 struct btrfs_block_rsv *block_rsv,
2240 u64 num_bytes); 2241 u64 num_bytes);
2242int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
2243 struct btrfs_root *root,
2244 struct btrfs_block_rsv *rsv);
2241int btrfs_set_block_group_ro(struct btrfs_root *root, 2245int btrfs_set_block_group_ro(struct btrfs_root *root,
2242 struct btrfs_block_group_cache *cache); 2246 struct btrfs_block_group_cache *cache);
2243int btrfs_set_block_group_rw(struct btrfs_root *root, 2247int btrfs_set_block_group_rw(struct btrfs_root *root,
@@ -2350,6 +2354,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2350 struct btrfs_root *root, 2354 struct btrfs_root *root,
2351 struct extent_buffer *node, 2355 struct extent_buffer *node,
2352 struct extent_buffer *parent); 2356 struct extent_buffer *parent);
2357static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
2358{
2359 /*
2360 * Get synced with close_ctree()
2361 */
2362 smp_mb();
2363 return fs_info->closing;
2364}
2365
2353/* root-item.c */ 2366/* root-item.c */
2354int btrfs_find_root_ref(struct btrfs_root *tree_root, 2367int btrfs_find_root_ref(struct btrfs_root *tree_root,
2355 struct btrfs_path *path, 2368 struct btrfs_path *path,
@@ -2512,8 +2525,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2512int btrfs_writepages(struct address_space *mapping, 2525int btrfs_writepages(struct address_space *mapping,
2513 struct writeback_control *wbc); 2526 struct writeback_control *wbc);
2514int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 2527int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
2515 struct btrfs_root *new_root, 2528 struct btrfs_root *new_root, u64 new_dirid);
2516 u64 new_dirid, u64 alloc_hint);
2517int btrfs_merge_bio_hook(struct page *page, unsigned long offset, 2529int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2518 size_t size, struct bio *bio, unsigned long bio_flags); 2530 size_t size, struct bio *bio, unsigned long bio_flags);
2519 2531
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 01e29503a54b..6462c29d2d37 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
678 INIT_LIST_HEAD(&head); 678 INIT_LIST_HEAD(&head);
679 679
680 next = item; 680 next = item;
681 nitems = 0;
681 682
682 /* 683 /*
683 * count the number of the continuous items that we can insert in batch 684 * count the number of the continuous items that we can insert in batch
@@ -1129,7 +1130,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1129 delayed_node = async_node->delayed_node; 1130 delayed_node = async_node->delayed_node;
1130 root = delayed_node->root; 1131 root = delayed_node->root;
1131 1132
1132 trans = btrfs_join_transaction(root, 0); 1133 trans = btrfs_join_transaction(root);
1133 if (IS_ERR(trans)) 1134 if (IS_ERR(trans))
1134 goto free_path; 1135 goto free_path;
1135 1136
@@ -1572,8 +1573,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1572 btrfs_set_stack_inode_transid(inode_item, trans->transid); 1573 btrfs_set_stack_inode_transid(inode_item, trans->transid);
1573 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); 1574 btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1574 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); 1575 btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1575 btrfs_set_stack_inode_block_group(inode_item, 1576 btrfs_set_stack_inode_block_group(inode_item, 0);
1576 BTRFS_I(inode)->block_group);
1577 1577
1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item), 1578 btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1579 inode->i_atime.tv_sec); 1579 inode->i_atime.tv_sec);
@@ -1595,7 +1595,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1595 struct btrfs_root *root, struct inode *inode) 1595 struct btrfs_root *root, struct inode *inode)
1596{ 1596{
1597 struct btrfs_delayed_node *delayed_node; 1597 struct btrfs_delayed_node *delayed_node;
1598 int ret; 1598 int ret = 0;
1599 1599
1600 delayed_node = btrfs_get_or_create_delayed_node(inode); 1600 delayed_node = btrfs_get_or_create_delayed_node(inode);
1601 if (IS_ERR(delayed_node)) 1601 if (IS_ERR(delayed_node))
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 98b6a71decba..9f68c6898653 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1505,24 +1505,24 @@ static int transaction_kthread(void *arg)
1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1505 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1506 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1506 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1507 1507
1508 spin_lock(&root->fs_info->new_trans_lock); 1508 spin_lock(&root->fs_info->trans_lock);
1509 cur = root->fs_info->running_transaction; 1509 cur = root->fs_info->running_transaction;
1510 if (!cur) { 1510 if (!cur) {
1511 spin_unlock(&root->fs_info->new_trans_lock); 1511 spin_unlock(&root->fs_info->trans_lock);
1512 goto sleep; 1512 goto sleep;
1513 } 1513 }
1514 1514
1515 now = get_seconds(); 1515 now = get_seconds();
1516 if (!cur->blocked && 1516 if (!cur->blocked &&
1517 (now < cur->start_time || now - cur->start_time < 30)) { 1517 (now < cur->start_time || now - cur->start_time < 30)) {
1518 spin_unlock(&root->fs_info->new_trans_lock); 1518 spin_unlock(&root->fs_info->trans_lock);
1519 delay = HZ * 5; 1519 delay = HZ * 5;
1520 goto sleep; 1520 goto sleep;
1521 } 1521 }
1522 transid = cur->transid; 1522 transid = cur->transid;
1523 spin_unlock(&root->fs_info->new_trans_lock); 1523 spin_unlock(&root->fs_info->trans_lock);
1524 1524
1525 trans = btrfs_join_transaction(root, 1); 1525 trans = btrfs_join_transaction(root);
1526 BUG_ON(IS_ERR(trans)); 1526 BUG_ON(IS_ERR(trans));
1527 if (transid == trans->transid) { 1527 if (transid == trans->transid) {
1528 ret = btrfs_commit_transaction(trans, root); 1528 ret = btrfs_commit_transaction(trans, root);
@@ -1613,7 +1613,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1613 INIT_LIST_HEAD(&fs_info->ordered_operations); 1613 INIT_LIST_HEAD(&fs_info->ordered_operations);
1614 INIT_LIST_HEAD(&fs_info->caching_block_groups); 1614 INIT_LIST_HEAD(&fs_info->caching_block_groups);
1615 spin_lock_init(&fs_info->delalloc_lock); 1615 spin_lock_init(&fs_info->delalloc_lock);
1616 spin_lock_init(&fs_info->new_trans_lock); 1616 spin_lock_init(&fs_info->trans_lock);
1617 spin_lock_init(&fs_info->ref_cache_lock); 1617 spin_lock_init(&fs_info->ref_cache_lock);
1618 spin_lock_init(&fs_info->fs_roots_radix_lock); 1618 spin_lock_init(&fs_info->fs_roots_radix_lock);
1619 spin_lock_init(&fs_info->delayed_iput_lock); 1619 spin_lock_init(&fs_info->delayed_iput_lock);
@@ -1645,6 +1645,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1645 fs_info->max_inline = 8192 * 1024; 1645 fs_info->max_inline = 8192 * 1024;
1646 fs_info->metadata_ratio = 0; 1646 fs_info->metadata_ratio = 0;
1647 fs_info->defrag_inodes = RB_ROOT; 1647 fs_info->defrag_inodes = RB_ROOT;
1648 fs_info->trans_no_join = 0;
1648 1649
1649 fs_info->thread_pool_size = min_t(unsigned long, 1650 fs_info->thread_pool_size = min_t(unsigned long,
1650 num_online_cpus() + 2, 8); 1651 num_online_cpus() + 2, 8);
@@ -1667,8 +1668,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1667 init_waitqueue_head(&fs_info->scrub_pause_wait); 1668 init_waitqueue_head(&fs_info->scrub_pause_wait);
1668 init_rwsem(&fs_info->scrub_super_lock); 1669 init_rwsem(&fs_info->scrub_super_lock);
1669 fs_info->scrub_workers_refcnt = 0; 1670 fs_info->scrub_workers_refcnt = 0;
1670 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1671 fs_info->thread_pool_size, &fs_info->generic_worker);
1672 1671
1673 sb->s_blocksize = 4096; 1672 sb->s_blocksize = 4096;
1674 sb->s_blocksize_bits = blksize_bits(4096); 1673 sb->s_blocksize_bits = blksize_bits(4096);
@@ -1709,7 +1708,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1709 fs_info->do_barriers = 1; 1708 fs_info->do_barriers = 1;
1710 1709
1711 1710
1712 mutex_init(&fs_info->trans_mutex);
1713 mutex_init(&fs_info->ordered_operations_mutex); 1711 mutex_init(&fs_info->ordered_operations_mutex);
1714 mutex_init(&fs_info->tree_log_mutex); 1712 mutex_init(&fs_info->tree_log_mutex);
1715 mutex_init(&fs_info->chunk_mutex); 1713 mutex_init(&fs_info->chunk_mutex);
@@ -2479,13 +2477,13 @@ int btrfs_commit_super(struct btrfs_root *root)
2479 down_write(&root->fs_info->cleanup_work_sem); 2477 down_write(&root->fs_info->cleanup_work_sem);
2480 up_write(&root->fs_info->cleanup_work_sem); 2478 up_write(&root->fs_info->cleanup_work_sem);
2481 2479
2482 trans = btrfs_join_transaction(root, 1); 2480 trans = btrfs_join_transaction(root);
2483 if (IS_ERR(trans)) 2481 if (IS_ERR(trans))
2484 return PTR_ERR(trans); 2482 return PTR_ERR(trans);
2485 ret = btrfs_commit_transaction(trans, root); 2483 ret = btrfs_commit_transaction(trans, root);
2486 BUG_ON(ret); 2484 BUG_ON(ret);
2487 /* run commit again to drop the original snapshot */ 2485 /* run commit again to drop the original snapshot */
2488 trans = btrfs_join_transaction(root, 1); 2486 trans = btrfs_join_transaction(root);
2489 if (IS_ERR(trans)) 2487 if (IS_ERR(trans))
2490 return PTR_ERR(trans); 2488 return PTR_ERR(trans);
2491 btrfs_commit_transaction(trans, root); 2489 btrfs_commit_transaction(trans, root);
@@ -2911,9 +2909,8 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
2911 2909
2912 INIT_LIST_HEAD(&splice); 2910 INIT_LIST_HEAD(&splice);
2913 2911
2914 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2915
2916 spin_lock(&root->fs_info->delalloc_lock); 2912 spin_lock(&root->fs_info->delalloc_lock);
2913 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
2917 2914
2918 while (!list_empty(&splice)) { 2915 while (!list_empty(&splice)) {
2919 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 2916 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
@@ -3024,10 +3021,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3024 3021
3025 WARN_ON(1); 3022 WARN_ON(1);
3026 3023
3027 mutex_lock(&root->fs_info->trans_mutex);
3028 mutex_lock(&root->fs_info->transaction_kthread_mutex); 3024 mutex_lock(&root->fs_info->transaction_kthread_mutex);
3029 3025
3026 spin_lock(&root->fs_info->trans_lock);
3030 list_splice_init(&root->fs_info->trans_list, &list); 3027 list_splice_init(&root->fs_info->trans_list, &list);
3028 root->fs_info->trans_no_join = 1;
3029 spin_unlock(&root->fs_info->trans_lock);
3030
3031 while (!list_empty(&list)) { 3031 while (!list_empty(&list)) {
3032 t = list_entry(list.next, struct btrfs_transaction, list); 3032 t = list_entry(list.next, struct btrfs_transaction, list);
3033 if (!t) 3033 if (!t)
@@ -3052,23 +3052,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3052 t->blocked = 0; 3052 t->blocked = 0;
3053 if (waitqueue_active(&root->fs_info->transaction_wait)) 3053 if (waitqueue_active(&root->fs_info->transaction_wait))
3054 wake_up(&root->fs_info->transaction_wait); 3054 wake_up(&root->fs_info->transaction_wait);
3055 mutex_unlock(&root->fs_info->trans_mutex);
3056 3055
3057 mutex_lock(&root->fs_info->trans_mutex);
3058 t->commit_done = 1; 3056 t->commit_done = 1;
3059 if (waitqueue_active(&t->commit_wait)) 3057 if (waitqueue_active(&t->commit_wait))
3060 wake_up(&t->commit_wait); 3058 wake_up(&t->commit_wait);
3061 mutex_unlock(&root->fs_info->trans_mutex);
3062
3063 mutex_lock(&root->fs_info->trans_mutex);
3064 3059
3065 btrfs_destroy_pending_snapshots(t); 3060 btrfs_destroy_pending_snapshots(t);
3066 3061
3067 btrfs_destroy_delalloc_inodes(root); 3062 btrfs_destroy_delalloc_inodes(root);
3068 3063
3069 spin_lock(&root->fs_info->new_trans_lock); 3064 spin_lock(&root->fs_info->trans_lock);
3070 root->fs_info->running_transaction = NULL; 3065 root->fs_info->running_transaction = NULL;
3071 spin_unlock(&root->fs_info->new_trans_lock); 3066 spin_unlock(&root->fs_info->trans_lock);
3072 3067
3073 btrfs_destroy_marked_extents(root, &t->dirty_pages, 3068 btrfs_destroy_marked_extents(root, &t->dirty_pages,
3074 EXTENT_DIRTY); 3069 EXTENT_DIRTY);
@@ -3082,8 +3077,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3082 kmem_cache_free(btrfs_transaction_cachep, t); 3077 kmem_cache_free(btrfs_transaction_cachep, t);
3083 } 3078 }
3084 3079
3080 spin_lock(&root->fs_info->trans_lock);
3081 root->fs_info->trans_no_join = 0;
3082 spin_unlock(&root->fs_info->trans_lock);
3085 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 3083 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
3086 mutex_unlock(&root->fs_info->trans_mutex);
3087 3084
3088 return 0; 3085 return 0;
3089} 3086}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 169bd62ce776..b42efc2ded51 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -348,7 +348,7 @@ static int caching_kthread(void *data)
348 */ 348 */
349 path->skip_locking = 1; 349 path->skip_locking = 1;
350 path->search_commit_root = 1; 350 path->search_commit_root = 1;
351 path->reada = 2; 351 path->reada = 1;
352 352
353 key.objectid = last; 353 key.objectid = last;
354 key.offset = 0; 354 key.offset = 0;
@@ -366,8 +366,7 @@ again:
366 nritems = btrfs_header_nritems(leaf); 366 nritems = btrfs_header_nritems(leaf);
367 367
368 while (1) { 368 while (1) {
369 smp_mb(); 369 if (btrfs_fs_closing(fs_info) > 1) {
370 if (fs_info->closing > 1) {
371 last = (u64)-1; 370 last = (u64)-1;
372 break; 371 break;
373 } 372 }
@@ -379,15 +378,18 @@ again:
379 if (ret) 378 if (ret)
380 break; 379 break;
381 380
382 caching_ctl->progress = last; 381 if (need_resched() ||
383 btrfs_release_path(path); 382 btrfs_next_leaf(extent_root, path)) {
384 up_read(&fs_info->extent_commit_sem); 383 caching_ctl->progress = last;
385 mutex_unlock(&caching_ctl->mutex); 384 btrfs_release_path(path);
386 if (btrfs_transaction_in_commit(fs_info)) 385 up_read(&fs_info->extent_commit_sem);
387 schedule_timeout(1); 386 mutex_unlock(&caching_ctl->mutex);
388 else
389 cond_resched(); 387 cond_resched();
390 goto again; 388 goto again;
389 }
390 leaf = path->nodes[0];
391 nritems = btrfs_header_nritems(leaf);
392 continue;
391 } 393 }
392 394
393 if (key.objectid < block_group->key.objectid) { 395 if (key.objectid < block_group->key.objectid) {
@@ -3065,7 +3067,7 @@ again:
3065 spin_unlock(&data_sinfo->lock); 3067 spin_unlock(&data_sinfo->lock);
3066alloc: 3068alloc:
3067 alloc_target = btrfs_get_alloc_profile(root, 1); 3069 alloc_target = btrfs_get_alloc_profile(root, 1);
3068 trans = btrfs_join_transaction(root, 1); 3070 trans = btrfs_join_transaction(root);
3069 if (IS_ERR(trans)) 3071 if (IS_ERR(trans))
3070 return PTR_ERR(trans); 3072 return PTR_ERR(trans);
3071 3073
@@ -3087,13 +3089,21 @@ alloc:
3087 } 3089 }
3088 goto again; 3090 goto again;
3089 } 3091 }
3092
3093 /*
3094 * If we have less pinned bytes than we want to allocate then
3095 * don't bother committing the transaction, it won't help us.
3096 */
3097 if (data_sinfo->bytes_pinned < bytes)
3098 committed = 1;
3090 spin_unlock(&data_sinfo->lock); 3099 spin_unlock(&data_sinfo->lock);
3091 3100
3092 /* commit the current transaction and try again */ 3101 /* commit the current transaction and try again */
3093commit_trans: 3102commit_trans:
3094 if (!committed && !root->fs_info->open_ioctl_trans) { 3103 if (!committed &&
3104 !atomic_read(&root->fs_info->open_ioctl_trans)) {
3095 committed = 1; 3105 committed = 1;
3096 trans = btrfs_join_transaction(root, 1); 3106 trans = btrfs_join_transaction(root);
3097 if (IS_ERR(trans)) 3107 if (IS_ERR(trans))
3098 return PTR_ERR(trans); 3108 return PTR_ERR(trans);
3099 ret = btrfs_commit_transaction(trans, root); 3109 ret = btrfs_commit_transaction(trans, root);
@@ -3472,7 +3482,7 @@ again:
3472 goto out; 3482 goto out;
3473 3483
3474 ret = -ENOSPC; 3484 ret = -ENOSPC;
3475 trans = btrfs_join_transaction(root, 1); 3485 trans = btrfs_join_transaction(root);
3476 if (IS_ERR(trans)) 3486 if (IS_ERR(trans))
3477 goto out; 3487 goto out;
3478 ret = btrfs_commit_transaction(trans, root); 3488 ret = btrfs_commit_transaction(trans, root);
@@ -3699,7 +3709,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3699 if (trans) 3709 if (trans)
3700 return -EAGAIN; 3710 return -EAGAIN;
3701 3711
3702 trans = btrfs_join_transaction(root, 1); 3712 trans = btrfs_join_transaction(root);
3703 BUG_ON(IS_ERR(trans)); 3713 BUG_ON(IS_ERR(trans));
3704 ret = btrfs_commit_transaction(trans, root); 3714 ret = btrfs_commit_transaction(trans, root);
3705 return 0; 3715 return 0;
@@ -3837,6 +3847,37 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3837 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3847 WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3838} 3848}
3839 3849
3850int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
3851 struct btrfs_root *root,
3852 struct btrfs_block_rsv *rsv)
3853{
3854 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
3855 u64 num_bytes;
3856 int ret;
3857
3858 /*
3859 * Truncate should be freeing data, but give us 2 items just in case it
3860 * needs to use some space. We may want to be smarter about this in the
3861 * future.
3862 */
3863 num_bytes = btrfs_calc_trans_metadata_size(root, 2);
3864
3865 /* We already have enough bytes, just return */
3866 if (rsv->reserved >= num_bytes)
3867 return 0;
3868
3869 num_bytes -= rsv->reserved;
3870
3871 /*
3872 * You should have reserved enough space before hand to do this, so this
3873 * should not fail.
3874 */
3875 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
3876 BUG_ON(ret);
3877
3878 return 0;
3879}
3880
3840int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3881int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3841 struct btrfs_root *root, 3882 struct btrfs_root *root,
3842 int num_items) 3883 int num_items)
@@ -3877,23 +3918,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
3877 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3918 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
3878 3919
3879 /* 3920 /*
3880 * one for deleting orphan item, one for updating inode and 3921 * We need to hold space in order to delete our orphan item once we've
3881 * two for calling btrfs_truncate_inode_items. 3922 * added it, so this takes the reservation so we can release it later
3882 * 3923 * when we are truly done with the orphan item.
3883 * btrfs_truncate_inode_items is a delete operation, it frees
3884 * more space than it uses in most cases. So two units of
3885 * metadata space should be enough for calling it many times.
3886 * If all of the metadata space is used, we can commit
3887 * transaction and use space it freed.
3888 */ 3924 */
3889 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); 3925 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3890 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3926 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3891} 3927}
3892 3928
3893void btrfs_orphan_release_metadata(struct inode *inode) 3929void btrfs_orphan_release_metadata(struct inode *inode)
3894{ 3930{
3895 struct btrfs_root *root = BTRFS_I(inode)->root; 3931 struct btrfs_root *root = BTRFS_I(inode)->root;
3896 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4); 3932 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
3897 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3933 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
3898} 3934}
3899 3935
@@ -4987,6 +5023,15 @@ have_block_group:
4987 if (unlikely(block_group->ro)) 5023 if (unlikely(block_group->ro))
4988 goto loop; 5024 goto loop;
4989 5025
5026 spin_lock(&block_group->free_space_ctl->tree_lock);
5027 if (cached &&
5028 block_group->free_space_ctl->free_space <
5029 num_bytes + empty_size) {
5030 spin_unlock(&block_group->free_space_ctl->tree_lock);
5031 goto loop;
5032 }
5033 spin_unlock(&block_group->free_space_ctl->tree_lock);
5034
4990 /* 5035 /*
4991 * Ok we want to try and use the cluster allocator, so lets look 5036 * Ok we want to try and use the cluster allocator, so lets look
4992 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5037 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
@@ -5150,6 +5195,7 @@ checks:
5150 btrfs_add_free_space(block_group, offset, 5195 btrfs_add_free_space(block_group, offset,
5151 search_start - offset); 5196 search_start - offset);
5152 BUG_ON(offset > search_start); 5197 BUG_ON(offset > search_start);
5198 btrfs_put_block_group(block_group);
5153 break; 5199 break;
5154loop: 5200loop:
5155 failed_cluster_refill = false; 5201 failed_cluster_refill = false;
@@ -5172,9 +5218,7 @@ loop:
5172 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5218 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
5173 * again 5219 * again
5174 */ 5220 */
5175 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 5221 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) {
5176 (found_uncached_bg || empty_size || empty_cluster ||
5177 allowed_chunk_alloc)) {
5178 index = 0; 5222 index = 0;
5179 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5223 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5180 found_uncached_bg = false; 5224 found_uncached_bg = false;
@@ -5214,42 +5258,39 @@ loop:
5214 goto search; 5258 goto search;
5215 } 5259 }
5216 5260
5217 if (loop < LOOP_CACHING_WAIT) { 5261 loop++;
5218 loop++;
5219 goto search;
5220 }
5221 5262
5222 if (loop == LOOP_ALLOC_CHUNK) { 5263 if (loop == LOOP_ALLOC_CHUNK) {
5223 empty_size = 0; 5264 if (allowed_chunk_alloc) {
5224 empty_cluster = 0; 5265 ret = do_chunk_alloc(trans, root, num_bytes +
5225 } 5266 2 * 1024 * 1024, data,
5267 CHUNK_ALLOC_LIMITED);
5268 allowed_chunk_alloc = 0;
5269 if (ret == 1)
5270 done_chunk_alloc = 1;
5271 } else if (!done_chunk_alloc &&
5272 space_info->force_alloc ==
5273 CHUNK_ALLOC_NO_FORCE) {
5274 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5275 }
5226 5276
5227 if (allowed_chunk_alloc) { 5277 /*
5228 ret = do_chunk_alloc(trans, root, num_bytes + 5278 * We didn't allocate a chunk, go ahead and drop the
5229 2 * 1024 * 1024, data, 5279 * empty size and loop again.
5230 CHUNK_ALLOC_LIMITED); 5280 */
5231 allowed_chunk_alloc = 0; 5281 if (!done_chunk_alloc)
5232 done_chunk_alloc = 1; 5282 loop = LOOP_NO_EMPTY_SIZE;
5233 } else if (!done_chunk_alloc &&
5234 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5235 space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5236 } 5283 }
5237 5284
5238 if (loop < LOOP_NO_EMPTY_SIZE) { 5285 if (loop == LOOP_NO_EMPTY_SIZE) {
5239 loop++; 5286 empty_size = 0;
5240 goto search; 5287 empty_cluster = 0;
5241 } 5288 }
5242 ret = -ENOSPC; 5289
5290 goto search;
5243 } else if (!ins->objectid) { 5291 } else if (!ins->objectid) {
5244 ret = -ENOSPC; 5292 ret = -ENOSPC;
5245 } 5293 } else if (ins->objectid) {
5246
5247 /* we found what we needed */
5248 if (ins->objectid) {
5249 if (!(data & BTRFS_BLOCK_GROUP_DATA))
5250 trans->block_group = block_group->key.objectid;
5251
5252 btrfs_put_block_group(block_group);
5253 ret = 0; 5294 ret = 0;
5254 } 5295 }
5255 5296
@@ -6526,7 +6567,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
6526 6567
6527 BUG_ON(cache->ro); 6568 BUG_ON(cache->ro);
6528 6569
6529 trans = btrfs_join_transaction(root, 1); 6570 trans = btrfs_join_transaction(root);
6530 BUG_ON(IS_ERR(trans)); 6571 BUG_ON(IS_ERR(trans));
6531 6572
6532 alloc_flags = update_block_group_flags(root, cache->flags); 6573 alloc_flags = update_block_group_flags(root, cache->flags);
@@ -6882,6 +6923,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6882 path = btrfs_alloc_path(); 6923 path = btrfs_alloc_path();
6883 if (!path) 6924 if (!path)
6884 return -ENOMEM; 6925 return -ENOMEM;
6926 path->reada = 1;
6885 6927
6886 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 6928 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
6887 if (cache_gen != 0 && 6929 if (cache_gen != 0 &&
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c5d9fbb92bc3..7055d11c1efd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1476,7 +1476,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
1476 if (total_bytes >= max_bytes) 1476 if (total_bytes >= max_bytes)
1477 break; 1477 break;
1478 if (!found) { 1478 if (!found) {
1479 *start = state->start; 1479 *start = max(cur_start, state->start);
1480 found = 1; 1480 found = 1;
1481 } 1481 }
1482 last = state->end; 1482 last = state->end;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 4e8445a4757c..a11a92ee2d30 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -126,9 +126,9 @@ struct extent_buffer {
126 unsigned long map_len; 126 unsigned long map_len;
127 struct page *first_page; 127 struct page *first_page;
128 unsigned long bflags; 128 unsigned long bflags;
129 atomic_t refs;
130 struct list_head leak_list; 129 struct list_head leak_list;
131 struct rcu_head rcu_head; 130 struct rcu_head rcu_head;
131 atomic_t refs;
132 132
133 /* the spinlock is used to protect most operations */ 133 /* the spinlock is used to protect most operations */
134 spinlock_t lock; 134 spinlock_t lock;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c6a22d783c35..fa4ef18b66b1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
129 if (!btrfs_test_opt(root, AUTO_DEFRAG)) 129 if (!btrfs_test_opt(root, AUTO_DEFRAG))
130 return 0; 130 return 0;
131 131
132 if (root->fs_info->closing) 132 if (btrfs_fs_closing(root->fs_info))
133 return 0; 133 return 0;
134 134
135 if (BTRFS_I(inode)->in_defrag) 135 if (BTRFS_I(inode)->in_defrag)
@@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
144 if (!defrag) 144 if (!defrag)
145 return -ENOMEM; 145 return -ENOMEM;
146 146
147 defrag->ino = inode->i_ino; 147 defrag->ino = btrfs_ino(inode);
148 defrag->transid = transid; 148 defrag->transid = transid;
149 defrag->root = root->root_key.objectid; 149 defrag->root = root->root_key.objectid;
150 150
@@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
229 first_ino = defrag->ino + 1; 229 first_ino = defrag->ino + 1;
230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); 230 rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
231 231
232 if (fs_info->closing) 232 if (btrfs_fs_closing(fs_info))
233 goto next_free; 233 goto next_free;
234 234
235 spin_unlock(&fs_info->defrag_inodes_lock); 235 spin_unlock(&fs_info->defrag_inodes_lock);
@@ -1480,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync)
1480 * the current transaction, we can bail out now without any 1480 * the current transaction, we can bail out now without any
1481 * syncing 1481 * syncing
1482 */ 1482 */
1483 mutex_lock(&root->fs_info->trans_mutex); 1483 smp_mb();
1484 if (BTRFS_I(inode)->last_trans <= 1484 if (BTRFS_I(inode)->last_trans <=
1485 root->fs_info->last_trans_committed) { 1485 root->fs_info->last_trans_committed) {
1486 BTRFS_I(inode)->last_trans = 0; 1486 BTRFS_I(inode)->last_trans = 0;
1487 mutex_unlock(&root->fs_info->trans_mutex);
1488 goto out; 1487 goto out;
1489 } 1488 }
1490 mutex_unlock(&root->fs_info->trans_mutex);
1491 1489
1492 /* 1490 /*
1493 * ok we haven't committed the transaction yet, lets do a commit 1491 * ok we haven't committed the transaction yet, lets do a commit
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 70d45795d758..9f985a429877 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
98 return inode; 98 return inode;
99 99
100 spin_lock(&block_group->lock); 100 spin_lock(&block_group->lock);
101 if (!root->fs_info->closing) { 101 if (!btrfs_fs_closing(root->fs_info)) {
102 block_group->inode = igrab(inode); 102 block_group->inode = igrab(inode);
103 block_group->iref = 1; 103 block_group->iref = 1;
104 } 104 }
@@ -250,7 +250,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
250 pgoff_t index = 0; 250 pgoff_t index = 0;
251 unsigned long first_page_offset; 251 unsigned long first_page_offset;
252 int num_checksums; 252 int num_checksums;
253 int ret = 0, ret2; 253 int ret = 0;
254 254
255 INIT_LIST_HEAD(&bitmaps); 255 INIT_LIST_HEAD(&bitmaps);
256 256
@@ -402,7 +402,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
402 spin_lock(&ctl->tree_lock); 402 spin_lock(&ctl->tree_lock);
403 ret = link_free_space(ctl, e); 403 ret = link_free_space(ctl, e);
404 spin_unlock(&ctl->tree_lock); 404 spin_unlock(&ctl->tree_lock);
405 BUG_ON(ret); 405 if (ret) {
406 printk(KERN_ERR "Duplicate entries in "
407 "free space cache, dumping\n");
408 kunmap(page);
409 unlock_page(page);
410 page_cache_release(page);
411 goto free_cache;
412 }
406 } else { 413 } else {
407 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 414 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
408 if (!e->bitmap) { 415 if (!e->bitmap) {
@@ -414,10 +421,18 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
414 goto free_cache; 421 goto free_cache;
415 } 422 }
416 spin_lock(&ctl->tree_lock); 423 spin_lock(&ctl->tree_lock);
417 ret2 = link_free_space(ctl, e); 424 ret = link_free_space(ctl, e);
418 ctl->total_bitmaps++; 425 ctl->total_bitmaps++;
419 ctl->op->recalc_thresholds(ctl); 426 ctl->op->recalc_thresholds(ctl);
420 spin_unlock(&ctl->tree_lock); 427 spin_unlock(&ctl->tree_lock);
428 if (ret) {
429 printk(KERN_ERR "Duplicate entries in "
430 "free space cache, dumping\n");
431 kunmap(page);
432 unlock_page(page);
433 page_cache_release(page);
434 goto free_cache;
435 }
421 list_add_tail(&e->list, &bitmaps); 436 list_add_tail(&e->list, &bitmaps);
422 } 437 }
423 438
@@ -478,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
478 * If we're unmounting then just return, since this does a search on the 493 * If we're unmounting then just return, since this does a search on the
479 * normal root and not the commit root and we could deadlock. 494 * normal root and not the commit root and we could deadlock.
480 */ 495 */
481 smp_mb(); 496 if (btrfs_fs_closing(fs_info))
482 if (fs_info->closing)
483 return 0; 497 return 0;
484 498
485 /* 499 /*
@@ -575,10 +589,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
575 589
576 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 590 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
577 PAGE_CACHE_SHIFT; 591 PAGE_CACHE_SHIFT;
592
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
578 filemap_write_and_wait(inode->i_mapping); 599 filemap_write_and_wait(inode->i_mapping);
579 btrfs_wait_ordered_range(inode, inode->i_size & 600 btrfs_wait_ordered_range(inode, inode->i_size &
580 ~(root->sectorsize - 1), (u64)-1); 601 ~(root->sectorsize - 1), (u64)-1);
581 602
603 /* make sure we don't overflow that first page */
604 if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
605 /* this is really the same as running out of space, where we also return 0 */
606 printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
607 ret = 0;
608 goto out_update;
609 }
610
582 /* We need a checksum per page. */ 611 /* We need a checksum per page. */
583 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); 612 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
584 if (!crc) 613 if (!crc)
@@ -590,12 +619,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
590 return -1; 619 return -1;
591 } 620 }
592 621
593 /* Since the first page has all of our checksums and our generation we
594 * need to calculate the offset into the page that we can start writing
595 * our entries.
596 */
597 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
598
599 /* Get the cluster for this block_group if it exists */ 622 /* Get the cluster for this block_group if it exists */
600 if (block_group && !list_empty(&block_group->cluster_list)) 623 if (block_group && !list_empty(&block_group->cluster_list))
601 cluster = list_entry(block_group->cluster_list.next, 624 cluster = list_entry(block_group->cluster_list.next,
@@ -857,12 +880,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
857 ret = 1; 880 ret = 1;
858 881
859out_free: 882out_free:
883 kfree(checksums);
884 kfree(pages);
885
886out_update:
860 if (ret != 1) { 887 if (ret != 1) {
861 invalidate_inode_pages2_range(inode->i_mapping, 0, index); 888 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
862 BTRFS_I(inode)->generation = 0; 889 BTRFS_I(inode)->generation = 0;
863 } 890 }
864 kfree(checksums);
865 kfree(pages);
866 btrfs_update_inode(trans, root, inode); 891 btrfs_update_inode(trans, root, inode);
867 return ret; 892 return ret;
868} 893}
@@ -963,10 +988,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
963 * logically. 988 * logically.
964 */ 989 */
965 if (bitmap) { 990 if (bitmap) {
966 WARN_ON(info->bitmap); 991 if (info->bitmap) {
992 WARN_ON_ONCE(1);
993 return -EEXIST;
994 }
967 p = &(*p)->rb_right; 995 p = &(*p)->rb_right;
968 } else { 996 } else {
969 WARN_ON(!info->bitmap); 997 if (!info->bitmap) {
998 WARN_ON_ONCE(1);
999 return -EEXIST;
1000 }
970 p = &(*p)->rb_left; 1001 p = &(*p)->rb_left;
971 } 1002 }
972 } 1003 }
@@ -1386,6 +1417,23 @@ again:
1386 return 0; 1417 return 0;
1387} 1418}
1388 1419
1420static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1421 struct btrfs_free_space *info, u64 offset,
1422 u64 bytes)
1423{
1424 u64 bytes_to_set = 0;
1425 u64 end;
1426
1427 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1428
1429 bytes_to_set = min(end - offset, bytes);
1430
1431 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1432
1433 return bytes_to_set;
1434
1435}
1436
1389static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1437static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1390 struct btrfs_free_space *info) 1438 struct btrfs_free_space *info)
1391{ 1439{
@@ -1422,12 +1470,18 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1422 return true; 1470 return true;
1423} 1471}
1424 1472
1473static struct btrfs_free_space_op free_space_op = {
1474 .recalc_thresholds = recalculate_thresholds,
1475 .use_bitmap = use_bitmap,
1476};
1477
1425static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 1478static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1426 struct btrfs_free_space *info) 1479 struct btrfs_free_space *info)
1427{ 1480{
1428 struct btrfs_free_space *bitmap_info; 1481 struct btrfs_free_space *bitmap_info;
1482 struct btrfs_block_group_cache *block_group = NULL;
1429 int added = 0; 1483 int added = 0;
1430 u64 bytes, offset, end; 1484 u64 bytes, offset, bytes_added;
1431 int ret; 1485 int ret;
1432 1486
1433 bytes = info->bytes; 1487 bytes = info->bytes;
@@ -1436,7 +1490,49 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1436 if (!ctl->op->use_bitmap(ctl, info)) 1490 if (!ctl->op->use_bitmap(ctl, info))
1437 return 0; 1491 return 0;
1438 1492
1493 if (ctl->op == &free_space_op)
1494 block_group = ctl->private;
1439again: 1495again:
1496 /*
1497 * Since we link bitmaps right into the cluster we need to see if we
1498 * have a cluster here, and if so and it has our bitmap we need to add
1499 * the free space to that bitmap.
1500 */
1501 if (block_group && !list_empty(&block_group->cluster_list)) {
1502 struct btrfs_free_cluster *cluster;
1503 struct rb_node *node;
1504 struct btrfs_free_space *entry;
1505
1506 cluster = list_entry(block_group->cluster_list.next,
1507 struct btrfs_free_cluster,
1508 block_group_list);
1509 spin_lock(&cluster->lock);
1510 node = rb_first(&cluster->root);
1511 if (!node) {
1512 spin_unlock(&cluster->lock);
1513 goto no_cluster_bitmap;
1514 }
1515
1516 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1517 if (!entry->bitmap) {
1518 spin_unlock(&cluster->lock);
1519 goto no_cluster_bitmap;
1520 }
1521
1522 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1523 bytes_added = add_bytes_to_bitmap(ctl, entry,
1524 offset, bytes);
1525 bytes -= bytes_added;
1526 offset += bytes_added;
1527 }
1528 spin_unlock(&cluster->lock);
1529 if (!bytes) {
1530 ret = 1;
1531 goto out;
1532 }
1533 }
1534
1535no_cluster_bitmap:
1440 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1536 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1441 1, 0); 1537 1, 0);
1442 if (!bitmap_info) { 1538 if (!bitmap_info) {
@@ -1444,19 +1540,10 @@ again:
1444 goto new_bitmap; 1540 goto new_bitmap;
1445 } 1541 }
1446 1542
1447 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1543 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1448 1544 bytes -= bytes_added;
1449 if (offset >= bitmap_info->offset && offset + bytes > end) { 1545 offset += bytes_added;
1450 bitmap_set_bits(ctl, bitmap_info, offset, end - offset); 1546 added = 0;
1451 bytes -= end - offset;
1452 offset = end;
1453 added = 0;
1454 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1455 bitmap_set_bits(ctl, bitmap_info, offset, bytes);
1456 bytes = 0;
1457 } else {
1458 BUG();
1459 }
1460 1547
1461 if (!bytes) { 1548 if (!bytes) {
1462 ret = 1; 1549 ret = 1;
@@ -1735,11 +1822,6 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1735 "\n", count); 1822 "\n", count);
1736} 1823}
1737 1824
1738static struct btrfs_free_space_op free_space_op = {
1739 .recalc_thresholds = recalculate_thresholds,
1740 .use_bitmap = use_bitmap,
1741};
1742
1743void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1825void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1744{ 1826{
1745 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1827 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2111,9 +2193,11 @@ again:
2111/* 2193/*
2112 * This searches the block group for just extents to fill the cluster with. 2194 * This searches the block group for just extents to fill the cluster with.
2113 */ 2195 */
2114static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2196static noinline int
2115 struct btrfs_free_cluster *cluster, 2197setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2116 u64 offset, u64 bytes, u64 min_bytes) 2198 struct btrfs_free_cluster *cluster,
2199 struct list_head *bitmaps, u64 offset, u64 bytes,
2200 u64 min_bytes)
2117{ 2201{
2118 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2202 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2119 struct btrfs_free_space *first = NULL; 2203 struct btrfs_free_space *first = NULL;
@@ -2135,6 +2219,8 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2135 * extent entry. 2219 * extent entry.
2136 */ 2220 */
2137 while (entry->bitmap) { 2221 while (entry->bitmap) {
2222 if (list_empty(&entry->list))
2223 list_add_tail(&entry->list, bitmaps);
2138 node = rb_next(&entry->offset_index); 2224 node = rb_next(&entry->offset_index);
2139 if (!node) 2225 if (!node)
2140 return -ENOSPC; 2226 return -ENOSPC;
@@ -2154,8 +2240,12 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2154 return -ENOSPC; 2240 return -ENOSPC;
2155 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2241 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2156 2242
2157 if (entry->bitmap) 2243 if (entry->bitmap) {
2244 if (list_empty(&entry->list))
2245 list_add_tail(&entry->list, bitmaps);
2158 continue; 2246 continue;
2247 }
2248
2159 /* 2249 /*
2160 * we haven't filled the empty size and the window is 2250 * we haven't filled the empty size and the window is
2161 * very large. reset and try again 2251 * very large. reset and try again
@@ -2207,9 +2297,11 @@ static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2207 * This specifically looks for bitmaps that may work in the cluster, we assume 2297 * This specifically looks for bitmaps that may work in the cluster, we assume
2208 * that we have already failed to find extents that will work. 2298 * that we have already failed to find extents that will work.
2209 */ 2299 */
2210static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2300static noinline int
2211 struct btrfs_free_cluster *cluster, 2301setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2212 u64 offset, u64 bytes, u64 min_bytes) 2302 struct btrfs_free_cluster *cluster,
2303 struct list_head *bitmaps, u64 offset, u64 bytes,
2304 u64 min_bytes)
2213{ 2305{
2214 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2306 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2215 struct btrfs_free_space *entry; 2307 struct btrfs_free_space *entry;
@@ -2219,10 +2311,39 @@ static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2219 if (ctl->total_bitmaps == 0) 2311 if (ctl->total_bitmaps == 0)
2220 return -ENOSPC; 2312 return -ENOSPC;
2221 2313
2314 /*
2315 * First check our cached list of bitmaps and see if there is an entry
2316 * here that will work.
2317 */
2318 list_for_each_entry(entry, bitmaps, list) {
2319 if (entry->bytes < min_bytes)
2320 continue;
2321 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2322 bytes, min_bytes);
2323 if (!ret)
2324 return 0;
2325 }
2326
2327 /*
2328 * If we do have entries on our list and we are here then we didn't find
2329 * anything, so go ahead and get the next entry after the last entry in
2330 * this list and start the search from there.
2331 */
2332 if (!list_empty(bitmaps)) {
2333 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2334 list);
2335 node = rb_next(&entry->offset_index);
2336 if (!node)
2337 return -ENOSPC;
2338 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2339 goto search;
2340 }
2341
2222 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1); 2342 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2223 if (!entry) 2343 if (!entry)
2224 return -ENOSPC; 2344 return -ENOSPC;
2225 2345
2346search:
2226 node = &entry->offset_index; 2347 node = &entry->offset_index;
2227 do { 2348 do {
2228 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2349 entry = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -2253,6 +2374,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2253 u64 offset, u64 bytes, u64 empty_size) 2374 u64 offset, u64 bytes, u64 empty_size)
2254{ 2375{
2255 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2376 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2377 struct list_head bitmaps;
2378 struct btrfs_free_space *entry, *tmp;
2256 u64 min_bytes; 2379 u64 min_bytes;
2257 int ret; 2380 int ret;
2258 2381
@@ -2291,11 +2414,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2291 goto out; 2414 goto out;
2292 } 2415 }
2293 2416
2294 ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, 2417 INIT_LIST_HEAD(&bitmaps);
2295 min_bytes); 2418 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2419 bytes, min_bytes);
2296 if (ret) 2420 if (ret)
2297 ret = setup_cluster_bitmap(block_group, cluster, offset, 2421 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2298 bytes, min_bytes); 2422 offset, bytes, min_bytes);
2423
2424 /* Clear our temporary list */
2425 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2426 list_del_init(&entry->list);
2299 2427
2300 if (!ret) { 2428 if (!ret) {
2301 atomic_inc(&block_group->count); 2429 atomic_inc(&block_group->count);
@@ -2481,7 +2609,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2481 return inode; 2609 return inode;
2482 2610
2483 spin_lock(&root->cache_lock); 2611 spin_lock(&root->cache_lock);
2484 if (!root->fs_info->closing) 2612 if (!btrfs_fs_closing(root->fs_info))
2485 root->cache_inode = igrab(inode); 2613 root->cache_inode = igrab(inode);
2486 spin_unlock(&root->cache_lock); 2614 spin_unlock(&root->cache_lock);
2487 2615
@@ -2504,12 +2632,14 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2504 int ret = 0; 2632 int ret = 0;
2505 u64 root_gen = btrfs_root_generation(&root->root_item); 2633 u64 root_gen = btrfs_root_generation(&root->root_item);
2506 2634
2635 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2636 return 0;
2637
2507 /* 2638 /*
2508 * If we're unmounting then just return, since this does a search on the 2639 * If we're unmounting then just return, since this does a search on the
2509 * normal root and not the commit root and we could deadlock. 2640 * normal root and not the commit root and we could deadlock.
2510 */ 2641 */
2511 smp_mb(); 2642 if (btrfs_fs_closing(fs_info))
2512 if (fs_info->closing)
2513 return 0; 2643 return 0;
2514 2644
2515 path = btrfs_alloc_path(); 2645 path = btrfs_alloc_path();
@@ -2543,6 +2673,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
2543 struct inode *inode; 2673 struct inode *inode;
2544 int ret; 2674 int ret;
2545 2675
2676 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2677 return 0;
2678
2546 inode = lookup_free_ino_inode(root, path); 2679 inode = lookup_free_ino_inode(root, path);
2547 if (IS_ERR(inode)) 2680 if (IS_ERR(inode))
2548 return 0; 2681 return 0;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 3262cd17a12f..b4087e0fa871 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -38,6 +38,9 @@ static int caching_kthread(void *data)
38 int slot; 38 int slot;
39 int ret; 39 int ret;
40 40
41 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
42 return 0;
43
41 path = btrfs_alloc_path(); 44 path = btrfs_alloc_path();
42 if (!path) 45 if (!path)
43 return -ENOMEM; 46 return -ENOMEM;
@@ -59,8 +62,7 @@ again:
59 goto out; 62 goto out;
60 63
61 while (1) { 64 while (1) {
62 smp_mb(); 65 if (btrfs_fs_closing(fs_info))
63 if (fs_info->closing)
64 goto out; 66 goto out;
65 67
66 leaf = path->nodes[0]; 68 leaf = path->nodes[0];
@@ -141,6 +143,9 @@ static void start_caching(struct btrfs_root *root)
141 int ret; 143 int ret;
142 u64 objectid; 144 u64 objectid;
143 145
146 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
147 return;
148
144 spin_lock(&root->cache_lock); 149 spin_lock(&root->cache_lock);
145 if (root->cached != BTRFS_CACHE_NO) { 150 if (root->cached != BTRFS_CACHE_NO) {
146 spin_unlock(&root->cache_lock); 151 spin_unlock(&root->cache_lock);
@@ -178,6 +183,9 @@ static void start_caching(struct btrfs_root *root)
178 183
179int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) 184int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
180{ 185{
186 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
187 return btrfs_find_free_objectid(root, objectid);
188
181again: 189again:
182 *objectid = btrfs_find_ino_for_alloc(root); 190 *objectid = btrfs_find_ino_for_alloc(root);
183 191
@@ -201,6 +209,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
201{ 209{
202 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 210 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
203 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; 211 struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
212
213 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
214 return;
215
204again: 216again:
205 if (root->cached == BTRFS_CACHE_FINISHED) { 217 if (root->cached == BTRFS_CACHE_FINISHED) {
206 __btrfs_add_free_space(ctl, objectid, 1); 218 __btrfs_add_free_space(ctl, objectid, 1);
@@ -250,6 +262,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
250 struct rb_node *n; 262 struct rb_node *n;
251 u64 count; 263 u64 count;
252 264
265 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
266 return;
267
253 while (1) { 268 while (1) {
254 n = rb_first(rbroot); 269 n = rb_first(rbroot);
255 if (!n) 270 if (!n)
@@ -388,9 +403,24 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
388 int prealloc; 403 int prealloc;
389 bool retry = false; 404 bool retry = false;
390 405
406 /* only fs tree and subvol/snap needs ino cache */
407 if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
408 (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
409 root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
410 return 0;
411
412 /* Don't save inode cache if we are deleting this root */
413 if (btrfs_root_refs(&root->root_item) == 0 &&
414 root != root->fs_info->tree_root)
415 return 0;
416
417 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
418 return 0;
419
391 path = btrfs_alloc_path(); 420 path = btrfs_alloc_path();
392 if (!path) 421 if (!path)
393 return -ENOMEM; 422 return -ENOMEM;
423
394again: 424again:
395 inode = lookup_free_ino_inode(root, path); 425 inode = lookup_free_ino_inode(root, path);
396 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 426 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 39a9d5750efd..751ddf8fc58a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -138,7 +138,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
138 return -ENOMEM; 138 return -ENOMEM;
139 139
140 path->leave_spinning = 1; 140 path->leave_spinning = 1;
141 btrfs_set_trans_block_group(trans, inode);
142 141
143 key.objectid = btrfs_ino(inode); 142 key.objectid = btrfs_ino(inode);
144 key.offset = start; 143 key.offset = start;
@@ -426,9 +425,8 @@ again:
426 } 425 }
427 } 426 }
428 if (start == 0) { 427 if (start == 0) {
429 trans = btrfs_join_transaction(root, 1); 428 trans = btrfs_join_transaction(root);
430 BUG_ON(IS_ERR(trans)); 429 BUG_ON(IS_ERR(trans));
431 btrfs_set_trans_block_group(trans, inode);
432 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 430 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
433 431
434 /* lets try to make an inline extent */ 432 /* lets try to make an inline extent */
@@ -623,8 +621,9 @@ retry:
623 async_extent->start + async_extent->ram_size - 1, 621 async_extent->start + async_extent->ram_size - 1,
624 GFP_NOFS); 622 GFP_NOFS);
625 623
626 trans = btrfs_join_transaction(root, 1); 624 trans = btrfs_join_transaction(root);
627 BUG_ON(IS_ERR(trans)); 625 BUG_ON(IS_ERR(trans));
626 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
628 ret = btrfs_reserve_extent(trans, root, 627 ret = btrfs_reserve_extent(trans, root,
629 async_extent->compressed_size, 628 async_extent->compressed_size,
630 async_extent->compressed_size, 629 async_extent->compressed_size,
@@ -793,9 +792,8 @@ static noinline int cow_file_range(struct inode *inode,
793 int ret = 0; 792 int ret = 0;
794 793
795 BUG_ON(is_free_space_inode(root, inode)); 794 BUG_ON(is_free_space_inode(root, inode));
796 trans = btrfs_join_transaction(root, 1); 795 trans = btrfs_join_transaction(root);
797 BUG_ON(IS_ERR(trans)); 796 BUG_ON(IS_ERR(trans));
798 btrfs_set_trans_block_group(trans, inode);
799 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 797 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
800 798
801 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 799 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -1077,10 +1075,12 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1077 nolock = is_free_space_inode(root, inode); 1075 nolock = is_free_space_inode(root, inode);
1078 1076
1079 if (nolock) 1077 if (nolock)
1080 trans = btrfs_join_transaction_nolock(root, 1); 1078 trans = btrfs_join_transaction_nolock(root);
1081 else 1079 else
1082 trans = btrfs_join_transaction(root, 1); 1080 trans = btrfs_join_transaction(root);
1081
1083 BUG_ON(IS_ERR(trans)); 1082 BUG_ON(IS_ERR(trans));
1083 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1084 1084
1085 cow_start = (u64)-1; 1085 cow_start = (u64)-1;
1086 cur_offset = start; 1086 cur_offset = start;
@@ -1519,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1519{ 1519{
1520 struct btrfs_ordered_sum *sum; 1520 struct btrfs_ordered_sum *sum;
1521 1521
1522 btrfs_set_trans_block_group(trans, inode);
1523
1524 list_for_each_entry(sum, list, list) { 1522 list_for_each_entry(sum, list, list) {
1525 btrfs_csum_file_blocks(trans, 1523 btrfs_csum_file_blocks(trans,
1526 BTRFS_I(inode)->root->fs_info->csum_root, sum); 1524 BTRFS_I(inode)->root->fs_info->csum_root, sum);
@@ -1735,11 +1733,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1735 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1733 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1736 if (!ret) { 1734 if (!ret) {
1737 if (nolock) 1735 if (nolock)
1738 trans = btrfs_join_transaction_nolock(root, 1); 1736 trans = btrfs_join_transaction_nolock(root);
1739 else 1737 else
1740 trans = btrfs_join_transaction(root, 1); 1738 trans = btrfs_join_transaction(root);
1741 BUG_ON(IS_ERR(trans)); 1739 BUG_ON(IS_ERR(trans));
1742 btrfs_set_trans_block_group(trans, inode);
1743 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1740 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1744 ret = btrfs_update_inode(trans, root, inode); 1741 ret = btrfs_update_inode(trans, root, inode);
1745 BUG_ON(ret); 1742 BUG_ON(ret);
@@ -1752,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1752 0, &cached_state, GFP_NOFS); 1749 0, &cached_state, GFP_NOFS);
1753 1750
1754 if (nolock) 1751 if (nolock)
1755 trans = btrfs_join_transaction_nolock(root, 1); 1752 trans = btrfs_join_transaction_nolock(root);
1756 else 1753 else
1757 trans = btrfs_join_transaction(root, 1); 1754 trans = btrfs_join_transaction(root);
1758 BUG_ON(IS_ERR(trans)); 1755 BUG_ON(IS_ERR(trans));
1759 btrfs_set_trans_block_group(trans, inode);
1760 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1756 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1761 1757
1762 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1758 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -1990,7 +1986,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1990 } 1986 }
1991 1987
1992 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) 1988 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
1993 return 0; 1989 goto good;
1994 1990
1995 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && 1991 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1996 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { 1992 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
@@ -2431,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2431 (u64)-1); 2427 (u64)-1);
2432 2428
2433 if (root->orphan_block_rsv || root->orphan_item_inserted) { 2429 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2434 trans = btrfs_join_transaction(root, 1); 2430 trans = btrfs_join_transaction(root);
2435 if (!IS_ERR(trans)) 2431 if (!IS_ERR(trans))
2436 btrfs_end_transaction(trans, root); 2432 btrfs_end_transaction(trans, root);
2437 } 2433 }
@@ -2511,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
2511 struct btrfs_root *root = BTRFS_I(inode)->root; 2507 struct btrfs_root *root = BTRFS_I(inode)->root;
2512 struct btrfs_key location; 2508 struct btrfs_key location;
2513 int maybe_acls; 2509 int maybe_acls;
2514 u64 alloc_group_block;
2515 u32 rdev; 2510 u32 rdev;
2516 int ret; 2511 int ret;
2517 2512
2518 path = btrfs_alloc_path(); 2513 path = btrfs_alloc_path();
2519 BUG_ON(!path); 2514 BUG_ON(!path);
2515 path->leave_spinning = 1;
2520 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); 2516 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
2521 2517
2522 ret = btrfs_lookup_inode(NULL, root, path, &location, 0); 2518 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -2526,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
2526 leaf = path->nodes[0]; 2522 leaf = path->nodes[0];
2527 inode_item = btrfs_item_ptr(leaf, path->slots[0], 2523 inode_item = btrfs_item_ptr(leaf, path->slots[0],
2528 struct btrfs_inode_item); 2524 struct btrfs_inode_item);
2525 if (!leaf->map_token)
2526 map_private_extent_buffer(leaf, (unsigned long)inode_item,
2527 sizeof(struct btrfs_inode_item),
2528 &leaf->map_token, &leaf->kaddr,
2529 &leaf->map_start, &leaf->map_len,
2530 KM_USER1);
2529 2531
2530 inode->i_mode = btrfs_inode_mode(leaf, inode_item); 2532 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
2531 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); 2533 inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
@@ -2555,8 +2557,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2555 BTRFS_I(inode)->index_cnt = (u64)-1; 2557 BTRFS_I(inode)->index_cnt = (u64)-1;
2556 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2558 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2557 2559
2558 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2559
2560 /* 2560 /*
2561 * try to precache a NULL acl entry for files that don't have 2561 * try to precache a NULL acl entry for files that don't have
2562 * any xattrs or acls 2562 * any xattrs or acls
@@ -2566,8 +2566,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
2566 if (!maybe_acls) 2566 if (!maybe_acls)
2567 cache_no_acl(inode); 2567 cache_no_acl(inode);
2568 2568
2569 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2569 if (leaf->map_token) {
2570 alloc_group_block, 0); 2570 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2571 leaf->map_token = NULL;
2572 }
2573
2571 btrfs_free_path(path); 2574 btrfs_free_path(path);
2572 inode_item = NULL; 2575 inode_item = NULL;
2573 2576
@@ -2647,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2647 btrfs_set_inode_transid(leaf, item, trans->transid); 2650 btrfs_set_inode_transid(leaf, item, trans->transid);
2648 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2651 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2649 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2652 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2650 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2653 btrfs_set_inode_block_group(leaf, item, 0);
2651 2654
2652 if (leaf->map_token) { 2655 if (leaf->map_token) {
2653 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 2656 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
@@ -3004,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3004 if (IS_ERR(trans)) 3007 if (IS_ERR(trans))
3005 return PTR_ERR(trans); 3008 return PTR_ERR(trans);
3006 3009
3007 btrfs_set_trans_block_group(trans, dir);
3008
3009 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); 3010 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
3010 3011
3011 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 3012 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
@@ -3094,8 +3095,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
3094 if (IS_ERR(trans)) 3095 if (IS_ERR(trans))
3095 return PTR_ERR(trans); 3096 return PTR_ERR(trans);
3096 3097
3097 btrfs_set_trans_block_group(trans, dir);
3098
3099 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { 3098 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
3100 err = btrfs_unlink_subvol(trans, root, dir, 3099 err = btrfs_unlink_subvol(trans, root, dir,
3101 BTRFS_I(inode)->location.objectid, 3100 BTRFS_I(inode)->location.objectid,
@@ -3514,7 +3513,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3514 err = PTR_ERR(trans); 3513 err = PTR_ERR(trans);
3515 break; 3514 break;
3516 } 3515 }
3517 btrfs_set_trans_block_group(trans, inode);
3518 3516
3519 err = btrfs_drop_extents(trans, inode, cur_offset, 3517 err = btrfs_drop_extents(trans, inode, cur_offset,
3520 cur_offset + hole_size, 3518 cur_offset + hole_size,
@@ -3648,9 +3646,8 @@ void btrfs_evict_inode(struct inode *inode)
3648 btrfs_i_size_write(inode, 0); 3646 btrfs_i_size_write(inode, 0);
3649 3647
3650 while (1) { 3648 while (1) {
3651 trans = btrfs_start_transaction(root, 0); 3649 trans = btrfs_join_transaction(root);
3652 BUG_ON(IS_ERR(trans)); 3650 BUG_ON(IS_ERR(trans));
3653 btrfs_set_trans_block_group(trans, inode);
3654 trans->block_rsv = root->orphan_block_rsv; 3651 trans->block_rsv = root->orphan_block_rsv;
3655 3652
3656 ret = btrfs_block_rsv_check(trans, root, 3653 ret = btrfs_block_rsv_check(trans, root,
@@ -4133,7 +4130,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4133 path = btrfs_alloc_path(); 4130 path = btrfs_alloc_path();
4134 if (!path) 4131 if (!path)
4135 return -ENOMEM; 4132 return -ENOMEM;
4136 path->reada = 2; 4133
4134 path->reada = 1;
4137 4135
4138 if (key_type == BTRFS_DIR_INDEX_KEY) { 4136 if (key_type == BTRFS_DIR_INDEX_KEY) {
4139 INIT_LIST_HEAD(&ins_list); 4137 INIT_LIST_HEAD(&ins_list);
@@ -4268,18 +4266,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4268 if (BTRFS_I(inode)->dummy_inode) 4266 if (BTRFS_I(inode)->dummy_inode)
4269 return 0; 4267 return 0;
4270 4268
4271 smp_mb(); 4269 if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
4272 if (root->fs_info->closing && is_free_space_inode(root, inode))
4273 nolock = true; 4270 nolock = true;
4274 4271
4275 if (wbc->sync_mode == WB_SYNC_ALL) { 4272 if (wbc->sync_mode == WB_SYNC_ALL) {
4276 if (nolock) 4273 if (nolock)
4277 trans = btrfs_join_transaction_nolock(root, 1); 4274 trans = btrfs_join_transaction_nolock(root);
4278 else 4275 else
4279 trans = btrfs_join_transaction(root, 1); 4276 trans = btrfs_join_transaction(root);
4280 if (IS_ERR(trans)) 4277 if (IS_ERR(trans))
4281 return PTR_ERR(trans); 4278 return PTR_ERR(trans);
4282 btrfs_set_trans_block_group(trans, inode);
4283 if (nolock) 4279 if (nolock)
4284 ret = btrfs_end_transaction_nolock(trans, root); 4280 ret = btrfs_end_transaction_nolock(trans, root);
4285 else 4281 else
@@ -4303,9 +4299,8 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
4303 if (BTRFS_I(inode)->dummy_inode) 4299 if (BTRFS_I(inode)->dummy_inode)
4304 return; 4300 return;
4305 4301
4306 trans = btrfs_join_transaction(root, 1); 4302 trans = btrfs_join_transaction(root);
4307 BUG_ON(IS_ERR(trans)); 4303 BUG_ON(IS_ERR(trans));
4308 btrfs_set_trans_block_group(trans, inode);
4309 4304
4310 ret = btrfs_update_inode(trans, root, inode); 4305 ret = btrfs_update_inode(trans, root, inode);
4311 if (ret && ret == -ENOSPC) { 4306 if (ret && ret == -ENOSPC) {
@@ -4319,7 +4314,6 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
4319 PTR_ERR(trans)); 4314 PTR_ERR(trans));
4320 return; 4315 return;
4321 } 4316 }
4322 btrfs_set_trans_block_group(trans, inode);
4323 4317
4324 ret = btrfs_update_inode(trans, root, inode); 4318 ret = btrfs_update_inode(trans, root, inode);
4325 if (ret) { 4319 if (ret) {
@@ -4418,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4418 struct btrfs_root *root, 4412 struct btrfs_root *root,
4419 struct inode *dir, 4413 struct inode *dir,
4420 const char *name, int name_len, 4414 const char *name, int name_len,
4421 u64 ref_objectid, u64 objectid, 4415 u64 ref_objectid, u64 objectid, int mode,
4422 u64 alloc_hint, int mode, u64 *index) 4416 u64 *index)
4423{ 4417{
4424 struct inode *inode; 4418 struct inode *inode;
4425 struct btrfs_inode_item *inode_item; 4419 struct btrfs_inode_item *inode_item;
@@ -4472,8 +4466,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4472 owner = 0; 4466 owner = 0;
4473 else 4467 else
4474 owner = 1; 4468 owner = 1;
4475 BTRFS_I(inode)->block_group =
4476 btrfs_find_block_group(root, 0, alloc_hint, owner);
4477 4469
4478 key[0].objectid = objectid; 4470 key[0].objectid = objectid;
4479 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 4471 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -4629,15 +4621,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4629 if (IS_ERR(trans)) 4621 if (IS_ERR(trans))
4630 return PTR_ERR(trans); 4622 return PTR_ERR(trans);
4631 4623
4632 btrfs_set_trans_block_group(trans, dir);
4633
4634 err = btrfs_find_free_ino(root, &objectid); 4624 err = btrfs_find_free_ino(root, &objectid);
4635 if (err) 4625 if (err)
4636 goto out_unlock; 4626 goto out_unlock;
4637 4627
4638 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4628 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4639 dentry->d_name.len, btrfs_ino(dir), objectid, 4629 dentry->d_name.len, btrfs_ino(dir), objectid,
4640 BTRFS_I(dir)->block_group, mode, &index); 4630 mode, &index);
4641 if (IS_ERR(inode)) { 4631 if (IS_ERR(inode)) {
4642 err = PTR_ERR(inode); 4632 err = PTR_ERR(inode);
4643 goto out_unlock; 4633 goto out_unlock;
@@ -4649,7 +4639,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4649 goto out_unlock; 4639 goto out_unlock;
4650 } 4640 }
4651 4641
4652 btrfs_set_trans_block_group(trans, inode);
4653 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4642 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4654 if (err) 4643 if (err)
4655 drop_inode = 1; 4644 drop_inode = 1;
@@ -4658,8 +4647,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4658 init_special_inode(inode, inode->i_mode, rdev); 4647 init_special_inode(inode, inode->i_mode, rdev);
4659 btrfs_update_inode(trans, root, inode); 4648 btrfs_update_inode(trans, root, inode);
4660 } 4649 }
4661 btrfs_update_inode_block_group(trans, inode);
4662 btrfs_update_inode_block_group(trans, dir);
4663out_unlock: 4650out_unlock:
4664 nr = trans->blocks_used; 4651 nr = trans->blocks_used;
4665 btrfs_end_transaction_throttle(trans, root); 4652 btrfs_end_transaction_throttle(trans, root);
@@ -4692,15 +4679,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4692 if (IS_ERR(trans)) 4679 if (IS_ERR(trans))
4693 return PTR_ERR(trans); 4680 return PTR_ERR(trans);
4694 4681
4695 btrfs_set_trans_block_group(trans, dir);
4696
4697 err = btrfs_find_free_ino(root, &objectid); 4682 err = btrfs_find_free_ino(root, &objectid);
4698 if (err) 4683 if (err)
4699 goto out_unlock; 4684 goto out_unlock;
4700 4685
4701 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4686 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4702 dentry->d_name.len, btrfs_ino(dir), objectid, 4687 dentry->d_name.len, btrfs_ino(dir), objectid,
4703 BTRFS_I(dir)->block_group, mode, &index); 4688 mode, &index);
4704 if (IS_ERR(inode)) { 4689 if (IS_ERR(inode)) {
4705 err = PTR_ERR(inode); 4690 err = PTR_ERR(inode);
4706 goto out_unlock; 4691 goto out_unlock;
@@ -4712,7 +4697,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4712 goto out_unlock; 4697 goto out_unlock;
4713 } 4698 }
4714 4699
4715 btrfs_set_trans_block_group(trans, inode);
4716 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4700 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4717 if (err) 4701 if (err)
4718 drop_inode = 1; 4702 drop_inode = 1;
@@ -4723,8 +4707,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4723 inode->i_op = &btrfs_file_inode_operations; 4707 inode->i_op = &btrfs_file_inode_operations;
4724 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 4708 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4725 } 4709 }
4726 btrfs_update_inode_block_group(trans, inode);
4727 btrfs_update_inode_block_group(trans, dir);
4728out_unlock: 4710out_unlock:
4729 nr = trans->blocks_used; 4711 nr = trans->blocks_used;
4730 btrfs_end_transaction_throttle(trans, root); 4712 btrfs_end_transaction_throttle(trans, root);
@@ -4771,8 +4753,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4771 4753
4772 btrfs_inc_nlink(inode); 4754 btrfs_inc_nlink(inode);
4773 inode->i_ctime = CURRENT_TIME; 4755 inode->i_ctime = CURRENT_TIME;
4774
4775 btrfs_set_trans_block_group(trans, dir);
4776 ihold(inode); 4756 ihold(inode);
4777 4757
4778 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); 4758 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
@@ -4781,7 +4761,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4781 drop_inode = 1; 4761 drop_inode = 1;
4782 } else { 4762 } else {
4783 struct dentry *parent = dget_parent(dentry); 4763 struct dentry *parent = dget_parent(dentry);
4784 btrfs_update_inode_block_group(trans, dir);
4785 err = btrfs_update_inode(trans, root, inode); 4764 err = btrfs_update_inode(trans, root, inode);
4786 BUG_ON(err); 4765 BUG_ON(err);
4787 btrfs_log_new_name(trans, inode, NULL, parent); 4766 btrfs_log_new_name(trans, inode, NULL, parent);
@@ -4818,7 +4797,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4818 trans = btrfs_start_transaction(root, 5); 4797 trans = btrfs_start_transaction(root, 5);
4819 if (IS_ERR(trans)) 4798 if (IS_ERR(trans))
4820 return PTR_ERR(trans); 4799 return PTR_ERR(trans);
4821 btrfs_set_trans_block_group(trans, dir);
4822 4800
4823 err = btrfs_find_free_ino(root, &objectid); 4801 err = btrfs_find_free_ino(root, &objectid);
4824 if (err) 4802 if (err)
@@ -4826,8 +4804,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4826 4804
4827 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4805 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4828 dentry->d_name.len, btrfs_ino(dir), objectid, 4806 dentry->d_name.len, btrfs_ino(dir), objectid,
4829 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4807 S_IFDIR | mode, &index);
4830 &index);
4831 if (IS_ERR(inode)) { 4808 if (IS_ERR(inode)) {
4832 err = PTR_ERR(inode); 4809 err = PTR_ERR(inode);
4833 goto out_fail; 4810 goto out_fail;
@@ -4841,7 +4818,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4841 4818
4842 inode->i_op = &btrfs_dir_inode_operations; 4819 inode->i_op = &btrfs_dir_inode_operations;
4843 inode->i_fop = &btrfs_dir_file_operations; 4820 inode->i_fop = &btrfs_dir_file_operations;
4844 btrfs_set_trans_block_group(trans, inode);
4845 4821
4846 btrfs_i_size_write(inode, 0); 4822 btrfs_i_size_write(inode, 0);
4847 err = btrfs_update_inode(trans, root, inode); 4823 err = btrfs_update_inode(trans, root, inode);
@@ -4855,8 +4831,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4855 4831
4856 d_instantiate(dentry, inode); 4832 d_instantiate(dentry, inode);
4857 drop_on_err = 0; 4833 drop_on_err = 0;
4858 btrfs_update_inode_block_group(trans, inode);
4859 btrfs_update_inode_block_group(trans, dir);
4860 4834
4861out_fail: 4835out_fail:
4862 nr = trans->blocks_used; 4836 nr = trans->blocks_used;
@@ -4989,7 +4963,15 @@ again:
4989 4963
4990 if (!path) { 4964 if (!path) {
4991 path = btrfs_alloc_path(); 4965 path = btrfs_alloc_path();
4992 BUG_ON(!path); 4966 if (!path) {
4967 err = -ENOMEM;
4968 goto out;
4969 }
4970 /*
4971 * Chances are we'll be called again, so go ahead and do
4972 * readahead
4973 */
4974 path->reada = 1;
4993 } 4975 }
4994 4976
4995 ret = btrfs_lookup_file_extent(trans, root, path, 4977 ret = btrfs_lookup_file_extent(trans, root, path,
@@ -5130,8 +5112,10 @@ again:
5130 kunmap(page); 5112 kunmap(page);
5131 free_extent_map(em); 5113 free_extent_map(em);
5132 em = NULL; 5114 em = NULL;
5115
5133 btrfs_release_path(path); 5116 btrfs_release_path(path);
5134 trans = btrfs_join_transaction(root, 1); 5117 trans = btrfs_join_transaction(root);
5118
5135 if (IS_ERR(trans)) 5119 if (IS_ERR(trans))
5136 return ERR_CAST(trans); 5120 return ERR_CAST(trans);
5137 goto again; 5121 goto again;
@@ -5375,7 +5359,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5375 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5359 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5376 } 5360 }
5377 5361
5378 trans = btrfs_join_transaction(root, 0); 5362 trans = btrfs_join_transaction(root);
5379 if (IS_ERR(trans)) 5363 if (IS_ERR(trans))
5380 return ERR_CAST(trans); 5364 return ERR_CAST(trans);
5381 5365
@@ -5611,7 +5595,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5611 * to make sure the current transaction stays open 5595 * to make sure the current transaction stays open
5612 * while we look for nocow cross refs 5596 * while we look for nocow cross refs
5613 */ 5597 */
5614 trans = btrfs_join_transaction(root, 0); 5598 trans = btrfs_join_transaction(root);
5615 if (IS_ERR(trans)) 5599 if (IS_ERR(trans))
5616 goto must_cow; 5600 goto must_cow;
5617 5601
@@ -5750,7 +5734,7 @@ again:
5750 5734
5751 BUG_ON(!ordered); 5735 BUG_ON(!ordered);
5752 5736
5753 trans = btrfs_join_transaction(root, 1); 5737 trans = btrfs_join_transaction(root);
5754 if (IS_ERR(trans)) { 5738 if (IS_ERR(trans)) {
5755 err = -ENOMEM; 5739 err = -ENOMEM;
5756 goto out; 5740 goto out;
@@ -6500,6 +6484,7 @@ out:
6500static int btrfs_truncate(struct inode *inode) 6484static int btrfs_truncate(struct inode *inode)
6501{ 6485{
6502 struct btrfs_root *root = BTRFS_I(inode)->root; 6486 struct btrfs_root *root = BTRFS_I(inode)->root;
6487 struct btrfs_block_rsv *rsv;
6503 int ret; 6488 int ret;
6504 int err = 0; 6489 int err = 0;
6505 struct btrfs_trans_handle *trans; 6490 struct btrfs_trans_handle *trans;
@@ -6513,28 +6498,80 @@ static int btrfs_truncate(struct inode *inode)
6513 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6498 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
6514 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6499 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
6515 6500
6516 trans = btrfs_start_transaction(root, 5); 6501 /*
6517 if (IS_ERR(trans)) 6502 * Yes ladies and gentelment, this is indeed ugly. The fact is we have
6518 return PTR_ERR(trans); 6503 * 3 things going on here
6504 *
6505 * 1) We need to reserve space for our orphan item and the space to
6506 * delete our orphan item. Lord knows we don't want to have a dangling
6507 * orphan item because we didn't reserve space to remove it.
6508 *
6509 * 2) We need to reserve space to update our inode.
6510 *
6511 * 3) We need to have something to cache all the space that is going to
6512 * be free'd up by the truncate operation, but also have some slack
6513 * space reserved in case it uses space during the truncate (thank you
6514 * very much snapshotting).
6515 *
6516 * And we need these to all be seperate. The fact is we can use alot of
6517 * space doing the truncate, and we have no earthly idea how much space
6518 * we will use, so we need the truncate reservation to be seperate so it
6519 * doesn't end up using space reserved for updating the inode or
6520 * removing the orphan item. We also need to be able to stop the
6521 * transaction and start a new one, which means we need to be able to
6522 * update the inode several times, and we have no idea of knowing how
6523 * many times that will be, so we can't just reserve 1 item for the
6524 * entirety of the opration, so that has to be done seperately as well.
6525 * Then there is the orphan item, which does indeed need to be held on
6526 * to for the whole operation, and we need nobody to touch this reserved
6527 * space except the orphan code.
6528 *
6529 * So that leaves us with
6530 *
6531 * 1) root->orphan_block_rsv - for the orphan deletion.
6532 * 2) rsv - for the truncate reservation, which we will steal from the
6533 * transaction reservation.
6534 * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
6535 * updating the inode.
6536 */
6537 rsv = btrfs_alloc_block_rsv(root);
6538 if (!rsv)
6539 return -ENOMEM;
6540 btrfs_add_durable_block_rsv(root->fs_info, rsv);
6541
6542 trans = btrfs_start_transaction(root, 4);
6543 if (IS_ERR(trans)) {
6544 err = PTR_ERR(trans);
6545 goto out;
6546 }
6519 6547
6520 btrfs_set_trans_block_group(trans, inode); 6548 /*
6549 * Reserve space for the truncate process. Truncate should be adding
6550 * space, but if there are snapshots it may end up using space.
6551 */
6552 ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
6553 BUG_ON(ret);
6521 6554
6522 ret = btrfs_orphan_add(trans, inode); 6555 ret = btrfs_orphan_add(trans, inode);
6523 if (ret) { 6556 if (ret) {
6524 btrfs_end_transaction(trans, root); 6557 btrfs_end_transaction(trans, root);
6525 return ret; 6558 goto out;
6526 } 6559 }
6527 6560
6528 nr = trans->blocks_used; 6561 nr = trans->blocks_used;
6529 btrfs_end_transaction(trans, root); 6562 btrfs_end_transaction(trans, root);
6530 btrfs_btree_balance_dirty(root, nr); 6563 btrfs_btree_balance_dirty(root, nr);
6531 6564
6532 /* Now start a transaction for the truncate */ 6565 /*
6533 trans = btrfs_start_transaction(root, 0); 6566 * Ok so we've already migrated our bytes over for the truncate, so here
6534 if (IS_ERR(trans)) 6567 * just reserve the one slot we need for updating the inode.
6535 return PTR_ERR(trans); 6568 */
6536 btrfs_set_trans_block_group(trans, inode); 6569 trans = btrfs_start_transaction(root, 1);
6537 trans->block_rsv = root->orphan_block_rsv; 6570 if (IS_ERR(trans)) {
6571 err = PTR_ERR(trans);
6572 goto out;
6573 }
6574 trans->block_rsv = rsv;
6538 6575
6539 /* 6576 /*
6540 * setattr is responsible for setting the ordered_data_close flag, 6577 * setattr is responsible for setting the ordered_data_close flag,
@@ -6558,24 +6595,17 @@ static int btrfs_truncate(struct inode *inode)
6558 6595
6559 while (1) { 6596 while (1) {
6560 if (!trans) { 6597 if (!trans) {
6561 trans = btrfs_start_transaction(root, 0); 6598 trans = btrfs_start_transaction(root, 3);
6562 if (IS_ERR(trans)) 6599 if (IS_ERR(trans)) {
6563 return PTR_ERR(trans); 6600 err = PTR_ERR(trans);
6564 btrfs_set_trans_block_group(trans, inode); 6601 goto out;
6565 trans->block_rsv = root->orphan_block_rsv; 6602 }
6566 }
6567 6603
6568 ret = btrfs_block_rsv_check(trans, root, 6604 ret = btrfs_truncate_reserve_metadata(trans, root,
6569 root->orphan_block_rsv, 0, 5); 6605 rsv);
6570 if (ret == -EAGAIN) { 6606 BUG_ON(ret);
6571 ret = btrfs_commit_transaction(trans, root); 6607
6572 if (ret) 6608 trans->block_rsv = rsv;
6573 return ret;
6574 trans = NULL;
6575 continue;
6576 } else if (ret) {
6577 err = ret;
6578 break;
6579 } 6609 }
6580 6610
6581 ret = btrfs_truncate_inode_items(trans, root, inode, 6611 ret = btrfs_truncate_inode_items(trans, root, inode,
@@ -6586,6 +6616,7 @@ static int btrfs_truncate(struct inode *inode)
6586 break; 6616 break;
6587 } 6617 }
6588 6618
6619 trans->block_rsv = &root->fs_info->trans_block_rsv;
6589 ret = btrfs_update_inode(trans, root, inode); 6620 ret = btrfs_update_inode(trans, root, inode);
6590 if (ret) { 6621 if (ret) {
6591 err = ret; 6622 err = ret;
@@ -6599,6 +6630,7 @@ static int btrfs_truncate(struct inode *inode)
6599 } 6630 }
6600 6631
6601 if (ret == 0 && inode->i_nlink > 0) { 6632 if (ret == 0 && inode->i_nlink > 0) {
6633 trans->block_rsv = root->orphan_block_rsv;
6602 ret = btrfs_orphan_del(trans, inode); 6634 ret = btrfs_orphan_del(trans, inode);
6603 if (ret) 6635 if (ret)
6604 err = ret; 6636 err = ret;
@@ -6610,15 +6642,20 @@ static int btrfs_truncate(struct inode *inode)
6610 ret = btrfs_orphan_del(NULL, inode); 6642 ret = btrfs_orphan_del(NULL, inode);
6611 } 6643 }
6612 6644
6645 trans->block_rsv = &root->fs_info->trans_block_rsv;
6613 ret = btrfs_update_inode(trans, root, inode); 6646 ret = btrfs_update_inode(trans, root, inode);
6614 if (ret && !err) 6647 if (ret && !err)
6615 err = ret; 6648 err = ret;
6616 6649
6617 nr = trans->blocks_used; 6650 nr = trans->blocks_used;
6618 ret = btrfs_end_transaction_throttle(trans, root); 6651 ret = btrfs_end_transaction_throttle(trans, root);
6652 btrfs_btree_balance_dirty(root, nr);
6653
6654out:
6655 btrfs_free_block_rsv(root, rsv);
6656
6619 if (ret && !err) 6657 if (ret && !err)
6620 err = ret; 6658 err = ret;
6621 btrfs_btree_balance_dirty(root, nr);
6622 6659
6623 return err; 6660 return err;
6624} 6661}
@@ -6627,15 +6664,14 @@ static int btrfs_truncate(struct inode *inode)
6627 * create a new subvolume directory/inode (helper for the ioctl). 6664 * create a new subvolume directory/inode (helper for the ioctl).
6628 */ 6665 */
6629int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 6666int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
6630 struct btrfs_root *new_root, 6667 struct btrfs_root *new_root, u64 new_dirid)
6631 u64 new_dirid, u64 alloc_hint)
6632{ 6668{
6633 struct inode *inode; 6669 struct inode *inode;
6634 int err; 6670 int err;
6635 u64 index = 0; 6671 u64 index = 0;
6636 6672
6637 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, 6673 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
6638 new_dirid, alloc_hint, S_IFDIR | 0700, &index); 6674 new_dirid, S_IFDIR | 0700, &index);
6639 if (IS_ERR(inode)) 6675 if (IS_ERR(inode))
6640 return PTR_ERR(inode); 6676 return PTR_ERR(inode);
6641 inode->i_op = &btrfs_dir_inode_operations; 6677 inode->i_op = &btrfs_dir_inode_operations;
@@ -6748,21 +6784,6 @@ void btrfs_destroy_inode(struct inode *inode)
6748 spin_unlock(&root->fs_info->ordered_extent_lock); 6784 spin_unlock(&root->fs_info->ordered_extent_lock);
6749 } 6785 }
6750 6786
6751 if (root == root->fs_info->tree_root) {
6752 struct btrfs_block_group_cache *block_group;
6753
6754 block_group = btrfs_lookup_block_group(root->fs_info,
6755 BTRFS_I(inode)->block_group);
6756 if (block_group && block_group->inode == inode) {
6757 spin_lock(&block_group->lock);
6758 block_group->inode = NULL;
6759 spin_unlock(&block_group->lock);
6760 btrfs_put_block_group(block_group);
6761 } else if (block_group) {
6762 btrfs_put_block_group(block_group);
6763 }
6764 }
6765
6766 spin_lock(&root->orphan_lock); 6787 spin_lock(&root->orphan_lock);
6767 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6788 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6768 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n", 6789 printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
@@ -6948,8 +6969,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6948 goto out_notrans; 6969 goto out_notrans;
6949 } 6970 }
6950 6971
6951 btrfs_set_trans_block_group(trans, new_dir);
6952
6953 if (dest != root) 6972 if (dest != root)
6954 btrfs_record_root_in_trans(trans, dest); 6973 btrfs_record_root_in_trans(trans, dest);
6955 6974
@@ -7131,16 +7150,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7131 if (IS_ERR(trans)) 7150 if (IS_ERR(trans))
7132 return PTR_ERR(trans); 7151 return PTR_ERR(trans);
7133 7152
7134 btrfs_set_trans_block_group(trans, dir);
7135
7136 err = btrfs_find_free_ino(root, &objectid); 7153 err = btrfs_find_free_ino(root, &objectid);
7137 if (err) 7154 if (err)
7138 goto out_unlock; 7155 goto out_unlock;
7139 7156
7140 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 7157 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
7141 dentry->d_name.len, btrfs_ino(dir), objectid, 7158 dentry->d_name.len, btrfs_ino(dir), objectid,
7142 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 7159 S_IFLNK|S_IRWXUGO, &index);
7143 &index);
7144 if (IS_ERR(inode)) { 7160 if (IS_ERR(inode)) {
7145 err = PTR_ERR(inode); 7161 err = PTR_ERR(inode);
7146 goto out_unlock; 7162 goto out_unlock;
@@ -7152,7 +7168,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7152 goto out_unlock; 7168 goto out_unlock;
7153 } 7169 }
7154 7170
7155 btrfs_set_trans_block_group(trans, inode);
7156 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 7171 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
7157 if (err) 7172 if (err)
7158 drop_inode = 1; 7173 drop_inode = 1;
@@ -7163,8 +7178,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7163 inode->i_op = &btrfs_file_inode_operations; 7178 inode->i_op = &btrfs_file_inode_operations;
7164 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 7179 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
7165 } 7180 }
7166 btrfs_update_inode_block_group(trans, inode);
7167 btrfs_update_inode_block_group(trans, dir);
7168 if (drop_inode) 7181 if (drop_inode)
7169 goto out_unlock; 7182 goto out_unlock;
7170 7183
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 85e818ce00c5..b793d112d1f6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -243,7 +243,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 243 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
244 } 244 }
245 245
246 trans = btrfs_join_transaction(root, 1); 246 trans = btrfs_join_transaction(root);
247 BUG_ON(IS_ERR(trans)); 247 BUG_ON(IS_ERR(trans));
248 248
249 ret = btrfs_update_inode(trans, root, inode); 249 ret = btrfs_update_inode(trans, root, inode);
@@ -414,8 +414,7 @@ static noinline int create_subvol(struct btrfs_root *root,
414 414
415 btrfs_record_root_in_trans(trans, new_root); 415 btrfs_record_root_in_trans(trans, new_root);
416 416
417 ret = btrfs_create_subvol_root(trans, new_root, new_dirid, 417 ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
418 BTRFS_I(dir)->block_group);
419 /* 418 /*
420 * insert the directory item 419 * insert the directory item
421 */ 420 */
@@ -707,16 +706,17 @@ static int find_new_extents(struct btrfs_root *root,
707 struct btrfs_file_extent_item *extent; 706 struct btrfs_file_extent_item *extent;
708 int type; 707 int type;
709 int ret; 708 int ret;
709 u64 ino = btrfs_ino(inode);
710 710
711 path = btrfs_alloc_path(); 711 path = btrfs_alloc_path();
712 if (!path) 712 if (!path)
713 return -ENOMEM; 713 return -ENOMEM;
714 714
715 min_key.objectid = inode->i_ino; 715 min_key.objectid = ino;
716 min_key.type = BTRFS_EXTENT_DATA_KEY; 716 min_key.type = BTRFS_EXTENT_DATA_KEY;
717 min_key.offset = *off; 717 min_key.offset = *off;
718 718
719 max_key.objectid = inode->i_ino; 719 max_key.objectid = ino;
720 max_key.type = (u8)-1; 720 max_key.type = (u8)-1;
721 max_key.offset = (u64)-1; 721 max_key.offset = (u64)-1;
722 722
@@ -727,7 +727,7 @@ static int find_new_extents(struct btrfs_root *root,
727 path, 0, newer_than); 727 path, 0, newer_than);
728 if (ret != 0) 728 if (ret != 0)
729 goto none; 729 goto none;
730 if (min_key.objectid != inode->i_ino) 730 if (min_key.objectid != ino)
731 goto none; 731 goto none;
732 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 732 if (min_key.type != BTRFS_EXTENT_DATA_KEY)
733 goto none; 733 goto none;
@@ -2054,29 +2054,34 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
2054 2054
2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2055static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
2056{ 2056{
2057 struct btrfs_ioctl_fs_info_args fi_args; 2057 struct btrfs_ioctl_fs_info_args *fi_args;
2058 struct btrfs_device *device; 2058 struct btrfs_device *device;
2059 struct btrfs_device *next; 2059 struct btrfs_device *next;
2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2060 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2061 int ret = 0;
2061 2062
2062 if (!capable(CAP_SYS_ADMIN)) 2063 if (!capable(CAP_SYS_ADMIN))
2063 return -EPERM; 2064 return -EPERM;
2064 2065
2065 fi_args.num_devices = fs_devices->num_devices; 2066 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
2066 fi_args.max_id = 0; 2067 if (!fi_args)
2067 memcpy(&fi_args.fsid, root->fs_info->fsid, sizeof(fi_args.fsid)); 2068 return -ENOMEM;
2069
2070 fi_args->num_devices = fs_devices->num_devices;
2071 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
2068 2072
2069 mutex_lock(&fs_devices->device_list_mutex); 2073 mutex_lock(&fs_devices->device_list_mutex);
2070 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2074 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
2071 if (device->devid > fi_args.max_id) 2075 if (device->devid > fi_args->max_id)
2072 fi_args.max_id = device->devid; 2076 fi_args->max_id = device->devid;
2073 } 2077 }
2074 mutex_unlock(&fs_devices->device_list_mutex); 2078 mutex_unlock(&fs_devices->device_list_mutex);
2075 2079
2076 if (copy_to_user(arg, &fi_args, sizeof(fi_args))) 2080 if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
2077 return -EFAULT; 2081 ret = -EFAULT;
2078 2082
2079 return 0; 2083 kfree(fi_args);
2084 return ret;
2080} 2085}
2081 2086
2082static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2087static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
@@ -2489,12 +2494,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
2489 if (ret) 2494 if (ret)
2490 goto out; 2495 goto out;
2491 2496
2492 mutex_lock(&root->fs_info->trans_mutex); 2497 atomic_inc(&root->fs_info->open_ioctl_trans);
2493 root->fs_info->open_ioctl_trans++;
2494 mutex_unlock(&root->fs_info->trans_mutex);
2495 2498
2496 ret = -ENOMEM; 2499 ret = -ENOMEM;
2497 trans = btrfs_start_ioctl_transaction(root, 0); 2500 trans = btrfs_start_ioctl_transaction(root);
2498 if (IS_ERR(trans)) 2501 if (IS_ERR(trans))
2499 goto out_drop; 2502 goto out_drop;
2500 2503
@@ -2502,9 +2505,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2502 return 0; 2505 return 0;
2503 2506
2504out_drop: 2507out_drop:
2505 mutex_lock(&root->fs_info->trans_mutex); 2508 atomic_dec(&root->fs_info->open_ioctl_trans);
2506 root->fs_info->open_ioctl_trans--;
2507 mutex_unlock(&root->fs_info->trans_mutex);
2508 mnt_drop_write(file->f_path.mnt); 2509 mnt_drop_write(file->f_path.mnt);
2509out: 2510out:
2510 return ret; 2511 return ret;
@@ -2738,9 +2739,7 @@ long btrfs_ioctl_trans_end(struct file *file)
2738 2739
2739 btrfs_end_transaction(trans, root); 2740 btrfs_end_transaction(trans, root);
2740 2741
2741 mutex_lock(&root->fs_info->trans_mutex); 2742 atomic_dec(&root->fs_info->open_ioctl_trans);
2742 root->fs_info->open_ioctl_trans--;
2743 mutex_unlock(&root->fs_info->trans_mutex);
2744 2743
2745 mnt_drop_write(file->f_path.mnt); 2744 mnt_drop_write(file->f_path.mnt);
2746 return 0; 2745 return 0;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ca38eca70af0..b1ef27cc673b 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
677 err = -ENOMEM; 677 err = -ENOMEM;
678 goto out; 678 goto out;
679 } 679 }
680 path1->reada = 1;
681 path2->reada = 2;
680 682
681 node = alloc_backref_node(cache); 683 node = alloc_backref_node(cache);
682 if (!node) { 684 if (!node) {
@@ -1999,6 +2001,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1999 path = btrfs_alloc_path(); 2001 path = btrfs_alloc_path();
2000 if (!path) 2002 if (!path)
2001 return -ENOMEM; 2003 return -ENOMEM;
2004 path->reada = 1;
2002 2005
2003 reloc_root = root->reloc_root; 2006 reloc_root = root->reloc_root;
2004 root_item = &reloc_root->root_item; 2007 root_item = &reloc_root->root_item;
@@ -2139,10 +2142,10 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2139 u64 num_bytes = 0; 2142 u64 num_bytes = 0;
2140 int ret; 2143 int ret;
2141 2144
2142 mutex_lock(&root->fs_info->trans_mutex); 2145 spin_lock(&root->fs_info->trans_lock);
2143 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2146 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2144 rc->merging_rsv_size += rc->nodes_relocated * 2; 2147 rc->merging_rsv_size += rc->nodes_relocated * 2;
2145 mutex_unlock(&root->fs_info->trans_mutex); 2148 spin_unlock(&root->fs_info->trans_lock);
2146again: 2149again:
2147 if (!err) { 2150 if (!err) {
2148 num_bytes = rc->merging_rsv_size; 2151 num_bytes = rc->merging_rsv_size;
@@ -2152,7 +2155,7 @@ again:
2152 err = ret; 2155 err = ret;
2153 } 2156 }
2154 2157
2155 trans = btrfs_join_transaction(rc->extent_root, 1); 2158 trans = btrfs_join_transaction(rc->extent_root);
2156 if (IS_ERR(trans)) { 2159 if (IS_ERR(trans)) {
2157 if (!err) 2160 if (!err)
2158 btrfs_block_rsv_release(rc->extent_root, 2161 btrfs_block_rsv_release(rc->extent_root,
@@ -2211,9 +2214,9 @@ int merge_reloc_roots(struct reloc_control *rc)
2211 int ret; 2214 int ret;
2212again: 2215again:
2213 root = rc->extent_root; 2216 root = rc->extent_root;
2214 mutex_lock(&root->fs_info->trans_mutex); 2217 spin_lock(&root->fs_info->trans_lock);
2215 list_splice_init(&rc->reloc_roots, &reloc_roots); 2218 list_splice_init(&rc->reloc_roots, &reloc_roots);
2216 mutex_unlock(&root->fs_info->trans_mutex); 2219 spin_unlock(&root->fs_info->trans_lock);
2217 2220
2218 while (!list_empty(&reloc_roots)) { 2221 while (!list_empty(&reloc_roots)) {
2219 found = 1; 2222 found = 1;
@@ -3236,7 +3239,7 @@ truncate:
3236 goto out; 3239 goto out;
3237 } 3240 }
3238 3241
3239 trans = btrfs_join_transaction(root, 0); 3242 trans = btrfs_join_transaction(root);
3240 if (IS_ERR(trans)) { 3243 if (IS_ERR(trans)) {
3241 btrfs_free_path(path); 3244 btrfs_free_path(path);
3242 ret = PTR_ERR(trans); 3245 ret = PTR_ERR(trans);
@@ -3300,6 +3303,7 @@ static int find_data_references(struct reloc_control *rc,
3300 path = btrfs_alloc_path(); 3303 path = btrfs_alloc_path();
3301 if (!path) 3304 if (!path)
3302 return -ENOMEM; 3305 return -ENOMEM;
3306 path->reada = 1;
3303 3307
3304 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3308 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3305 if (IS_ERR(root)) { 3309 if (IS_ERR(root)) {
@@ -3586,17 +3590,17 @@ next:
3586static void set_reloc_control(struct reloc_control *rc) 3590static void set_reloc_control(struct reloc_control *rc)
3587{ 3591{
3588 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3592 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3589 mutex_lock(&fs_info->trans_mutex); 3593 spin_lock(&fs_info->trans_lock);
3590 fs_info->reloc_ctl = rc; 3594 fs_info->reloc_ctl = rc;
3591 mutex_unlock(&fs_info->trans_mutex); 3595 spin_unlock(&fs_info->trans_lock);
3592} 3596}
3593 3597
3594static void unset_reloc_control(struct reloc_control *rc) 3598static void unset_reloc_control(struct reloc_control *rc)
3595{ 3599{
3596 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3600 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3597 mutex_lock(&fs_info->trans_mutex); 3601 spin_lock(&fs_info->trans_lock);
3598 fs_info->reloc_ctl = NULL; 3602 fs_info->reloc_ctl = NULL;
3599 mutex_unlock(&fs_info->trans_mutex); 3603 spin_unlock(&fs_info->trans_lock);
3600} 3604}
3601 3605
3602static int check_extent_flags(u64 flags) 3606static int check_extent_flags(u64 flags)
@@ -3645,7 +3649,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3645 rc->create_reloc_tree = 1; 3649 rc->create_reloc_tree = 1;
3646 set_reloc_control(rc); 3650 set_reloc_control(rc);
3647 3651
3648 trans = btrfs_join_transaction(rc->extent_root, 1); 3652 trans = btrfs_join_transaction(rc->extent_root);
3649 BUG_ON(IS_ERR(trans)); 3653 BUG_ON(IS_ERR(trans));
3650 btrfs_commit_transaction(trans, rc->extent_root); 3654 btrfs_commit_transaction(trans, rc->extent_root);
3651 return 0; 3655 return 0;
@@ -3668,6 +3672,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3668 path = btrfs_alloc_path(); 3672 path = btrfs_alloc_path();
3669 if (!path) 3673 if (!path)
3670 return -ENOMEM; 3674 return -ENOMEM;
3675 path->reada = 1;
3671 3676
3672 ret = prepare_to_relocate(rc); 3677 ret = prepare_to_relocate(rc);
3673 if (ret) { 3678 if (ret) {
@@ -3834,7 +3839,7 @@ restart:
3834 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 3839 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
3835 3840
3836 /* get rid of pinned extents */ 3841 /* get rid of pinned extents */
3837 trans = btrfs_join_transaction(rc->extent_root, 1); 3842 trans = btrfs_join_transaction(rc->extent_root);
3838 if (IS_ERR(trans)) 3843 if (IS_ERR(trans))
3839 err = PTR_ERR(trans); 3844 err = PTR_ERR(trans);
3840 else 3845 else
@@ -4093,6 +4098,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4093 path = btrfs_alloc_path(); 4098 path = btrfs_alloc_path();
4094 if (!path) 4099 if (!path)
4095 return -ENOMEM; 4100 return -ENOMEM;
4101 path->reada = -1;
4096 4102
4097 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 4103 key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4098 key.type = BTRFS_ROOT_ITEM_KEY; 4104 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -4159,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4159 4165
4160 set_reloc_control(rc); 4166 set_reloc_control(rc);
4161 4167
4162 trans = btrfs_join_transaction(rc->extent_root, 1); 4168 trans = btrfs_join_transaction(rc->extent_root);
4163 if (IS_ERR(trans)) { 4169 if (IS_ERR(trans)) {
4164 unset_reloc_control(rc); 4170 unset_reloc_control(rc);
4165 err = PTR_ERR(trans); 4171 err = PTR_ERR(trans);
@@ -4193,7 +4199,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4193 4199
4194 unset_reloc_control(rc); 4200 unset_reloc_control(rc);
4195 4201
4196 trans = btrfs_join_transaction(rc->extent_root, 1); 4202 trans = btrfs_join_transaction(rc->extent_root);
4197 if (IS_ERR(trans)) 4203 if (IS_ERR(trans))
4198 err = PTR_ERR(trans); 4204 err = PTR_ERR(trans);
4199 else 4205 else
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6dfed0c27ac3..a8d03d5efb5d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -16,13 +16,7 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h> 19#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include "ctree.h" 20#include "ctree.h"
27#include "volumes.h" 21#include "volumes.h"
28#include "disk-io.h" 22#include "disk-io.h"
@@ -117,33 +111,37 @@ static void scrub_free_csums(struct scrub_dev *sdev)
117 } 111 }
118} 112}
119 113
114static void scrub_free_bio(struct bio *bio)
115{
116 int i;
117 struct page *last_page = NULL;
118
119 if (!bio)
120 return;
121
122 for (i = 0; i < bio->bi_vcnt; ++i) {
123 if (bio->bi_io_vec[i].bv_page == last_page)
124 continue;
125 last_page = bio->bi_io_vec[i].bv_page;
126 __free_page(last_page);
127 }
128 bio_put(bio);
129}
130
120static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) 131static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
121{ 132{
122 int i; 133 int i;
123 int j;
124 struct page *last_page;
125 134
126 if (!sdev) 135 if (!sdev)
127 return; 136 return;
128 137
129 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 138 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
130 struct scrub_bio *sbio = sdev->bios[i]; 139 struct scrub_bio *sbio = sdev->bios[i];
131 struct bio *bio;
132 140
133 if (!sbio) 141 if (!sbio)
134 break; 142 break;
135 143
136 bio = sbio->bio; 144 scrub_free_bio(sbio->bio);
137 if (bio) {
138 last_page = NULL;
139 for (j = 0; j < bio->bi_vcnt; ++j) {
140 if (bio->bi_io_vec[j].bv_page == last_page)
141 continue;
142 last_page = bio->bi_io_vec[j].bv_page;
143 __free_page(last_page);
144 }
145 bio_put(bio);
146 }
147 kfree(sbio); 145 kfree(sbio);
148 } 146 }
149 147
@@ -156,8 +154,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
156{ 154{
157 struct scrub_dev *sdev; 155 struct scrub_dev *sdev;
158 int i; 156 int i;
159 int j;
160 int ret;
161 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; 157 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
162 158
163 sdev = kzalloc(sizeof(*sdev), GFP_NOFS); 159 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
@@ -165,7 +161,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
165 goto nomem; 161 goto nomem;
166 sdev->dev = dev; 162 sdev->dev = dev;
167 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { 163 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
168 struct bio *bio;
169 struct scrub_bio *sbio; 164 struct scrub_bio *sbio;
170 165
171 sbio = kzalloc(sizeof(*sbio), GFP_NOFS); 166 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
@@ -173,32 +168,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
173 goto nomem; 168 goto nomem;
174 sdev->bios[i] = sbio; 169 sdev->bios[i] = sbio;
175 170
176 bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
177 if (!bio)
178 goto nomem;
179
180 sbio->index = i; 171 sbio->index = i;
181 sbio->sdev = sdev; 172 sbio->sdev = sdev;
182 sbio->bio = bio;
183 sbio->count = 0; 173 sbio->count = 0;
184 sbio->work.func = scrub_checksum; 174 sbio->work.func = scrub_checksum;
185 bio->bi_private = sdev->bios[i];
186 bio->bi_end_io = scrub_bio_end_io;
187 bio->bi_sector = 0;
188 bio->bi_bdev = dev->bdev;
189 bio->bi_size = 0;
190
191 for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
192 struct page *page;
193 page = alloc_page(GFP_NOFS);
194 if (!page)
195 goto nomem;
196
197 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
198 if (!ret)
199 goto nomem;
200 }
201 WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);
202 175
203 if (i != SCRUB_BIOS_PER_DEV-1) 176 if (i != SCRUB_BIOS_PER_DEV-1)
204 sdev->bios[i]->next_free = i + 1; 177 sdev->bios[i]->next_free = i + 1;
@@ -369,9 +342,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
369 int ret; 342 int ret;
370 DECLARE_COMPLETION_ONSTACK(complete); 343 DECLARE_COMPLETION_ONSTACK(complete);
371 344
372 /* we are going to wait on this IO */
373 rw |= REQ_SYNC;
374
375 bio = bio_alloc(GFP_NOFS, 1); 345 bio = bio_alloc(GFP_NOFS, 1);
376 bio->bi_bdev = bdev; 346 bio->bi_bdev = bdev;
377 bio->bi_sector = sector; 347 bio->bi_sector = sector;
@@ -380,6 +350,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
380 bio->bi_private = &complete; 350 bio->bi_private = &complete;
381 submit_bio(rw, bio); 351 submit_bio(rw, bio);
382 352
353 /* this will also unplug the queue */
383 wait_for_completion(&complete); 354 wait_for_completion(&complete);
384 355
385 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); 356 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -394,6 +365,7 @@ static void scrub_bio_end_io(struct bio *bio, int err)
394 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 365 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
395 366
396 sbio->err = err; 367 sbio->err = err;
368 sbio->bio = bio;
397 369
398 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); 370 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
399} 371}
@@ -453,6 +425,8 @@ static void scrub_checksum(struct btrfs_work *work)
453 } 425 }
454 426
455out: 427out:
428 scrub_free_bio(sbio->bio);
429 sbio->bio = NULL;
456 spin_lock(&sdev->list_lock); 430 spin_lock(&sdev->list_lock);
457 sbio->next_free = sdev->first_free; 431 sbio->next_free = sdev->first_free;
458 sdev->first_free = sbio->index; 432 sdev->first_free = sbio->index;
@@ -583,25 +557,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
583static int scrub_submit(struct scrub_dev *sdev) 557static int scrub_submit(struct scrub_dev *sdev)
584{ 558{
585 struct scrub_bio *sbio; 559 struct scrub_bio *sbio;
560 struct bio *bio;
561 int i;
586 562
587 if (sdev->curr == -1) 563 if (sdev->curr == -1)
588 return 0; 564 return 0;
589 565
590 sbio = sdev->bios[sdev->curr]; 566 sbio = sdev->bios[sdev->curr];
591 567
592 sbio->bio->bi_sector = sbio->physical >> 9; 568 bio = bio_alloc(GFP_NOFS, sbio->count);
593 sbio->bio->bi_size = sbio->count * PAGE_SIZE; 569 if (!bio)
594 sbio->bio->bi_next = NULL; 570 goto nomem;
595 sbio->bio->bi_flags |= 1 << BIO_UPTODATE; 571
596 sbio->bio->bi_comp_cpu = -1; 572 bio->bi_private = sbio;
597 sbio->bio->bi_bdev = sdev->dev->bdev; 573 bio->bi_end_io = scrub_bio_end_io;
574 bio->bi_bdev = sdev->dev->bdev;
575 bio->bi_sector = sbio->physical >> 9;
576
577 for (i = 0; i < sbio->count; ++i) {
578 struct page *page;
579 int ret;
580
581 page = alloc_page(GFP_NOFS);
582 if (!page)
583 goto nomem;
584
585 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
586 if (!ret) {
587 __free_page(page);
588 goto nomem;
589 }
590 }
591
598 sbio->err = 0; 592 sbio->err = 0;
599 sdev->curr = -1; 593 sdev->curr = -1;
600 atomic_inc(&sdev->in_flight); 594 atomic_inc(&sdev->in_flight);
601 595
602 submit_bio(0, sbio->bio); 596 submit_bio(READ, bio);
603 597
604 return 0; 598 return 0;
599
600nomem:
601 scrub_free_bio(bio);
602
603 return -ENOMEM;
605} 604}
606 605
607static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 606static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
@@ -633,7 +632,11 @@ again:
633 sbio->logical = logical; 632 sbio->logical = logical;
634 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical || 633 } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
635 sbio->logical + sbio->count * PAGE_SIZE != logical) { 634 sbio->logical + sbio->count * PAGE_SIZE != logical) {
636 scrub_submit(sdev); 635 int ret;
636
637 ret = scrub_submit(sdev);
638 if (ret)
639 return ret;
637 goto again; 640 goto again;
638 } 641 }
639 sbio->spag[sbio->count].flags = flags; 642 sbio->spag[sbio->count].flags = flags;
@@ -645,8 +648,13 @@ again:
645 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); 648 memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
646 } 649 }
647 ++sbio->count; 650 ++sbio->count;
648 if (sbio->count == SCRUB_PAGES_PER_BIO || force) 651 if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
649 scrub_submit(sdev); 652 int ret;
653
654 ret = scrub_submit(sdev);
655 if (ret)
656 return ret;
657 }
650 658
651 return 0; 659 return 0;
652} 660}
@@ -727,6 +735,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
727 struct btrfs_root *root = fs_info->extent_root; 735 struct btrfs_root *root = fs_info->extent_root;
728 struct btrfs_root *csum_root = fs_info->csum_root; 736 struct btrfs_root *csum_root = fs_info->csum_root;
729 struct btrfs_extent_item *extent; 737 struct btrfs_extent_item *extent;
738 struct blk_plug plug;
730 u64 flags; 739 u64 flags;
731 int ret; 740 int ret;
732 int slot; 741 int slot;
@@ -789,18 +798,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
789 798
790 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 799 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
791 if (ret < 0) 800 if (ret < 0)
792 goto out; 801 goto out_noplug;
793
794 l = path->nodes[0];
795 slot = path->slots[0];
796 btrfs_item_key_to_cpu(l, &key, slot);
797 if (key.objectid != logical) {
798 ret = btrfs_previous_item(root, path, 0,
799 BTRFS_EXTENT_ITEM_KEY);
800 if (ret < 0)
801 goto out;
802 }
803 802
803 /*
804 * we might miss half an extent here, but that doesn't matter,
805 * as it's only the prefetch
806 */
804 while (1) { 807 while (1) {
805 l = path->nodes[0]; 808 l = path->nodes[0];
806 slot = path->slots[0]; 809 slot = path->slots[0];
@@ -809,7 +812,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
809 if (ret == 0) 812 if (ret == 0)
810 continue; 813 continue;
811 if (ret < 0) 814 if (ret < 0)
812 goto out; 815 goto out_noplug;
813 816
814 break; 817 break;
815 } 818 }
@@ -831,6 +834,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
831 * the scrub. This might currently (crc32) end up to be about 1MB 834 * the scrub. This might currently (crc32) end up to be about 1MB
832 */ 835 */
833 start_stripe = 0; 836 start_stripe = 0;
837 blk_start_plug(&plug);
834again: 838again:
835 logical = base + offset + start_stripe * increment; 839 logical = base + offset + start_stripe * increment;
836 for (i = start_stripe; i < nstripes; ++i) { 840 for (i = start_stripe; i < nstripes; ++i) {
@@ -890,15 +894,20 @@ again:
890 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
891 if (ret < 0) 895 if (ret < 0)
892 goto out; 896 goto out;
893 897 if (ret > 0) {
894 l = path->nodes[0];
895 slot = path->slots[0];
896 btrfs_item_key_to_cpu(l, &key, slot);
897 if (key.objectid != logical) {
898 ret = btrfs_previous_item(root, path, 0, 898 ret = btrfs_previous_item(root, path, 0,
899 BTRFS_EXTENT_ITEM_KEY); 899 BTRFS_EXTENT_ITEM_KEY);
900 if (ret < 0) 900 if (ret < 0)
901 goto out; 901 goto out;
902 if (ret > 0) {
903 /* there's no smaller item, so stick with the
904 * larger one */
905 btrfs_release_path(path);
906 ret = btrfs_search_slot(NULL, root, &key,
907 path, 0, 0);
908 if (ret < 0)
909 goto out;
910 }
902 } 911 }
903 912
904 while (1) { 913 while (1) {
@@ -972,6 +981,8 @@ next:
972 scrub_submit(sdev); 981 scrub_submit(sdev);
973 982
974out: 983out:
984 blk_finish_plug(&plug);
985out_noplug:
975 btrfs_free_path(path); 986 btrfs_free_path(path);
976 return ret < 0 ? ret : 0; 987 return ret < 0 ? ret : 0;
977} 988}
@@ -1047,8 +1058,15 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1047 while (1) { 1058 while (1) {
1048 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1059 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1049 if (ret < 0) 1060 if (ret < 0)
1050 goto out; 1061 break;
1051 ret = 0; 1062 if (ret > 0) {
1063 if (path->slots[0] >=
1064 btrfs_header_nritems(path->nodes[0])) {
1065 ret = btrfs_next_leaf(root, path);
1066 if (ret)
1067 break;
1068 }
1069 }
1052 1070
1053 l = path->nodes[0]; 1071 l = path->nodes[0];
1054 slot = path->slots[0]; 1072 slot = path->slots[0];
@@ -1058,7 +1076,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1058 if (found_key.objectid != sdev->dev->devid) 1076 if (found_key.objectid != sdev->dev->devid)
1059 break; 1077 break;
1060 1078
1061 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) 1079 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
1062 break; 1080 break;
1063 1081
1064 if (found_key.offset >= end) 1082 if (found_key.offset >= end)
@@ -1087,7 +1105,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1087 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 1105 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
1088 if (!cache) { 1106 if (!cache) {
1089 ret = -ENOENT; 1107 ret = -ENOENT;
1090 goto out; 1108 break;
1091 } 1109 }
1092 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, 1110 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
1093 chunk_offset, length); 1111 chunk_offset, length);
@@ -1099,9 +1117,13 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
1099 btrfs_release_path(path); 1117 btrfs_release_path(path);
1100 } 1118 }
1101 1119
1102out:
1103 btrfs_free_path(path); 1120 btrfs_free_path(path);
1104 return ret; 1121
1122 /*
1123 * ret can still be 1 from search_slot or next_leaf,
1124 * that's not an error
1125 */
1126 return ret < 0 ? ret : 0;
1105} 1127}
1106 1128
1107static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) 1129static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
@@ -1138,8 +1160,12 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1138 struct btrfs_fs_info *fs_info = root->fs_info; 1160 struct btrfs_fs_info *fs_info = root->fs_info;
1139 1161
1140 mutex_lock(&fs_info->scrub_lock); 1162 mutex_lock(&fs_info->scrub_lock);
1141 if (fs_info->scrub_workers_refcnt == 0) 1163 if (fs_info->scrub_workers_refcnt == 0) {
1164 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1165 fs_info->thread_pool_size, &fs_info->generic_worker);
1166 fs_info->scrub_workers.idle_thresh = 4;
1142 btrfs_start_workers(&fs_info->scrub_workers, 1); 1167 btrfs_start_workers(&fs_info->scrub_workers, 1);
1168 }
1143 ++fs_info->scrub_workers_refcnt; 1169 ++fs_info->scrub_workers_refcnt;
1144 mutex_unlock(&fs_info->scrub_lock); 1170 mutex_unlock(&fs_info->scrub_lock);
1145 1171
@@ -1166,7 +1192,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1166 int ret; 1192 int ret;
1167 struct btrfs_device *dev; 1193 struct btrfs_device *dev;
1168 1194
1169 if (root->fs_info->closing) 1195 if (btrfs_fs_closing(root->fs_info))
1170 return -EINVAL; 1196 return -EINVAL;
1171 1197
1172 /* 1198 /*
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9b2e7e5bc3ef..0bb4ebbb71b7 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -161,7 +161,8 @@ enum {
161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 161 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 162 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 163 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err, 164 Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
165 Opt_inode_cache, Opt_err,
165}; 166};
166 167
167static match_table_t tokens = { 168static match_table_t tokens = {
@@ -193,6 +194,7 @@ static match_table_t tokens = {
193 {Opt_enospc_debug, "enospc_debug"}, 194 {Opt_enospc_debug, "enospc_debug"},
194 {Opt_subvolrootid, "subvolrootid=%d"}, 195 {Opt_subvolrootid, "subvolrootid=%d"},
195 {Opt_defrag, "autodefrag"}, 196 {Opt_defrag, "autodefrag"},
197 {Opt_inode_cache, "inode_cache"},
196 {Opt_err, NULL}, 198 {Opt_err, NULL},
197}; 199};
198 200
@@ -361,6 +363,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
361 printk(KERN_INFO "btrfs: enabling disk space caching\n"); 363 printk(KERN_INFO "btrfs: enabling disk space caching\n");
362 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 364 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
363 break; 365 break;
366 case Opt_inode_cache:
367 printk(KERN_INFO "btrfs: enabling inode map caching\n");
368 btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
369 break;
364 case Opt_clear_cache: 370 case Opt_clear_cache:
365 printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 371 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
366 btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 372 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
@@ -819,7 +825,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
819 } else { 825 } else {
820 char b[BDEVNAME_SIZE]; 826 char b[BDEVNAME_SIZE];
821 827
822 s->s_flags = flags; 828 s->s_flags = flags | MS_NOSEC;
823 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 829 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
824 error = btrfs_fill_super(s, fs_devices, data, 830 error = btrfs_fill_super(s, fs_devices, data,
825 flags & MS_SILENT ? 1 : 0); 831 flags & MS_SILENT ? 1 : 0);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index dc80f7156923..2b3590b9fe98 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -35,6 +35,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
35{ 35{
36 WARN_ON(atomic_read(&transaction->use_count) == 0); 36 WARN_ON(atomic_read(&transaction->use_count) == 0);
37 if (atomic_dec_and_test(&transaction->use_count)) { 37 if (atomic_dec_and_test(&transaction->use_count)) {
38 BUG_ON(!list_empty(&transaction->list));
38 memset(transaction, 0, sizeof(*transaction)); 39 memset(transaction, 0, sizeof(*transaction));
39 kmem_cache_free(btrfs_transaction_cachep, transaction); 40 kmem_cache_free(btrfs_transaction_cachep, transaction);
40 } 41 }
@@ -49,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root)
49/* 50/*
50 * either allocate a new transaction or hop into the existing one 51 * either allocate a new transaction or hop into the existing one
51 */ 52 */
52static noinline int join_transaction(struct btrfs_root *root) 53static noinline int join_transaction(struct btrfs_root *root, int nofail)
53{ 54{
54 struct btrfs_transaction *cur_trans; 55 struct btrfs_transaction *cur_trans;
56
57 spin_lock(&root->fs_info->trans_lock);
58 if (root->fs_info->trans_no_join) {
59 if (!nofail) {
60 spin_unlock(&root->fs_info->trans_lock);
61 return -EBUSY;
62 }
63 }
64
55 cur_trans = root->fs_info->running_transaction; 65 cur_trans = root->fs_info->running_transaction;
56 if (!cur_trans) { 66 if (cur_trans) {
57 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, 67 atomic_inc(&cur_trans->use_count);
58 GFP_NOFS);
59 if (!cur_trans)
60 return -ENOMEM;
61 root->fs_info->generation++;
62 atomic_set(&cur_trans->num_writers, 1);
63 cur_trans->num_joined = 0;
64 cur_trans->transid = root->fs_info->generation;
65 init_waitqueue_head(&cur_trans->writer_wait);
66 init_waitqueue_head(&cur_trans->commit_wait);
67 cur_trans->in_commit = 0;
68 cur_trans->blocked = 0;
69 atomic_set(&cur_trans->use_count, 1);
70 cur_trans->commit_done = 0;
71 cur_trans->start_time = get_seconds();
72
73 cur_trans->delayed_refs.root = RB_ROOT;
74 cur_trans->delayed_refs.num_entries = 0;
75 cur_trans->delayed_refs.num_heads_ready = 0;
76 cur_trans->delayed_refs.num_heads = 0;
77 cur_trans->delayed_refs.flushing = 0;
78 cur_trans->delayed_refs.run_delayed_start = 0;
79 spin_lock_init(&cur_trans->delayed_refs.lock);
80
81 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
82 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
83 extent_io_tree_init(&cur_trans->dirty_pages,
84 root->fs_info->btree_inode->i_mapping);
85 spin_lock(&root->fs_info->new_trans_lock);
86 root->fs_info->running_transaction = cur_trans;
87 spin_unlock(&root->fs_info->new_trans_lock);
88 } else {
89 atomic_inc(&cur_trans->num_writers); 68 atomic_inc(&cur_trans->num_writers);
90 cur_trans->num_joined++; 69 cur_trans->num_joined++;
70 spin_unlock(&root->fs_info->trans_lock);
71 return 0;
91 } 72 }
73 spin_unlock(&root->fs_info->trans_lock);
74
75 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
76 if (!cur_trans)
77 return -ENOMEM;
78 spin_lock(&root->fs_info->trans_lock);
79 if (root->fs_info->running_transaction) {
80 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
81 cur_trans = root->fs_info->running_transaction;
82 atomic_inc(&cur_trans->use_count);
83 atomic_inc(&cur_trans->num_writers);
84 cur_trans->num_joined++;
85 spin_unlock(&root->fs_info->trans_lock);
86 return 0;
87 }
88 atomic_set(&cur_trans->num_writers, 1);
89 cur_trans->num_joined = 0;
90 init_waitqueue_head(&cur_trans->writer_wait);
91 init_waitqueue_head(&cur_trans->commit_wait);
92 cur_trans->in_commit = 0;
93 cur_trans->blocked = 0;
94 /*
95 * One for this trans handle, one so it will live on until we
96 * commit the transaction.
97 */
98 atomic_set(&cur_trans->use_count, 2);
99 cur_trans->commit_done = 0;
100 cur_trans->start_time = get_seconds();
101
102 cur_trans->delayed_refs.root = RB_ROOT;
103 cur_trans->delayed_refs.num_entries = 0;
104 cur_trans->delayed_refs.num_heads_ready = 0;
105 cur_trans->delayed_refs.num_heads = 0;
106 cur_trans->delayed_refs.flushing = 0;
107 cur_trans->delayed_refs.run_delayed_start = 0;
108 spin_lock_init(&cur_trans->commit_lock);
109 spin_lock_init(&cur_trans->delayed_refs.lock);
110
111 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
112 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
113 extent_io_tree_init(&cur_trans->dirty_pages,
114 root->fs_info->btree_inode->i_mapping);
115 root->fs_info->generation++;
116 cur_trans->transid = root->fs_info->generation;
117 root->fs_info->running_transaction = cur_trans;
118 spin_unlock(&root->fs_info->trans_lock);
92 119
93 return 0; 120 return 0;
94} 121}
@@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root)
99 * to make sure the old root from before we joined the transaction is deleted 126 * to make sure the old root from before we joined the transaction is deleted
100 * when the transaction commits 127 * when the transaction commits
101 */ 128 */
102static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, 129int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root) 130 struct btrfs_root *root)
104{ 131{
105 if (root->ref_cows && root->last_trans < trans->transid) { 132 if (root->ref_cows && root->last_trans < trans->transid) {
106 WARN_ON(root == root->fs_info->extent_root); 133 WARN_ON(root == root->fs_info->extent_root);
107 WARN_ON(root->commit_root != root->node); 134 WARN_ON(root->commit_root != root->node);
108 135
136 spin_lock(&root->fs_info->fs_roots_radix_lock);
137 if (root->last_trans == trans->transid) {
138 spin_unlock(&root->fs_info->fs_roots_radix_lock);
139 return 0;
140 }
141 root->last_trans = trans->transid;
109 radix_tree_tag_set(&root->fs_info->fs_roots_radix, 142 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
110 (unsigned long)root->root_key.objectid, 143 (unsigned long)root->root_key.objectid,
111 BTRFS_ROOT_TRANS_TAG); 144 BTRFS_ROOT_TRANS_TAG);
112 root->last_trans = trans->transid; 145 spin_unlock(&root->fs_info->fs_roots_radix_lock);
113 btrfs_init_reloc_root(trans, root); 146 btrfs_init_reloc_root(trans, root);
114 } 147 }
115 return 0; 148 return 0;
116} 149}
117 150
118int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
119 struct btrfs_root *root)
120{
121 if (!root->ref_cows)
122 return 0;
123
124 mutex_lock(&root->fs_info->trans_mutex);
125 if (root->last_trans == trans->transid) {
126 mutex_unlock(&root->fs_info->trans_mutex);
127 return 0;
128 }
129
130 record_root_in_trans(trans, root);
131 mutex_unlock(&root->fs_info->trans_mutex);
132 return 0;
133}
134
135/* wait for commit against the current transaction to become unblocked 151/* wait for commit against the current transaction to become unblocked
136 * when this is done, it is safe to start a new transaction, but the current 152 * when this is done, it is safe to start a new transaction, but the current
137 * transaction might not be fully on disk. 153 * transaction might not be fully on disk.
@@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root)
140{ 156{
141 struct btrfs_transaction *cur_trans; 157 struct btrfs_transaction *cur_trans;
142 158
159 spin_lock(&root->fs_info->trans_lock);
143 cur_trans = root->fs_info->running_transaction; 160 cur_trans = root->fs_info->running_transaction;
144 if (cur_trans && cur_trans->blocked) { 161 if (cur_trans && cur_trans->blocked) {
145 DEFINE_WAIT(wait); 162 DEFINE_WAIT(wait);
146 atomic_inc(&cur_trans->use_count); 163 atomic_inc(&cur_trans->use_count);
164 spin_unlock(&root->fs_info->trans_lock);
147 while (1) { 165 while (1) {
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 166 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE); 167 TASK_UNINTERRUPTIBLE);
150 if (!cur_trans->blocked) 168 if (!cur_trans->blocked)
151 break; 169 break;
152 mutex_unlock(&root->fs_info->trans_mutex);
153 schedule(); 170 schedule();
154 mutex_lock(&root->fs_info->trans_mutex);
155 } 171 }
156 finish_wait(&root->fs_info->transaction_wait, &wait); 172 finish_wait(&root->fs_info->transaction_wait, &wait);
157 put_transaction(cur_trans); 173 put_transaction(cur_trans);
174 } else {
175 spin_unlock(&root->fs_info->trans_lock);
158 } 176 }
159} 177}
160 178
@@ -167,10 +185,16 @@ enum btrfs_trans_type {
167 185
168static int may_wait_transaction(struct btrfs_root *root, int type) 186static int may_wait_transaction(struct btrfs_root *root, int type)
169{ 187{
170 if (!root->fs_info->log_root_recovering && 188 if (root->fs_info->log_root_recovering)
171 ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || 189 return 0;
172 type == TRANS_USERSPACE)) 190
191 if (type == TRANS_USERSPACE)
173 return 1; 192 return 1;
193
194 if (type == TRANS_START &&
195 !atomic_read(&root->fs_info->open_ioctl_trans))
196 return 1;
197
174 return 0; 198 return 0;
175} 199}
176 200
@@ -184,36 +208,44 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
184 208
185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 209 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
186 return ERR_PTR(-EROFS); 210 return ERR_PTR(-EROFS);
211
212 if (current->journal_info) {
213 WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
214 h = current->journal_info;
215 h->use_count++;
216 h->orig_rsv = h->block_rsv;
217 h->block_rsv = NULL;
218 goto got_it;
219 }
187again: 220again:
188 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 221 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
189 if (!h) 222 if (!h)
190 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
191 224
192 if (type != TRANS_JOIN_NOLOCK)
193 mutex_lock(&root->fs_info->trans_mutex);
194 if (may_wait_transaction(root, type)) 225 if (may_wait_transaction(root, type))
195 wait_current_trans(root); 226 wait_current_trans(root);
196 227
197 ret = join_transaction(root); 228 do {
229 ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
230 if (ret == -EBUSY)
231 wait_current_trans(root);
232 } while (ret == -EBUSY);
233
198 if (ret < 0) { 234 if (ret < 0) {
199 kmem_cache_free(btrfs_trans_handle_cachep, h); 235 kmem_cache_free(btrfs_trans_handle_cachep, h);
200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret); 236 return ERR_PTR(ret);
203 } 237 }
204 238
205 cur_trans = root->fs_info->running_transaction; 239 cur_trans = root->fs_info->running_transaction;
206 atomic_inc(&cur_trans->use_count);
207 if (type != TRANS_JOIN_NOLOCK)
208 mutex_unlock(&root->fs_info->trans_mutex);
209 240
210 h->transid = cur_trans->transid; 241 h->transid = cur_trans->transid;
211 h->transaction = cur_trans; 242 h->transaction = cur_trans;
212 h->blocks_used = 0; 243 h->blocks_used = 0;
213 h->block_group = 0;
214 h->bytes_reserved = 0; 244 h->bytes_reserved = 0;
215 h->delayed_ref_updates = 0; 245 h->delayed_ref_updates = 0;
246 h->use_count = 1;
216 h->block_rsv = NULL; 247 h->block_rsv = NULL;
248 h->orig_rsv = NULL;
217 249
218 smp_mb(); 250 smp_mb();
219 if (cur_trans->blocked && may_wait_transaction(root, type)) { 251 if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -241,11 +273,8 @@ again:
241 } 273 }
242 } 274 }
243 275
244 if (type != TRANS_JOIN_NOLOCK) 276got_it:
245 mutex_lock(&root->fs_info->trans_mutex); 277 btrfs_record_root_in_trans(h, root);
246 record_root_in_trans(h, root);
247 if (type != TRANS_JOIN_NOLOCK)
248 mutex_unlock(&root->fs_info->trans_mutex);
249 278
250 if (!current->journal_info && type != TRANS_USERSPACE) 279 if (!current->journal_info && type != TRANS_USERSPACE)
251 current->journal_info = h; 280 current->journal_info = h;
@@ -257,22 +286,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
257{ 286{
258 return start_transaction(root, num_items, TRANS_START); 287 return start_transaction(root, num_items, TRANS_START);
259} 288}
260struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 289struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
261 int num_blocks)
262{ 290{
263 return start_transaction(root, 0, TRANS_JOIN); 291 return start_transaction(root, 0, TRANS_JOIN);
264} 292}
265 293
266struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 294struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
267 int num_blocks)
268{ 295{
269 return start_transaction(root, 0, TRANS_JOIN_NOLOCK); 296 return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
270} 297}
271 298
272struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 299struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
273 int num_blocks)
274{ 300{
275 return start_transaction(r, 0, TRANS_USERSPACE); 301 return start_transaction(root, 0, TRANS_USERSPACE);
276} 302}
277 303
278/* wait for a transaction commit to be fully complete */ 304/* wait for a transaction commit to be fully complete */
@@ -280,17 +306,13 @@ static noinline int wait_for_commit(struct btrfs_root *root,
280 struct btrfs_transaction *commit) 306 struct btrfs_transaction *commit)
281{ 307{
282 DEFINE_WAIT(wait); 308 DEFINE_WAIT(wait);
283 mutex_lock(&root->fs_info->trans_mutex);
284 while (!commit->commit_done) { 309 while (!commit->commit_done) {
285 prepare_to_wait(&commit->commit_wait, &wait, 310 prepare_to_wait(&commit->commit_wait, &wait,
286 TASK_UNINTERRUPTIBLE); 311 TASK_UNINTERRUPTIBLE);
287 if (commit->commit_done) 312 if (commit->commit_done)
288 break; 313 break;
289 mutex_unlock(&root->fs_info->trans_mutex);
290 schedule(); 314 schedule();
291 mutex_lock(&root->fs_info->trans_mutex);
292 } 315 }
293 mutex_unlock(&root->fs_info->trans_mutex);
294 finish_wait(&commit->commit_wait, &wait); 316 finish_wait(&commit->commit_wait, &wait);
295 return 0; 317 return 0;
296} 318}
@@ -300,59 +322,56 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
300 struct btrfs_transaction *cur_trans = NULL, *t; 322 struct btrfs_transaction *cur_trans = NULL, *t;
301 int ret; 323 int ret;
302 324
303 mutex_lock(&root->fs_info->trans_mutex);
304
305 ret = 0; 325 ret = 0;
306 if (transid) { 326 if (transid) {
307 if (transid <= root->fs_info->last_trans_committed) 327 if (transid <= root->fs_info->last_trans_committed)
308 goto out_unlock; 328 goto out;
309 329
310 /* find specified transaction */ 330 /* find specified transaction */
331 spin_lock(&root->fs_info->trans_lock);
311 list_for_each_entry(t, &root->fs_info->trans_list, list) { 332 list_for_each_entry(t, &root->fs_info->trans_list, list) {
312 if (t->transid == transid) { 333 if (t->transid == transid) {
313 cur_trans = t; 334 cur_trans = t;
335 atomic_inc(&cur_trans->use_count);
314 break; 336 break;
315 } 337 }
316 if (t->transid > transid) 338 if (t->transid > transid)
317 break; 339 break;
318 } 340 }
341 spin_unlock(&root->fs_info->trans_lock);
319 ret = -EINVAL; 342 ret = -EINVAL;
320 if (!cur_trans) 343 if (!cur_trans)
321 goto out_unlock; /* bad transid */ 344 goto out; /* bad transid */
322 } else { 345 } else {
323 /* find newest transaction that is committing | committed */ 346 /* find newest transaction that is committing | committed */
347 spin_lock(&root->fs_info->trans_lock);
324 list_for_each_entry_reverse(t, &root->fs_info->trans_list, 348 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
325 list) { 349 list) {
326 if (t->in_commit) { 350 if (t->in_commit) {
327 if (t->commit_done) 351 if (t->commit_done)
328 goto out_unlock; 352 break;
329 cur_trans = t; 353 cur_trans = t;
354 atomic_inc(&cur_trans->use_count);
330 break; 355 break;
331 } 356 }
332 } 357 }
358 spin_unlock(&root->fs_info->trans_lock);
333 if (!cur_trans) 359 if (!cur_trans)
334 goto out_unlock; /* nothing committing|committed */ 360 goto out; /* nothing committing|committed */
335 } 361 }
336 362
337 atomic_inc(&cur_trans->use_count);
338 mutex_unlock(&root->fs_info->trans_mutex);
339
340 wait_for_commit(root, cur_trans); 363 wait_for_commit(root, cur_trans);
341 364
342 mutex_lock(&root->fs_info->trans_mutex);
343 put_transaction(cur_trans); 365 put_transaction(cur_trans);
344 ret = 0; 366 ret = 0;
345out_unlock: 367out:
346 mutex_unlock(&root->fs_info->trans_mutex);
347 return ret; 368 return ret;
348} 369}
349 370
350void btrfs_throttle(struct btrfs_root *root) 371void btrfs_throttle(struct btrfs_root *root)
351{ 372{
352 mutex_lock(&root->fs_info->trans_mutex); 373 if (!atomic_read(&root->fs_info->open_ioctl_trans))
353 if (!root->fs_info->open_ioctl_trans)
354 wait_current_trans(root); 374 wait_current_trans(root);
355 mutex_unlock(&root->fs_info->trans_mutex);
356} 375}
357 376
358static int should_end_transaction(struct btrfs_trans_handle *trans, 377static int should_end_transaction(struct btrfs_trans_handle *trans,
@@ -370,6 +389,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
370 struct btrfs_transaction *cur_trans = trans->transaction; 389 struct btrfs_transaction *cur_trans = trans->transaction;
371 int updates; 390 int updates;
372 391
392 smp_mb();
373 if (cur_trans->blocked || cur_trans->delayed_refs.flushing) 393 if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
374 return 1; 394 return 1;
375 395
@@ -388,6 +408,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
388 struct btrfs_fs_info *info = root->fs_info; 408 struct btrfs_fs_info *info = root->fs_info;
389 int count = 0; 409 int count = 0;
390 410
411 if (--trans->use_count) {
412 trans->block_rsv = trans->orig_rsv;
413 return 0;
414 }
415
391 while (count < 4) { 416 while (count < 4) {
392 unsigned long cur = trans->delayed_ref_updates; 417 unsigned long cur = trans->delayed_ref_updates;
393 trans->delayed_ref_updates = 0; 418 trans->delayed_ref_updates = 0;
@@ -410,9 +435,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
410 435
411 btrfs_trans_release_metadata(trans, root); 436 btrfs_trans_release_metadata(trans, root);
412 437
413 if (lock && !root->fs_info->open_ioctl_trans && 438 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
414 should_end_transaction(trans, root)) 439 should_end_transaction(trans, root)) {
415 trans->transaction->blocked = 1; 440 trans->transaction->blocked = 1;
441 smp_wmb();
442 }
416 443
417 if (lock && cur_trans->blocked && !cur_trans->in_commit) { 444 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
418 if (throttle) 445 if (throttle)
@@ -703,9 +730,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
703 */ 730 */
704int btrfs_add_dead_root(struct btrfs_root *root) 731int btrfs_add_dead_root(struct btrfs_root *root)
705{ 732{
706 mutex_lock(&root->fs_info->trans_mutex); 733 spin_lock(&root->fs_info->trans_lock);
707 list_add(&root->root_list, &root->fs_info->dead_roots); 734 list_add(&root->root_list, &root->fs_info->dead_roots);
708 mutex_unlock(&root->fs_info->trans_mutex); 735 spin_unlock(&root->fs_info->trans_lock);
709 return 0; 736 return 0;
710} 737}
711 738
@@ -721,6 +748,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
721 int ret; 748 int ret;
722 int err = 0; 749 int err = 0;
723 750
751 spin_lock(&fs_info->fs_roots_radix_lock);
724 while (1) { 752 while (1) {
725 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 753 ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
726 (void **)gang, 0, 754 (void **)gang, 0,
@@ -733,6 +761,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
733 radix_tree_tag_clear(&fs_info->fs_roots_radix, 761 radix_tree_tag_clear(&fs_info->fs_roots_radix,
734 (unsigned long)root->root_key.objectid, 762 (unsigned long)root->root_key.objectid,
735 BTRFS_ROOT_TRANS_TAG); 763 BTRFS_ROOT_TRANS_TAG);
764 spin_unlock(&fs_info->fs_roots_radix_lock);
736 765
737 btrfs_free_log(trans, root); 766 btrfs_free_log(trans, root);
738 btrfs_update_reloc_root(trans, root); 767 btrfs_update_reloc_root(trans, root);
@@ -753,10 +782,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
753 err = btrfs_update_root(trans, fs_info->tree_root, 782 err = btrfs_update_root(trans, fs_info->tree_root,
754 &root->root_key, 783 &root->root_key,
755 &root->root_item); 784 &root->root_item);
785 spin_lock(&fs_info->fs_roots_radix_lock);
756 if (err) 786 if (err)
757 break; 787 break;
758 } 788 }
759 } 789 }
790 spin_unlock(&fs_info->fs_roots_radix_lock);
760 return err; 791 return err;
761} 792}
762 793
@@ -786,7 +817,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
786 btrfs_btree_balance_dirty(info->tree_root, nr); 817 btrfs_btree_balance_dirty(info->tree_root, nr);
787 cond_resched(); 818 cond_resched();
788 819
789 if (root->fs_info->closing || ret != -EAGAIN) 820 if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
790 break; 821 break;
791 } 822 }
792 root->defrag_running = 0; 823 root->defrag_running = 0;
@@ -851,7 +882,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
851 parent = dget_parent(dentry); 882 parent = dget_parent(dentry);
852 parent_inode = parent->d_inode; 883 parent_inode = parent->d_inode;
853 parent_root = BTRFS_I(parent_inode)->root; 884 parent_root = BTRFS_I(parent_inode)->root;
854 record_root_in_trans(trans, parent_root); 885 btrfs_record_root_in_trans(trans, parent_root);
855 886
856 /* 887 /*
857 * insert the directory item 888 * insert the directory item
@@ -869,7 +900,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
869 ret = btrfs_update_inode(trans, parent_root, parent_inode); 900 ret = btrfs_update_inode(trans, parent_root, parent_inode);
870 BUG_ON(ret); 901 BUG_ON(ret);
871 902
872 record_root_in_trans(trans, root); 903 btrfs_record_root_in_trans(trans, root);
873 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 904 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
874 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 905 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
875 btrfs_check_and_init_root_item(new_root_item); 906 btrfs_check_and_init_root_item(new_root_item);
@@ -967,20 +998,20 @@ static void update_super_roots(struct btrfs_root *root)
967int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 998int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
968{ 999{
969 int ret = 0; 1000 int ret = 0;
970 spin_lock(&info->new_trans_lock); 1001 spin_lock(&info->trans_lock);
971 if (info->running_transaction) 1002 if (info->running_transaction)
972 ret = info->running_transaction->in_commit; 1003 ret = info->running_transaction->in_commit;
973 spin_unlock(&info->new_trans_lock); 1004 spin_unlock(&info->trans_lock);
974 return ret; 1005 return ret;
975} 1006}
976 1007
977int btrfs_transaction_blocked(struct btrfs_fs_info *info) 1008int btrfs_transaction_blocked(struct btrfs_fs_info *info)
978{ 1009{
979 int ret = 0; 1010 int ret = 0;
980 spin_lock(&info->new_trans_lock); 1011 spin_lock(&info->trans_lock);
981 if (info->running_transaction) 1012 if (info->running_transaction)
982 ret = info->running_transaction->blocked; 1013 ret = info->running_transaction->blocked;
983 spin_unlock(&info->new_trans_lock); 1014 spin_unlock(&info->trans_lock);
984 return ret; 1015 return ret;
985} 1016}
986 1017
@@ -1004,9 +1035,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
1004 &wait); 1035 &wait);
1005 break; 1036 break;
1006 } 1037 }
1007 mutex_unlock(&root->fs_info->trans_mutex);
1008 schedule(); 1038 schedule();
1009 mutex_lock(&root->fs_info->trans_mutex);
1010 finish_wait(&root->fs_info->transaction_blocked_wait, &wait); 1039 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1011 } 1040 }
1012} 1041}
@@ -1032,9 +1061,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1032 &wait); 1061 &wait);
1033 break; 1062 break;
1034 } 1063 }
1035 mutex_unlock(&root->fs_info->trans_mutex);
1036 schedule(); 1064 schedule();
1037 mutex_lock(&root->fs_info->trans_mutex);
1038 finish_wait(&root->fs_info->transaction_wait, 1065 finish_wait(&root->fs_info->transaction_wait,
1039 &wait); 1066 &wait);
1040 } 1067 }
@@ -1072,7 +1099,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1072 1099
1073 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1100 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1074 ac->root = root; 1101 ac->root = root;
1075 ac->newtrans = btrfs_join_transaction(root, 0); 1102 ac->newtrans = btrfs_join_transaction(root);
1076 if (IS_ERR(ac->newtrans)) { 1103 if (IS_ERR(ac->newtrans)) {
1077 int err = PTR_ERR(ac->newtrans); 1104 int err = PTR_ERR(ac->newtrans);
1078 kfree(ac); 1105 kfree(ac);
@@ -1080,23 +1107,22 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1080 } 1107 }
1081 1108
1082 /* take transaction reference */ 1109 /* take transaction reference */
1083 mutex_lock(&root->fs_info->trans_mutex);
1084 cur_trans = trans->transaction; 1110 cur_trans = trans->transaction;
1085 atomic_inc(&cur_trans->use_count); 1111 atomic_inc(&cur_trans->use_count);
1086 mutex_unlock(&root->fs_info->trans_mutex);
1087 1112
1088 btrfs_end_transaction(trans, root); 1113 btrfs_end_transaction(trans, root);
1089 schedule_delayed_work(&ac->work, 0); 1114 schedule_delayed_work(&ac->work, 0);
1090 1115
1091 /* wait for transaction to start and unblock */ 1116 /* wait for transaction to start and unblock */
1092 mutex_lock(&root->fs_info->trans_mutex);
1093 if (wait_for_unblock) 1117 if (wait_for_unblock)
1094 wait_current_trans_commit_start_and_unblock(root, cur_trans); 1118 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1095 else 1119 else
1096 wait_current_trans_commit_start(root, cur_trans); 1120 wait_current_trans_commit_start(root, cur_trans);
1097 put_transaction(cur_trans);
1098 mutex_unlock(&root->fs_info->trans_mutex);
1099 1121
1122 if (current->journal_info == trans)
1123 current->journal_info = NULL;
1124
1125 put_transaction(cur_trans);
1100 return 0; 1126 return 0;
1101} 1127}
1102 1128
@@ -1139,38 +1165,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1139 ret = btrfs_run_delayed_refs(trans, root, 0); 1165 ret = btrfs_run_delayed_refs(trans, root, 0);
1140 BUG_ON(ret); 1166 BUG_ON(ret);
1141 1167
1142 mutex_lock(&root->fs_info->trans_mutex); 1168 spin_lock(&cur_trans->commit_lock);
1143 if (cur_trans->in_commit) { 1169 if (cur_trans->in_commit) {
1170 spin_unlock(&cur_trans->commit_lock);
1144 atomic_inc(&cur_trans->use_count); 1171 atomic_inc(&cur_trans->use_count);
1145 mutex_unlock(&root->fs_info->trans_mutex);
1146 btrfs_end_transaction(trans, root); 1172 btrfs_end_transaction(trans, root);
1147 1173
1148 ret = wait_for_commit(root, cur_trans); 1174 ret = wait_for_commit(root, cur_trans);
1149 BUG_ON(ret); 1175 BUG_ON(ret);
1150 1176
1151 mutex_lock(&root->fs_info->trans_mutex);
1152 put_transaction(cur_trans); 1177 put_transaction(cur_trans);
1153 mutex_unlock(&root->fs_info->trans_mutex);
1154 1178
1155 return 0; 1179 return 0;
1156 } 1180 }
1157 1181
1158 trans->transaction->in_commit = 1; 1182 trans->transaction->in_commit = 1;
1159 trans->transaction->blocked = 1; 1183 trans->transaction->blocked = 1;
1184 spin_unlock(&cur_trans->commit_lock);
1160 wake_up(&root->fs_info->transaction_blocked_wait); 1185 wake_up(&root->fs_info->transaction_blocked_wait);
1161 1186
1187 spin_lock(&root->fs_info->trans_lock);
1162 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1188 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1163 prev_trans = list_entry(cur_trans->list.prev, 1189 prev_trans = list_entry(cur_trans->list.prev,
1164 struct btrfs_transaction, list); 1190 struct btrfs_transaction, list);
1165 if (!prev_trans->commit_done) { 1191 if (!prev_trans->commit_done) {
1166 atomic_inc(&prev_trans->use_count); 1192 atomic_inc(&prev_trans->use_count);
1167 mutex_unlock(&root->fs_info->trans_mutex); 1193 spin_unlock(&root->fs_info->trans_lock);
1168 1194
1169 wait_for_commit(root, prev_trans); 1195 wait_for_commit(root, prev_trans);
1170 1196
1171 mutex_lock(&root->fs_info->trans_mutex);
1172 put_transaction(prev_trans); 1197 put_transaction(prev_trans);
1198 } else {
1199 spin_unlock(&root->fs_info->trans_lock);
1173 } 1200 }
1201 } else {
1202 spin_unlock(&root->fs_info->trans_lock);
1174 } 1203 }
1175 1204
1176 if (now < cur_trans->start_time || now - cur_trans->start_time < 1) 1205 if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
@@ -1178,12 +1207,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1178 1207
1179 do { 1208 do {
1180 int snap_pending = 0; 1209 int snap_pending = 0;
1210
1181 joined = cur_trans->num_joined; 1211 joined = cur_trans->num_joined;
1182 if (!list_empty(&trans->transaction->pending_snapshots)) 1212 if (!list_empty(&trans->transaction->pending_snapshots))
1183 snap_pending = 1; 1213 snap_pending = 1;
1184 1214
1185 WARN_ON(cur_trans != trans->transaction); 1215 WARN_ON(cur_trans != trans->transaction);
1186 mutex_unlock(&root->fs_info->trans_mutex);
1187 1216
1188 if (flush_on_commit || snap_pending) { 1217 if (flush_on_commit || snap_pending) {
1189 btrfs_start_delalloc_inodes(root, 1); 1218 btrfs_start_delalloc_inodes(root, 1);
@@ -1206,14 +1235,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1206 prepare_to_wait(&cur_trans->writer_wait, &wait, 1235 prepare_to_wait(&cur_trans->writer_wait, &wait,
1207 TASK_UNINTERRUPTIBLE); 1236 TASK_UNINTERRUPTIBLE);
1208 1237
1209 smp_mb();
1210 if (atomic_read(&cur_trans->num_writers) > 1) 1238 if (atomic_read(&cur_trans->num_writers) > 1)
1211 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1239 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1212 else if (should_grow) 1240 else if (should_grow)
1213 schedule_timeout(1); 1241 schedule_timeout(1);
1214 1242
1215 mutex_lock(&root->fs_info->trans_mutex);
1216 finish_wait(&cur_trans->writer_wait, &wait); 1243 finish_wait(&cur_trans->writer_wait, &wait);
1244 spin_lock(&root->fs_info->trans_lock);
1245 root->fs_info->trans_no_join = 1;
1246 spin_unlock(&root->fs_info->trans_lock);
1217 } while (atomic_read(&cur_trans->num_writers) > 1 || 1247 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1218 (should_grow && cur_trans->num_joined != joined)); 1248 (should_grow && cur_trans->num_joined != joined));
1219 1249
@@ -1258,9 +1288,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1258 btrfs_prepare_extent_commit(trans, root); 1288 btrfs_prepare_extent_commit(trans, root);
1259 1289
1260 cur_trans = root->fs_info->running_transaction; 1290 cur_trans = root->fs_info->running_transaction;
1261 spin_lock(&root->fs_info->new_trans_lock);
1262 root->fs_info->running_transaction = NULL;
1263 spin_unlock(&root->fs_info->new_trans_lock);
1264 1291
1265 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1292 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1266 root->fs_info->tree_root->node); 1293 root->fs_info->tree_root->node);
@@ -1281,10 +1308,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1281 sizeof(root->fs_info->super_copy)); 1308 sizeof(root->fs_info->super_copy));
1282 1309
1283 trans->transaction->blocked = 0; 1310 trans->transaction->blocked = 0;
1311 spin_lock(&root->fs_info->trans_lock);
1312 root->fs_info->running_transaction = NULL;
1313 root->fs_info->trans_no_join = 0;
1314 spin_unlock(&root->fs_info->trans_lock);
1284 1315
1285 wake_up(&root->fs_info->transaction_wait); 1316 wake_up(&root->fs_info->transaction_wait);
1286 1317
1287 mutex_unlock(&root->fs_info->trans_mutex);
1288 ret = btrfs_write_and_wait_transaction(trans, root); 1318 ret = btrfs_write_and_wait_transaction(trans, root);
1289 BUG_ON(ret); 1319 BUG_ON(ret);
1290 write_ctree_super(trans, root, 0); 1320 write_ctree_super(trans, root, 0);
@@ -1297,22 +1327,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1297 1327
1298 btrfs_finish_extent_commit(trans, root); 1328 btrfs_finish_extent_commit(trans, root);
1299 1329
1300 mutex_lock(&root->fs_info->trans_mutex);
1301
1302 cur_trans->commit_done = 1; 1330 cur_trans->commit_done = 1;
1303 1331
1304 root->fs_info->last_trans_committed = cur_trans->transid; 1332 root->fs_info->last_trans_committed = cur_trans->transid;
1305 1333
1306 wake_up(&cur_trans->commit_wait); 1334 wake_up(&cur_trans->commit_wait);
1307 1335
1336 spin_lock(&root->fs_info->trans_lock);
1308 list_del_init(&cur_trans->list); 1337 list_del_init(&cur_trans->list);
1338 spin_unlock(&root->fs_info->trans_lock);
1339
1309 put_transaction(cur_trans); 1340 put_transaction(cur_trans);
1310 put_transaction(cur_trans); 1341 put_transaction(cur_trans);
1311 1342
1312 trace_btrfs_transaction_commit(root); 1343 trace_btrfs_transaction_commit(root);
1313 1344
1314 mutex_unlock(&root->fs_info->trans_mutex);
1315
1316 btrfs_scrub_continue(root); 1345 btrfs_scrub_continue(root);
1317 1346
1318 if (current->journal_info == trans) 1347 if (current->journal_info == trans)
@@ -1334,9 +1363,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1334 LIST_HEAD(list); 1363 LIST_HEAD(list);
1335 struct btrfs_fs_info *fs_info = root->fs_info; 1364 struct btrfs_fs_info *fs_info = root->fs_info;
1336 1365
1337 mutex_lock(&fs_info->trans_mutex); 1366 spin_lock(&fs_info->trans_lock);
1338 list_splice_init(&fs_info->dead_roots, &list); 1367 list_splice_init(&fs_info->dead_roots, &list);
1339 mutex_unlock(&fs_info->trans_mutex); 1368 spin_unlock(&fs_info->trans_lock);
1340 1369
1341 while (!list_empty(&list)) { 1370 while (!list_empty(&list)) {
1342 root = list_entry(list.next, struct btrfs_root, root_list); 1371 root = list_entry(list.next, struct btrfs_root, root_list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 804c88639e5d..02564e6230ac 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -28,10 +28,12 @@ struct btrfs_transaction {
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 atomic_t num_writers; 30 atomic_t num_writers;
31 atomic_t use_count;
31 32
32 unsigned long num_joined; 33 unsigned long num_joined;
34
35 spinlock_t commit_lock;
33 int in_commit; 36 int in_commit;
34 atomic_t use_count;
35 int commit_done; 37 int commit_done;
36 int blocked; 38 int blocked;
37 struct list_head list; 39 struct list_head list;
@@ -45,13 +47,14 @@ struct btrfs_transaction {
45 47
46struct btrfs_trans_handle { 48struct btrfs_trans_handle {
47 u64 transid; 49 u64 transid;
48 u64 block_group;
49 u64 bytes_reserved; 50 u64 bytes_reserved;
51 unsigned long use_count;
50 unsigned long blocks_reserved; 52 unsigned long blocks_reserved;
51 unsigned long blocks_used; 53 unsigned long blocks_used;
52 unsigned long delayed_ref_updates; 54 unsigned long delayed_ref_updates;
53 struct btrfs_transaction *transaction; 55 struct btrfs_transaction *transaction;
54 struct btrfs_block_rsv *block_rsv; 56 struct btrfs_block_rsv *block_rsv;
57 struct btrfs_block_rsv *orig_rsv;
55}; 58};
56 59
57struct btrfs_pending_snapshot { 60struct btrfs_pending_snapshot {
@@ -66,19 +69,6 @@ struct btrfs_pending_snapshot {
66 struct list_head list; 69 struct list_head list;
67}; 70};
68 71
69static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
70 struct inode *inode)
71{
72 trans->block_group = BTRFS_I(inode)->block_group;
73}
74
75static inline void btrfs_update_inode_block_group(
76 struct btrfs_trans_handle *trans,
77 struct inode *inode)
78{
79 BTRFS_I(inode)->block_group = trans->block_group;
80}
81
82static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, 72static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
83 struct inode *inode) 73 struct inode *inode)
84{ 74{
@@ -92,12 +82,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
92 struct btrfs_root *root); 82 struct btrfs_root *root);
93struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 83struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
94 int num_items); 84 int num_items);
95struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 85struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
96 int num_blocks); 86struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
97struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root, 87struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
98 int num_blocks);
99struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
100 int num_blocks);
101int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 88int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
102int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 89int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
103 struct btrfs_root *root); 90 struct btrfs_root *root);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c48214ef5c09..1efa56e18f9b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
504 BUG_ON(!new_device); 504 BUG_ON(!new_device);
505 memcpy(new_device, device, sizeof(*new_device)); 505 memcpy(new_device, device, sizeof(*new_device));
506 new_device->name = kstrdup(device->name, GFP_NOFS); 506 new_device->name = kstrdup(device->name, GFP_NOFS);
507 BUG_ON(!new_device->name); 507 BUG_ON(device->name && !new_device->name);
508 new_device->bdev = NULL; 508 new_device->bdev = NULL;
509 new_device->writeable = 0; 509 new_device->writeable = 0;
510 new_device->in_fs_metadata = 0; 510 new_device->in_fs_metadata = 0;
@@ -689,12 +689,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
689 transid = btrfs_super_generation(disk_super); 689 transid = btrfs_super_generation(disk_super);
690 if (disk_super->label[0]) 690 if (disk_super->label[0])
691 printk(KERN_INFO "device label %s ", disk_super->label); 691 printk(KERN_INFO "device label %s ", disk_super->label);
692 else { 692 else
693 /* FIXME, make a readl uuid parser */ 693 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
694 printk(KERN_INFO "device fsid %llx-%llx ",
695 *(unsigned long long *)disk_super->fsid,
696 *(unsigned long long *)(disk_super->fsid + 8));
697 }
698 printk(KERN_CONT "devid %llu transid %llu %s\n", 694 printk(KERN_CONT "devid %llu transid %llu %s\n",
699 (unsigned long long)devid, (unsigned long long)transid, path); 695 (unsigned long long)devid, (unsigned long long)transid, path);
700 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 696 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index f3107e4b4d56..5366fe452ab0 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
158 if (IS_ERR(trans)) 158 if (IS_ERR(trans))
159 return PTR_ERR(trans); 159 return PTR_ERR(trans);
160 160
161 btrfs_set_trans_block_group(trans, inode);
162
163 ret = do_setxattr(trans, inode, name, value, size, flags); 161 ret = do_setxattr(trans, inode, name, value, size, flags);
164 if (ret) 162 if (ret)
165 goto out; 163 goto out;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 33da49dc3cc6..5a3953db8118 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -453,7 +453,7 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
453 int err; 453 int err;
454 struct inode *inode = page->mapping->host; 454 struct inode *inode = page->mapping->host;
455 BUG_ON(!inode); 455 BUG_ON(!inode);
456 igrab(inode); 456 ihold(inode);
457 err = writepage_nounlock(page, wbc); 457 err = writepage_nounlock(page, wbc);
458 unlock_page(page); 458 unlock_page(page);
459 iput(inode); 459 iput(inode);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1f72b00447c4..f605753c8fe9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2940,14 +2940,12 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2940 while (!list_empty(&mdsc->cap_dirty)) { 2940 while (!list_empty(&mdsc->cap_dirty)) {
2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info, 2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
2942 i_dirty_item); 2942 i_dirty_item);
2943 inode = igrab(&ci->vfs_inode); 2943 inode = &ci->vfs_inode;
2944 ihold(inode);
2944 dout("flush_dirty_caps %p\n", inode); 2945 dout("flush_dirty_caps %p\n", inode);
2945 spin_unlock(&mdsc->cap_dirty_lock); 2946 spin_unlock(&mdsc->cap_dirty_lock);
2946 if (inode) { 2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, 2948 iput(inode);
2948 NULL);
2949 iput(inode);
2950 }
2951 spin_lock(&mdsc->cap_dirty_lock); 2949 spin_lock(&mdsc->cap_dirty_lock);
2952 } 2950 }
2953 spin_unlock(&mdsc->cap_dirty_lock); 2951 spin_unlock(&mdsc->cap_dirty_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 33729e822bb9..ef8f08c343e8 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -308,7 +308,8 @@ more:
308 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 308 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
309 if (IS_ERR(req)) 309 if (IS_ERR(req))
310 return PTR_ERR(req); 310 return PTR_ERR(req);
311 req->r_inode = igrab(inode); 311 req->r_inode = inode;
312 ihold(inode);
312 req->r_dentry = dget(filp->f_dentry); 313 req->r_dentry = dget(filp->f_dentry);
313 /* hints to request -> mds selection code */ 314 /* hints to request -> mds selection code */
314 req->r_direct_mode = USE_AUTH_MDS; 315 req->r_direct_mode = USE_AUTH_MDS;
@@ -787,10 +788,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
787 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 788 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
788 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 789 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
789 err = ceph_mdsc_do_request(mdsc, dir, req); 790 err = ceph_mdsc_do_request(mdsc, dir, req);
790 if (err) 791 if (err) {
791 d_drop(dentry); 792 d_drop(dentry);
792 else if (!req->r_reply_info.head->is_dentry) 793 } else if (!req->r_reply_info.head->is_dentry) {
793 d_instantiate(dentry, igrab(old_dentry->d_inode)); 794 ihold(old_dentry->d_inode);
795 d_instantiate(dentry, old_dentry->d_inode);
796 }
794 ceph_mdsc_put_request(req); 797 ceph_mdsc_put_request(req);
795 return err; 798 return err;
796} 799}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index a610d3d67488..f67b687550de 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -109,7 +109,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
109 err = ceph_mdsc_do_request(mdsc, NULL, req); 109 err = ceph_mdsc_do_request(mdsc, NULL, req);
110 inode = req->r_target_inode; 110 inode = req->r_target_inode;
111 if (inode) 111 if (inode)
112 igrab(inode); 112 ihold(inode);
113 ceph_mdsc_put_request(req); 113 ceph_mdsc_put_request(req);
114 if (!inode) 114 if (!inode)
115 return ERR_PTR(-ESTALE); 115 return ERR_PTR(-ESTALE);
@@ -167,7 +167,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
167 err = ceph_mdsc_do_request(mdsc, NULL, req); 167 err = ceph_mdsc_do_request(mdsc, NULL, req);
168 inode = req->r_target_inode; 168 inode = req->r_target_inode;
169 if (inode) 169 if (inode)
170 igrab(inode); 170 ihold(inode);
171 ceph_mdsc_put_request(req); 171 ceph_mdsc_put_request(req);
172 if (!inode) 172 if (!inode)
173 return ERR_PTR(err ? err : -ESTALE); 173 return ERR_PTR(err ? err : -ESTALE);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 203252d88d9f..9542f07d0b93 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -191,7 +191,8 @@ int ceph_open(struct inode *inode, struct file *file)
191 err = PTR_ERR(req); 191 err = PTR_ERR(req);
192 goto out; 192 goto out;
193 } 193 }
194 req->r_inode = igrab(inode); 194 req->r_inode = inode;
195 ihold(inode);
195 req->r_num_caps = 1; 196 req->r_num_caps = 1;
196 err = ceph_mdsc_do_request(mdsc, parent_inode, req); 197 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
197 if (!err) 198 if (!err)
@@ -282,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file)
282static int striped_read(struct inode *inode, 283static int striped_read(struct inode *inode,
283 u64 off, u64 len, 284 u64 off, u64 len,
284 struct page **pages, int num_pages, 285 struct page **pages, int num_pages,
285 int *checkeof, bool align_to_pages, 286 int *checkeof, bool o_direct,
286 unsigned long buf_align) 287 unsigned long buf_align)
287{ 288{
288 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 289 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode,
307 io_align = off & ~PAGE_MASK; 308 io_align = off & ~PAGE_MASK;
308 309
309more: 310more:
310 if (align_to_pages) 311 if (o_direct)
311 page_align = (pos - io_align + buf_align) & ~PAGE_MASK; 312 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
312 else 313 else
313 page_align = pos & ~PAGE_MASK; 314 page_align = pos & ~PAGE_MASK;
@@ -317,10 +318,10 @@ more:
317 ci->i_truncate_seq, 318 ci->i_truncate_seq,
318 ci->i_truncate_size, 319 ci->i_truncate_size,
319 page_pos, pages_left, page_align); 320 page_pos, pages_left, page_align);
320 hit_stripe = this_len < left;
321 was_short = ret >= 0 && ret < this_len;
322 if (ret == -ENOENT) 321 if (ret == -ENOENT)
323 ret = 0; 322 ret = 0;
323 hit_stripe = this_len < left;
324 was_short = ret >= 0 && ret < this_len;
324 dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, 325 dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
325 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); 326 ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
326 327
@@ -345,20 +346,22 @@ more:
345 } 346 }
346 347
347 if (was_short) { 348 if (was_short) {
348 /* was original extent fully inside i_size? */ 349 /* did we bounce off eof? */
349 if (pos + left <= inode->i_size) { 350 if (pos + left > inode->i_size)
350 dout("zero tail\n"); 351 *checkeof = 1;
351 ceph_zero_page_vector_range(page_off + read, len - read, 352
353 /* zero trailing bytes (inside i_size) */
354 if (left > 0 && pos < inode->i_size) {
355 if (pos + left > inode->i_size)
356 left = inode->i_size - pos;
357
358 dout("zero tail %d\n", left);
359 ceph_zero_page_vector_range(page_off + read, left,
352 pages); 360 pages);
353 read = len; 361 read += left;
354 goto out;
355 } 362 }
356
357 /* check i_size */
358 *checkeof = 1;
359 } 363 }
360 364
361out:
362 if (ret >= 0) 365 if (ret >= 0)
363 ret = read; 366 ret = read;
364 dout("striped_read returns %d\n", ret); 367 dout("striped_read returns %d\n", ret);
@@ -658,7 +661,7 @@ out:
658 661
659 /* hit EOF or hole? */ 662 /* hit EOF or hole? */
660 if (statret == 0 && *ppos < inode->i_size) { 663 if (statret == 0 && *ppos < inode->i_size) {
661 dout("aio_read sync_read hit hole, reading more\n"); 664 dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size);
662 read += ret; 665 read += ret;
663 base += ret; 666 base += ret;
664 len -= ret; 667 len -= ret;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 70b6a4839c38..d8858e96ab18 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1101,10 +1101,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1101 goto done; 1101 goto done;
1102 } 1102 }
1103 req->r_dentry = dn; /* may have spliced */ 1103 req->r_dentry = dn; /* may have spliced */
1104 igrab(in); 1104 ihold(in);
1105 } else if (ceph_ino(in) == vino.ino && 1105 } else if (ceph_ino(in) == vino.ino &&
1106 ceph_snap(in) == vino.snap) { 1106 ceph_snap(in) == vino.snap) {
1107 igrab(in); 1107 ihold(in);
1108 } else { 1108 } else {
1109 dout(" %p links to %p %llx.%llx, not %llx.%llx\n", 1109 dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
1110 dn, in, ceph_ino(in), ceph_snap(in), 1110 dn, in, ceph_ino(in), ceph_snap(in),
@@ -1144,7 +1144,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1144 goto done; 1144 goto done;
1145 } 1145 }
1146 req->r_dentry = dn; /* may have spliced */ 1146 req->r_dentry = dn; /* may have spliced */
1147 igrab(in); 1147 ihold(in);
1148 rinfo->head->is_dentry = 1; /* fool notrace handlers */ 1148 rinfo->head->is_dentry = 1; /* fool notrace handlers */
1149 } 1149 }
1150 1150
@@ -1328,7 +1328,7 @@ void ceph_queue_writeback(struct inode *inode)
1328 if (queue_work(ceph_inode_to_client(inode)->wb_wq, 1328 if (queue_work(ceph_inode_to_client(inode)->wb_wq,
1329 &ceph_inode(inode)->i_wb_work)) { 1329 &ceph_inode(inode)->i_wb_work)) {
1330 dout("ceph_queue_writeback %p\n", inode); 1330 dout("ceph_queue_writeback %p\n", inode);
1331 igrab(inode); 1331 ihold(inode);
1332 } else { 1332 } else {
1333 dout("ceph_queue_writeback %p failed\n", inode); 1333 dout("ceph_queue_writeback %p failed\n", inode);
1334 } 1334 }
@@ -1353,7 +1353,7 @@ void ceph_queue_invalidate(struct inode *inode)
1353 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 1353 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
1354 &ceph_inode(inode)->i_pg_inv_work)) { 1354 &ceph_inode(inode)->i_pg_inv_work)) {
1355 dout("ceph_queue_invalidate %p\n", inode); 1355 dout("ceph_queue_invalidate %p\n", inode);
1356 igrab(inode); 1356 ihold(inode);
1357 } else { 1357 } else {
1358 dout("ceph_queue_invalidate %p failed\n", inode); 1358 dout("ceph_queue_invalidate %p failed\n", inode);
1359 } 1359 }
@@ -1477,7 +1477,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
1477 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, 1477 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1478 &ci->i_vmtruncate_work)) { 1478 &ci->i_vmtruncate_work)) {
1479 dout("ceph_queue_vmtruncate %p\n", inode); 1479 dout("ceph_queue_vmtruncate %p\n", inode);
1480 igrab(inode); 1480 ihold(inode);
1481 } else { 1481 } else {
1482 dout("ceph_queue_vmtruncate %p failed, pending=%d\n", 1482 dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
1483 inode, ci->i_truncate_pending); 1483 inode, ci->i_truncate_pending);
@@ -1738,7 +1738,8 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1738 __mark_inode_dirty(inode, inode_dirty_flags); 1738 __mark_inode_dirty(inode, inode_dirty_flags);
1739 1739
1740 if (mask) { 1740 if (mask) {
1741 req->r_inode = igrab(inode); 1741 req->r_inode = inode;
1742 ihold(inode);
1742 req->r_inode_drop = release; 1743 req->r_inode_drop = release;
1743 req->r_args.setattr.mask = cpu_to_le32(mask); 1744 req->r_args.setattr.mask = cpu_to_le32(mask);
1744 req->r_num_caps = 1; 1745 req->r_num_caps = 1;
@@ -1779,7 +1780,8 @@ int ceph_do_getattr(struct inode *inode, int mask)
1779 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1780 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
1780 if (IS_ERR(req)) 1781 if (IS_ERR(req))
1781 return PTR_ERR(req); 1782 return PTR_ERR(req);
1782 req->r_inode = igrab(inode); 1783 req->r_inode = inode;
1784 ihold(inode);
1783 req->r_num_caps = 1; 1785 req->r_num_caps = 1;
1784 req->r_args.getattr.mask = cpu_to_le32(mask); 1786 req->r_args.getattr.mask = cpu_to_le32(mask);
1785 err = ceph_mdsc_do_request(mdsc, NULL, req); 1787 err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 8888c9ba68db..ef0b5f48e13a 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -73,7 +73,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
73 USE_AUTH_MDS); 73 USE_AUTH_MDS);
74 if (IS_ERR(req)) 74 if (IS_ERR(req))
75 return PTR_ERR(req); 75 return PTR_ERR(req);
76 req->r_inode = igrab(inode); 76 req->r_inode = inode;
77 ihold(inode);
77 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; 78 req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
78 79
79 req->r_args.setlayout.layout.fl_stripe_unit = 80 req->r_args.setlayout.layout.fl_stripe_unit =
@@ -135,7 +136,8 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
135 136
136 if (IS_ERR(req)) 137 if (IS_ERR(req))
137 return PTR_ERR(req); 138 return PTR_ERR(req);
138 req->r_inode = igrab(inode); 139 req->r_inode = inode;
140 ihold(inode);
139 141
140 req->r_args.setlayout.layout.fl_stripe_unit = 142 req->r_args.setlayout.layout.fl_stripe_unit =
141 cpu_to_le32(l.stripe_unit); 143 cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 476b329867d4..80576d05d687 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -23,7 +23,8 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 24 if (IS_ERR(req))
25 return PTR_ERR(req); 25 return PTR_ERR(req);
26 req->r_inode = igrab(inode); 26 req->r_inode = inode;
27 ihold(inode);
27 28
28 /* mds requires start and length rather than start and end */ 29 /* mds requires start and length rather than start and end */
29 if (LLONG_MAX == fl->fl_end) 30 if (LLONG_MAX == fl->fl_end)
@@ -32,11 +33,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
32 length = fl->fl_end - fl->fl_start + 1; 33 length = fl->fl_end - fl->fl_start + 1;
33 34
34 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 35 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
35 "length: %llu, wait: %d, type`: %d", (int)lock_type, 36 "length: %llu, wait: %d, type: %d", (int)lock_type,
36 (int)operation, (u64)fl->fl_pid, fl->fl_start, 37 (int)operation, (u64)fl->fl_pid, fl->fl_start,
37 length, wait, fl->fl_type); 38 length, wait, fl->fl_type);
38 39
39
40 req->r_args.filelock_change.rule = lock_type; 40 req->r_args.filelock_change.rule = lock_type;
41 req->r_args.filelock_change.type = cmd; 41 req->r_args.filelock_change.type = cmd;
42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); 42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
@@ -70,7 +70,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
70 } 70 }
71 ceph_mdsc_put_request(req); 71 ceph_mdsc_put_request(req);
72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
73 "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, 73 "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
74 (int)operation, (u64)fl->fl_pid, fl->fl_start, 74 (int)operation, (u64)fl->fl_pid, fl->fl_start,
75 length, wait, fl->fl_type, err); 75 length, wait, fl->fl_type, err);
76 return err; 76 return err;
@@ -109,16 +109,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
109 dout("mds locked, locking locally"); 109 dout("mds locked, locking locally");
110 err = posix_lock_file(file, fl, NULL); 110 err = posix_lock_file(file, fl, NULL);
111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
112 /* undo! This should only happen if the kernel detects 112 /* undo! This should only happen if
113 * local deadlock. */ 113 * the kernel detects local
114 * deadlock. */
114 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 115 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
115 CEPH_LOCK_UNLOCK, 0, fl); 116 CEPH_LOCK_UNLOCK, 0, fl);
116 dout("got %d on posix_lock_file, undid lock", err); 117 dout("got %d on posix_lock_file, undid lock",
118 err);
117 } 119 }
118 } 120 }
119 121
120 } else { 122 } else if (err == -ERESTARTSYS) {
121 dout("mds returned error code %d", err); 123 dout("undoing lock\n");
124 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
125 CEPH_LOCK_UNLOCK, 0, fl);
122 } 126 }
123 return err; 127 return err;
124} 128}
@@ -155,8 +159,11 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
155 file, CEPH_LOCK_UNLOCK, 0, fl); 159 file, CEPH_LOCK_UNLOCK, 0, fl);
156 dout("got %d on flock_lock_file_wait, undid lock", err); 160 dout("got %d on flock_lock_file_wait, undid lock", err);
157 } 161 }
158 } else { 162 } else if (err == -ERESTARTSYS) {
159 dout("mds error code %d", err); 163 dout("undoing lock\n");
164 ceph_lock_message(CEPH_LOCK_FLOCK,
165 CEPH_MDS_OP_SETFILELOCK,
166 file, CEPH_LOCK_UNLOCK, 0, fl);
160 } 167 }
161 return err; 168 return err;
162} 169}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 24067d68a554..54b14de2e729 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -722,7 +722,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
722 ci = list_first_entry(&mdsc->snap_flush_list, 722 ci = list_first_entry(&mdsc->snap_flush_list,
723 struct ceph_inode_info, i_snap_flush_item); 723 struct ceph_inode_info, i_snap_flush_item);
724 inode = &ci->vfs_inode; 724 inode = &ci->vfs_inode;
725 igrab(inode); 725 ihold(inode);
726 spin_unlock(&mdsc->snap_flush_lock); 726 spin_unlock(&mdsc->snap_flush_lock);
727 spin_lock(&inode->i_lock); 727 spin_lock(&inode->i_lock);
728 __ceph_flush_snaps(ci, &session, 0); 728 __ceph_flush_snaps(ci, &session, 0);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f2b628696180..f42d730f1b66 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -665,7 +665,8 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
665 err = PTR_ERR(req); 665 err = PTR_ERR(req);
666 goto out; 666 goto out;
667 } 667 }
668 req->r_inode = igrab(inode); 668 req->r_inode = inode;
669 ihold(inode);
669 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 670 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
670 req->r_num_caps = 1; 671 req->r_num_caps = 1;
671 req->r_args.setxattr.flags = cpu_to_le32(flags); 672 req->r_args.setxattr.flags = cpu_to_le32(flags);
@@ -795,7 +796,8 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
795 USE_AUTH_MDS); 796 USE_AUTH_MDS);
796 if (IS_ERR(req)) 797 if (IS_ERR(req))
797 return PTR_ERR(req); 798 return PTR_ERR(req);
798 req->r_inode = igrab(inode); 799 req->r_inode = inode;
800 ihold(inode);
799 req->r_inode_drop = CEPH_CAP_XATTR_SHARED; 801 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
800 req->r_num_caps = 1; 802 req->r_num_caps = 1;
801 req->r_path2 = kstrdup(name, GFP_NOFS); 803 req->r_path2 = kstrdup(name, GFP_NOFS);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 1cd4c3a1862d..53ed1ad2c112 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
7 select CRYPTO_MD5 7 select CRYPTO_MD5
8 select CRYPTO_HMAC 8 select CRYPTO_HMAC
9 select CRYPTO_ARC4 9 select CRYPTO_ARC4
10 select CRYPTO_ECB
10 select CRYPTO_DES 11 select CRYPTO_DES
11 help 12 help
12 This is the client VFS module for the Common Internet File System 13 This is the client VFS module for the Common Internet File System
@@ -148,7 +149,7 @@ config CIFS_FSCACHE
148 149
149config CIFS_ACL 150config CIFS_ACL
150 bool "Provide CIFS ACL support (EXPERIMENTAL)" 151 bool "Provide CIFS ACL support (EXPERIMENTAL)"
151 depends on EXPERIMENTAL && CIFS_XATTR 152 depends on EXPERIMENTAL && CIFS_XATTR && KEYS
152 help 153 help
153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob 154 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
154 is handed over to the application/caller. 155 is handed over to the application/caller.
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index dfbd9f1f373d..5a0ee7f2af06 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -184,7 +184,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
184 if (cifs_pdu == NULL || server == NULL) 184 if (cifs_pdu == NULL || server == NULL)
185 return -EINVAL; 185 return -EINVAL;
186 186
187 if (cifs_pdu->Command == SMB_COM_NEGOTIATE) 187 if (!server->session_estab)
188 return 0; 188 return 0;
189 189
190 if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) { 190 if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6d88b82537c3..bb659eb73810 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -784,7 +784,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
784 struct smb_vol *vol) 784 struct smb_vol *vol)
785{ 785{
786 char *value, *data, *end; 786 char *value, *data, *end;
787 char *mountdata_copy, *options; 787 char *mountdata_copy = NULL, *options;
788 unsigned int temp_len, i, j; 788 unsigned int temp_len, i, j;
789 char separator[2]; 789 char separator[2];
790 short int override_uid = -1; 790 short int override_uid = -1;
@@ -1391,7 +1391,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
1391 "/proc/fs/cifs/LookupCacheEnabled to 0\n"); 1391 "/proc/fs/cifs/LookupCacheEnabled to 0\n");
1392 } else if (strnicmp(data, "fsc", 3) == 0) { 1392 } else if (strnicmp(data, "fsc", 3) == 0) {
1393#ifndef CONFIG_CIFS_FSCACHE 1393#ifndef CONFIG_CIFS_FSCACHE
1394 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" 1394 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE "
1395 "kernel config option set"); 1395 "kernel config option set");
1396 goto cifs_parse_mount_err; 1396 goto cifs_parse_mount_err;
1397#endif 1397#endif
@@ -1976,7 +1976,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1976 warned_on_ntlm = true; 1976 warned_on_ntlm = true;
1977 cERROR(1, "default security mechanism requested. The default " 1977 cERROR(1, "default security mechanism requested. The default "
1978 "security mechanism will be upgraded from ntlm to " 1978 "security mechanism will be upgraded from ntlm to "
1979 "ntlmv2 in kernel release 2.6.41"); 1979 "ntlmv2 in kernel release 3.1");
1980 } 1980 }
1981 ses->overrideSecFlg = volume_info->secFlg; 1981 ses->overrideSecFlg = volume_info->secFlg;
1982 1982
diff --git a/fs/dcookies.c b/fs/dcookies.c
index a21cabdbd87b..dda0dc702d1b 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -178,6 +178,8 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len)
178 /* FIXME: (deleted) ? */ 178 /* FIXME: (deleted) ? */
179 path = d_path(&dcs->path, kbuf, PAGE_SIZE); 179 path = d_path(&dcs->path, kbuf, PAGE_SIZE);
180 180
181 mutex_unlock(&dcookie_mutex);
182
181 if (IS_ERR(path)) { 183 if (IS_ERR(path)) {
182 err = PTR_ERR(path); 184 err = PTR_ERR(path);
183 goto out_free; 185 goto out_free;
@@ -194,6 +196,7 @@ SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len)
194 196
195out_free: 197out_free:
196 kfree(kbuf); 198 kfree(kbuf);
199 return err;
197out: 200out:
198 mutex_unlock(&dcookie_mutex); 201 mutex_unlock(&dcookie_mutex);
199 return err; 202 return err;
diff --git a/fs/exec.c b/fs/exec.c
index ea5f748906a8..97e0d52d72fd 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1093,6 +1093,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1093 1093
1094 bprm->mm = NULL; /* We're using it now */ 1094 bprm->mm = NULL; /* We're using it now */
1095 1095
1096 set_fs(USER_DS);
1096 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); 1097 current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
1097 flush_thread(); 1098 flush_thread();
1098 current->personality &= ~bprm->per_clear; 1099 current->personality &= ~bprm->per_clear;
@@ -1357,10 +1358,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1357 if (retval) 1358 if (retval)
1358 return retval; 1359 return retval;
1359 1360
1360 /* kernel module loader fixup */
1361 /* so we don't try to load run modprobe in kernel space. */
1362 set_fs(USER_DS);
1363
1364 retval = audit_bprm(bprm); 1361 retval = audit_bprm(bprm);
1365 if (retval) 1362 if (retval)
1366 return retval; 1363 return retval;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7257752b6d5d..7018e1d8902d 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -102,7 +102,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
102 if (attr & ATTR_SYS) 102 if (attr & ATTR_SYS)
103 inode->i_flags |= S_IMMUTABLE; 103 inode->i_flags |= S_IMMUTABLE;
104 else 104 else
105 inode->i_flags &= S_IMMUTABLE; 105 inode->i_flags &= ~S_IMMUTABLE;
106 } 106 }
107 107
108 fat_save_attrs(inode, attr); 108 fat_save_attrs(inode, attr);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cc6ec4b2f0ff..38f84cd48b67 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -921,6 +921,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
921 if (sb->s_flags & MS_MANDLOCK) 921 if (sb->s_flags & MS_MANDLOCK)
922 goto err; 922 goto err;
923 923
924 sb->s_flags &= ~MS_NOSEC;
925
924 if (!parse_fuse_opt((char *) data, &d, is_bdev)) 926 if (!parse_fuse_opt((char *) data, &d, is_bdev))
925 goto err; 927 goto err;
926 928
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2792a790e50b..1c1336e7b3b2 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -663,14 +663,19 @@ static void glock_work_func(struct work_struct *work)
663 drop_ref = 1; 663 drop_ref = 1;
664 } 664 }
665 spin_lock(&gl->gl_spin); 665 spin_lock(&gl->gl_spin);
666 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 666 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
667 gl->gl_state != LM_ST_UNLOCKED && 667 gl->gl_state != LM_ST_UNLOCKED &&
668 gl->gl_demote_state != LM_ST_EXCLUSIVE) { 668 gl->gl_demote_state != LM_ST_EXCLUSIVE) {
669 unsigned long holdtime, now = jiffies; 669 unsigned long holdtime, now = jiffies;
670
670 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 671 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
671 if (time_before(now, holdtime)) 672 if (time_before(now, holdtime))
672 delay = holdtime - now; 673 delay = holdtime - now;
673 set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags); 674
675 if (!delay) {
676 clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
677 set_bit(GLF_DEMOTE, &gl->gl_flags);
678 }
674 } 679 }
675 run_queue(gl, 0); 680 run_queue(gl, 0);
676 spin_unlock(&gl->gl_spin); 681 spin_unlock(&gl->gl_spin);
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 278e3fb40b71..583636f745e5 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1123,7 +1123,7 @@ int lmLogOpen(struct super_block *sb)
1123 bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, 1123 bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
1124 log); 1124 log);
1125 if (IS_ERR(bdev)) { 1125 if (IS_ERR(bdev)) {
1126 rc = -PTR_ERR(bdev); 1126 rc = PTR_ERR(bdev);
1127 goto free; 1127 goto free;
1128 } 1128 }
1129 1129
diff --git a/fs/namei.c b/fs/namei.c
index e2e4e8d032ee..9802345df5e7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2624,6 +2624,10 @@ static long do_rmdir(int dfd, const char __user *pathname)
2624 error = PTR_ERR(dentry); 2624 error = PTR_ERR(dentry);
2625 if (IS_ERR(dentry)) 2625 if (IS_ERR(dentry))
2626 goto exit2; 2626 goto exit2;
2627 if (!dentry->d_inode) {
2628 error = -ENOENT;
2629 goto exit3;
2630 }
2627 error = mnt_want_write(nd.path.mnt); 2631 error = mnt_want_write(nd.path.mnt);
2628 if (error) 2632 if (error)
2629 goto exit3; 2633 goto exit3;
@@ -2709,11 +2713,10 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2709 error = PTR_ERR(dentry); 2713 error = PTR_ERR(dentry);
2710 if (!IS_ERR(dentry)) { 2714 if (!IS_ERR(dentry)) {
2711 /* Why not before? Because we want correct error value */ 2715 /* Why not before? Because we want correct error value */
2712 if (nd.last.name[nd.last.len])
2713 goto slashes;
2714 inode = dentry->d_inode; 2716 inode = dentry->d_inode;
2715 if (inode) 2717 if (nd.last.name[nd.last.len] || !inode)
2716 ihold(inode); 2718 goto slashes;
2719 ihold(inode);
2717 error = mnt_want_write(nd.path.mnt); 2720 error = mnt_want_write(nd.path.mnt);
2718 if (error) 2721 if (error)
2719 goto exit2; 2722 goto exit2;
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 7eafe468a29c..b2e3ff347620 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1346,6 +1346,11 @@ static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1346 path[level].bp_bh = NULL; 1346 path[level].bp_bh = NULL;
1347} 1347}
1348 1348
1349static void nilfs_btree_nop(struct nilfs_bmap *btree,
1350 struct nilfs_btree_path *path,
1351 int level, __u64 *keyp, __u64 *ptrp)
1352{
1353}
1349 1354
1350static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1355static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1351 struct nilfs_btree_path *path, 1356 struct nilfs_btree_path *path,
@@ -1356,20 +1361,19 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1356 struct buffer_head *bh; 1361 struct buffer_head *bh;
1357 struct nilfs_btree_node *node, *parent, *sib; 1362 struct nilfs_btree_node *node, *parent, *sib;
1358 __u64 sibptr; 1363 __u64 sibptr;
1359 int pindex, level, ncmin, ncmax, ncblk, ret; 1364 int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1360 1365
1361 ret = 0; 1366 ret = 0;
1362 stats->bs_nblocks = 0; 1367 stats->bs_nblocks = 0;
1363 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1368 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1364 ncblk = nilfs_btree_nchildren_per_block(btree); 1369 ncblk = nilfs_btree_nchildren_per_block(btree);
1365 1370
1366 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1371 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1367 level < nilfs_btree_height(btree) - 1; 1372 level < nilfs_btree_height(btree) - 1;
1368 level++) { 1373 level++) {
1369 node = nilfs_btree_get_nonroot_node(path, level); 1374 node = nilfs_btree_get_nonroot_node(path, level);
1370 path[level].bp_oldreq.bpr_ptr = 1375 path[level].bp_oldreq.bpr_ptr =
1371 nilfs_btree_node_get_ptr(node, path[level].bp_index, 1376 nilfs_btree_node_get_ptr(node, dindex, ncblk);
1372 ncblk);
1373 ret = nilfs_bmap_prepare_end_ptr(btree, 1377 ret = nilfs_bmap_prepare_end_ptr(btree,
1374 &path[level].bp_oldreq, dat); 1378 &path[level].bp_oldreq, dat);
1375 if (ret < 0) 1379 if (ret < 0)
@@ -1383,6 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1383 1387
1384 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1388 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1385 pindex = path[level + 1].bp_index; 1389 pindex = path[level + 1].bp_index;
1390 dindex = pindex;
1386 1391
1387 if (pindex > 0) { 1392 if (pindex > 0) {
1388 /* left sibling */ 1393 /* left sibling */
@@ -1421,6 +1426,14 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1421 path[level].bp_sib_bh = bh; 1426 path[level].bp_sib_bh = bh;
1422 path[level].bp_op = nilfs_btree_concat_right; 1427 path[level].bp_op = nilfs_btree_concat_right;
1423 stats->bs_nblocks++; 1428 stats->bs_nblocks++;
1429 /*
1430 * When merging right sibling node
1431 * into the current node, pointer to
1432 * the right sibling node must be
1433 * terminated instead. The adjustment
1434 * below is required for that.
1435 */
1436 dindex = pindex + 1;
1424 /* continue; */ 1437 /* continue; */
1425 } 1438 }
1426 } else { 1439 } else {
@@ -1431,29 +1444,31 @@ static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1431 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1444 NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1432 path[level].bp_op = nilfs_btree_shrink; 1445 path[level].bp_op = nilfs_btree_shrink;
1433 stats->bs_nblocks += 2; 1446 stats->bs_nblocks += 2;
1447 level++;
1448 path[level].bp_op = nilfs_btree_nop;
1449 goto shrink_root_child;
1434 } else { 1450 } else {
1435 path[level].bp_op = nilfs_btree_do_delete; 1451 path[level].bp_op = nilfs_btree_do_delete;
1436 stats->bs_nblocks++; 1452 stats->bs_nblocks++;
1453 goto out;
1437 } 1454 }
1438
1439 goto out;
1440
1441 } 1455 }
1442 } 1456 }
1443 1457
1458 /* child of the root node is deleted */
1459 path[level].bp_op = nilfs_btree_do_delete;
1460 stats->bs_nblocks++;
1461
1462shrink_root_child:
1444 node = nilfs_btree_get_root(btree); 1463 node = nilfs_btree_get_root(btree);
1445 path[level].bp_oldreq.bpr_ptr = 1464 path[level].bp_oldreq.bpr_ptr =
1446 nilfs_btree_node_get_ptr(node, path[level].bp_index, 1465 nilfs_btree_node_get_ptr(node, dindex,
1447 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1466 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1448 1467
1449 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1468 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1450 if (ret < 0) 1469 if (ret < 0)
1451 goto err_out_child_node; 1470 goto err_out_child_node;
1452 1471
1453 /* child of the root node is deleted */
1454 path[level].bp_op = nilfs_btree_do_delete;
1455 stats->bs_nblocks++;
1456
1457 /* success */ 1472 /* success */
1458 out: 1473 out:
1459 *levelp = level; 1474 *levelp = level;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 141646e88fb5..bb24ab6c282f 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2573,7 +2573,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2573 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2573 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2574 2574
2575 if (nilfs->ns_interval) 2575 if (nilfs->ns_interval)
2576 sci->sc_interval = nilfs->ns_interval; 2576 sci->sc_interval = HZ * nilfs->ns_interval;
2577 if (nilfs->ns_watermark) 2577 if (nilfs->ns_watermark)
2578 sci->sc_watermark = nilfs->ns_watermark; 2578 sci->sc_watermark = nilfs->ns_watermark;
2579 return sci; 2579 return sci;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index cdbaf5e97308..56f61027236b 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1072,7 +1072,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1072 1072
1073 sb->s_magic = OCFS2_SUPER_MAGIC; 1073 sb->s_magic = OCFS2_SUPER_MAGIC;
1074 1074
1075 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 1075 sb->s_flags = (sb->s_flags & ~(MS_POSIXACL | MS_NOSEC)) |
1076 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 1076 ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
1077 1077
1078 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, 1078 /* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index f82e762eeca2..d545e97d99c3 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,13 +255,7 @@ ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 struct gendisk *disk = dev_to_disk(dev); 258 return sprintf(buf, "%u\n", p->discard_alignment);
259 unsigned int alignment = 0;
260
261 if (disk->queue)
262 alignment = queue_limit_discard_alignment(&disk->queue->limits,
263 p->start_sect);
264 return sprintf(buf, "%u\n", alignment);
265} 259}
266 260
267ssize_t part_stat_show(struct device *dev, 261ssize_t part_stat_show(struct device *dev,
@@ -455,6 +449,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
455 p->start_sect = start; 449 p->start_sect = start;
456 p->alignment_offset = 450 p->alignment_offset =
457 queue_limit_alignment_offset(&disk->queue->limits, start); 451 queue_limit_alignment_offset(&disk->queue->limits, start);
452 p->discard_alignment =
453 queue_limit_discard_alignment(&disk->queue->limits, start);
458 p->nr_sects = len; 454 p->nr_sects = len;
459 p->partno = partno; 455 p->partno = partno;
460 p->policy = get_disk_ro(disk); 456 p->policy = get_disk_ro(disk);
diff --git a/fs/super.c b/fs/super.c
index c75593953c52..ab3d672db0de 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -822,7 +822,7 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
822 } else { 822 } else {
823 char b[BDEVNAME_SIZE]; 823 char b[BDEVNAME_SIZE];
824 824
825 s->s_flags = flags; 825 s->s_flags = flags | MS_NOSEC;
826 s->s_mode = mode; 826 s->s_mode = mode;
827 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 827 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
828 sb_set_blocksize(s, block_size(bdev)); 828 sb_set_blocksize(s, block_size(bdev));
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 166951e0dcd3..3be645e012c9 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -581,6 +581,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
581 ubifs_assert(wbuf->size % c->min_io_size == 0); 581 ubifs_assert(wbuf->size % c->min_io_size == 0);
582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
583 ubifs_assert(!c->ro_media && !c->ro_mount); 583 ubifs_assert(!c->ro_media && !c->ro_mount);
584 ubifs_assert(!c->space_fixup);
584 if (c->leb_size - wbuf->offs >= c->max_write_size) 585 if (c->leb_size - wbuf->offs >= c->max_write_size)
585 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); 586 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
586 587
@@ -759,6 +760,7 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
759 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 760 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
760 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); 761 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
761 ubifs_assert(!c->ro_media && !c->ro_mount); 762 ubifs_assert(!c->ro_media && !c->ro_mount);
763 ubifs_assert(!c->space_fixup);
762 764
763 if (c->ro_error) 765 if (c->ro_error)
764 return -EROFS; 766 return -EROFS;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 34b1679e6e3a..cef0460f4c54 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -669,6 +669,7 @@ out_free:
669 669
670out_release: 670out_release:
671 release_head(c, BASEHD); 671 release_head(c, BASEHD);
672 kfree(dent);
672out_ro: 673out_ro:
673 ubifs_ro_mode(c, err); 674 ubifs_ro_mode(c, err);
674 if (last_reference) 675 if (last_reference)
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index bd644bf587a8..a5422fffbd69 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -674,7 +674,7 @@ static int kill_orphans(struct ubifs_info *c)
674 if (IS_ERR(sleb)) { 674 if (IS_ERR(sleb)) {
675 if (PTR_ERR(sleb) == -EUCLEAN) 675 if (PTR_ERR(sleb) == -EUCLEAN)
676 sleb = ubifs_recover_leb(c, lnum, 0, 676 sleb = ubifs_recover_leb(c, lnum, 0,
677 c->sbuf, 0); 677 c->sbuf, -1);
678 if (IS_ERR(sleb)) { 678 if (IS_ERR(sleb)) {
679 err = PTR_ERR(sleb); 679 err = PTR_ERR(sleb);
680 break; 680 break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 731d9e2e7b50..783d8e0beb76 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,19 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
564} 564}
565 565
566/** 566/**
567 * drop_last_node - drop the last node or group of nodes. 567 * drop_last_group - drop the last group of nodes.
568 * @sleb: scanned LEB information 568 * @sleb: scanned LEB information
569 * @offs: offset of dropped nodes is returned here 569 * @offs: offset of dropped nodes is returned here
570 * @grouped: non-zero if whole group of nodes have to be dropped
571 * 570 *
572 * This is a helper function for 'ubifs_recover_leb()' which drops the last 571 * This is a helper function for 'ubifs_recover_leb()' which drops the last
573 * node of the scanned LEB or the last group of nodes if @grouped is not zero. 572 * group of nodes of the scanned LEB.
574 * This function returns %1 if a node was dropped and %0 otherwise.
575 */ 573 */
576static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) 574static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
577{ 575{
578 int dropped = 0;
579
580 while (!list_empty(&sleb->nodes)) { 576 while (!list_empty(&sleb->nodes)) {
581 struct ubifs_scan_node *snod; 577 struct ubifs_scan_node *snod;
582 struct ubifs_ch *ch; 578 struct ubifs_ch *ch;
@@ -585,17 +581,40 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
585 list); 581 list);
586 ch = snod->node; 582 ch = snod->node;
587 if (ch->group_type != UBIFS_IN_NODE_GROUP) 583 if (ch->group_type != UBIFS_IN_NODE_GROUP)
588 return dropped; 584 break;
589 dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); 585
586 dbg_rcvry("dropping grouped node at %d:%d",
587 sleb->lnum, snod->offs);
588 *offs = snod->offs;
589 list_del(&snod->list);
590 kfree(snod);
591 sleb->nodes_cnt -= 1;
592 }
593}
594
595/**
596 * drop_last_node - drop the last node.
597 * @sleb: scanned LEB information
598 * @offs: offset of dropped nodes is returned here
599 * @grouped: non-zero if whole group of nodes have to be dropped
600 *
601 * This is a helper function for 'ubifs_recover_leb()' which drops the last
602 * node of the scanned LEB.
603 */
604static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
605{
606 struct ubifs_scan_node *snod;
607
608 if (!list_empty(&sleb->nodes)) {
609 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
610 list);
611
612 dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs);
590 *offs = snod->offs; 613 *offs = snod->offs;
591 list_del(&snod->list); 614 list_del(&snod->list);
592 kfree(snod); 615 kfree(snod);
593 sleb->nodes_cnt -= 1; 616 sleb->nodes_cnt -= 1;
594 dropped = 1;
595 if (!grouped)
596 break;
597 } 617 }
598 return dropped;
599} 618}
600 619
601/** 620/**
@@ -604,7 +623,8 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
604 * @lnum: LEB number 623 * @lnum: LEB number
605 * @offs: offset 624 * @offs: offset
606 * @sbuf: LEB-sized buffer to use 625 * @sbuf: LEB-sized buffer to use
607 * @grouped: nodes may be grouped for recovery 626 * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
627 * belong to any journal head)
608 * 628 *
609 * This function does a scan of a LEB, but caters for errors that might have 629 * This function does a scan of a LEB, but caters for errors that might have
610 * been caused by the unclean unmount from which we are attempting to recover. 630 * been caused by the unclean unmount from which we are attempting to recover.
@@ -612,13 +632,14 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
612 * found, and a negative error code in case of failure. 632 * found, and a negative error code in case of failure.
613 */ 633 */
614struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 634struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
615 int offs, void *sbuf, int grouped) 635 int offs, void *sbuf, int jhead)
616{ 636{
617 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; 637 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
638 int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
618 struct ubifs_scan_leb *sleb; 639 struct ubifs_scan_leb *sleb;
619 void *buf = sbuf + offs; 640 void *buf = sbuf + offs;
620 641
621 dbg_rcvry("%d:%d", lnum, offs); 642 dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
622 643
623 sleb = ubifs_start_scan(c, lnum, offs, sbuf); 644 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
624 if (IS_ERR(sleb)) 645 if (IS_ERR(sleb))
@@ -635,7 +656,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
635 * Scan quietly until there is an error from which we cannot 656 * Scan quietly until there is an error from which we cannot
636 * recover 657 * recover
637 */ 658 */
638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); 659 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
639 if (ret == SCANNED_A_NODE) { 660 if (ret == SCANNED_A_NODE) {
640 /* A valid node, and not a padding node */ 661 /* A valid node, and not a padding node */
641 struct ubifs_ch *ch = buf; 662 struct ubifs_ch *ch = buf;
@@ -695,59 +716,62 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
695 * If nodes are grouped, always drop the incomplete group at 716 * If nodes are grouped, always drop the incomplete group at
696 * the end. 717 * the end.
697 */ 718 */
698 drop_last_node(sleb, &offs, 1); 719 drop_last_group(sleb, &offs);
699 720
700 /* 721 if (jhead == GCHD) {
701 * While we are in the middle of the same min. I/O unit keep dropping 722 /*
702 * nodes. So basically, what we want is to make sure that the last min. 723 * If this LEB belongs to the GC head then while we are in the
703 * I/O unit where we saw the corruption is dropped completely with all 724 * middle of the same min. I/O unit keep dropping nodes. So
704 * the uncorrupted node which may possibly sit there. 725 * basically, what we want is to make sure that the last min.
705 * 726 * I/O unit where we saw the corruption is dropped completely
706 * In other words, let's name the min. I/O unit where the corruption 727 * with all the uncorrupted nodes which may possibly sit there.
707 * starts B, and the previous min. I/O unit A. The below code tries to 728 *
708 * deal with a situation when half of B contains valid nodes or the end 729 * In other words, let's name the min. I/O unit where the
709 * of a valid node, and the second half of B contains corrupted data or 730 * corruption starts B, and the previous min. I/O unit A. The
710 * garbage. This means that UBIFS had been writing to B just before the 731 * below code tries to deal with a situation when half of B
711 * power cut happened. I do not know how realistic is this scenario 732 * contains valid nodes or the end of a valid node, and the
712 * that half of the min. I/O unit had been written successfully and the 733 * second half of B contains corrupted data or garbage. This
713 * other half not, but this is possible in our 'failure mode emulation' 734 * means that UBIFS had been writing to B just before the power
714 * infrastructure at least. 735 * cut happened. I do not know how realistic is this scenario
715 * 736 * that half of the min. I/O unit had been written successfully
716 * So what is the problem, why we need to drop those nodes? Whey can't 737 * and the other half not, but this is possible in our 'failure
717 * we just clean-up the second half of B by putting a padding node 738 * mode emulation' infrastructure at least.
718 * there? We can, and this works fine with one exception which was 739 *
719 * reproduced with power cut emulation testing and happens extremely 740 * So what is the problem, why we need to drop those nodes? Why
720 * rarely. The description follows, but it is worth noting that that is 741 * can't we just clean-up the second half of B by putting a
721 * only about the GC head, so we could do this trick only if the bud 742 * padding node there? We can, and this works fine with one
722 * belongs to the GC head, but it does not seem to be worth an 743 * exception which was reproduced with power cut emulation
723 * additional "if" statement. 744 * testing and happens extremely rarely.
724 * 745 *
725 * So, imagine the file-system is full, we run GC which is moving valid 746 * Imagine the file-system is full, we run GC which starts
726 * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head 747 * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
727 * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X 748 * the current GC head LEB). The @c->gc_lnum is -1, which means
728 * and will try to continue. Imagine that LEB X is currently the 749 * that GC will retain LEB X and will try to continue. Imagine
729 * dirtiest LEB, and the amount of used space in LEB Y is exactly the 750 * that LEB X is currently the dirtiest LEB, and the amount of
730 * same as amount of free space in LEB X. 751 * used space in LEB Y is exactly the same as amount of free
731 * 752 * space in LEB X.
732 * And a power cut happens when nodes are moved from LEB X to LEB Y. We 753 *
733 * are here trying to recover LEB Y which is the GC head LEB. We find 754 * And a power cut happens when nodes are moved from LEB X to
734 * the min. I/O unit B as described above. Then we clean-up LEB Y by 755 * LEB Y. We are here trying to recover LEB Y which is the GC
735 * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function 756 * head LEB. We find the min. I/O unit B as described above.
736 * fails, because it cannot find a dirty LEB which could be GC'd into 757 * Then we clean-up LEB Y by padding min. I/O unit. And later
737 * LEB Y! Even LEB X does not match because the amount of valid nodes 758 * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
738 * there does not fit the free space in LEB Y any more! And this is 759 * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
739 * because of the padding node which we added to LEB Y. The 760 * does not match because the amount of valid nodes there does
740 * user-visible effect of this which I once observed and analysed is 761 * not fit the free space in LEB Y any more! And this is
741 * that we cannot mount the file-system with -ENOSPC error. 762 * because of the padding node which we added to LEB Y. The
742 * 763 * user-visible effect of this which I once observed and
743 * So obviously, to make sure that situation does not happen we should 764 * analysed is that we cannot mount the file-system with
744 * free min. I/O unit B in LEB Y completely and the last used min. I/O 765 * -ENOSPC error.
745 * unit in LEB Y should be A. This is basically what the below code 766 *
746 * tries to do. 767 * So obviously, to make sure that situation does not happen we
747 */ 768 * should free min. I/O unit B in LEB Y completely and the last
748 while (min_io_unit == round_down(offs, c->min_io_size) && 769 * used min. I/O unit in LEB Y should be A. This is basically
749 min_io_unit != offs && 770 * what the below code tries to do.
750 drop_last_node(sleb, &offs, grouped)); 771 */
772 while (offs > min_io_unit)
773 drop_last_node(sleb, &offs);
774 }
751 775
752 buf = sbuf + offs; 776 buf = sbuf + offs;
753 len = c->leb_size - offs; 777 len = c->leb_size - offs;
@@ -881,7 +905,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
881 } 905 }
882 ubifs_scan_destroy(sleb); 906 ubifs_scan_destroy(sleb);
883 } 907 }
884 return ubifs_recover_leb(c, lnum, offs, sbuf, 0); 908 return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
885} 909}
886 910
887/** 911/**
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 6617280d1679..5e97161ce4d3 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -557,8 +557,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
557 * these LEBs could possibly be written to at the power cut 557 * these LEBs could possibly be written to at the power cut
558 * time. 558 * time.
559 */ 559 */
560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, 560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
561 b->bud->jhead != GCHD);
562 else 561 else
563 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 562 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
564 if (IS_ERR(sleb)) 563 if (IS_ERR(sleb))
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index ca953a945029..9e1d05666fed 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -284,7 +284,11 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
285 285
286 if (nr == 0) 286 if (nr == 0)
287 return clean_zn_cnt; 287 /*
288 * Due to the way UBIFS updates the clean znode counter it may
289 * temporarily be negative.
290 */
291 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
288 292
289 if (!clean_zn_cnt) { 293 if (!clean_zn_cnt) {
290 /* 294 /*
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1ab0d22e4c94..b5aeb5a8ebed 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -811,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c)
811 811
812 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; 812 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
813 c->jheads[i].wbuf.jhead = i; 813 c->jheads[i].wbuf.jhead = i;
814 c->jheads[i].grouped = 1;
814 } 815 }
815 816
816 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; 817 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
817 /* 818 /*
818 * Garbage Collector head likely contains long-term data and 819 * Garbage Collector head likely contains long-term data and
819 * does not need to be synchronized by timer. 820 * does not need to be synchronized by timer. Also GC head nodes are
821 * not grouped.
820 */ 822 */
821 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; 823 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
822 c->jheads[GCHD].wbuf.no_timer = 1; 824 c->jheads[GCHD].wbuf.no_timer = 1;
825 c->jheads[GCHD].grouped = 0;
823 826
824 return 0; 827 return 0;
825} 828}
@@ -1284,12 +1287,25 @@ static int mount_ubifs(struct ubifs_info *c)
1284 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { 1287 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1285 ubifs_msg("recovery needed"); 1288 ubifs_msg("recovery needed");
1286 c->need_recovery = 1; 1289 c->need_recovery = 1;
1287 if (!c->ro_mount) { 1290 }
1288 err = ubifs_recover_inl_heads(c, c->sbuf); 1291
1289 if (err) 1292 if (c->need_recovery && !c->ro_mount) {
1290 goto out_master; 1293 err = ubifs_recover_inl_heads(c, c->sbuf);
1291 } 1294 if (err)
1292 } else if (!c->ro_mount) { 1295 goto out_master;
1296 }
1297
1298 err = ubifs_lpt_init(c, 1, !c->ro_mount);
1299 if (err)
1300 goto out_master;
1301
1302 if (!c->ro_mount && c->space_fixup) {
1303 err = ubifs_fixup_free_space(c);
1304 if (err)
1305 goto out_master;
1306 }
1307
1308 if (!c->ro_mount) {
1293 /* 1309 /*
1294 * Set the "dirty" flag so that if we reboot uncleanly we 1310 * Set the "dirty" flag so that if we reboot uncleanly we
1295 * will notice this immediately on the next mount. 1311 * will notice this immediately on the next mount.
@@ -1297,13 +1313,9 @@ static int mount_ubifs(struct ubifs_info *c)
1297 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); 1313 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
1298 err = ubifs_write_master(c); 1314 err = ubifs_write_master(c);
1299 if (err) 1315 if (err)
1300 goto out_master; 1316 goto out_lpt;
1301 } 1317 }
1302 1318
1303 err = ubifs_lpt_init(c, 1, !c->ro_mount);
1304 if (err)
1305 goto out_lpt;
1306
1307 err = dbg_check_idx_size(c, c->bi.old_idx_sz); 1319 err = dbg_check_idx_size(c, c->bi.old_idx_sz);
1308 if (err) 1320 if (err)
1309 goto out_lpt; 1321 goto out_lpt;
@@ -1396,12 +1408,6 @@ static int mount_ubifs(struct ubifs_info *c)
1396 } else 1408 } else
1397 ubifs_assert(c->lst.taken_empty_lebs > 0); 1409 ubifs_assert(c->lst.taken_empty_lebs > 0);
1398 1410
1399 if (!c->ro_mount && c->space_fixup) {
1400 err = ubifs_fixup_free_space(c);
1401 if (err)
1402 goto out_infos;
1403 }
1404
1405 err = dbg_check_filesystem(c); 1411 err = dbg_check_filesystem(c);
1406 if (err) 1412 if (err)
1407 goto out_infos; 1413 goto out_infos;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 8119b1fd8d94..91b4213dde84 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2876,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
2876 */ 2876 */
2877void ubifs_tnc_close(struct ubifs_info *c) 2877void ubifs_tnc_close(struct ubifs_info *c)
2878{ 2878{
2879 long clean_freed;
2880
2881 tnc_destroy_cnext(c); 2879 tnc_destroy_cnext(c);
2882 if (c->zroot.znode) { 2880 if (c->zroot.znode) {
2883 clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); 2881 long n;
2884 atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); 2882
2883 ubifs_destroy_tnc_subtree(c->zroot.znode);
2884 n = atomic_long_read(&c->clean_zn_cnt);
2885 atomic_long_sub(n, &ubifs_clean_zn_cnt);
2885 } 2886 }
2886 kfree(c->gap_lebs); 2887 kfree(c->gap_lebs);
2887 kfree(c->ilebs); 2888 kfree(c->ilebs);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a70d7b4ffb25..f79983d6f860 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -722,12 +722,14 @@ struct ubifs_bud {
722 * struct ubifs_jhead - journal head. 722 * struct ubifs_jhead - journal head.
723 * @wbuf: head's write-buffer 723 * @wbuf: head's write-buffer
724 * @buds_list: list of bud LEBs belonging to this journal head 724 * @buds_list: list of bud LEBs belonging to this journal head
725 * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
725 * 726 *
726 * Note, the @buds list is protected by the @c->buds_lock. 727 * Note, the @buds list is protected by the @c->buds_lock.
727 */ 728 */
728struct ubifs_jhead { 729struct ubifs_jhead {
729 struct ubifs_wbuf wbuf; 730 struct ubifs_wbuf wbuf;
730 struct list_head buds_list; 731 struct list_head buds_list;
732 unsigned int grouped:1;
731}; 733};
732 734
733/** 735/**
@@ -1742,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
1742int ubifs_recover_master_node(struct ubifs_info *c); 1744int ubifs_recover_master_node(struct ubifs_info *c);
1743int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); 1745int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
1744struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 1746struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
1745 int offs, void *sbuf, int grouped); 1747 int offs, void *sbuf, int jhead);
1746struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, 1748struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
1747 int offs, void *sbuf); 1749 int offs, void *sbuf);
1748int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); 1750int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);