aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c260
1 files changed, 199 insertions, 61 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 81a313874ae5..a6b83744b05d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -16,7 +16,6 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/version.h>
20#include <linux/fs.h> 19#include <linux/fs.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/scatterlist.h> 21#include <linux/scatterlist.h>
@@ -39,6 +38,7 @@
39#include "locking.h" 38#include "locking.h"
40#include "ref-cache.h" 39#include "ref-cache.h"
41#include "tree-log.h" 40#include "tree-log.h"
41#include "free-space-cache.h"
42 42
43static struct extent_io_ops btree_extent_io_ops; 43static struct extent_io_ops btree_extent_io_ops;
44static void end_workqueue_fn(struct btrfs_work *work); 44static void end_workqueue_fn(struct btrfs_work *work);
@@ -76,6 +76,40 @@ struct async_submit_bio {
76 struct btrfs_work work; 76 struct btrfs_work work;
77}; 77};
78 78
79/* These are used to set the lockdep class on the extent buffer locks.
80 * The class is set by the readpage_end_io_hook after the buffer has
81 * passed csum validation but before the pages are unlocked.
82 *
83 * The lockdep class is also set by btrfs_init_new_buffer on freshly
84 * allocated blocks.
85 *
86 * The class is based on the level in the tree block, which allows lockdep
87 * to know that lower nodes nest inside the locks of higher nodes.
88 *
89 * We also add a check to make sure the highest level of the tree is
90 * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
91 * code needs update as well.
92 */
93#ifdef CONFIG_DEBUG_LOCK_ALLOC
94# if BTRFS_MAX_LEVEL != 8
95# error
96# endif
97static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
98static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
99 /* leaf */
100 "btrfs-extent-00",
101 "btrfs-extent-01",
102 "btrfs-extent-02",
103 "btrfs-extent-03",
104 "btrfs-extent-04",
105 "btrfs-extent-05",
106 "btrfs-extent-06",
107 "btrfs-extent-07",
108 /* highest possible level */
109 "btrfs-extent-08",
110};
111#endif
112
79/* 113/*
80 * extents on the btree inode are pretty simple, there's one extent 114 * extents on the btree inode are pretty simple, there's one extent
81 * that covers the entire device 115 * that covers the entire device
@@ -348,6 +382,15 @@ static int check_tree_block_fsid(struct btrfs_root *root,
348 return ret; 382 return ret;
349} 383}
350 384
385#ifdef CONFIG_DEBUG_LOCK_ALLOC
386void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
387{
388 lockdep_set_class_and_name(&eb->lock,
389 &btrfs_eb_class[level],
390 btrfs_eb_name[level]);
391}
392#endif
393
351static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, 394static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
352 struct extent_state *state) 395 struct extent_state *state)
353{ 396{
@@ -393,6 +436,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
393 } 436 }
394 found_level = btrfs_header_level(eb); 437 found_level = btrfs_header_level(eb);
395 438
439 btrfs_set_buffer_lockdep_class(eb, found_level);
440
396 ret = csum_tree_block(root, eb, 1); 441 ret = csum_tree_block(root, eb, 1);
397 if (ret) 442 if (ret)
398 ret = -EIO; 443 ret = -EIO;
@@ -534,6 +579,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
534 async->bio_flags = bio_flags; 579 async->bio_flags = bio_flags;
535 580
536 atomic_inc(&fs_info->nr_async_submits); 581 atomic_inc(&fs_info->nr_async_submits);
582
583 if (rw & (1 << BIO_RW_SYNCIO))
584 btrfs_set_work_high_prio(&async->work);
585
537 btrfs_queue_worker(&fs_info->workers, &async->work); 586 btrfs_queue_worker(&fs_info->workers, &async->work);
538#if 0 587#if 0
539 int limit = btrfs_async_submit_limit(fs_info); 588 int limit = btrfs_async_submit_limit(fs_info);
@@ -611,6 +660,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
611 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 660 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
612 mirror_num, 0); 661 mirror_num, 0);
613 } 662 }
663
614 /* 664 /*
615 * kthread helpers are used to submit writes so that checksumming 665 * kthread helpers are used to submit writes so that checksumming
616 * can happen in parallel across all CPUs 666 * can happen in parallel across all CPUs
@@ -624,14 +674,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
624static int btree_writepage(struct page *page, struct writeback_control *wbc) 674static int btree_writepage(struct page *page, struct writeback_control *wbc)
625{ 675{
626 struct extent_io_tree *tree; 676 struct extent_io_tree *tree;
677 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
678 struct extent_buffer *eb;
679 int was_dirty;
680
627 tree = &BTRFS_I(page->mapping->host)->io_tree; 681 tree = &BTRFS_I(page->mapping->host)->io_tree;
682 if (!(current->flags & PF_MEMALLOC)) {
683 return extent_write_full_page(tree, page,
684 btree_get_extent, wbc);
685 }
628 686
629 if (current->flags & PF_MEMALLOC) { 687 redirty_page_for_writepage(wbc, page);
630 redirty_page_for_writepage(wbc, page); 688 eb = btrfs_find_tree_block(root, page_offset(page),
631 unlock_page(page); 689 PAGE_CACHE_SIZE);
632 return 0; 690 WARN_ON(!eb);
691
692 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
693 if (!was_dirty) {
694 spin_lock(&root->fs_info->delalloc_lock);
695 root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
696 spin_unlock(&root->fs_info->delalloc_lock);
633 } 697 }
634 return extent_write_full_page(tree, page, btree_get_extent, wbc); 698 free_extent_buffer(eb);
699
700 unlock_page(page);
701 return 0;
635} 702}
636 703
637static int btree_writepages(struct address_space *mapping, 704static int btree_writepages(struct address_space *mapping,
@@ -640,15 +707,15 @@ static int btree_writepages(struct address_space *mapping,
640 struct extent_io_tree *tree; 707 struct extent_io_tree *tree;
641 tree = &BTRFS_I(mapping->host)->io_tree; 708 tree = &BTRFS_I(mapping->host)->io_tree;
642 if (wbc->sync_mode == WB_SYNC_NONE) { 709 if (wbc->sync_mode == WB_SYNC_NONE) {
710 struct btrfs_root *root = BTRFS_I(mapping->host)->root;
643 u64 num_dirty; 711 u64 num_dirty;
644 u64 start = 0;
645 unsigned long thresh = 32 * 1024 * 1024; 712 unsigned long thresh = 32 * 1024 * 1024;
646 713
647 if (wbc->for_kupdate) 714 if (wbc->for_kupdate)
648 return 0; 715 return 0;
649 716
650 num_dirty = count_range_bits(tree, &start, (u64)-1, 717 /* this is a bit racy, but that's ok */
651 thresh, EXTENT_DIRTY); 718 num_dirty = root->fs_info->dirty_metadata_bytes;
652 if (num_dirty < thresh) 719 if (num_dirty < thresh)
653 return 0; 720 return 0;
654 } 721 }
@@ -800,7 +867,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
800 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 867 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
801 868
802 if (ret == 0) 869 if (ret == 0)
803 buf->flags |= EXTENT_UPTODATE; 870 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
804 else 871 else
805 WARN_ON(1); 872 WARN_ON(1);
806 return buf; 873 return buf;
@@ -813,7 +880,19 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
813 struct inode *btree_inode = root->fs_info->btree_inode; 880 struct inode *btree_inode = root->fs_info->btree_inode;
814 if (btrfs_header_generation(buf) == 881 if (btrfs_header_generation(buf) ==
815 root->fs_info->running_transaction->transid) { 882 root->fs_info->running_transaction->transid) {
816 WARN_ON(!btrfs_tree_locked(buf)); 883 btrfs_assert_tree_locked(buf);
884
885 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
886 spin_lock(&root->fs_info->delalloc_lock);
887 if (root->fs_info->dirty_metadata_bytes >= buf->len)
888 root->fs_info->dirty_metadata_bytes -= buf->len;
889 else
890 WARN_ON(1);
891 spin_unlock(&root->fs_info->delalloc_lock);
892 }
893
894 /* ugh, clear_extent_buffer_dirty needs to lock the page */
895 btrfs_set_lock_blocking(buf);
817 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, 896 clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
818 buf); 897 buf);
819 } 898 }
@@ -850,6 +929,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
850 spin_lock_init(&root->list_lock); 929 spin_lock_init(&root->list_lock);
851 mutex_init(&root->objectid_mutex); 930 mutex_init(&root->objectid_mutex);
852 mutex_init(&root->log_mutex); 931 mutex_init(&root->log_mutex);
932 init_waitqueue_head(&root->log_writer_wait);
933 init_waitqueue_head(&root->log_commit_wait[0]);
934 init_waitqueue_head(&root->log_commit_wait[1]);
935 atomic_set(&root->log_commit[0], 0);
936 atomic_set(&root->log_commit[1], 0);
937 atomic_set(&root->log_writers, 0);
938 root->log_batch = 0;
939 root->log_transid = 0;
853 extent_io_tree_init(&root->dirty_log_pages, 940 extent_io_tree_init(&root->dirty_log_pages,
854 fs_info->btree_inode->i_mapping, GFP_NOFS); 941 fs_info->btree_inode->i_mapping, GFP_NOFS);
855 942
@@ -934,15 +1021,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
934 return 0; 1021 return 0;
935} 1022}
936 1023
937int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, 1024static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
938 struct btrfs_fs_info *fs_info) 1025 struct btrfs_fs_info *fs_info)
939{ 1026{
940 struct btrfs_root *root; 1027 struct btrfs_root *root;
941 struct btrfs_root *tree_root = fs_info->tree_root; 1028 struct btrfs_root *tree_root = fs_info->tree_root;
1029 struct extent_buffer *leaf;
942 1030
943 root = kzalloc(sizeof(*root), GFP_NOFS); 1031 root = kzalloc(sizeof(*root), GFP_NOFS);
944 if (!root) 1032 if (!root)
945 return -ENOMEM; 1033 return ERR_PTR(-ENOMEM);
946 1034
947 __setup_root(tree_root->nodesize, tree_root->leafsize, 1035 __setup_root(tree_root->nodesize, tree_root->leafsize,
948 tree_root->sectorsize, tree_root->stripesize, 1036 tree_root->sectorsize, tree_root->stripesize,
@@ -951,12 +1039,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
951 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; 1039 root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
952 root->root_key.type = BTRFS_ROOT_ITEM_KEY; 1040 root->root_key.type = BTRFS_ROOT_ITEM_KEY;
953 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; 1041 root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
1042 /*
1043 * log trees do not get reference counted because they go away
1044 * before a real commit is actually done. They do store pointers
1045 * to file data extents, and those reference counts still get
1046 * updated (along with back refs to the log tree).
1047 */
954 root->ref_cows = 0; 1048 root->ref_cows = 0;
955 1049
956 root->node = btrfs_alloc_free_block(trans, root, root->leafsize, 1050 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
957 0, BTRFS_TREE_LOG_OBJECTID, 1051 0, BTRFS_TREE_LOG_OBJECTID,
958 trans->transid, 0, 0, 0); 1052 trans->transid, 0, 0, 0);
1053 if (IS_ERR(leaf)) {
1054 kfree(root);
1055 return ERR_CAST(leaf);
1056 }
959 1057
1058 root->node = leaf;
960 btrfs_set_header_nritems(root->node, 0); 1059 btrfs_set_header_nritems(root->node, 0);
961 btrfs_set_header_level(root->node, 0); 1060 btrfs_set_header_level(root->node, 0);
962 btrfs_set_header_bytenr(root->node, root->node->start); 1061 btrfs_set_header_bytenr(root->node, root->node->start);
@@ -968,7 +1067,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
968 BTRFS_FSID_SIZE); 1067 BTRFS_FSID_SIZE);
969 btrfs_mark_buffer_dirty(root->node); 1068 btrfs_mark_buffer_dirty(root->node);
970 btrfs_tree_unlock(root->node); 1069 btrfs_tree_unlock(root->node);
971 fs_info->log_root_tree = root; 1070 return root;
1071}
1072
1073int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
1074 struct btrfs_fs_info *fs_info)
1075{
1076 struct btrfs_root *log_root;
1077
1078 log_root = alloc_log_tree(trans, fs_info);
1079 if (IS_ERR(log_root))
1080 return PTR_ERR(log_root);
1081 WARN_ON(fs_info->log_root_tree);
1082 fs_info->log_root_tree = log_root;
1083 return 0;
1084}
1085
1086int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
1087 struct btrfs_root *root)
1088{
1089 struct btrfs_root *log_root;
1090 struct btrfs_inode_item *inode_item;
1091
1092 log_root = alloc_log_tree(trans, root->fs_info);
1093 if (IS_ERR(log_root))
1094 return PTR_ERR(log_root);
1095
1096 log_root->last_trans = trans->transid;
1097 log_root->root_key.offset = root->root_key.objectid;
1098
1099 inode_item = &log_root->root_item.inode;
1100 inode_item->generation = cpu_to_le64(1);
1101 inode_item->size = cpu_to_le64(3);
1102 inode_item->nlink = cpu_to_le32(1);
1103 inode_item->nbytes = cpu_to_le64(root->leafsize);
1104 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
1105
1106 btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
1107 btrfs_set_root_generation(&log_root->root_item, trans->transid);
1108
1109 WARN_ON(root->log_root);
1110 root->log_root = log_root;
1111 root->log_transid = 0;
972 return 0; 1112 return 0;
973} 1113}
974 1114
@@ -1136,7 +1276,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1136{ 1276{
1137 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; 1277 struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
1138 int ret = 0; 1278 int ret = 0;
1139 struct list_head *cur;
1140 struct btrfs_device *device; 1279 struct btrfs_device *device;
1141 struct backing_dev_info *bdi; 1280 struct backing_dev_info *bdi;
1142#if 0 1281#if 0
@@ -1144,8 +1283,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1144 btrfs_congested_async(info, 0)) 1283 btrfs_congested_async(info, 0))
1145 return 1; 1284 return 1;
1146#endif 1285#endif
1147 list_for_each(cur, &info->fs_devices->devices) { 1286 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1148 device = list_entry(cur, struct btrfs_device, dev_list);
1149 if (!device->bdev) 1287 if (!device->bdev)
1150 continue; 1288 continue;
1151 bdi = blk_get_backing_dev_info(device->bdev); 1289 bdi = blk_get_backing_dev_info(device->bdev);
@@ -1163,13 +1301,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1163 */ 1301 */
1164static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) 1302static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1165{ 1303{
1166 struct list_head *cur;
1167 struct btrfs_device *device; 1304 struct btrfs_device *device;
1168 struct btrfs_fs_info *info; 1305 struct btrfs_fs_info *info;
1169 1306
1170 info = (struct btrfs_fs_info *)bdi->unplug_io_data; 1307 info = (struct btrfs_fs_info *)bdi->unplug_io_data;
1171 list_for_each(cur, &info->fs_devices->devices) { 1308 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1172 device = list_entry(cur, struct btrfs_device, dev_list);
1173 if (!device->bdev) 1309 if (!device->bdev)
1174 continue; 1310 continue;
1175 1311
@@ -1282,8 +1418,6 @@ static int bio_ready_for_csum(struct bio *bio)
1282 1418
1283 ret = extent_range_uptodate(io_tree, start + length, 1419 ret = extent_range_uptodate(io_tree, start + length,
1284 start + buf_len - 1); 1420 start + buf_len - 1);
1285 if (ret == 1)
1286 return ret;
1287 return ret; 1421 return ret;
1288} 1422}
1289 1423
@@ -1366,12 +1500,6 @@ static int transaction_kthread(void *arg)
1366 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); 1500 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1367 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1501 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1368 1502
1369 if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
1370 printk(KERN_INFO "btrfs: total reference cache "
1371 "size %llu\n",
1372 root->fs_info->total_ref_cache_size);
1373 }
1374
1375 mutex_lock(&root->fs_info->trans_mutex); 1503 mutex_lock(&root->fs_info->trans_mutex);
1376 cur = root->fs_info->running_transaction; 1504 cur = root->fs_info->running_transaction;
1377 if (!cur) { 1505 if (!cur) {
@@ -1388,6 +1516,7 @@ static int transaction_kthread(void *arg)
1388 mutex_unlock(&root->fs_info->trans_mutex); 1516 mutex_unlock(&root->fs_info->trans_mutex);
1389 trans = btrfs_start_transaction(root, 1); 1517 trans = btrfs_start_transaction(root, 1);
1390 ret = btrfs_commit_transaction(trans, root); 1518 ret = btrfs_commit_transaction(trans, root);
1519
1391sleep: 1520sleep:
1392 wake_up_process(root->fs_info->cleaner_kthread); 1521 wake_up_process(root->fs_info->cleaner_kthread);
1393 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1522 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1447,7 +1576,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1447 INIT_LIST_HEAD(&fs_info->dead_roots); 1576 INIT_LIST_HEAD(&fs_info->dead_roots);
1448 INIT_LIST_HEAD(&fs_info->hashers); 1577 INIT_LIST_HEAD(&fs_info->hashers);
1449 INIT_LIST_HEAD(&fs_info->delalloc_inodes); 1578 INIT_LIST_HEAD(&fs_info->delalloc_inodes);
1450 spin_lock_init(&fs_info->hash_lock); 1579 INIT_LIST_HEAD(&fs_info->ordered_operations);
1451 spin_lock_init(&fs_info->delalloc_lock); 1580 spin_lock_init(&fs_info->delalloc_lock);
1452 spin_lock_init(&fs_info->new_trans_lock); 1581 spin_lock_init(&fs_info->new_trans_lock);
1453 spin_lock_init(&fs_info->ref_cache_lock); 1582 spin_lock_init(&fs_info->ref_cache_lock);
@@ -1507,10 +1636,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1507 1636
1508 extent_io_tree_init(&fs_info->pinned_extents, 1637 extent_io_tree_init(&fs_info->pinned_extents,
1509 fs_info->btree_inode->i_mapping, GFP_NOFS); 1638 fs_info->btree_inode->i_mapping, GFP_NOFS);
1510 extent_io_tree_init(&fs_info->pending_del,
1511 fs_info->btree_inode->i_mapping, GFP_NOFS);
1512 extent_io_tree_init(&fs_info->extent_ins,
1513 fs_info->btree_inode->i_mapping, GFP_NOFS);
1514 fs_info->do_barriers = 1; 1639 fs_info->do_barriers = 1;
1515 1640
1516 INIT_LIST_HEAD(&fs_info->dead_reloc_roots); 1641 INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
@@ -1523,22 +1648,21 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1523 insert_inode_hash(fs_info->btree_inode); 1648 insert_inode_hash(fs_info->btree_inode);
1524 1649
1525 mutex_init(&fs_info->trans_mutex); 1650 mutex_init(&fs_info->trans_mutex);
1651 mutex_init(&fs_info->ordered_operations_mutex);
1526 mutex_init(&fs_info->tree_log_mutex); 1652 mutex_init(&fs_info->tree_log_mutex);
1527 mutex_init(&fs_info->drop_mutex); 1653 mutex_init(&fs_info->drop_mutex);
1528 mutex_init(&fs_info->extent_ins_mutex);
1529 mutex_init(&fs_info->pinned_mutex);
1530 mutex_init(&fs_info->chunk_mutex); 1654 mutex_init(&fs_info->chunk_mutex);
1531 mutex_init(&fs_info->transaction_kthread_mutex); 1655 mutex_init(&fs_info->transaction_kthread_mutex);
1532 mutex_init(&fs_info->cleaner_mutex); 1656 mutex_init(&fs_info->cleaner_mutex);
1533 mutex_init(&fs_info->volume_mutex); 1657 mutex_init(&fs_info->volume_mutex);
1534 mutex_init(&fs_info->tree_reloc_mutex); 1658 mutex_init(&fs_info->tree_reloc_mutex);
1659
1660 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
1661 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
1662
1535 init_waitqueue_head(&fs_info->transaction_throttle); 1663 init_waitqueue_head(&fs_info->transaction_throttle);
1536 init_waitqueue_head(&fs_info->transaction_wait); 1664 init_waitqueue_head(&fs_info->transaction_wait);
1537 init_waitqueue_head(&fs_info->async_submit_wait); 1665 init_waitqueue_head(&fs_info->async_submit_wait);
1538 init_waitqueue_head(&fs_info->tree_log_wait);
1539 atomic_set(&fs_info->tree_log_commit, 0);
1540 atomic_set(&fs_info->tree_log_writers, 0);
1541 fs_info->tree_log_transid = 0;
1542 1666
1543 __setup_root(4096, 4096, 4096, 4096, tree_root, 1667 __setup_root(4096, 4096, 4096, 4096, tree_root,
1544 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1668 fs_info, BTRFS_ROOT_TREE_OBJECTID);
@@ -1627,6 +1751,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1627 * low idle thresh 1751 * low idle thresh
1628 */ 1752 */
1629 fs_info->endio_workers.idle_thresh = 4; 1753 fs_info->endio_workers.idle_thresh = 4;
1754 fs_info->endio_meta_workers.idle_thresh = 4;
1755
1630 fs_info->endio_write_workers.idle_thresh = 64; 1756 fs_info->endio_write_workers.idle_thresh = 64;
1631 fs_info->endio_meta_write_workers.idle_thresh = 64; 1757 fs_info->endio_meta_write_workers.idle_thresh = 64;
1632 1758
@@ -1720,7 +1846,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1720 ret = find_and_setup_root(tree_root, fs_info, 1846 ret = find_and_setup_root(tree_root, fs_info,
1721 BTRFS_DEV_TREE_OBJECTID, dev_root); 1847 BTRFS_DEV_TREE_OBJECTID, dev_root);
1722 dev_root->track_dirty = 1; 1848 dev_root->track_dirty = 1;
1723
1724 if (ret) 1849 if (ret)
1725 goto fail_extent_root; 1850 goto fail_extent_root;
1726 1851
@@ -1740,13 +1865,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1740 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; 1865 fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
1741 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 1866 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
1742 "btrfs-cleaner"); 1867 "btrfs-cleaner");
1743 if (!fs_info->cleaner_kthread) 1868 if (IS_ERR(fs_info->cleaner_kthread))
1744 goto fail_csum_root; 1869 goto fail_csum_root;
1745 1870
1746 fs_info->transaction_kthread = kthread_run(transaction_kthread, 1871 fs_info->transaction_kthread = kthread_run(transaction_kthread,
1747 tree_root, 1872 tree_root,
1748 "btrfs-transaction"); 1873 "btrfs-transaction");
1749 if (!fs_info->transaction_kthread) 1874 if (IS_ERR(fs_info->transaction_kthread))
1750 goto fail_cleaner; 1875 goto fail_cleaner;
1751 1876
1752 if (btrfs_super_log_root(disk_super) != 0) { 1877 if (btrfs_super_log_root(disk_super) != 0) {
@@ -1828,13 +1953,14 @@ fail_sb_buffer:
1828fail_iput: 1953fail_iput:
1829 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 1954 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
1830 iput(fs_info->btree_inode); 1955 iput(fs_info->btree_inode);
1831fail: 1956
1832 btrfs_close_devices(fs_info->fs_devices); 1957 btrfs_close_devices(fs_info->fs_devices);
1833 btrfs_mapping_tree_free(&fs_info->mapping_tree); 1958 btrfs_mapping_tree_free(&fs_info->mapping_tree);
1959 bdi_destroy(&fs_info->bdi);
1834 1960
1961fail:
1835 kfree(extent_root); 1962 kfree(extent_root);
1836 kfree(tree_root); 1963 kfree(tree_root);
1837 bdi_destroy(&fs_info->bdi);
1838 kfree(fs_info); 1964 kfree(fs_info);
1839 kfree(chunk_root); 1965 kfree(chunk_root);
1840 kfree(dev_root); 1966 kfree(dev_root);
@@ -1974,10 +2100,10 @@ static int write_dev_supers(struct btrfs_device *device,
1974 device->barriers = 0; 2100 device->barriers = 0;
1975 get_bh(bh); 2101 get_bh(bh);
1976 lock_buffer(bh); 2102 lock_buffer(bh);
1977 ret = submit_bh(WRITE, bh); 2103 ret = submit_bh(WRITE_SYNC, bh);
1978 } 2104 }
1979 } else { 2105 } else {
1980 ret = submit_bh(WRITE, bh); 2106 ret = submit_bh(WRITE_SYNC, bh);
1981 } 2107 }
1982 2108
1983 if (!ret && wait) { 2109 if (!ret && wait) {
@@ -1995,7 +2121,6 @@ static int write_dev_supers(struct btrfs_device *device,
1995 2121
1996int write_all_supers(struct btrfs_root *root, int max_mirrors) 2122int write_all_supers(struct btrfs_root *root, int max_mirrors)
1997{ 2123{
1998 struct list_head *cur;
1999 struct list_head *head = &root->fs_info->fs_devices->devices; 2124 struct list_head *head = &root->fs_info->fs_devices->devices;
2000 struct btrfs_device *dev; 2125 struct btrfs_device *dev;
2001 struct btrfs_super_block *sb; 2126 struct btrfs_super_block *sb;
@@ -2011,8 +2136,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2011 2136
2012 sb = &root->fs_info->super_for_commit; 2137 sb = &root->fs_info->super_for_commit;
2013 dev_item = &sb->dev_item; 2138 dev_item = &sb->dev_item;
2014 list_for_each(cur, head) { 2139 list_for_each_entry(dev, head, dev_list) {
2015 dev = list_entry(cur, struct btrfs_device, dev_list);
2016 if (!dev->bdev) { 2140 if (!dev->bdev) {
2017 total_errors++; 2141 total_errors++;
2018 continue; 2142 continue;
@@ -2045,8 +2169,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2045 } 2169 }
2046 2170
2047 total_errors = 0; 2171 total_errors = 0;
2048 list_for_each(cur, head) { 2172 list_for_each_entry(dev, head, dev_list) {
2049 dev = list_entry(cur, struct btrfs_device, dev_list);
2050 if (!dev->bdev) 2173 if (!dev->bdev)
2051 continue; 2174 continue;
2052 if (!dev->in_fs_metadata || !dev->writeable) 2175 if (!dev->in_fs_metadata || !dev->writeable)
@@ -2259,8 +2382,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
2259 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; 2382 struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
2260 u64 transid = btrfs_header_generation(buf); 2383 u64 transid = btrfs_header_generation(buf);
2261 struct inode *btree_inode = root->fs_info->btree_inode; 2384 struct inode *btree_inode = root->fs_info->btree_inode;
2385 int was_dirty;
2262 2386
2263 WARN_ON(!btrfs_tree_locked(buf)); 2387 btrfs_assert_tree_locked(buf);
2264 if (transid != root->fs_info->generation) { 2388 if (transid != root->fs_info->generation) {
2265 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " 2389 printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
2266 "found %llu running %llu\n", 2390 "found %llu running %llu\n",
@@ -2269,7 +2393,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
2269 (unsigned long long)root->fs_info->generation); 2393 (unsigned long long)root->fs_info->generation);
2270 WARN_ON(1); 2394 WARN_ON(1);
2271 } 2395 }
2272 set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); 2396 was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
2397 buf);
2398 if (!was_dirty) {
2399 spin_lock(&root->fs_info->delalloc_lock);
2400 root->fs_info->dirty_metadata_bytes += buf->len;
2401 spin_unlock(&root->fs_info->delalloc_lock);
2402 }
2273} 2403}
2274 2404
2275void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) 2405void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
@@ -2284,7 +2414,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
2284 unsigned long thresh = 32 * 1024 * 1024; 2414 unsigned long thresh = 32 * 1024 * 1024;
2285 tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 2415 tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
2286 2416
2287 if (current_is_pdflush() || current->flags & PF_MEMALLOC) 2417 if (current->flags & PF_MEMALLOC)
2288 return; 2418 return;
2289 2419
2290 num_dirty = count_range_bits(tree, &start, (u64)-1, 2420 num_dirty = count_range_bits(tree, &start, (u64)-1,
@@ -2302,7 +2432,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
2302 int ret; 2432 int ret;
2303 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); 2433 ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
2304 if (ret == 0) 2434 if (ret == 0)
2305 buf->flags |= EXTENT_UPTODATE; 2435 set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags);
2306 return ret; 2436 return ret;
2307} 2437}
2308 2438
@@ -2324,9 +2454,17 @@ int btree_lock_page_hook(struct page *page)
2324 goto out; 2454 goto out;
2325 2455
2326 btrfs_tree_lock(eb); 2456 btrfs_tree_lock(eb);
2327 spin_lock(&root->fs_info->hash_lock);
2328 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 2457 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
2329 spin_unlock(&root->fs_info->hash_lock); 2458
2459 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
2460 spin_lock(&root->fs_info->delalloc_lock);
2461 if (root->fs_info->dirty_metadata_bytes >= eb->len)
2462 root->fs_info->dirty_metadata_bytes -= eb->len;
2463 else
2464 WARN_ON(1);
2465 spin_unlock(&root->fs_info->delalloc_lock);
2466 }
2467
2330 btrfs_tree_unlock(eb); 2468 btrfs_tree_unlock(eb);
2331 free_extent_buffer(eb); 2469 free_extent_buffer(eb);
2332out: 2470out: