diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 260 |
1 files changed, 199 insertions, 61 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81a313874ae5..a6b83744b05d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -16,7 +16,6 @@ | |||
16 | * Boston, MA 02111-1307, USA. | 16 | * Boston, MA 02111-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/version.h> | ||
20 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
21 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
22 | #include <linux/scatterlist.h> | 21 | #include <linux/scatterlist.h> |
@@ -39,6 +38,7 @@ | |||
39 | #include "locking.h" | 38 | #include "locking.h" |
40 | #include "ref-cache.h" | 39 | #include "ref-cache.h" |
41 | #include "tree-log.h" | 40 | #include "tree-log.h" |
41 | #include "free-space-cache.h" | ||
42 | 42 | ||
43 | static struct extent_io_ops btree_extent_io_ops; | 43 | static struct extent_io_ops btree_extent_io_ops; |
44 | static void end_workqueue_fn(struct btrfs_work *work); | 44 | static void end_workqueue_fn(struct btrfs_work *work); |
@@ -76,6 +76,40 @@ struct async_submit_bio { | |||
76 | struct btrfs_work work; | 76 | struct btrfs_work work; |
77 | }; | 77 | }; |
78 | 78 | ||
79 | /* These are used to set the lockdep class on the extent buffer locks. | ||
80 | * The class is set by the readpage_end_io_hook once the block's level is | ||
81 | * known (just ahead of csum validation) and before the pages are unlocked. | ||
82 | * | ||
83 | * The lockdep class is also set by btrfs_init_new_buffer on freshly | ||
84 | * allocated blocks. | ||
85 | * | ||
86 | * The class is based on the level in the tree block, which allows lockdep | ||
87 | * to know that lower nodes nest inside the locks of higher nodes. | ||
88 | * | ||
89 | * We also add a check to make sure the highest level of the tree is | ||
90 | * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this | ||
91 | * code needs to be updated as well. | ||
92 | */ | ||
93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
94 | # if BTRFS_MAX_LEVEL != 8 | ||
95 | # error | ||
96 | # endif | ||
97 | static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; | ||
98 | static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { | ||
99 | /* leaf */ | ||
100 | "btrfs-extent-00", | ||
101 | "btrfs-extent-01", | ||
102 | "btrfs-extent-02", | ||
103 | "btrfs-extent-03", | ||
104 | "btrfs-extent-04", | ||
105 | "btrfs-extent-05", | ||
106 | "btrfs-extent-06", | ||
107 | "btrfs-extent-07", | ||
108 | /* highest possible level */ | ||
109 | "btrfs-extent-08", | ||
110 | }; | ||
111 | #endif | ||
112 | |||
79 | /* | 113 | /* |
80 | * extents on the btree inode are pretty simple, there's one extent | 114 | * extents on the btree inode are pretty simple, there's one extent |
81 | * that covers the entire device | 115 | * that covers the entire device |
@@ -348,6 +382,15 @@ static int check_tree_block_fsid(struct btrfs_root *root, | |||
348 | return ret; | 382 | return ret; |
349 | } | 383 | } |
350 | 384 | ||
385 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
386 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | ||
387 | { | ||
388 | lockdep_set_class_and_name(&eb->lock, | ||
389 | &btrfs_eb_class[level], | ||
390 | btrfs_eb_name[level]); | ||
391 | } | ||
392 | #endif | ||
393 | |||
351 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 394 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
352 | struct extent_state *state) | 395 | struct extent_state *state) |
353 | { | 396 | { |
@@ -393,6 +436,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
393 | } | 436 | } |
394 | found_level = btrfs_header_level(eb); | 437 | found_level = btrfs_header_level(eb); |
395 | 438 | ||
439 | btrfs_set_buffer_lockdep_class(eb, found_level); | ||
440 | |||
396 | ret = csum_tree_block(root, eb, 1); | 441 | ret = csum_tree_block(root, eb, 1); |
397 | if (ret) | 442 | if (ret) |
398 | ret = -EIO; | 443 | ret = -EIO; |
@@ -534,6 +579,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
534 | async->bio_flags = bio_flags; | 579 | async->bio_flags = bio_flags; |
535 | 580 | ||
536 | atomic_inc(&fs_info->nr_async_submits); | 581 | atomic_inc(&fs_info->nr_async_submits); |
582 | |||
583 | if (rw & (1 << BIO_RW_SYNCIO)) | ||
584 | btrfs_set_work_high_prio(&async->work); | ||
585 | |||
537 | btrfs_queue_worker(&fs_info->workers, &async->work); | 586 | btrfs_queue_worker(&fs_info->workers, &async->work); |
538 | #if 0 | 587 | #if 0 |
539 | int limit = btrfs_async_submit_limit(fs_info); | 588 | int limit = btrfs_async_submit_limit(fs_info); |
@@ -611,6 +660,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
611 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 660 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
612 | mirror_num, 0); | 661 | mirror_num, 0); |
613 | } | 662 | } |
663 | |||
614 | /* | 664 | /* |
615 | * kthread helpers are used to submit writes so that checksumming | 665 | * kthread helpers are used to submit writes so that checksumming |
616 | * can happen in parallel across all CPUs | 666 | * can happen in parallel across all CPUs |
@@ -624,14 +674,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
624 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 674 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
625 | { | 675 | { |
626 | struct extent_io_tree *tree; | 676 | struct extent_io_tree *tree; |
677 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
678 | struct extent_buffer *eb; | ||
679 | int was_dirty; | ||
680 | |||
627 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 681 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
682 | if (!(current->flags & PF_MEMALLOC)) { | ||
683 | return extent_write_full_page(tree, page, | ||
684 | btree_get_extent, wbc); | ||
685 | } | ||
628 | 686 | ||
629 | if (current->flags & PF_MEMALLOC) { | 687 | redirty_page_for_writepage(wbc, page); |
630 | redirty_page_for_writepage(wbc, page); | 688 | eb = btrfs_find_tree_block(root, page_offset(page), |
631 | unlock_page(page); | 689 | PAGE_CACHE_SIZE); |
632 | return 0; | 690 | WARN_ON(!eb); |
691 | |||
692 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
693 | if (!was_dirty) { | ||
694 | spin_lock(&root->fs_info->delalloc_lock); | ||
695 | root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; | ||
696 | spin_unlock(&root->fs_info->delalloc_lock); | ||
633 | } | 697 | } |
634 | return extent_write_full_page(tree, page, btree_get_extent, wbc); | 698 | free_extent_buffer(eb); |
699 | |||
700 | unlock_page(page); | ||
701 | return 0; | ||
635 | } | 702 | } |
636 | 703 | ||
637 | static int btree_writepages(struct address_space *mapping, | 704 | static int btree_writepages(struct address_space *mapping, |
@@ -640,15 +707,15 @@ static int btree_writepages(struct address_space *mapping, | |||
640 | struct extent_io_tree *tree; | 707 | struct extent_io_tree *tree; |
641 | tree = &BTRFS_I(mapping->host)->io_tree; | 708 | tree = &BTRFS_I(mapping->host)->io_tree; |
642 | if (wbc->sync_mode == WB_SYNC_NONE) { | 709 | if (wbc->sync_mode == WB_SYNC_NONE) { |
710 | struct btrfs_root *root = BTRFS_I(mapping->host)->root; | ||
643 | u64 num_dirty; | 711 | u64 num_dirty; |
644 | u64 start = 0; | ||
645 | unsigned long thresh = 32 * 1024 * 1024; | 712 | unsigned long thresh = 32 * 1024 * 1024; |
646 | 713 | ||
647 | if (wbc->for_kupdate) | 714 | if (wbc->for_kupdate) |
648 | return 0; | 715 | return 0; |
649 | 716 | ||
650 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 717 | /* unlocked read, so a bit racy, but that's ok for a threshold check */ |
651 | thresh, EXTENT_DIRTY); | 718 | num_dirty = root->fs_info->dirty_metadata_bytes; |
652 | if (num_dirty < thresh) | 719 | if (num_dirty < thresh) |
653 | return 0; | 720 | return 0; |
654 | } | 721 | } |
@@ -800,7 +867,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
800 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 867 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
801 | 868 | ||
802 | if (ret == 0) | 869 | if (ret == 0) |
803 | buf->flags |= EXTENT_UPTODATE; | 870 | set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); |
804 | else | 871 | else |
805 | WARN_ON(1); | 872 | WARN_ON(1); |
806 | return buf; | 873 | return buf; |
@@ -813,7 +880,19 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
813 | struct inode *btree_inode = root->fs_info->btree_inode; | 880 | struct inode *btree_inode = root->fs_info->btree_inode; |
814 | if (btrfs_header_generation(buf) == | 881 | if (btrfs_header_generation(buf) == |
815 | root->fs_info->running_transaction->transid) { | 882 | root->fs_info->running_transaction->transid) { |
816 | WARN_ON(!btrfs_tree_locked(buf)); | 883 | btrfs_assert_tree_locked(buf); |
884 | |||
885 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { | ||
886 | spin_lock(&root->fs_info->delalloc_lock); | ||
887 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | ||
888 | root->fs_info->dirty_metadata_bytes -= buf->len; | ||
889 | else | ||
890 | WARN_ON(1); | ||
891 | spin_unlock(&root->fs_info->delalloc_lock); | ||
892 | } | ||
893 | |||
894 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | ||
895 | btrfs_set_lock_blocking(buf); | ||
817 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 896 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
818 | buf); | 897 | buf); |
819 | } | 898 | } |
@@ -850,6 +929,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
850 | spin_lock_init(&root->list_lock); | 929 | spin_lock_init(&root->list_lock); |
851 | mutex_init(&root->objectid_mutex); | 930 | mutex_init(&root->objectid_mutex); |
852 | mutex_init(&root->log_mutex); | 931 | mutex_init(&root->log_mutex); |
932 | init_waitqueue_head(&root->log_writer_wait); | ||
933 | init_waitqueue_head(&root->log_commit_wait[0]); | ||
934 | init_waitqueue_head(&root->log_commit_wait[1]); | ||
935 | atomic_set(&root->log_commit[0], 0); | ||
936 | atomic_set(&root->log_commit[1], 0); | ||
937 | atomic_set(&root->log_writers, 0); | ||
938 | root->log_batch = 0; | ||
939 | root->log_transid = 0; | ||
853 | extent_io_tree_init(&root->dirty_log_pages, | 940 | extent_io_tree_init(&root->dirty_log_pages, |
854 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 941 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
855 | 942 | ||
@@ -934,15 +1021,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | |||
934 | return 0; | 1021 | return 0; |
935 | } | 1022 | } |
936 | 1023 | ||
937 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | 1024 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
938 | struct btrfs_fs_info *fs_info) | 1025 | struct btrfs_fs_info *fs_info) |
939 | { | 1026 | { |
940 | struct btrfs_root *root; | 1027 | struct btrfs_root *root; |
941 | struct btrfs_root *tree_root = fs_info->tree_root; | 1028 | struct btrfs_root *tree_root = fs_info->tree_root; |
1029 | struct extent_buffer *leaf; | ||
942 | 1030 | ||
943 | root = kzalloc(sizeof(*root), GFP_NOFS); | 1031 | root = kzalloc(sizeof(*root), GFP_NOFS); |
944 | if (!root) | 1032 | if (!root) |
945 | return -ENOMEM; | 1033 | return ERR_PTR(-ENOMEM); |
946 | 1034 | ||
947 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1035 | __setup_root(tree_root->nodesize, tree_root->leafsize, |
948 | tree_root->sectorsize, tree_root->stripesize, | 1036 | tree_root->sectorsize, tree_root->stripesize, |
@@ -951,12 +1039,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | |||
951 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; | 1039 | root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; |
952 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; | 1040 | root->root_key.type = BTRFS_ROOT_ITEM_KEY; |
953 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; | 1041 | root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; |
1042 | /* | ||
1043 | * log trees do not get reference counted because they go away | ||
1044 | * before a real commit is actually done. They do store pointers | ||
1045 | * to file data extents, and those reference counts still get | ||
1046 | * updated (along with back refs to the log tree). | ||
1047 | */ | ||
954 | root->ref_cows = 0; | 1048 | root->ref_cows = 0; |
955 | 1049 | ||
956 | root->node = btrfs_alloc_free_block(trans, root, root->leafsize, | 1050 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
957 | 0, BTRFS_TREE_LOG_OBJECTID, | 1051 | 0, BTRFS_TREE_LOG_OBJECTID, |
958 | trans->transid, 0, 0, 0); | 1052 | trans->transid, 0, 0, 0); |
1053 | if (IS_ERR(leaf)) { | ||
1054 | kfree(root); | ||
1055 | return ERR_CAST(leaf); | ||
1056 | } | ||
959 | 1057 | ||
1058 | root->node = leaf; | ||
960 | btrfs_set_header_nritems(root->node, 0); | 1059 | btrfs_set_header_nritems(root->node, 0); |
961 | btrfs_set_header_level(root->node, 0); | 1060 | btrfs_set_header_level(root->node, 0); |
962 | btrfs_set_header_bytenr(root->node, root->node->start); | 1061 | btrfs_set_header_bytenr(root->node, root->node->start); |
@@ -968,7 +1067,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | |||
968 | BTRFS_FSID_SIZE); | 1067 | BTRFS_FSID_SIZE); |
969 | btrfs_mark_buffer_dirty(root->node); | 1068 | btrfs_mark_buffer_dirty(root->node); |
970 | btrfs_tree_unlock(root->node); | 1069 | btrfs_tree_unlock(root->node); |
971 | fs_info->log_root_tree = root; | 1070 | return root; |
1071 | } | ||
1072 | |||
1073 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | ||
1074 | struct btrfs_fs_info *fs_info) | ||
1075 | { | ||
1076 | struct btrfs_root *log_root; | ||
1077 | |||
1078 | log_root = alloc_log_tree(trans, fs_info); | ||
1079 | if (IS_ERR(log_root)) | ||
1080 | return PTR_ERR(log_root); | ||
1081 | WARN_ON(fs_info->log_root_tree); | ||
1082 | fs_info->log_root_tree = log_root; | ||
1083 | return 0; | ||
1084 | } | ||
1085 | |||
1086 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | ||
1087 | struct btrfs_root *root) | ||
1088 | { | ||
1089 | struct btrfs_root *log_root; | ||
1090 | struct btrfs_inode_item *inode_item; | ||
1091 | |||
1092 | log_root = alloc_log_tree(trans, root->fs_info); | ||
1093 | if (IS_ERR(log_root)) | ||
1094 | return PTR_ERR(log_root); | ||
1095 | |||
1096 | log_root->last_trans = trans->transid; | ||
1097 | log_root->root_key.offset = root->root_key.objectid; | ||
1098 | |||
1099 | inode_item = &log_root->root_item.inode; | ||
1100 | inode_item->generation = cpu_to_le64(1); | ||
1101 | inode_item->size = cpu_to_le64(3); | ||
1102 | inode_item->nlink = cpu_to_le32(1); | ||
1103 | inode_item->nbytes = cpu_to_le64(root->leafsize); | ||
1104 | inode_item->mode = cpu_to_le32(S_IFDIR | 0755); | ||
1105 | |||
1106 | btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); | ||
1107 | btrfs_set_root_generation(&log_root->root_item, trans->transid); | ||
1108 | |||
1109 | WARN_ON(root->log_root); | ||
1110 | root->log_root = log_root; | ||
1111 | root->log_transid = 0; | ||
972 | return 0; | 1112 | return 0; |
973 | } | 1113 | } |
974 | 1114 | ||
@@ -1136,7 +1276,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1136 | { | 1276 | { |
1137 | struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; | 1277 | struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; |
1138 | int ret = 0; | 1278 | int ret = 0; |
1139 | struct list_head *cur; | ||
1140 | struct btrfs_device *device; | 1279 | struct btrfs_device *device; |
1141 | struct backing_dev_info *bdi; | 1280 | struct backing_dev_info *bdi; |
1142 | #if 0 | 1281 | #if 0 |
@@ -1144,8 +1283,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1144 | btrfs_congested_async(info, 0)) | 1283 | btrfs_congested_async(info, 0)) |
1145 | return 1; | 1284 | return 1; |
1146 | #endif | 1285 | #endif |
1147 | list_for_each(cur, &info->fs_devices->devices) { | 1286 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { |
1148 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
1149 | if (!device->bdev) | 1287 | if (!device->bdev) |
1150 | continue; | 1288 | continue; |
1151 | bdi = blk_get_backing_dev_info(device->bdev); | 1289 | bdi = blk_get_backing_dev_info(device->bdev); |
@@ -1163,13 +1301,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1163 | */ | 1301 | */ |
1164 | static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | 1302 | static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) |
1165 | { | 1303 | { |
1166 | struct list_head *cur; | ||
1167 | struct btrfs_device *device; | 1304 | struct btrfs_device *device; |
1168 | struct btrfs_fs_info *info; | 1305 | struct btrfs_fs_info *info; |
1169 | 1306 | ||
1170 | info = (struct btrfs_fs_info *)bdi->unplug_io_data; | 1307 | info = (struct btrfs_fs_info *)bdi->unplug_io_data; |
1171 | list_for_each(cur, &info->fs_devices->devices) { | 1308 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { |
1172 | device = list_entry(cur, struct btrfs_device, dev_list); | ||
1173 | if (!device->bdev) | 1309 | if (!device->bdev) |
1174 | continue; | 1310 | continue; |
1175 | 1311 | ||
@@ -1282,8 +1418,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1282 | 1418 | ||
1283 | ret = extent_range_uptodate(io_tree, start + length, | 1419 | ret = extent_range_uptodate(io_tree, start + length, |
1284 | start + buf_len - 1); | 1420 | start + buf_len - 1); |
1285 | if (ret == 1) | ||
1286 | return ret; | ||
1287 | return ret; | 1421 | return ret; |
1288 | } | 1422 | } |
1289 | 1423 | ||
@@ -1366,12 +1500,6 @@ static int transaction_kthread(void *arg) | |||
1366 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1500 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1367 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1501 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1368 | 1502 | ||
1369 | if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { | ||
1370 | printk(KERN_INFO "btrfs: total reference cache " | ||
1371 | "size %llu\n", | ||
1372 | root->fs_info->total_ref_cache_size); | ||
1373 | } | ||
1374 | |||
1375 | mutex_lock(&root->fs_info->trans_mutex); | 1503 | mutex_lock(&root->fs_info->trans_mutex); |
1376 | cur = root->fs_info->running_transaction; | 1504 | cur = root->fs_info->running_transaction; |
1377 | if (!cur) { | 1505 | if (!cur) { |
@@ -1388,6 +1516,7 @@ static int transaction_kthread(void *arg) | |||
1388 | mutex_unlock(&root->fs_info->trans_mutex); | 1516 | mutex_unlock(&root->fs_info->trans_mutex); |
1389 | trans = btrfs_start_transaction(root, 1); | 1517 | trans = btrfs_start_transaction(root, 1); |
1390 | ret = btrfs_commit_transaction(trans, root); | 1518 | ret = btrfs_commit_transaction(trans, root); |
1519 | |||
1391 | sleep: | 1520 | sleep: |
1392 | wake_up_process(root->fs_info->cleaner_kthread); | 1521 | wake_up_process(root->fs_info->cleaner_kthread); |
1393 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1522 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1447,7 +1576,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1447 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1576 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1448 | INIT_LIST_HEAD(&fs_info->hashers); | 1577 | INIT_LIST_HEAD(&fs_info->hashers); |
1449 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1578 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1450 | spin_lock_init(&fs_info->hash_lock); | 1579 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
1451 | spin_lock_init(&fs_info->delalloc_lock); | 1580 | spin_lock_init(&fs_info->delalloc_lock); |
1452 | spin_lock_init(&fs_info->new_trans_lock); | 1581 | spin_lock_init(&fs_info->new_trans_lock); |
1453 | spin_lock_init(&fs_info->ref_cache_lock); | 1582 | spin_lock_init(&fs_info->ref_cache_lock); |
@@ -1507,10 +1636,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1507 | 1636 | ||
1508 | extent_io_tree_init(&fs_info->pinned_extents, | 1637 | extent_io_tree_init(&fs_info->pinned_extents, |
1509 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1638 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1510 | extent_io_tree_init(&fs_info->pending_del, | ||
1511 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1512 | extent_io_tree_init(&fs_info->extent_ins, | ||
1513 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1514 | fs_info->do_barriers = 1; | 1639 | fs_info->do_barriers = 1; |
1515 | 1640 | ||
1516 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | 1641 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); |
@@ -1523,22 +1648,21 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1523 | insert_inode_hash(fs_info->btree_inode); | 1648 | insert_inode_hash(fs_info->btree_inode); |
1524 | 1649 | ||
1525 | mutex_init(&fs_info->trans_mutex); | 1650 | mutex_init(&fs_info->trans_mutex); |
1651 | mutex_init(&fs_info->ordered_operations_mutex); | ||
1526 | mutex_init(&fs_info->tree_log_mutex); | 1652 | mutex_init(&fs_info->tree_log_mutex); |
1527 | mutex_init(&fs_info->drop_mutex); | 1653 | mutex_init(&fs_info->drop_mutex); |
1528 | mutex_init(&fs_info->extent_ins_mutex); | ||
1529 | mutex_init(&fs_info->pinned_mutex); | ||
1530 | mutex_init(&fs_info->chunk_mutex); | 1654 | mutex_init(&fs_info->chunk_mutex); |
1531 | mutex_init(&fs_info->transaction_kthread_mutex); | 1655 | mutex_init(&fs_info->transaction_kthread_mutex); |
1532 | mutex_init(&fs_info->cleaner_mutex); | 1656 | mutex_init(&fs_info->cleaner_mutex); |
1533 | mutex_init(&fs_info->volume_mutex); | 1657 | mutex_init(&fs_info->volume_mutex); |
1534 | mutex_init(&fs_info->tree_reloc_mutex); | 1658 | mutex_init(&fs_info->tree_reloc_mutex); |
1659 | |||
1660 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | ||
1661 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | ||
1662 | |||
1535 | init_waitqueue_head(&fs_info->transaction_throttle); | 1663 | init_waitqueue_head(&fs_info->transaction_throttle); |
1536 | init_waitqueue_head(&fs_info->transaction_wait); | 1664 | init_waitqueue_head(&fs_info->transaction_wait); |
1537 | init_waitqueue_head(&fs_info->async_submit_wait); | 1665 | init_waitqueue_head(&fs_info->async_submit_wait); |
1538 | init_waitqueue_head(&fs_info->tree_log_wait); | ||
1539 | atomic_set(&fs_info->tree_log_commit, 0); | ||
1540 | atomic_set(&fs_info->tree_log_writers, 0); | ||
1541 | fs_info->tree_log_transid = 0; | ||
1542 | 1666 | ||
1543 | __setup_root(4096, 4096, 4096, 4096, tree_root, | 1667 | __setup_root(4096, 4096, 4096, 4096, tree_root, |
1544 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 1668 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
@@ -1627,6 +1751,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1627 | * low idle thresh | 1751 | * low idle thresh |
1628 | */ | 1752 | */ |
1629 | fs_info->endio_workers.idle_thresh = 4; | 1753 | fs_info->endio_workers.idle_thresh = 4; |
1754 | fs_info->endio_meta_workers.idle_thresh = 4; | ||
1755 | |||
1630 | fs_info->endio_write_workers.idle_thresh = 64; | 1756 | fs_info->endio_write_workers.idle_thresh = 64; |
1631 | fs_info->endio_meta_write_workers.idle_thresh = 64; | 1757 | fs_info->endio_meta_write_workers.idle_thresh = 64; |
1632 | 1758 | ||
@@ -1720,7 +1846,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1720 | ret = find_and_setup_root(tree_root, fs_info, | 1846 | ret = find_and_setup_root(tree_root, fs_info, |
1721 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 1847 | BTRFS_DEV_TREE_OBJECTID, dev_root); |
1722 | dev_root->track_dirty = 1; | 1848 | dev_root->track_dirty = 1; |
1723 | |||
1724 | if (ret) | 1849 | if (ret) |
1725 | goto fail_extent_root; | 1850 | goto fail_extent_root; |
1726 | 1851 | ||
@@ -1740,13 +1865,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1740 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | 1865 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; |
1741 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 1866 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
1742 | "btrfs-cleaner"); | 1867 | "btrfs-cleaner"); |
1743 | if (!fs_info->cleaner_kthread) | 1868 | if (IS_ERR(fs_info->cleaner_kthread)) |
1744 | goto fail_csum_root; | 1869 | goto fail_csum_root; |
1745 | 1870 | ||
1746 | fs_info->transaction_kthread = kthread_run(transaction_kthread, | 1871 | fs_info->transaction_kthread = kthread_run(transaction_kthread, |
1747 | tree_root, | 1872 | tree_root, |
1748 | "btrfs-transaction"); | 1873 | "btrfs-transaction"); |
1749 | if (!fs_info->transaction_kthread) | 1874 | if (IS_ERR(fs_info->transaction_kthread)) |
1750 | goto fail_cleaner; | 1875 | goto fail_cleaner; |
1751 | 1876 | ||
1752 | if (btrfs_super_log_root(disk_super) != 0) { | 1877 | if (btrfs_super_log_root(disk_super) != 0) { |
@@ -1828,13 +1953,14 @@ fail_sb_buffer: | |||
1828 | fail_iput: | 1953 | fail_iput: |
1829 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 1954 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
1830 | iput(fs_info->btree_inode); | 1955 | iput(fs_info->btree_inode); |
1831 | fail: | 1956 | |
1832 | btrfs_close_devices(fs_info->fs_devices); | 1957 | btrfs_close_devices(fs_info->fs_devices); |
1833 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 1958 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
1959 | bdi_destroy(&fs_info->bdi); | ||
1834 | 1960 | ||
1961 | fail: | ||
1835 | kfree(extent_root); | 1962 | kfree(extent_root); |
1836 | kfree(tree_root); | 1963 | kfree(tree_root); |
1837 | bdi_destroy(&fs_info->bdi); | ||
1838 | kfree(fs_info); | 1964 | kfree(fs_info); |
1839 | kfree(chunk_root); | 1965 | kfree(chunk_root); |
1840 | kfree(dev_root); | 1966 | kfree(dev_root); |
@@ -1974,10 +2100,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
1974 | device->barriers = 0; | 2100 | device->barriers = 0; |
1975 | get_bh(bh); | 2101 | get_bh(bh); |
1976 | lock_buffer(bh); | 2102 | lock_buffer(bh); |
1977 | ret = submit_bh(WRITE, bh); | 2103 | ret = submit_bh(WRITE_SYNC, bh); |
1978 | } | 2104 | } |
1979 | } else { | 2105 | } else { |
1980 | ret = submit_bh(WRITE, bh); | 2106 | ret = submit_bh(WRITE_SYNC, bh); |
1981 | } | 2107 | } |
1982 | 2108 | ||
1983 | if (!ret && wait) { | 2109 | if (!ret && wait) { |
@@ -1995,7 +2121,6 @@ static int write_dev_supers(struct btrfs_device *device, | |||
1995 | 2121 | ||
1996 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 2122 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
1997 | { | 2123 | { |
1998 | struct list_head *cur; | ||
1999 | struct list_head *head = &root->fs_info->fs_devices->devices; | 2124 | struct list_head *head = &root->fs_info->fs_devices->devices; |
2000 | struct btrfs_device *dev; | 2125 | struct btrfs_device *dev; |
2001 | struct btrfs_super_block *sb; | 2126 | struct btrfs_super_block *sb; |
@@ -2011,8 +2136,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2011 | 2136 | ||
2012 | sb = &root->fs_info->super_for_commit; | 2137 | sb = &root->fs_info->super_for_commit; |
2013 | dev_item = &sb->dev_item; | 2138 | dev_item = &sb->dev_item; |
2014 | list_for_each(cur, head) { | 2139 | list_for_each_entry(dev, head, dev_list) { |
2015 | dev = list_entry(cur, struct btrfs_device, dev_list); | ||
2016 | if (!dev->bdev) { | 2140 | if (!dev->bdev) { |
2017 | total_errors++; | 2141 | total_errors++; |
2018 | continue; | 2142 | continue; |
@@ -2045,8 +2169,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2045 | } | 2169 | } |
2046 | 2170 | ||
2047 | total_errors = 0; | 2171 | total_errors = 0; |
2048 | list_for_each(cur, head) { | 2172 | list_for_each_entry(dev, head, dev_list) { |
2049 | dev = list_entry(cur, struct btrfs_device, dev_list); | ||
2050 | if (!dev->bdev) | 2173 | if (!dev->bdev) |
2051 | continue; | 2174 | continue; |
2052 | if (!dev->in_fs_metadata || !dev->writeable) | 2175 | if (!dev->in_fs_metadata || !dev->writeable) |
@@ -2259,8 +2382,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2259 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | 2382 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; |
2260 | u64 transid = btrfs_header_generation(buf); | 2383 | u64 transid = btrfs_header_generation(buf); |
2261 | struct inode *btree_inode = root->fs_info->btree_inode; | 2384 | struct inode *btree_inode = root->fs_info->btree_inode; |
2385 | int was_dirty; | ||
2262 | 2386 | ||
2263 | WARN_ON(!btrfs_tree_locked(buf)); | 2387 | btrfs_assert_tree_locked(buf); |
2264 | if (transid != root->fs_info->generation) { | 2388 | if (transid != root->fs_info->generation) { |
2265 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " | 2389 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " |
2266 | "found %llu running %llu\n", | 2390 | "found %llu running %llu\n", |
@@ -2269,7 +2393,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2269 | (unsigned long long)root->fs_info->generation); | 2393 | (unsigned long long)root->fs_info->generation); |
2270 | WARN_ON(1); | 2394 | WARN_ON(1); |
2271 | } | 2395 | } |
2272 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); | 2396 | was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
2397 | buf); | ||
2398 | if (!was_dirty) { | ||
2399 | spin_lock(&root->fs_info->delalloc_lock); | ||
2400 | root->fs_info->dirty_metadata_bytes += buf->len; | ||
2401 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2402 | } | ||
2273 | } | 2403 | } |
2274 | 2404 | ||
2275 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 2405 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) |
@@ -2284,7 +2414,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
2284 | unsigned long thresh = 32 * 1024 * 1024; | 2414 | unsigned long thresh = 32 * 1024 * 1024; |
2285 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 2415 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
2286 | 2416 | ||
2287 | if (current_is_pdflush() || current->flags & PF_MEMALLOC) | 2417 | if (current->flags & PF_MEMALLOC) |
2288 | return; | 2418 | return; |
2289 | 2419 | ||
2290 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 2420 | num_dirty = count_range_bits(tree, &start, (u64)-1, |
@@ -2302,7 +2432,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2302 | int ret; | 2432 | int ret; |
2303 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 2433 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
2304 | if (ret == 0) | 2434 | if (ret == 0) |
2305 | buf->flags |= EXTENT_UPTODATE; | 2435 | set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); |
2306 | return ret; | 2436 | return ret; |
2307 | } | 2437 | } |
2308 | 2438 | ||
@@ -2324,9 +2454,17 @@ int btree_lock_page_hook(struct page *page) | |||
2324 | goto out; | 2454 | goto out; |
2325 | 2455 | ||
2326 | btrfs_tree_lock(eb); | 2456 | btrfs_tree_lock(eb); |
2327 | spin_lock(&root->fs_info->hash_lock); | ||
2328 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 2457 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
2329 | spin_unlock(&root->fs_info->hash_lock); | 2458 | |
2459 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
2460 | spin_lock(&root->fs_info->delalloc_lock); | ||
2461 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
2462 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
2463 | else | ||
2464 | WARN_ON(1); | ||
2465 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2466 | } | ||
2467 | |||
2330 | btrfs_tree_unlock(eb); | 2468 | btrfs_tree_unlock(eb); |
2331 | free_extent_buffer(eb); | 2469 | free_extent_buffer(eb); |
2332 | out: | 2470 | out: |